block_splitter_literal.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. package brotli
  2. import "math"
  3. /* Copyright 2013 Google Inc. All Rights Reserved.
  4. Distributed under MIT license.
  5. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  6. */
  7. func initialEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
  8. var seed uint32 = 7
  9. var block_length uint = length / num_histograms
  10. var i uint
  11. clearHistogramsLiteral(histograms, num_histograms)
  12. for i = 0; i < num_histograms; i++ {
  13. var pos uint = length * i / num_histograms
  14. if i != 0 {
  15. pos += uint(myRand(&seed) % uint32(block_length))
  16. }
  17. if pos+stride >= length {
  18. pos = length - stride - 1
  19. }
  20. histogramAddVectorLiteral(&histograms[i], data[pos:], stride)
  21. }
  22. }
  23. func randomSampleLiteral(seed *uint32, data []byte, length uint, stride uint, sample *histogramLiteral) {
  24. var pos uint = 0
  25. if stride >= length {
  26. stride = length
  27. } else {
  28. pos = uint(myRand(seed) % uint32(length-stride+1))
  29. }
  30. histogramAddVectorLiteral(sample, data[pos:], stride)
  31. }
  32. func refineEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
  33. var iters uint = kIterMulForRefining*length/stride + kMinItersForRefining
  34. var seed uint32 = 7
  35. var iter uint
  36. iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms
  37. for iter = 0; iter < iters; iter++ {
  38. var sample histogramLiteral
  39. histogramClearLiteral(&sample)
  40. randomSampleLiteral(&seed, data, length, stride, &sample)
  41. histogramAddHistogramLiteral(&histograms[iter%num_histograms], &sample)
  42. }
  43. }
  44. /* Assigns a block id from the range [0, num_histograms) to each data element
  45. in data[0..length) and fills in block_id[0..length) with the assigned values.
  46. Returns the number of blocks, i.e. one plus the number of block switches. */
  47. func findBlocksLiteral(data []byte, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramLiteral, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
  48. var data_size uint = histogramDataSizeLiteral()
  49. var bitmaplen uint = (num_histograms + 7) >> 3
  50. var num_blocks uint = 1
  51. var i uint
  52. var j uint
  53. assert(num_histograms <= 256)
  54. if num_histograms <= 1 {
  55. for i = 0; i < length; i++ {
  56. block_id[i] = 0
  57. }
  58. return 1
  59. }
  60. for i := 0; i < int(data_size*num_histograms); i++ {
  61. insert_cost[i] = 0
  62. }
  63. for i = 0; i < num_histograms; i++ {
  64. insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
  65. }
  66. for i = data_size; i != 0; {
  67. i--
  68. for j = 0; j < num_histograms; j++ {
  69. insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
  70. }
  71. }
  72. for i := 0; i < int(num_histograms); i++ {
  73. cost[i] = 0
  74. }
  75. for i := 0; i < int(length*bitmaplen); i++ {
  76. switch_signal[i] = 0
  77. }
  78. /* After each iteration of this loop, cost[k] will contain the difference
  79. between the minimum cost of arriving at the current byte position using
  80. entropy code k, and the minimum cost of arriving at the current byte
  81. position. This difference is capped at the block switch cost, and if it
  82. reaches block switch cost, it means that when we trace back from the last
  83. position, we need to switch here. */
  84. for i = 0; i < length; i++ {
  85. var byte_ix uint = i
  86. var ix uint = byte_ix * bitmaplen
  87. var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
  88. var min_cost float64 = 1e99
  89. var block_switch_cost float64 = block_switch_bitcost
  90. var k uint
  91. for k = 0; k < num_histograms; k++ {
  92. /* We are coding the symbol in data[byte_ix] with entropy code k. */
  93. cost[k] += insert_cost[insert_cost_ix+k]
  94. if cost[k] < min_cost {
  95. min_cost = cost[k]
  96. block_id[byte_ix] = byte(k)
  97. }
  98. }
  99. /* More blocks for the beginning. */
  100. if byte_ix < 2000 {
  101. block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
  102. }
  103. for k = 0; k < num_histograms; k++ {
  104. cost[k] -= min_cost
  105. if cost[k] >= block_switch_cost {
  106. var mask byte = byte(1 << (k & 7))
  107. cost[k] = block_switch_cost
  108. assert(k>>3 < bitmaplen)
  109. switch_signal[ix+(k>>3)] |= mask
  110. /* Trace back from the last position and switch at the marked places. */
  111. }
  112. }
  113. }
  114. {
  115. var byte_ix uint = length - 1
  116. var ix uint = byte_ix * bitmaplen
  117. var cur_id byte = block_id[byte_ix]
  118. for byte_ix > 0 {
  119. var mask byte = byte(1 << (cur_id & 7))
  120. assert(uint(cur_id)>>3 < bitmaplen)
  121. byte_ix--
  122. ix -= bitmaplen
  123. if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
  124. if cur_id != block_id[byte_ix] {
  125. cur_id = block_id[byte_ix]
  126. num_blocks++
  127. }
  128. }
  129. block_id[byte_ix] = cur_id
  130. }
  131. }
  132. return num_blocks
  133. }
  134. var remapBlockIdsLiteral_kInvalidId uint16 = 256
  135. func remapBlockIdsLiteral(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
  136. var next_id uint16 = 0
  137. var i uint
  138. for i = 0; i < num_histograms; i++ {
  139. new_id[i] = remapBlockIdsLiteral_kInvalidId
  140. }
  141. for i = 0; i < length; i++ {
  142. assert(uint(block_ids[i]) < num_histograms)
  143. if new_id[block_ids[i]] == remapBlockIdsLiteral_kInvalidId {
  144. new_id[block_ids[i]] = next_id
  145. next_id++
  146. }
  147. }
  148. for i = 0; i < length; i++ {
  149. block_ids[i] = byte(new_id[block_ids[i]])
  150. assert(uint(block_ids[i]) < num_histograms)
  151. }
  152. assert(uint(next_id) <= num_histograms)
  153. return uint(next_id)
  154. }
  155. func buildBlockHistogramsLiteral(data []byte, length uint, block_ids []byte, num_histograms uint, histograms []histogramLiteral) {
  156. var i uint
  157. clearHistogramsLiteral(histograms, num_histograms)
  158. for i = 0; i < length; i++ {
  159. histogramAddLiteral(&histograms[block_ids[i]], uint(data[i]))
  160. }
  161. }
  162. var clusterBlocksLiteral_kInvalidIndex uint32 = math.MaxUint32
  163. func clusterBlocksLiteral(data []byte, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
  164. var histogram_symbols []uint32 = make([]uint32, num_blocks)
  165. var block_lengths []uint32 = make([]uint32, num_blocks)
  166. var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
  167. var all_histograms_size uint = 0
  168. var all_histograms_capacity uint = expected_num_clusters
  169. var all_histograms []histogramLiteral = make([]histogramLiteral, all_histograms_capacity)
  170. var cluster_size_size uint = 0
  171. var cluster_size_capacity uint = expected_num_clusters
  172. var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
  173. var num_clusters uint = 0
  174. var histograms []histogramLiteral = make([]histogramLiteral, brotli_min_size_t(num_blocks, histogramsPerBatch))
  175. var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
  176. var pairs_capacity uint = max_num_pairs + 1
  177. var pairs []histogramPair = make([]histogramPair, pairs_capacity)
  178. var pos uint = 0
  179. var clusters []uint32
  180. var num_final_clusters uint
  181. var new_index []uint32
  182. var i uint
  183. var sizes = [histogramsPerBatch]uint32{0}
  184. var new_clusters = [histogramsPerBatch]uint32{0}
  185. var symbols = [histogramsPerBatch]uint32{0}
  186. var remap = [histogramsPerBatch]uint32{0}
  187. for i := 0; i < int(num_blocks); i++ {
  188. block_lengths[i] = 0
  189. }
  190. {
  191. var block_idx uint = 0
  192. for i = 0; i < length; i++ {
  193. assert(block_idx < num_blocks)
  194. block_lengths[block_idx]++
  195. if i+1 == length || block_ids[i] != block_ids[i+1] {
  196. block_idx++
  197. }
  198. }
  199. assert(block_idx == num_blocks)
  200. }
  201. for i = 0; i < num_blocks; i += histogramsPerBatch {
  202. var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
  203. var num_new_clusters uint
  204. var j uint
  205. for j = 0; j < num_to_combine; j++ {
  206. var k uint
  207. histogramClearLiteral(&histograms[j])
  208. for k = 0; uint32(k) < block_lengths[i+j]; k++ {
  209. histogramAddLiteral(&histograms[j], uint(data[pos]))
  210. pos++
  211. }
  212. histograms[j].bit_cost_ = populationCostLiteral(&histograms[j])
  213. new_clusters[j] = uint32(j)
  214. symbols[j] = uint32(j)
  215. sizes[j] = 1
  216. }
  217. num_new_clusters = histogramCombineLiteral(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
  218. if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
  219. var _new_size uint
  220. if all_histograms_capacity == 0 {
  221. _new_size = all_histograms_size + num_new_clusters
  222. } else {
  223. _new_size = all_histograms_capacity
  224. }
  225. var new_array []histogramLiteral
  226. for _new_size < (all_histograms_size + num_new_clusters) {
  227. _new_size *= 2
  228. }
  229. new_array = make([]histogramLiteral, _new_size)
  230. if all_histograms_capacity != 0 {
  231. copy(new_array, all_histograms[:all_histograms_capacity])
  232. }
  233. all_histograms = new_array
  234. all_histograms_capacity = _new_size
  235. }
  236. brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
  237. for j = 0; j < num_new_clusters; j++ {
  238. all_histograms[all_histograms_size] = histograms[new_clusters[j]]
  239. all_histograms_size++
  240. cluster_size[cluster_size_size] = sizes[new_clusters[j]]
  241. cluster_size_size++
  242. remap[new_clusters[j]] = uint32(j)
  243. }
  244. for j = 0; j < num_to_combine; j++ {
  245. histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
  246. }
  247. num_clusters += num_new_clusters
  248. assert(num_clusters == cluster_size_size)
  249. assert(num_clusters == all_histograms_size)
  250. }
  251. histograms = nil
  252. max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
  253. if pairs_capacity < max_num_pairs+1 {
  254. pairs = nil
  255. pairs = make([]histogramPair, (max_num_pairs + 1))
  256. }
  257. clusters = make([]uint32, num_clusters)
  258. for i = 0; i < num_clusters; i++ {
  259. clusters[i] = uint32(i)
  260. }
  261. num_final_clusters = histogramCombineLiteral(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
  262. pairs = nil
  263. cluster_size = nil
  264. new_index = make([]uint32, num_clusters)
  265. for i = 0; i < num_clusters; i++ {
  266. new_index[i] = clusterBlocksLiteral_kInvalidIndex
  267. }
  268. pos = 0
  269. {
  270. var next_index uint32 = 0
  271. for i = 0; i < num_blocks; i++ {
  272. var histo histogramLiteral
  273. var j uint
  274. var best_out uint32
  275. var best_bits float64
  276. histogramClearLiteral(&histo)
  277. for j = 0; uint32(j) < block_lengths[i]; j++ {
  278. histogramAddLiteral(&histo, uint(data[pos]))
  279. pos++
  280. }
  281. if i == 0 {
  282. best_out = histogram_symbols[0]
  283. } else {
  284. best_out = histogram_symbols[i-1]
  285. }
  286. best_bits = histogramBitCostDistanceLiteral(&histo, &all_histograms[best_out])
  287. for j = 0; j < num_final_clusters; j++ {
  288. var cur_bits float64 = histogramBitCostDistanceLiteral(&histo, &all_histograms[clusters[j]])
  289. if cur_bits < best_bits {
  290. best_bits = cur_bits
  291. best_out = clusters[j]
  292. }
  293. }
  294. histogram_symbols[i] = best_out
  295. if new_index[best_out] == clusterBlocksLiteral_kInvalidIndex {
  296. new_index[best_out] = next_index
  297. next_index++
  298. }
  299. }
  300. }
  301. clusters = nil
  302. all_histograms = nil
  303. brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
  304. brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
  305. {
  306. var cur_length uint32 = 0
  307. var block_idx uint = 0
  308. var max_type byte = 0
  309. for i = 0; i < num_blocks; i++ {
  310. cur_length += block_lengths[i]
  311. if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
  312. var id byte = byte(new_index[histogram_symbols[i]])
  313. split.types[block_idx] = id
  314. split.lengths[block_idx] = cur_length
  315. max_type = brotli_max_uint8_t(max_type, id)
  316. cur_length = 0
  317. block_idx++
  318. }
  319. }
  320. split.num_blocks = block_idx
  321. split.num_types = uint(max_type) + 1
  322. }
  323. new_index = nil
  324. block_lengths = nil
  325. histogram_symbols = nil
  326. }
  327. func splitByteVectorLiteral(data []byte, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
  328. var data_size uint = histogramDataSizeLiteral()
  329. var num_histograms uint = length/literals_per_histogram + 1
  330. var histograms []histogramLiteral
  331. if num_histograms > max_histograms {
  332. num_histograms = max_histograms
  333. }
  334. if length == 0 {
  335. split.num_types = 1
  336. return
  337. } else if length < kMinLengthForBlockSplitting {
  338. brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, split.num_blocks+1)
  339. brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, split.num_blocks+1)
  340. split.num_types = 1
  341. split.types[split.num_blocks] = 0
  342. split.lengths[split.num_blocks] = uint32(length)
  343. split.num_blocks++
  344. return
  345. }
  346. histograms = make([]histogramLiteral, num_histograms)
  347. /* Find good entropy codes. */
  348. initialEntropyCodesLiteral(data, length, sampling_stride_length, num_histograms, histograms)
  349. refineEntropyCodesLiteral(data, length, sampling_stride_length, num_histograms, histograms)
  350. {
  351. var block_ids []byte = make([]byte, length)
  352. var num_blocks uint = 0
  353. var bitmaplen uint = (num_histograms + 7) >> 3
  354. var insert_cost []float64 = make([]float64, (data_size * num_histograms))
  355. var cost []float64 = make([]float64, num_histograms)
  356. var switch_signal []byte = make([]byte, (length * bitmaplen))
  357. var new_id []uint16 = make([]uint16, num_histograms)
  358. var iters uint
  359. if params.quality < hqZopflificationQuality {
  360. iters = 3
  361. } else {
  362. iters = 10
  363. }
  364. /* Find a good path through literals with the good entropy codes. */
  365. var i uint
  366. for i = 0; i < iters; i++ {
  367. num_blocks = findBlocksLiteral(data, length, block_switch_cost, num_histograms, histograms, insert_cost, cost, switch_signal, block_ids)
  368. num_histograms = remapBlockIdsLiteral(block_ids, length, new_id, num_histograms)
  369. buildBlockHistogramsLiteral(data, length, block_ids, num_histograms, histograms)
  370. }
  371. insert_cost = nil
  372. cost = nil
  373. switch_signal = nil
  374. new_id = nil
  375. histograms = nil
  376. clusterBlocksLiteral(data, length, num_blocks, block_ids, split)
  377. block_ids = nil
  378. }
  379. }