metablock_distance.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. package brotli
  2. /* Copyright 2015 Google Inc. All Rights Reserved.
  3. Distributed under MIT license.
  4. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  5. */
  6. /* Greedy block splitter for one block category (literal, command or distance).
  7. */
  8. type blockSplitterDistance struct {
  9. alphabet_size_ uint
  10. min_block_size_ uint
  11. split_threshold_ float64
  12. num_blocks_ uint
  13. split_ *blockSplit
  14. histograms_ []histogramDistance
  15. histograms_size_ *uint
  16. target_block_size_ uint
  17. block_size_ uint
  18. curr_histogram_ix_ uint
  19. last_histogram_ix_ [2]uint
  20. last_entropy_ [2]float64
  21. merge_last_count_ uint
  22. }
  23. func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
  24. var max_num_blocks uint = num_symbols/min_block_size + 1
  25. var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
  26. /* We have to allocate one more histogram than the maximum number of block
  27. types for the current histogram when the meta-block is too big. */
  28. self.alphabet_size_ = alphabet_size
  29. self.min_block_size_ = min_block_size
  30. self.split_threshold_ = split_threshold
  31. self.num_blocks_ = 0
  32. self.split_ = split
  33. self.histograms_size_ = histograms_size
  34. self.target_block_size_ = min_block_size
  35. self.block_size_ = 0
  36. self.curr_histogram_ix_ = 0
  37. self.merge_last_count_ = 0
  38. brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
  39. brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
  40. self.split_.num_blocks = max_num_blocks
  41. *histograms_size = max_num_types
  42. if histograms == nil || cap(*histograms) < int(*histograms_size) {
  43. *histograms = make([]histogramDistance, *histograms_size)
  44. } else {
  45. *histograms = (*histograms)[:*histograms_size]
  46. }
  47. self.histograms_ = *histograms
  48. /* Clear only current histogram. */
  49. histogramClearDistance(&self.histograms_[0])
  50. self.last_histogram_ix_[1] = 0
  51. self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
  52. }
  53. /* Does either of three things:
  54. (1) emits the current block with a new block type;
  55. (2) emits the current block with the type of the second last block;
  56. (3) merges the current block with the last block. */
  57. func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
  58. var split *blockSplit = self.split_
  59. var last_entropy []float64 = self.last_entropy_[:]
  60. var histograms []histogramDistance = self.histograms_
  61. self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
  62. if self.num_blocks_ == 0 {
  63. /* Create first block. */
  64. split.lengths[0] = uint32(self.block_size_)
  65. split.types[0] = 0
  66. last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
  67. last_entropy[1] = last_entropy[0]
  68. self.num_blocks_++
  69. split.num_types++
  70. self.curr_histogram_ix_++
  71. if self.curr_histogram_ix_ < *self.histograms_size_ {
  72. histogramClearDistance(&histograms[self.curr_histogram_ix_])
  73. }
  74. self.block_size_ = 0
  75. } else if self.block_size_ > 0 {
  76. var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
  77. var combined_histo [2]histogramDistance
  78. var combined_entropy [2]float64
  79. var diff [2]float64
  80. var j uint
  81. for j = 0; j < 2; j++ {
  82. var last_histogram_ix uint = self.last_histogram_ix_[j]
  83. combined_histo[j] = histograms[self.curr_histogram_ix_]
  84. histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
  85. combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
  86. diff[j] = combined_entropy[j] - entropy - last_entropy[j]
  87. }
  88. if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
  89. /* Create new block. */
  90. split.lengths[self.num_blocks_] = uint32(self.block_size_)
  91. split.types[self.num_blocks_] = byte(split.num_types)
  92. self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
  93. self.last_histogram_ix_[0] = uint(byte(split.num_types))
  94. last_entropy[1] = last_entropy[0]
  95. last_entropy[0] = entropy
  96. self.num_blocks_++
  97. split.num_types++
  98. self.curr_histogram_ix_++
  99. if self.curr_histogram_ix_ < *self.histograms_size_ {
  100. histogramClearDistance(&histograms[self.curr_histogram_ix_])
  101. }
  102. self.block_size_ = 0
  103. self.merge_last_count_ = 0
  104. self.target_block_size_ = self.min_block_size_
  105. } else if diff[1] < diff[0]-20.0 {
  106. split.lengths[self.num_blocks_] = uint32(self.block_size_)
  107. split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
  108. /* Combine this block with second last block. */
  109. var tmp uint = self.last_histogram_ix_[0]
  110. self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
  111. self.last_histogram_ix_[1] = tmp
  112. histograms[self.last_histogram_ix_[0]] = combined_histo[1]
  113. last_entropy[1] = last_entropy[0]
  114. last_entropy[0] = combined_entropy[1]
  115. self.num_blocks_++
  116. self.block_size_ = 0
  117. histogramClearDistance(&histograms[self.curr_histogram_ix_])
  118. self.merge_last_count_ = 0
  119. self.target_block_size_ = self.min_block_size_
  120. } else {
  121. /* Combine this block with last block. */
  122. split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
  123. histograms[self.last_histogram_ix_[0]] = combined_histo[0]
  124. last_entropy[0] = combined_entropy[0]
  125. if split.num_types == 1 {
  126. last_entropy[1] = last_entropy[0]
  127. }
  128. self.block_size_ = 0
  129. histogramClearDistance(&histograms[self.curr_histogram_ix_])
  130. self.merge_last_count_++
  131. if self.merge_last_count_ > 1 {
  132. self.target_block_size_ += self.min_block_size_
  133. }
  134. }
  135. }
  136. if is_final {
  137. *self.histograms_size_ = split.num_types
  138. split.num_blocks = self.num_blocks_
  139. }
  140. }
  141. /* Adds the next symbol to the current histogram. When the current histogram
  142. reaches the target size, decides on merging the block. */
  143. func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
  144. histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol)
  145. self.block_size_++
  146. if self.block_size_ == self.target_block_size_ {
  147. blockSplitterFinishBlockDistance(self, false) /* is_final = */
  148. }
  149. }