123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165 |
- package brotli
- /* Copyright 2015 Google Inc. All Rights Reserved.
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
- /* Greedy block splitter for one block category (literal, command or distance).
- */
- type blockSplitterCommand struct {
- alphabet_size_ uint
- min_block_size_ uint
- split_threshold_ float64
- num_blocks_ uint
- split_ *blockSplit
- histograms_ []histogramCommand
- histograms_size_ *uint
- target_block_size_ uint
- block_size_ uint
- curr_histogram_ix_ uint
- last_histogram_ix_ [2]uint
- last_entropy_ [2]float64
- merge_last_count_ uint
- }
- func initBlockSplitterCommand(self *blockSplitterCommand, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramCommand, histograms_size *uint) {
- var max_num_blocks uint = num_symbols/min_block_size + 1
- var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
- /* We have to allocate one more histogram than the maximum number of block
- types for the current histogram when the meta-block is too big. */
- self.alphabet_size_ = alphabet_size
- self.min_block_size_ = min_block_size
- self.split_threshold_ = split_threshold
- self.num_blocks_ = 0
- self.split_ = split
- self.histograms_size_ = histograms_size
- self.target_block_size_ = min_block_size
- self.block_size_ = 0
- self.curr_histogram_ix_ = 0
- self.merge_last_count_ = 0
- brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
- brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
- self.split_.num_blocks = max_num_blocks
- *histograms_size = max_num_types
- if histograms == nil || cap(*histograms) < int(*histograms_size) {
- *histograms = make([]histogramCommand, (*histograms_size))
- } else {
- *histograms = (*histograms)[:*histograms_size]
- }
- self.histograms_ = *histograms
- /* Clear only current histogram. */
- histogramClearCommand(&self.histograms_[0])
- self.last_histogram_ix_[1] = 0
- self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
- }
- /* Does either of three things:
- (1) emits the current block with a new block type;
- (2) emits the current block with the type of the second last block;
- (3) merges the current block with the last block. */
- func blockSplitterFinishBlockCommand(self *blockSplitterCommand, is_final bool) {
- var split *blockSplit = self.split_
- var last_entropy []float64 = self.last_entropy_[:]
- var histograms []histogramCommand = self.histograms_
- self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
- if self.num_blocks_ == 0 {
- /* Create first block. */
- split.lengths[0] = uint32(self.block_size_)
- split.types[0] = 0
- last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
- last_entropy[1] = last_entropy[0]
- self.num_blocks_++
- split.num_types++
- self.curr_histogram_ix_++
- if self.curr_histogram_ix_ < *self.histograms_size_ {
- histogramClearCommand(&histograms[self.curr_histogram_ix_])
- }
- self.block_size_ = 0
- } else if self.block_size_ > 0 {
- var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
- var combined_histo [2]histogramCommand
- var combined_entropy [2]float64
- var diff [2]float64
- var j uint
- for j = 0; j < 2; j++ {
- var last_histogram_ix uint = self.last_histogram_ix_[j]
- combined_histo[j] = histograms[self.curr_histogram_ix_]
- histogramAddHistogramCommand(&combined_histo[j], &histograms[last_histogram_ix])
- combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
- diff[j] = combined_entropy[j] - entropy - last_entropy[j]
- }
- if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
- /* Create new block. */
- split.lengths[self.num_blocks_] = uint32(self.block_size_)
- split.types[self.num_blocks_] = byte(split.num_types)
- self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
- self.last_histogram_ix_[0] = uint(byte(split.num_types))
- last_entropy[1] = last_entropy[0]
- last_entropy[0] = entropy
- self.num_blocks_++
- split.num_types++
- self.curr_histogram_ix_++
- if self.curr_histogram_ix_ < *self.histograms_size_ {
- histogramClearCommand(&histograms[self.curr_histogram_ix_])
- }
- self.block_size_ = 0
- self.merge_last_count_ = 0
- self.target_block_size_ = self.min_block_size_
- } else if diff[1] < diff[0]-20.0 {
- split.lengths[self.num_blocks_] = uint32(self.block_size_)
- split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
- /* Combine this block with second last block. */
- var tmp uint = self.last_histogram_ix_[0]
- self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
- self.last_histogram_ix_[1] = tmp
- histograms[self.last_histogram_ix_[0]] = combined_histo[1]
- last_entropy[1] = last_entropy[0]
- last_entropy[0] = combined_entropy[1]
- self.num_blocks_++
- self.block_size_ = 0
- histogramClearCommand(&histograms[self.curr_histogram_ix_])
- self.merge_last_count_ = 0
- self.target_block_size_ = self.min_block_size_
- } else {
- /* Combine this block with last block. */
- split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
- histograms[self.last_histogram_ix_[0]] = combined_histo[0]
- last_entropy[0] = combined_entropy[0]
- if split.num_types == 1 {
- last_entropy[1] = last_entropy[0]
- }
- self.block_size_ = 0
- histogramClearCommand(&histograms[self.curr_histogram_ix_])
- self.merge_last_count_++
- if self.merge_last_count_ > 1 {
- self.target_block_size_ += self.min_block_size_
- }
- }
- }
- if is_final {
- *self.histograms_size_ = split.num_types
- split.num_blocks = self.num_blocks_
- }
- }
- /* Adds the next symbol to the current histogram. When the current histogram
- reaches the target size, decides on merging the block. */
- func blockSplitterAddSymbolCommand(self *blockSplitterCommand, symbol uint) {
- histogramAddCommand(&self.histograms_[self.curr_histogram_ix_], symbol)
- self.block_size_++
- if self.block_size_ == self.target_block_size_ {
- blockSplitterFinishBlockCommand(self, false) /* is_final = */
- }
- }
|