123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- package brotli
- import (
- "encoding/binary"
- "fmt"
- )
- type hasherCommon struct {
- params hasherParams
- is_prepared_ bool
- dict_num_lookups uint
- dict_num_matches uint
- }
- func (h *hasherCommon) Common() *hasherCommon {
- return h
- }
- type hasherHandle interface {
- Common() *hasherCommon
- Initialize(params *encoderParams)
- Prepare(one_shot bool, input_size uint, data []byte)
- StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
- HashTypeLength() uint
- StoreLookahead() uint
- PrepareDistanceCache(distance_cache []int)
- FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
- StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
- Store(data []byte, mask uint, ix uint)
- }
- const kCutoffTransformsCount uint32 = 10
- /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
- /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
- const kCutoffTransforms uint64 = 0x071B520ADA2D3200
- type hasherSearchResult struct {
- len uint
- distance uint
- score uint
- len_code_delta int
- }
- /* kHashMul32 multiplier has these properties:
- * The multiplier must be odd. Otherwise we may lose the highest bit.
- * No long streaks of ones or zeros.
- * There is no effort to ensure that it is a prime, the oddity is enough
- for this use.
- * The number has been tuned heuristically against compression benchmarks. */
- const kHashMul32 uint32 = 0x1E35A7BD
- const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
- const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
- func hash14(data []byte) uint32 {
- var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
- /* The higher bits contain more mixture from the multiplication,
- so we take our results from there. */
- return h >> (32 - 14)
- }
- func prepareDistanceCache(distance_cache []int, num_distances int) {
- if num_distances > 4 {
- var last_distance int = distance_cache[0]
- distance_cache[4] = last_distance - 1
- distance_cache[5] = last_distance + 1
- distance_cache[6] = last_distance - 2
- distance_cache[7] = last_distance + 2
- distance_cache[8] = last_distance - 3
- distance_cache[9] = last_distance + 3
- if num_distances > 10 {
- var next_last_distance int = distance_cache[1]
- distance_cache[10] = next_last_distance - 1
- distance_cache[11] = next_last_distance + 1
- distance_cache[12] = next_last_distance - 2
- distance_cache[13] = next_last_distance + 2
- distance_cache[14] = next_last_distance - 3
- distance_cache[15] = next_last_distance + 3
- }
- }
- }
- const literalByteScore = 135
- const distanceBitPenalty = 30
- /* Score must be positive after applying maximal penalty. */
- const scoreBase = (distanceBitPenalty * 8 * 8)
- /* Usually, we always choose the longest backward reference. This function
- allows for the exception of that rule.
- If we choose a backward reference that is further away, it will
- usually be coded with more bits. We approximate this by assuming
- log2(distance). If the distance can be expressed in terms of the
- last four distances, we use some heuristic constants to estimate
- the bits cost. For the first up to four literals we use the bit
- cost of the literals from the literal cost model, after that we
- use the average bit cost of the cost model.
- This function is used to sometimes discard a longer backward reference
- when it is not much longer and the bit cost for encoding it is more
- than the saved literals.
- backward_reference_offset MUST be positive. */
- func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
- return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
- }
- func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
- return literalByteScore*uint(copy_length) + scoreBase + 15
- }
- func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
- return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
- }
- func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
- var len uint
- var word_idx uint
- var offset uint
- var matchlen uint
- var backward uint
- var score uint
- len = item & 0x1F
- word_idx = item >> 5
- offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
- if len > max_length {
- return false
- }
- matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
- if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
- return false
- }
- {
- var cut uint = len - matchlen
- var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
- backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
- }
- if backward > max_distance {
- return false
- }
- score = backwardReferenceScore(matchlen, backward)
- if score < out.score {
- return false
- }
- out.len = matchlen
- out.len_code_delta = int(len) - int(matchlen)
- out.distance = backward
- out.score = score
- return true
- }
- func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
- var key uint
- var i uint
- var self *hasherCommon = handle.Common()
- if self.dict_num_matches < self.dict_num_lookups>>7 {
- return
- }
- key = uint(hash14(data) << 1)
- for i = 0; ; (func() { i++; key++ })() {
- var tmp uint
- if shallow {
- tmp = 1
- } else {
- tmp = 2
- }
- if i >= tmp {
- break
- }
- var item uint = uint(dictionary.hash_table[key])
- self.dict_num_lookups++
- if item != 0 {
- var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
- if item_matches {
- self.dict_num_matches++
- }
- }
- }
- }
- type backwardMatch struct {
- distance uint32
- length_and_code uint32
- }
- func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
- self.distance = uint32(dist)
- self.length_and_code = uint32(len << 5)
- }
- func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
- self.distance = uint32(dist)
- var tmp uint
- if len == len_code {
- tmp = 0
- } else {
- tmp = len_code
- }
- self.length_and_code = uint32(len<<5 | tmp)
- }
- func backwardMatchLength(self *backwardMatch) uint {
- return uint(self.length_and_code >> 5)
- }
- func backwardMatchLengthCode(self *backwardMatch) uint {
- var code uint = uint(self.length_and_code) & 31
- if code != 0 {
- return code
- } else {
- return backwardMatchLength(self)
- }
- }
- func hasherReset(handle hasherHandle) {
- if handle == nil {
- return
- }
- handle.Common().is_prepared_ = false
- }
- func newHasher(typ int) hasherHandle {
- switch typ {
- case 2:
- return &hashLongestMatchQuickly{
- bucketBits: 16,
- bucketSweep: 1,
- hashLen: 5,
- useDictionary: true,
- }
- case 3:
- return &hashLongestMatchQuickly{
- bucketBits: 16,
- bucketSweep: 2,
- hashLen: 5,
- useDictionary: false,
- }
- case 4:
- return &hashLongestMatchQuickly{
- bucketBits: 17,
- bucketSweep: 4,
- hashLen: 5,
- useDictionary: true,
- }
- case 5:
- return new(h5)
- case 6:
- return new(h6)
- case 10:
- return new(h10)
- case 35:
- return &hashComposite{
- ha: newHasher(3),
- hb: &hashRolling{jump: 4},
- }
- case 40:
- return &hashForgetfulChain{
- bucketBits: 15,
- numBanks: 1,
- bankBits: 16,
- numLastDistancesToCheck: 4,
- }
- case 41:
- return &hashForgetfulChain{
- bucketBits: 15,
- numBanks: 1,
- bankBits: 16,
- numLastDistancesToCheck: 10,
- }
- case 42:
- return &hashForgetfulChain{
- bucketBits: 15,
- numBanks: 512,
- bankBits: 9,
- numLastDistancesToCheck: 16,
- }
- case 54:
- return &hashLongestMatchQuickly{
- bucketBits: 20,
- bucketSweep: 4,
- hashLen: 7,
- useDictionary: false,
- }
- case 55:
- return &hashComposite{
- ha: newHasher(54),
- hb: &hashRolling{jump: 4},
- }
- case 65:
- return &hashComposite{
- ha: newHasher(6),
- hb: &hashRolling{jump: 1},
- }
- }
- panic(fmt.Sprintf("unknown hasher type: %d", typ))
- }
- func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
- var self hasherHandle = nil
- var common *hasherCommon = nil
- var one_shot bool = (position == 0 && is_last)
- if *handle == nil {
- chooseHasher(params, ¶ms.hasher)
- self = newHasher(params.hasher.type_)
- *handle = self
- common = self.Common()
- common.params = params.hasher
- self.Initialize(params)
- }
- self = *handle
- common = self.Common()
- if !common.is_prepared_ {
- self.Prepare(one_shot, input_size, data)
- if position == 0 {
- common.dict_num_lookups = 0
- common.dict_num_matches = 0
- }
- common.is_prepared_ = true
- }
- }
- func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
- var self hasherHandle
- hasherSetup(handle, params, data, position, input_size, is_last)
- self = *handle
- self.StitchToPreviousBlock(input_size, position, data, mask)
- }
|