hash_forgetful_chain.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. package brotli
  2. import "encoding/binary"
  3. /* Copyright 2016 Google Inc. All Rights Reserved.
  4. Distributed under MIT license.
  5. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  6. */
  7. func (*hashForgetfulChain) HashTypeLength() uint {
  8. return 4
  9. }
  10. func (*hashForgetfulChain) StoreLookahead() uint {
  11. return 4
  12. }
  13. /* HashBytes is the function that chooses the bucket to place the address in.*/
  14. func (h *hashForgetfulChain) HashBytes(data []byte) uint {
  15. var hash uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
  16. /* The higher bits contain more mixture from the multiplication,
  17. so we take our results from there. */
  18. return uint(hash >> (32 - h.bucketBits))
  19. }
  20. type slot struct {
  21. delta uint16
  22. next uint16
  23. }
  24. /* A (forgetful) hash table to the data seen by the compressor, to
  25. help create backward references to previous data.
  26. Hashes are stored in chains which are bucketed to groups. Group of chains
  27. share a storage "bank". When more than "bank size" chain nodes are added,
  28. oldest nodes are replaced; this way several chains may share a tail. */
  29. type hashForgetfulChain struct {
  30. hasherCommon
  31. bucketBits uint
  32. numBanks uint
  33. bankBits uint
  34. numLastDistancesToCheck int
  35. addr []uint32
  36. head []uint16
  37. tiny_hash [65536]byte
  38. banks [][]slot
  39. free_slot_idx []uint16
  40. max_hops uint
  41. }
  42. func (h *hashForgetfulChain) Initialize(params *encoderParams) {
  43. var q uint
  44. if params.quality > 6 {
  45. q = 7
  46. } else {
  47. q = 8
  48. }
  49. h.max_hops = q << uint(params.quality-4)
  50. bankSize := 1 << h.bankBits
  51. bucketSize := 1 << h.bucketBits
  52. h.addr = make([]uint32, bucketSize)
  53. h.head = make([]uint16, bucketSize)
  54. h.banks = make([][]slot, h.numBanks)
  55. for i := range h.banks {
  56. h.banks[i] = make([]slot, bankSize)
  57. }
  58. h.free_slot_idx = make([]uint16, h.numBanks)
  59. }
  60. func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) {
  61. var partial_prepare_threshold uint = (1 << h.bucketBits) >> 6
  62. /* Partial preparation is 100 times slower (per socket). */
  63. if one_shot && input_size <= partial_prepare_threshold {
  64. var i uint
  65. for i = 0; i < input_size; i++ {
  66. var bucket uint = h.HashBytes(data[i:])
  67. /* See InitEmpty comment. */
  68. h.addr[bucket] = 0xCCCCCCCC
  69. h.head[bucket] = 0xCCCC
  70. }
  71. } else {
  72. /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
  73. processed by hasher never reaches 3GB + 64M; this makes all new chains
  74. to be terminated after the first node. */
  75. for i := range h.addr {
  76. h.addr[i] = 0xCCCCCCCC
  77. }
  78. for i := range h.head {
  79. h.head[i] = 0
  80. }
  81. }
  82. h.tiny_hash = [65536]byte{}
  83. for i := range h.free_slot_idx {
  84. h.free_slot_idx[i] = 0
  85. }
  86. }
  87. /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
  88. node to corresponding chain; also update tiny_hash for current position. */
  89. func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) {
  90. var key uint = h.HashBytes(data[ix&mask:])
  91. var bank uint = key & (h.numBanks - 1)
  92. idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1)
  93. h.free_slot_idx[bank]++
  94. var delta uint = ix - uint(h.addr[key])
  95. h.tiny_hash[uint16(ix)] = byte(key)
  96. if delta > 0xFFFF {
  97. delta = 0xFFFF
  98. }
  99. h.banks[bank][idx].delta = uint16(delta)
  100. h.banks[bank][idx].next = h.head[key]
  101. h.addr[key] = uint32(ix)
  102. h.head[key] = uint16(idx)
  103. }
  104. func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
  105. var i uint
  106. for i = ix_start; i < ix_end; i++ {
  107. h.Store(data, mask, i)
  108. }
  109. }
  110. func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
  111. if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
  112. /* Prepare the hashes for three last bytes of the last write.
  113. These could not be calculated before, since they require knowledge
  114. of both the previous and the current block. */
  115. h.Store(ringbuffer, ring_buffer_mask, position-3)
  116. h.Store(ringbuffer, ring_buffer_mask, position-2)
  117. h.Store(ringbuffer, ring_buffer_mask, position-1)
  118. }
  119. }
  120. func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) {
  121. prepareDistanceCache(distance_cache, h.numLastDistancesToCheck)
  122. }
  123. /* Find a longest backward match of &data[cur_ix] up to the length of
  124. max_length and stores the position cur_ix in the hash table.
  125. REQUIRES: PrepareDistanceCachehashForgetfulChain must be invoked for current distance cache
  126. values; if this method is invoked repeatedly with the same distance
  127. cache values, it is enough to invoke PrepareDistanceCachehashForgetfulChain once.
  128. Does not look for matches longer than max_length.
  129. Does not look for matches further away than max_backward.
  130. Writes the best match into |out|.
  131. |out|->score is updated only if a better match is found. */
  132. func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
  133. var cur_ix_masked uint = cur_ix & ring_buffer_mask
  134. var min_score uint = out.score
  135. var best_score uint = out.score
  136. var best_len uint = out.len
  137. var key uint = h.HashBytes(data[cur_ix_masked:])
  138. var tiny_hash byte = byte(key)
  139. /* Don't accept a short copy from far away. */
  140. out.len = 0
  141. out.len_code_delta = 0
  142. /* Try last distance first. */
  143. for i := 0; i < h.numLastDistancesToCheck; i++ {
  144. var backward uint = uint(distance_cache[i])
  145. var prev_ix uint = (cur_ix - backward)
  146. /* For distance code 0 we want to consider 2-byte matches. */
  147. if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash {
  148. continue
  149. }
  150. if prev_ix >= cur_ix || backward > max_backward {
  151. continue
  152. }
  153. prev_ix &= ring_buffer_mask
  154. {
  155. var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
  156. if len >= 2 {
  157. var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
  158. if best_score < score {
  159. if i != 0 {
  160. score -= backwardReferencePenaltyUsingLastDistance(uint(i))
  161. }
  162. if best_score < score {
  163. best_score = score
  164. best_len = uint(len)
  165. out.len = best_len
  166. out.distance = backward
  167. out.score = best_score
  168. }
  169. }
  170. }
  171. }
  172. }
  173. {
  174. var bank uint = key & (h.numBanks - 1)
  175. var backward uint = 0
  176. var hops uint = h.max_hops
  177. var delta uint = cur_ix - uint(h.addr[key])
  178. var slot uint = uint(h.head[key])
  179. for {
  180. tmp6 := hops
  181. hops--
  182. if tmp6 == 0 {
  183. break
  184. }
  185. var prev_ix uint
  186. var last uint = slot
  187. backward += delta
  188. if backward > max_backward {
  189. break
  190. }
  191. prev_ix = (cur_ix - backward) & ring_buffer_mask
  192. slot = uint(h.banks[bank][last].next)
  193. delta = uint(h.banks[bank][last].delta)
  194. if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
  195. continue
  196. }
  197. {
  198. var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
  199. if len >= 4 {
  200. /* Comparing for >= 3 does not change the semantics, but just saves
  201. for a few unnecessary binary logarithms in backward reference
  202. score, since we are not interested in such short matches. */
  203. var score uint = backwardReferenceScore(uint(len), backward)
  204. if best_score < score {
  205. best_score = score
  206. best_len = uint(len)
  207. out.len = best_len
  208. out.distance = backward
  209. out.score = best_score
  210. }
  211. }
  212. }
  213. }
  214. h.Store(data, ring_buffer_mask, cur_ix)
  215. }
  216. if out.score == min_score {
  217. searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
  218. }
  219. }