hash_longest_match_quickly.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. package brotli
  2. import "encoding/binary"
  3. /* Copyright 2010 Google Inc. All Rights Reserved.
  4. Distributed under MIT license.
  5. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  6. */
  7. /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
  8. a little faster (0.5% - 1%) and it compresses 0.15% better on small text
  9. and HTML inputs. */
  10. func (*hashLongestMatchQuickly) HashTypeLength() uint {
  11. return 8
  12. }
  13. func (*hashLongestMatchQuickly) StoreLookahead() uint {
  14. return 8
  15. }
  16. /* HashBytes is the function that chooses the bucket to place
  17. the address in. The HashLongestMatch and hashLongestMatchQuickly
  18. classes have separate, different implementations of hashing. */
  19. func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
  20. var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
  21. /* The higher bits contain more mixture from the multiplication,
  22. so we take our results from there. */
  23. return uint32(hash >> (64 - h.bucketBits))
  24. }
  25. /* A (forgetful) hash table to the data seen by the compressor, to
  26. help create backward references to previous data.
  27. This is a hash map of fixed size (1 << 16). Starting from the
  28. given index, 1 buckets are used to store values of a key. */
  29. type hashLongestMatchQuickly struct {
  30. hasherCommon
  31. bucketBits uint
  32. bucketSweep int
  33. hashLen uint
  34. useDictionary bool
  35. buckets []uint32
  36. }
  37. func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) {
  38. h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
  39. }
  40. func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
  41. var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
  42. /* Partial preparation is 100 times slower (per socket). */
  43. if one_shot && input_size <= partial_prepare_threshold {
  44. var i uint
  45. for i = 0; i < input_size; i++ {
  46. var key uint32 = h.HashBytes(data[i:])
  47. for j := 0; j < h.bucketSweep; j++ {
  48. h.buckets[key+uint32(j)] = 0
  49. }
  50. }
  51. } else {
  52. /* It is not strictly necessary to fill this buffer here, but
  53. not filling will make the results of the compression stochastic
  54. (but correct). This is because random data would cause the
  55. system to find accidentally good backward references here and there. */
  56. for i := range h.buckets {
  57. h.buckets[i] = 0
  58. }
  59. }
  60. }
  61. /* Look at 5 bytes at &data[ix & mask].
  62. Compute a hash from these, and store the value somewhere within
  63. [ix .. ix+3]. */
  64. func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
  65. var key uint32 = h.HashBytes(data[ix&mask:])
  66. var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
  67. /* Wiggle the value with the bucket sweep range. */
  68. h.buckets[key+off] = uint32(ix)
  69. }
  70. func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
  71. var i uint
  72. for i = ix_start; i < ix_end; i++ {
  73. h.Store(data, mask, i)
  74. }
  75. }
  76. func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
  77. if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
  78. /* Prepare the hashes for three last bytes of the last write.
  79. These could not be calculated before, since they require knowledge
  80. of both the previous and the current block. */
  81. h.Store(ringbuffer, ringbuffer_mask, position-3)
  82. h.Store(ringbuffer, ringbuffer_mask, position-2)
  83. h.Store(ringbuffer, ringbuffer_mask, position-1)
  84. }
  85. }
  86. func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
  87. }
  88. /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
  89. up to the length of max_length and stores the position cur_ix in the
  90. hash table.
  91. Does not look for matches longer than max_length.
  92. Does not look for matches further away than max_backward.
  93. Writes the best match into |out|.
  94. |out|->score is updated only if a better match is found. */
  95. func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
  96. var best_len_in uint = out.len
  97. var cur_ix_masked uint = cur_ix & ring_buffer_mask
  98. var key uint32 = h.HashBytes(data[cur_ix_masked:])
  99. var compare_char int = int(data[cur_ix_masked+best_len_in])
  100. var min_score uint = out.score
  101. var best_score uint = out.score
  102. var best_len uint = best_len_in
  103. var cached_backward uint = uint(distance_cache[0])
  104. var prev_ix uint = cur_ix - cached_backward
  105. var bucket []uint32
  106. out.len_code_delta = 0
  107. if prev_ix < cur_ix {
  108. prev_ix &= uint(uint32(ring_buffer_mask))
  109. if compare_char == int(data[prev_ix+best_len]) {
  110. var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
  111. if len >= 4 {
  112. var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
  113. if best_score < score {
  114. best_score = score
  115. best_len = uint(len)
  116. out.len = uint(len)
  117. out.distance = cached_backward
  118. out.score = best_score
  119. compare_char = int(data[cur_ix_masked+best_len])
  120. if h.bucketSweep == 1 {
  121. h.buckets[key] = uint32(cur_ix)
  122. return
  123. }
  124. }
  125. }
  126. }
  127. }
  128. if h.bucketSweep == 1 {
  129. var backward uint
  130. var len uint
  131. /* Only one to look for, don't bother to prepare for a loop. */
  132. prev_ix = uint(h.buckets[key])
  133. h.buckets[key] = uint32(cur_ix)
  134. backward = cur_ix - prev_ix
  135. prev_ix &= uint(uint32(ring_buffer_mask))
  136. if compare_char != int(data[prev_ix+best_len_in]) {
  137. return
  138. }
  139. if backward == 0 || backward > max_backward {
  140. return
  141. }
  142. len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
  143. if len >= 4 {
  144. var score uint = backwardReferenceScore(uint(len), backward)
  145. if best_score < score {
  146. out.len = uint(len)
  147. out.distance = backward
  148. out.score = score
  149. return
  150. }
  151. }
  152. } else {
  153. bucket = h.buckets[key:]
  154. var i int
  155. prev_ix = uint(bucket[0])
  156. bucket = bucket[1:]
  157. for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
  158. var backward uint = cur_ix - prev_ix
  159. var len uint
  160. prev_ix &= uint(uint32(ring_buffer_mask))
  161. if compare_char != int(data[prev_ix+best_len]) {
  162. continue
  163. }
  164. if backward == 0 || backward > max_backward {
  165. continue
  166. }
  167. len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
  168. if len >= 4 {
  169. var score uint = backwardReferenceScore(uint(len), backward)
  170. if best_score < score {
  171. best_score = score
  172. best_len = uint(len)
  173. out.len = best_len
  174. out.distance = backward
  175. out.score = score
  176. compare_char = int(data[cur_ix_masked+best_len])
  177. }
  178. }
  179. }
  180. }
  181. if h.useDictionary && min_score == out.score {
  182. searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
  183. }
  184. h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
  185. }