// hash_rolling.go
  1. package brotli
  2. /* Copyright 2018 Google Inc. All Rights Reserved.
  3. Distributed under MIT license.
  4. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  5. */
  6. /* NOTE: this hasher does not search in the dictionary. It is used as
  7. backup-hasher, the main hasher already searches in it. */
  8. const kRollingHashMul32 uint32 = 69069
  9. const kInvalidPosHashRolling uint32 = 0xffffffff
  10. /* This hasher uses a longer forward length, but returning a higher value here
  11. will hurt compression by the main hasher when combined with a composite
  12. hasher. The hasher tests for forward itself instead. */
  13. func (*hashRolling) HashTypeLength() uint {
  14. return 4
  15. }
  16. func (*hashRolling) StoreLookahead() uint {
  17. return 4
  18. }
  19. /* Computes a code from a single byte. A lookup table of 256 values could be
  20. used, but simply adding 1 works about as good. */
  21. func (*hashRolling) HashByte(b byte) uint32 {
  22. return uint32(b) + 1
  23. }
  24. func (h *hashRolling) HashRollingFunctionInitial(state uint32, add byte, factor uint32) uint32 {
  25. return uint32(factor*state + h.HashByte(add))
  26. }
  27. func (h *hashRolling) HashRollingFunction(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 {
  28. return uint32(factor*state + h.HashByte(add) - factor_remove*h.HashByte(rem))
  29. }
  30. /* Rolling hash for long distance long string matches. Stores one position
  31. per bucket, bucket key is computed over a long region. */
  32. type hashRolling struct {
  33. hasherCommon
  34. jump int
  35. state uint32
  36. table []uint32
  37. next_ix uint
  38. factor uint32
  39. factor_remove uint32
  40. }
  41. func (h *hashRolling) Initialize(params *encoderParams) {
  42. h.state = 0
  43. h.next_ix = 0
  44. h.factor = kRollingHashMul32
  45. /* Compute the factor of the oldest byte to remove: factor**steps modulo
  46. 0xffffffff (the multiplications rely on 32-bit overflow) */
  47. h.factor_remove = 1
  48. for i := 0; i < 32; i += h.jump {
  49. h.factor_remove *= h.factor
  50. }
  51. h.table = make([]uint32, 16777216)
  52. for i := 0; i < 16777216; i++ {
  53. h.table[i] = kInvalidPosHashRolling
  54. }
  55. }
  56. func (h *hashRolling) Prepare(one_shot bool, input_size uint, data []byte) {
  57. /* Too small size, cannot use this hasher. */
  58. if input_size < 32 {
  59. return
  60. }
  61. h.state = 0
  62. for i := 0; i < 32; i += h.jump {
  63. h.state = h.HashRollingFunctionInitial(h.state, data[i], h.factor)
  64. }
  65. }
  66. func (*hashRolling) Store(data []byte, mask uint, ix uint) {
  67. }
  68. func (*hashRolling) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
  69. }
  70. func (h *hashRolling) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
  71. var position_masked uint
  72. /* In this case we must re-initialize the hasher from scratch from the
  73. current position. */
  74. var available uint = num_bytes
  75. if position&uint(h.jump-1) != 0 {
  76. var diff uint = uint(h.jump) - (position & uint(h.jump-1))
  77. if diff > available {
  78. available = 0
  79. } else {
  80. available = available - diff
  81. }
  82. position += diff
  83. }
  84. position_masked = position & ring_buffer_mask
  85. /* wrapping around ringbuffer not handled. */
  86. if available > ring_buffer_mask-position_masked {
  87. available = ring_buffer_mask - position_masked
  88. }
  89. h.Prepare(false, available, ringbuffer[position&ring_buffer_mask:])
  90. h.next_ix = position
  91. }
  92. func (*hashRolling) PrepareDistanceCache(distance_cache []int) {
  93. }
  94. func (h *hashRolling) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
  95. var cur_ix_masked uint = cur_ix & ring_buffer_mask
  96. var pos uint = h.next_ix
  97. if cur_ix&uint(h.jump-1) != 0 {
  98. return
  99. }
  100. /* Not enough lookahead */
  101. if max_length < 32 {
  102. return
  103. }
  104. for pos = h.next_ix; pos <= cur_ix; pos += uint(h.jump) {
  105. var code uint32 = h.state & ((16777216 * 64) - 1)
  106. var rem byte = data[pos&ring_buffer_mask]
  107. var add byte = data[(pos+32)&ring_buffer_mask]
  108. var found_ix uint = uint(kInvalidPosHashRolling)
  109. h.state = h.HashRollingFunction(h.state, add, rem, h.factor, h.factor_remove)
  110. if code < 16777216 {
  111. found_ix = uint(h.table[code])
  112. h.table[code] = uint32(pos)
  113. if pos == cur_ix && uint32(found_ix) != kInvalidPosHashRolling {
  114. /* The cast to 32-bit makes backward distances up to 4GB work even
  115. if cur_ix is above 4GB, despite using 32-bit values in the table. */
  116. var backward uint = uint(uint32(cur_ix - found_ix))
  117. if backward <= max_backward {
  118. var found_ix_masked uint = found_ix & ring_buffer_mask
  119. var len uint = findMatchLengthWithLimit(data[found_ix_masked:], data[cur_ix_masked:], max_length)
  120. if len >= 4 && len > out.len {
  121. var score uint = backwardReferenceScore(uint(len), backward)
  122. if score > out.score {
  123. out.len = uint(len)
  124. out.distance = backward
  125. out.score = score
  126. out.len_code_delta = 0
  127. }
  128. }
  129. }
  130. }
  131. }
  132. }
  133. h.next_ix = cur_ix + uint(h.jump)
  134. }