encoder.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. package brotli
  2. import "github.com/andybalholm/brotli/matchfinder"
  3. // An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
  4. type Encoder struct {
  5. wroteHeader bool
  6. bw bitWriter
  7. distCache []distanceCode
  8. }
  9. func (e *Encoder) Reset() {
  10. e.wroteHeader = false
  11. e.bw = bitWriter{}
  12. }
  13. func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
  14. e.bw.dst = dst
  15. if !e.wroteHeader {
  16. e.bw.writeBits(4, 15)
  17. e.wroteHeader = true
  18. }
  19. var literalHisto [256]uint32
  20. var commandHisto [704]uint32
  21. var distanceHisto [64]uint32
  22. literalCount := 0
  23. commandCount := 0
  24. distanceCount := 0
  25. if len(e.distCache) < len(matches) {
  26. e.distCache = make([]distanceCode, len(matches))
  27. }
  28. // first pass: build the histograms
  29. pos := 0
  30. // d is the ring buffer of the last 4 distances.
  31. d := [4]int{-10, -10, -10, -10}
  32. for i, m := range matches {
  33. if m.Unmatched > 0 {
  34. for _, c := range src[pos : pos+m.Unmatched] {
  35. literalHisto[c]++
  36. }
  37. literalCount += m.Unmatched
  38. }
  39. insertCode := getInsertLengthCode(uint(m.Unmatched))
  40. copyCode := getCopyLengthCode(uint(m.Length))
  41. if m.Length == 0 {
  42. // If the stream ends with unmatched bytes, we need a dummy copy length.
  43. copyCode = 2
  44. }
  45. command := combineLengthCodes(insertCode, copyCode, false)
  46. commandHisto[command]++
  47. commandCount++
  48. if command >= 128 && m.Length != 0 {
  49. var distCode distanceCode
  50. switch m.Distance {
  51. case d[3]:
  52. distCode.code = 0
  53. case d[2]:
  54. distCode.code = 1
  55. case d[1]:
  56. distCode.code = 2
  57. case d[0]:
  58. distCode.code = 3
  59. case d[3] - 1:
  60. distCode.code = 4
  61. case d[3] + 1:
  62. distCode.code = 5
  63. case d[3] - 2:
  64. distCode.code = 6
  65. case d[3] + 2:
  66. distCode.code = 7
  67. case d[3] - 3:
  68. distCode.code = 8
  69. case d[3] + 3:
  70. distCode.code = 9
  71. // In my testing, codes 10–15 actually reduced the compression ratio.
  72. default:
  73. distCode = getDistanceCode(m.Distance)
  74. }
  75. e.distCache[i] = distCode
  76. distanceHisto[distCode.code]++
  77. distanceCount++
  78. if distCode.code != 0 {
  79. d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
  80. }
  81. }
  82. pos += m.Unmatched + m.Length
  83. }
  84. storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
  85. e.bw.writeBits(13, 0)
  86. var literalDepths [256]byte
  87. var literalBits [256]uint16
  88. buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
  89. var commandDepths [704]byte
  90. var commandBits [704]uint16
  91. buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
  92. var distanceDepths [64]byte
  93. var distanceBits [64]uint16
  94. buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
  95. pos = 0
  96. for i, m := range matches {
  97. insertCode := getInsertLengthCode(uint(m.Unmatched))
  98. copyCode := getCopyLengthCode(uint(m.Length))
  99. if m.Length == 0 {
  100. // If the stream ends with unmatched bytes, we need a dummy copy length.
  101. copyCode = 2
  102. }
  103. command := combineLengthCodes(insertCode, copyCode, false)
  104. e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
  105. if kInsExtra[insertCode] > 0 {
  106. e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
  107. }
  108. if kCopyExtra[copyCode] > 0 {
  109. e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
  110. }
  111. if m.Unmatched > 0 {
  112. for _, c := range src[pos : pos+m.Unmatched] {
  113. e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
  114. }
  115. }
  116. if command >= 128 && m.Length != 0 {
  117. distCode := e.distCache[i]
  118. e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
  119. if distCode.nExtra > 0 {
  120. e.bw.writeBits(distCode.nExtra, distCode.extraBits)
  121. }
  122. }
  123. pos += m.Unmatched + m.Length
  124. }
  125. if lastBlock {
  126. e.bw.writeBits(2, 3) // islast + isempty
  127. e.bw.jumpToByteBoundary()
  128. }
  129. return e.bw.dst
  130. }
  131. type distanceCode struct {
  132. code int
  133. nExtra uint
  134. extraBits uint64
  135. }
  136. func getDistanceCode(distance int) distanceCode {
  137. d := distance + 3
  138. nbits := log2FloorNonZero(uint(d)) - 1
  139. prefix := (d >> nbits) & 1
  140. offset := (2 + prefix) << nbits
  141. distcode := int(2*(nbits-1)) + prefix + 16
  142. extra := d - offset
  143. return distanceCode{distcode, uint(nbits), uint64(extra)}
  144. }