dict_decoder.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package flate
  5. // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
  6. // LZ77 decompresses data through sequences of two forms of commands:
  7. //
  8. // - Literal insertions: Runs of one or more symbols are inserted into the data
  9. // stream as is. This is accomplished through the writeByte method for a
  10. // single symbol, or combinations of writeSlice/writeMark for multiple symbols.
  11. // Any valid stream must start with a literal insertion if no preset dictionary
  12. // is used.
  13. //
  14. // - Backward copies: Runs of one or more symbols are copied from previously
  15. // emitted data. Backward copies come as the tuple (dist, length) where dist
  16. // determines how far back in the stream to copy from and length determines how
  17. // many bytes to copy. Note that it is valid for the length to be greater than
  18. // the distance. Since LZ77 uses forward copies, that situation is used to
  19. // perform a form of run-length encoding on repeated runs of symbols.
  20. // The writeCopy and tryWriteCopy are used to implement this command.
  21. //
  22. // For performance reasons, this implementation performs little to no sanity
  23. // checks about the arguments. As such, the invariants documented for each
  24. // method call must be respected.
  25. type dictDecoder struct {
  26. hist []byte // Sliding window history
  27. // Invariant: 0 <= rdPos <= wrPos <= len(hist)
  28. wrPos int // Current output position in buffer
  29. rdPos int // Have emitted hist[:rdPos] already
  30. full bool // Has a full window length been written yet?
  31. }
  32. // init initializes dictDecoder to have a sliding window dictionary of the given
  33. // size. If a preset dict is provided, it will initialize the dictionary with
  34. // the contents of dict.
  35. func (dd *dictDecoder) init(size int, dict []byte) {
  36. *dd = dictDecoder{hist: dd.hist}
  37. if cap(dd.hist) < size {
  38. dd.hist = make([]byte, size)
  39. }
  40. dd.hist = dd.hist[:size]
  41. if len(dict) > len(dd.hist) {
  42. dict = dict[len(dict)-len(dd.hist):]
  43. }
  44. dd.wrPos = copy(dd.hist, dict)
  45. if dd.wrPos == len(dd.hist) {
  46. dd.wrPos = 0
  47. dd.full = true
  48. }
  49. dd.rdPos = dd.wrPos
  50. }
  51. // histSize reports the total amount of historical data in the dictionary.
  52. func (dd *dictDecoder) histSize() int {
  53. if dd.full {
  54. return len(dd.hist)
  55. }
  56. return dd.wrPos
  57. }
  58. // availRead reports the number of bytes that can be flushed by readFlush.
  59. func (dd *dictDecoder) availRead() int {
  60. return dd.wrPos - dd.rdPos
  61. }
  62. // availWrite reports the available amount of output buffer space.
  63. func (dd *dictDecoder) availWrite() int {
  64. return len(dd.hist) - dd.wrPos
  65. }
  66. // writeSlice returns a slice of the available buffer to write data to.
  67. //
  68. // This invariant will be kept: len(s) <= availWrite()
  69. func (dd *dictDecoder) writeSlice() []byte {
  70. return dd.hist[dd.wrPos:]
  71. }
  72. // writeMark advances the writer pointer by cnt.
  73. //
  74. // This invariant must be kept: 0 <= cnt <= availWrite()
  75. func (dd *dictDecoder) writeMark(cnt int) {
  76. dd.wrPos += cnt
  77. }
  78. // writeByte writes a single byte to the dictionary.
  79. //
  80. // This invariant must be kept: 0 < availWrite()
  81. func (dd *dictDecoder) writeByte(c byte) {
  82. dd.hist[dd.wrPos] = c
  83. dd.wrPos++
  84. }
  85. // writeCopy copies a string at a given (dist, length) to the output.
  86. // This returns the number of bytes copied and may be less than the requested
  87. // length if the available space in the output buffer is too small.
  88. //
  89. // This invariant must be kept: 0 < dist <= histSize()
  90. func (dd *dictDecoder) writeCopy(dist, length int) int {
  91. dstBase := dd.wrPos
  92. dstPos := dstBase
  93. srcPos := dstPos - dist
  94. endPos := dstPos + length
  95. if endPos > len(dd.hist) {
  96. endPos = len(dd.hist)
  97. }
  98. // Copy non-overlapping section after destination position.
  99. //
  100. // This section is non-overlapping in that the copy length for this section
  101. // is always less than or equal to the backwards distance. This can occur
  102. // if a distance refers to data that wraps-around in the buffer.
  103. // Thus, a backwards copy is performed here; that is, the exact bytes in
  104. // the source prior to the copy is placed in the destination.
  105. if srcPos < 0 {
  106. srcPos += len(dd.hist)
  107. dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
  108. srcPos = 0
  109. }
  110. // Copy possibly overlapping section before destination position.
  111. //
  112. // This section can overlap if the copy length for this section is larger
  113. // than the backwards distance. This is allowed by LZ77 so that repeated
  114. // strings can be succinctly represented using (dist, length) pairs.
  115. // Thus, a forwards copy is performed here; that is, the bytes copied is
  116. // possibly dependent on the resulting bytes in the destination as the copy
  117. // progresses along. This is functionally equivalent to the following:
  118. //
  119. // for i := 0; i < endPos-dstPos; i++ {
  120. // dd.hist[dstPos+i] = dd.hist[srcPos+i]
  121. // }
  122. // dstPos = endPos
  123. //
  124. for dstPos < endPos {
  125. dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
  126. }
  127. dd.wrPos = dstPos
  128. return dstPos - dstBase
  129. }
  130. // tryWriteCopy tries to copy a string at a given (distance, length) to the
  131. // output. This specialized version is optimized for short distances.
  132. //
  133. // This method is designed to be inlined for performance reasons.
  134. //
  135. // This invariant must be kept: 0 < dist <= histSize()
  136. func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
  137. dstPos := dd.wrPos
  138. endPos := dstPos + length
  139. if dstPos < dist || endPos > len(dd.hist) {
  140. return 0
  141. }
  142. dstBase := dstPos
  143. srcPos := dstPos - dist
  144. // Copy possibly overlapping section before destination position.
  145. loop:
  146. dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
  147. if dstPos < endPos {
  148. goto loop // Avoid for-loop so that this function can be inlined
  149. }
  150. dd.wrPos = dstPos
  151. return dstPos - dstBase
  152. }
  153. // readFlush returns a slice of the historical buffer that is ready to be
  154. // emitted to the user. The data returned by readFlush must be fully consumed
  155. // before calling any other dictDecoder methods.
  156. func (dd *dictDecoder) readFlush() []byte {
  157. toRead := dd.hist[dd.rdPos:dd.wrPos]
  158. dd.rdPos = dd.wrPos
  159. if dd.wrPos == len(dd.hist) {
  160. dd.wrPos, dd.rdPos = 0, 0
  161. dd.full = true
  162. }
  163. return toRead
  164. }