123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package flate
// dictDecoder is the LZ77 sliding-window dictionary used while decompressing.
//
// An LZ77 stream is decoded by replaying two kinds of commands:
//
//   - Literal insertions place one or more symbols into the output verbatim.
//     Use writeByte for a single symbol, or the writeSlice/writeMark pair for
//     a run of symbols. Unless a preset dictionary is supplied, a valid
//     stream always begins with a literal insertion.
//
//   - Backward copies repeat symbols that were emitted earlier. Each one is a
//     (dist, length) pair: dist says how far back in the stream the source
//     starts and length says how many bytes to copy. The length may exceed
//     the distance; because LZ77 copies forward, that case acts as a form of
//     run-length encoding for repeated runs of symbols. Use writeCopy and
//     tryWriteCopy for this command.
//
// To stay fast, the methods perform little to no validation of their
// arguments. Callers must uphold the invariant documented on each method.
type dictDecoder struct {
	hist []byte // the sliding window holding the history

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int  // position in hist where the next output byte goes
	rdPos int  // hist[:rdPos] has already been emitted
	full  bool // whether a whole window's worth of data has been written
}
- // init initializes dictDecoder to have a sliding window dictionary of the given
- // size. If a preset dict is provided, it will initialize the dictionary with
- // the contents of dict.
- func (dd *dictDecoder) init(size int, dict []byte) {
- *dd = dictDecoder{hist: dd.hist}
- if cap(dd.hist) < size {
- dd.hist = make([]byte, size)
- }
- dd.hist = dd.hist[:size]
- if len(dict) > len(dd.hist) {
- dict = dict[len(dict)-len(dd.hist):]
- }
- dd.wrPos = copy(dd.hist, dict)
- if dd.wrPos == len(dd.hist) {
- dd.wrPos = 0
- dd.full = true
- }
- dd.rdPos = dd.wrPos
- }
- // histSize reports the total amount of historical data in the dictionary.
- func (dd *dictDecoder) histSize() int {
- if dd.full {
- return len(dd.hist)
- }
- return dd.wrPos
- }
- // availRead reports the number of bytes that can be flushed by readFlush.
- func (dd *dictDecoder) availRead() int {
- return dd.wrPos - dd.rdPos
- }
- // availWrite reports the available amount of output buffer space.
- func (dd *dictDecoder) availWrite() int {
- return len(dd.hist) - dd.wrPos
- }
- // writeSlice returns a slice of the available buffer to write data to.
- //
- // This invariant will be kept: len(s) <= availWrite()
- func (dd *dictDecoder) writeSlice() []byte {
- return dd.hist[dd.wrPos:]
- }
- // writeMark advances the writer pointer by cnt.
- //
- // This invariant must be kept: 0 <= cnt <= availWrite()
- func (dd *dictDecoder) writeMark(cnt int) {
- dd.wrPos += cnt
- }
- // writeByte writes a single byte to the dictionary.
- //
- // This invariant must be kept: 0 < availWrite()
- func (dd *dictDecoder) writeByte(c byte) {
- dd.hist[dd.wrPos] = c
- dd.wrPos++
- }
- // writeCopy copies a string at a given (dist, length) to the output.
- // This returns the number of bytes copied and may be less than the requested
- // length if the available space in the output buffer is too small.
- //
- // This invariant must be kept: 0 < dist <= histSize()
- func (dd *dictDecoder) writeCopy(dist, length int) int {
- dstBase := dd.wrPos
- dstPos := dstBase
- srcPos := dstPos - dist
- endPos := dstPos + length
- if endPos > len(dd.hist) {
- endPos = len(dd.hist)
- }
- // Copy non-overlapping section after destination position.
- //
- // This section is non-overlapping in that the copy length for this section
- // is always less than or equal to the backwards distance. This can occur
- // if a distance refers to data that wraps-around in the buffer.
- // Thus, a backwards copy is performed here; that is, the exact bytes in
- // the source prior to the copy is placed in the destination.
- if srcPos < 0 {
- srcPos += len(dd.hist)
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
- srcPos = 0
- }
- // Copy possibly overlapping section before destination position.
- //
- // This section can overlap if the copy length for this section is larger
- // than the backwards distance. This is allowed by LZ77 so that repeated
- // strings can be succinctly represented using (dist, length) pairs.
- // Thus, a forwards copy is performed here; that is, the bytes copied is
- // possibly dependent on the resulting bytes in the destination as the copy
- // progresses along. This is functionally equivalent to the following:
- //
- // for i := 0; i < endPos-dstPos; i++ {
- // dd.hist[dstPos+i] = dd.hist[srcPos+i]
- // }
- // dstPos = endPos
- //
- for dstPos < endPos {
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
- }
- dd.wrPos = dstPos
- return dstPos - dstBase
- }
- // tryWriteCopy tries to copy a string at a given (distance, length) to the
- // output. This specialized version is optimized for short distances.
- //
- // This method is designed to be inlined for performance reasons.
- //
- // This invariant must be kept: 0 < dist <= histSize()
- func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
- dstPos := dd.wrPos
- endPos := dstPos + length
- if dstPos < dist || endPos > len(dd.hist) {
- return 0
- }
- dstBase := dstPos
- srcPos := dstPos - dist
- // Copy possibly overlapping section before destination position.
- loop:
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
- if dstPos < endPos {
- goto loop // Avoid for-loop so that this function can be inlined
- }
- dd.wrPos = dstPos
- return dstPos - dstBase
- }
- // readFlush returns a slice of the historical buffer that is ready to be
- // emitted to the user. The data returned by readFlush must be fully consumed
- // before calling any other dictDecoder methods.
- func (dd *dictDecoder) readFlush() []byte {
- toRead := dd.hist[dd.rdPos:dd.wrPos]
- dd.rdPos = dd.wrPos
- if dd.wrPos == len(dd.hist) {
- dd.wrPos, dd.rdPos = 0, 0
- dd.full = true
- }
- return toRead
- }
|