package flate

import (
	"io"
	"math"
	"sync"
)

const (
	maxStatelessBlock = math.MaxInt16

	// The dictionary is taken out of the maxStatelessBlock budget, so limit its size.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)
type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true

	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}
// NewStatelessWriter returns a WriteCloser that compresses without
// maintaining any state between Write calls.
// Since no memory is kept between Write calls, compression ratio and
// speed will be suboptimal, and the size of each Write call will
// directly affect the output size.
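//
// A minimal usage sketch (hypothetical caller code; buf and payload are
// assumptions, not part of this package):
//
//	var buf bytes.Buffer
//	w := NewStatelessWriter(&buf)
//	_, _ = w.Write(payload) // each Write is compressed independently
//	_ = w.Close()           // emits the final EOF block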
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// Everything will be flushed when the function returns.
// Up to 8KB of an optional dictionary can be given; it is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Passing a nil dictionary is perfectly fine.
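//
// A minimal usage sketch (hypothetical caller code; data and dict are
// assumptions, not part of this package):
//
//	var buf bytes.Buffer
//	// Compress data as a single, self-contained stream, assuming up to
//	// 8KB of dict immediately precedes it.
//	if err := StatelessDeflate(&buf, data, true, dict); err != nil {
//		// handle error
//	}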
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If matching removed less than 1/16th of the input,
			// only Huffman compress the block.
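			// Worked example (illustrative): for a 4096-byte block the
			// threshold is 4096-256 = 3840 tokens, so the dynamic path
			// below is only taken when matching brought the token count
			// at least 256 (1/16th) below the byte count.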
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
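// hashSL maps a 4-byte value to a table index using multiplicative
// hashing: multiply by a large odd constant and keep the top slTableBits
// bits, yielding an index in [0, slTableSize).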
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}
func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}

	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2
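
		// Skip heuristic: each probe advances doEvery bytes, plus one
		// extra byte for every 1<<skipLog (32) unmatched bytes since the
		// last emit, so incompressible input is skipped progressively faster.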
		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					// Inlined emitLiteral: add each pending byte as a
					// literal token and update the literal histogram.
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If no matches were added, keep dst.n == 0 so the caller
		// stores the block instead of encoding literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}
|