stateless.go

package flate

import (
	"io"
	"math"
	"sync"
)

const (
	maxStatelessBlock = math.MaxInt16

	// dictionary will be taken from maxStatelessBlock, so limit it.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)
type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}
// NewStatelessWriter will do compression without maintaining any state
// between Write calls.
// No memory is retained between Write calls, so both compression ratio
// and speed will be suboptimal.
// Because of this, the size of individual Write calls will affect the output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
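
// The sketch below is illustrative and not part of the original file: it shows
// typical use of NewStatelessWriter. The helper name compressAllStateless is
// hypothetical. A single large Write compresses better than many small ones,
// since no state is kept between calls.
func compressAllStateless(w io.Writer, data []byte) error {
	sw := NewStatelessWriter(w)
	if _, err := sw.Write(data); err != nil {
		return err
	}
	// Close emits the final EOF block so the stream can be decoded.
	return sw.Close()
}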
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// Everything will be flushed before the function returns.
// Up to 8KB of an optional dictionary can be given; it is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Passing a nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, Huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
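
// The sketch below is illustrative and not part of the original file: it shows
// how consecutive chunks can be compressed with StatelessDeflate while passing
// the previous chunk as the dictionary, since that data precedes the next block
// in the output stream. The helper name statelessChunks is hypothetical;
// StatelessDeflate itself truncates the dictionary to maxStatelessDict.
// (If chunks is empty, the caller would still need to emit a final EOF block.)
func statelessChunks(w io.Writer, chunks [][]byte) error {
	var dict []byte
	for i, chunk := range chunks {
		eof := i == len(chunks)-1
		if err := StatelessDeflate(w, chunk, eof, dict); err != nil {
			return err
		}
		dict = chunk
	}
	return nil
}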
// hashSL returns a table index for the four input bytes in u,
// using a multiplicative hash reduced to slTableBits bits.
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}
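
// The sketch below is illustrative and not part of the original file: it
// checks that stateless output is a plain deflate stream by decoding it with
// this package's NewReader. io.Pipe is used only to avoid extra imports, and
// io.Discard assumes Go 1.16+; the helper name roundTripStateless is hypothetical.
func roundTripStateless(data []byte) error {
	pr, pw := io.Pipe()
	go func() {
		sw := NewStatelessWriter(pw)
		_, err := sw.Write(data)
		if err == nil {
			err = sw.Close()
		}
		pw.CloseWithError(err)
	}()
	r := NewReader(pr)
	defer r.Close()
	// Discard the decompressed bytes; a real caller would compare them to data.
	_, err := io.Copy(io.Discard, r)
	return err
}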