inflate.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "compress/flate"
  11. "fmt"
  12. "io"
  13. "math/bits"
  14. "sync"
  15. )
  16. const (
  17. maxCodeLen = 16 // max length of Huffman code
  18. maxCodeLenMask = 15 // mask for max length of Huffman code
  19. // The next three numbers come from the RFC section 3.2.7, with the
  20. // additional proviso in section 3.2.5 which implies that distance codes
  21. // 30 and 31 should never occur in compressed data.
  22. maxNumLit = 286
  23. maxNumDist = 30
  24. numCodes = 19 // number of codes in Huffman meta-code
  25. debugDecode = false
  26. )
  27. // Value of length - 3 and extra bits.
  28. type lengthExtra struct {
  29. length, extra uint8
  30. }
  31. var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
  32. var bitMask32 = [32]uint32{
  33. 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
  34. 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
  35. 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
  36. 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
  37. } // up to 32 bits
  38. // Initialize the fixedHuffmanDecoder only once upon first use.
  39. var fixedOnce sync.Once
  40. var fixedHuffmanDecoder huffmanDecoder
  41. // A CorruptInputError reports the presence of corrupt input at a given offset.
  42. type CorruptInputError = flate.CorruptInputError
  43. // An InternalError reports an error in the flate code itself.
  44. type InternalError string
  45. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  46. // A ReadError reports an error encountered while reading input.
  47. //
  48. // Deprecated: No longer returned.
  49. type ReadError = flate.ReadError
  50. // A WriteError reports an error encountered while writing output.
  51. //
  52. // Deprecated: No longer returned.
  53. type WriteError = flate.WriteError
  54. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  55. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  56. // instead of allocating a new one.
  57. type Resetter interface {
  58. // Reset discards any buffered data and resets the Resetter as if it was
  59. // newly initialized with the given reader.
  60. Reset(r io.Reader, dict []byte) error
  61. }
  62. // The data structure for decoding Huffman tables is based on that of
  63. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  64. // For codes smaller than the table width, there are multiple entries
  65. // (each combination of trailing bits has the same value). For codes
  66. // larger than the table width, the table contains a link to an overflow
  67. // table. The width of each entry in the link table is the maximum code
  68. // size minus the chunk width.
  69. //
  70. // Note that you can do a lookup in the table even without all bits
  71. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  72. // have the property that shorter codes come before longer ones, the
  73. // bit length estimate in the result is a lower bound on the actual
  74. // number of bits.
  75. //
  76. // See the following:
  77. // http://www.gzip.org/algorithm.txt
  78. // chunk & 15 is number of bits
  79. // chunk >> 4 is value, including table link
  80. const (
  81. huffmanChunkBits = 9
  82. huffmanNumChunks = 1 << huffmanChunkBits
  83. huffmanCountMask = 15
  84. huffmanValueShift = 4
  85. )
  86. type huffmanDecoder struct {
  87. maxRead int // the maximum number of bits we can read and not overread
  88. chunks *[huffmanNumChunks]uint16 // chunks as described above
  89. links [][]uint16 // overflow links
  90. linkMask uint32 // mask the width of the link table
  91. }
  92. // Initialize Huffman decoding tables from array of code lengths.
  93. // Following this function, h is guaranteed to be initialized into a complete
  94. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  95. // degenerate case where the tree has only a single symbol with length 1. Empty
  96. // trees are permitted.
  97. func (h *huffmanDecoder) init(lengths []int) bool {
  98. // Sanity enables additional runtime tests during Huffman
  99. // table construction. It's intended to be used during
  100. // development to supplement the currently ad-hoc unit tests.
  101. const sanity = false
  102. if h.chunks == nil {
  103. h.chunks = new([huffmanNumChunks]uint16)
  104. }
  105. if h.maxRead != 0 {
  106. *h = huffmanDecoder{chunks: h.chunks, links: h.links}
  107. }
  108. // Count number of codes of each length,
  109. // compute maxRead and max length.
  110. var count [maxCodeLen]int
  111. var min, max int
  112. for _, n := range lengths {
  113. if n == 0 {
  114. continue
  115. }
  116. if min == 0 || n < min {
  117. min = n
  118. }
  119. if n > max {
  120. max = n
  121. }
  122. count[n&maxCodeLenMask]++
  123. }
  124. // Empty tree. The decompressor.huffSym function will fail later if the tree
  125. // is used. Technically, an empty tree is only valid for the HDIST tree and
  126. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  127. // is guaranteed to fail since it will attempt to use the tree to decode the
  128. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  129. // guaranteed to fail later since the compressed data section must be
  130. // composed of at least one symbol (the end-of-block marker).
  131. if max == 0 {
  132. return true
  133. }
  134. code := 0
  135. var nextcode [maxCodeLen]int
  136. for i := min; i <= max; i++ {
  137. code <<= 1
  138. nextcode[i&maxCodeLenMask] = code
  139. code += count[i&maxCodeLenMask]
  140. }
  141. // Check that the coding is complete (i.e., that we've
  142. // assigned all 2-to-the-max possible bit sequences).
  143. // Exception: To be compatible with zlib, we also need to
  144. // accept degenerate single-code codings. See also
  145. // TestDegenerateHuffmanCoding.
  146. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  147. if debugDecode {
  148. fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
  149. }
  150. return false
  151. }
  152. h.maxRead = min
  153. chunks := h.chunks[:]
  154. for i := range chunks {
  155. chunks[i] = 0
  156. }
  157. if max > huffmanChunkBits {
  158. numLinks := 1 << (uint(max) - huffmanChunkBits)
  159. h.linkMask = uint32(numLinks - 1)
  160. // create link tables
  161. link := nextcode[huffmanChunkBits+1] >> 1
  162. if cap(h.links) < huffmanNumChunks-link {
  163. h.links = make([][]uint16, huffmanNumChunks-link)
  164. } else {
  165. h.links = h.links[:huffmanNumChunks-link]
  166. }
  167. for j := uint(link); j < huffmanNumChunks; j++ {
  168. reverse := int(bits.Reverse16(uint16(j)))
  169. reverse >>= uint(16 - huffmanChunkBits)
  170. off := j - uint(link)
  171. if sanity && h.chunks[reverse] != 0 {
  172. panic("impossible: overwriting existing chunk")
  173. }
  174. h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
  175. if cap(h.links[off]) < numLinks {
  176. h.links[off] = make([]uint16, numLinks)
  177. } else {
  178. h.links[off] = h.links[off][:numLinks]
  179. }
  180. }
  181. } else {
  182. h.links = h.links[:0]
  183. }
  184. for i, n := range lengths {
  185. if n == 0 {
  186. continue
  187. }
  188. code := nextcode[n]
  189. nextcode[n]++
  190. chunk := uint16(i<<huffmanValueShift | n)
  191. reverse := int(bits.Reverse16(uint16(code)))
  192. reverse >>= uint(16 - n)
  193. if n <= huffmanChunkBits {
  194. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  195. // We should never need to overwrite
  196. // an existing chunk. Also, 0 is
  197. // never a valid chunk, because the
  198. // lower 4 "count" bits should be
  199. // between 1 and 15.
  200. if sanity && h.chunks[off] != 0 {
  201. panic("impossible: overwriting existing chunk")
  202. }
  203. h.chunks[off] = chunk
  204. }
  205. } else {
  206. j := reverse & (huffmanNumChunks - 1)
  207. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  208. // Longer codes should have been
  209. // associated with a link table above.
  210. panic("impossible: not an indirect chunk")
  211. }
  212. value := h.chunks[j] >> huffmanValueShift
  213. linktab := h.links[value]
  214. reverse >>= huffmanChunkBits
  215. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  216. if sanity && linktab[off] != 0 {
  217. panic("impossible: overwriting existing chunk")
  218. }
  219. linktab[off] = chunk
  220. }
  221. }
  222. }
  223. if sanity {
  224. // Above we've sanity checked that we never overwrote
  225. // an existing entry. Here we additionally check that
  226. // we filled the tables completely.
  227. for i, chunk := range h.chunks {
  228. if chunk == 0 {
  229. // As an exception, in the degenerate
  230. // single-code case, we allow odd
  231. // chunks to be missing.
  232. if code == 1 && i%2 == 1 {
  233. continue
  234. }
  235. panic("impossible: missing chunk")
  236. }
  237. }
  238. for _, linktab := range h.links {
  239. for _, chunk := range linktab {
  240. if chunk == 0 {
  241. panic("impossible: missing chunk")
  242. }
  243. }
  244. }
  245. }
  246. return true
  247. }
  248. // Reader is the actual read interface needed by NewReader.
  249. // If the passed in io.Reader does not also have ReadByte,
  250. // the NewReader will introduce its own buffering.
  251. type Reader interface {
  252. io.Reader
  253. io.ByteReader
  254. }
  255. type step uint8
  256. const (
  257. copyData step = iota + 1
  258. nextBlock
  259. huffmanBytesBuffer
  260. huffmanBytesReader
  261. huffmanBufioReader
  262. huffmanStringsReader
  263. huffmanGenericReader
  264. )
  265. // Decompress state.
  266. type decompressor struct {
  267. // Input source.
  268. r Reader
  269. roffset int64
  270. // Huffman decoders for literal/length, distance.
  271. h1, h2 huffmanDecoder
  272. // Length arrays used to define Huffman codes.
  273. bits *[maxNumLit + maxNumDist]int
  274. codebits *[numCodes]int
  275. // Output history, buffer.
  276. dict dictDecoder
  277. // Next step in the decompression,
  278. // and decompression state.
  279. step step
  280. stepState int
  281. err error
  282. toRead []byte
  283. hl, hd *huffmanDecoder
  284. copyLen int
  285. copyDist int
  286. // Temporary buffer (avoids repeated allocation).
  287. buf [4]byte
  288. // Input bits, in top of b.
  289. b uint32
  290. nb uint
  291. final bool
  292. }
  293. func (f *decompressor) nextBlock() {
  294. for f.nb < 1+2 {
  295. if f.err = f.moreBits(); f.err != nil {
  296. return
  297. }
  298. }
  299. f.final = f.b&1 == 1
  300. f.b >>= 1
  301. typ := f.b & 3
  302. f.b >>= 2
  303. f.nb -= 1 + 2
  304. switch typ {
  305. case 0:
  306. f.dataBlock()
  307. if debugDecode {
  308. fmt.Println("stored block")
  309. }
  310. case 1:
  311. // compressed, fixed Huffman tables
  312. f.hl = &fixedHuffmanDecoder
  313. f.hd = nil
  314. f.huffmanBlockDecoder()
  315. if debugDecode {
  316. fmt.Println("predefinied huffman block")
  317. }
  318. case 2:
  319. // compressed, dynamic Huffman tables
  320. if f.err = f.readHuffman(); f.err != nil {
  321. break
  322. }
  323. f.hl = &f.h1
  324. f.hd = &f.h2
  325. f.huffmanBlockDecoder()
  326. if debugDecode {
  327. fmt.Println("dynamic huffman block")
  328. }
  329. default:
  330. // 3 is reserved.
  331. if debugDecode {
  332. fmt.Println("reserved data block encountered")
  333. }
  334. f.err = CorruptInputError(f.roffset)
  335. }
  336. }
  337. func (f *decompressor) Read(b []byte) (int, error) {
  338. for {
  339. if len(f.toRead) > 0 {
  340. n := copy(b, f.toRead)
  341. f.toRead = f.toRead[n:]
  342. if len(f.toRead) == 0 {
  343. return n, f.err
  344. }
  345. return n, nil
  346. }
  347. if f.err != nil {
  348. return 0, f.err
  349. }
  350. f.doStep()
  351. if f.err != nil && len(f.toRead) == 0 {
  352. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  353. }
  354. }
  355. }
  356. // WriteTo implements the io.WriteTo interface for io.Copy and friends.
  357. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  358. total := int64(0)
  359. flushed := false
  360. for {
  361. if len(f.toRead) > 0 {
  362. n, err := w.Write(f.toRead)
  363. total += int64(n)
  364. if err != nil {
  365. f.err = err
  366. return total, err
  367. }
  368. if n != len(f.toRead) {
  369. return total, io.ErrShortWrite
  370. }
  371. f.toRead = f.toRead[:0]
  372. }
  373. if f.err != nil && flushed {
  374. if f.err == io.EOF {
  375. return total, nil
  376. }
  377. return total, f.err
  378. }
  379. if f.err == nil {
  380. f.doStep()
  381. }
  382. if len(f.toRead) == 0 && f.err != nil && !flushed {
  383. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  384. flushed = true
  385. }
  386. }
  387. }
  388. func (f *decompressor) Close() error {
  389. if f.err == io.EOF {
  390. return nil
  391. }
  392. return f.err
  393. }
  394. // RFC 1951 section 3.2.7.
  395. // Compression with dynamic Huffman codes
  396. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  397. func (f *decompressor) readHuffman() error {
  398. // HLIT[5], HDIST[5], HCLEN[4].
  399. for f.nb < 5+5+4 {
  400. if err := f.moreBits(); err != nil {
  401. return err
  402. }
  403. }
  404. nlit := int(f.b&0x1F) + 257
  405. if nlit > maxNumLit {
  406. if debugDecode {
  407. fmt.Println("nlit > maxNumLit", nlit)
  408. }
  409. return CorruptInputError(f.roffset)
  410. }
  411. f.b >>= 5
  412. ndist := int(f.b&0x1F) + 1
  413. if ndist > maxNumDist {
  414. if debugDecode {
  415. fmt.Println("ndist > maxNumDist", ndist)
  416. }
  417. return CorruptInputError(f.roffset)
  418. }
  419. f.b >>= 5
  420. nclen := int(f.b&0xF) + 4
  421. // numCodes is 19, so nclen is always valid.
  422. f.b >>= 4
  423. f.nb -= 5 + 5 + 4
  424. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  425. for i := 0; i < nclen; i++ {
  426. for f.nb < 3 {
  427. if err := f.moreBits(); err != nil {
  428. return err
  429. }
  430. }
  431. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  432. f.b >>= 3
  433. f.nb -= 3
  434. }
  435. for i := nclen; i < len(codeOrder); i++ {
  436. f.codebits[codeOrder[i]] = 0
  437. }
  438. if !f.h1.init(f.codebits[0:]) {
  439. if debugDecode {
  440. fmt.Println("init codebits failed")
  441. }
  442. return CorruptInputError(f.roffset)
  443. }
  444. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  445. // using the code length Huffman code.
  446. for i, n := 0, nlit+ndist; i < n; {
  447. x, err := f.huffSym(&f.h1)
  448. if err != nil {
  449. return err
  450. }
  451. if x < 16 {
  452. // Actual length.
  453. f.bits[i] = x
  454. i++
  455. continue
  456. }
  457. // Repeat previous length or zero.
  458. var rep int
  459. var nb uint
  460. var b int
  461. switch x {
  462. default:
  463. return InternalError("unexpected length code")
  464. case 16:
  465. rep = 3
  466. nb = 2
  467. if i == 0 {
  468. if debugDecode {
  469. fmt.Println("i==0")
  470. }
  471. return CorruptInputError(f.roffset)
  472. }
  473. b = f.bits[i-1]
  474. case 17:
  475. rep = 3
  476. nb = 3
  477. b = 0
  478. case 18:
  479. rep = 11
  480. nb = 7
  481. b = 0
  482. }
  483. for f.nb < nb {
  484. if err := f.moreBits(); err != nil {
  485. if debugDecode {
  486. fmt.Println("morebits:", err)
  487. }
  488. return err
  489. }
  490. }
  491. rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
  492. f.b >>= nb & regSizeMaskUint32
  493. f.nb -= nb
  494. if i+rep > n {
  495. if debugDecode {
  496. fmt.Println("i+rep > n", i, rep, n)
  497. }
  498. return CorruptInputError(f.roffset)
  499. }
  500. for j := 0; j < rep; j++ {
  501. f.bits[i] = b
  502. i++
  503. }
  504. }
  505. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  506. if debugDecode {
  507. fmt.Println("init2 failed")
  508. }
  509. return CorruptInputError(f.roffset)
  510. }
  511. // As an optimization, we can initialize the maxRead bits to read at a time
  512. // for the HLIT tree to the length of the EOB marker since we know that
  513. // every block must terminate with one. This preserves the property that
  514. // we never read any extra bytes after the end of the DEFLATE stream.
  515. if f.h1.maxRead < f.bits[endBlockMarker] {
  516. f.h1.maxRead = f.bits[endBlockMarker]
  517. }
  518. if !f.final {
  519. // If not the final block, the smallest block possible is
  520. // a predefined table, BTYPE=01, with a single EOB marker.
  521. // This will take up 3 + 7 bits.
  522. f.h1.maxRead += 10
  523. }
  524. return nil
  525. }
  526. // Copy a single uncompressed data block from input to output.
  527. func (f *decompressor) dataBlock() {
  528. // Uncompressed.
  529. // Discard current half-byte.
  530. left := (f.nb) & 7
  531. f.nb -= left
  532. f.b >>= left
  533. offBytes := f.nb >> 3
  534. // Unfilled values will be overwritten.
  535. f.buf[0] = uint8(f.b)
  536. f.buf[1] = uint8(f.b >> 8)
  537. f.buf[2] = uint8(f.b >> 16)
  538. f.buf[3] = uint8(f.b >> 24)
  539. f.roffset += int64(offBytes)
  540. f.nb, f.b = 0, 0
  541. // Length then ones-complement of length.
  542. nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
  543. f.roffset += int64(nr)
  544. if err != nil {
  545. f.err = noEOF(err)
  546. return
  547. }
  548. n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
  549. nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
  550. if nn != ^n {
  551. if debugDecode {
  552. ncomp := ^n
  553. fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
  554. }
  555. f.err = CorruptInputError(f.roffset)
  556. return
  557. }
  558. if n == 0 {
  559. f.toRead = f.dict.readFlush()
  560. f.finishBlock()
  561. return
  562. }
  563. f.copyLen = int(n)
  564. f.copyData()
  565. }
  566. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  567. // It pauses for reads when f.hist is full.
  568. func (f *decompressor) copyData() {
  569. buf := f.dict.writeSlice()
  570. if len(buf) > f.copyLen {
  571. buf = buf[:f.copyLen]
  572. }
  573. cnt, err := io.ReadFull(f.r, buf)
  574. f.roffset += int64(cnt)
  575. f.copyLen -= cnt
  576. f.dict.writeMark(cnt)
  577. if err != nil {
  578. f.err = noEOF(err)
  579. return
  580. }
  581. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  582. f.toRead = f.dict.readFlush()
  583. f.step = copyData
  584. return
  585. }
  586. f.finishBlock()
  587. }
  588. func (f *decompressor) finishBlock() {
  589. if f.final {
  590. if f.dict.availRead() > 0 {
  591. f.toRead = f.dict.readFlush()
  592. }
  593. f.err = io.EOF
  594. }
  595. f.step = nextBlock
  596. }
  597. func (f *decompressor) doStep() {
  598. switch f.step {
  599. case copyData:
  600. f.copyData()
  601. case nextBlock:
  602. f.nextBlock()
  603. case huffmanBytesBuffer:
  604. f.huffmanBytesBuffer()
  605. case huffmanBytesReader:
  606. f.huffmanBytesReader()
  607. case huffmanBufioReader:
  608. f.huffmanBufioReader()
  609. case huffmanStringsReader:
  610. f.huffmanStringsReader()
  611. case huffmanGenericReader:
  612. f.huffmanGenericReader()
  613. default:
  614. panic("BUG: unexpected step state")
  615. }
  616. }
  617. // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
  618. func noEOF(e error) error {
  619. if e == io.EOF {
  620. return io.ErrUnexpectedEOF
  621. }
  622. return e
  623. }
  624. func (f *decompressor) moreBits() error {
  625. c, err := f.r.ReadByte()
  626. if err != nil {
  627. return noEOF(err)
  628. }
  629. f.roffset++
  630. f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
  631. f.nb += 8
  632. return nil
  633. }
  634. // Read the next Huffman-encoded symbol from f according to h.
  635. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  636. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  637. // with single element, huffSym must error on these two edge cases. In both
  638. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  639. // satisfy the n == 0 check below.
  640. n := uint(h.maxRead)
  641. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  642. // but is smart enough to keep local variables in registers, so use nb and b,
  643. // inline call to moreBits and reassign b,nb back to f on return.
  644. nb, b := f.nb, f.b
  645. for {
  646. for nb < n {
  647. c, err := f.r.ReadByte()
  648. if err != nil {
  649. f.b = b
  650. f.nb = nb
  651. return 0, noEOF(err)
  652. }
  653. f.roffset++
  654. b |= uint32(c) << (nb & regSizeMaskUint32)
  655. nb += 8
  656. }
  657. chunk := h.chunks[b&(huffmanNumChunks-1)]
  658. n = uint(chunk & huffmanCountMask)
  659. if n > huffmanChunkBits {
  660. chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
  661. n = uint(chunk & huffmanCountMask)
  662. }
  663. if n <= nb {
  664. if n == 0 {
  665. f.b = b
  666. f.nb = nb
  667. if debugDecode {
  668. fmt.Println("huffsym: n==0")
  669. }
  670. f.err = CorruptInputError(f.roffset)
  671. return 0, f.err
  672. }
  673. f.b = b >> (n & regSizeMaskUint32)
  674. f.nb = nb - n
  675. return int(chunk >> huffmanValueShift), nil
  676. }
  677. }
  678. }
  679. func makeReader(r io.Reader) Reader {
  680. if rr, ok := r.(Reader); ok {
  681. return rr
  682. }
  683. return bufio.NewReader(r)
  684. }
  685. func fixedHuffmanDecoderInit() {
  686. fixedOnce.Do(func() {
  687. // These come from the RFC section 3.2.6.
  688. var bits [288]int
  689. for i := 0; i < 144; i++ {
  690. bits[i] = 8
  691. }
  692. for i := 144; i < 256; i++ {
  693. bits[i] = 9
  694. }
  695. for i := 256; i < 280; i++ {
  696. bits[i] = 7
  697. }
  698. for i := 280; i < 288; i++ {
  699. bits[i] = 8
  700. }
  701. fixedHuffmanDecoder.init(bits[:])
  702. })
  703. }
  704. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  705. *f = decompressor{
  706. r: makeReader(r),
  707. bits: f.bits,
  708. codebits: f.codebits,
  709. h1: f.h1,
  710. h2: f.h2,
  711. dict: f.dict,
  712. step: nextBlock,
  713. }
  714. f.dict.init(maxMatchOffset, dict)
  715. return nil
  716. }
  717. // NewReader returns a new ReadCloser that can be used
  718. // to read the uncompressed version of r.
  719. // If r does not also implement io.ByteReader,
  720. // the decompressor may read more data than necessary from r.
  721. // It is the caller's responsibility to call Close on the ReadCloser
  722. // when finished reading.
  723. //
  724. // The ReadCloser returned by NewReader also implements Resetter.
  725. func NewReader(r io.Reader) io.ReadCloser {
  726. fixedHuffmanDecoderInit()
  727. var f decompressor
  728. f.r = makeReader(r)
  729. f.bits = new([maxNumLit + maxNumDist]int)
  730. f.codebits = new([numCodes]int)
  731. f.step = nextBlock
  732. f.dict.init(maxMatchOffset, nil)
  733. return &f
  734. }
  735. // NewReaderDict is like NewReader but initializes the reader
  736. // with a preset dictionary. The returned Reader behaves as if
  737. // the uncompressed data stream started with the given dictionary,
  738. // which has already been read. NewReaderDict is typically used
  739. // to read data compressed by NewWriterDict.
  740. //
  741. // The ReadCloser returned by NewReader also implements Resetter.
  742. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  743. fixedHuffmanDecoderInit()
  744. var f decompressor
  745. f.r = makeReader(r)
  746. f.bits = new([maxNumLit + maxNumDist]int)
  747. f.codebits = new([numCodes]int)
  748. f.step = nextBlock
  749. f.dict.init(maxMatchOffset, dict)
  750. return &f
  751. }