matchfinder.go 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. // The matchfinder package defines reusable components for data compression.
  2. //
  3. // Many compression libraries have two main parts:
  4. // - Something that looks for repeated sequences of bytes
  5. // - An encoder for the compressed data format (often an entropy coder)
  6. //
  7. // Although these are logically two separate steps, the implementations are
  8. // usually closely tied together. You can't use flate's matcher with snappy's
  9. // encoder, for example. This package defines interfaces and an intermediate
  10. // representation to allow mixing and matching compression components.
  11. package matchfinder
  12. import "io"
  13. // A Match is the basic unit of LZ77 compression.
  14. type Match struct {
  15. Unmatched int // the number of unmatched bytes since the previous match
  16. Length int // the number of bytes in the matched string; it may be 0 at the end of the input
  17. Distance int // how far back in the stream to copy from
  18. }
  19. // A MatchFinder performs the LZ77 stage of compression, looking for matches.
  20. type MatchFinder interface {
  21. // FindMatches looks for matches in src, appends them to dst, and returns dst.
  22. FindMatches(dst []Match, src []byte) []Match
  23. // Reset clears any internal state, preparing the MatchFinder to be used with
  24. // a new stream.
  25. Reset()
  26. }
  27. // An Encoder encodes the data in its final format.
  28. type Encoder interface {
  29. // Encode appends the encoded format of src to dst, using the match
  30. // information from matches.
  31. Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
  32. // Reset clears any internal state, preparing the Encoder to be used with
  33. // a new stream.
  34. Reset()
  35. }
  36. // A Writer uses MatchFinder and Encoder to write compressed data to Dest.
  37. type Writer struct {
  38. Dest io.Writer
  39. MatchFinder MatchFinder
  40. Encoder Encoder
  41. // BlockSize is the number of bytes to compress at a time. If it is zero,
  42. // each Write operation will be treated as one block.
  43. BlockSize int
  44. err error
  45. inBuf []byte
  46. outBuf []byte
  47. matches []Match
  48. }
  49. func (w *Writer) Write(p []byte) (n int, err error) {
  50. if w.err != nil {
  51. return 0, w.err
  52. }
  53. if w.BlockSize == 0 {
  54. return w.writeBlock(p, false)
  55. }
  56. w.inBuf = append(w.inBuf, p...)
  57. var pos int
  58. for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
  59. w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
  60. }
  61. if pos > 0 {
  62. n := copy(w.inBuf, w.inBuf[pos:])
  63. w.inBuf = w.inBuf[:n]
  64. }
  65. return len(p), w.err
  66. }
  67. func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
  68. w.outBuf = w.outBuf[:0]
  69. w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
  70. w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock)
  71. _, w.err = w.Dest.Write(w.outBuf)
  72. return len(p), w.err
  73. }
  74. func (w *Writer) Close() error {
  75. w.writeBlock(w.inBuf, true)
  76. w.inBuf = w.inBuf[:0]
  77. return w.err
  78. }
  79. func (w *Writer) Reset(newDest io.Writer) {
  80. w.MatchFinder.Reset()
  81. w.Encoder.Reset()
  82. w.err = nil
  83. w.inBuf = w.inBuf[:0]
  84. w.outBuf = w.outBuf[:0]
  85. w.matches = w.matches[:0]
  86. w.Dest = newDest
  87. }