123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- // Package fwd provides a buffered reader
- // and writer. Each has methods that help improve
- // the encoding/decoding performance of some binary
- // protocols.
- //
- // The [Writer] and [Reader] types provide similar
- // functionality to their counterparts in [bufio], plus
- // a few extra utility methods that simplify read-ahead
- // and write-ahead. I wrote this package to improve serialization
- // performance for http://github.com/tinylib/msgp,
- // where it provided about a 2x speedup over `bufio` for certain
- // workloads. However, care must be taken to understand the semantics of the
- // extra methods provided by this package, as they allow
- // the user to access and manipulate the buffer memory
- // directly.
- //
- // The extra methods for [Reader] are [Reader.Peek], [Reader.Skip]
- // and [Reader.Next]. (*fwd.Reader).Peek, unlike (*bufio.Reader).Peek,
- // will re-allocate the read buffer in order to accommodate arbitrarily
- // large read-ahead. (*fwd.Reader).Skip skips the next 'n' bytes
- // in the stream, and uses the [io.Seeker] interface if the underlying
- // stream implements it. (*fwd.Reader).Next returns a slice pointing
- // to the next 'n' bytes in the read buffer (like Reader.Peek), but also
- // increments the read position. This allows users to process streams
- // in arbitrary block sizes without having to manage appropriately-sized
- // slices. Additionally, obviating the need to copy the data from the
- // buffer to another location in memory can improve performance dramatically
- // in CPU-bound applications.
- //
- // [Writer] only has one extra method, which is (*fwd.Writer).Next, which
- // returns a slice pointing to the next 'n' bytes of the writer, and increments
- // the write position by the length of the returned slice. This allows users
- // to write directly to the end of the buffer.
- package fwd
- import (
- "io"
- "os"
- )
const (
	// DefaultReaderSize is the default size of the read buffer.
	DefaultReaderSize = 2048

	// minReaderSize is the minimum usable read buffer size,
	// taken straight from bufio.
	minReaderSize = 16
)
- // NewReader returns a new *Reader that reads from 'r'
- func NewReader(r io.Reader) *Reader {
- return NewReaderSize(r, DefaultReaderSize)
- }
- // NewReaderSize returns a new *Reader that
- // reads from 'r' and has a buffer size 'n'.
- func NewReaderSize(r io.Reader, n int) *Reader {
- buf := make([]byte, 0, max(n, minReaderSize))
- return NewReaderBuf(r, buf)
- }
- // NewReaderBuf returns a new *Reader that
- // reads from 'r' and uses 'buf' as a buffer.
- // 'buf' is not used when has smaller capacity than 16,
- // custom buffer is allocated instead.
- func NewReaderBuf(r io.Reader, buf []byte) *Reader {
- if cap(buf) < minReaderSize {
- buf = make([]byte, 0, minReaderSize)
- }
- buf = buf[:0]
- rd := &Reader{
- r: r,
- data: buf,
- }
- if s, ok := r.(io.Seeker); ok {
- rd.rs = s
- }
- return rd
- }
// Reader is a buffered look-ahead reader.
type Reader struct {
	r io.Reader // underlying reader

	// data[n:len(data)] is buffered data; data[len(data):cap(data)] is free buffer space
	data []byte // data
	n int // read offset into 'data'
	state error // last read error; popped by err()/noEOF()

	// if the reader passed to NewReader was
	// also an io.Seeker, this is non-nil
	rs io.Seeker
}
- // Reset resets the underlying reader
- // and the read buffer.
- func (r *Reader) Reset(rd io.Reader) {
- r.r = rd
- r.data = r.data[0:0]
- r.n = 0
- r.state = nil
- if s, ok := rd.(io.Seeker); ok {
- r.rs = s
- } else {
- r.rs = nil
- }
- }
- // more() does one read on the underlying reader
- func (r *Reader) more() {
- // move data backwards so that
- // the read offset is 0; this way
- // we can supply the maximum number of
- // bytes to the reader
- if r.n != 0 {
- if r.n < len(r.data) {
- r.data = r.data[:copy(r.data[0:], r.data[r.n:])]
- } else {
- r.data = r.data[:0]
- }
- r.n = 0
- }
- var a int
- a, r.state = r.r.Read(r.data[len(r.data):cap(r.data)])
- if a == 0 && r.state == nil {
- r.state = io.ErrNoProgress
- return
- } else if a > 0 && r.state == io.EOF {
- // discard the io.EOF if we read more than 0 bytes.
- // the next call to Read should return io.EOF again.
- r.state = nil
- } else if r.state != nil {
- return
- }
- r.data = r.data[:len(r.data)+a]
- }
- // pop error
- func (r *Reader) err() (e error) {
- e, r.state = r.state, nil
- return
- }
- // pop error; EOF -> io.ErrUnexpectedEOF
- func (r *Reader) noEOF() (e error) {
- e, r.state = r.state, nil
- if e == io.EOF {
- e = io.ErrUnexpectedEOF
- }
- return
- }
- // buffered bytes
- func (r *Reader) buffered() int { return len(r.data) - r.n }
- // Buffered returns the number of bytes currently in the buffer
- func (r *Reader) Buffered() int { return len(r.data) - r.n }
- // BufferSize returns the total size of the buffer
- func (r *Reader) BufferSize() int { return cap(r.data) }
- // Peek returns the next 'n' buffered bytes,
- // reading from the underlying reader if necessary.
- // It will only return a slice shorter than 'n' bytes
- // if it also returns an error. Peek does not advance
- // the reader. EOF errors are *not* returned as
- // io.ErrUnexpectedEOF.
- func (r *Reader) Peek(n int) ([]byte, error) {
- // in the degenerate case,
- // we may need to realloc
- // (the caller asked for more
- // bytes than the size of the buffer)
- if cap(r.data) < n {
- old := r.data[r.n:]
- r.data = make([]byte, n+r.buffered())
- r.data = r.data[:copy(r.data, old)]
- r.n = 0
- }
- // keep filling until
- // we hit an error or
- // read enough bytes
- for r.buffered() < n && r.state == nil {
- r.more()
- }
- // we must have hit an error
- if r.buffered() < n {
- return r.data[r.n:], r.err()
- }
- return r.data[r.n : r.n+n], nil
- }
- // discard(n) discards up to 'n' buffered bytes, and
- // and returns the number of bytes discarded
- func (r *Reader) discard(n int) int {
- inbuf := r.buffered()
- if inbuf <= n {
- r.n = 0
- r.data = r.data[:0]
- return inbuf
- }
- r.n += n
- return n
- }
- // Skip moves the reader forward 'n' bytes.
- // Returns the number of bytes skipped and any
- // errors encountered. It is analogous to Seek(n, 1).
- // If the underlying reader implements io.Seeker, then
- // that method will be used to skip forward.
- //
- // If the reader encounters
- // an EOF before skipping 'n' bytes, it
- // returns [io.ErrUnexpectedEOF]. If the
- // underlying reader implements [io.Seeker], then
- // those rules apply instead. (Many implementations
- // will not return [io.EOF] until the next call
- // to Read).
- func (r *Reader) Skip(n int) (int, error) {
- if n < 0 {
- return 0, os.ErrInvalid
- }
- // discard some or all of the current buffer
- skipped := r.discard(n)
- // if we can Seek() through the remaining bytes, do that
- if n > skipped && r.rs != nil {
- nn, err := r.rs.Seek(int64(n-skipped), 1)
- return int(nn) + skipped, err
- }
- // otherwise, keep filling the buffer
- // and discarding it up to 'n'
- for skipped < n && r.state == nil {
- r.more()
- skipped += r.discard(n - skipped)
- }
- return skipped, r.noEOF()
- }
- // Next returns the next 'n' bytes in the stream.
- // Unlike Peek, Next advances the reader position.
- // The returned bytes point to the same
- // data as the buffer, so the slice is
- // only valid until the next reader method call.
- // An EOF is considered an unexpected error.
- // If an the returned slice is less than the
- // length asked for, an error will be returned,
- // and the reader position will not be incremented.
- func (r *Reader) Next(n int) ([]byte, error) {
- // in case the buffer is too small
- if cap(r.data) < n {
- old := r.data[r.n:]
- r.data = make([]byte, n+r.buffered())
- r.data = r.data[:copy(r.data, old)]
- r.n = 0
- }
- // fill at least 'n' bytes
- for r.buffered() < n && r.state == nil {
- r.more()
- }
- if r.buffered() < n {
- return r.data[r.n:], r.noEOF()
- }
- out := r.data[r.n : r.n+n]
- r.n += n
- return out, nil
- }
- // Read implements [io.Reader].
- func (r *Reader) Read(b []byte) (int, error) {
- // if we have data in the buffer, just
- // return that.
- if r.buffered() != 0 {
- x := copy(b, r.data[r.n:])
- r.n += x
- return x, nil
- }
- var n int
- // we have no buffered data; determine
- // whether or not to buffer or call
- // the underlying reader directly
- if len(b) >= cap(r.data) {
- n, r.state = r.r.Read(b)
- } else {
- r.more()
- n = copy(b, r.data)
- r.n = n
- }
- if n == 0 {
- return 0, r.err()
- }
- return n, nil
- }
- // ReadFull attempts to read len(b) bytes into
- // 'b'. It returns the number of bytes read into
- // 'b', and an error if it does not return len(b).
- // EOF is considered an unexpected error.
- func (r *Reader) ReadFull(b []byte) (int, error) {
- var n int // read into b
- var nn int // scratch
- l := len(b)
- // either read buffered data,
- // or read directly for the underlying
- // buffer, or fetch more buffered data.
- for n < l && r.state == nil {
- if r.buffered() != 0 {
- nn = copy(b[n:], r.data[r.n:])
- n += nn
- r.n += nn
- } else if l-n > cap(r.data) {
- nn, r.state = r.r.Read(b[n:])
- n += nn
- } else {
- r.more()
- }
- }
- if n < l {
- return n, r.noEOF()
- }
- return n, nil
- }
- // ReadByte implements [io.ByteReader].
- func (r *Reader) ReadByte() (byte, error) {
- for r.buffered() < 1 && r.state == nil {
- r.more()
- }
- if r.buffered() < 1 {
- return 0, r.err()
- }
- b := r.data[r.n]
- r.n++
- return b, nil
- }
- // WriteTo implements [io.WriterTo].
- func (r *Reader) WriteTo(w io.Writer) (int64, error) {
- var (
- i int64
- ii int
- err error
- )
- // first, clear buffer
- if r.buffered() > 0 {
- ii, err = w.Write(r.data[r.n:])
- i += int64(ii)
- if err != nil {
- return i, err
- }
- r.data = r.data[0:0]
- r.n = 0
- }
- for r.state == nil {
- // here we just do
- // 1:1 reads and writes
- r.more()
- if r.buffered() > 0 {
- ii, err = w.Write(r.data)
- i += int64(ii)
- if err != nil {
- return i, err
- }
- r.data = r.data[0:0]
- r.n = 0
- }
- }
- if r.state != io.EOF {
- return i, r.err()
- }
- return i, nil
- }
// max returns the larger of a and b.
func max(a int, b int) int {
	if a >= b {
		return a
	}
	return b
}
|