// Rune reader of package runes (runes/reader.go). Package runes helps us
// work with runes. This code relies on the file-level imports "bytes", "io",
// "strings" and "unicode/utf8".

const (
	// ReadBufferSize indicates the initial buffer size.
	ReadBufferSize = 1 << 7 // 128B

	// DoublingBufferSizeLimit indicates when we stop doubling
	// and just add instead.
	DoublingBufferSizeLimit = 1 << 17 // 128KiB

	// maxEmptyReads caps how many consecutive source reads may return
	// neither data nor an error before we give up with io.ErrNoProgress.
	maxEmptyReads = 100
)

// implemented interfaces
var (
	_ io.RuneReader = (*Reader)(nil)
)

// Reader is a RuneReader aimed at implementing text parsers.
//
// Use NewReader, NewReaderBytes or NewReaderString to create one; the
// zero value has no source to read from.
type Reader struct {
	// src is the underlying source of bytes.
	src io.Reader
	// err is a source error that arrived together with data. It is held
	// back until the buffered data cannot satisfy a request.
	err error

	// buf holds the bytes read from src so far; len(buf) marks the end
	// of the data and cap(buf) the end of the allocated space.
	buf []byte
	// off is the index of the first byte preserved by rebuffer.
	// NOTE(review): no visible code advances off, so every byte read is
	// retained — confirm this is intended.
	off int
	// cursor is the index of the next byte to decode.
	cursor int
}

// ready tells how many bytes are ready to decode.
func (b *Reader) ready() int {
	return len(b.buf) - b.cursor
}

// available tells how many free bytes remain at the end of the buffer.
func (b *Reader) available() int {
	return cap(b.buf) - len(b.buf)
}

// needsBytes reads from the source until at least n bytes are ready to
// decode.
//
// An error returned by the source together with data is remembered and only
// reported once the buffered bytes are not enough, so bytes are never
// dropped. The error is reported once and then cleared, so a later call
// reads from the source again. io.ErrNoProgress is returned if the source
// keeps returning no data and no error.
func (b *Reader) needsBytes(n int) error {
	empty := 0

	for b.ready() < n {
		if err := b.err; err != nil {
			// buffered data wasn't enough, report the pending error
			b.err = nil
			return err
		}

		// make room
		b.prepareBuffer(n - b.ready())

		// and read more
		got, err := b.fill()
		switch {
		case err != nil:
			// process whatever came with it before reporting
			b.err = err
		case got > 0:
			empty = 0
		default:
			empty++
			if empty >= maxEmptyReads {
				return io.ErrNoProgress
			}
		}
	}

	return nil
}

// rebuffer moves the bytes from off onwards to the beginning of the buffer,
// dropping everything before off. If size exceeds the current capacity a new
// buffer of that capacity is allocated, otherwise the data is moved in place.
// cursor is adjusted to keep pointing at the same byte and off becomes 0.
func (b *Reader) rebuffer(size int) {
	var src, dst []byte

	if size <= cap(b.buf) {
		// same buffer
		dst = b.buf
	} else {
		// new buffer
		dst = make([]byte, size)
	}

	src = b.buf[b.off:]
	dst = dst[:len(src)]

	// copy handles overlapping slices, so the in-place case is safe
	copy(dst, src)

	b.cursor -= b.off
	b.buf = dst
	b.off = 0
}

// prepareBuffer makes sure at least n free bytes are available at the end of
// the buffer, compacting or growing it as needed. Growth doubles the
// capacity until DoublingBufferSizeLimit and adds DoublingBufferSizeLimit
// afterwards.
func (b *Reader) prepareBuffer(n int) {
	available := b.available()

	switch {
	case n <= available:
		// n or more already available
	case n <= available+b.off:
		// rebase is enough
		b.rebuffer(0)
	default:
		// resize
		needed := len(b.buf) + n - b.off
		size := cap(b.buf)
		if size == 0 {
			// doubling zero would never reach needed
			size = ReadBufferSize
		}

		for size < needed {
			switch {
			case size < DoublingBufferSizeLimit:
				size *= 2
			default:
				size += DoublingBufferSizeLimit
			}
		}

		b.rebuffer(size)
	}
}

// fill performs a single read from the source into the free space at the end
// of the buffer, and returns what the source returned. Bytes read are added
// to the buffer even when an error accompanies them.
func (b *Reader) fill() (int, error) {
	start := len(b.buf)
	n, err := b.src.Read(b.buf[start:cap(b.buf)])
	if n > 0 {
		b.buf = b.buf[:start+n]
	}
	return n, err
}

// ReadRune reads the next rune and steps over it, returning the rune and
// its size in bytes.
//
// Invalid UTF-8, including a sequence truncated by the end of the input, is
// returned as utf8.RuneError with a size of 1. Errors from the source are
// returned with a zero rune and size once the buffered data is exhausted.
func (b *Reader) ReadRune() (rune, int, error) {
	// we need at least one byte to start
	count := 1
	for {
		if err := b.needsBytes(count); err != nil {
			if err != io.EOF || b.ready() == 0 {
				return 0, 0, err
			}

			// truncated sequence at the end of the input,
			// decode what's left instead of dropping it
			break
		}

		if utf8.FullRune(b.buf[b.cursor:]) {
			// we have a full rune
			break
		}

		// more
		count = b.ready() + 1
	}

	// decode rune
	r, l := utf8.DecodeRune(b.buf[b.cursor:])
	// and step over it
	b.cursor += l

	return r, l, nil
}

// NewReader creates a new runes [Reader] using the given [io.Reader].
// It returns nil if no reader is given.
func NewReader(r io.Reader) *Reader {
	if r == nil {
		return nil
	}

	return &Reader{
		src: r,
		buf: make([]byte, 0, ReadBufferSize),
	}
}

// NewReaderBytes creates a new runes [Reader] using the given bytes.
func NewReaderBytes(b []byte) *Reader {
	return NewReader(bytes.NewReader(b))
}

// NewReaderString creates a new runes [Reader] using the given string.
func NewReaderString(s string) *Reader {
	return NewReader(strings.NewReader(s))
}