package lexer

import (
	"bytes"
	"errors"
	"io"
	"strings"
	"unicode/utf8"
)

const (
	// ReadBufferSize indicates the initial buffer size.
	ReadBufferSize = 1 << 7 // 128B

	// DoublingBufferSizeLimit indicates when we stop doubling
	// and just add instead.
	DoublingBufferSizeLimit = 1 << 17 // 128KiB
)

// implemented interfaces
var (
	_ io.RuneReader  = (*Reader)(nil)
	_ io.RuneScanner = (*Reader)(nil)
)

var (
	// ErrInvalidUnreadRune indicates UnreadRune() was called after an
	// action other than a successful ReadRune().
	ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)

// Reader is a RuneReader aimed at implementing text parsers.
//
// Bytes in buf[off:cursor] have been read but not yet emitted or
// discarded; bytes in buf[cursor:] are buffered but not yet decoded.
type Reader struct {
	src io.Reader
	buf []byte
	// err is an error returned by src together with data; it is
	// reported once the buffered data can't satisfy a request.
	err error

	off          int
	cursor       int
	lastRuneSize int
}

// String returns what's already Read but not yet emitted or discarded.
func (b *Reader) String() string {
	return string(b.buf[b.off:b.cursor])
}

// Emit returns what's already been Read and discards it afterwards.
func (b *Reader) Emit() string {
	s := b.String()
	b.Discard()
	return s
}

// Discard removes from the buffer everything that has been Read.
// It also invalidates UnreadRune() until the next successful ReadRune().
func (b *Reader) Discard() {
	switch {
	case b.ready() == 0:
		// nothing left to decode, reset and reuse the whole buffer
		b.buf = b.buf[:0]
		b.cursor = 0
		b.off = 0
	default:
		// step
		b.off = b.cursor
	}

	// and prevent UnreadRune()
	b.lastRuneSize = -1
}

// ready tells how many bytes are ready to decode.
func (b *Reader) ready() int {
	return len(b.buf) - b.cursor
}

// available tells how many free bytes remain at the end of the buffer.
func (b *Reader) available() int {
	return cap(b.buf) - len(b.buf)
}

// needsBytes reads from the source until at least n bytes are ready to
// decode. An error returned by the source along with data is only
// reported once the buffered data is insufficient, so no bytes are
// hidden behind an early error. A source that repeatedly returns no
// data and no error yields io.ErrNoProgress instead of looping forever.
func (b *Reader) needsBytes(n int) error {
	const maxEmptyReads = 100

	emptyReads := 0
	for b.ready() < n {
		if b.err != nil {
			// deferred error, the buffer can't satisfy the request
			err := b.err
			b.err = nil
			return err
		}

		// make room
		b.prepareBuffer(n - b.ready())

		// and read more
		got, err := b.fill()
		switch {
		case err != nil:
			// the data read, if any, is already buffered. the loop
			// condition decides if the error has to be reported now.
			b.err = err
		case got > 0:
			emptyReads = 0
		default:
			emptyReads++
			if emptyReads >= maxEmptyReads {
				return io.ErrNoProgress
			}
		}
	}

	// ready
	return nil
}

// rebuffer moves the bytes not yet discarded to the beginning of a
// buffer of at least the given capacity, allocating a new one only if
// the current buffer is smaller than size.
func (b *Reader) rebuffer(size int) {
	pending := b.buf[b.off:]

	dst := b.buf
	if size > cap(b.buf) {
		// new buffer
		dst = make([]byte, size)
	}

	// copy handles the overlap when compacting in place
	dst = dst[:len(pending)]
	copy(dst, pending)

	b.cursor -= b.off
	b.buf = dst
	b.off = 0
}

// prepareBuffer makes sure there is room for at least n more bytes at
// the end of the buffer, compacting or growing it when needed.
func (b *Reader) prepareBuffer(n int) {
	if n <= b.available() {
		return
	}

	needed := len(b.buf) + n - b.off
	size := cap(b.buf)
	if size < ReadBufferSize {
		// doubling a zero capacity would never reach needed
		size = ReadBufferSize
	}

	for size < needed {
		if size < DoublingBufferSizeLimit {
			size *= 2
		} else {
			size += DoublingBufferSizeLimit
		}
	}

	b.rebuffer(size)
}

// fill reads once from the source into the free space at the end of
// the buffer, and returns what the source returned. A Reader without
// a source behaves as an empty input.
func (b *Reader) fill() (int, error) {
	if b.src == nil {
		return 0, io.EOF
	}

	start := len(b.buf)
	n, err := b.src.Read(b.buf[start:cap(b.buf)])
	if n > 0 {
		b.buf = b.buf[:start+n]
	}
	return n, err
}

// ReadRune reads the next rune, returning the rune and its size in
// bytes. Invalid bytes, including a UTF-8 sequence truncated by the
// end of the input, are returned as utf8.RuneError with size 1.
func (b *Reader) ReadRune() (rune, int, error) {
	// we need at least one byte to start
	count := 1
	for {
		err := b.needsBytes(count)
		if err != nil {
			if errors.Is(err, io.EOF) && b.ready() > 0 {
				// truncated sequence at the end of the input,
				// decode what we have instead of hiding it.
				break
			}

			b.lastRuneSize = -1
			return 0, 0, err
		}

		if utf8.FullRune(b.buf[b.cursor:]) {
			// we have a full rune
			break
		}

		// more
		count = b.ready() + 1
	}

	// decode rune
	r, l := utf8.DecodeRune(b.buf[b.cursor:])
	// step over
	b.cursor += l
	// and remember for UnreadRune()
	b.lastRuneSize = l

	return r, l, nil
}

// UnreadRune moves the cursor where it was before the last call to
// ReadRune. It returns ErrInvalidUnreadRune if the last action wasn't
// a successful ReadRune.
func (b *Reader) UnreadRune() error {
	if b.lastRuneSize > 0 {
		b.cursor -= b.lastRuneSize
		b.lastRuneSize = -1
		return nil
	}
	return ErrInvalidUnreadRune
}

// PeekRune returns information about the next rune without moving the
// cursor. Like any action other than ReadRune, it prevents a following
// UnreadRune from succeeding.
func (b *Reader) PeekRune() (rune, int, error) {
	r, l, err := b.ReadRune()
	if err != nil {
		return r, l, err
	}
	err = b.UnreadRune()
	return r, l, err
}

// NewReader creates a new runes [Reader] using the given [io.Reader].
// It returns nil if no reader is given.
func NewReader(r io.Reader) *Reader {
	if r == nil {
		return nil
	}

	return &Reader{
		src: r,
		buf: make([]byte, 0, ReadBufferSize),
	}
}

// NewReaderBytes creates a new runes [Reader] using the given bytes.
func NewReaderBytes(b []byte) *Reader {
	return NewReader(bytes.NewReader(b))
}

// NewReaderString creates a new runes [Reader] using the given string.
func NewReaderString(s string) *Reader {
	return NewReader(strings.NewReader(s))
}