|
|
|
package lexer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"io"
|
|
|
|
"strings"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
// ReadBufferSize indicates the initial buffer size
|
|
|
|
ReadBufferSize = 1 << 7 // 128B
|
|
|
|
|
|
|
|
// DoublingBufferSizeLimit indicates when we stop doubling
|
|
|
|
// and just add instead
|
|
|
|
DoublingBufferSizeLimit = 1 << 17 // 128KiB
|
|
|
|
)
|
|
|
|
|
|
|
|
// implemented interfaces
|
|
|
|
var (
|
|
|
|
_ io.RuneReader = (*Reader)(nil)
|
|
|
|
_ io.RuneScanner = (*Reader)(nil)
|
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
// ErrInvalidUnreadRune indicates UnreadRune() was called after an
|
|
|
|
// action other than a successful ReadRune()
|
|
|
|
ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
|
|
|
|
)
|
|
|
|
|
|
|
|
// Reader is a buffered rune reader aimed at implementing text parsers.
// It decodes UTF-8 runes from an underlying io.Reader and keeps the
// bytes read so far until they are emitted or discarded.
type Reader struct {
	// src is the underlying source of bytes.
	src io.Reader

	// buf holds the bytes fetched from src.
	buf []byte

	// buf[off:cursor] is what has been read but not yet emitted or
	// discarded; buf[cursor:] is what is still waiting to be decoded.
	off, cursor int

	// lastRuneSize is the size in bytes of the rune returned by the last
	// successful ReadRune(); UnreadRune() is only allowed while it is
	// positive.
	lastRuneSize int
}
|
|
|
|
|
|
|
|
// String returns what's already Read but not yet emitted or discarded
|
|
|
|
func (b *Reader) String() string {
|
|
|
|
return string(b.buf[b.off:b.cursor])
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit returns what's already being Read and discards it afterwards
|
|
|
|
func (b *Reader) Emit() string {
|
|
|
|
s := b.String()
|
|
|
|
b.Discard()
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
// Discard removes from the buffer everything that has been Read
|
|
|
|
func (b *Reader) Discard() {
|
|
|
|
switch {
|
|
|
|
case b.ready() == 0:
|
|
|
|
// reset
|
|
|
|
b.buf = b.buf[:0]
|
|
|
|
b.cursor = 0
|
|
|
|
b.off = 0
|
|
|
|
default:
|
|
|
|
// step
|
|
|
|
b.off = b.cursor
|
|
|
|
}
|
|
|
|
|
|
|
|
// and prevent UnreadRune()
|
|
|
|
b.lastRuneSize = -1
|
|
|
|
}
|
|
|
|
|
|
|
|
// ready tells how many bytes are ready to decode
|
|
|
|
func (b *Reader) ready() int {
|
|
|
|
return len(b.buf) - b.cursor
|
|
|
|
}
|
|
|
|
|
|
|
|
// available tells how many free bytes remain at the end of the buffer
|
|
|
|
func (b *Reader) available() int {
|
|
|
|
return cap(b.buf) - len(b.buf)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Reader) needsBytes(n int) error {
|
|
|
|
for {
|
|
|
|
if b.ready() >= n {
|
|
|
|
// ready
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// make room
|
|
|
|
b.prepareBuffer(n - b.ready())
|
|
|
|
|
|
|
|
// and read more
|
|
|
|
_, err := b.fill()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Reader) rebuffer(size int) {
|
|
|
|
var src, dst []byte
|
|
|
|
|
|
|
|
if size > cap(b.buf) {
|
|
|
|
// new buffer
|
|
|
|
dst = make([]byte, size)
|
|
|
|
} else {
|
|
|
|
// same buffer
|
|
|
|
dst = b.buf
|
|
|
|
}
|
|
|
|
|
|
|
|
src = b.buf[b.off:]
|
|
|
|
dst = dst[:len(src)]
|
|
|
|
|
|
|
|
copy(dst, src)
|
|
|
|
|
|
|
|
b.cursor -= b.off
|
|
|
|
b.buf = dst
|
|
|
|
b.off = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Reader) prepareBuffer(n int) {
|
|
|
|
if n > b.available() {
|
|
|
|
needed := len(b.buf) + n - b.off
|
|
|
|
size := cap(b.buf)
|
|
|
|
|
|
|
|
for size < needed {
|
|
|
|
switch {
|
|
|
|
case size < DoublingBufferSizeLimit:
|
|
|
|
size *= 2
|
|
|
|
default:
|
|
|
|
size += DoublingBufferSizeLimit
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
b.rebuffer(size)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Reader) fill() (int, error) {
|
|
|
|
start := len(b.buf)
|
|
|
|
n, err := b.src.Read(b.buf[start:cap(b.buf)])
|
|
|
|
if n > 0 {
|
|
|
|
b.buf = b.buf[:start+n]
|
|
|
|
}
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadRune reads the next rune
|
|
|
|
func (b *Reader) ReadRune() (rune, int, error) {
|
|
|
|
// we need at least one byte to start
|
|
|
|
count := 1
|
|
|
|
for {
|
|
|
|
err := b.needsBytes(count)
|
|
|
|
if err != nil {
|
|
|
|
b.lastRuneSize = -1
|
|
|
|
|
|
|
|
return 0, 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if utf8.FullRune(b.buf[b.cursor:]) {
|
|
|
|
// we have a full rune
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// more
|
|
|
|
count = b.ready() + 1
|
|
|
|
}
|
|
|
|
|
|
|
|
// decode rune
|
|
|
|
r, l := utf8.DecodeRune(b.buf[b.cursor:])
|
|
|
|
// step over
|
|
|
|
b.cursor += l
|
|
|
|
// and remember for UnreadRune()
|
|
|
|
b.lastRuneSize = l
|
|
|
|
|
|
|
|
return r, l, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnreadRune moves the cursor where it was before the last call to ReadRune
|
|
|
|
func (b *Reader) UnreadRune() error {
|
|
|
|
if b.lastRuneSize > 0 {
|
|
|
|
b.cursor -= b.lastRuneSize
|
|
|
|
b.lastRuneSize = -1
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return ErrInvalidUnreadRune
|
|
|
|
}
|
|
|
|
|
|
|
|
// PeekRune returns information about the next rune without moving the
|
|
|
|
// cursor
|
|
|
|
func (b *Reader) PeekRune() (rune, int, error) {
|
|
|
|
r, l, err := b.ReadRune()
|
|
|
|
if err != nil {
|
|
|
|
return r, l, err
|
|
|
|
}
|
|
|
|
err = b.UnreadRune()
|
|
|
|
return r, l, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewReader creates a new runes [Reader] using the given [io.Reader]
|
|
|
|
func NewReader(r io.Reader) *Reader {
|
|
|
|
if r == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return &Reader{
|
|
|
|
src: r,
|
|
|
|
buf: make([]byte, 0, ReadBufferSize),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewReaderBytes creates a new runes [Reader] using the given bytes
|
|
|
|
func NewReaderBytes(b []byte) *Reader {
|
|
|
|
return NewReader(bytes.NewReader(b))
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewReaderString creates a new runes [Reader] using the given string
|
|
|
|
func NewReaderString(s string) *Reader {
|
|
|
|
return NewReader(strings.NewReader(s))
|
|
|
|
}
|