asciigoat's core library https://asciigoat.org/core
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

257 lines
4.7 KiB

package lexer
import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
const (
// ReadBufferSize is the initial capacity, in bytes, of the buffer
// allocated by NewReader.
ReadBufferSize = 1 << 7 // 128B
// DoublingBufferSizeLimit is the capacity, in bytes, below which the
// buffer grows by doubling. Once the capacity reaches it, the buffer
// grows linearly instead, by this same amount on each step.
DoublingBufferSizeLimit = 1 << 17 // 128KiB
)
// Compile-time assertions that *Reader implements these interfaces.
var (
_ io.RuneReader = (*Reader)(nil)
_ io.RuneScanner = (*Reader)(nil)
)
var (
// ErrInvalidUnreadRune indicates UnreadRune() was called after an
// action other than a successful ReadRune()
ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)
// Reader is a RuneReader aimed at implementing text parsers.
//
// It buffers bytes from an io.Reader and keeps two positions in that
// buffer: off, where the text that has been read but not yet emitted
// or discarded begins, and cursor, where the next rune will be decoded.
type Reader struct {
// src is the source the buffer is filled from
src io.Reader
// buf holds the bytes read from src that have not been dropped yet
buf []byte
// off is the index in buf where the not yet emitted text starts
off int
// cursor is the index in buf of the next byte to decode
cursor int
// lastRuneSize is the size in bytes of the rune returned by the
// last successful ReadRune, as needed by UnreadRune. Values that
// are not positive mean there is nothing to unread.
lastRuneSize int
}
// String returns a copy of the text between the last Emit/Discard
// point and the cursor, without consuming it.
func (b *Reader) String() string {
	pending := b.buf[b.off:b.cursor]
	return string(pending)
}
// Emit returns the text that has been Read so far and drops it from
// the buffer, so the next String or Emit starts at the cursor.
func (b *Reader) Emit() string {
	// copy the text out first, Discard may reset the buffer
	out := b.String()
	b.Discard()
	return out
}
// Discard drops everything that has been Read so far. A following
// UnreadRune fails until another rune is read.
func (b *Reader) Discard() {
	if b.ready() == 0 {
		// nothing left to decode, so the whole buffer can be reused
		b.buf = b.buf[:0]
		b.off, b.cursor = 0, 0
	} else {
		// undecoded bytes remain, only move the start forward
		b.off = b.cursor
	}

	// the previous rune is gone, UnreadRune() must not step back into it
	b.lastRuneSize = -1
}
// ready returns the number of buffered bytes past the cursor, which
// are the ones waiting to be decoded.
func (b *Reader) ready() int {
	end := len(b.buf)
	return end - b.cursor
}
// available returns the number of unused bytes between the end of the
// buffered data and the buffer's capacity.
func (b *Reader) available() int {
	used := len(b.buf)
	return cap(b.buf) - used
}
// needsBytes makes sure at least n bytes are buffered past the cursor,
// making room in the buffer and reading from the source as needed.
//
// It returns nil once n bytes are ready. When the source hands over
// its last bytes together with io.EOF, as the io.Reader contract
// allows, and those bytes satisfy the request, nil is returned and the
// EOF is left for a later read to report. Any other error is returned
// as received; bytes read up to that point remain buffered.
// io.ErrNoProgress is returned if the source repeatedly returns no
// data and no error.
func (b *Reader) needsBytes(n int) error {
	// same bound bufio.Reader uses before giving up on a stalled source
	const maxEmptyReads = 100

	emptyReads := 0
	for b.ready() < n {
		// make room
		b.prepareBuffer(n - b.ready())
		// and read more
		got, err := b.fill()
		switch {
		case err == io.EOF && b.ready() >= n:
			// data arrived along with EOF; serve the data first
			return nil
		case err != nil:
			return err
		case got > 0:
			emptyReads = 0
		default:
			// nothing read and no error; don't spin forever
			emptyReads++
			if emptyReads >= maxEmptyReads {
				return io.ErrNoProgress
			}
		}
	}
	return nil
}
// rebuffer moves the bytes from off onwards to the start of a buffer
// of at least the given size, dropping what was already emitted or
// discarded. A new buffer is allocated only when size exceeds the
// current capacity; otherwise the data is shifted in place.
func (b *Reader) rebuffer(size int) {
	kept := b.buf[b.off:]

	target := b.buf
	if size > cap(target) {
		// too small, allocate a new one
		target = make([]byte, size)
	}
	target = target[:len(kept)]

	// copy handles the overlap when target and kept share memory
	copy(target, kept)

	b.buf = target
	b.cursor -= b.off
	b.off = 0
}
// prepareBuffer makes sure n more bytes fit at the end of the buffer.
// When they don't, the buffer is rebuilt without the bytes before off,
// growing its capacity first if that alone doesn't free enough space.
// The capacity doubles while under DoublingBufferSizeLimit and grows
// by DoublingBufferSizeLimit per step afterwards.
func (b *Reader) prepareBuffer(n int) {
	if n <= b.available() {
		// it already fits
		return
	}

	// bytes to keep plus the bytes requested
	required := len(b.buf) - b.off + n

	newSize := cap(b.buf)
	for newSize < required {
		if newSize < DoublingBufferSizeLimit {
			newSize *= 2
		} else {
			newSize += DoublingBufferSizeLimit
		}
	}

	b.rebuffer(newSize)
}
// fill performs a single Read from the source into the free space at
// the end of the buffer and extends the buffer over the bytes read.
// It returns what the source's Read returned.
func (b *Reader) fill() (int, error) {
	used := len(b.buf)
	free := b.buf[used:cap(b.buf)]

	n, err := b.src.Read(free)
	if n > 0 {
		b.buf = b.buf[:used+n]
	}
	return n, err
}
// ReadRune reads the next rune from the source and moves the cursor
// past it. It returns the rune, its size in bytes, and any error.
//
// Bytes that are not valid UTF-8 are returned as utf8.RuneError with a
// size of 1. That includes a multi-byte sequence cut short by the end
// of the input: the bytes left over are delivered one at a time as
// utf8.RuneError before io.EOF is reported, instead of being silently
// dropped.
//
// On error the cursor doesn't move and a following UnreadRune fails.
func (b *Reader) ReadRune() (rune, int, error) {
	// we need at least one byte to start, and one more on every
	// iteration that finds the rune is still incomplete
	for want := 1; ; want = b.ready() + 1 {
		err := b.needsBytes(want)
		if err != nil {
			if err != io.EOF || b.ready() == 0 {
				b.lastRuneSize = -1
				return 0, 0, err
			}
			// the source is exhausted but bytes remain buffered;
			// decode what is there, complete or not
			break
		}

		if utf8.FullRune(b.buf[b.cursor:]) {
			// we have a full rune
			break
		}
	}

	// decode rune; at least one byte is ready, so size is never 0
	r, size := utf8.DecodeRune(b.buf[b.cursor:])
	// step over
	b.cursor += size
	// and remember for UnreadRune()
	b.lastRuneSize = size
	return r, size, nil
}
// UnreadRune moves the cursor back over the rune returned by the last
// ReadRune. It can only undo one rune; it returns ErrInvalidUnreadRune
// when there is no rune recorded to step back over.
func (b *Reader) UnreadRune() error {
	size := b.lastRuneSize
	if size <= 0 {
		return ErrInvalidUnreadRune
	}

	b.cursor -= size
	// only one step back is allowed
	b.lastRuneSize = -1
	return nil
}
// PeekRune returns the next rune and its size in bytes, leaving the
// cursor where it was. It is implemented as a ReadRune followed by an
// UnreadRune, so UnreadRune can't be used right after it.
func (b *Reader) PeekRune() (rune, int, error) {
	r, size, err := b.ReadRune()
	if err == nil {
		// put it back
		err = b.UnreadRune()
	}
	return r, size, err
}
// Accept reads the next rune and keeps it consumed only if cond
// returns true for it. It returns true when the rune was consumed, and
// false when it was rejected or couldn't be read.
func (b *Reader) Accept(cond func(r rune) bool) bool {
	r, _, err := b.ReadRune()
	if err != nil {
		return false
	}
	if cond(r) {
		return true
	}

	// rejected, put it back. ReadRune just succeeded so this is allowed
	_ = b.UnreadRune()
	return false
}
// AcceptAll keeps consuming runes for as long as cond returns true for
// them, stopping at the first rune that is rejected, which is left
// unread, or that can't be read. It returns true if at least one rune
// was consumed.
func (b *Reader) AcceptAll(cond func(r rune) bool) bool {
	matched := false
	for {
		r, _, err := b.ReadRune()
		if err != nil {
			return matched
		}
		if !cond(r) {
			// rejected, put it back. ReadRune just succeeded so
			// this is allowed
			_ = b.UnreadRune()
			return matched
		}
		matched = true
	}
}
// NewReader creates a new runes [Reader] that reads from the given
// [io.Reader], starting with an empty buffer of ReadBufferSize bytes
// of capacity. It returns nil if r is nil.
func NewReader(r io.Reader) *Reader {
	if r == nil {
		return nil
	}

	rd := new(Reader)
	rd.src = r
	rd.buf = make([]byte, 0, ReadBufferSize)
	return rd
}
// NewReaderBytes creates a new runes [Reader] that reads from the
// given byte slice.
func NewReaderBytes(b []byte) *Reader {
	src := bytes.NewReader(b)
	return NewReader(src)
}
// NewReaderString creates a new runes [Reader] that reads from the
// given string.
func NewReaderString(s string) *Reader {
	src := strings.NewReader(s)
	return NewReader(src)
}