asciigoat's core library
https://asciigoat.org/core
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
256 lines
4.7 KiB
256 lines
4.7 KiB
package lexer |
|
|
|
import ( |
|
"bytes" |
|
"errors" |
|
"io" |
|
"strings" |
|
"unicode/utf8" |
|
) |
|
|
|
// Buffer sizing parameters.
const (
	// ReadBufferSize is the initial buffer size, in bytes.
	ReadBufferSize = 128 // 1 << 7

	// DoublingBufferSizeLimit is the size from which the buffer stops
	// growing by doubling and grows by this fixed amount instead.
	DoublingBufferSizeLimit = 128 * 1024 // 1 << 17, 128KiB
)
|
|
|
// implemented interfaces |
|
var ( |
|
_ io.RuneReader = (*Reader)(nil) |
|
_ io.RuneScanner = (*Reader)(nil) |
|
) |
|
|
|
// ErrInvalidUnreadRune indicates UnreadRune() was called after an
// action other than a successful ReadRune().
var ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
|
|
|
// Reader is a RuneReader aimed at implementing text parsers.
//
// Bytes read from the source are kept in a buffer together with two
// positions in it, so that everything read since the last Emit or
// Discard can be handed out as one string.
type Reader struct {
	// src is the source the buffer is filled from.
	src io.Reader

	// buf holds the bytes read from src.
	buf []byte
	// off is the position in buf where the text that has been read but
	// not yet emitted or discarded begins.
	off int
	// cursor is the position in buf of the next byte to decode.
	cursor int

	// lastRuneSize is the size in bytes of the rune returned by the last
	// successful ReadRune. UnreadRune is only allowed while it is positive.
	lastRuneSize int
}
|
|
|
// String returns what's already Read but not yet emitted or discarded |
|
func (b *Reader) String() string { |
|
return string(b.buf[b.off:b.cursor]) |
|
} |
|
|
|
// Emit returns what's already being Read and discards it afterwards |
|
func (b *Reader) Emit() string { |
|
s := b.String() |
|
b.Discard() |
|
return s |
|
} |
|
|
|
// Discard removes from the buffer everything that has been Read |
|
func (b *Reader) Discard() { |
|
switch { |
|
case b.ready() == 0: |
|
// reset |
|
b.buf = b.buf[:0] |
|
b.cursor = 0 |
|
b.off = 0 |
|
default: |
|
// step |
|
b.off = b.cursor |
|
} |
|
|
|
// and prevent UnreadRune() |
|
b.lastRuneSize = -1 |
|
} |
|
|
|
// ready tells how many bytes are ready to decode |
|
func (b *Reader) ready() int { |
|
return len(b.buf) - b.cursor |
|
} |
|
|
|
// available tells how many free bytes remain at the end of the buffer |
|
func (b *Reader) available() int { |
|
return cap(b.buf) - len(b.buf) |
|
} |
|
|
|
func (b *Reader) needsBytes(n int) error { |
|
for { |
|
if b.ready() >= n { |
|
// ready |
|
return nil |
|
} |
|
|
|
// make room |
|
b.prepareBuffer(n - b.ready()) |
|
|
|
// and read more |
|
_, err := b.fill() |
|
if err != nil { |
|
return err |
|
} |
|
} |
|
} |
|
|
|
func (b *Reader) rebuffer(size int) { |
|
var src, dst []byte |
|
|
|
if size > cap(b.buf) { |
|
// new buffer |
|
dst = make([]byte, size) |
|
} else { |
|
// same buffer |
|
dst = b.buf |
|
} |
|
|
|
src = b.buf[b.off:] |
|
dst = dst[:len(src)] |
|
|
|
copy(dst, src) |
|
|
|
b.cursor -= b.off |
|
b.buf = dst |
|
b.off = 0 |
|
} |
|
|
|
func (b *Reader) prepareBuffer(n int) { |
|
if n > b.available() { |
|
needed := len(b.buf) + n - b.off |
|
size := cap(b.buf) |
|
|
|
for size < needed { |
|
switch { |
|
case size < DoublingBufferSizeLimit: |
|
size *= 2 |
|
default: |
|
size += DoublingBufferSizeLimit |
|
} |
|
} |
|
|
|
b.rebuffer(size) |
|
} |
|
} |
|
|
|
func (b *Reader) fill() (int, error) { |
|
start := len(b.buf) |
|
n, err := b.src.Read(b.buf[start:cap(b.buf)]) |
|
if n > 0 { |
|
b.buf = b.buf[:start+n] |
|
} |
|
return n, err |
|
} |
|
|
|
// ReadRune reads the next rune |
|
func (b *Reader) ReadRune() (rune, int, error) { |
|
// we need at least one byte to start |
|
count := 1 |
|
for { |
|
err := b.needsBytes(count) |
|
if err != nil { |
|
b.lastRuneSize = -1 |
|
|
|
return 0, 0, err |
|
} |
|
|
|
if utf8.FullRune(b.buf[b.cursor:]) { |
|
// we have a full rune |
|
break |
|
} |
|
|
|
// more |
|
count = b.ready() + 1 |
|
} |
|
|
|
// decode rune |
|
r, l := utf8.DecodeRune(b.buf[b.cursor:]) |
|
// step over |
|
b.cursor += l |
|
// and remember for UnreadRune() |
|
b.lastRuneSize = l |
|
|
|
return r, l, nil |
|
} |
|
|
|
// UnreadRune moves the cursor where it was before the last call to ReadRune |
|
func (b *Reader) UnreadRune() error { |
|
if b.lastRuneSize > 0 { |
|
b.cursor -= b.lastRuneSize |
|
b.lastRuneSize = -1 |
|
return nil |
|
} |
|
|
|
return ErrInvalidUnreadRune |
|
} |
|
|
|
// PeekRune returns information about the next rune without moving the |
|
// cursor |
|
func (b *Reader) PeekRune() (rune, int, error) { |
|
r, l, err := b.ReadRune() |
|
if err != nil { |
|
return r, l, err |
|
} |
|
err = b.UnreadRune() |
|
return r, l, err |
|
} |
|
|
|
// Accept consumes a rune from the source if it meets the condition. |
|
// it returns true if the condition was met and false if it wasn't. |
|
func (b *Reader) Accept(cond func(r rune) bool) bool { |
|
r, _, err := b.ReadRune() |
|
switch { |
|
case err != nil: |
|
return false |
|
case cond(r): |
|
return true |
|
default: |
|
_ = b.UnreadRune() |
|
return false |
|
} |
|
} |
|
|
|
// AcceptAll consumes runes from the source as long as they meet the |
|
// condition. it returns true if the condition was met for at least one rune, |
|
// and false if it wasn't. |
|
func (b *Reader) AcceptAll(cond func(r rune) bool) bool { |
|
var accepted bool |
|
|
|
for { |
|
r, _, err := b.ReadRune() |
|
switch { |
|
case err != nil: |
|
return accepted |
|
case cond(r): |
|
accepted = true |
|
default: |
|
_ = b.UnreadRune() |
|
return accepted |
|
} |
|
} |
|
} |
|
|
|
// NewReader creates a new runes [Reader] using the given [io.Reader] |
|
func NewReader(r io.Reader) *Reader { |
|
if r == nil { |
|
return nil |
|
} |
|
|
|
return &Reader{ |
|
src: r, |
|
buf: make([]byte, 0, ReadBufferSize), |
|
} |
|
} |
|
|
|
// NewReaderBytes creates a new runes [Reader] using the given bytes |
|
func NewReaderBytes(b []byte) *Reader { |
|
return NewReader(bytes.NewReader(b)) |
|
} |
|
|
|
// NewReaderString creates a new runes [Reader] using the given string |
|
func NewReaderString(s string) *Reader { |
|
return NewReader(strings.NewReader(s)) |
|
}
|
|
|