runes: initial RuneReader implementation
Signed-off-by: Alejandro Mery <amery@jpi.io>
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
// Package runes provides a buffered rune reader aimed at implementing text parsers
|
||||
package runes
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
package runes
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
	// ReadBufferSize is the capacity, in bytes, a new buffer
	// starts with (128B)
	ReadBufferSize = 128

	// DoublingBufferSizeLimit is the buffer size (128KiB) up to which
	// growing means doubling. From there on the buffer grows by adding
	// this amount instead.
	DoublingBufferSizeLimit = 128 << 10
)
|
||||
|
||||
// implemented interfaces
|
||||
var (
|
||||
_ io.RuneReader = (*Reader)(nil)
|
||||
)
|
||||
|
||||
// Reader is a RuneReader aimed at implementing text parsers.
// It keeps the bytes read from the source in a growable buffer.
type Reader struct {
	// src is where fill reads bytes from
	src io.Reader
	// buf holds the bytes read so far and not yet dropped
	buf []byte
	// off is where the data kept by rebuffer begins in buf,
	// cursor is where the next rune will be decoded from
	off, cursor int
}
|
||||
|
||||
// ready tells how many bytes are ready to decode
|
||||
func (b *Reader) ready() int {
|
||||
return len(b.buf) - b.cursor
|
||||
}
|
||||
|
||||
// available tells how many free bytes remain at the end of the buffer
|
||||
func (b *Reader) available() int {
|
||||
return cap(b.buf) - len(b.buf)
|
||||
}
|
||||
|
||||
func (b *Reader) needsBytes(n int) error {
|
||||
for {
|
||||
if b.ready() >= n {
|
||||
// ready
|
||||
return nil
|
||||
}
|
||||
|
||||
// make room
|
||||
b.prepareBuffer(n - b.ready())
|
||||
|
||||
// and read more
|
||||
_, err := b.fill()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Reader) rebuffer(size int) {
|
||||
var src, dst []byte
|
||||
|
||||
if size <= cap(b.buf) {
|
||||
// same buffer
|
||||
dst = b.buf
|
||||
} else {
|
||||
// new buffer
|
||||
dst = make([]byte, size)
|
||||
}
|
||||
|
||||
src = b.buf[b.off:]
|
||||
dst = dst[:len(src)]
|
||||
|
||||
copy(dst, src)
|
||||
|
||||
b.cursor -= b.off
|
||||
b.buf = dst
|
||||
b.off = 0
|
||||
}
|
||||
|
||||
func (b *Reader) prepareBuffer(n int) {
|
||||
available := b.available()
|
||||
|
||||
switch {
|
||||
case n <= available:
|
||||
// n or more already available
|
||||
case n <= available+b.off:
|
||||
// rebase is enough
|
||||
b.rebuffer(0)
|
||||
default:
|
||||
// resize
|
||||
needed := len(b.buf) + n - b.off
|
||||
size := cap(b.buf)
|
||||
|
||||
for size < needed {
|
||||
switch {
|
||||
case size < DoublingBufferSizeLimit:
|
||||
size *= 2
|
||||
default:
|
||||
size += DoublingBufferSizeLimit
|
||||
}
|
||||
}
|
||||
|
||||
b.rebuffer(size)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Reader) fill() (int, error) {
|
||||
start := len(b.buf)
|
||||
n, err := b.src.Read(b.buf[start:cap(b.buf)])
|
||||
if n > 0 {
|
||||
b.buf = b.buf[:start+n]
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ReadRune reads the next rune
|
||||
func (b *Reader) ReadRune() (rune, int, error) {
|
||||
// we need at least one byte to start
|
||||
count := 1
|
||||
for {
|
||||
err := b.needsBytes(count)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
if utf8.FullRune(b.buf[b.cursor:]) {
|
||||
// we have a full rune
|
||||
break
|
||||
}
|
||||
|
||||
// more
|
||||
count = b.ready() + 1
|
||||
}
|
||||
|
||||
// decode rune
|
||||
r, l := utf8.DecodeRune(b.buf[b.cursor:])
|
||||
return r, l, nil
|
||||
}
|
||||
|
||||
// NewReader creates a new runes [Reader] using the given [io.Reader]
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &Reader{
|
||||
src: r,
|
||||
buf: make([]byte, 0, ReadBufferSize),
|
||||
}
|
||||
}
|
||||
|
||||
// NewReaderBytes creates a new runes [Reader] using the given bytes
|
||||
func NewReaderBytes(b []byte) *Reader {
|
||||
return NewReader(bytes.NewReader(b))
|
||||
}
|
||||
|
||||
// NewReaderString creates a new runes [Reader] using the given string
|
||||
func NewReaderString(s string) *Reader {
|
||||
return NewReader(strings.NewReader(s))
|
||||
}
|
||||
Reference in New Issue
Block a user