13 Commits

Author SHA1 Message Date
amery f7e13e0978 Merge pull request 'lexer: introduce Error{}' (#6)
Reviewed-on: #6
2023-08-29 17:00:09 +02:00
amery f67d8a2443 lexer: introduce Error{}
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 13:54:23 +00:00
amery 76e6146e9e Merge pull request 'introduce NewReadCloser to allow byte and string buffers to offer io.ReadCloser' (#1)
Reviewed-on: #1
2023-08-29 15:24:36 +02:00
amery f79e2bee9e Merge pull request 'lexer: rename runes.Reader to lexer.Reader and implement UnreadRune() and PeekRune()' (#4)
Reviewed-on: #4
2023-08-29 15:23:15 +02:00
amery 6cca2996ca lexer: Implement Reader.UnreadRune() and Reader.PeekRune()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 02:00:43 +00:00
amery edcba80baa lexer: fix ReadRune() to actually move the cursor
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 02:00:38 +00:00
amery 7230a74f49 lexer: runes.Reader renamed to lexer.Reader
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 01:59:09 +00:00
amery 1b223e3751 introduce NewReadCloser to allow byte and string buffers to offer io.ReadCloser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 22:06:52 +00:00
amery 07b652c414 chore: rename module to asciigoat.org/core
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 17:51:55 +00:00
amery eaa846b64b runes.Reader: simplify buffer handling
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 05:01:23 +00:00
amery fef0d81610 runes.Reader: introduce String(), Emit() and Discard()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 04:45:25 +00:00
amery fad6357d91 runes: initial RuneReader implementation
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 04:33:44 +00:00
amery f6a391904d Initial commit
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-27 23:20:56 +00:00
27 changed files with 350 additions and 835 deletions
+13
View File
@@ -0,0 +1,13 @@
# http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 4
View File
+1 -1
View File
@@ -1,4 +1,4 @@
Copyright 2021 JPI Technologies Ltd <oss@jpi.io>
Copyright 2023 JPI Technologies Ltd <oss@jpi.io>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
-15
View File
@@ -1,15 +0,0 @@
.PHONY: all fmt build test
GO ?= go
all: fmt build
fmt:
$(GO) fmt ./...
$(GO) mod tidy || true
build:
$(GO) get -v ./...
test:
$(GO) test -v ./...
-4
View File
@@ -1,4 +0,0 @@
asciigoat.org/core
==================
helpers and general structs used by asciigoat parsers and generators
+1
View File
@@ -0,0 +1 @@
# asciigoat's core library
-37
View File
@@ -1,37 +0,0 @@
/*
Package ebnf implements an ISO/IEC 14977
Extended Backus-Naur Form parser, verifiers,
and additional related helpers for AsciiGoat
A syntax highlighter for vim and a copy of the final draft of the standard
are included in the doc/ directory. The official standard can be downloaded from
http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip
An uberly simplified version of the EBNF grammar looks like:
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" ;
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
| "'" | '"' | "=" | "|" | "." | "," | ";" ;
character = letter | digit | symbol | "_" ;
identifier = letter , { letter | digit | "_" } ;
terminal = "'" , character , { character } , "'"
| '"' , character , { character } , '"' ;
lhs = identifier ;
rhs = identifier
| terminal
| "[" , rhs , "]"
| "{" , rhs , "}"
| "(" , rhs , ")"
| rhs , "|" , rhs
| rhs , "," , rhs ;
rule = lhs , "=" , rhs , ";" ;
grammar = { rule } ;
*/
package ebnf
-36
View File
@@ -1,36 +0,0 @@
" Vim syntax file
" Language: EBNF
" Maintainer: Hans Fugal
" Last Change: $Date: 2003/01/28 14:42:09 $
" Version: $Id: ebnf.vim,v 1.1 2003/01/28 14:42:09 fugalh Exp $
" With thanks to Michael Brailsford for the BNF syntax file.
" Quit when a syntax file was already loaded
if version < 600
syntax clear
elseif exists("b:current_syntax")
finish
endif
syn match ebnfMetaIdentifier /[A-Za-z]/ skipwhite skipempty nextgroup=ebnfSeperator
syn match ebnfSeperator "=" contained nextgroup=ebnfProduction skipwhite skipempty
syn region ebnfProduction start=/\zs[^\.;]/ end=/[\.;]/me=e-1 contained contains=ebnfSpecial,ebnfDelimiter,ebnfTerminal,ebnfSpecialSequence,ebnfComment nextgroup=ebnfEndProduction skipwhite skipempty
syn match ebnfDelimiter #[,(|)\]}\[{/!]\|\(\*)\)\|\((\*\)\|\(/)\)\|\(:)\)\|\((/\)\|\((:\)# contained
syn match ebnfSpecial /[\-\*]/ contained
syn region ebnfSpecialSequence matchgroup=Delimiter start=/?/ end=/?/ contained
syn match ebnfEndProduction /[\.;]/ contained
syn region ebnfTerminal matchgroup=delimiter start=/"/ end=/"/ contained
syn region ebnfTerminal matchgroup=delimiter start=/'/ end=/'/ contained
syn region ebnfComment start="(\*" end="\*)"
hi link ebnfComment Comment
hi link ebnfMetaIdentifier Identifier
hi link ebnfSeperator ebnfSpecial
hi link ebnfEndProduction ebnfDelimiter
hi link ebnfDelimiter Delimiter
hi link ebnfSpecial Special
hi link ebnfSpecialSequence Statement
hi link ebnfTerminal Constant
Binary file not shown.
-230
View File
@@ -1,230 +0,0 @@
(* vim: set ft=ebnf: *)
(*
The syntax of Extended BNF can be defined using
itself. There are four parts in this example,
the first part names the characters, the second
part defines the removal of unnecessary non-
printing characters, the third part defines the
removal of textual comments, and the final part
defines the structure of Extended BNF itself.
Each syntax rule in this example starts with a
comment that identifies the corresponding clause
in the standard.
The meaning of special-sequences is not defined
in the standard. In this example (see the
reference to 7.6) they represent control
functions defined by ISO/IEC 6429:1992.
Another special-sequence defines a
syntactic-exception (see the reference to 4.7).
*)
(*
The first part of the lexical syntax defines the
characters in the 7-bit character set (ISO/IEC
646:1991) that represent each terminal-character
and gap-separator in Extended BNF.
*)
(* see 7.2 *) letter
= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h'
| 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
| 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
| 'y' | 'z'
| 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H'
| 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P'
| 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
| 'Y' | 'Z';
(* see 7.2 *) decimal digit
= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7'
| '8' | '9';
(*
The representation of the following
terminal-characters is defined in clauses 7.3,
7.4 and tables 1, 2.
*)
concatenate symbol = ',';
defining symbol = '=';
definition separator symbol = '|' | '/' | '!';
end comment symbol = '*)';
end group symbol = ')';
end option symbol = ']' | '/)';
end repeat symbol = '}' | ':)';
except symbol = '-';
first quote symbol = "'";
repetition symbol = '*';
second quote symbol = '"';
special sequence symbol = '?';
start comment symbol = '(*';
start group symbol = '(';
start option symbol = '[' | '(/';
start repeat symbol = '{' | '(:';
terminator symbol = ';' | '.';
(* see 7.5 *) other character
= ' ' | ':' | '+' | '_' | '%' | '@'
| '&' | '#' | '$' | '<' | '>' | '\'
| '^' | '`' | '~';
(* see 7.6 *) space character = ' ';
horizontal tabulation character
= ? ISO 6429 character Horizontal Tabulation ? ;
new line
= { ? ISO 6429 character Carriage Return ? },
? ISO 6429 character Line Feed ?,
{ ? ISO 6429 character Carriage Return ? };
vertical tabulation character
= ? ISO 6429 character Vertical Tabulation ? ;
form feed
= ? ISO 6429 character Form Feed ? ;
(*
The second part of the syntax defines the
removal of unnecessary non-printing characters
from a syntax.
*)
(* see 6.2 *) terminal character
= letter
| decimal digit
| concatenate symbol
| defining symbol
| definition separator symbol
| end comment symbol
| end group symbol
| end option symbol
| end repeat symbol
| except symbol
| first quote symbol
| repetition symbol
| second quote symbol
| special sequence symbol
| start comment symbol
| start group symbol
| start option symbol
| start repeat symbol
| terminator symbol
| other character;
(* see 6.3 *) gap free symbol
= terminal character
- (first quote symbol | second quote symbol)
| terminal string;
(* see 4.16 *) terminal string
= first quote symbol, first terminal character,
{first terminal character},
first quote symbol
| second quote symbol, second terminal character,
{second terminal character},
second quote symbol;
(* see 4.17 *) first terminal character
= terminal character - first quote symbol;
(* see 4.18 *) second terminal character
= terminal character - second quote symbol;
(* see 6.4 *) gap separator
= space character
| horizontal tabulation character
| new line
| vertical tabulation character
| form feed;
(* see 6.5 *) syntax
= {gap separator},
gap free symbol, {gap separator},
{gap free symbol, {gap separator}};
(*
The third part of the syntax defines the
removal of bracketed-textual-comments from
gap-free-symbols that form a syntax.
*)
(* see 6.6 *) commentless symbol
= terminal character
- (letter
| decimal digit
| first quote symbol
| second quote symbol
| start comment symbol
| end comment symbol
| special sequence symbol
| other character)
| meta identifier
| integer
| terminal string
| special sequence;
(* see 4.9 *) integer
= decimal digit, {decimal digit};
(* see 4.14 *) meta identifier
= letter, {meta identifier character};
(* see 4.15 *) meta identifier character
= letter
| decimal digit;
(* see 4.19 *) special sequence
= special sequence symbol,
{special sequence character},
special sequence symbol;
(* see 4.20 *) special sequence character
= terminal character - special sequence symbol;
(* see 6.7 *) comment symbol
= bracketed textual comment
| other character
| commentless symbol;
(* see 6.8 *) bracketed textual comment
= start comment symbol, {comment symbol},
end comment symbol;
(* see 6.9 *) syntax
= {bracketed textual comment},
commentless symbol,
{bracketed textual comment},
{commentless symbol,
{bracketed textual comment}};
(*
The final part of the syntax defines the
abstract syntax of Extended BNF, i.e. the
structure in terms of the commentless symbols.
*)
(* see 4.2 *) syntax
= syntax rule, {syntax rule};
(* see 4.3 *) syntax rule
= meta identifier, defining symbol,
definitions list, terminator symbol;
(* see 4.4 *) definitions list
= single definition,
{definition separator symbol,
single definition};
(* see 4.5 *) single definition
= syntactic term,
{concatenate symbol, syntactic term};
(* see 4.6 *) syntactic term
= syntactic factor,
[except symbol, syntactic exception];
(* see 4.7 *) syntactic exception
= ? a syntactic-factor that could be replaced
by a syntactic-factor containing no
meta-identifiers
? ;
(* see 4.8 *) syntactic factor
= [integer, repetition symbol],
syntactic primary;
(* see 4.10 *) syntactic primary
= optional sequence
| repeated sequence
| grouped sequence
| meta identifier
| terminal string
| special sequence
| empty sequence;
(* see 4.11 *) optional sequence
= start option symbol, definitions list,
end option symbol;
(* see 4.12 *) repeated sequence
= start repeat symbol, definitions list,
end repeat symbol;
(* see 4.13 *) grouped sequence
= start group symbol, definitions list,
end group symbol;
(* see 4.21 *) empty sequence
= ;
-1
View File
@@ -1 +0,0 @@
package ebnf
-20
View File
@@ -1,20 +0,0 @@
package token
// types of Token
type TokenType int

// Predefined token types. Values start at 1 so the zero
// value of TokenType remains "UNDEFINED".
const (
	TokenError TokenType = iota + 1
	TokenEOF
)

// String converts the TokenType into a human-friendly label.
// Unknown values stringify as "UNDEFINED".
func (typ TokenType) String() string {
	if typ == TokenError {
		return "ERROR"
	}
	if typ == TokenEOF {
		return "EOF"
	}
	return "UNDEFINED"
}
-25
View File
@@ -1,25 +0,0 @@
package token
import (
"fmt"
"testing"
)
func TestTokenTypeToString(t *testing.T) {
var foo TokenType
for _, o := range []struct {
typ TokenType
str string
}{
{foo, "UNDEFINED"},
{TokenError, "ERROR"},
{TokenEOF, "EOF"},
{1234, "UNDEFINED"},
} {
str := fmt.Sprintf("%s", o.typ)
if str != o.str {
t.Errorf("TokenType:%v stringified as %s instead of %s.", int(o.typ), str, o.str)
}
}
}
-1
View File
@@ -1 +0,0 @@
package core
+2
View File
@@ -0,0 +1,2 @@
// Package core provides the foundations of asciigoat packages
package core
+1 -1
View File
@@ -1,3 +1,3 @@
module asciigoat.org/core
go 1.16
go 1.19
View File
+45
View File
@@ -0,0 +1,45 @@
package lexer
import (
"fmt"
"strings"
)
// interface check
var _ error = (*Error)(nil)

// Error represents a generic parsing error
type Error struct {
	Filename string
	Line     int
	Column   int
	Content  string
	Err      error
}

// Error renders the error as "filename:line:column: cause: "content"",
// skipping every portion that isn't set.
func (err Error) Error() string {
	parts := make([]string, 0, 3)

	if err.Line > 0 || err.Column > 0 {
		// position known: always include filename:line:column
		parts = append(parts, fmt.Sprintf("%s:%v:%v", err.Filename, err.Line, err.Column))
	} else if err.Filename != "" {
		// only the filename is known
		parts = append(parts, err.Filename)
	}

	if err.Err != nil {
		parts = append(parts, err.Err.Error())
	}
	if err.Content != "" {
		parts = append(parts, fmt.Sprintf("%q", err.Content))
	}

	return strings.Join(parts, ": ")
}

// Unwrap returns the wrapped error, if any, so callers
// can use errors.Is and errors.As.
func (err Error) Unwrap() error {
	return err.Err
}
+2
View File
@@ -0,0 +1,2 @@
// Package lexer provides basic helpers to implement parsers
package lexer
+221
View File
@@ -0,0 +1,221 @@
package lexer
import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
const (
	// ReadBufferSize indicates the initial buffer size
	ReadBufferSize = 1 << 7 // 128B
	// DoublingBufferSizeLimit indicates when we stop doubling
	// and just add instead
	DoublingBufferSizeLimit = 1 << 17 // 128KiB
)

// implemented interfaces
var (
	_ io.RuneReader  = (*Reader)(nil)
	_ io.RuneScanner = (*Reader)(nil)
)

var (
	// ErrInvalidUnreadRune indicates UnreadRune() was called after an
	// action other than a successful ReadRune()
	ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)

// Reader is a RuneReader aimed at implementing text parsers
type Reader struct {
	src io.Reader // source of bytes

	buf    []byte // read-ahead buffer
	off    int    // buf offset where the pending (not yet emitted) region starts
	cursor int    // buf offset of the next byte to decode

	lastRuneSize int // size of the last rune read, or -1 when UnreadRune() isn't allowed
}

// String returns what's already Read but not yet emitted or discarded
func (b *Reader) String() string {
	return string(b.buf[b.off:b.cursor])
}

// Emit returns what's already being Read and discards it afterwards
func (b *Reader) Emit() string {
	s := b.String()
	b.Discard()
	return s
}

// Discard removes from the buffer everything that has been Read
func (b *Reader) Discard() {
	switch {
	case b.ready() == 0:
		// nothing pending to decode, reset the buffer
		b.buf = b.buf[:0]
		b.cursor = 0
		b.off = 0
	default:
		// step over what has been read so far
		b.off = b.cursor
	}
	// and prevent UnreadRune()
	b.lastRuneSize = -1
}

// ready tells how many bytes are ready to decode
func (b *Reader) ready() int {
	return len(b.buf) - b.cursor
}

// available tells how many free bytes remain at the end of the buffer
func (b *Reader) available() int {
	return cap(b.buf) - len(b.buf)
}

// needsBytes reads from the source until at least n bytes are ready
// to decode, growing the buffer as needed. It returns the first
// read error encountered, if any.
func (b *Reader) needsBytes(n int) error {
	for {
		if b.ready() >= n {
			// ready
			return nil
		}
		// make room
		b.prepareBuffer(n - b.ready())
		// and read more
		_, err := b.fill()
		if err != nil {
			return err
		}
	}
}

// rebuffer moves the pending region to the start of a buffer of at
// least the given size, reusing the current storage when it's big enough
func (b *Reader) rebuffer(size int) {
	var src, dst []byte

	if size > cap(b.buf) {
		// new buffer
		dst = make([]byte, size)
	} else {
		// same buffer, compact in place
		dst = b.buf
	}

	src = b.buf[b.off:]
	dst = dst[:len(src)]
	copy(dst, src)

	b.cursor -= b.off
	b.buf = dst
	b.off = 0
}

// prepareBuffer makes sure there is room to read at least n more
// bytes, compacting or growing the buffer when needed
func (b *Reader) prepareBuffer(n int) {
	if n > b.available() {
		// bytes needed once the consumed head is dropped
		needed := len(b.buf) + n - b.off

		size := cap(b.buf)
		if size == 0 {
			// zero-value Reader: seed the size, otherwise the
			// doubling loop below would spin forever on 0*2 == 0
			size = ReadBufferSize
		}
		for size < needed {
			switch {
			case size < DoublingBufferSizeLimit:
				// double
				size *= 2
			default:
				// grow linearly past the limit
				size += DoublingBufferSizeLimit
			}
		}

		b.rebuffer(size)
	}
}

// fill performs one Read() on the source, appending to the
// read-ahead buffer, and returns the byte count and error
func (b *Reader) fill() (int, error) {
	start := len(b.buf)
	n, err := b.src.Read(b.buf[start:cap(b.buf)])
	if n > 0 {
		b.buf = b.buf[:start+n]
	}
	return n, err
}

// ReadRune reads the next rune
func (b *Reader) ReadRune() (rune, int, error) {
	// we need at least one byte to start
	count := 1
	for {
		err := b.needsBytes(count)
		if err != nil {
			b.lastRuneSize = -1
			return 0, 0, err
		}
		if utf8.FullRune(b.buf[b.cursor:]) {
			// we have a full rune
			break
		}
		// more
		count = b.ready() + 1
	}

	// decode rune
	r, l := utf8.DecodeRune(b.buf[b.cursor:])
	// step over
	b.cursor += l
	// and remember for UnreadRune()
	b.lastRuneSize = l

	return r, l, nil
}

// UnreadRune moves the cursor where it was before the last call to ReadRune
func (b *Reader) UnreadRune() error {
	if b.lastRuneSize > 0 {
		b.cursor -= b.lastRuneSize
		b.lastRuneSize = -1
		return nil
	}
	return ErrInvalidUnreadRune
}

// PeekRune returns information about the next rune without moving the
// cursor
func (b *Reader) PeekRune() (rune, int, error) {
	r, l, err := b.ReadRune()
	if err != nil {
		return r, l, err
	}
	err = b.UnreadRune()
	return r, l, err
}

// NewReader creates a new runes [Reader] using the given [io.Reader]
func NewReader(r io.Reader) *Reader {
	if r == nil {
		return nil
	}

	return &Reader{
		src: r,
		buf: make([]byte, 0, ReadBufferSize),
	}
}

// NewReaderBytes creates a new runes [Reader] using the given bytes
func NewReaderBytes(b []byte) *Reader {
	return NewReader(bytes.NewReader(b))
}

// NewReaderString creates a new runes [Reader] using the given string
func NewReaderString(s string) *Reader {
	return NewReader(strings.NewReader(s))
}
+64
View File
@@ -0,0 +1,64 @@
package core
import (
"bytes"
"io"
"io/fs"
"strings"
)
// ReadCloser adds a Close() to Readers without one
type ReadCloser struct {
	r io.Reader
}

// Read delegates to the wrapped [io.Reader], or fails with
// [fs.ErrClosed] once the ReadCloser has been closed.
func (rc *ReadCloser) Read(b []byte) (int, error) {
	if rc.r == nil {
		return 0, fs.ErrClosed
	}
	return rc.r.Read(b)
}

// Close releases the wrapped [io.Reader]. Closing a second
// time fails with [fs.ErrClosed].
func (rc *ReadCloser) Close() error {
	if rc.r == nil {
		return fs.ErrClosed
	}
	rc.r = nil
	return nil
}

// NewReadCloser wraps a [io.Reader] to satisfy
// [io.ReadCloser] if needed
func NewReadCloser(r io.Reader) io.ReadCloser {
	if r == nil {
		return nil
	}
	if rc, ok := r.(io.ReadCloser); ok {
		// already a ReadCloser, use it as-is
		return rc
	}
	return &ReadCloser{r: r}
}

// NewReadCloserBytes wraps a bytes slice to implement
// a [io.ReadCloser]
func NewReadCloserBytes(b []byte) io.ReadCloser {
	return NewReadCloser(bytes.NewReader(b))
}

// NewReadCloserString wraps a string to implement
// a [io.ReadCloser]
func NewReadCloserString(s string) io.ReadCloser {
	return NewReadCloser(strings.NewReader(s))
}
-135
View File
@@ -1,135 +0,0 @@
package runes
import (
"bufio"
"bytes"
"io"
"strings"
"sync"
)
// Feeder buffers runes decoded from an input RuneReader and exposes them to consumers
type Feeder struct {
sync.Mutex
in io.RuneReader
out []rune
sz []int
err error
}
// NewFeederBytes creates a new Feeder using a slice of bytes as input
func NewFeederBytes(b []byte) *Feeder {
return NewFeeder(bytes.NewReader(b))
}
// NewFeederString creates a new Feeder using a string as input
func NewFeederString(s string) *Feeder {
return NewFeeder(strings.NewReader(s))
}
// NewFeeder creates a new Feeder using a Reader as input
func NewFeeder(in io.Reader) *Feeder {
rd, ok := in.(io.RuneReader)
if !ok {
rd = bufio.NewReader(in)
}
return &Feeder{in: rd}
}
// Skip drops n runes from the head of the buffer
func (f *Feeder) Skip(n int) (int, bool) {
f.Lock()
defer f.Unlock()
if l := f.skip(n); l > 0 {
return l, true
} else {
return 0, false
}
}
func (f *Feeder) skip(n int) int {
if l := len(f.out); l > n {
f.out = f.out[n:]
f.sz = f.sz[n:]
return l - n
} else {
f.out = f.out[:0]
f.sz = f.sz[:0]
return 0
}
}
// ReadRune returns the next rune
func (f *Feeder) ReadRune() (r rune, size int, err error) {
f.Lock()
defer f.Unlock()
if f.atLeast(1) {
r = f.out[0]
size = f.sz[0]
f.skip(1)
}
err = f.Err()
return
}
// AtLeast blocks until there are at least n runes on the buffer, or an error or EOF has occurred
func (f *Feeder) AtLeast(n int) (out []rune, err error) {
f.Lock()
defer f.Unlock()
if !f.atLeast(n) {
err = f.err
}
if len(f.out) > 0 {
out = f.out
}
return
}
func (f *Feeder) atLeast(n int) bool {
for len(f.out) < n {
r, size, err := f.in.ReadRune()
if err != nil && f.err == nil {
// store first error
f.err = err
}
if size > 0 {
f.out = append(f.out, r)
f.sz = append(f.sz, size)
} else if f.err != nil {
break
}
}
return len(f.out) >= n
}
// Currently buffered runes
func (f *Feeder) Buffered() []rune {
return f.out
}
// Count of currently buffered runes
func (f *Feeder) Len() int {
return len(f.out)
}
// Feeder has reached EOF
func (f *Feeder) EOF() bool {
return f.err == io.EOF
}
// Feeder encountered an error
func (f *Feeder) Err() error {
if f.err == io.EOF {
return nil
}
return f.err
}
-124
View File
@@ -1,124 +0,0 @@
package runes
import (
"unicode"
)
// Probe was borrowed from https://github.com/JamesOwenHall/json2.Scanner
//
// Probe is a func that returns a subset of the input and a success bool.
type Probe func([]rune) ([]rune, bool)
// If returns a probe that accepts a rune if it satisfies the condition.
func If(condition func(rune) bool) Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && condition(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// Rune returns a probe that accepts r.
func Rune(r rune) Probe {
return If(func(b rune) bool {
return r == b
})
}
// Space returns a probe that accepts whitespace as defined in the unicode
// package.
func Space() Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && unicode.IsSpace(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// And returns a probe that accepts all probes in sequence.
func And(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for _, s := range probes {
if read, ok := s(remaining); !ok {
return nil, false
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
return accumulated, true
}
}
// Or returns a probe that accepts the first successful probe in probes.
func Or(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
for _, s := range probes {
if read, ok := s(input); ok {
return read, true
}
}
return nil, false
}
}
// Maybe runs probe and returns true regardless of the output.
func Maybe(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
read, _ := probe(input)
return read, true
}
}
// Any returns a probe that accepts any number of occurrences of probe,
// including zero.
func Any(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for {
if read, ok := probe(remaining); !ok {
return accumulated, true
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
}
}
// N returns a probe that accepts probe exactly n times.
func N(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n)
for i := 0; i < n; i++ {
probes[i] = probe
}
return And(probes...)(input)
}
}
// AtLeast returns a probe that accepts probe at least n times.
func AtLeast(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n, n+1)
for i := range probes {
probes[i] = probe
}
probes = append(probes, Any(probe))
return And(probes...)(input)
}
}
-58
View File
@@ -1,58 +0,0 @@
package runes
import (
"testing"
)
func TestProbe(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "a"},
{Space(), " "},
{Space(), "\t"},
{Space(), "\n"},
{And(Rune('1'), Rune('2'), Space()), "12 "},
{Or(Rune('r'), Space(), Rune('x')), "r"},
{Or(Rune('r'), Space(), Rune('x')), " "},
{Or(Rune('r'), Space(), Rune('x')), "x"},
{Any(Rune('w')), ""},
{Any(Rune('w')), "w"},
{Any(Rune('w')), "ww"},
{Any(Rune('w')), "www"},
{N(6, Rune('w')), "wwwwww"},
{Maybe(Rune('w')), ""},
{Maybe(Rune('w')), "w"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); !ok {
t.Errorf("Expected to read %s", string(test.input))
} else if string(read) != test.input {
t.Errorf("Mismatch of input %s and read %s", test.input, string(read))
}
}
}
func TestProbeFail(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "b"},
{Space(), "a"},
{And(Rune('1'), Rune('2'), Space()), "12"},
{Or(Rune('r'), Space(), Rune('x')), "4"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); ok {
t.Errorf("Unexpectedly read %s with input %s", string(read), test.input)
}
}
}
-5
View File
@@ -1,5 +0,0 @@
/*
Package scanner implements the low level functionality
of AsciiGoat lexers
*/
package scanner
-99
View File
@@ -1,99 +0,0 @@
package scanner
import (
"unicode/utf8"
)
const (
	// EOF is a dummy rune representing End-Of-File
	EOF = -1
)

// A Position in the input string and in the line-based document
type Position struct {
	Offset       uint // byte offset into the input string
	Line, Column uint // location in the document; both start at 1
}

// A Scanner represents the low level layer for text parsers
type Scanner struct {
	name   string   // label identifying the input
	input  string   // the whole document being scanned
	base   Position // start of the Terminal currently being detected
	cursor Position // position right after the last accepted rune
	runes  uint     // runes accounted since base
}

// NewScannerFromString instantiates a new Scanner to
// parse a given string
func NewScannerFromString(name, input string) *Scanner {
	return &Scanner{
		name:  name,
		input: input,
		// lines and columns start counting at 1
		base:   Position{0, 1, 1},
		cursor: Position{0, 1, 1},
		runes:  0,
	}
}

// Length returns the number of bytes and runes in the Terminal that is being detected
func (l *Scanner) Length() (uint, uint) {
	return l.cursor.Offset - l.base.Offset, l.runes
}

// Empty tells if there are no runes accounted for the next Terminal yet
func (l *Scanner) Empty() bool {
	return l.runes == 0
}

// StepForth moves the cursor forward by the given rune and byte counts
func (l *Scanner) StepForth(runes, bytes uint) {
	l.cursor.Offset += bytes
	l.cursor.Column += runes
	l.runes += runes
}

// StepBack moves the cursor backward by the given rune and byte counts
func (l *Scanner) StepBack(runes, bytes uint) {
	l.cursor.Offset -= bytes
	// FIXME: what if column goes < 1?
	l.cursor.Column -= runes
	l.runes -= runes
}

// Reset moves the cursor back to the base, dropping the current Terminal
func (l *Scanner) Reset() {
	l.cursor = l.base
	l.runes = 0
}

// Skip trashes everything up to the cursor, making it the new base
func (l *Scanner) Skip() {
	l.base = l.cursor
	l.runes = 0
}

// NewLine accounts a line break in the position of the cursor
func (l *Scanner) NewLine() {
	l.cursor.Line++
	l.cursor.Column = 1
}

// Peek returns the next rune and its byte size without moving the cursor.
// At the end of the input it returns (EOF, 0).
func (l *Scanner) Peek() (rune, uint) {
	if l.cursor.Offset == uint(len(l.input)) {
		// end of input
		return EOF, 0
	}
	r, bytes := utf8.DecodeRuneInString(l.input[l.cursor.Offset:])
	return r, uint(bytes)
}

// Next returns the next rune and its byte size, moving the cursor past it
func (l *Scanner) Next() (rune, uint) {
	r, bytes := l.Peek()
	if bytes > 0 {
		l.StepForth(1, bytes)
	}
	return r, bytes
}
-43
View File
@@ -1,43 +0,0 @@
package scanner
import (
"unicode/utf8"
)
// A Terminal represents a literal element within a document
type Terminal struct {
	val          string // literal text of the element
	bytes, runes uint   // length of val in bytes and in runes
	line, col    uint   // position of the element in the source document
}

// NewTerminalFull returns a new Terminal instance with all
// fields provided by the caller
func NewTerminalFull(val string, bytes, runes, line, col uint) *Terminal {
	return &Terminal{
		val:   val,
		bytes: bytes,
		runes: runes,
		line:  line,
		col:   col,
	}
}

// NewTerminal creates a Terminal instance without knowing its length,
// computing the byte and rune counts from val
func NewTerminal(val string, line, col uint) *Terminal {
	bytes := uint(len(val))
	runes := uint(utf8.RuneCountInString(val))
	return NewTerminalFull(val, bytes, runes, line, col)
}

// Position returns the position (line and column)
// of the Terminal in the source document
func (t *Terminal) Position() (uint, uint) {
	return t.line, t.col
}

// Value returns the string corresponding to
// this Terminal and its size in bytes and runes
func (t *Terminal) Value() (string, uint, uint) {
	return t.val, t.bytes, t.runes
}
}