39 Commits

Author SHA1 Message Date
amery be53431904 lexer: to simplify states, Lexer.EmitError() assumes EOF if nil is passed
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 03:02:43 +01:00
amery 3edf777c68 lexer: add Lexer.AtLeast() to gather input data from the Feeder
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 00:40:22 +01:00
amery 36427e059f lexer: add initial generic Lexer
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 20:36:55 +01:00
amery 90e9fc47cf lexer: add Step()/NewLine()/Reset() methods to TokenPosition
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 19:59:25 +01:00
amery 6e05cdbb28 lexer: add generic Token
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 18:02:49 +01:00
amery 866fb8374b runes.Feeder: renamed Buffered() to Runes() and Len() to Buffered() for consistency with bufio
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 20:48:31 +01:00
amery 7828f8d92f envexp: drop package in favour of asciigoat.org/parsers/shexp
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 16:57:24 +01:00
Nagy Károly Gábriel 65f2605a8a fix NewFeeder function help
This will fix the Go Help for function NewFeeder
2021-06-29 13:49:30 +01:00
amery 23f53c4da7 runes: rework NewFeeder() to prevent double wrapping
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 12:30:46 +01:00
amery 576937268b runes: introduce Feeder
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 11:06:48 +01:00
amery 2271848acf attic/ebnf: go fmt
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery 89ecdcd103 COPYING: add MIT licence text
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery 17208cdc64 attic: ebnf/ moved to attic/ebnf
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery e18e66860d runes: imported github.com/JamesOwenHall/json2.Scanner as Probe
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:34 +01:00
amery f88c3f9b0c envexp: bind Reader and Expander
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 20:41:44 +01:00
amery 3e2356a3f6 envexp: turn Expander from interface to struct
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 20:34:39 +01:00
amery 607fdb6ee4 envexp: add top-level Expand() using Getenv
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:59:50 +01:00
amery 2ba6afae4b envexp: add placeholder for Expander
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:59:24 +01:00
amery aa7bc06646 envexp: rename env package to envexp
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:52:42 +01:00
amery 9f4f801066 env: extend Reader{} to implement io.Closer
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 22:16:40 +01:00
amery 26366f82bd env: add skeleton of io.Reader wrapper
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 22:14:54 +01:00
amery d289643458 build-sys: replace gofmt.sh with make fmt
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 21:28:28 +01:00
amery 76e566b92e go.mod: add initial go.mod
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 21:27:42 +01:00
amery 33dbfec54a ebnf/token: Add initial TokenType 2014-10-30 00:53:02 +01:00
amery 2797253a96 scanner: adjust comments to make golint happy
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-26 22:31:22 +01:00
amery 71599c9adb scanner: change NewScannerFromString() to accept empty strings
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-26 00:27:51 +02:00
amery 538ebfe37b scanner.Scanner: rename NextLine() to NewLine()
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 23:17:03 +02:00
amery 42a75bf4d9 ebnf: add some doc
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 12:12:32 +02:00
amery 100d6d5cec scanner: add initial dummy doc
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 08:49:03 +02:00
amery f9405e7fe1 scanner.Scanner: Add .NewLine() helper to increment line of the cursor
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 07:39:08 +02:00
amery 7a4713a353 scanner.Scanner: add StepBack(), Reset() and Skip() methods
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 07:34:34 +02:00
amery d4dbc28aee scanner.Scanner: initial struct and methods for the low level text scanner
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:39 +02:00
amery ca274e51a3 scanner.Terminal: A literal (utf8) string within a document
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:39 +02:00
amery e55382c583 Import gofmt.sh helper from asciigoat.org/ini
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:38 +02:00
amery 2991b67b39 ebnf: replace ebnf.ebnf with grammar from the iso 14977
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-15 01:38:09 +02:00
amery 494855f0d1 ebnf:doc: add final draft of iso 14977
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:59:16 +02:00
amery b402063aae ebnf:doc: add syntax file for vim
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:59:11 +02:00
amery 6487b2a49d ebnf: add empty package, and ebnf grammar copied from wikipedia
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:57:22 +02:00
amery 5a096152a2 Create asciigoat.org/core package
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-06-25 09:04:55 +00:00
27 changed files with 1085 additions and 304 deletions
-13
View File
@@ -1,13 +0,0 @@
# http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 4
View File
+1 -1
View File
@@ -1,4 +1,4 @@
Copyright 2023 JPI Technologies Ltd <oss@jpi.io>
Copyright 2021 JPI Technologies Ltd <oss@jpi.io>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+15
View File
@@ -0,0 +1,15 @@
.PHONY: all fmt build test
GO ?= go
all: fmt build
fmt:
$(GO) fmt ./...
$(GO) mod tidy || true
build:
$(GO) get -v ./...
test:
$(GO) test -v ./...
+4
View File
@@ -0,0 +1,4 @@
asciigoat.org/core
==================
helpers and general structs used by asciigoat parsers and generators
-1
View File
@@ -1 +0,0 @@
# asciigoat's core library
+37
View File
@@ -0,0 +1,37 @@
/*
Package ebnf implements an ISO/IEC 14977
Extended Backus-Naur Form parser, verifiers,
and additional related helpers for AsciiGoat
A syntax highlighter for vim and a copy of the final draft of the standard
are included in the doc/ directory. The official standard can be downloaded from
http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip
An uberly simplified version of the EBNF grammar looks like:
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" ;
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
| "'" | '"' | "=" | "|" | "." | "," | ";" ;
character = letter | digit | symbol | "_" ;
identifier = letter , { letter | digit | "_" } ;
terminal = "'" , character , { character } , "'"
| '"' , character , { character } , '"' ;
lhs = identifier ;
rhs = identifier
| terminal
| "[" , rhs , "]"
| "{" , rhs , "}"
| "(" , rhs , ")"
| rhs , "|" , rhs
| rhs , "," , rhs ;
rule = lhs , "=" , rhs , ";" ;
grammar = { rule } ;
*/
package ebnf
+36
View File
@@ -0,0 +1,36 @@
" Vim syntax file
" Language: EBNF
" Maintainer: Hans Fugal
" Last Change: $Date: 2003/01/28 14:42:09 $
" Version: $Id: ebnf.vim,v 1.1 2003/01/28 14:42:09 fugalh Exp $
" With thanks to Michael Brailsford for the BNF syntax file.
" Quit when a syntax file was already loaded
if version < 600
syntax clear
elseif exists("b:current_syntax")
finish
endif
syn match ebnfMetaIdentifier /[A-Za-z]/ skipwhite skipempty nextgroup=ebnfSeperator
syn match ebnfSeperator "=" contained nextgroup=ebnfProduction skipwhite skipempty
syn region ebnfProduction start=/\zs[^\.;]/ end=/[\.;]/me=e-1 contained contains=ebnfSpecial,ebnfDelimiter,ebnfTerminal,ebnfSpecialSequence,ebnfComment nextgroup=ebnfEndProduction skipwhite skipempty
syn match ebnfDelimiter #[,(|)\]}\[{/!]\|\(\*)\)\|\((\*\)\|\(/)\)\|\(:)\)\|\((/\)\|\((:\)# contained
syn match ebnfSpecial /[\-\*]/ contained
syn region ebnfSpecialSequence matchgroup=Delimiter start=/?/ end=/?/ contained
syn match ebnfEndProduction /[\.;]/ contained
syn region ebnfTerminal matchgroup=delimiter start=/"/ end=/"/ contained
syn region ebnfTerminal matchgroup=delimiter start=/'/ end=/'/ contained
syn region ebnfComment start="(\*" end="\*)"
hi link ebnfComment Comment
hi link ebnfMetaIdentifier Identifier
hi link ebnfSeperator ebnfSpecial
hi link ebnfEndProduction ebnfDelimiter
hi link ebnfDelimiter Delimiter
hi link ebnfSpecial Special
hi link ebnfSpecialSequence Statement
hi link ebnfTerminal Constant
Binary file not shown.
+230
View File
@@ -0,0 +1,230 @@
(* vim: set ft=ebnf: *)
(*
The syntax of Extended BNF can be defined using
itself. There are four parts in this example,
the first part names the characters, the second
part defines the removal of unnecessary non-
printing characters, the third part defines the
removal of textual comments, and the final part
defines the structure of Extended BNF itself.
Each syntax rule in this example starts with a
comment that identifies the corresponding clause
in the standard.
The meaning of special-sequences is not defined
in the standard. In this example (see the
reference to 7.6) they represent control
functions defined by ISO/IEC 6429:1992.
Another special-sequence defines a
syntactic-exception (see the reference to 4.7).
*)
(*
The first part of the lexical syntax defines the
characters in the 7-bit character set (ISO/IEC
646:1991) that represent each terminal-character
and gap-separator in Extended BNF.
*)
(* see 7.2 *) letter
= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h'
| 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
| 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
| 'y' | 'z'
| 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H'
| 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P'
| 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
| 'Y' | 'Z';
(* see 7.2 *) decimal digit
= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7'
| '8' | '9';
(*
The representation of the following
terminal-characters is defined in clauses 7.3,
7.4 and tables 1, 2.
*)
concatenate symbol = ',';
defining symbol = '=';
definition separator symbol = '|' | '/' | '!';
end comment symbol = '*)';
end group symbol = ')';
end option symbol = ']' | '/)';
end repeat symbol = '}' | ':)';
except symbol = '-';
first quote symbol = "'";
repetition symbol = '*';
second quote symbol = '"';
special sequence symbol = '?';
start comment symbol = '(*';
start group symbol = '(';
start option symbol = '[' | '(/';
start repeat symbol = '{' | '(:';
terminator symbol = ';' | '.';
(* see 7.5 *) other character
= ' ' | ':' | '+' | '_' | '%' | '@'
| '&' | '#' | '$' | '<' | '>' | '\'
| '^' | '`' | '~';
(* see 7.6 *) space character = ' ';
horizontal tabulation character
= ? ISO 6429 character Horizontal Tabulation ? ;
new line
= { ? ISO 6429 character Carriage Return ? },
? ISO 6429 character Line Feed ?,
{ ? ISO 6429 character Carriage Return ? };
vertical tabulation character
= ? ISO 6429 character Vertical Tabulation ? ;
form feed
= ? ISO 6429 character Form Feed ? ;
(*
The second part of the syntax defines the
removal of unnecessary non-printing characters
from a syntax.
*)
(* see 6.2 *) terminal character
= letter
| decimal digit
| concatenate symbol
| defining symbol
| definition separator symbol
| end comment symbol
| end group symbol
| end option symbol
| end repeat symbol
| except symbol
| first quote symbol
| repetition symbol
| second quote symbol
| special sequence symbol
| start comment symbol
| start group symbol
| start option symbol
| start repeat symbol
| terminator symbol
| other character;
(* see 6.3 *) gap free symbol
= terminal character
- (first quote symbol | second quote symbol)
| terminal string;
(* see 4.16 *) terminal string
= first quote symbol, first terminal character,
{first terminal character},
first quote symbol
| second quote symbol, second terminal character,
{second terminal character},
second quote symbol;
(* see 4.17 *) first terminal character
= terminal character - first quote symbol;
(* see 4.18 *) second terminal character
= terminal character - second quote symbol;
(* see 6.4 *) gap separator
= space character
| horizontal tabulation character
| new line
| vertical tabulation character
| form feed;
(* see 6.5 *) syntax
= {gap separator},
gap free symbol, {gap separator},
{gap free symbol, {gap separator}};
(*
The third part of the syntax defines the
removal of bracketed-textual-comments from
gap-free-symbols that form a syntax.
*)
(* see 6.6 *) commentless symbol
= terminal character
- (letter
| decimal digit
| first quote symbol
| second quote symbol
| start comment symbol
| end comment symbol
| special sequence symbol
| other character)
| meta identifier
| integer
| terminal string
| special sequence;
(* see 4.9 *) integer
= decimal digit, {decimal digit};
(* see 4.14 *) meta identifier
= letter, {meta identifier character};
(* see 4.15 *) meta identifier character
= letter
| decimal digit;
(* see 4.19 *) special sequence
= special sequence symbol,
{special sequence character},
special sequence symbol;
(* see 4.20 *) special sequence character
= terminal character - special sequence symbol;
(* see 6.7 *) comment symbol
= bracketed textual comment
| other character
| commentless symbol;
(* see 6.8 *) bracketed textual comment
= start comment symbol, {comment symbol},
end comment symbol;
(* see 6.9 *) syntax
= {bracketed textual comment},
commentless symbol,
{bracketed textual comment},
{commentless symbol,
{bracketed textual comment}};
(*
The final part of the syntax defines the
abstract syntax of Extended BNF, i.e. the
structure in terms of the commentless symbols.
*)
(* see 4.2 *) syntax
= syntax rule, {syntax rule};
(* see 4.3 *) syntax rule
= meta identifier, defining symbol,
definitions list, terminator symbol;
(* see 4.4 *) definitions list
= single definition,
{definition separator symbol,
single definition};
(* see 4.5 *) single definition
= syntactic term,
{concatenate symbol, syntactic term};
(* see 4.6 *) syntactic term
= syntactic factor,
[except symbol, syntactic exception];
(* see 4.7 *) syntactic exception
= ? a syntactic-factor that could be replaced
by a syntactic-factor containing no
meta-identifiers
? ;
(* see 4.8 *) syntactic factor
= [integer, repetition symbol],
syntactic primary;
(* see 4.10 *) syntactic primary
= optional sequence
| repeated sequence
| grouped sequence
| meta identifier
| terminal string
| special sequence
| empty sequence;
(* see 4.11 *) optional sequence
= start option symbol, definitions list,
end option symbol;
(* see 4.12 *) repeated sequence
= start repeat symbol, definitions list,
end repeat symbol;
(* see 4.13 *) grouped sequence
= start group symbol, definitions list,
end group symbol;
(* see 4.21 *) empty sequence
= ;
+1
View File
@@ -0,0 +1 @@
package ebnf
+20
View File
@@ -0,0 +1,20 @@
package token
// types of Token
type TokenType int
const (
TokenError TokenType = iota + 1
TokenEOF
)
func (typ TokenType) String() string {
switch typ {
case TokenError:
return "ERROR"
case TokenEOF:
return "EOF"
default:
return "UNDEFINED"
}
}
+25
View File
@@ -0,0 +1,25 @@
package token
import (
"fmt"
"testing"
)
func TestTokenTypeToString(t *testing.T) {
var foo TokenType
for _, o := range []struct {
typ TokenType
str string
}{
{foo, "UNDEFINED"},
{TokenError, "ERROR"},
{TokenEOF, "EOF"},
{1234, "UNDEFINED"},
} {
str := fmt.Sprintf("%s", o.typ)
if str != o.str {
t.Errorf("TokenType:%v stringified as %s instead of %s.", int(o.typ), str, o.str)
}
}
}
+1
View File
@@ -0,0 +1 @@
package core
-2
View File
@@ -1,2 +0,0 @@
// Package core provides the foundations of asciigoat packages
package core
+1 -1
View File
@@ -1,3 +1,3 @@
module asciigoat.org/core
go 1.19
go 1.16
View File
+125 -1
View File
@@ -1,2 +1,126 @@
// Package lexer provides basic helpers to implement parsers
package lexer
import (
"errors"
"fmt"
"asciigoat.org/core/runes"
)
// state function
type StateFn func(Lexer) StateFn
type Lexer interface {
Run() // run state machine
Position() TokenPosition // base for the next token
Tokens() <-chan Token // tokens output
AtLeast(n int) ([]rune, error)
NewLine()
Step(n int)
Emit(TokenType)
EmitError(error)
EmitErrorf(string, ...interface{})
EmitSyntaxError(string, ...interface{})
}
type lexer struct {
start StateFn // initial state
in *runes.Feeder // runes source
pos TokenPosition // base for the next token
cursor int // look ahead pointer
tokens chan Token // tokens output
}
func NewLexer(start StateFn, in *runes.Feeder, tokens int) Lexer {
return &lexer{
start: start,
in: in,
pos: TokenPosition{1, 1},
tokens: make(chan Token, tokens),
}
}
func (lex *lexer) Run() {
defer close(lex.tokens)
for state := lex.start; state != nil; {
state = state(lex)
}
}
func (lex *lexer) AtLeast(n int) ([]rune, error) {
min := lex.cursor
if n > 0 {
min += n
}
s, err := lex.in.AtLeast(min)
if len(s) > lex.cursor {
s = s[lex.cursor:]
} else {
s = nil
}
return s, err
}
func (lex *lexer) Position() TokenPosition {
return lex.pos
}
func (lex *lexer) Step(n int) {
lex.cursor += n
}
func (lex *lexer) NewLine() {
lex.pos.NewLine()
}
func (lex *lexer) Tokens() <-chan Token {
return lex.tokens
}
func (lex *lexer) Emit(typ TokenType) {
var text []rune
pos := lex.pos
// extract text to emit, and update cursor for the next
if n := lex.cursor; n > 0 {
text = lex.in.Runes()[:n]
lex.in.Skip(n)
lex.pos.Step(n)
lex.cursor = 0
}
lex.tokens <- NewToken(typ, text, pos)
}
func (lex *lexer) EmitError(err error) {
// if no error is passed, assume they mean EOF
if err == nil {
err = EOF
}
lex.tokens <- NewErrorToken(err, lex.pos)
}
func (lex *lexer) EmitErrorf(s string, args ...interface{}) {
if len(args) > 0 {
s = fmt.Sprintf(s, args...)
}
lex.tokens <- NewErrorToken(errors.New(s), lex.pos)
}
func (lex *lexer) EmitSyntaxError(s string, args ...interface{}) {
if len(args) > 0 {
s = fmt.Sprintf(s, args...)
}
lex.tokens <- NewSyntaxErrorToken(s, lex.pos, lex.cursor, lex.in.Runes())
}
-221
View File
@@ -1,221 +0,0 @@
package lexer
import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
const (
// ReadBufferSize indicates the initial buffer size
ReadBufferSize = 1 << 7 // 128B
// DoublingBufferSizeLimit indicates when we stop doubling
// and just add instead
DoublingBufferSizeLimit = 1 << 17 // 128KiB
)
// implemented interfaces
var (
_ io.RuneReader = (*Reader)(nil)
_ io.RuneScanner = (*Reader)(nil)
)
var (
// ErrInvalidUnreadRune indicates UnreadRune() was called after an
// action other than a successful ReadRune()
ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)
// Reader is a RuneReader aimed at implementing text parsers
type Reader struct {
src io.Reader
buf []byte
off int
cursor int
lastRuneSize int
}
// String returns what's already Read but not yet emitted or discarded
func (b *Reader) String() string {
return string(b.buf[b.off:b.cursor])
}
// Emit returns what's already being Read and discards it afterwards
func (b *Reader) Emit() string {
s := b.String()
b.Discard()
return s
}
// Discard removes from the buffer everything that has been Read
func (b *Reader) Discard() {
switch {
case b.ready() == 0:
// reset
b.buf = b.buf[:0]
b.cursor = 0
b.off = 0
default:
// step
b.off = b.cursor
}
// and prevent UnreadRune()
b.lastRuneSize = -1
}
// ready tells how many bytes are ready to decode
func (b *Reader) ready() int {
return len(b.buf) - b.cursor
}
// available tells how many free bytes remain at the end of the buffer
func (b *Reader) available() int {
return cap(b.buf) - len(b.buf)
}
func (b *Reader) needsBytes(n int) error {
for {
if b.ready() >= n {
// ready
return nil
}
// make room
b.prepareBuffer(n - b.ready())
// and read more
_, err := b.fill()
if err != nil {
return err
}
}
}
func (b *Reader) rebuffer(size int) {
var src, dst []byte
if size > cap(b.buf) {
// new buffer
dst = make([]byte, size)
} else {
// same buffer
dst = b.buf
}
src = b.buf[b.off:]
dst = dst[:len(src)]
copy(dst, src)
b.cursor -= b.off
b.buf = dst
b.off = 0
}
func (b *Reader) prepareBuffer(n int) {
if n > b.available() {
needed := len(b.buf) + n - b.off
size := cap(b.buf)
for size < needed {
switch {
case size < DoublingBufferSizeLimit:
size *= 2
default:
size += DoublingBufferSizeLimit
}
}
b.rebuffer(size)
}
}
func (b *Reader) fill() (int, error) {
start := len(b.buf)
n, err := b.src.Read(b.buf[start:cap(b.buf)])
if n > 0 {
b.buf = b.buf[:start+n]
}
return n, err
}
// ReadRune reads the next rune
func (b *Reader) ReadRune() (rune, int, error) {
// we need at least one byte to start
count := 1
for {
err := b.needsBytes(count)
if err != nil {
b.lastRuneSize = -1
return 0, 0, err
}
if utf8.FullRune(b.buf[b.cursor:]) {
// we have a full rune
break
}
// more
count = b.ready() + 1
}
// decode rune
r, l := utf8.DecodeRune(b.buf[b.cursor:])
// step over
b.cursor += l
// and remember for UnreadRune()
b.lastRuneSize = l
return r, l, nil
}
// UnreadRune moves the cursor where it was before the last call to ReadRune
func (b *Reader) UnreadRune() error {
if b.lastRuneSize > 0 {
b.cursor -= b.lastRuneSize
b.lastRuneSize = -1
return nil
}
return ErrInvalidUnreadRune
}
// PeekRune returns information about the next rune without moving the
// cursor
func (b *Reader) PeekRune() (rune, int, error) {
r, l, err := b.ReadRune()
if err != nil {
return r, l, err
}
err = b.UnreadRune()
return r, l, err
}
// NewReader creates a new runes [Reader] using the given [io.Reader]
func NewReader(r io.Reader) *Reader {
if r == nil {
return nil
}
return &Reader{
src: r,
buf: make([]byte, 0, ReadBufferSize),
}
}
// NewReaderBytes creates a new runes [Reader] using the given bytes
func NewReaderBytes(b []byte) *Reader {
return NewReader(bytes.NewReader(b))
}
// NewReaderString creates a new runes [Reader] using the given string
func NewReaderString(s string) *Reader {
return NewReader(strings.NewReader(s))
}
+125
View File
@@ -0,0 +1,125 @@
package lexer
import (
"errors"
"fmt"
"io"
)
var (
EOF = io.EOF // EOF marker
)
// Token type
type TokenType int
const (
TokenError TokenType = iota
)
// Token Position
type TokenPosition struct {
Line int
Row int
}
func (pos *TokenPosition) Reset() {
pos.Line = 1
pos.Row = 1
}
func (pos *TokenPosition) Step(n int) {
pos.Row += n
}
func (pos *TokenPosition) NewLine() {
pos.Line += 1
pos.Row = 1
}
// Token
type Token interface {
Type() TokenType
String() string
Position() TokenPosition
}
type token struct {
typ TokenType
pos TokenPosition
val string
}
func NewToken(typ TokenType, val []rune, pos TokenPosition) Token {
return &token{
typ: typ,
val: string(val),
pos: pos,
}
}
func (t token) Type() TokenType {
return t.typ
}
func (t token) Position() TokenPosition {
return t.pos
}
func (t token) String() string {
return t.val
}
// ErrorToken
type ErrorToken interface {
Token
Error() string
Unwrap() error
}
type errorToken struct {
token
err error
}
func NewErrorToken(err error, pos TokenPosition) ErrorToken {
return &errorToken{
token: token{
typ: TokenError,
val: err.Error(),
pos: pos,
},
err: err,
}
}
func (t errorToken) Error() string {
return t.err.Error()
}
func (t errorToken) Unwrap() error {
return t.err
}
// SyntaxErrorToken
type SyntaxErrorToken struct {
ErrorToken
Cursor int
Buffer string
}
func NewSyntaxErrorToken(msg string, pos TokenPosition, cur int, buffer []rune) *SyntaxErrorToken {
s := fmt.Sprintf("Syntax Error at %v.%v+%v", pos.Line, pos.Row, cur)
if len(msg) > 0 {
s = fmt.Sprintf("%s: %s", s, msg)
}
return &SyntaxErrorToken{
ErrorToken: NewErrorToken(errors.New(s), pos),
Cursor: cur,
Buffer: string(buffer),
}
}
-64
View File
@@ -1,64 +0,0 @@
package core
import (
"bytes"
"io"
"io/fs"
"strings"
)
// ReadCloser adds a Close() to Readers without one
type ReadCloser struct {
r io.Reader
}
// Read passes the Read() call to the underlying [io.Reader]
// and fail if it was Closed()
func (rc *ReadCloser) Read(b []byte) (int, error) {
switch {
case rc.r != nil:
return rc.r.Read(b)
default:
return 0, fs.ErrClosed
}
}
// Close attempts to Close the underlying [io.Reader], or
// remove it if it doesn't support Close() and fail
// if closed twice
func (rc *ReadCloser) Close() error {
switch {
case rc.r != nil:
rc.r = nil
return nil
default:
return fs.ErrClosed
}
}
// NewReadCloser wraps a [io.Reader] to satisfy
// [io.ReadCloser] if needed
func NewReadCloser(r io.Reader) io.ReadCloser {
switch p := r.(type) {
case io.ReadCloser:
return p
case nil:
return nil
default:
return &ReadCloser{
r: r,
}
}
}
// NewReadCloserBytes wraps a bytes slice to implement
// a [io.ReadCloser]
func NewReadCloserBytes(b []byte) io.ReadCloser {
return NewReadCloser(bytes.NewReader(b))
}
// NewReadCloserString wraps a string to implement
// a [io.ReadCloser]
func NewReadCloserString(s string) io.ReadCloser {
return NewReadCloser(strings.NewReader(s))
}
+135
View File
@@ -0,0 +1,135 @@
package runes
import (
"bufio"
"bytes"
"io"
"strings"
"sync"
)
// Feeder buffers runes decoded from an io.RuneReader and exposes them to consumers
type Feeder struct {
sync.Mutex
in io.RuneReader
out []rune
sz []int
err error
}
// NewFeederBytes creates a new Feeder using a slice of bytes as input
func NewFeederBytes(b []byte) *Feeder {
return NewFeeder(bytes.NewReader(b))
}
// NewFeederString creates a new Feeder using a string as input
func NewFeederString(s string) *Feeder {
return NewFeeder(strings.NewReader(s))
}
// NewFeeder creates a new Feeder using a Reader as input
func NewFeeder(in io.Reader) *Feeder {
rd, ok := in.(io.RuneReader)
if !ok {
rd = bufio.NewReader(in)
}
return &Feeder{in: rd}
}
// Skip drops n runes from the head of the buffer
func (f *Feeder) Skip(n int) (int, bool) {
f.Lock()
defer f.Unlock()
if l := f.skip(n); l > 0 {
return l, true
} else {
return 0, false
}
}
func (f *Feeder) skip(n int) int {
if l := len(f.out); l > n {
f.out = f.out[n:]
f.sz = f.sz[n:]
return l - n
} else {
f.out = f.out[:0]
f.sz = f.sz[:0]
return 0
}
}
// ReadRune returns the next rune
func (f *Feeder) ReadRune() (r rune, size int, err error) {
f.Lock()
defer f.Unlock()
if f.atLeast(1) {
r = f.out[0]
size = f.sz[0]
f.skip(1)
}
err = f.Err()
return
}
// AtLeast blocks until there are at least n runes on the buffer, or an error or EOF has occurred
func (f *Feeder) AtLeast(n int) (out []rune, err error) {
f.Lock()
defer f.Unlock()
if !f.atLeast(n) {
err = f.err
}
if len(f.out) > 0 {
out = f.out
}
return
}
func (f *Feeder) atLeast(n int) bool {
for len(f.out) < n {
r, size, err := f.in.ReadRune()
if err != nil && f.err == nil {
// store first error
f.err = err
}
if size > 0 {
f.out = append(f.out, r)
f.sz = append(f.sz, size)
} else if f.err != nil {
break
}
}
return len(f.out) >= n
}
// Runes returns the currently buffered runes
func (f *Feeder) Runes() []rune {
return f.out
}
// Buffered returns the count of currently buffered runes
func (f *Feeder) Buffered() int {
return len(f.out)
}
// EOF reports whether the Feeder has reached end-of-input
func (f *Feeder) EOF() bool {
return f.err == io.EOF
}
// Err returns the first non-EOF error the Feeder encountered, if any
func (f *Feeder) Err() error {
if f.err == io.EOF {
return nil
}
return f.err
}
+124
View File
@@ -0,0 +1,124 @@
package runes
import (
"unicode"
)
// Probe was borrowed from https://github.com/JamesOwenHall/json2.Scanner
//
// Probe is a func that returns a subset of the input and a success bool.
type Probe func([]rune) ([]rune, bool)
// If returns a probe that accepts a rune if it satisfies the condition.
func If(condition func(rune) bool) Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && condition(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// Rune returns a probe that accepts r.
func Rune(r rune) Probe {
return If(func(b rune) bool {
return r == b
})
}
// Space returns a probe that accepts whitespace as defined in the unicode
// package.
func Space() Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && unicode.IsSpace(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// And returns a probe that accepts all probes in sequence.
func And(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for _, s := range probes {
if read, ok := s(remaining); !ok {
return nil, false
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
return accumulated, true
}
}
// Or returns a probe that accepts the first successful probe in probes.
func Or(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
for _, s := range probes {
if read, ok := s(input); ok {
return read, true
}
}
return nil, false
}
}
// Maybe runs probe and returns true regardless of the output.
func Maybe(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
read, _ := probe(input)
return read, true
}
}
// Any returns a probe that accepts any number of occurrences of probe,
// including zero.
func Any(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for {
if read, ok := probe(remaining); !ok {
return accumulated, true
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
}
}
// N returns a probe that accepts probe exactly n times.
func N(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n)
for i := 0; i < n; i++ {
probes[i] = probe
}
return And(probes...)(input)
}
}
// AtLeast returns a probe that accepts probe at least n times.
func AtLeast(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n, n+1)
for i := range probes {
probes[i] = probe
}
probes = append(probes, Any(probe))
return And(probes...)(input)
}
}
+58
View File
@@ -0,0 +1,58 @@
package runes
import (
"testing"
)
func TestProbe(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "a"},
{Space(), " "},
{Space(), "\t"},
{Space(), "\n"},
{And(Rune('1'), Rune('2'), Space()), "12 "},
{Or(Rune('r'), Space(), Rune('x')), "r"},
{Or(Rune('r'), Space(), Rune('x')), " "},
{Or(Rune('r'), Space(), Rune('x')), "x"},
{Any(Rune('w')), ""},
{Any(Rune('w')), "w"},
{Any(Rune('w')), "ww"},
{Any(Rune('w')), "www"},
{N(6, Rune('w')), "wwwwww"},
{Maybe(Rune('w')), ""},
{Maybe(Rune('w')), "w"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); !ok {
t.Errorf("Expected to read %s", string(test.input))
} else if string(read) != test.input {
t.Errorf("Mismatch of input %s and read %s", test.input, string(read))
}
}
}
func TestProbeFail(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "b"},
{Space(), "a"},
{And(Rune('1'), Rune('2'), Space()), "12"},
{Or(Rune('r'), Space(), Rune('x')), "4"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); ok {
t.Errorf("Unexpectedly read %s with input %s", string(read), test.input)
}
}
}
+5
View File
@@ -0,0 +1,5 @@
/*
Package scanner implements the low level functionality
of AsciiGoat lexers
*/
package scanner
+99
View File
@@ -0,0 +1,99 @@
package scanner
import (
"unicode/utf8"
)
const (
// EOF is a dummy rune representing End-Of-File
EOF = -1
)
// A Position in the input string and in the line-based document
type Position struct {
Offset uint
Line, Column uint
}
// A Scanner represents the low level layer for text parsers
type Scanner struct {
name string
input string
base Position
cursor Position
runes uint
}
// NewScannerFromString instantiates a new Scanner to
// parse a given string
func NewScannerFromString(name, input string) *Scanner {
return &Scanner{
name: name,
input: input,
base: Position{0, 1, 1},
cursor: Position{0, 1, 1},
runes: 0,
}
}
// Length returns the number of bytes and runes in the Terminal that is being detected
func (l *Scanner) Length() (uint, uint) {
return l.cursor.Offset - l.base.Offset, l.runes
}
// Empty tells if there are no runes accounted for the next Terminal yet
func (l *Scanner) Empty() bool {
return l.runes == 0
}
// StepForth moves the cursor forward
func (l *Scanner) StepForth(runes, bytes uint) {
l.cursor.Offset += bytes
l.cursor.Column += runes
l.runes += runes
}
// StepBack moves the cursor backward
func (l *Scanner) StepBack(runes, bytes uint) {
l.cursor.Offset -= bytes
// FIXME: what if column goes < 1?
l.cursor.Column -= runes
l.runes -= runes
}
// Reset moves the cursor back to the base
func (l *Scanner) Reset() {
l.cursor = l.base
l.runes = 0
}
// Skip trashes everything up to the cursor
func (l *Scanner) Skip() {
l.base = l.cursor
l.runes = 0
}
// NewLine accounts a line break in the position of the cursor
func (l *Scanner) NewLine() {
l.cursor.Line++
l.cursor.Column = 1
}
// Peek returns the next rune without moving the cursor
func (l *Scanner) Peek() (rune, uint) {
if l.cursor.Offset == uint(len(l.input)) {
return EOF, 0
}
r, bytes := utf8.DecodeRuneInString(l.input[l.cursor.Offset:])
return r, uint(bytes)
}
// Next returns the next rune, moving the cursor past it
func (l *Scanner) Next() (rune, uint) {
r, bytes := l.Peek()
if bytes > 0 {
l.StepForth(1, bytes)
}
return r, bytes
}
+43
View File
@@ -0,0 +1,43 @@
package scanner
import (
"unicode/utf8"
)
// A Terminal represents a literal element within a document
type Terminal struct {
val string
bytes, runes uint
line, col uint
}
// NewTerminalFull returns a new Terminal instance
func NewTerminalFull(val string, bytes, runes, line, col uint) *Terminal {
return &Terminal{
val: val,
bytes: bytes,
runes: runes,
line: line,
col: col,
}
}
// NewTerminal creates a Terminal instance without knowing its length
func NewTerminal(val string, line, col uint) *Terminal {
bytes := uint(len(val))
runes := uint(utf8.RuneCountInString(val))
return NewTerminalFull(val, bytes, runes, line, col)
}
// Position returns the position (line and column)
// of the Terminal in the source document
func (t *Terminal) Position() (uint, uint) {
return t.line, t.col
}
// Value returns the string corresponding to
// this Terminal and its size in bytes and runes
func (t *Terminal) Value() (string, uint, uint) {
return t.val, t.bytes, t.runes
}