39 Commits

Author SHA1 Message Date
amery be53431904 lexer: to simplify states, Lexer.EmitError() assumes EOF if nil is passed
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 03:02:43 +01:00
amery 3edf777c68 lexer: add Lexer.AtLeast() to gather input data from the Feeder
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 00:40:22 +01:00
amery 36427e059f lexer: add initial generic Lexer
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 20:36:55 +01:00
amery 90e9fc47cf lexer: add Step()/NewLine()/Reset() methods to TokenPosition
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 19:59:25 +01:00
amery 6e05cdbb28 lexer: add generic Token
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 18:02:49 +01:00
amery 866fb8374b runes.Feeder: renamed Buffered() to Runes() and Len() to Buffered() for consistency with bufio
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 20:48:31 +01:00
amery 7828f8d92f envexp: drop package in favour of asciigoat.org/parsers/shexp
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 16:57:24 +01:00
Nagy Károly Gábriel 65f2605a8a fix NewFeeder function help
This will fix the Go Help for function NewFeeder
2021-06-29 13:49:30 +01:00
amery 23f53c4da7 runes: rework NewFeeder() to prevent double wrapping
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 12:30:46 +01:00
amery 576937268b runes: introduce Feeder
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 11:06:48 +01:00
amery 2271848acf attic/ebnf: go fmt
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery 89ecdcd103 COPYING: add MIT licence text
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery 17208cdc64 attic: ebnf/ moved to attic/ebnf
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:38 +01:00
amery e18e66860d runes: imported github.com/JamesOwenHall/json2.Scanner as Probe
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-29 03:53:34 +01:00
amery f88c3f9b0c envexp: bind Reader and Expander
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 20:41:44 +01:00
amery 3e2356a3f6 envexp: turn Expander from interface to struct
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 20:34:39 +01:00
amery 607fdb6ee4 envexp: add top-level Expand() using Getenv
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:59:50 +01:00
amery 2ba6afae4b envexp: add placeholder for Expander
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:59:24 +01:00
amery aa7bc06646 envexp: rename env package to envexp
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-26 18:52:42 +01:00
amery 9f4f801066 env: extend Reader{} to implement io.Closer
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 22:16:40 +01:00
amery 26366f82bd env: add skeleton of io.Reader wrapper
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 22:14:54 +01:00
amery d289643458 build-sys: replace gofmt.sh with make fmt
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 21:28:28 +01:00
amery 76e566b92e go.mod: add initial go.mod
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-06-24 21:27:42 +01:00
amery 33dbfec54a ebnf/token: Add initial TokenType 2014-10-30 00:53:02 +01:00
amery 2797253a96 scanner: adjust comments to make golint happy
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-26 22:31:22 +01:00
amery 71599c9adb scanner: change NewScannerFromString() to accept empty strings
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-26 00:27:51 +02:00
amery 538ebfe37b scanner.Scanner: rename NextLine() to NewLine()
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 23:17:03 +02:00
amery 42a75bf4d9 ebnf: add some doc
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 12:12:32 +02:00
amery 100d6d5cec scanner: add initial dummy doc
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-25 08:49:03 +02:00
amery f9405e7fe1 scanner.Scanner: Add .NewLine() helper to increment line of the cursor
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 07:39:08 +02:00
amery 7a4713a353 scanner.Scanner: add StepBack(), Reset() and Skip() methods
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 07:34:34 +02:00
amery d4dbc28aee scanner.Scanner: initial struct and methods for the low level text scanner
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:39 +02:00
amery ca274e51a3 scanner.Terminal: A literal (utf8) string within a document
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:39 +02:00
amery e55382c583 Import gofmt.sh helper from asciigoat.org/ini
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-24 06:40:38 +02:00
amery 2991b67b39 ebnf: replace ebnf.ebnf with grammar from the iso 14977
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-15 01:38:09 +02:00
amery 494855f0d1 ebnf:doc: add final draft of iso 14977
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:59:16 +02:00
amery b402063aae ebnf:doc: add syntax file for vim
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:59:11 +02:00
amery 6487b2a49d ebnf: add empty package, and ebnf grammar copied from wikipedia
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-10-14 22:57:22 +02:00
amery 5a096152a2 Create asciigoat.org/core package
Signed-off-by: Alejandro Mery <amery@geeks.cl>
2014-06-25 09:04:55 +00:00
27 changed files with 1085 additions and 304 deletions
-13
View File
@@ -1,13 +0,0 @@
# http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 4
View File
+1 -1
View File
@@ -1,4 +1,4 @@
Copyright 2023 JPI Technologies Ltd <oss@jpi.io>
Copyright 2021 JPI Technologies Ltd <oss@jpi.io>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+15
View File
@@ -0,0 +1,15 @@
.PHONY: all fmt build test
GO ?= go
all: fmt build
fmt:
$(GO) fmt ./...
$(GO) mod tidy || true
build:
$(GO) get -v ./...
test:
$(GO) test -v ./...
+4
View File
@@ -0,0 +1,4 @@
asciigoat.org/core
==================
helpers and general structs used by asciigoat parsers and generators
-1
View File
@@ -1 +0,0 @@
# asciigoat's core library
+37
View File
@@ -0,0 +1,37 @@
/*
Package ebnf implements an ISO/IEC 14977
Extended Backus-Naur Form parser, verifiers,
and additional related helpers for AsciiGoat
A syntax highlighter for vim and a copy of the final draft of the standard
are included in the doc/ directory. The official standard can be downloaded from
http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip
An uberly simplified version of the EBNF grammar looks like:
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" ;
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
| "'" | '"' | "=" | "|" | "." | "," | ";" ;
character = letter | digit | symbol | "_" ;
identifier = letter , { letter | digit | "_" } ;
terminal = "'" , character , { character } , "'"
| '"' , character , { character } , '"' ;
lhs = identifier ;
rhs = identifier
| terminal
| "[" , rhs , "]"
| "{" , rhs , "}"
| "(" , rhs , ")"
| rhs , "|" , rhs
| rhs , "," , rhs ;
rule = lhs , "=" , rhs , ";" ;
grammar = { rule } ;
*/
package ebnf
+36
View File
@@ -0,0 +1,36 @@
" Vim syntax file
" Language: EBNF
" Maintainer: Hans Fugal
" Last Change: $Date: 2003/01/28 14:42:09 $
" Version: $Id: ebnf.vim,v 1.1 2003/01/28 14:42:09 fugalh Exp $
" With thanks to Michael Brailsford for the BNF syntax file.
" Quit when a syntax file was already loaded
if version < 600
syntax clear
elseif exists("b:current_syntax")
finish
endif
syn match ebnfMetaIdentifier /[A-Za-z]/ skipwhite skipempty nextgroup=ebnfSeperator
syn match ebnfSeperator "=" contained nextgroup=ebnfProduction skipwhite skipempty
syn region ebnfProduction start=/\zs[^\.;]/ end=/[\.;]/me=e-1 contained contains=ebnfSpecial,ebnfDelimiter,ebnfTerminal,ebnfSpecialSequence,ebnfComment nextgroup=ebnfEndProduction skipwhite skipempty
syn match ebnfDelimiter #[,(|)\]}\[{/!]\|\(\*)\)\|\((\*\)\|\(/)\)\|\(:)\)\|\((/\)\|\((:\)# contained
syn match ebnfSpecial /[\-\*]/ contained
syn region ebnfSpecialSequence matchgroup=Delimiter start=/?/ end=/?/ contained
syn match ebnfEndProduction /[\.;]/ contained
syn region ebnfTerminal matchgroup=delimiter start=/"/ end=/"/ contained
syn region ebnfTerminal matchgroup=delimiter start=/'/ end=/'/ contained
syn region ebnfComment start="(\*" end="\*)"
hi link ebnfComment Comment
hi link ebnfMetaIdentifier Identifier
hi link ebnfSeperator ebnfSpecial
hi link ebnfEndProduction ebnfDelimiter
hi link ebnfDelimiter Delimiter
hi link ebnfSpecial Special
hi link ebnfSpecialSequence Statement
hi link ebnfTerminal Constant
Binary file not shown.
+230
View File
@@ -0,0 +1,230 @@
(* vim: set ft=ebnf: *)
(*
The syntax of Extended BNF can be defined using
itself. There are four parts in this example,
the first part names the characters, the second
part defines the removal of unnecessary non-
printing characters, the third part defines the
removal of textual comments, and the final part
defines the structure of Extended BNF itself.
Each syntax rule in this example starts with a
comment that identifies the corresponding clause
in the standard.
The meaning of special-sequences is not defined
in the standard. In this example (see the
reference to 7.6) they represent control
functions defined by ISO/IEC 6429:1992.
Another special-sequence defines a
syntactic-exception (see the reference to 4.7).
*)
(*
The first part of the lexical syntax defines the
characters in the 7-bit character set (ISO/IEC
646:1991) that represent each terminal-character
and gap-separator in Extended BNF.
*)
(* see 7.2 *) letter
= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h'
| 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
| 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
| 'y' | 'z'
| 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H'
| 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P'
| 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
| 'Y' | 'Z';
(* see 7.2 *) decimal digit
= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7'
| '8' | '9';
(*
The representation of the following
terminal-characters is defined in clauses 7.3,
7.4 and tables 1, 2.
*)
concatenate symbol = ',';
defining symbol = '=';
definition separator symbol = '|' | '/' | '!';
end comment symbol = '*)';
end group symbol = ')';
end option symbol = ']' | '/)';
end repeat symbol = '}' | ':)';
except symbol = '-';
first quote symbol = "'";
repetition symbol = '*';
second quote symbol = '"';
special sequence symbol = '?';
start comment symbol = '(*';
start group symbol = '(';
start option symbol = '[' | '(/';
start repeat symbol = '{' | '(:';
terminator symbol = ';' | '.';
(* see 7.5 *) other character
= ' ' | ':' | '+' | '_' | '%' | '@'
| '&' | '#' | '$' | '<' | '>' | '\'
| '^' | '`' | '~';
(* see 7.6 *) space character = ' ';
horizontal tabulation character
= ? ISO 6429 character Horizontal Tabulation ? ;
new line
= { ? ISO 6429 character Carriage Return ? },
? ISO 6429 character Line Feed ?,
{ ? ISO 6429 character Carriage Return ? };
vertical tabulation character
= ? ISO 6429 character Vertical Tabulation ? ;
form feed
= ? ISO 6429 character Form Feed ? ;
(*
The second part of the syntax defines the
removal of unnecessary non-printing characters
from a syntax.
*)
(* see 6.2 *) terminal character
= letter
| decimal digit
| concatenate symbol
| defining symbol
| definition separator symbol
| end comment symbol
| end group symbol
| end option symbol
| end repeat symbol
| except symbol
| first quote symbol
| repetition symbol
| second quote symbol
| special sequence symbol
| start comment symbol
| start group symbol
| start option symbol
| start repeat symbol
| terminator symbol
| other character;
(* see 6.3 *) gap free symbol
= terminal character
- (first quote symbol | second quote symbol)
| terminal string;
(* see 4.16 *) terminal string
= first quote symbol, first terminal character,
{first terminal character},
first quote symbol
| second quote symbol, second terminal character,
{second terminal character},
second quote symbol;
(* see 4.17 *) first terminal character
= terminal character - first quote symbol;
(* see 4.18 *) second terminal character
= terminal character - second quote symbol;
(* see 6.4 *) gap separator
= space character
| horizontal tabulation character
| new line
| vertical tabulation character
| form feed;
(* see 6.5 *) syntax
= {gap separator},
gap free symbol, {gap separator},
{gap free symbol, {gap separator}};
(*
The third part of the syntax defines the
removal of bracketed-textual-comments from
gap-free-symbols that form a syntax.
*)
(* see 6.6 *) commentless symbol
= terminal character
- (letter
| decimal digit
| first quote symbol
| second quote symbol
| start comment symbol
| end comment symbol
| special sequence symbol
| other character)
| meta identifier
| integer
| terminal string
| special sequence;
(* see 4.9 *) integer
= decimal digit, {decimal digit};
(* see 4.14 *) meta identifier
= letter, {meta identifier character};
(* see 4.15 *) meta identifier character
= letter
| decimal digit;
(* see 4.19 *) special sequence
= special sequence symbol,
{special sequence character},
special sequence symbol;
(* see 4.20 *) special sequence character
= terminal character - special sequence symbol;
(* see 6.7 *) comment symbol
= bracketed textual comment
| other character
| commentless symbol;
(* see 6.8 *) bracketed textual comment
= start comment symbol, {comment symbol},
end comment symbol;
(* see 6.9 *) syntax
= {bracketed textual comment},
commentless symbol,
{bracketed textual comment},
{commentless symbol,
{bracketed textual comment}};
(*
The final part of the syntax defines the
abstract syntax of Extended BNF, i.e. the
structure in terms of the commentless symbols.
*)
(* see 4.2 *) syntax
= syntax rule, {syntax rule};
(* see 4.3 *) syntax rule
= meta identifier, defining symbol,
definitions list, terminator symbol;
(* see 4.4 *) definitions list
= single definition,
{definition separator symbol,
single definition};
(* see 4.5 *) single definition
= syntactic term,
{concatenate symbol, syntactic term};
(* see 4.6 *) syntactic term
= syntactic factor,
[except symbol, syntactic exception];
(* see 4.7 *) syntactic exception
= ? a syntactic-factor that could be replaced
by a syntactic-factor containing no
meta-identifiers
? ;
(* see 4.8 *) syntactic factor
= [integer, repetition symbol],
syntactic primary;
(* see 4.10 *) syntactic primary
= optional sequence
| repeated sequence
| grouped sequence
| meta identifier
| terminal string
| special sequence
| empty sequence;
(* see 4.11 *) optional sequence
= start option symbol, definitions list,
end option symbol;
(* see 4.12 *) repeated sequence
= start repeat symbol, definitions list,
end repeat symbol;
(* see 4.13 *) grouped sequence
= start group symbol, definitions list,
end group symbol;
(* see 4.21 *) empty sequence
= ;
+1
View File
@@ -0,0 +1 @@
package ebnf
+20
View File
@@ -0,0 +1,20 @@
package token
// types of Token
type TokenType int
const (
TokenError TokenType = iota + 1
TokenEOF
)
func (typ TokenType) String() string {
switch typ {
case TokenError:
return "ERROR"
case TokenEOF:
return "EOF"
default:
return "UNDEFINED"
}
}
+25
View File
@@ -0,0 +1,25 @@
package token
import (
"fmt"
"testing"
)
func TestTokenTypeToString(t *testing.T) {
var foo TokenType
for _, o := range []struct {
typ TokenType
str string
}{
{foo, "UNDEFINED"},
{TokenError, "ERROR"},
{TokenEOF, "EOF"},
{1234, "UNDEFINED"},
} {
str := fmt.Sprintf("%s", o.typ)
if str != o.str {
t.Errorf("TokenType:%v stringified as %s instead of %s.", int(o.typ), str, o.str)
}
}
}
+1
View File
@@ -0,0 +1 @@
package core
-2
View File
@@ -1,2 +0,0 @@
// Package core provides the foundations of asciigoat packages
package core
+1 -1
View File
@@ -1,3 +1,3 @@
module asciigoat.org/core
go 1.19
go 1.16
View File
+125 -1
View File
@@ -1,2 +1,126 @@
// Package lexer provides basic helpers to implement parsers
package lexer
import (
"errors"
"fmt"
"asciigoat.org/core/runes"
)
// state function
type StateFn func(Lexer) StateFn
type Lexer interface {
Run() // run state machine
Position() TokenPosition // base for the next token
Tokens() <-chan Token // tokens output
AtLeast(n int) ([]rune, error)
NewLine()
Step(n int)
Emit(TokenType)
EmitError(error)
EmitErrorf(string, ...interface{})
EmitSyntaxError(string, ...interface{})
}
type lexer struct {
start StateFn // initial state
in *runes.Feeder // runes source
pos TokenPosition // base for the next token
cursor int // look ahead pointer
tokens chan Token // tokens output
}
func NewLexer(start StateFn, in *runes.Feeder, tokens int) Lexer {
return &lexer{
start: start,
in: in,
pos: TokenPosition{1, 1},
tokens: make(chan Token, tokens),
}
}
func (lex *lexer) Run() {
defer close(lex.tokens)
for state := lex.start; state != nil; {
state = state(lex)
}
}
func (lex *lexer) AtLeast(n int) ([]rune, error) {
min := lex.cursor
if n > 0 {
min += n
}
s, err := lex.in.AtLeast(min)
if len(s) > lex.cursor {
s = s[lex.cursor:]
} else {
s = nil
}
return s, err
}
func (lex *lexer) Position() TokenPosition {
return lex.pos
}
func (lex *lexer) Step(n int) {
lex.cursor += n
}
func (lex *lexer) NewLine() {
lex.pos.NewLine()
}
func (lex *lexer) Tokens() <-chan Token {
return lex.tokens
}
func (lex *lexer) Emit(typ TokenType) {
var text []rune
pos := lex.pos
// extract text to emit, and update cursor for the next
if n := lex.cursor; n > 0 {
text = lex.in.Runes()[:n]
lex.in.Skip(n)
lex.pos.Step(n)
lex.cursor = 0
}
lex.tokens <- NewToken(typ, text, pos)
}
func (lex *lexer) EmitError(err error) {
// if no error is passed, assume they mean EOF
if err == nil {
err = EOF
}
lex.tokens <- NewErrorToken(err, lex.pos)
}
func (lex *lexer) EmitErrorf(s string, args ...interface{}) {
if len(args) > 0 {
s = fmt.Sprintf(s, args...)
}
lex.tokens <- NewErrorToken(errors.New(s), lex.pos)
}
func (lex *lexer) EmitSyntaxError(s string, args ...interface{}) {
if len(args) > 0 {
s = fmt.Sprintf(s, args...)
}
lex.tokens <- NewSyntaxErrorToken(s, lex.pos, lex.cursor, lex.in.Runes())
}
-221
View File
@@ -1,221 +0,0 @@
package lexer
import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
const (
// ReadBufferSize indicates the initial buffer size
ReadBufferSize = 1 << 7 // 128B
// DoublingBufferSizeLimit indicates when we stop doubling
// and just add instead
DoublingBufferSizeLimit = 1 << 17 // 128KiB
)
// implemented interfaces
var (
_ io.RuneReader = (*Reader)(nil)
_ io.RuneScanner = (*Reader)(nil)
)
var (
// ErrInvalidUnreadRune indicates UnreadRune() was called after an
// action other than a successful ReadRune()
ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)
// Reader is a RuneReader aimed at implementing text parsers
type Reader struct {
src io.Reader
buf []byte
off int
cursor int
lastRuneSize int
}
// String returns what's already Read but not yet emitted or discarded
func (b *Reader) String() string {
return string(b.buf[b.off:b.cursor])
}
// Emit returns what's already being Read and discards it afterwards
func (b *Reader) Emit() string {
s := b.String()
b.Discard()
return s
}
// Discard removes from the buffer everything that has been Read
func (b *Reader) Discard() {
switch {
case b.ready() == 0:
// reset
b.buf = b.buf[:0]
b.cursor = 0
b.off = 0
default:
// step
b.off = b.cursor
}
// and prevent UnreadRune()
b.lastRuneSize = -1
}
// ready tells how many bytes are ready to decode
func (b *Reader) ready() int {
return len(b.buf) - b.cursor
}
// available tells how many free bytes remain at the end of the buffer
func (b *Reader) available() int {
return cap(b.buf) - len(b.buf)
}
func (b *Reader) needsBytes(n int) error {
for {
if b.ready() >= n {
// ready
return nil
}
// make room
b.prepareBuffer(n - b.ready())
// and read more
_, err := b.fill()
if err != nil {
return err
}
}
}
func (b *Reader) rebuffer(size int) {
var src, dst []byte
if size > cap(b.buf) {
// new buffer
dst = make([]byte, size)
} else {
// same buffer
dst = b.buf
}
src = b.buf[b.off:]
dst = dst[:len(src)]
copy(dst, src)
b.cursor -= b.off
b.buf = dst
b.off = 0
}
func (b *Reader) prepareBuffer(n int) {
if n > b.available() {
needed := len(b.buf) + n - b.off
size := cap(b.buf)
for size < needed {
switch {
case size < DoublingBufferSizeLimit:
size *= 2
default:
size += DoublingBufferSizeLimit
}
}
b.rebuffer(size)
}
}
func (b *Reader) fill() (int, error) {
start := len(b.buf)
n, err := b.src.Read(b.buf[start:cap(b.buf)])
if n > 0 {
b.buf = b.buf[:start+n]
}
return n, err
}
// ReadRune reads the next rune
func (b *Reader) ReadRune() (rune, int, error) {
// we need at least one byte to start
count := 1
for {
err := b.needsBytes(count)
if err != nil {
b.lastRuneSize = -1
return 0, 0, err
}
if utf8.FullRune(b.buf[b.cursor:]) {
// we have a full rune
break
}
// more
count = b.ready() + 1
}
// decode rune
r, l := utf8.DecodeRune(b.buf[b.cursor:])
// step over
b.cursor += l
// and remember for UnreadRune()
b.lastRuneSize = l
return r, l, nil
}
// UnreadRune moves the cursor where it was before the last call to ReadRune
func (b *Reader) UnreadRune() error {
if b.lastRuneSize > 0 {
b.cursor -= b.lastRuneSize
b.lastRuneSize = -1
return nil
}
return ErrInvalidUnreadRune
}
// PeekRune returns information about the next rune without moving the
// cursor
func (b *Reader) PeekRune() (rune, int, error) {
r, l, err := b.ReadRune()
if err != nil {
return r, l, err
}
err = b.UnreadRune()
return r, l, err
}
// NewReader creates a new runes [Reader] using the given [io.Reader]
func NewReader(r io.Reader) *Reader {
if r == nil {
return nil
}
return &Reader{
src: r,
buf: make([]byte, 0, ReadBufferSize),
}
}
// NewReaderBytes creates a new runes [Reader] using the given bytes
func NewReaderBytes(b []byte) *Reader {
return NewReader(bytes.NewReader(b))
}
// NewReaderString creates a new runes [Reader] using the given string
func NewReaderString(s string) *Reader {
return NewReader(strings.NewReader(s))
}
+125
View File
@@ -0,0 +1,125 @@
package lexer
import (
"errors"
"fmt"
"io"
)
var (
EOF = io.EOF // EOF marker
)
// Token type
type TokenType int
const (
TokenError TokenType = iota
)
// Token Position
type TokenPosition struct {
Line int
Row int
}
func (pos *TokenPosition) Reset() {
pos.Line = 1
pos.Row = 1
}
func (pos *TokenPosition) Step(n int) {
pos.Row += n
}
func (pos *TokenPosition) NewLine() {
pos.Line += 1
pos.Row = 1
}
// Token
type Token interface {
Type() TokenType
String() string
Position() TokenPosition
}
type token struct {
typ TokenType
pos TokenPosition
val string
}
func NewToken(typ TokenType, val []rune, pos TokenPosition) Token {
return &token{
typ: typ,
val: string(val),
pos: pos,
}
}
func (t token) Type() TokenType {
return t.typ
}
func (t token) Position() TokenPosition {
return t.pos
}
func (t token) String() string {
return t.val
}
// ErrorToken
type ErrorToken interface {
Token
Error() string
Unwrap() error
}
type errorToken struct {
token
err error
}
func NewErrorToken(err error, pos TokenPosition) ErrorToken {
return &errorToken{
token: token{
typ: TokenError,
val: err.Error(),
pos: pos,
},
err: err,
}
}
func (t errorToken) Error() string {
return t.err.Error()
}
func (t errorToken) Unwrap() error {
return t.err
}
// SyntaxErrorToken
type SyntaxErrorToken struct {
ErrorToken
Cursor int
Buffer string
}
func NewSyntaxErrorToken(msg string, pos TokenPosition, cur int, buffer []rune) *SyntaxErrorToken {
s := fmt.Sprintf("Syntax Error at %v.%v+%v", pos.Line, pos.Row, cur)
if len(msg) > 0 {
s = fmt.Sprintf("%s: %s", s, msg)
}
return &SyntaxErrorToken{
ErrorToken: NewErrorToken(errors.New(s), pos),
Cursor: cur,
Buffer: string(buffer),
}
}
-64
View File
@@ -1,64 +0,0 @@
package core
import (
"bytes"
"io"
"io/fs"
"strings"
)
// ReadCloser adds a Close() to Readers without one
type ReadCloser struct {
r io.Reader
}
// Read passes the Read() call to the underlying [io.Reader]
// and fail if it was Closed()
func (rc *ReadCloser) Read(b []byte) (int, error) {
switch {
case rc.r != nil:
return rc.r.Read(b)
default:
return 0, fs.ErrClosed
}
}
// Close attempts to Close the underlying [io.Reader], or
// remove it if it doesn't support Close() and fail
// if closed twice
func (rc *ReadCloser) Close() error {
switch {
case rc.r != nil:
rc.r = nil
return nil
default:
return fs.ErrClosed
}
}
// NewReadCloser wraps a [io.Reader] to satisfy
// [io.ReadCloser] if needed
func NewReadCloser(r io.Reader) io.ReadCloser {
switch p := r.(type) {
case io.ReadCloser:
return p
case nil:
return nil
default:
return &ReadCloser{
r: r,
}
}
}
// NewReadCloserBytes wraps a bytes slice to implement
// a [io.ReadCloser]
func NewReadCloserBytes(b []byte) io.ReadCloser {
return NewReadCloser(bytes.NewReader(b))
}
// NewReadCloserString wraps a string to implement
// a [io.ReadCloser]
func NewReadCloserString(s string) io.ReadCloser {
return NewReadCloser(strings.NewReader(s))
}
+135
View File
@@ -0,0 +1,135 @@
package runes
import (
"bufio"
"bytes"
"io"
"strings"
"sync"
)
// Feeder buffers runes decoded from an io.RuneReader and exposes them to consumers
type Feeder struct {
sync.Mutex
in io.RuneReader
out []rune
sz []int
err error
}
// NewFeederBytes creates a new Feeder using a slice of bytes as input
func NewFeederBytes(b []byte) *Feeder {
return NewFeeder(bytes.NewReader(b))
}
// NewFeederString creates a new Feeder using a string as input
func NewFeederString(s string) *Feeder {
return NewFeeder(strings.NewReader(s))
}
// NewFeeder creates a new Feeder using a Reader as input
func NewFeeder(in io.Reader) *Feeder {
rd, ok := in.(io.RuneReader)
if !ok {
rd = bufio.NewReader(in)
}
return &Feeder{in: rd}
}
// Skip drops n runes from the head of the buffer
func (f *Feeder) Skip(n int) (int, bool) {
f.Lock()
defer f.Unlock()
if l := f.skip(n); l > 0 {
return l, true
} else {
return 0, false
}
}
func (f *Feeder) skip(n int) int {
if l := len(f.out); l > n {
f.out = f.out[n:]
f.sz = f.sz[n:]
return l - n
} else {
f.out = f.out[:0]
f.sz = f.sz[:0]
return 0
}
}
// ReadRune returns the next rune
func (f *Feeder) ReadRune() (r rune, size int, err error) {
f.Lock()
defer f.Unlock()
if f.atLeast(1) {
r = f.out[0]
size = f.sz[0]
f.skip(1)
}
err = f.Err()
return
}
// AtLeast blocks until there are at least n runes on the buffer, or an error or EOF has occurred
func (f *Feeder) AtLeast(n int) (out []rune, err error) {
f.Lock()
defer f.Unlock()
if !f.atLeast(n) {
err = f.err
}
if len(f.out) > 0 {
out = f.out
}
return
}
func (f *Feeder) atLeast(n int) bool {
for len(f.out) < n {
r, size, err := f.in.ReadRune()
if err != nil && f.err == nil {
// store first error
f.err = err
}
if size > 0 {
f.out = append(f.out, r)
f.sz = append(f.sz, size)
} else if f.err != nil {
break
}
}
return len(f.out) >= n
}
// Runes returns the currently buffered runes
func (f *Feeder) Runes() []rune {
return f.out
}
// Buffered returns the count of currently buffered runes
func (f *Feeder) Buffered() int {
return len(f.out)
}
// EOF reports whether the Feeder has reached end-of-input
func (f *Feeder) EOF() bool {
return f.err == io.EOF
}
// Err returns the first non-EOF error the Feeder encountered, if any
func (f *Feeder) Err() error {
if f.err == io.EOF {
return nil
}
return f.err
}
+124
View File
@@ -0,0 +1,124 @@
package runes
import (
"unicode"
)
// Probe was borrowed from https://github.com/JamesOwenHall/json2.Scanner
//
// Probe is a func that returns a subset of the input and a success bool.
type Probe func([]rune) ([]rune, bool)
// If returns a probe that accepts a rune if it satisfies the condition.
func If(condition func(rune) bool) Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && condition(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// Rune returns a probe that accepts r.
func Rune(r rune) Probe {
return If(func(b rune) bool {
return r == b
})
}
// Space returns a probe that accepts whitespace as defined in the unicode
// package.
func Space() Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && unicode.IsSpace(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// And returns a probe that accepts all probes in sequence.
func And(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for _, s := range probes {
if read, ok := s(remaining); !ok {
return nil, false
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
return accumulated, true
}
}
// Or returns a probe that accepts the first successful probe in probes.
func Or(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
for _, s := range probes {
if read, ok := s(input); ok {
return read, true
}
}
return nil, false
}
}
// Maybe runs probe and returns true regardless of the output.
func Maybe(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
read, _ := probe(input)
return read, true
}
}
// Any returns a probe that accepts any number of occurrences of probe,
// including zero.
func Any(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for {
if read, ok := probe(remaining); !ok {
return accumulated, true
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
}
}
// N returns a probe that accepts probe exactly n times.
func N(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n)
for i := 0; i < n; i++ {
probes[i] = probe
}
return And(probes...)(input)
}
}
// AtLeast returns a probe that accepts probe at least n times.
func AtLeast(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n, n+1)
for i := range probes {
probes[i] = probe
}
probes = append(probes, Any(probe))
return And(probes...)(input)
}
}
+58
View File
@@ -0,0 +1,58 @@
package runes
import (
"testing"
)
func TestProbe(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "a"},
{Space(), " "},
{Space(), "\t"},
{Space(), "\n"},
{And(Rune('1'), Rune('2'), Space()), "12 "},
{Or(Rune('r'), Space(), Rune('x')), "r"},
{Or(Rune('r'), Space(), Rune('x')), " "},
{Or(Rune('r'), Space(), Rune('x')), "x"},
{Any(Rune('w')), ""},
{Any(Rune('w')), "w"},
{Any(Rune('w')), "ww"},
{Any(Rune('w')), "www"},
{N(6, Rune('w')), "wwwwww"},
{Maybe(Rune('w')), ""},
{Maybe(Rune('w')), "w"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); !ok {
t.Errorf("Expected to read %s", string(test.input))
} else if string(read) != test.input {
t.Errorf("Mismatch of input %s and read %s", test.input, string(read))
}
}
}
func TestProbeFail(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "b"},
{Space(), "a"},
{And(Rune('1'), Rune('2'), Space()), "12"},
{Or(Rune('r'), Space(), Rune('x')), "4"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); ok {
t.Errorf("Unexpectedly read %s with input %s", string(read), test.input)
}
}
}
+5
View File
@@ -0,0 +1,5 @@
/*
Package scanner implements the low level functionality
of AsciiGoat lexers
*/
package scanner
+99
View File
@@ -0,0 +1,99 @@
package scanner
import (
"unicode/utf8"
)
const (
// EOF is a dummy rune representing End-Of-File
EOF = -1
)
// A Position in the input string and in the line-based document
type Position struct {
Offset uint
Line, Column uint
}
// A Scanner represents the low level layer for text parsers
type Scanner struct {
name string
input string
base Position
cursor Position
runes uint
}
// NewScannerFromString instantiates a new Scanner to
// parse a given string
func NewScannerFromString(name, input string) *Scanner {
return &Scanner{
name: name,
input: input,
base: Position{0, 1, 1},
cursor: Position{0, 1, 1},
runes: 0,
}
}
// Length returns the number of bytes and runes in the Terminal that is being detected
func (l *Scanner) Length() (uint, uint) {
return l.cursor.Offset - l.base.Offset, l.runes
}
// Empty tells if there are no runes accounted for the next Terminal yet
func (l *Scanner) Empty() bool {
return l.runes == 0
}
// StepForth moves the cursor forward
func (l *Scanner) StepForth(runes, bytes uint) {
l.cursor.Offset += bytes
l.cursor.Column += runes
l.runes += runes
}
// StepBack moves the cursor backward
func (l *Scanner) StepBack(runes, bytes uint) {
l.cursor.Offset -= bytes
// FIXME: what if column goes < 1?
l.cursor.Column -= runes
l.runes -= runes
}
// Reset moves the cursor back to the base
func (l *Scanner) Reset() {
l.cursor = l.base
l.runes = 0
}
// Skip trashes everything up to the cursor
func (l *Scanner) Skip() {
l.base = l.cursor
l.runes = 0
}
// NewLine accounts a line break in the position of the cursor
func (l *Scanner) NewLine() {
l.cursor.Line++
l.cursor.Column = 1
}
// Peek returns the next rune without moving the cursor
func (l *Scanner) Peek() (rune, uint) {
if l.cursor.Offset == uint(len(l.input)) {
return EOF, 0
}
r, bytes := utf8.DecodeRuneInString(l.input[l.cursor.Offset:])
return r, uint(bytes)
}
// Next returns the next rune, moving the cursor past it
func (l *Scanner) Next() (rune, uint) {
r, bytes := l.Peek()
if bytes > 0 {
l.StepForth(1, bytes)
}
return r, bytes
}
+43
View File
@@ -0,0 +1,43 @@
package scanner
import (
"unicode/utf8"
)
// A Terminal represents a literal element within a document
type Terminal struct {
val string
bytes, runes uint
line, col uint
}
// NewTerminalFull returns a new Terminal instance
func NewTerminalFull(val string, bytes, runes, line, col uint) *Terminal {
return &Terminal{
val: val,
bytes: bytes,
runes: runes,
line: line,
col: col,
}
}
// NewTerminal creates a Terminal instance without knowing its length
func NewTerminal(val string, line, col uint) *Terminal {
bytes := uint(len(val))
runes := uint(utf8.RuneCountInString(val))
return NewTerminalFull(val, bytes, runes, line, col)
}
// Position returns the position (line and column)
// of the Terminal in the source document
func (t *Terminal) Position() (uint, uint) {
return t.line, t.col
}
// Value returns the string corresponding to
// this Terminal and its size in bytes and runes
func (t *Terminal) Value() (string, uint, uint) {
return t.val, t.bytes, t.runes
}