5 Commits

Author SHA1 Message Date
amery be53431904 lexer: to simplify states, Lexer.EmitError() assumes EOF if nil is passed
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 03:02:43 +01:00
amery 3edf777c68 lexer: add Lexer.AtLeast() to gather input data from the Feeder
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-04 00:40:22 +01:00
amery 36427e059f lexer: add initial generic Lexer
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 20:36:55 +01:00
amery 90e9fc47cf lexer: add Step()/NewLine()/Reset() methods to TokenPosition
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 19:59:25 +01:00
amery 6e05cdbb28 lexer: add generic Token
Signed-off-by: Alejandro Mery <amery@jpi.io>
2021-07-03 18:02:49 +01:00
2 changed files with 251 additions and 0 deletions
+126
View File
@@ -0,0 +1,126 @@
package lexer
import (
"errors"
"fmt"
"asciigoat.org/core/runes"
)
// StateFn is a single state of the lexer's state machine. It consumes
// input through the given Lexer and returns the next state to run, or
// nil to tell Run() the machine is done.
type StateFn func(Lexer) StateFn
// Lexer is the interface a StateFn uses to read input, track position,
// and emit tokens while the state machine runs.
type Lexer interface {
	Run()                    // run state machine
	Position() TokenPosition // base for the next token
	Tokens() <-chan Token    // tokens output

	// AtLeast tries to make at least n runes available past the
	// look-ahead cursor, returning the pending (not yet emitted) runes.
	AtLeast(n int) ([]rune, error)
	// NewLine records a consumed line break in the base position.
	NewLine()
	// Step advances the look-ahead cursor by n runes.
	Step(n int)

	// Emit sends a token of the given type built from the runes
	// between the base position and the cursor.
	Emit(TokenType)
	// EmitError sends an error token; nil is shorthand for EOF.
	EmitError(error)
	// EmitErrorf sends an error token with an Sprintf-formatted message.
	EmitErrorf(string, ...interface{})
	// EmitSyntaxError sends a SyntaxErrorToken for the current position.
	EmitSyntaxError(string, ...interface{})
}
// lexer is the reference Lexer implementation: it drives a StateFn
// machine over a runes.Feeder and delivers tokens on a channel.
type lexer struct {
	start  StateFn       // initial state
	in     *runes.Feeder // runes source
	pos    TokenPosition // base for the next token
	cursor int           // look ahead pointer, runes past pos not yet emitted
	tokens chan Token    // tokens output
}
// NewLexer assembles a Lexer that begins in the given state, reads runes
// from in, and emits on a channel buffered for `tokens` entries. The
// position starts at line 1, row 1.
func NewLexer(start StateFn, in *runes.Feeder, tokens int) Lexer {
	lex := new(lexer)
	lex.start = start
	lex.in = in
	lex.pos = TokenPosition{Line: 1, Row: 1}
	lex.tokens = make(chan Token, tokens)
	return lex
}
// Run executes the state machine until a state returns nil. The token
// channel is closed on the way out (deferred, so receivers are released
// even if a state panics).
func (lex *lexer) Run() {
	defer close(lex.tokens)

	state := lex.start
	for state != nil {
		state = state(lex)
	}
}
// AtLeast asks the Feeder for enough runes so that at least n are
// available beyond the look-ahead cursor, and returns the pending
// (cursor-onward) runes. n <= 0 only returns what is already buffered.
func (lex *lexer) AtLeast(n int) ([]rune, error) {
	want := lex.cursor
	if n > 0 {
		want += n
	}

	buf, err := lex.in.AtLeast(want)

	var pending []rune
	if len(buf) > lex.cursor {
		pending = buf[lex.cursor:]
	}
	return pending, err
}
// Position returns the base position where the next emitted token
// will start.
func (lex *lexer) Position() TokenPosition {
	return lex.pos
}

// Step advances the look-ahead cursor by n runes without consuming
// them from the Feeder.
func (lex *lexer) Step(n int) {
	lex.cursor += n
}

// NewLine records a consumed line break, moving the base position to
// the start of the next line.
func (lex *lexer) NewLine() {
	lex.pos.NewLine()
}

// Tokens returns the receive-only channel on which emitted tokens
// are delivered.
func (lex *lexer) Tokens() <-chan Token {
	return lex.tokens
}
// Emit sends a token of the given type carrying the runes between the
// base position and the look-ahead cursor, then advances the base so
// the next token starts where this one ended. With cursor at 0 an
// empty token is emitted at the current position.
func (lex *lexer) Emit(typ TokenType) {
	var text []rune
	pos := lex.pos
	// extract text to emit, and update cursor for the next
	if n := lex.cursor; n > 0 {
		// NOTE(review): text aliases the Feeder's buffer and Skip(n)
		// is called before NewToken copies it into a string — assumes
		// Skip leaves the first n runes intact; confirm against
		// runes.Feeder's implementation.
		text = lex.in.Runes()[:n]
		lex.in.Skip(n)
		lex.pos.Step(n)
		lex.cursor = 0
	}
	lex.tokens <- NewToken(typ, text, pos)
}
// EmitError sends an error token at the current base position.
// Passing nil is shorthand for end-of-input (EOF).
func (lex *lexer) EmitError(err error) {
	if err == nil {
		// callers pass nil to signal end-of-input
		err = EOF
	}

	tok := NewErrorToken(err, lex.pos)
	lex.tokens <- tok
}
// EmitErrorf sends an error token whose message is s formatted with
// args. With no args, s is used verbatim so stray verbs in it are
// not mangled by Sprintf.
func (lex *lexer) EmitErrorf(s string, args ...interface{}) {
	msg := s
	if len(args) != 0 {
		msg = fmt.Sprintf(s, args...)
	}

	lex.tokens <- NewErrorToken(errors.New(msg), lex.pos)
}
// EmitSyntaxError sends a SyntaxErrorToken at the current position,
// snapshotting the cursor and the Feeder's pending runes. With no
// args, s is used verbatim instead of being run through Sprintf.
func (lex *lexer) EmitSyntaxError(s string, args ...interface{}) {
	msg := s
	if len(args) != 0 {
		msg = fmt.Sprintf(s, args...)
	}

	lex.tokens <- NewSyntaxErrorToken(msg, lex.pos, lex.cursor, lex.in.Runes())
}
+125
View File
@@ -0,0 +1,125 @@
package lexer
import (
"errors"
"fmt"
"io"
)
var (
	// EOF is the sentinel error used to flag the end of the input
	// stream. It aliases io.EOF so errors.Is(err, io.EOF) also works.
	EOF = io.EOF
)
// TokenType identifies the class of a lexical token. Concrete lexers
// are expected to extend this enumeration past TokenError.
type TokenType int

const (
	// TokenError marks tokens that carry an error instead of text.
	TokenError TokenType = iota
)
// TokenPosition points at where a token starts in the source, as a
// 1-based line number and row (rune offset within the line).
type TokenPosition struct {
	Line int
	Row  int
}

// Reset rewinds the position to the beginning of the input (1.1).
func (pos *TokenPosition) Reset() {
	pos.Line, pos.Row = 1, 1
}

// Step advances the position n runes within the current line.
func (pos *TokenPosition) Step(n int) {
	pos.Row += n
}

// NewLine moves the position to the first rune of the next line.
func (pos *TokenPosition) NewLine() {
	pos.Line++
	pos.Row = 1
}
// Token is the minimal interface implemented by all lexical tokens.
type Token interface {
	Type() TokenType
	String() string
	Position() TokenPosition
}

// token is the plain Token implementation produced by NewToken.
type token struct {
	typ TokenType
	pos TokenPosition
	val string
}

// NewToken builds an immutable Token of the given type, copying val
// into a string and remembering where it started.
func NewToken(typ TokenType, val []rune, pos TokenPosition) Token {
	t := &token{typ: typ, pos: pos}
	t.val = string(val)
	return t
}

// Type tells what class of token this is.
func (t token) Type() TokenType { return t.typ }

// Position tells where the token started in the source.
func (t token) Position() TokenPosition { return t.pos }

// String returns the token's literal text.
func (t token) String() string { return t.val }
// ErrorToken extends Token with the error interface plus Unwrap, so
// error tokens participate in errors.Is/errors.As chains.
type ErrorToken interface {
	Token
	Error() string
	Unwrap() error
}

// errorToken pairs a plain token with the underlying error it carries.
type errorToken struct {
	token
	err error
}

// NewErrorToken builds a TokenError token at pos whose text is
// err.Error().
func NewErrorToken(err error, pos TokenPosition) ErrorToken {
	t := &errorToken{err: err}
	t.token = token{
		typ: TokenError,
		val: err.Error(),
		pos: pos,
	}
	return t
}

// Error implements the error interface.
func (t errorToken) Error() string { return t.err.Error() }

// Unwrap exposes the wrapped error for errors.Is/errors.As.
func (t errorToken) Unwrap() error { return t.err }
// SyntaxErrorToken augments an ErrorToken with the look-ahead cursor
// offset and a snapshot of the pending input buffer, for diagnostics.
type SyntaxErrorToken struct {
	ErrorToken
	Cursor int
	Buffer string
}

// NewSyntaxErrorToken composes a "Syntax Error at line.row+cursor[: msg]"
// error token at pos, copying buffer into the token for later inspection.
func NewSyntaxErrorToken(msg string, pos TokenPosition, cur int, buffer []rune) *SyntaxErrorToken {
	text := fmt.Sprintf("Syntax Error at %v.%v+%v", pos.Line, pos.Row, cur)
	if msg != "" {
		text = fmt.Sprintf("%s: %s", text, msg)
	}

	tok := &SyntaxErrorToken{
		Cursor: cur,
		Buffer: string(buffer),
	}
	tok.ErrorToken = NewErrorToken(errors.New(text), pos)
	return tok
}