diff --git a/lexer/lexer.go b/lexer/lexer.go new file mode 100644 index 0000000..8033769 --- /dev/null +++ b/lexer/lexer.go @@ -0,0 +1,104 @@ +package lexer + +import ( + "errors" + "fmt" + + "asciigoat.org/core/runes" +) + +// state function +type StateFn func(Lexer) StateFn + +type Lexer interface { + Run() // run state machine + + Position() TokenPosition // base for the next token + Tokens() <-chan Token // tokens output + + NewLine() + Step(n int) + + Emit(TokenType) + EmitError(error) + EmitErrorf(string, ...interface{}) + EmitSyntaxError(string, ...interface{}) +} + +type lexer struct { + start StateFn // initial state + + in *runes.Feeder // runes source + pos TokenPosition // base for the next token + cursor int // look ahead pointer + tokens chan Token // tokens output +} + +func NewLexer(start StateFn, in *runes.Feeder, tokens int) Lexer { + return &lexer{ + start: start, + in: in, + pos: TokenPosition{1, 1}, + tokens: make(chan Token, tokens), + } +} + +func (lex *lexer) Run() { + defer close(lex.tokens) + + for state := lex.start; state != nil; { + state = state(lex) + } +} + +func (lex *lexer) Position() TokenPosition { + return lex.pos +} + +func (lex *lexer) Step(n int) { + lex.cursor += n +} + +func (lex *lexer) NewLine() { + lex.pos.NewLine() +} + +func (lex *lexer) Tokens() <-chan Token { + return lex.tokens +} + +func (lex *lexer) Emit(typ TokenType) { + var text []rune + + pos := lex.pos + + // extract text to emit, and update cursor for the next + if n := lex.cursor; n > 0 { + text = lex.in.Runes()[:n] + lex.in.Skip(n) + lex.pos.Step(n) + lex.cursor = 0 + } + + lex.tokens <- NewToken(typ, text, pos) +} + +func (lex *lexer) EmitError(err error) { + lex.tokens <- NewErrorToken(err, lex.pos) +} + +func (lex *lexer) EmitErrorf(s string, args ...interface{}) { + if len(args) > 0 { + s = fmt.Sprintf(s, args...) + } + + lex.tokens <- NewErrorToken(errors.New(s), lex.pos) +} + +func (lex *lexer) EmitSyntaxError(s string, args ...interface{}) { + if len(args) > 0 { + s = fmt.Sprintf(s, args...) + } + + lex.tokens <- NewSyntaxErrorToken(s, lex.pos, lex.cursor, lex.in.Runes()) +}