From 2b9996da7c06d7dc5655abe100098d8b6a9a7ed8 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Wed, 30 Aug 2023 00:25:15 +0000 Subject: [PATCH] parser: implement initial tokeniser only logging position, errors and non-whitespace elements Signed-off-by: Alejandro Mery --- parser/lexer.go | 71 +++++++++++++++++++++++++++++++++++++++++++ parser/lexer_runes.go | 14 +++++++++ parser/parser.go | 2 ++ 3 files changed, 87 insertions(+) create mode 100644 parser/lexer.go create mode 100644 parser/lexer_runes.go diff --git a/parser/lexer.go b/parser/lexer.go new file mode 100644 index 0000000..57fb4aa --- /dev/null +++ b/parser/lexer.go @@ -0,0 +1,71 @@ +package parser + +import ( + "log" + + "asciigoat.org/core/lexer" +) + +// Run resets the position and runs the lexer state machine from lexStart +func (p *Parser) Run() error { + p.pos.Reset() + return lexer.Run(p.lexStart) +} + +func (p *Parser) lexStart() (lexer.StateFn, error) { + for { + r, _, err := p.src.ReadRune() + switch { + case err != nil: + // read error: log it with the current position and return it + log.Printf("%s: %s: %s", p.pos, "error", err) + return nil, err + case IsNewLine(r): + // line break, possibly the first rune of a two-rune sequence + p.lexNewLine(r) + case IsSpace(r): + // whitespace: discard it and step the position + p.lexWhitespace() + default: + // first rune of a token: unread it and hand over to lexToken + p.src.UnreadRune() + return p.lexToken, nil + } + } +} + +func (p *Parser) lexToken() (lexer.StateFn, error) { + p.src.AcceptAll(IsNotSpace) + + s := p.src.Emit() + log.Printf("%s: %s: %q", p.pos, "token", s) + p.pos.StepN(len(s)) + + return p.lexStart, nil +} + +func (p *Parser) lexWhitespace() { + p.src.Discard() + p.pos.Step() +} + +func (p *Parser) lexNewLine(r1 rune) { + // r1 is guaranteed to be either \n or \r + r2, _, err := p.src.ReadRune() + + switch { + case r1 == '\r' && r2 == '\n': + // CR LF + case r1 == '\r' && err == nil: + // lone CR: unread the rune that followed it + p.src.UnreadRune() + case r2 == '\r': + // LF CR + case err == nil: + // lone LF: unread the rune that followed it + p.src.UnreadRune() + } + + p.src.Discard() + p.pos.StepLine() +} diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go new file mode 100644 index 0000000..25d37d6 --- /dev/null +++ 
b/parser/lexer_runes.go @@ -0,0 +1,14 @@ +package parser + +import "asciigoat.org/core/lexer" + +var ( + // IsNewLine tells if a rune represents a line break or the start of one + IsNewLine = lexer.NewIsIn("\n\r") + // IsSpace tells if a rune is considered whitespace by Unicode + IsSpace = lexer.IsSpace + // IsNotNewLine tells if a rune is anything other than a line break + IsNotNewLine = lexer.NewIsNot(IsNewLine) + // IsNotSpace tells if a rune is anything other than whitespace + IsNotSpace = lexer.NewIsNot(IsSpace) +) diff --git a/parser/parser.go b/parser/parser.go index 5d399fd..723e589 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -10,6 +10,8 @@ import ( // Parser parses a dosini-style document type Parser struct { src *lexer.Reader + + pos lexer.Position } // NewParser creates a dosini-style parser using