diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..b1a4a3e
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,90 @@
+package parser
+
+import (
+	"log"
+	"unicode/utf8"
+
+	"asciigoat.org/core/lexer"
+)
+
+// Run resets the position and runs the lexer over the source,
+// starting at lexStart.
+func (p *Parser) Run() error {
+	p.pos.Reset()
+	return lexer.Run(p.lexStart)
+}
+
+// lexStart skips whitespace and line breaks, updating the position
+// as it goes. It hands over to lexToken at the first rune that is
+// neither, and returns the error if reading fails.
+func (p *Parser) lexStart() (lexer.StateFn, error) {
+	for {
+		r, _, err := p.src.ReadRune()
+		switch {
+		case err != nil:
+			// read error
+			log.Printf("%s: %s: %s", p.pos, "error", err)
+			return nil, err
+		case IsNewLine(r):
+			// new line, possibly two runes long
+			p.lexNewLine(r)
+			p.src.Discard()
+			p.pos.StepLine()
+		case IsSpace(r):
+			// whitespace
+			p.src.Discard()
+			p.pos.Step()
+		default:
+			// token, give its first rune back to lexToken
+			p.src.UnreadRune()
+			return p.lexToken, nil
+		}
+	}
+}
+
+// lexToken accepts the run of non-whitespace runes, logs it as a
+// token, advances the position past it and hands back to lexStart.
+func (p *Parser) lexToken() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsNotSpace)
+
+	s := p.src.Emit()
+	log.Printf("%s: %s: %q", p.pos, "token", s)
+
+	// lexStart steps once per rune, so count runes here as well.
+	// len(s) is a byte count and overshoots on multi-byte UTF-8.
+	p.pos.StepN(utf8.RuneCountInString(s))
+
+	return p.lexStart, nil
+}
+
+// lexNewLine reads the rune after r1 and, if it does not complete
+// a CR LF or LF CR pair, unreads it. A failed read is ignored.
+func (p *Parser) lexNewLine(r1 rune) {
+	// r1 is guaranteed to be either \n or \r
+	r2, _, err := p.src.ReadRune()
+
+	switch r1 {
+	case '\r':
+		switch {
+		case r2 == '\n':
+			// CR LF
+		case err == nil:
+			// CR
+			p.src.UnreadRune()
+		default:
+			// CR, then EOF or another read error
+		}
+	case '\n':
+		switch {
+		case r2 == '\r':
+			// LF CR
+		case err == nil:
+			// LF
+			p.src.UnreadRune()
+		default:
+			// LF, then EOF or another read error
+		}
+	default:
+		panic("unreachable")
+	}
+}
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
new file mode 100644
index 0000000..25d37d6
--- /dev/null
+++ b/parser/lexer_runes.go
@@ -0,0 +1,14 @@
+package parser
+
+import "asciigoat.org/core/lexer"
+
+var (
+	// IsNewLine tells if a rune represents a line break or the start of one
+	IsNewLine = lexer.NewIsIn("\n\r")
+	// IsSpace tells if a rune is considered whitespace by unicode
+	IsSpace = lexer.IsSpace
+	// IsNotNewLine tells if a rune is anything other than line breaks
+	IsNotNewLine = lexer.NewIsNot(IsNewLine)
+	// IsNotSpace tells if a rune is anything other than whitespace
+	IsNotSpace = lexer.NewIsNot(IsSpace)
+)
diff --git a/parser/parser.go b/parser/parser.go
index 5d399fd..723e589 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -10,6 +10,9 @@ import (
 // Parser parses a dosini-style document
 type Parser struct {
 	src *lexer.Reader
+
+	// pos is the position advanced by the lexer as it reads src
+	pos lexer.Position
 }
 
 // NewParser creates a dosini-style parser using