parser: implement initial tokeniser

only logging position, errors and non-whitespace elements

Signed-off-by: Alejandro Mery <amery@jpi.io>
This commit is contained in:
2023-08-30 00:25:15 +00:00
parent 1e75557bc3
commit 196655d53e
2 changed files with 86 additions and 2 deletions
+67 -2
View File
@@ -1,11 +1,76 @@
package parser
import "asciigoat.org/core/lexer"
import (
"log"
"asciigoat.org/core/lexer"
)
// Run parses the source.
//
// It calls setDefaults and resets the position tracker before handing
// control to lexer.Run with lexStart as the initial state. The error
// returned by lexer.Run is passed back to the caller unchanged.
func (p *Parser) Run() error {
	p.setDefaults()
	p.pos.Reset()

	// start the state machine at lexStart; a stale `return lexer.Run(nil)`
	// ahead of this line would make it unreachable.
	return lexer.Run(p.lexStart)
}
// lexStart is the initial lexer state. It reads runes one at a time,
// skipping line breaks and whitespace, until it meets the first rune of a
// token. That rune is pushed back and lexToken is returned as the next
// state. A read error is logged together with the current position and
// returned, with no next state.
func (p *Parser) lexStart() (lexer.StateFn, error) {
	for {
		c, _, readErr := p.src.ReadRune()
		if readErr != nil {
			// read error
			log.Printf("%s: %s: %s", p.pos, "error", readErr)
			return nil, readErr
		}

		if IsNewLine(c) {
			// line break: let lexNewLine deal with a possible second
			// rune, then account for the new line
			p.lexNewLine(c)
			p.stepLine()
			continue
		}

		if IsSpace(c) {
			// plain whitespace
			p.stepRune()
			continue
		}

		// first rune of a token: hand it back for lexToken to consume
		p.src.UnreadRune()
		return p.lexToken, nil
	}
}
// lexToken accepts every rune matching IsNotSpace, pushes what was
// accepted as a TokenUnknown string, and then hands control back to
// lexStart. It never returns an error itself.
func (p *Parser) lexToken() (lexer.StateFn, error) {
	// take everything up to the next whitespace rune
	p.src.AcceptAll(IsNotSpace)
	p.pushString(TokenUnknown)

	// resume skipping whitespace
	var next lexer.StateFn = p.lexStart
	return next, nil
}
// lexNewLine handles the rune following a line-break rune that the caller
// has already read.
//
// r1 is guaranteed to be either \n or \r; any other value panics. When the
// next rune is the opposite line-break rune (CR LF or LF CR) it is
// consumed as part of the same line break. Any other successfully read
// rune is pushed back. When the read fails (for instance at end of input)
// nothing is pushed back.
func (p *Parser) lexNewLine(r1 rune) {
	next, _, readErr := p.src.ReadRune()

	// the rune that would complete a two-rune line break started by r1
	var partner rune
	switch r1 {
	case '\r':
		partner = '\n'
	case '\n':
		partner = '\r'
	default:
		panic("unreachable")
	}

	if next == partner {
		// CR LF or LF CR: both runes belong to this line break
		return
	}

	if readErr == nil {
		// lone CR or LF: the rune just read belongs to what follows
		p.src.UnreadRune()
	}
	// otherwise CR or LF was the last rune that could be read
}
+19
View File
@@ -0,0 +1,19 @@
package parser
import "asciigoat.org/core/lexer"
var (
	// IsNewLine tells if a rune is a line break or the first rune of one
	IsNewLine = lexer.NewIsIn("\n\r")
	// IsNotNewLine is the negation of IsNewLine
	IsNotNewLine = lexer.NewIsNot(IsNewLine)

	// IsSpace tells if a rune is whitespace, as decided by lexer.IsSpace
	IsSpace = lexer.IsSpace
	// IsNotSpace is the negation of IsSpace
	IsNotSpace = lexer.NewIsNot(IsSpace)
)
// IsSpaceNotNewLine reports whether r is whitespace according to IsSpace
// while not being a line-break rune according to IsNewLine.
func IsSpaceNotNewLine(r rune) bool {
	if !IsSpace(r) {
		// not whitespace at all
		return false
	}
	return !IsNewLine(r)
}