4 Commits

Author SHA1 Message Date
amery 169379f5c6 Merge pull request 'parser: implement basic dosini parsing' (#2)
Reviewed-on: #2
2023-08-31 16:17:11 +02:00
amery eb36c195c0 parser: implement basic dosini parsing
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-31 01:55:47 +00:00
amery 1090a374f0 parser: add initial Parser emitting non-whitespace tokens
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-31 00:45:54 +00:00
amery a15deb7e42 tools: add stringer support
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 23:01:22 +00:00
9 changed files with 240 additions and 157 deletions
-2
View File
@@ -2,8 +2,6 @@ module asciigoat.org/ini
go 1.19
replace asciigoat.org/core => ../core
require (
asciigoat.org/core v0.3.6
github.com/mgechev/revive v1.3.3
+2
View File
@@ -1,3 +1,5 @@
asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
+100 -29
View File
@@ -1,8 +1,6 @@
package parser
import (
"asciigoat.org/core/lexer"
)
import "asciigoat.org/core/lexer"
// Run parses the source
func (p *Parser) Run() error {
@@ -17,46 +15,33 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
r, _, err := p.src.ReadRune()
switch {
case err != nil:
// read error
return p.emitError("", err)
case IsNewLine(r):
// new line
p.lexNewLine(r)
p.lexMoreNewLine(r)
p.stepLine()
case IsSpace(r):
// whitespace
p.stepRune()
default:
// token
case IsCommentStart(r):
// switch to comment lexer
p.src.UnreadRune()
return p.lexToken, nil
return p.lexComment, nil
case IsSectionStart(r):
// section
return p.lexSectionStart, nil
default:
// entry
p.src.UnreadRune()
return p.lexEntryStart, nil
}
}
}
func (p *Parser) lexToken() (lexer.StateFn, error) {
p.src.AcceptAll(IsNotSpace)
p.pushString(TokenUnknown)
return p.lexStart, nil
}
func (p *Parser) lexNewLine(r1 rune) {
// r1 is warrantied to be either \n or \r
func (p *Parser) lexMoreNewLine(r1 rune) {
// r1 is warrantied to be either '\r' or '\n'
r2, _, err := p.src.ReadRune()
switch r1 {
case '\r':
switch {
case r2 == '\n':
// CR LN
case err == nil:
// CR
p.src.UnreadRune()
default:
// CR EOF
}
case '\n':
switch {
case r2 == '\r':
@@ -67,7 +52,93 @@ func (p *Parser) lexNewLine(r1 rune) {
default:
// LN EOF
}
case '\r':
switch {
case r2 == '\n':
// CR LN
case err == nil:
// CR
p.src.UnreadRune()
default:
// CR EOF
}
default:
panic("unreachable")
}
}
func (p *Parser) lexComment() (lexer.StateFn, error) {
// until the end of the line
p.src.AcceptAll(IsNotNewLine)
err := p.emitString(TokenComment)
return p.lexStart, err
}
func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
if err := p.emitString(TokenSectionStart); err != nil {
return nil, err
}
// remove whitespace between `[` and the name
if p.src.AcceptAll(IsSpaceNotNewLine) {
p.stepString()
}
if !p.src.AcceptAll(IsName) {
// no name
return p.emitError("section name missing", lexer.ErrUnacceptableRune)
}
if err := p.emitString(TokenSectionName); err != nil {
return nil, err
}
// remove whitespace between the name andthe closing `]`
if p.src.AcceptAll(IsSpaceNotNewLine) {
p.stepString()
}
r, _, err := p.src.ReadRune()
switch {
case err != nil:
return p.emitError("", err)
case IsSectionEnd(r):
err := p.emitString(TokenSectionEnd)
return p.lexStart, err
default:
return p.emitInvalidRune(r)
}
}
func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
p.src.AcceptAll(IsName)
if err := p.emitString(TokenFieldKey); err != nil {
return nil, err
}
// ignore whitespace between key and the '=' sign
if p.src.AcceptAll(IsSpaceNotNewLine) {
p.stepString()
}
r, _, err := p.src.ReadRune()
switch {
case err != nil:
return p.emitError("", err)
case r != RuneFieldEqual:
return p.emitInvalidRune(r)
}
// ignore whitespace between the '=' and the value
if p.src.AcceptAll(IsSpaceNotNewLine) {
p.stepString()
}
p.src.AcceptAll(IsNotNewLine)
if err := p.emitString(TokenFieldValue); err != nil {
return nil, err
}
return p.lexStart, err
}
-15
View File
@@ -1,15 +0,0 @@
package parser
import "asciigoat.org/core/lexer"
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
err2 := p.OnError(p.pos, content, err)
switch {
case err2 != nil:
// return wrapped error
return nil, err2
default:
// return original error
return nil, err
}
}
-44
View File
@@ -1,44 +0,0 @@
package parser
import "log"
func (p *Parser) push(tok Token) {
n := len(p.queue)
p.queue = append(p.queue, tok)
log.Printf("queue[%v]: %s", n, tok)
}
func (p *Parser) pushString(typ TokenType) {
s := p.src.Emit()
el := Token{
Type: typ,
Value: s,
Position: p.pos,
}
p.pos.StepN(len(s))
p.push(el)
}
// stepLine discards the data and moves the position
// to the next line
func (p *Parser) stepLine() {
p.src.Discard()
p.pos.StepLine()
}
// stepRune discards the data and moves the position
// on rune forward on the same line
func (p *Parser) stepRune() {
p.src.Discard()
p.pos.Step()
}
func (p *Parser) stepString() string {
s := p.src.Emit()
p.pos.StepN(len(s))
return s
}
+55 -7
View File
@@ -1,19 +1,67 @@
package parser
import "asciigoat.org/core/lexer"
import (
"strings"
"asciigoat.org/core/lexer"
)
const (
RuneComment = ';' // RuneComment is the standard dosini comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
)
var (
// IsNewLine tells if a rune represents a line break or the start of one
IsNewLine = lexer.NewIsIn("\n\r")
// IsSpace tells if a rune is considered whitespace by unicode
IsSpace = lexer.IsSpace
// IsNotNewLine tells if a rune is anything other than line breaks
// RunesComment is a string containing all runes acceptable to start comments
RunesComment = string([]rune{
RuneComment,
RuneCommentExtra,
})
// RunesSpecial is a string containing all the runes with special meaning
RunesSpecial = string([]rune{
RuneComment,
RuneCommentExtra,
RuneSectionStart,
RuneSectionEnd,
RuneFieldEqual,
})
)
var (
// IsNewLine tells if the rune indicates a line break or the start of one
IsNewLine = lexer.NewIsIn("\r\n")
// IsNotNewLine tells if the rune is not a line break nor the start of one
IsNotNewLine = lexer.NewIsNot(IsNewLine)
// IsNotSpace tells if a rune is anything other than whitespace
// IsSpace tells if the rune is considered whitespace by Unicode
IsSpace = lexer.IsSpace
// IsNotSpace tells if the rune is not considered whitespace by Unicode
IsNotSpace = lexer.NewIsNot(IsSpace)
// IsCommentStart ...
IsCommentStart = lexer.NewIsIn(RunesComment)
)
// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
func IsSpaceNotNewLine(r rune) bool {
return IsSpace(r) && !IsNewLine(r)
}
// IsSectionStart indicates the rune starts the section declaration
func IsSectionStart(r rune) bool { return r == RuneSectionStart }
// IsSectionEnd indicates the rune ends the section declaration
func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
// IsName indicates a rune is acceptable for section or field names
func IsName(r rune) bool {
switch {
case IsSpace(r):
return false
case strings.ContainsRune(RunesSpecial, r):
return false
default:
return true
}
}
+67 -36
View File
@@ -3,6 +3,7 @@ package parser
import (
"io"
"log"
"asciigoat.org/core/lexer"
)
@@ -10,61 +11,91 @@ import (
// Parser parses a dosini-style document
type Parser struct {
src *lexer.Reader
pos lexer.Position
pos lexer.Position
queue []Token
// OnToken is called for each identified token. if it returns an error
// parsing is interrupted.
OnToken func(pos lexer.Position, typ TokenType, value string) error
// OnSection is called after a [section] is parsed.
// Returning an error will abort the process.
OnSection func(pos lexer.Position, name, subname string, hasSubname bool) error
// OnField is called after a `key = value` entry is parsed
// Returning an error will abort the process.
OnField func(pos lexer.Position, key, value string) error
// OnComment is called after a comment is parsed
// Returning an error will abort the process.
OnComment func(pos lexer.Position, comment string) error
// OnError is called after each parsing error, which you are allowed to
// override.
// OnError is called for EOF as well, but this error isn't returned as such by
// Parser.Run(). The caller will receive (nil, nil) instead indicating the
// processes terminated correctly.
// OnError is called in case of a parsing error, and it's allowed
// to replace the error returned by [Parser.Run].
// OnError is called for io.EOF, but [Parser.Run] will consider it
// normal termination.
OnError func(pos lexer.Position, content string, err error) error
}
func defaultOnSection(_ lexer.Position, _, _ string, _ bool) error { return nil }
func defaultOnField(_ lexer.Position, _, _ string) error { return nil }
func defaultOnComment(_ lexer.Position, _ string) error { return nil }
func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value)
return nil
}
func defaultOnError(pos lexer.Position, content string, err error) error {
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
}
func (p *Parser) setDefaults() {
if p.OnSection == nil {
p.OnSection = defaultOnSection
if p.OnToken == nil {
p.OnToken = defaultOnToken
}
if p.OnField == nil {
p.OnField = defaultOnField
}
if p.OnComment == nil {
p.OnComment = defaultOnComment
}
if p.OnError == nil {
p.OnError = defaultOnError
}
}
func (p *Parser) emitString(typ TokenType) error {
s := p.src.Emit()
err := p.OnToken(p.pos, typ, s)
p.pos.StepN(len(s))
return err
}
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
err2 := p.OnError(p.pos, content, err)
switch {
case err2 != nil:
// return wrapped error
return nil, err2
default:
// return original error
return nil, err
}
}
func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
}
// stepLine discards the data and moves the position
// to the next line.
func (p *Parser) stepLine() {
p.src.Discard()
p.pos.StepLine()
}
// stepRune discards the data and moves the position
// one rune forward on the same line.
func (p *Parser) stepRune() {
p.src.Discard()
p.pos.Step()
}
// stepString discards the data and moves the position
// forward on the same line the length of the discarded
// content.
func (p *Parser) stepString() {
s := p.src.Emit()
p.pos.StepN(len(s))
}
// NewParser creates a dosini-style parser using
// an [io.Reader] as source
func NewParser(r io.Reader) *Parser {
+7 -17
View File
@@ -1,11 +1,6 @@
package parser
//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
import (
"fmt"
"asciigoat.org/core/lexer"
)
// A TokenType is a type of Token
type TokenType uint
@@ -13,6 +8,12 @@ type TokenType uint
const (
// TokenUnknown represents a Token that hasn't been identified
TokenUnknown TokenType = iota
// TokenSectionStart indicates the opening marker of a section declaration.
// The left squared bracket.
TokenSectionStart
// TokenSectionEnd indicates the closing marker of a section declaration.
// The right squared bracket.
TokenSectionEnd
// TokenSectionName represents the section name between the squared brackets
TokenSectionName
// TokenSectionSubname represents a secondary name in the section represented
@@ -21,21 +22,10 @@ const (
// [section_name "section_subname"]
TokenSectionSubname
// TokenComment represents a comment, including the initial ';' or '#' until
// the end of the line
// the end of the line.
TokenComment
// TokenFieldKey represents a field name in a `key = value` entry
TokenFieldKey
// TokenFieldValue represents a field value in a `key = value` entry
TokenFieldValue
)
// A Token is an element from the document
type Token struct {
Type TokenType
Position lexer.Position
Value string
}
func (t Token) String() string {
return fmt.Sprintf("%s:%v:%v: %q", t.Type, t.Position.Line, t.Position.Column, t.Value)
}
+9 -7
View File
@@ -9,16 +9,18 @@ func _() {
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[TokenUnknown-0]
_ = x[TokenSectionName-1]
_ = x[TokenSectionSubname-2]
_ = x[TokenComment-3]
_ = x[TokenFieldKey-4]
_ = x[TokenFieldValue-5]
_ = x[TokenSectionStart-1]
_ = x[TokenSectionEnd-2]
_ = x[TokenSectionName-3]
_ = x[TokenSectionSubname-4]
_ = x[TokenComment-5]
_ = x[TokenFieldKey-6]
_ = x[TokenFieldValue-7]
}
const _TokenType_name = "TokenUnknownTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
var _TokenType_index = [...]uint8{0, 12, 28, 47, 59, 72, 87}
var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
func (i TokenType) String() string {
if i >= TokenType(len(_TokenType_index)-1) {