Merge pull request 'parser: implement basic dosini parsing' (#2)

Reviewed-on: #2
2 years ago · 169379f5c6
8 changed files with 392 additions and 2 deletions
--- a/go.mod
+++ b/go.mod
@ -2,7 +2,11 @@ module asciigoat.org/ini
 go 1.19
-require github.com/mgechev/revive v1.3.3
+require (
 	asciigoat.org/core v0.3.6
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )
 require (
 	github.com/BurntSushi/toml v1.3.2 // indirect
@ -16,6 +20,6 @@ require (
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
 	golang.org/x/tools v0.12.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@ -1,3 +1,5 @@
 asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
 asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
@ -36,6 +38,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
 golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
--- a/parser/lexer.go
+++ b/parser/lexer.go
@ -0,0 +1,144 @@
 package parser
 import "asciigoat.org/core/lexer"
 // Run parses the source from the beginning. Callbacks left unset are
 // replaced by the defaults and the position is reset before the state
 // machine is started at lexStart.
 func (p *Parser) Run() error {
 	p.setDefaults()
 	p.pos.Reset()
 	initial := p.lexStart
 	return lexer.Run(initial)
 }
 // lexStart is the top-level state. It skips line breaks and blank space
 // and picks the state that handles whatever comes next: a comment,
 // a section declaration or a key/value entry.
 func (p *Parser) lexStart() (lexer.StateFn, error) {
 	for {
 		ch, _, err := p.src.ReadRune()
 		if err != nil {
 			// every read failure is reported through OnError
 			return p.emitError("", err)
 		}
 		// line breaks are tested before IsSpace so they move the
 		// position to the next line instead of the next column
 		if IsNewLine(ch) {
 			p.lexMoreNewLine(ch)
 			p.stepLine()
 			continue
 		}
 		if IsSpace(ch) {
 			p.stepRune()
 			continue
 		}
 		if IsSectionStart(ch) {
 			// the opening bracket stays consumed, lexSectionStart emits it
 			return p.lexSectionStart, nil
 		}
 		// anything else is given back so the next state sees it
 		p.src.UnreadRune()
 		if IsCommentStart(ch) {
 			return p.lexComment, nil
 		}
 		return p.lexEntryStart, nil
 	}
 }
 // lexMoreNewLine consumes the second half of a two-rune line break, so
 // CR LF and LF CR are taken as a single new line. r1 is the rune already
 // read, and it has to be either '\r' or '\n'.
 func (p *Parser) lexMoreNewLine(r1 rune) {
 	next, _, err := p.src.ReadRune()
 	// the only rune able to complete r1 into a pair
 	var partner rune
 	switch r1 {
 	case '\n':
 		partner = '\r'
 	case '\r':
 		partner = '\n'
 	default:
 		panic("unreachable")
 	}
 	if next != partner && err == nil {
 		// unrelated rune, give it back
 		p.src.UnreadRune()
 	}
 }
 // lexComment emits everything up to the end of the line as a
 // TokenComment and goes back to lexStart.
 func (p *Parser) lexComment() (lexer.StateFn, error) {
 	// the line break itself is left unread
 	p.src.AcceptAll(IsNotNewLine)
 	return p.lexStart, p.emitString(TokenComment)
 }
 // lexSectionStart handles a section declaration once its opening bracket
 // has been read. It emits TokenSectionStart, TokenSectionName and
 // TokenSectionEnd, in that order, dropping the blank space around
 // the name.
 func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
 	// what's pending at this point becomes the start token
 	if err := p.emitString(TokenSectionStart); err != nil {
 		return nil, err
 	}
 	// skip blanks between `[` and the name
 	if p.src.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	hasName := p.src.AcceptAll(IsName)
 	if !hasName {
 		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
 	}
 	if err := p.emitString(TokenSectionName); err != nil {
 		return nil, err
 	}
 	// skip blanks between the name and the closing `]`
 	if p.src.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	closing, _, err := p.src.ReadRune()
 	if err != nil {
 		return p.emitError("", err)
 	}
 	if !IsSectionEnd(closing) {
 		return p.emitInvalidRune(closing)
 	}
 	return p.lexStart, p.emitString(TokenSectionEnd)
 }
 // lexEntryStart handles a `key = value` entry. It emits the key as
 // TokenFieldKey and what follows the '=' sign, up to the end of the line,
 // as TokenFieldValue. Blank space around the '=' sign is dropped.
 // Anything other than '=' after the key is reported as an unacceptable
 // rune.
 func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
 	p.src.AcceptAll(IsName)
 	if err := p.emitString(TokenFieldKey); err != nil {
 		return nil, err
 	}
 	// ignore whitespace between key and the '=' sign
 	if p.src.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	r, _, err := p.src.ReadRune()
 	switch {
 	case err != nil:
 		return p.emitError("", err)
 	case r != RuneFieldEqual:
 		return p.emitInvalidRune(r)
 	}
 	// ignore the '=' sign and the whitespace between it and the value.
 	// the '=' is always pending here, so stepString has to run even when
 	// no whitespace follows. otherwise `key=value` yields "=value".
 	p.src.AcceptAll(IsSpaceNotNewLine)
 	p.stepString()
 	p.src.AcceptAll(IsNotNewLine)
 	if err := p.emitString(TokenFieldValue); err != nil {
 		return nil, err
 	}
 	return p.lexStart, nil
 }
--- a/parser/lexer_runes.go
+++ b/parser/lexer_runes.go
@ -0,0 +1,67 @@
 package parser
 import (
 	"strings"
 	"asciigoat.org/core/lexer"
 )
 const (
 	RuneComment      = ';' // RuneComment is the standard dosini comment character
 	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
 	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
 	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
 	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
 )
 var (
 	// RunesComment is a string containing all runes acceptable to start
 	// comments, ';' and '#'
 	RunesComment = string([]rune{
 		RuneComment,
 		RuneCommentExtra,
 	})
 	// RunesSpecial is a string containing all the runes with special meaning.
 	// None of them is acceptable in a name, see IsName.
 	RunesSpecial = string([]rune{
 		RuneComment,
 		RuneCommentExtra,
 		RuneSectionStart,
 		RuneSectionEnd,
 		RuneFieldEqual,
 	})
 )
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
 	// IsNotNewLine tells if the rune is not a line break nor the start of one
 	IsNotNewLine = lexer.NewIsNot(IsNewLine)
 	// IsSpace tells if the rune is considered whitespace by Unicode
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
 	// IsCommentStart tells if the rune is one of those in RunesComment,
 	// which start a comment
 	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
 // IsSpaceNotNewLine tells if the rune is whitespace that doesn't
 // break the line
 func IsSpaceNotNewLine(r rune) bool {
 	if !IsSpace(r) {
 		return false
 	}
 	return !IsNewLine(r)
 }
 // IsSectionStart tells if the rune opens a section declaration
 func IsSectionStart(r rune) bool {
 	return r == RuneSectionStart
 }
 // IsSectionEnd tells if the rune closes a section declaration
 func IsSectionEnd(r rune) bool {
 	return r == RuneSectionEnd
 }
 // IsName tells if the rune can be part of a section or field name.
 // Whitespace and the runes in RunesSpecial can't.
 func IsName(r rune) bool {
 	if IsSpace(r) {
 		return false
 	}
 	return !strings.ContainsRune(RunesSpecial, r)
 }
--- a/parser/parser.go
+++ b/parser/parser.go
@ -0,0 +1,109 @@
 // Package parser parses dosini-style files
 package parser
 import (
 	"io"
 	"log"
 	"asciigoat.org/core/lexer"
 )
 // Parser parses a dosini-style document
 type Parser struct {
 	// src is the reader the tokens are taken from
 	src *lexer.Reader
 	// pos is the position handed to the callbacks. it's moved forward
 	// as content is emitted or discarded.
 	pos lexer.Position
 	// OnToken is called for each identified token. If it returns an error
 	// parsing is interrupted.
 	// When unset, [Parser.Run] installs a callback that logs the token.
 	OnToken func(pos lexer.Position, typ TokenType, value string) error
 	// OnError is called in case of a parsing error, and it's allowed
 	// to replace the error returned by [Parser.Run].
 	// OnError is called for io.EOF, but [Parser.Run] will consider it
 	// normal termination.
 	// When unset, [Parser.Run] installs a callback that logs the error
 	// and wraps it in a lexer.Error.
 	OnError func(pos lexer.Position, content string, err error) error
 }
 // defaultOnToken is the OnToken callback used when none is set.
 // It logs the token and never fails.
 func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
 	line, column := pos.Line, pos.Column
 	log.Printf("%s:%v:%v: %q", typ, line, column, value)
 	return nil
 }
 // defaultOnError is the OnError callback used when none is set.
 // It logs the failure and returns it wrapped in a lexer.Error that
 // carries the position and the offending content.
 func defaultOnError(pos lexer.Position, content string, err error) error {
 	line, column := pos.Line, pos.Column
 	log.Printf("%s:%v:%v: %q: %s", "error", line, column, content, err)
 	wrapped := lexer.Error{
 		Line:    line,
 		Column:  column,
 		Content: content,
 		Err:     err,
 	}
 	return wrapped
 }
 // setDefaults installs the default callbacks in place of those left
 // unset. Callbacks already set are not touched.
 func (p *Parser) setDefaults() {
 	if p.OnError == nil {
 		p.OnError = defaultOnError
 	}
 	if p.OnToken == nil {
 		p.OnToken = defaultOnToken
 	}
 }
 // emitString hands the pending text to OnToken as a token of the given
 // type, and moves the position past it on the same line. The callback
 // receives the position where the token starts. The position is moved
 // even if the callback fails, and the callback's error is returned as-is.
 func (p *Parser) emitString(typ TokenType) error {
 	s := p.src.Emit()
 	err := p.OnToken(p.pos, typ, s)
 	// columns count runes, like stepRune does. stepping by len(s) would
 	// count bytes and drift after any multi-byte character.
 	p.pos.StepN(len([]rune(s)))
 	return err
 }
 // emitError reports a failure through OnError. The callback is allowed
 // to replace the error, but if it returns nil the original one is kept.
 // The returned state is always nil.
 func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	if replaced := p.OnError(p.pos, content, err); replaced != nil {
 		return nil, replaced
 	}
 	return nil, err
 }
 // emitInvalidRune reports the given rune as lexer.ErrUnacceptableRune,
 // using the rune itself as content
 func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
 	content := string(r)
 	return p.emitError(content, lexer.ErrUnacceptableRune)
 }
 // stepLine discards the data and moves the position
 // to the next line.
 // lexStart calls it once a line break has been consumed.
 func (p *Parser) stepLine() {
 	p.src.Discard()
 	p.pos.StepLine()
 }
 // stepRune discards the data and moves the position
 // one rune forward on the same line.
 // lexStart calls it to skip a single whitespace rune.
 func (p *Parser) stepRune() {
 	p.src.Discard()
 	p.pos.Step()
 }
 // stepString discards the pending data and moves the position
 // forward on the same line by the number of runes discarded.
 func (p *Parser) stepString() {
 	s := p.src.Emit()
 	// count runes, not bytes, to agree with stepRune on what a column is
 	p.pos.StepN(len([]rune(s)))
 }
 // NewParser creates a dosini-style parser that reads from the
 // given [io.Reader]. It returns nil if no reader is given.
 func NewParser(r io.Reader) *Parser {
 	if r == nil {
 		return nil
 	}
 	p := new(Parser)
 	p.src = lexer.NewReader(r)
 	return p
 }
--- a/parser/token.go
+++ b/parser/token.go
@ -0,0 +1,31 @@
 package parser
 //go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
 // A TokenType is a type of Token
 type TokenType uint
 const (
 	// TokenUnknown represents a Token that hasn't been identified
 	TokenUnknown TokenType = iota
 	// TokenSectionStart indicates the opening marker of a section declaration.
 	// The left square bracket.
 	TokenSectionStart
 	// TokenSectionEnd indicates the closing marker of a section declaration.
 	// The right square bracket.
 	TokenSectionEnd
 	// TokenSectionName represents the section name between the square brackets
 	TokenSectionName
 	// TokenSectionSubname represents a secondary name in the section represented
 	// between quotes after the section name.
 	// e.g.
 	// [section_name "section_subname"]
 	TokenSectionSubname
 	// TokenComment represents a comment, including the initial ';' or '#' until
 	// the end of the line.
 	TokenComment
 	// TokenFieldKey represents a field name in a `key = value` entry
 	TokenFieldKey
 	// TokenFieldValue represents a field value in a `key = value` entry
 	TokenFieldValue
 )
--- a/parser/tokentype_string.go
+++ b/parser/tokentype_string.go
@ -0,0 +1,30 @@
 // Code generated by "stringer -type=TokenType"; DO NOT EDIT.
 package parser
 import "strconv"
 // _ is never called. It only exists so the build breaks if the value of
 // any TokenType constant stops matching the one this file was generated for.
 func _() {
 	// An "invalid array index" compiler error signifies that the constant values have changed.
 	// Re-run the stringer command to generate them again.
 	var x [1]struct{}
 	_ = x[TokenUnknown-0]
 	_ = x[TokenSectionStart-1]
 	_ = x[TokenSectionEnd-2]
 	_ = x[TokenSectionName-3]
 	_ = x[TokenSectionSubname-4]
 	_ = x[TokenComment-5]
 	_ = x[TokenFieldKey-6]
 	_ = x[TokenFieldValue-7]
 }
 // _TokenType_name holds the names of all TokenType constants, concatenated
 // in order of value.
 const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
 // _TokenType_index holds the offset within _TokenType_name where each name
 // starts, followed by the total length.
 var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
 // String returns the name of the constant, or "TokenType(N)" with N in
 // decimal for values that don't match any of the constants.
 func (i TokenType) String() string {
 	if i >= TokenType(len(_TokenType_index)-1) {
 		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
 	}
 	// the name is the slice between this value's offset and the next one
 	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
 }
--- a/tools/tools.go
+++ b/tools/tools.go
@ -4,4 +4,5 @@ package tools
 import (
 	_ "github.com/mgechev/revive"
 	_ "golang.org/x/tools/cmd/stringer"
 )