From a15deb7e42265a57275b03a49ec6bab7b0165148 Mon Sep 17 00:00:00 2001
From: Alejandro Mery <amery@jpi.io>
Date: Wed, 30 Aug 2023 23:00:16 +0000
Subject: [PATCH 1/3] tools: add stringer support

Signed-off-by: Alejandro Mery <amery@jpi.io>
---
 go.mod         | 7 +++++--
 go.sum         | 2 ++
 tools/tools.go | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/go.mod b/go.mod
index 534d3cd..efb45ff 100644
--- a/go.mod
+++ b/go.mod
@@ -2,7 +2,10 @@ module asciigoat.org/ini
 
 go 1.19
 
-require github.com/mgechev/revive v1.3.3
+require (
+	github.com/mgechev/revive v1.3.3
+	golang.org/x/tools v0.12.0
+)
 
 require (
 	github.com/BurntSushi/toml v1.3.2 // indirect
@@ -16,6 +19,6 @@ require (
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
-	golang.org/x/tools v0.12.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 495d0a7..8ebe90d 100644
--- a/go.sum
+++ b/go.sum
@@ -36,6 +36,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
diff --git a/tools/tools.go b/tools/tools.go
index a3379f0..a4d1df9 100644
--- a/tools/tools.go
+++ b/tools/tools.go
@@ -4,4 +4,5 @@ package tools
 
 import (
 	_ "github.com/mgechev/revive"
+	_ "golang.org/x/tools/cmd/stringer"
 )

From 1090a374f068b5936948e729713c8cc929d9bb58 Mon Sep 17 00:00:00 2001
From: Alejandro Mery <amery@jpi.io>
Date: Thu, 31 Aug 2023 00:11:33 +0000
Subject: [PATCH 2/3] parser: add initial Parser emitting non-whitespace tokens

Signed-off-by: Alejandro Mery <amery@jpi.io>
---
 go.mod                     |   1 +
 go.sum                     |   2 +
 parser/lexer.go            |  67 +++++++++++++++++++++++
 parser/lexer_runes.go      |  16 ++++++
 parser/parser.go           | 105 +++++++++++++++++++++++++++++++++++++
 parser/token.go            |  31 +++++++++++
 parser/tokentype_string.go |  30 +++++++++++
 7 files changed, 252 insertions(+)
 create mode 100644 parser/lexer.go
 create mode 100644 parser/lexer_runes.go
 create mode 100644 parser/parser.go
 create mode 100644 parser/token.go
 create mode 100644 parser/tokentype_string.go

diff --git a/go.mod b/go.mod
index efb45ff..ce1e7e0 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module asciigoat.org/ini
 go 1.19
 
 require (
+	asciigoat.org/core v0.3.6
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )
diff --git a/go.sum b/go.sum
index 8ebe90d..b76ff81 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
+asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..c735baa
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,67 @@
+package parser
+
+import "asciigoat.org/core/lexer"
+
+// Run parses the source
+func (p *Parser) Run() error {
+	p.setDefaults()
+	p.pos.Reset()
+
+	return lexer.Run(p.lexStart)
+}
+
+func (p *Parser) lexStart() (lexer.StateFn, error) {
+	for {
+		r, _, err := p.src.ReadRune()
+		switch {
+		case err != nil:
+			return p.emitError("", err)
+		case IsNewLine(r):
+			// new line
+			p.lexMoreNewLine(r)
+			p.stepLine()
+		case IsSpace(r):
+			// whitespace
+			p.stepRune()
+		default:
+			p.src.UnreadRune()
+			return p.lexToken, nil
+		}
+	}
+}
+
+func (p *Parser) lexMoreNewLine(r1 rune) {
+	// r1 is guaranteed to be either '\r' or '\n'
+	r2, _, err := p.src.ReadRune()
+	switch r1 {
+	case '\n':
+		switch {
+		case r2 == '\r':
+			// LN CR
+		case err == nil:
+			// LN
+			p.src.UnreadRune()
+		default:
+			// LN EOF
+		}
+	case '\r':
+		switch {
+		case r2 == '\n':
+			// CR LN
+		case err == nil:
+			// CR
+			p.src.UnreadRune()
+		default:
+			// CR EOF
+		}
+	default:
+		panic("unreachable")
+	}
+}
+
+func (p *Parser) lexToken() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsNotSpace)
+
+	err := p.emitString(TokenUnknown)
+	return p.lexStart, err
+}
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
new file mode 100644
index 0000000..872d2be
--- /dev/null
+++ b/parser/lexer_runes.go
@@ -0,0 +1,16 @@
+package parser
+
+import (
+	"asciigoat.org/core/lexer"
+)
+
+var (
+	// IsNewLine tells if the rune indicates a line break or the start of one
+	IsNewLine = lexer.NewIsIn("\r\n")
+	// IsNotNewLine tells if the rune is not a line break nor the start of one
+	IsNotNewLine = lexer.NewIsNot(IsNewLine)
+	// IsSpace tells if the rune is considered whitespace by Unicode
+	IsSpace = lexer.IsSpace
+	// IsNotSpace tells if the rune is not considered whitespace by Unicode
+	IsNotSpace = lexer.NewIsNot(IsSpace)
+)
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..04d9b06
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,105 @@
+// Package parser parses dosini-style files
+package parser
+
+import (
+	"io"
+	"log"
+
+	"asciigoat.org/core/lexer"
+)
+
+// Parser parses a dosini-style document
+type Parser struct {
+	src *lexer.Reader
+	pos lexer.Position
+
+	// OnToken is called for each identified token. If it returns an error,
+	// parsing is interrupted.
+	OnToken func(pos lexer.Position, typ TokenType, value string) error
+
+	// OnError is called in case of a parsing error, and it's allowed
+	// to replace the error returned by [Parser.Run].
+	// OnError is called for io.EOF, but [Parser.Run] will consider it
+	// normal termination.
+	OnError func(pos lexer.Position, content string, err error) error
+}
+
+func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
+	log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value)
+	return nil
+}
+
+func defaultOnError(pos lexer.Position, content string, err error) error {
+	log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
+
+	return lexer.Error{
+		Line:   pos.Line,
+		Column: pos.Column,
+
+		Content: content,
+		Err:     err,
+	}
+}
+
+func (p *Parser) setDefaults() {
+	if p.OnToken == nil {
+		p.OnToken = defaultOnToken
+	}
+	if p.OnError == nil {
+		p.OnError = defaultOnError
+	}
+}
+
+func (p *Parser) emitString(typ TokenType) error {
+	s := p.src.Emit()
+	err := p.OnToken(p.pos, typ, s)
+	p.pos.StepN(len(s))
+
+	return err
+}
+
+func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
+	err2 := p.OnError(p.pos, content, err)
+	switch {
+	case err2 != nil:
+		// return wrapped error
+		return nil, err2
+	default:
+		// return original error
+		return nil, err
+	}
+}
+
+// stepLine discards the data and moves the position
+// to the next line.
+func (p *Parser) stepLine() {
+	p.src.Discard()
+	p.pos.StepLine()
+}
+
+// stepRune discards the data and moves the position
+// one rune forward on the same line.
+func (p *Parser) stepRune() {
+	p.src.Discard()
+	p.pos.Step()
+}
+
+// stepString discards the data and moves the position
+// forward on the same line the length of the discarded
+// content.
+func (p *Parser) stepString() {
+	s := p.src.Emit()
+	p.pos.StepN(len(s))
+}
+
+// NewParser creates a dosini-style parser using
+// an [io.Reader] as source
+func NewParser(r io.Reader) *Parser {
+	if r == nil {
+		return nil
+	}
+
+	return &Parser{
+		src: lexer.NewReader(r),
+	}
+}
diff --git a/parser/token.go b/parser/token.go
new file mode 100644
index 0000000..c1ddc7e
--- /dev/null
+++ b/parser/token.go
@@ -0,0 +1,31 @@
+package parser
+
+//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
+
+// A TokenType is a type of Token
+type TokenType uint
+
+const (
+	// TokenUnknown represents a Token that hasn't been identified
+	TokenUnknown TokenType = iota
+	// TokenSectionStart indicates the opening marker of a section declaration.
+	// The left square bracket.
+	TokenSectionStart
+	// TokenSectionEnd indicates the closing marker of a section declaration.
+	// The right square bracket.
+	TokenSectionEnd
+	// TokenSectionName represents the section name between the square brackets
+	TokenSectionName
+	// TokenSectionSubname represents a secondary name in the section represented
+	// between quotes after the section name.
+	// e.g.
+	// [section_name "section_subname"]
+	TokenSectionSubname
+	// TokenComment represents a comment, including the initial ';' or '#' until
+	// the end of the line.
+	TokenComment
+	// TokenFieldKey represents a field name in a `key = value` entry
+	TokenFieldKey
+	// TokenFieldValue represents a field value in a `key = value` entry
+	TokenFieldValue
+)
diff --git a/parser/tokentype_string.go b/parser/tokentype_string.go
new file mode 100644
index 0000000..455f555
--- /dev/null
+++ b/parser/tokentype_string.go
@@ -0,0 +1,30 @@
+// Code generated by "stringer -type=TokenType"; DO NOT EDIT.
+
+package parser
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[TokenUnknown-0]
+	_ = x[TokenSectionStart-1]
+	_ = x[TokenSectionEnd-2]
+	_ = x[TokenSectionName-3]
+	_ = x[TokenSectionSubname-4]
+	_ = x[TokenComment-5]
+	_ = x[TokenFieldKey-6]
+	_ = x[TokenFieldValue-7]
+}
+
+const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
+
+var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
+
+func (i TokenType) String() string {
+	if i >= TokenType(len(_TokenType_index)-1) {
+		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
+}

From eb36c195c0cc25f5883b3c8a909e46f390e7a528 Mon Sep 17 00:00:00 2001
From: Alejandro Mery <amery@jpi.io>
Date: Thu, 31 Aug 2023 01:50:05 +0000
Subject: [PATCH 3/3] parser: implement basic dosini parsing

Signed-off-by: Alejandro Mery <amery@jpi.io>
---
 parser/lexer.go       | 85 +++++++++++++++++++++++++++++++++++++++++--
 parser/lexer_runes.go | 51 ++++++++++++++++++++++++++
 parser/parser.go      |  4 ++
 3 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/parser/lexer.go b/parser/lexer.go
index c735baa..5ed5102 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -23,9 +23,17 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
 		case IsSpace(r):
 			// whitespace
 			p.stepRune()
+		case IsCommentStart(r):
+			// switch to comment lexer
+			p.src.UnreadRune()
+			return p.lexComment, nil
+		case IsSectionStart(r):
+			// section
+			return p.lexSectionStart, nil
 		default:
+			// entry
 			p.src.UnreadRune()
-			return p.lexToken, nil
+			return p.lexEntryStart, nil
 		}
 	}
 }
@@ -59,9 +67,78 @@ func (p *Parser) lexMoreNewLine(r1 rune) {
 	}
 }
 
-func (p *Parser) lexToken() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsNotSpace)
+func (p *Parser) lexComment() (lexer.StateFn, error) {
+	// until the end of the line
+	p.src.AcceptAll(IsNotNewLine)
+
+	err := p.emitString(TokenComment)
+	return p.lexStart, err
+}
+
+func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
+	if err := p.emitString(TokenSectionStart); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between `[` and the name
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	if !p.src.AcceptAll(IsName) {
+		// no name
+		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
+	}
+
+	if err := p.emitString(TokenSectionName); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between the name and the closing `]`
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case IsSectionEnd(r):
+		err := p.emitString(TokenSectionEnd)
+		return p.lexStart, err
+	default:
+		return p.emitInvalidRune(r)
+	}
+}
+
+func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsName)
+	if err := p.emitString(TokenFieldKey); err != nil {
+		return nil, err
+	}
+
+	// ignore whitespace between key and the '=' sign
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case r != RuneFieldEqual:
+		return p.emitInvalidRune(r)
+	}
+
+	// discard the '=' itself along with any whitespace
+	// between it and the value
+	p.src.AcceptAll(IsSpaceNotNewLine)
+	p.stepString()
+
+	p.src.AcceptAll(IsNotNewLine)
+	if err := p.emitString(TokenFieldValue); err != nil {
+		return nil, err
+	}
 
-	err := p.emitString(TokenUnknown)
 	return p.lexStart, err
 }
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
index 872d2be..950f84c 100644
--- a/parser/lexer_runes.go
+++ b/parser/lexer_runes.go
@@ -1,9 +1,35 @@
 package parser
 
 import (
+	"strings"
+
 	"asciigoat.org/core/lexer"
 )
 
+const (
+	RuneComment      = ';' // RuneComment is the standard dosini comment character
+	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
+	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
+	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
+	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
+)
+
+var (
+	// RunesComment is a string containing all runes acceptable to start comments
+	RunesComment = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+	})
+	// RunesSpecial is a string containing all the runes with special meaning
+	RunesSpecial = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+		RuneSectionStart,
+		RuneSectionEnd,
+		RuneFieldEqual,
+	})
+)
+
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
@@ -13,4 +39,29 @@ var (
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
+	// IsCommentStart tells if the rune starts a comment
+	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
+
+// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
+func IsSpaceNotNewLine(r rune) bool {
+	return IsSpace(r) && !IsNewLine(r)
+}
+
+// IsSectionStart indicates the rune starts the section declaration
+func IsSectionStart(r rune) bool { return r == RuneSectionStart }
+
+// IsSectionEnd indicates the rune ends the section declaration
+func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
+
+// IsName indicates a rune is acceptable for section or field names
+func IsName(r rune) bool {
+	switch {
+	case IsSpace(r):
+		return false
+	case strings.ContainsRune(RunesSpecial, r):
+		return false
+	default:
+		return true
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
index 04d9b06..b526b0c 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -70,6 +70,10 @@ func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	}
 }
 
+func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
+	return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
+}
+
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {