basic: rename and document queue related methods

Signed-off-by: Alejandro Mery <amery@jpi.io>
basic: call executeFinal() when OnToken() fails
2023-09-04 17:37:09 +00:00 · 2023-09-04 17:37:09 +00:00 · 2023-09-04 19:33:24 +02:00 · 2023-09-04 15:25:20 +00:00 · 2023-09-04 15:25:20 +00:00 · 2023-09-04 15:16:43 +00:00
17 changed files with 897 additions and 69 deletions
@@ -0,0 +1,57 @@
 # asciigoat's INI parser
 [![Go Reference][godoc-badge]][godoc]
 [![Go Report Card][goreport-badge]][goreport]
 `asciigoat.org/ini` is a simple Go library that very loosely parses
 [`INI`-style][wikipedia-dosini] documents allowing the implementation
 of stricter parsers of similar form.
 **asciigoat** is [MIT](https://opensource.org/license/mit/) licensed.
 [godoc]: https://pkg.go.dev/asciigoat.org/ini
 [godoc-badge]: https://pkg.go.dev/badge/asciigoat.org/ini.svg
 [goreport]: https://goreportcard.com/report/asciigoat.org/ini
 [goreport-badge]: https://goreportcard.com/badge/asciigoat.org/ini
 [godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer
 [godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser
 [godoc-basic-parser]: https://pkg.go.dev/asciigoat.org/ini/basic#Decode
 [wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file
 ## Parser
 [`parser.Parser`][godoc-parser-parser] uses
 [`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style document
 emitting tokens and errors via callbacks.
 ## Basic Parser
 [`basic.Decode()`][godoc-basic-parser] provides a one-shot decoder
 that returns a structured document for you to post-process.
 To allow for correct handling of repetition of section and field names downstream,
 it uses arrays instead of maps, and makes almost no judgment
 about what section or field names are acceptable.
 ## Other Implementations
 Other implementations exist, and they are mature and feature-rich, but they
 are highly opinionated about what's a valid file. Built around maps, they don't
 allow repeating names and constrain what characters can be used.
 These are great when you can adapt to, or already agree with, their conditions, but
 that's not always the case when you are parsing configuration files from
 other applications and that's what [asciigoat.org/ini][godoc] attempts to solve.
 * [gcfg](https://pkg.go.dev/gopkg.in/gcfg.v1)
 * [unknwon's go-ini](https://github.com/go-ini/ini)
 * [wlevene's GoINI](https://github.com/wlevene/ini)
 ## See also
 * [asciigoat.org/core](https://asciigoat.org/core)
 * [oss.jpi.io](https://oss.jpi.io)
 * [INI file][wikipedia-dosini] (_wikipedia_)
 * [TOML](https://www.kelche.co/blog/go/toml/)
@@ -0,0 +1,24 @@
 // Package basic provides a basic representation of dosini-style documents
 package basic
 // Document represents an INI-style document.
 // Slices are used for fields and sections so repeated names are kept
 // as separate entries.
 type Document struct {
 	// Global holds the fields decoded before any section was declared.
 	Global []Field
 	// Sections holds the decoded sections in the order they were added.
 	Sections []Section
 }
 // Section represents an INI-style section with optional GIT-style IDs
 type Section struct {
 	// Key is the section name.
 	Key     string
 	// ID is the optional secondary name (subname) of the section.
 	ID      string
 	// EmptyID tells that a subname was given but it was empty; such a
 	// section is written back with an explicit "" ID.
 	EmptyID bool
 	// Fields holds the fields that belong to this section.
 	Fields []Field
 }
 // Field represents a key = value entry in an INI-style document
 type Field struct {
 	// Key is the name on the left of the '=' sign.
 	Key   string
 	// Value is the text on the right of the '=' sign.
 	Value string
 }
@@ -0,0 +1,49 @@
 package basic
 import (
 	"bytes"
 	"io"
 	"io/fs"
 	"strings"
 	"asciigoat.org/ini/parser"
 )
 // decoder turns the tokens emitted by a [parser.Parser] into a [Document].
 type decoder struct {
 	// p is the parser feeding tokens and errors to the decoder.
 	p   *parser.Parser
 	// out is the document being populated.
 	out *Document
 	// queue holds the tokens received so far that haven't yet been
 	// turned into a section or a field.
 	queue   []*token
 	// current points to the last section added, and is nil while
 	// fields still belong to the global part of the document.
 	current *Section
 }
 // Decode attempts to decode an INI-style document read from an
 // [io.Reader] into a [Document].
 //
 // A nil reader yields [fs.ErrNotExist]. For any other reader the
 // document is returned together with the error reported by the parser,
 // so on failure it contains whatever was decoded up to that point.
 func Decode(r io.Reader) (*Document, error) {
 	if r == nil {
 		return nil, fs.ErrNotExist
 	}
 	doc := new(Document)
 	dec := decoder{
 		p:   parser.NewParser(r),
 		out: doc,
 	}
 	// route the parser callbacks to the decoder
 	dec.p.OnToken = dec.OnToken
 	dec.p.OnError = dec.OnError
 	err := dec.p.Run()
 	return doc, err
 }
 // DecodeBytes attempts to decode an INI-style byte slice into a [Document].
 // It wraps the bytes in a reader and calls [Decode].
 func DecodeBytes(b []byte) (*Document, error) {
 	return Decode(bytes.NewReader(b))
 }
 // DecodeString attempts to decode an INI-style string into a [Document].
 // It wraps the string in a reader and calls [Decode].
 func DecodeString(s string) (*Document, error) {
 	return Decode(strings.NewReader(s))
 }
@@ -0,0 +1,22 @@
 package basic
 import (
 	"errors"
 	"asciigoat.org/core/lexer"
 	"asciigoat.org/ini/parser"
 )
 var (
 	errInvalidToken = errors.New("invalid token")
 )
 // newErrInvalidToken creates a [lexer.Error] wrapping errInvalidToken,
 // carrying the position and value of the offending token.
 func newErrInvalidToken(t *token) *lexer.Error {
 	return parser.NewError(t.pos, t.value, "", errInvalidToken)
 }
 // OnError is the error callback handed to the parser. It flushes any
 // section or field still pending on the queue and returns the given
 // error wrapped with its position and content.
 func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
 	// store what we have before giving up
 	dec.executeFinal()
 	return parser.NewError(pos, content, "", err)
 }
@@ -0,0 +1,165 @@
 package basic
 import (
 	"fmt"
 	"asciigoat.org/core/lexer"
 	"asciigoat.org/ini/parser"
 )
 // token is a parser token as received by [decoder.OnToken].
 type token struct {
 	// pos is the position reported for the token.
 	pos   lexer.Position
 	// typ is the kind of token.
 	typ   parser.TokenType
 	// value is the text of the token.
 	value string
 }
 // String renders the token as its position and type followed by the
 // quoted value.
 func (t token) String() string {
 	return fmt.Sprintf("%s %s: %q", t.pos, t.typ, t.value)
 }
 // executeFinal flushes an unfinished section or field left on the
 // queue, acting as if its closing token had arrived.
 func (dec *decoder) executeFinal() {
 	if len(dec.queue) == 0 {
 		// nothing pending
 		return
 	}
 	// the first token tells what was being assembled
 	switch first := dec.queue[0]; first.typ {
 	case parser.TokenSectionStart:
 		dec.execute(parser.TokenSectionEnd)
 	case parser.TokenFieldKey:
 		dec.execute(parser.TokenFieldValue)
 	}
 }
 // execute acts on the queue when a closing token type is given.
 // TokenSectionEnd adds a section, provided a name is queued, and
 // TokenFieldValue adds a field. Both empty the queue afterwards.
 // Any other type is ignored and leaves the queue untouched.
 func (dec *decoder) execute(typ parser.TokenType) {
 	switch typ {
 	case parser.TokenSectionEnd:
 		if key, found := dec.queueValue(1, parser.TokenSectionName); found {
 			id, hasID := dec.queueValue(2, parser.TokenSectionSubname)
 			dec.addSection(key, id, hasID)
 		}
 	case parser.TokenFieldValue:
 		// missing entries are stored as empty strings
 		k, _ := dec.queueValue(0, parser.TokenFieldKey)
 		v, _ := dec.queueValue(1, parser.TokenFieldValue)
 		dec.addField(k, v)
 	default:
 		// not a closing token
 		return
 	}
 	dec.queueReset()
 }
 // addSection appends a new section to the document and makes it the
 // current one, so following fields are stored in it.
 // EmptyID is set when an ID is allowed to be empty and it is.
 func (dec *decoder) addSection(key, id string, allowEmptyID bool) {
 	sec := Section{
 		Key:     key,
 		ID:      id,
 		EmptyID: allowEmptyID && id == "",
 	}
 	dec.out.Sections = append(dec.out.Sections, sec)
 	// remember the element just stored on the slice
 	last := len(dec.out.Sections) - 1
 	dec.current = &dec.out.Sections[last]
 }
 // addField stores a field on the current section, or on the global
 // list of the document when no section has been added yet.
 func (dec *decoder) addField(key, value string) {
 	entry := Field{Key: key, Value: value}
 	if dec.current == nil {
 		// global
 		dec.out.Global = append(dec.out.Global, entry)
 		return
 	}
 	// in section
 	dec.current.Fields = append(dec.current.Fields, entry)
 }
 // queueValue returns the value of the queue element at the given index,
 // and true, when the index is in range and the element is of the
 // requested type. Otherwise it returns an empty string and false.
 func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
 	if idx < 0 || idx >= len(dec.queue) {
 		// out of range
 		return "", false
 	}
 	if t := dec.queue[idx]; t.typ == typ {
 		return t.value, true
 	}
 	// wrong type
 	return "", false
 }
 // queueReset removes all tokens from the queue, keeping the
 // underlying array for reuse.
 func (dec *decoder) queueReset() {
 	dec.queue = dec.queue[:0]
 }
 // queueDepth tells if the queue holds exactly the given number of tokens.
 func (dec *decoder) queueDepth(depth int) bool {
 	return len(dec.queue) == depth
 }
 // queueDepthType tells if the queue holds exactly depth tokens and the
 // last of them is of the given type.
 func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool {
 	return dec.queueDepth(depth) && dec.queueType(depth-1, typ)
 }
 // queueType tells if the specified element on the queue is of the required type.
 // It returns false when the index is out of range.
 func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
 	_, ok := dec.queueValue(idx, typ)
 	return ok
 }
 // typeOK tells if a token of the given type is acceptable considering
 // what is already on the queue. Types not listed are never accepted.
 func (dec *decoder) typeOK(typ parser.TokenType) bool {
 	var ok bool
 	switch typ {
 	case parser.TokenFieldKey, parser.TokenSectionStart:
 		// only as first token
 		ok = dec.queueDepth(0)
 	case parser.TokenFieldValue:
 		// immediately after the TokenFieldKey
 		ok = dec.queueDepthType(1, parser.TokenFieldKey)
 	case parser.TokenSectionName:
 		// immediately after the TokenSectionStart
 		ok = dec.queueDepthType(1, parser.TokenSectionStart)
 	case parser.TokenSectionSubname:
 		// immediately after the TokenSectionName
 		ok = dec.queueDepthType(2, parser.TokenSectionName)
 	case parser.TokenSectionEnd:
 		// the section needs a name, a subname is optional
 		ok = dec.queueType(1, parser.TokenSectionName)
 	}
 	return ok
 }
 // OnToken is the token callback handed to the parser. Comments are
 // ignored, acceptable tokens are queued and acted upon, and anything
 // else flushes the pending data and returns an invalid token error.
 func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value string) error {
 	if typ == parser.TokenComment {
 		// ignore comments
 		return nil
 	}
 	t := &token{pos: pos, typ: typ, value: value}
 	if !dec.typeOK(typ) {
 		// unacceptable
 		err := newErrInvalidToken(t)
 		dec.executeFinal()
 		return err
 	}
 	// acceptable token
 	dec.queue = append(dec.queue, t)
 	dec.execute(typ)
 	return nil
 }
@@ -0,0 +1,104 @@
 package basic
 import (
 	"bytes"
 	"fmt"
 	"io"
 	"asciigoat.org/ini/parser"
 )
 // WriteNewLine is the new line representation used by [Document.WriteTo]
 // and the String methods of this package.
 const WriteNewLine = "\n"
 // AsBuffer returns an INI representation of the document on a new
 // memory buffer, using nl to terminate lines. Global fields are
 // written first, and an extra nl is written before each section
 // when the buffer isn't empty.
 func (doc *Document) AsBuffer(nl string) *bytes.Buffer {
 	buf := new(bytes.Buffer)
 	if len(doc.Global) > 0 {
 		_, _ = writeFieldsTo(buf, doc.Global, nl)
 	}
 	for i := range doc.Sections {
 		if buf.Len() > 0 {
 			_, _ = buf.WriteString(nl)
 		}
 		_ = writeSectionToBuffer(buf, &doc.Sections[i], nl)
 	}
 	return buf
 }
 // writeFieldsTo writes each field as `key = "value"` followed by nl,
 // with the value quoted using %q. It returns the number of bytes
 // written by the calls that succeeded, and stops at the first error.
 func writeFieldsTo(w io.Writer, fields []Field, nl string) (int64, error) {
 	var total int64
 	for i := range fields {
 		n, err := fmt.Fprintf(w, "%s = %q%s", fields[i].Key, fields[i].Value, nl)
 		if err != nil {
 			return total, err
 		}
 		if n > 0 {
 			total += int64(n)
 		}
 	}
 	return total, nil
 }
 // String generates a string output for "%s": the field as a single
 // `key = "value"` line terminated by [WriteNewLine].
 func (field Field) String() string {
 	var buf bytes.Buffer
 	_, _ = writeFieldsTo(&buf, []Field{field}, WriteNewLine)
 	return buf.String()
 }
 // writeSectionToBuffer appends the section header, `[key]`,
 // `[key ""]` or `[key "id"]`, followed by its fields to the buffer,
 // using nl to terminate lines. It returns the number of bytes added.
 func writeSectionToBuffer(w *bytes.Buffer, sec *Section, nl string) int {
 	// nothing is read from the buffer while writing, so the
 	// growth of its length is the number of bytes written.
 	start := w.Len()
 	_, _ = w.WriteRune(parser.RuneSectionStart)
 	_, _ = w.WriteString(sec.Key)
 	if sec.EmptyID {
 		_, _ = w.WriteString(" \"\"")
 	} else if sec.ID != "" {
 		_, _ = w.WriteRune(' ')
 		_, _ = fmt.Fprintf(w, "%q", sec.ID)
 	}
 	_, _ = w.WriteRune(parser.RuneSectionEnd)
 	_, _ = w.WriteString(nl)
 	_, _ = writeFieldsTo(w, sec.Fields, nl)
 	return w.Len() - start
 }
 // String generates a string output for "%s": the section header
 // followed by its fields, using [WriteNewLine] to terminate lines.
 func (sec *Section) String() string {
 	var buf bytes.Buffer
 	_ = writeSectionToBuffer(&buf, sec, WriteNewLine)
 	return buf.String()
 }
 // WriteTo writes an INI representation of the document, using
 // [WriteNewLine] to terminate lines, onto the provided writer.
 // It returns what the write of the rendered buffer reports.
 func (doc *Document) WriteTo(w io.Writer) (int64, error) {
 	return doc.AsBuffer(WriteNewLine).WriteTo(w)
 }
 // String generates a string output for "%s": the INI representation
 // of the document using [WriteNewLine] to terminate lines.
 func (doc *Document) String() string {
 	return doc.AsBuffer(WriteNewLine).String()
 }
@@ -2,25 +2,25 @@ module asciigoat.org/ini
 go 1.19
 replace asciigoat.org/core => ../core
 require (
-	asciigoat.org/core v0.3.4
+	asciigoat.org/core v0.3.9
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )
 require (
 	github.com/BurntSushi/toml v1.3.2 // indirect
-	github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab // indirect
+	github.com/chavacava/garif v0.1.0 // indirect
 	github.com/fatih/color v1.15.0 // indirect
 	github.com/fatih/structtag v1.2.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
-	github.com/mattn/go-runewidth v0.0.9 // indirect
+	github.com/mattn/go-runewidth v0.0.15 // indirect
 	github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517 // indirect
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/rivo/uniseg v0.4.4 // indirect
 	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
 	golang.org/x/tools v0.12.0 // indirect
 )
@@ -1,7 +1,9 @@
 asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
 asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
-github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
+github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
-github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab/go.mod h1:XMyYCkEL58DF0oyW4qDjjnPWONs2HBqYKI+UIPD+Gww=
+github.com/chavacava/garif v0.1.0/go.mod h1:XMyYCkEL58DF0oyW4qDjjnPWONs2HBqYKI+UIPD+Gww=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -14,8 +16,9 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
 github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517 h1:zpIH83+oKzcpryru8ceC6BxnoG8TBrhgAvRg8obzup0=
 github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg=
 github.com/mgechev/revive v1.3.3 h1:GUWzV3g185agbHN4ZdaQvR6zrLVYTUSA2ktvIinivK0=
@@ -28,6 +31,9 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
 github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -36,6 +42,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
 golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
@@ -0,0 +1,27 @@
 package parser
 import (
 	"asciigoat.org/core/lexer"
 )
 // NewError creates a lexer.Error using a lexer.Position.
 // Line and Column are copied from pos, and content, hint and err
 // are stored as given.
 func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
 	return &lexer.Error{
 		Line:    pos.Line,
 		Column:  pos.Column,
 		Content: content,
 		Hint:    hint,
 		Err:     err,
 	}
 }
 // ErrPlusPosition returns a copy of the given [lexer.Error]
 // offsetting the Line/Column information.
 // Content, Hint and Err are carried over unchanged, and e must not
 // be nil. pos is received by value, so the caller's position isn't
 // affected.
 func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
 	// NOTE(review): presumably Add combines both positions in place,
 	// confirm against lexer.Position.
 	pos.Add(lexer.Position{
 		Line:   e.Line,
 		Column: e.Column,
 	})
 	return NewError(pos, e.Content, e.Hint, e.Err)
 }
@@ -1,69 +1,115 @@
 package parser
-import (
+import "asciigoat.org/core/lexer"
 	"log"
 	"asciigoat.org/core/lexer"
 )
 // Run parses the source
 func (p *Parser) Run() error {
-	p.pos.Reset()
+	p.setDefaults()
 	return lexer.Run(p.lexStart)
 }
 func (p *Parser) lexStart() (lexer.StateFn, error) {
 	for {
-		r, _, err := p.src.ReadRune()
+		r, _, err := p.p.ReadRune()
 		switch {
 		case err != nil:
-			// read error
+			return p.emitError("", err)
 			log.Printf("%s: %s: %s", p.pos, "error", err)
 			return nil, err
 		case IsNewLine(r):
 			// new line
-			p.src.UnreadRune()
+			p.p.UnreadRune()
-			p.lexNewLine()
+			p.p.AcceptNewLine()
-
+			p.stepLine()
 			p.src.Discard()
 			p.pos.StepLine()
 		case IsSpace(r):
 			// whitespace
-			p.src.Discard()
+			p.stepString()
-			p.pos.Step()
+		case IsCommentStart(r):
 			// switch to comment lexer
 			p.p.UnreadRune()
 			return p.lexComment, nil
 		case IsSectionStart(r):
 			// section
 			return p.lexSectionStart, nil
 		default:
-			// token
+			// entry
-			p.src.UnreadRune()
+			p.p.UnreadRune()
-			return p.lexToken, nil
+			return p.lexEntryStart, nil
 		}
 	}
 }
-func (p *Parser) lexToken() (lexer.StateFn, error) {
+func (p *Parser) lexComment() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsNotSpace)
+	// until the end of the line
 	p.p.AcceptAll(IsNotNewLine)
-	s := p.src.Emit()
+	err := p.emitString(TokenComment)
-	log.Printf("%s: %s: %q", p.pos, "token", s)
+	return p.lexStart, err
 	p.pos.StepN(len(s))
 	return p.lexStart, nil
 }
-func (p *Parser) lexNewLine() {
+func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
-	// r1 is warrantied to be either \n or \r
+	if err := p.emitString(TokenSectionStart); err != nil {
-	r1, _, _ := p.src.ReadRune()
+		return nil, err
-	r2, _, err := p.src.ReadRune()
+	}
 	// remove whitespace between `[` and the name
 	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	if !p.p.AcceptAll(IsName) {
 		// no name
 		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
 	}
 	if err := p.emitString(TokenSectionName); err != nil {
 		return nil, err
 	}
 	// remove whitespace between the name and the closing `]`
 	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	r, _, err := p.p.ReadRune()
 	switch {
-	case r1 == '\r' && r2 == '\n':
+	case err != nil:
-		// CR LN
+		return p.emitError("", err)
-	case r1 == '\r' && err == nil:
+	case IsSectionEnd(r):
-		// CR
+		err := p.emitString(TokenSectionEnd)
-		p.src.UnreadRune()
+		return p.lexStart, err
-	case r2 == '\r':
+	default:
-		// LN CR
+		return p.emitInvalidRune(r)
 	case err == nil:
 		// LN
 		p.src.UnreadRune()
 	}
 }
 // lexEntryStart parses a `key = value` entry, emitting a TokenFieldKey
 // and a TokenFieldValue. Whitespace around the '=' sign is discarded,
 // and the value runs until the end of the line.
 //
 // A read error or anything other than '=' after the key is reported
 // via emitError/emitInvalidRune. On success parsing continues at
 // lexStart.
 func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
 	p.p.AcceptAll(IsName)
 	if err := p.emitString(TokenFieldKey); err != nil {
 		return nil, err
 	}
 	// ignore whitespace between key and the '=' sign
 	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}
 	r, _, err := p.p.ReadRune()
 	switch {
 	case err != nil:
 		return p.emitError("", err)
 	case r != RuneFieldEqual:
 		return p.emitInvalidRune(r)
 	}
 	// discard the '=' sign and any whitespace between it and the value.
 	// the '=' has been read, so it has to be stepped over even when
 	// nothing follows it; otherwise it would be emitted as part of
 	// the value.
 	p.p.AcceptAll(IsSpaceNotNewLine)
 	p.stepString()
 	p.p.AcceptAll(IsNotNewLine)
 	if err := p.emitString(TokenFieldValue); err != nil {
 		return nil, err
 	}
 	return p.lexStart, nil
 }
@@ -1,14 +1,72 @@
 package parser
-import "asciigoat.org/core/lexer"
+import (
 	"strings"
 	"asciigoat.org/core/lexer"
 )
 const (
 	RuneComment      = ';' // RuneComment is the standard dosini comment character
 	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
 	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
 	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
 	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
 )
 var (
-	// IsNewLine tells if a rune represents a line break or the start of one
+	// RunesComment is a string containing all runes acceptable to start comments
-	IsNewLine = lexer.NewIsIn("\n\r")
+	RunesComment = string([]rune{
-	// IsSpace tells if a rune is considered whitespace by unicode
+		RuneComment,
-	IsSpace = lexer.IsSpace
+		RuneCommentExtra,
-	// IsNotNewLine tells if a rune is anything other than line breaks
+	})
-	IsNotNewLine = lexer.NewIsNot(IsNewLine)
+	// RunesSpecial is a string containing all the runes with special meaning
-	// IsNotSpace tells if a rune is anything other than whitespace
+	RunesSpecial = string([]rune{
-	IsNotSpace = lexer.NewIsNot(IsSpace)
+		RuneComment,
 		RuneCommentExtra,
 		RuneSectionStart,
 		RuneSectionEnd,
 		RuneFieldEqual,
 	})
 )
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
 	// IsNotNewLine tells if the rune is not a line break nor the start of one
 	IsNotNewLine = lexer.NewIsNot(IsNewLine)
 	// IsSpace tells if the rune is considered whitespace by Unicode
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
 	// IsCommentStart tells if the rune is one of those in RunesComment,
 	// which indicate the start of a comment
 	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
 // IsAny accepts any rune; it always returns true.
 func IsAny(_ rune) bool {
 	return true
 }
 // IsSpaceNotNewLine indicates a rune is whitespace according to
 // IsSpace but isn't part of a line break according to IsNewLine.
 func IsSpaceNotNewLine(r rune) bool {
 	if !IsSpace(r) {
 		return false
 	}
 	return !IsNewLine(r)
 }
 // IsSectionStart indicates the rune starts the section declaration,
 // that is, it's RuneSectionStart.
 func IsSectionStart(r rune) bool { return r == RuneSectionStart }
 // IsSectionEnd indicates the rune ends the section declaration,
 // that is, it's RuneSectionEnd.
 func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
 // IsName indicates a rune is acceptable for section or field names:
 // anything that is neither whitespace nor one of RunesSpecial.
 func IsName(r rune) bool {
 	if IsSpace(r) {
 		return false
 	}
 	return !strings.ContainsRune(RunesSpecial, r)
 }
@@ -1,27 +1,92 @@
-// Package parser parses dosini-style files
+// Package parser parses ini-style files
 package parser
 import (
 	"io"
 	"log"
 	"asciigoat.org/core/lexer"
 )
-// Parser parses a dosini-style document
+// Parser parses a ini-style document
 type Parser struct {
-	src *lexer.Reader
+	p TextParser
-	pos lexer.Position
+	// OnToken is called for each identified token. if it returns an error
 	// parsing is interrupted.
 	OnToken func(pos lexer.Position, typ TokenType, value string) error
 	// OnError is called in case of a parsing error, and it's allowed
 	// to replace the error returned by [Parser.Run].
 	// OnError is called for io.EOF, but [Parser.Run] will consider it
 	// normal termination.
 	OnError func(pos lexer.Position, content string, err error) error
 }
-// NewParser creates a dosini-style parser using
+func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
 	log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value)
 	return nil
 }
 // defaultOnError is the OnError callback installed by setDefaults when
 // none was given. It logs the error and returns it wrapped with its
 // position and content.
 func defaultOnError(pos lexer.Position, content string, err error) error {
 	log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
 	return NewError(pos, content, "", err)
 }
 // setDefaults installs the default callbacks on those the user
 // left unset, so they can be called unconditionally.
 func (p *Parser) setDefaults() {
 	if p.OnToken == nil {
 		p.OnToken = defaultOnToken
 	}
 	if p.OnError == nil {
 		p.OnError = defaultOnError
 	}
 }
 // emitString takes the text accepted so far and its position, and
 // hands them to the OnToken callback as a token of the given type.
 // It returns what the callback returns.
 func (p *Parser) emitString(typ TokenType) error {
 	pos, s := p.p.Emit()
 	return p.OnToken(pos, typ, s)
 }
 // emitError reports an error at the current position via the OnError
 // callback. The returned state is always nil, and the error is the
 // one returned by the callback or, when that is nil, the original.
 func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	if replaced := p.OnError(p.p.Position(), content, err); replaced != nil {
 		// the callback provided its own error
 		return nil, replaced
 	}
 	// keep the original error
 	return nil, err
 }
 // emitInvalidRune reports the given rune as unacceptable, using it
 // as content of the error.
 func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
 	content := string(r)
 	return p.emitError(content, lexer.ErrUnacceptableRune)
 }
 // stepLine discards the accepted data and moves the position
 // to the beginning of the next line.
 func (p *Parser) stepLine() {
 	p.p.StepLine()
 }
 // stepString discards the accepted data and moves the position
 // forward, on the same line, by the length of the discarded
 // content.
 func (p *Parser) stepString() {
 	p.p.Step()
 }
 // NewParser creates a ini-style parser using
 // an [io.Reader] as source
 func NewParser(r io.Reader) *Parser {
-	if r == nil {
+	var p *Parser
-		return nil
+
 	if r != nil {
 		p = new(Parser)
 		p.p.Init(r)
 	}
-	return &Parser{
+	return p
 		src: lexer.NewReader(r),
 	}
 }
@@ -0,0 +1,103 @@
 package parser
 import (
 	"bytes"
 	"io"
 	"strings"
 	"asciigoat.org/core/lexer"
 )
 // TextParser is a generic text parser.
 // It embeds a [lexer.Reader] and keeps track of the position
 // of the text that hasn't yet been emitted or discarded.
 type TextParser struct {
 	*lexer.Reader
 	// pos is the position of the first rune not yet emitted or discarded.
 	pos lexer.Position
 }
 // Init initializes the [TextParser] with a non-nil [io.Reader].
 // It panics if the parser or the reader are nil, or if the
 // parser has already been initialized.
 func (p *TextParser) Init(r io.Reader) {
 	if p == nil || r == nil {
 		panic("invalid call")
 	}
 	if p.Reader != nil {
 		panic("parser already initialized")
 	}
 	p.Reader = lexer.NewReader(r)
 	p.pos.Reset()
 }
 // InitBytes initializes the [TextParser] with a byte slice.
 // It panics under the same conditions as Init.
 func (p *TextParser) InitBytes(b []byte) {
 	p.Init(bytes.NewReader(b))
 }
 // InitString initializes the [TextParser] with a string.
 // It panics under the same conditions as Init.
 func (p *TextParser) InitString(s string) {
 	p.Init(strings.NewReader(s))
 }
 // Discard shadows [lexer.Reader]'s, dropping the accepted text and
 // moving the position by its positional length, so new lines
 // on the discarded data are taken into account.
 func (p *TextParser) Discard() {
 	dropped := p.Reader.Emit()
 	p.pos.Add(GetPositionalLength(dropped))
 }
 // Emit returns the position at which the accepted text starts
 // and the text itself, and moves the position past it.
 func (p *TextParser) Emit() (lexer.Position, string) {
 	start := p.pos
 	text := p.Reader.Emit()
 	p.pos.Add(GetPositionalLength(text))
 	return start, text
 }
 // Step discards what's been accepted and increments the
 // position assuming each discarded rune increments the
 // column counter by one.
 func (p *TextParser) Step() {
 	s := p.Reader.Emit()
 	// count runes, not bytes, so multi-byte whitespace moves the column
 	// by one, like GetPositionalLength does for emitted text.
 	var n int
 	for range s {
 		n++
 	}
 	p.pos.StepN(n)
 }
 // StepLine discards what's been accepted and moves the
 // position to the beginning of the next line
 func (p *TextParser) StepLine() {
 	p.Reader.Discard()
 	p.pos.StepLine()
 }
 // Position returns the position of the first character
 // of the accepted text. The position only moves when text is
 // emitted, discarded or stepped over, not when it is accepted.
 func (p *TextParser) Position() lexer.Position {
 	return p.pos
 }
 // AcceptNewLine checks if next is a new line.
 // It accepts "\n", "\n\r", "\r" and "\r\n", and tells if one was
 // accepted. Any other rune is put back, and a read error reports
 // false.
 func (p *TextParser) AcceptNewLine() bool {
 	first, _, err := p.ReadRune()
 	if err != nil {
 		return false
 	}
 	// rune that may follow to complete a two runes line break
 	var second rune
 	switch first {
 	case '\n':
 		second = '\r'
 	case '\r':
 		second = '\n'
 	default:
 		// not a line break
 		p.UnreadRune()
 		return false
 	}
 	p.AcceptRune(second)
 	return true
 }
 // AcceptRune checks if next is the specified rune, returning
 // what Accept reports for a condition matching only that rune.
 func (p *TextParser) AcceptRune(r rune) bool {
 	isWanted := func(next rune) bool {
 		return next == r
 	}
 	return p.Accept(isWanted)
 }
@@ -0,0 +1,38 @@
 package parser
 import (
 	"io"
 	"asciigoat.org/core/lexer"
 )
 // positionLengthParser walks a text accumulating on the embedded
 // [lexer.Position] how many lines and columns it spans.
 type positionLengthParser struct {
 	TextParser
 	lexer.Position
 }
 // lexStart consumes the whole text, stepping a line for each line
 // break and a column for any other rune. It returns io.EOF once
 // nothing else can be accepted.
 func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
 	for {
 		if p.AcceptNewLine() {
 			p.Position.StepLine()
 			continue
 		}
 		if !p.Accept(IsAny) {
 			// nothing left
 			return nil, io.EOF
 		}
 		p.Position.StepN(1)
 	}
 }
 // GetPositionalLength calculates the [lexer.Position] at
 // the end of a text, counting from a zero position. Line breaks
 // step a line and any other rune steps a column.
 // An empty text yields the zero [lexer.Position].
 func GetPositionalLength(s string) lexer.Position {
 	var p positionLengthParser
 	// only non-empty text needs to be scanned
 	if s != "" {
 		p.InitString(s)
 		// the scan always finishes with io.EOF, the
 		// result is what has been accumulated on p.Position.
 		_ = lexer.Run(p.lexStart)
 	}
 	return p.Position
 }
@@ -0,0 +1,31 @@
 package parser
 //go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
 // A TokenType is a type of Token
 type TokenType uint
 const (
 	// TokenUnknown represents a Token that hasn't been identified
 	TokenUnknown TokenType = iota
 	// TokenSectionStart indicates the opening marker of a section declaration.
 	// The left square bracket.
 	TokenSectionStart
 	// TokenSectionEnd indicates the closing marker of a section declaration.
 	// The right square bracket.
 	TokenSectionEnd
 	// TokenSectionName represents the section name between the square brackets
 	TokenSectionName
 	// TokenSectionSubname represents a secondary name in the section represented
 	// between quotes after the section name.
 	// e.g.
 	// [section_name "section_subname"]
 	TokenSectionSubname
 	// TokenComment represents a comment, including the initial ';' or '#' until
 	// the end of the line.
 	TokenComment
 	// TokenFieldKey represents a field name in a `key = value` entry
 	TokenFieldKey
 	// TokenFieldValue represents a field value in a `key = value` entry
 	TokenFieldValue
 )
@@ -0,0 +1,30 @@
 // Code generated by "stringer -type=TokenType"; DO NOT EDIT.
 package parser
 import "strconv"
 func _() {
 	// An "invalid array index" compiler error signifies that the constant values have changed.
 	// Re-run the stringer command to generate them again.
 	var x [1]struct{}
 	_ = x[TokenUnknown-0]
 	_ = x[TokenSectionStart-1]
 	_ = x[TokenSectionEnd-2]
 	_ = x[TokenSectionName-3]
 	_ = x[TokenSectionSubname-4]
 	_ = x[TokenComment-5]
 	_ = x[TokenFieldKey-6]
 	_ = x[TokenFieldValue-7]
 }
 const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
 var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
 func (i TokenType) String() string {
 	if i >= TokenType(len(_TokenType_index)-1) {
 		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
 	}
 	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
 }
@@ -4,4 +4,5 @@ package tools
 import (
 	_ "github.com/mgechev/revive"
 	_ "golang.org/x/tools/cmd/stringer"
 )
Author	SHA1	Message	Date
amery	fa9a7b4735	basic: rename and document queue related methods Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 17:37:09 +00:00
amery	cfd4a94559	basic: call executeFinal() when OnToken() fails Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 17:37:09 +00:00
amery	d8af7821e4	Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8 ) Reviewed-on: #8	2023-09-04 19:33:24 +02:00
amery	8f3e59ec36	parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:25:20 +00:00
amery	d316031c44	basic: cleanup using parser.NewError() Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:25:20 +00:00
amery	c3883cbb0d	parser: introduce NewError() to create lexer.Error using lexer.Position Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:16:43 +00:00
amery	314c004efd	Merge pull request 'parser: introduce TextParser and refactor Parser' (#7 ) Reviewed-on: #7	2023-09-04 16:17:04 +02:00
amery	30a86e170b	parser: use GetPositionalLength() on TextParser.Discard() and TextParser.Emit() Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 13:32:27 +00:00
amery	8cc75da138	parser: introduce GetPositionalLength() Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-03 17:43:46 +00:00
amery	01cd4139bd	parser.Parser: refactor using TextParser Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-03 17:01:26 +00:00
amery	e34e8eda0a	parser.TextParser: AcceptNewLine() and AcceptRune() Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-03 17:01:26 +00:00
amery	ecff6f6016	parser: introduce generic-ish TextParser Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-03 17:01:26 +00:00
amery	79fc27f965	chore: update dependencies Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-03 15:09:05 +00:00
amery	11c5ce70a6	Merge pull request 'basic: implement Section.String() and Field.String()' (#5 ) Reviewed-on: #5	2023-09-02 18:00:08 +02:00
amery	16d52188f6	basic: implement Section.String() and Field.String() to ease development Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 21:13:13 +00:00
amery	a4f981610e	Merge pull request 'basic: introduce basic one-shot INI-style decoder' (#4 ) Reviewed-on: #4	2023-09-01 19:29:11 +02:00
amery	a1e20fa3b6	basic: introduce Document.WriteTo() and Document.String() producing an INI-style representation of the Document Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 14:22:48 +00:00
amery	174f72c4cf	basic: introduce basic one-shot INI-style decoder Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 14:21:55 +00:00
amery	c92e0df47b	chore: update asciigoat.org/core Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 13:04:05 +00:00
amery	cf100578c0	Merge pull request 'README: add initial description of the package' (#3 ) Reviewed-on: #3	2023-09-01 15:02:34 +02:00
amery	2eacc65215	README: add initial description of the package Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 12:57:55 +00:00
amery	169379f5c6	Merge pull request 'parser: implement basic dosini parsing' (#2 ) Reviewed-on: #2	2023-08-31 16:17:11 +02:00
amery	eb36c195c0	parser: implement basic dosini parsing Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 01:55:47 +00:00
amery	1090a374f0	parser: add initial Parser emitting non-whitespace tokens Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 00:45:54 +00:00
amery	a15deb7e42	tools: add stringer support Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-30 23:01:22 +00:00