build-sys: use local asciigoat.org/core [DO-NOT-MERGE]

Signed-off-by: Alejandro Mery <amery@jpi.io>
parser: add initial Parser emitting non-whitespace tokens
2023-08-31 00:19:16 +00:00 · 2023-08-31 00:18:36 +00:00
17 changed files with 82 additions and 1038 deletions
@@ -1,7 +0,0 @@
-{
-    "cSpell.words": [
-        "asciigoat",
-        "Subname",
-        "unescapes"
-    ]
-}
@@ -1,57 +0,0 @@
-# asciigoat's INI parser
-
-[![Go Reference][godoc-badge]][godoc]
-[![Go Report Card][goreport-badge]][goreport]
-
-`asciigoat.org/ini` is a simple Go library that very loosely parses
-[`INI`-style][wikipedia-dosini] documents allowing the implementation
-of stricter parsers of similar form.
-
-**asciigoat** is [MIT](https://opensource.org/license/mit/) licensed.
-
-[godoc]: https://pkg.go.dev/asciigoat.org/ini
-[godoc-badge]: https://pkg.go.dev/badge/asciigoat.org/ini.svg
-[goreport]: https://goreportcard.com/report/asciigoat.org/ini
-[goreport-badge]: https://goreportcard.com/badge/asciigoat.org/ini
-
-[godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer
-[godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser
-[godoc-basic-parser]: https://pkg.go.dev/asciigoat.org/ini/basic#Decode
-
-[wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file
-
-## Parser
-
-[`parser.Parser`][godoc-parser-parser] uses
-[`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style document
-emitting tokens and errors via callbacks.
-
-## Basic Parser
-
-[`basic.Decode()`][godoc-basic-parser] provides a one-shot decoder
-that returns a structured document for you to post-process.
-
-To allow for correct handling of repetition of section and field names downstream,
-it uses arrays instead of maps, and makes almost no judgment
-about what section or field names are acceptable.
-
-## Other Implementations
-
-Other implementations exist, and they are mature and feature-rich, but they
-are highly opinionated about what's a valid file. Built around maps they don't
-allow repeating names and constrain which characters can be used.
-
-These are great when you can adapt, or already agree, to their conditions but
-that's not always the case when you are parsing configuration files from
-other applications and that's what [asciigoat.org/ini][godoc] attempts to solve.
-
-* [gcfg](https://pkg.go.dev/gopkg.in/gcfg.v1)
-* [unknwon's go-ini](https://github.com/go-ini/ini)
-* [wlevene's GoINI](https://github.com/wlevene/ini)
-
-## See also
-
-* [asciigoat.org/core](https://asciigoat.org/core)
-* [oss.jpi.io](https://oss.jpi.io)
-* [INI file][wikipedia-dosini] (_wikipedia_)
-* [TOML](https://www.kelche.co/blog/go/toml/)
@@ -1,24 +0,0 @@
-// Package basic provides a basic representation of dosini-style documents
-package basic
-
-// Document represents an INI-style document
-type Document struct {
-	Global []Field
-
-	Sections []Section
-}
-
-// Section represents an INI-style section with optional GIT-style IDs
-type Section struct {
-	Key     string
-	ID      string
-	EmptyID bool
-
-	Fields []Field
-}
-
-// Field represents a key = value entry in an INI-style document
-type Field struct {
-	Key   string
-	Value string
-}
@@ -1,49 +0,0 @@
-package basic
-
-import (
-	"bytes"
-	"io"
-	"io/fs"
-	"strings"
-
-	"asciigoat.org/ini/parser"
-)
-
-type decoder struct {
-	p   *parser.Parser
-	out *Document
-
-	queue   []*token
-	current *Section
-}
-
-// Decode attempts to decode an INI-style document from an [io.Reader] into a [Document]
-func Decode(r io.Reader) (*Document, error) {
-	var out Document
-
-	if r == nil {
-		return nil, fs.ErrNotExist
-	}
-
-	// parser
-	p := parser.NewParser(r)
-	// decoder
-	dec := decoder{p: p, out: &out}
-	// glue
-	p.OnToken = dec.OnToken
-	p.OnError = dec.OnError
-
-	// Go!
-	err := p.Run()
-	return &out, err
-}
-
-// DecodeBytes attempts to decode an INI-style bytes array into a [Document]
-func DecodeBytes(b []byte) (*Document, error) {
-	return Decode(bytes.NewReader(b))
-}
-
-// DecodeString attempts to decode an INI-style string into a [Document]
-func DecodeString(s string) (*Document, error) {
-	return Decode(strings.NewReader(s))
-}
@@ -1,22 +0,0 @@
-package basic
-
-import (
-	"errors"
-
-	"asciigoat.org/core/lexer"
-	"asciigoat.org/ini/parser"
-)
-
-var (
-	errInvalidToken = errors.New("invalid token")
-)
-
-func newErrInvalidToken(t *token) *lexer.Error {
-	return parser.NewError(t.pos, t.value, "", errInvalidToken)
-}
-
-func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
-	err = parser.NewError(pos, content, "", err)
-	dec.executeFinal()
-	return err
-}
@@ -1,165 +0,0 @@
-package basic
-
-import (
-	"fmt"
-
-	"asciigoat.org/core/lexer"
-	"asciigoat.org/ini/parser"
-)
-
-type token struct {
-	pos   lexer.Position
-	typ   parser.TokenType
-	value string
-}
-
-func (t token) String() string {
-	return fmt.Sprintf("%s %s: %q", t.pos, t.typ, t.value)
-}
-
-func (dec *decoder) executeFinal() {
-	if len(dec.queue) > 0 {
-		// we have unfinished businesses
-		switch dec.queue[0].typ {
-		case parser.TokenSectionStart:
-			dec.execute(parser.TokenSectionEnd)
-		case parser.TokenFieldKey:
-			dec.execute(parser.TokenFieldValue)
-		}
-	}
-}
-
-func (dec *decoder) execute(typ parser.TokenType) {
-	switch typ {
-	case parser.TokenSectionEnd:
-		name1, ok1 := dec.queueValue(1, parser.TokenSectionName)
-
-		if ok1 {
-			name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname)
-
-			dec.addSection(name1, name2, ok2)
-		}
-
-		dec.queueReset()
-	case parser.TokenFieldValue:
-		key, _ := dec.queueValue(0, parser.TokenFieldKey)
-		value, _ := dec.queueValue(1, parser.TokenFieldValue)
-
-		dec.addField(key, value)
-		dec.queueReset()
-	}
-}
-
-func (dec *decoder) addSection(key, id string, allowEmptyID bool) {
-	emptyID := allowEmptyID && id == ""
-
-	// index for dec.current
-	n := len(dec.out.Sections)
-
-	// new section
-	dec.out.Sections = append(dec.out.Sections, Section{
-		Key:     key,
-		ID:      id,
-		EmptyID: emptyID,
-	})
-
-	// pointer to the latest section
-	dec.current = &dec.out.Sections[n]
-}
-
-func (dec *decoder) addField(key, value string) {
-	field := Field{
-		Key:   key,
-		Value: value,
-	}
-
-	if p := dec.current; p != nil {
-		// in section
-		p.Fields = append(p.Fields, field)
-	} else {
-		// global
-		dec.out.Global = append(dec.out.Global, field)
-	}
-}
-
-// queueValue extracts the value of element on the queue if the type matches.
-func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
-	switch {
-	case idx < 0 || idx >= len(dec.queue):
-		// out of range
-		return "", false
-	case dec.queue[idx].typ != typ:
-		// wrong type
-		return "", false
-	default:
-		return dec.queue[idx].value, true
-	}
-}
-
-// queueReset removes all tokens from the queue
-func (dec *decoder) queueReset() {
-	dec.queue = dec.queue[:0]
-}
-
-// queueDepth confirms the current depth of the queue
-func (dec *decoder) queueDepth(depth int) bool {
-	return len(dec.queue) == depth
-}
-
-// queueDepthType confirms the current depth of the queue and the type of the last
-// element.
-func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool {
-	if dec.queueDepth(depth) {
-		return dec.queueType(depth-1, typ)
-	}
-	return false
-}
-
-// queueType tells if the specified element on the queue is of the required type.
-func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
-	_, ok := dec.queueValue(idx, typ)
-	return ok
-}
-
-func (dec *decoder) typeOK(typ parser.TokenType) bool {
-	switch typ {
-	case parser.TokenSectionStart, parser.TokenFieldKey:
-		// first token only
-		return dec.queueDepth(0)
-	case parser.TokenSectionName:
-		// right after TokenSectionStart
-		return dec.queueDepthType(1, parser.TokenSectionStart)
-	case parser.TokenSectionSubname:
-		// right after TokenSectionName
-		return dec.queueDepthType(2, parser.TokenSectionName)
-	case parser.TokenSectionEnd:
-		// only on a section with name
-		return dec.queueType(1, parser.TokenSectionName)
-	case parser.TokenFieldValue:
-		// right after a TokenFieldKey
-		return dec.queueDepthType(1, parser.TokenFieldKey)
-	default:
-		// never
-		return false
-	}
-}
-
-func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value string) error {
-	t := &token{pos, typ, value}
-
-	switch {
-	case typ == parser.TokenComment:
-		// ignore comments
-		return nil
-	case dec.typeOK(typ):
-		// acceptable token
-		dec.queue = append(dec.queue, t)
-		dec.execute(typ)
-		return nil
-	default:
-		// unacceptable
-		err := newErrInvalidToken(t)
-		dec.executeFinal()
-		return err
-	}
-}
@@ -1,104 +0,0 @@
-package basic
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-
-	"asciigoat.org/ini/parser"
-)
-
-// WriteNewLine is the new line representation used by [Document.WriteTo]
-const WriteNewLine = "\n"
-
-// AsBuffer returns an INI representation of the document on
-// a memory buffer
-func (doc *Document) AsBuffer(nl string) *bytes.Buffer {
-	var buf bytes.Buffer
-
-	if len(doc.Global) > 0 {
-		_, _ = writeFieldsTo(&buf, doc.Global, nl)
-	}
-
-	for _, sec := range doc.Sections {
-		if buf.Len() > 0 {
-			_, _ = buf.WriteString(nl)
-		}
-
-		_ = writeSectionToBuffer(&buf, &sec, nl)
-	}
-
-	return &buf
-}
-
-func writeFieldsTo(w io.Writer, fields []Field, nl string) (int64, error) {
-	var written int
-	for _, field := range fields {
-		n, err := fmt.Fprintf(w, "%s = %q%s", field.Key, field.Value, nl)
-		switch {
-		case err != nil:
-			return int64(written), err
-		case n > 0:
-			written += n
-		}
-	}
-	return int64(written), nil
-}
-
-// String generates a string output for "%s"
-func (field Field) String() string {
-	var buf bytes.Buffer
-
-	_, _ = writeFieldsTo(&buf, []Field{field}, WriteNewLine)
-	return buf.String()
-}
-
-func writeSectionToBuffer(w *bytes.Buffer, sec *Section, nl string) int {
-	var written, n int
-
-	_, _ = w.WriteRune(parser.RuneSectionStart)
-	written++
-
-	n, _ = w.WriteString(sec.Key)
-	written += n
-
-	switch {
-	case sec.EmptyID:
-		n, _ = w.WriteString(" \"\"")
-		written += n
-	case sec.ID != "":
-		_, _ = w.WriteRune(' ')
-		n, _ = fmt.Fprintf(w, "%q", sec.ID)
-		written += n + 1
-	}
-
-	_, _ = w.WriteRune(parser.RuneSectionEnd)
-	written++
-
-	n, _ = w.WriteString(nl)
-	written += n
-
-	n64, _ := writeFieldsTo(w, sec.Fields, nl)
-	return written + int(n64)
-}
-
-// String generates a string output for "%s"
-func (sec *Section) String() string {
-	var buf bytes.Buffer
-
-	_ = writeSectionToBuffer(&buf, sec, WriteNewLine)
-	return buf.String()
-}
-
-// WriteTo writes an INI representation of the document
-// onto the provided writer.
-func (doc *Document) WriteTo(w io.Writer) (int64, error) {
-	buf := doc.AsBuffer(WriteNewLine)
-	return buf.WriteTo(w)
-}
-
-// String generates a string output for "%s"
-func (doc *Document) String() string {
-	buf := doc.AsBuffer(WriteNewLine)
-	return buf.String()
-}
@@ -2,30 +2,26 @@ module asciigoat.org/ini

 go 1.19

-replace (
-	asciigoat.org/core => ../core
-	darvaza.org/core => ../../darvaza.org/core
-)
+replace asciigoat.org/core => ../core

 require (
-	asciigoat.org/core v0.3.9
+	asciigoat.org/core v0.3.6
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )

 require (
 	github.com/BurntSushi/toml v1.3.2 // indirect
-	github.com/chavacava/garif v0.1.0 // indirect
+	github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab // indirect
 	github.com/fatih/color v1.15.0 // indirect
 	github.com/fatih/structtag v1.2.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
-	github.com/mattn/go-runewidth v0.0.15 // indirect
+	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517 // indirect
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
-	github.com/rivo/uniseg v0.4.4 // indirect
 	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
 )
@@ -1,7 +1,7 @@
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
-github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
-github.com/chavacava/garif v0.1.0/go.mod h1:XMyYCkEL58DF0oyW4qDjjnPWONs2HBqYKI+UIPD+Gww=
+github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
+github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab/go.mod h1:XMyYCkEL58DF0oyW4qDjjnPWONs2HBqYKI+UIPD+Gww=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -14,9 +14,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
-github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517 h1:zpIH83+oKzcpryru8ceC6BxnoG8TBrhgAvRg8obzup0=
 github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg=
 github.com/mgechev/revive v1.3.3 h1:GUWzV3g185agbHN4ZdaQvR6zrLVYTUSA2ktvIinivK0=
@@ -29,9 +28,6 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
-github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
-github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -1,88 +0,0 @@
-package parser
-
-import (
-	"strings"
-
-	"asciigoat.org/core/lexer"
-)
-
-type commaArrayParser struct {
-	TextParser
-
-	out []string
-}
-
-func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
-	for {
-		r, _, err := p.ReadRune()
-		switch {
-		case err != nil:
-			// EOF
-			return nil, err
-		case r == RuneQuotes:
-			// Quoted Value
-			return p.lexQuotedString, nil
-		case IsNewLine(r):
-			// new lines are acceptable when parsing a string for
-			// comma delimited arrays. but make sure we discard it
-			// complete
-			p.UnreadRune()
-			p.AcceptNewLine()
-			p.Discard()
-		case lexer.IsSpace(r):
-			// discard whitespace outside quotes
-			p.Discard()
-		default:
-			p.UnreadRune()
-			return p.lexWord, nil
-		}
-	}
-}
-
-func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
-	for {
-		r, _, err := p.ReadRune()
-		switch {
-		case err != nil:
-			// done. store what we got and move on
-			_, s := p.Emit()
-			p.out = append(p.out, s)
-			return nil, err
-		case r == ',':
-			// done
-			_, s := p.Emit()
-			// remove comma, trim and append to output
-			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
-			p.out = append(p.out, s)
-			return p.lexStart, nil
-		}
-	}
-}
-
-func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
-	s, err := lexQuotedString(&p.TextParser)
-	if err != nil {
-		return nil, err
-	}
-
-	p.Discard()
-	p.out = append(p.out, s)
-	return p.lexStart, nil
-}
-
-func (p *commaArrayParser) Run() ([]string, error) {
-	err := lexer.Run(p.lexStart)
-
-	return p.out, err
-}
-
-// SplitCommaArray splits comma separated strings, removing whitespace
-// and respecting quoted literals.
-func SplitCommaArray(s string) ([]string, error) {
-	if s != "" {
-		var p commaArrayParser
-		p.InitString(s)
-		return p.Run()
-	}
-	return nil, nil
-}
@@ -1,62 +0,0 @@
-package parser
-
-import (
-	"io/fs"
-
-	"asciigoat.org/core/lexer"
-)
-
-// NewError creates a lexer.Error using a lexer.Position
-func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
-	return &lexer.Error{
-		Line:    pos.Line,
-		Column:  pos.Column,
-		Content: content,
-		Hint:    hint,
-		Err:     err,
-	}
-}
-
-// ErrPlusPosition returns a copy of the given [lexer.Error]
-// offsetting the Line/Column information.
-func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
-	pos.Add(lexer.Position{
-		Line:   e.Line,
-		Column: e.Column,
-	})
-
-	return NewError(pos, e.Content, e.Hint, e.Err)
-}
-
-// NewErrIncompleteQuotedString returns a [lexer.Error]
-// indicating the quoted string being parsed wasn't correctly
-// terminated
-func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
-	return newErrIncomplete(p, "incomplete quoted string")
-}
-
-// NewErrIncompleteEscaped returns a [lexer.Error]
-// indicating the text being parsed wasn't correctly
-// terminated
-func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
-	return newErrIncomplete(p, "incomplete escaped string")
-}
-
-func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
-	pos, s := p.Emit()
-	pos.Add(GetPositionalLength(s))
-
-	return NewError(pos, s, hint, fs.ErrInvalid)
-}
-
-// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating
-// the specified sequence, at the end of the accepted buffer,
-// is invalid
-func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
-	pos, s := p.Position(), p.String()
-
-	s = s[:len(s)-len(seq)]
-	pos.Add(GetPositionalLength(s))
-
-	return NewError(pos, seq, "invalid escape character", fs.ErrInvalid)
-}
@@ -5,111 +5,62 @@ import "asciigoat.org/core/lexer"
 // Run parses the source
 func (p *Parser) Run() error {
 	p.setDefaults()
+	p.pos.Reset()

 	return lexer.Run(p.lexStart)
 }

 func (p *Parser) lexStart() (lexer.StateFn, error) {
 	for {
-		r, _, err := p.p.ReadRune()
+		r, _, err := p.src.ReadRune()
 		switch {
 		case err != nil:
-			return p.emitError("", err)
+			err = p.emitError("", err)
+			return nil, err
 		case IsNewLine(r):
-			// new line
-			p.p.UnreadRune()
-			p.p.AcceptNewLine()
+			p.lexNewLine(r)
 			p.stepLine()
 		case IsSpace(r):
-			// whitespace
-			p.stepString()
-		case IsCommentStart(r):
-			// switch to comment lexer
-			p.p.UnreadRune()
-			return p.lexComment, nil
-		case IsSectionStart(r):
-			// section
-			return p.lexSectionStart, nil
+			p.stepRune()
 		default:
-			// entry
-			p.p.UnreadRune()
-			return p.lexEntryStart, nil
+			p.src.UnreadRune()
+			return p.lexToken, nil
 		}
 	}
 }

-func (p *Parser) lexComment() (lexer.StateFn, error) {
-	// until the end of the line
-	p.p.AcceptAll(IsNotNewLine)
-
-	err := p.emitString(TokenComment)
-	return p.lexStart, err
-}
-
-func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
-	if err := p.emitString(TokenSectionStart); err != nil {
-		return nil, err
-	}
-
-	// remove whitespace between `[` and the name
-	if p.p.AcceptAll(IsSpaceNotNewLine) {
-		p.stepString()
-	}
-
-	if !p.p.AcceptAll(IsName) {
-		// no name
-		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
-	}
-
-	if err := p.emitString(TokenSectionName); err != nil {
-		return nil, err
-	}
-
-	// remove whitespace between the name and the closing `]`
-	if p.p.AcceptAll(IsSpaceNotNewLine) {
-		p.stepString()
-	}
-
-	r, _, err := p.p.ReadRune()
-	switch {
-	case err != nil:
-		return p.emitError("", err)
-	case IsSectionEnd(r):
-		err := p.emitString(TokenSectionEnd)
-		return p.lexStart, err
+func (p *Parser) lexNewLine(r1 rune) {
+	// r1 is guaranteed to be either '\r' or '\n'
+	r2, _, err := p.src.ReadRune()
+	switch r1 {
+	case '\n':
+		switch {
+		case r2 == '\r':
+			// LN CR
+		case err == nil:
+			// LN
+			p.src.UnreadRune()
+		default:
+			// LN EOF
+		}
+	case '\r':
+		switch {
+		case r2 == '\n':
+			// CR LN
+		case err == nil:
+			// CR
+			p.src.UnreadRune()
+		default:
+			// CR EOF
+		}
 	default:
-		return p.emitInvalidRune(r)
+		panic("unreachable")
 	}
 }

-func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
-	p.p.AcceptAll(IsName)
-	if err := p.emitString(TokenFieldKey); err != nil {
-		return nil, err
-	}
-
-	// ignore whitespace between key and the '=' sign
-	if p.p.AcceptAll(IsSpaceNotNewLine) {
-		p.stepString()
-	}
-
-	r, _, err := p.p.ReadRune()
-	switch {
-	case err != nil:
-		return p.emitError("", err)
-	case r != RuneFieldEqual:
-		return p.emitInvalidRune(r)
-	}
-
-	// ignore whitespace between the '=' and the value
-	if p.p.AcceptAll(IsSpaceNotNewLine) {
-		p.stepString()
-	}
-
-	p.p.AcceptAll(IsNotNewLine)
-	if err := p.emitString(TokenFieldValue); err != nil {
-		return nil, err
-	}
+func (p *Parser) lexToken() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsNotSpace)

+	err := p.emitString(TokenUnknown)
 	return p.lexStart, err
 }
@@ -1,39 +1,9 @@
 package parser

 import (
-	"strings"
-
 	"asciigoat.org/core/lexer"
 )

-const (
-	RuneComment      = ';'  // RuneComment is the standard INI comment character
-	RuneCommentExtra = '#'  // RuneCommentExtra is UNIX shell's comment character
-	RuneSectionStart = '['  // RuneSectionStart indicates the start of a section declaration
-	RuneSectionEnd   = ']'  // RuneSectionEnd indicates the end of a section declaration
-	RuneFieldEqual   = '='  // RuneFieldEqual separates field keys from their values
-	RuneQuotes       = '"'  // RuneQuotes indicates the start and end of a quoted value
-	RuneEscape       = '\\' // RuneEscape indicates the next rune is escaped
-)
-
-var (
-	// RunesComment is a string containing all runes acceptable to start comments
-	RunesComment = string([]rune{
-		RuneComment,
-		RuneCommentExtra,
-	})
-	// RunesSpecial is a string containing all the runes with special meaning
-	RunesSpecial = string([]rune{
-		RuneComment,
-		RuneCommentExtra,
-		RuneSectionStart,
-		RuneSectionEnd,
-		RuneFieldEqual,
-		RuneQuotes,
-		RuneEscape,
-	})
-)
-
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
@@ -43,34 +13,4 @@ var (
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
-	// IsCommentStart ...
-	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
-
-// IsAny accepts any rune
-func IsAny(_ rune) bool {
-	return true
-}
-
-// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
-func IsSpaceNotNewLine(r rune) bool {
-	return IsSpace(r) && !IsNewLine(r)
-}
-
-// IsSectionStart indicates the rune starts the section declaration
-func IsSectionStart(r rune) bool { return r == RuneSectionStart }
-
-// IsSectionEnd indicates the rune ends the section declaration
-func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
-
-// IsName indicates a rune is acceptable for section or field names
-func IsName(r rune) bool {
-	switch {
-	case IsSpace(r):
-		return false
-	case strings.ContainsRune(RunesSpecial, r):
-		return false
-	default:
-		return true
-	}
-}
@@ -1,4 +1,4 @@
-// Package parser parses ini-style files
+// Package parser parses dosini-style files
 package parser

 import (
@@ -8,9 +8,10 @@ import (
 	"asciigoat.org/core/lexer"
 )

-// Parser parses a ini-style document
+// Parser parses a dosini-style document
 type Parser struct {
-	p TextParser
+	src *lexer.Reader
+	pos lexer.Position

 	// OnToken is called for each identified token. if it returns an error
 	// parsing is interrupted.
@@ -31,7 +32,13 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
 func defaultOnError(pos lexer.Position, content string, err error) error {
 	log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)

-	return NewError(pos, content, "", err)
+	return lexer.Error{
+		Line:   pos.Line,
+		Column: pos.Column,
+
+		Content: content,
+		Err:     err,
+	}
 }

 func (p *Parser) setDefaults() {
@@ -44,49 +51,57 @@ func (p *Parser) setDefaults() {
 }

 func (p *Parser) emitString(typ TokenType) error {
-	pos, s := p.p.Emit()
-	return p.OnToken(pos, typ, s)
+	var err error
+
+	s := p.src.Emit()
+	err = p.OnToken(p.pos, typ, s)
+	p.pos.StepN(len(s))
+
+	return err
 }

-func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
-	pos := p.p.Position()
-	err2 := p.OnError(pos, content, err)
+func (p *Parser) emitError(content string, err error) error {
+	err2 := p.OnError(p.pos, content, err)
 	switch {
 	case err2 != nil:
 		// return wrapped error
-		return nil, err2
+		return err2
 	default:
 		// return original error
-		return nil, err
+		return err
 	}
 }

-func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
-	return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
-}
-
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {
-	p.p.StepLine()
+	p.src.Discard()
+	p.pos.StepLine()
+}
+
+// stepRune discards the data and moves the position
+// one rune forward on the same line.
+func (p *Parser) stepRune() {
+	p.src.Discard()
+	p.pos.Step()
 }

 // stepString discards the data and moves the position
 // forward on the same line the length of the discarded
 // content.
 func (p *Parser) stepString() {
-	p.p.Step()
+	s := p.src.Emit()
+	p.pos.StepN(len(s))
 }

-// NewParser creates a ini-style parser using
+// NewParser creates a dosini-style parser using
 // an [io.Reader] as source
 func NewParser(r io.Reader) *Parser {
-	var p *Parser
-
-	if r != nil {
-		p = new(Parser)
-		p.p.Init(r)
+	if r == nil {
+		return nil
 	}

-	return p
+	return &Parser{
+		src: lexer.NewReader(r),
+	}
 }
@@ -1,103 +0,0 @@
-package parser
-
-import (
-	"bytes"
-	"io"
-	"strings"
-
-	"asciigoat.org/core/lexer"
-)
-
-// TextParser is a generic text parser.
-type TextParser struct {
-	*lexer.Reader
-	pos lexer.Position
-}
-
-// Init initializes the [TextParser] with a non-nil [io.Reader].
-func (p *TextParser) Init(r io.Reader) {
-	switch {
-	case p == nil || r == nil:
-		panic("invalid call")
-	case p.Reader != nil:
-		panic("parser already initialized")
-	default:
-		p.Reader = lexer.NewReader(r)
-		p.pos.Reset()
-	}
-}
-
-// InitBytes initializes the [TextParser] with a byte array
-func (p *TextParser) InitBytes(b []byte) {
-	p.Init(bytes.NewReader(b))
-}
-
-// InitString initializes the [TextParser] with a string
-func (p *TextParser) InitString(s string) {
-	p.Init(strings.NewReader(s))
-}
-
-// Discard shadows [lexer.Reader]'s, and takes in consideration
-// new lines on the discarded data when moving the position
-func (p *TextParser) Discard() {
-	s := p.Reader.Emit()
-	l := GetPositionalLength(s)
-	p.pos.Add(l)
-}
-
-// Emit returns the accepted text, its position, and
-// moves the cursor position accordingly
-func (p *TextParser) Emit() (lexer.Position, string) {
-	pos := p.pos
-	s := p.Reader.Emit()
-	l := GetPositionalLength(s)
-	p.pos.Add(l)
-
-	return pos, s
-}
-
-// Step discards what's been accepted and increments the
-// position assuming they all increment the column counter
-func (p *TextParser) Step() {
-	s := p.Reader.Emit()
-	p.pos.StepN(len(s))
-}
-
-// StepLine discards what's been accepted and moves the
-// position to the beginning of the next line
-func (p *TextParser) StepLine() {
-	p.Reader.Discard()
-	p.pos.StepLine()
-}
-
-// Position returns the position of the first character
-// of the accepted text
-func (p *TextParser) Position() lexer.Position {
-	return p.pos
-}
-
-// AcceptNewLine checks if next is a new line.
-// It accepts "\n", "\n\r", "\r" and "\r\n".
-func (p *TextParser) AcceptNewLine() bool {
-	r1, _, err := p.ReadRune()
-	switch {
-	case err != nil:
-		return false
-	case r1 == '\n':
-		p.AcceptRune('\r')
-		return true
-	case r1 == '\r':
-		p.AcceptRune('\n')
-		return true
-	default:
-		p.UnreadRune()
-		return false
-	}
-}
-
-// AcceptRune checks if next is the specified rune
-func (p *TextParser) AcceptRune(r rune) bool {
-	return p.Accept(func(r2 rune) bool {
-		return r == r2
-	})
-}
@@ -1,38 +0,0 @@
-package parser
-
-import (
-	"io"
-
-	"asciigoat.org/core/lexer"
-)
-
-type positionLengthParser struct {
-	TextParser
-
-	lexer.Position
-}
-
-func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
-	for {
-		switch {
-		case p.AcceptNewLine():
-			p.Position.StepLine()
-		case p.Accept(IsAny):
-			p.Position.StepN(1)
-		default:
-			return nil, io.EOF
-		}
-	}
-}
-
-// GetPositionalLength calculates the [lexer.Position] at
-// the end of a text.
-func GetPositionalLength(s string) lexer.Position {
-	var p positionLengthParser
-	if s == "" {
-		p.InitString(s)
-
-		_ = lexer.Run(p.lexStart)
-	}
-	return p.Position
-}
@@ -1,135 +0,0 @@
-package parser
-
-import (
-	"strings"
-
-	"asciigoat.org/core/lexer"
-)
-
-// AcceptQuotedString consumes a quoted string from the source
-// and returns it unquoted and unescaped
-func (p *TextParser) AcceptQuotedString() (string, bool, error) {
-	r, _, err := p.ReadRune()
-	switch {
-	case err != nil:
-		// nothing here
-		return "", false, err
-	case r != RuneQuotes:
-		// not for us
-		p.UnreadRune()
-		return "", false, nil
-	default:
-		// let's roll
-		s, err := lexQuotedString(p)
-		switch {
-		case err != nil:
-			// bad quoted string
-			return "", false, err
-		default:
-			// success
-			return s, true, nil
-		}
-	}
-}
-
-func lexQuotedString(p *TextParser) (string, *lexer.Error) {
-	s, ok, err := lexQuotedStringNoEscape(p)
-	switch {
-	case err != nil:
-		return "", err
-	case ok:
-		return s, nil
-	default:
-		// escape character detected
-		return lexQuotedStringEscaped(p)
-	}
-}
-
-func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
-	for {
-		r, _, err := p.ReadRune()
-		switch {
-		case err != nil:
-			// incomplete
-			return "", false, NewErrIncompleteQuotedString(p)
-		case r == RuneQuotes:
-			// end, just remove the quotes
-			s := p.String()
-			l := len(s)
-			return s[1 : l-1], true, nil
-		case r == RuneEscape:
-			// things just got complicated...
-			p.UnreadRune()
-			return "", false, nil
-		case IsNewLine(r):
-			// new lines within quoted values are acceptable
-			p.UnreadRune()
-			p.AcceptNewLine()
-		default:
-			// continue
-		}
-	}
-}
-
-// lexQuotedStringEscaped unquotes and unescapes the content of a quoted string
-func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
-	var result strings.Builder
-
-	// append what was accepted before the escape character
-	_, _ = result.WriteString(p.String()[1:])
-
-	for {
-		r, _, err := p.ReadRune()
-		switch {
-		case err != nil:
-			// incomplete quoted
-			return "", NewErrIncompleteQuotedString(p)
-		case r == RuneQuotes:
-			// end
-			return result.String(), nil
-		case r == RuneEscape:
-			// escaped
-			r2, _, err := p.ReadRune()
-			switch {
-			case err != nil:
-				// incomplete escaped
-				return "", NewErrIncompleteEscaped(p)
-			case IsNewLine(r2):
-				// escaped new line, skip
-				p.UnreadRune()
-				p.AcceptNewLine()
-			default:
-				// TODO: check valid escape character and
-				// append to result
-				s := string([]rune{r, r2})
-				err := NewErrInvalidEscapeSequence(p, s)
-				return "", err
-			}
-		default:
-			// normal, append to result
-			_, _ = result.WriteRune(r)
-		}
-	}
-}
-
-// Unquoted removes quotes and unescapes the content
-func Unquoted(s string) (string, error) {
-	var p TextParser
-	if s == "" {
-		return "", nil
-	}
-
-	p.InitString(s)
-	unquoted, ok, err := p.AcceptQuotedString()
-	switch {
-	case err != nil:
-		// bad string
-		return "", err
-	case ok:
-		// success
-		return unquoted, nil
-	default:
-		// not quoted
-		return s, nil
-	}
-}
Author	SHA1	Message	Date
amery	ee2e5c2001	build-sys: use local asciigoat.org/core [DO-NOT-MERGE] Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 00:19:16 +00:00
amery	330d885a60	parser: add initial Parser emitting non-whitespace tokens Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 00:18:36 +00:00