Merge pull request 'README: add initial description of the package' (#3 )

Reviewed-on: #3
README: add initial description of the package
2023-09-01 15:02:34 +02:00 · 2023-09-01 12:57:55 +00:00 · 2023-08-31 16:17:11 +02:00 · 2023-08-31 01:55:47 +00:00 · 2023-08-31 00:45:54 +00:00
6 changed files with 195 additions and 17 deletions
@@ -0,0 +1,47 @@
+# asciigoat's INI parser
+
+[![Go Reference][godoc-badge]][godoc]
+[![Go Report Card][goreport-badge]][goreport]
+
+`asciigoat.org/ini` is a simple Go library that very loosely parses
+[`INI`-style][wikipedia-dosini] documents allowing the implementation
+of stricter parsers of similar form.
+
+**asciigoat** is [MIT](https://opensource.org/license/mit/) licensed.
+
+[godoc]: https://pkg.go.dev/asciigoat.org/ini
+[godoc-badge]: https://pkg.go.dev/badge/asciigoat.org/ini.svg
+[goreport]: https://goreportcard.com/report/asciigoat.org/ini
+[goreport-badge]: https://goreportcard.com/badge/asciigoat.org/ini
+
+[godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer
+[godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser
+
+[wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file
+
+## Parser
+
+[`parser.Parser`][godoc-parser-parser] uses
+[`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style document
+emitting tokens and errors via callbacks.
+
+## Other Implementations
+
+Other implementations exist, and they are mature and feature-rich, but they
+are highly opinionated about what's a valid file. Built around maps, they don't
+allow repeating names and constrain what characters can be used.
+
+These are great when you can adapt to, or already agree with, their conditions,
+but that's not always the case when you are parsing configuration files from
+other applications. That's what [asciigoat.org/ini][godoc] attempts to solve.
+
+* [gcfg](https://pkg.go.dev/gopkg.in/gcfg.v1)
+* [unknwon's go-ini](https://github.com/go-ini/ini)
+* [wlevene's GoINI](https://github.com/wlevene/ini)
+
+## See also
+
+* [asciigoat.org/core](https://asciigoat.org/core)
+* [oss.jpi.io](https://oss.jpi.io)
+* [INI file][wikipedia-dosini] (_wikipedia_)
+* [TOML](https://www.kelche.co/blog/go/toml/)
@@ -2,8 +2,6 @@ module asciigoat.org/ini

 go 1.19

-replace asciigoat.org/core => ../core
-
 require (
 	asciigoat.org/core v0.3.6
 	github.com/mgechev/revive v1.3.3
@@ -1,3 +1,5 @@
+asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
+asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
@@ -15,21 +15,30 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
 		r, _, err := p.src.ReadRune()
 		switch {
 		case err != nil:
-			err = p.emitError("", err)
-			return nil, err
+			return p.emitError("", err)
 		case IsNewLine(r):
-			p.lexNewLine(r)
+			// new line
+			p.lexMoreNewLine(r)
 			p.stepLine()
 		case IsSpace(r):
+			// whitespace
 			p.stepRune()
-		default:
+		case IsCommentStart(r):
+			// switch to comment lexer
 			p.src.UnreadRune()
-			return p.lexToken, nil
+			return p.lexComment, nil
+		case IsSectionStart(r):
+			// section
+			return p.lexSectionStart, nil
+		default:
+			// entry
+			p.src.UnreadRune()
+			return p.lexEntryStart, nil
 		}
 	}
 }

-func (p *Parser) lexNewLine(r1 rune) {
+func (p *Parser) lexMoreNewLine(r1 rune) {
 	// r1 is warrantied to be either '\r' or '\n'
 	r2, _, err := p.src.ReadRune()
 	switch r1 {
@@ -58,9 +67,78 @@ func (p *Parser) lexNewLine(r1 rune) {
 	}
 }

-func (p *Parser) lexToken() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsNotSpace)
+func (p *Parser) lexComment() (lexer.StateFn, error) {
+	// until the end of the line
+	p.src.AcceptAll(IsNotNewLine)
+
+	err := p.emitString(TokenComment)
+	return p.lexStart, err
+}
+
+func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
+	if err := p.emitString(TokenSectionStart); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between `[` and the name
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	if !p.src.AcceptAll(IsName) {
+		// no name
+		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
+	}
+
+	if err := p.emitString(TokenSectionName); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between the name and the closing `]`
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case IsSectionEnd(r):
+		err := p.emitString(TokenSectionEnd)
+		return p.lexStart, err
+	default:
+		return p.emitInvalidRune(r)
+	}
+}
+
+func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsName)
+	if err := p.emitString(TokenFieldKey); err != nil {
+		return nil, err
+	}
+
+	// ignore whitespace between key and the '=' sign
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case r != RuneFieldEqual:
+		return p.emitInvalidRune(r)
+	}
+
+	// ignore whitespace between the '=' and the value
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	p.src.AcceptAll(IsNotNewLine)
+	if err := p.emitString(TokenFieldValue); err != nil {
+		return nil, err
+	}

-	err := p.emitString(TokenUnknown)
 	return p.lexStart, err
 }
@@ -1,9 +1,35 @@
 package parser

 import (
+	"strings"
+
 	"asciigoat.org/core/lexer"
 )

+const (
+	RuneComment      = ';' // RuneComment is the standard dosini comment character
+	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
+	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
+	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
+	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
+)
+
+var (
+	// RunesComment is a string containing all runes acceptable to start comments
+	RunesComment = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+	})
+	// RunesSpecial is a string containing all the runes with special meaning
+	RunesSpecial = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+		RuneSectionStart,
+		RuneSectionEnd,
+		RuneFieldEqual,
+	})
+)
+
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
@@ -13,4 +39,29 @@ var (
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
+	// IsCommentStart tells if the rune is one of those in RunesComment, which start a comment
+	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
+
+// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
+func IsSpaceNotNewLine(r rune) bool {
+	return IsSpace(r) && !IsNewLine(r)
+}
+
+// IsSectionStart indicates the rune starts the section declaration
+func IsSectionStart(r rune) bool { return r == RuneSectionStart }
+
+// IsSectionEnd indicates the rune ends the section declaration
+func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
+
+// IsName indicates a rune is acceptable for section or field names
+func IsName(r rune) bool {
+	switch {
+	case IsSpace(r):
+		return false
+	case strings.ContainsRune(RunesSpecial, r):
+		return false
+	default:
+		return true
+	}
+}
@@ -51,27 +51,29 @@ func (p *Parser) setDefaults() {
 }

 func (p *Parser) emitString(typ TokenType) error {
-	var err error
-
 	s := p.src.Emit()
-	err = p.OnToken(p.pos, typ, s)
+	err := p.OnToken(p.pos, typ, s)
 	p.pos.StepN(len(s))

 	return err
 }

-func (p *Parser) emitError(content string, err error) error {
+func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	err2 := p.OnError(p.pos, content, err)
 	switch {
 	case err2 != nil:
 		// return wrapped error
-		return err2
+		return nil, err2
 	default:
 		// return original error
-		return err
+		return nil, err
 	}
 }

+func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
+	return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
+}
+
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {
Author	SHA1	Message	Date
amery	cf100578c0	Merge pull request 'README: add initial description of the package' (#3 ) Reviewed-on: #3	2023-09-01 15:02:34 +02:00
amery	2eacc65215	README: add initial description of the package Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-01 12:57:55 +00:00
amery	169379f5c6	Merge pull request 'parser: implement basic dosini parsing' (#2 ) Reviewed-on: #2	2023-08-31 16:17:11 +02:00
amery	eb36c195c0	parser: implement basic dosini parsing Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 01:55:47 +00:00
amery	1090a374f0	parser: add initial Parser emitting non-whitespace tokens Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-08-31 00:45:54 +00:00