5 Commits

Author SHA1 Message Date
amery 62328d9e43 build-sys: use local asciigoat.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:44 +00:00
amery 5be0785a55 parser: implement initial tokeniser
only logging position, errors and non-whitespace elements

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 35b9d56b3d parser: add internal []Token queue to the Parser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 604ecfaed2 parser: introduce Token and TokenType enum
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 5288cd4537 parser: add placeholder for ini Parser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:47:12 +00:00
10 changed files with 156 additions and 286 deletions
-47
View File
@@ -1,47 +0,0 @@
# asciigoat's INI parser
[![Go Reference][godoc-badge]][godoc]
[![Go Report Card][goreport-badge]][goreport]
`asciigoat.org/ini` is a simple Go library that very loosely parses
[`INI`-style][wikipedia-dosini] documents allowing the implementation
of stricter parsers of similar form.
**asciigoat** is [MIT](https://opensource.org/license/mit/) licensed.
[godoc]: https://pkg.go.dev/asciigoat.org/ini
[godoc-badge]: https://pkg.go.dev/badge/asciigoat.org/ini.svg
[goreport]: https://goreportcard.com/report/asciigoat.org/ini
[goreport-badge]: https://goreportcard.com/badge/asciigoat.org/ini
[godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer
[godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser
[wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file
## Parser
[`parser.Parser`][godoc-parser-parser] uses
[`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style document
emitting tokens and errors via callbacks.
## Other Implementations
Other implementations exist, and they are mature and feature-rich, but they
are highly opinionated about what's a valid file. Built around maps they don't
allow repeating names and constrain which characters can be used.
These are great when you can adapt, or already agree, to their conditions but
that's not always the case when you are parsing configuration files from
other applications and that's what [asciigoat.org/ini][godoc] attempts to solve.
* [gcfg](https://pkg.go.dev/gopkg.in/gcfg.v1)
* [unknwon's go-ini](https://github.com/go-ini/ini)
* [wlevene's GoINI](https://github.com/wlevene/ini)
## See also
* [asciigoat.org/core](https://asciigoat.org/core)
* [oss.jpi.io](https://oss.jpi.io)
* [INI file][wikipedia-dosini] (_wikipedia_)
* [TOML](https://www.kelche.co/blog/go/toml/)
+2
View File
@@ -2,6 +2,8 @@ module asciigoat.org/ini
go 1.19
replace asciigoat.org/core => ../core
require (
asciigoat.org/core v0.3.6
github.com/mgechev/revive v1.3.3
-2
View File
@@ -1,5 +1,3 @@
asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
+28 -99
View File
@@ -1,6 +1,8 @@
package parser
import "asciigoat.org/core/lexer"
import (
"asciigoat.org/core/lexer"
)
// Run parses the source
func (p *Parser) Run() error {
@@ -15,43 +17,36 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
r, _, err := p.src.ReadRune()
switch {
case err != nil:
// read error
return p.emitError("", err)
case IsNewLine(r):
// new line
p.lexMoreNewLine(r)
p.lexNewLine(r)
p.stepLine()
case IsSpace(r):
// whitespace
p.stepRune()
case IsCommentStart(r):
// switch to comment lexer
p.src.UnreadRune()
return p.lexComment, nil
case IsSectionStart(r):
// section
return p.lexSectionStart, nil
default:
// entry
// token
p.src.UnreadRune()
return p.lexEntryStart, nil
return p.lexToken, nil
}
}
}
func (p *Parser) lexMoreNewLine(r1 rune) {
// r1 is guaranteed to be either '\r' or '\n'
// lexToken accepts a run of runes matching IsNotSpace, queues the
// accepted text as a TokenUnknown via pushString, and returns lexStart
// as the next state. It never returns an error itself.
func (p *Parser) lexToken() (lexer.StateFn, error) {
// the boolean result of AcceptAll is ignored
p.src.AcceptAll(IsNotSpace)
p.pushString(TokenUnknown)
return p.lexStart, nil
}
func (p *Parser) lexNewLine(r1 rune) {
// r1 is guaranteed to be either \n or \r
r2, _, err := p.src.ReadRune()
switch r1 {
case '\n':
switch {
case r2 == '\r':
// LN CR
case err == nil:
// LN
p.src.UnreadRune()
default:
// LN EOF
}
case '\r':
switch {
case r2 == '\n':
@@ -62,83 +57,17 @@ func (p *Parser) lexMoreNewLine(r1 rune) {
default:
// CR EOF
}
case '\n':
switch {
case r2 == '\r':
// LN CR
case err == nil:
// LN
p.src.UnreadRune()
default:
// LN EOF
}
default:
panic("unreachable")
}
}
// lexComment accepts everything up to the end of the line and emits it
// as a TokenComment. The next state is always lexStart; the error, if any,
// is the one returned by emitString.
func (p *Parser) lexComment() (lexer.StateFn, error) {
	// a comment extends until the next line break
	p.src.AcceptAll(IsNotNewLine)

	return p.lexStart, p.emitString(TokenComment)
}
// lexSectionStart lexes a section declaration.
//
// It emits the pending text as TokenSectionStart, then expects a name
// made of IsName runes (emitted as TokenSectionName) and a closing rune
// satisfying IsSectionEnd (emitted as TokenSectionEnd). Inline whitespace
// around the name is skipped. A missing name, a read error or any other
// rune in place of the closing marker is reported through emitError.
// On success the next state is lexStart.
func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
	err := p.emitString(TokenSectionStart)
	if err != nil {
		return nil, err
	}

	// remove whitespace between `[` and the name
	if p.src.AcceptAll(IsSpaceNotNewLine) {
		p.stepString()
	}

	if hasName := p.src.AcceptAll(IsName); !hasName {
		// no name
		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
	}

	err = p.emitString(TokenSectionName)
	if err != nil {
		return nil, err
	}

	// remove whitespace between the name and the closing `]`
	if p.src.AcceptAll(IsSpaceNotNewLine) {
		p.stepString()
	}

	r, _, readErr := p.src.ReadRune()
	if readErr != nil {
		return p.emitError("", readErr)
	}
	if !IsSectionEnd(r) {
		return p.emitInvalidRune(r)
	}

	return p.lexStart, p.emitString(TokenSectionEnd)
}
// lexEntryStart lexes a `key = value` entry on a single line.
//
// It emits the runes accepted by IsName as TokenFieldKey, requires a
// RuneFieldEqual after optional inline whitespace, and emits the rest of
// the line as TokenFieldValue. Read errors and unexpected runes are
// reported via emitError/emitInvalidRune, and an error returned by
// emitString aborts the entry. On success the next state is lexStart.
func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
	p.src.AcceptAll(IsName)
	if err := p.emitString(TokenFieldKey); err != nil {
		return nil, err
	}

	// ignore whitespace between key and the '=' sign
	if p.src.AcceptAll(IsSpaceNotNewLine) {
		p.stepString()
	}

	r, _, err := p.src.ReadRune()
	switch {
	case err != nil:
		return p.emitError("", err)
	case r != RuneFieldEqual:
		return p.emitInvalidRune(r)
	}

	// the '=' just read is still pending, so it is dropped together with
	// any whitespace that follows it. Stepping only when whitespace was
	// found would leak the '=' into the value of `key=value`.
	p.src.AcceptAll(IsSpaceNotNewLine)
	p.stepString()

	p.src.AcceptAll(IsNotNewLine)
	if err := p.emitString(TokenFieldValue); err != nil {
		return nil, err
	}

	return p.lexStart, nil
}
+15
View File
@@ -0,0 +1,15 @@
package parser
import "asciigoat.org/core/lexer"
// emitError reports a parsing error through the OnError callback and
// stops the state machine by returning a nil StateFn. If the callback
// returns a non-nil error that one is returned, otherwise the original
// err is passed through.
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
	if replaced := p.OnError(p.pos, content, err); replaced != nil {
		// the callback provided its own error
		return nil, replaced
	}

	// fall back to the original error
	return nil, err
}
+44
View File
@@ -0,0 +1,44 @@
package parser
import "log"
// push appends a Token to the parser's queue and logs it together
// with the index it was stored at.
func (p *Parser) push(tok Token) {
	p.queue = append(p.queue, tok)

	// the token just added sits at the last index
	log.Printf("queue[%v]: %s", len(p.queue)-1, tok)
}
// pushString takes the text emitted by the reader, queues it as a Token
// of the given type located at the current position, and then advances
// the position past it on the same line.
func (p *Parser) pushString(typ TokenType) {
	s := p.src.Emit()
	el := Token{
		Type:     typ,
		Value:    s,
		Position: p.pos,
	}

	// advance by runes, not bytes, to stay consistent with stepRune
	// which moves one column per rune regardless of its encoded size.
	p.pos.StepN(len([]rune(s)))
	p.push(el)
}
// stepLine discards the data and moves the position
// to the next line.
func (p *Parser) stepLine() {
p.src.Discard()
p.pos.StepLine()
}
// stepRune discards the data and moves the position
// one rune forward on the same line.
func (p *Parser) stepRune() {
p.src.Discard()
p.pos.Step()
}
// stepString takes the text emitted by the reader, moves the position
// forward on the same line by its length, and returns the text.
func (p *Parser) stepString() string {
	s := p.src.Emit()

	// advance by runes, not bytes, to stay consistent with stepRune
	// which moves one column per rune regardless of its encoded size.
	p.pos.StepN(len([]rune(s)))
	return s
}
+7 -55
View File
@@ -1,67 +1,19 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// Runes with a special meaning in a dosini-style document.
const (
RuneComment = ';' // RuneComment is the standard dosini comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
)
import "asciigoat.org/core/lexer"
// Strings grouping the special runes, suitable for membership checks.
var (
// RunesComment is a string containing all runes acceptable to start comments:
// RuneComment and RuneCommentExtra.
RunesComment = string([]rune{
RuneComment,
RuneCommentExtra,
})
// RunesSpecial is a string containing all the runes with special meaning:
// both comment starters, the section delimiters and the key/value separator.
RunesSpecial = string([]rune{
RuneComment,
RuneCommentExtra,
RuneSectionStart,
RuneSectionEnd,
RuneFieldEqual,
})
)
var (
// IsNewLine tells if the rune indicates a line break or the start of one
IsNewLine = lexer.NewIsIn("\r\n")
// IsNotNewLine tells if the rune is not a line break nor the start of one
IsNotNewLine = lexer.NewIsNot(IsNewLine)
// IsSpace tells if the rune is considered whitespace by Unicode
// IsNewLine tells if a rune represents a line break or the start of one
IsNewLine = lexer.NewIsIn("\n\r")
// IsSpace tells if a rune is considered whitespace by unicode
IsSpace = lexer.IsSpace
// IsNotSpace tells if the rune is not considered whitespace by Unicode
// IsNotNewLine tells if a rune is anything other than line breaks
IsNotNewLine = lexer.NewIsNot(IsNewLine)
// IsNotSpace tells if a rune is anything other than whitespace
IsNotSpace = lexer.NewIsNot(IsSpace)
// IsCommentStart ...
IsCommentStart = lexer.NewIsIn(RunesComment)
)
// IsSpaceNotNewLine tells if a rune is whitespace that doesn't
// belong to a line break.
func IsSpaceNotNewLine(r rune) bool {
	if !IsSpace(r) {
		return false
	}
	return !IsNewLine(r)
}
// IsSectionStart tells if the rune is RuneSectionStart, the marker
// that opens a section declaration.
func IsSectionStart(r rune) bool { return r == RuneSectionStart }
// IsSectionEnd tells if the rune is RuneSectionEnd, the marker
// that closes a section declaration.
func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
// IsName tells if a rune is acceptable in section or field names,
// which is any rune that is neither whitespace nor listed in RunesSpecial.
func IsName(r rune) bool {
	if IsSpace(r) || strings.ContainsRune(RunesSpecial, r) {
		return false
	}
	return true
}
+36 -67
View File
@@ -3,7 +3,6 @@ package parser
import (
"io"
"log"
"asciigoat.org/core/lexer"
)
@@ -11,91 +10,61 @@ import (
// Parser parses a dosini-style document
type Parser struct {
src *lexer.Reader
pos lexer.Position
// OnToken is called for each identified token. if it returns an error
// parsing is interrupted.
OnToken func(pos lexer.Position, typ TokenType, value string) error
pos lexer.Position
queue []Token
// OnError is called in case of a parsing error, and it's allowed
// to replace the error returned by [Parser.Run].
// OnError is called for io.EOF, but [Parser.Run] will consider it
// normal termination.
// OnSection is called after a [section] is parsed.
// Returning an error will abort the process.
OnSection func(pos lexer.Position, name, subname string, hasSubname bool) error
// OnField is called after a `key = value` entry is parsed
// Returning an error will abort the process.
OnField func(pos lexer.Position, key, value string) error
// OnComment is called after a comment is parsed
// Returning an error will abort the process.
OnComment func(pos lexer.Position, comment string) error
// OnError is called after each parsing error, which you are allowed to
// override.
// OnError is called for EOF as well, but this error isn't returned as such by
// Parser.Run(). The caller will receive (nil, nil) instead indicating the
// processes terminated correctly.
OnError func(pos lexer.Position, content string, err error) error
}
// defaultOnToken is the OnToken callback installed by setDefaults when
// none was provided. It logs the token type, its line and column, and
// the quoted value, and never returns an error.
func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value)
return nil
}
// defaultOnSection is the no-op OnSection callback installed by
// setDefaults when none was provided.
func defaultOnSection(_ lexer.Position, _, _ string, _ bool) error { return nil }
// defaultOnField is the no-op OnField callback installed by
// setDefaults when none was provided.
func defaultOnField(_ lexer.Position, _, _ string) error { return nil }
// defaultOnComment is the no-op OnComment callback installed by
// setDefaults when none was provided.
func defaultOnComment(_ lexer.Position, _ string) error { return nil }
func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{
Line: pos.Line,
Column: pos.Column,
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
}
func (p *Parser) setDefaults() {
if p.OnToken == nil {
p.OnToken = defaultOnToken
if p.OnSection == nil {
p.OnSection = defaultOnSection
}
if p.OnField == nil {
p.OnField = defaultOnField
}
if p.OnComment == nil {
p.OnComment = defaultOnComment
}
if p.OnError == nil {
p.OnError = defaultOnError
}
}
// emitString takes the text emitted by the reader, passes it to the
// OnToken callback as a token of the given type located at the current
// position, and then advances the position past it on the same line.
// It returns whatever error the callback returned.
func (p *Parser) emitString(typ TokenType) error {
	s := p.src.Emit()
	err := p.OnToken(p.pos, typ, s)

	// advance by runes, not bytes, to stay consistent with stepRune
	// which moves one column per rune regardless of its encoded size.
	p.pos.StepN(len([]rune(s)))
	return err
}
// emitError reports a parsing error through the OnError callback and
// stops the state machine by returning a nil StateFn. If the callback
// returns a non-nil error that one is returned, otherwise the original
// err is passed through.
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
	if replaced := p.OnError(p.pos, content, err); replaced != nil {
		// the callback provided its own error
		return nil, replaced
	}

	// fall back to the original error
	return nil, err
}
// emitInvalidRune reports a rune that isn't acceptable at the current
// point of the document, using the rune itself as the error's content
// and lexer.ErrUnacceptableRune as the cause.
func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
	offending := string(r)
	return p.emitError(offending, lexer.ErrUnacceptableRune)
}
// stepLine discards the data held by the reader and moves
// the position to the next line.
func (p *Parser) stepLine() {
p.src.Discard()
p.pos.StepLine()
}
// stepRune discards the data held by the reader and moves
// the position one rune forward on the same line.
func (p *Parser) stepRune() {
p.src.Discard()
p.pos.Step()
}
// stepString discards the data and moves the position
// forward on the same line by the length, in runes, of the
// discarded content.
func (p *Parser) stepString() {
	s := p.src.Emit()

	// advance by runes, not bytes, to stay consistent with stepRune
	// which moves one column per rune regardless of its encoded size.
	p.pos.StepN(len([]rune(s)))
}
// NewParser creates a dosini-style parser using
// an [io.Reader] as source
func NewParser(r io.Reader) *Parser {
+17 -7
View File
@@ -1,6 +1,11 @@
package parser
//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
import (
"fmt"
"asciigoat.org/core/lexer"
)
// A TokenType is a type of Token
type TokenType uint
@@ -8,12 +13,6 @@ type TokenType uint
const (
// TokenUnknown represents a Token that hasn't been identified
TokenUnknown TokenType = iota
// TokenSectionStart indicates the opening marker of a section declaration.
// The left squared bracket.
TokenSectionStart
// TokenSectionEnd indicates the closing marker of a section declaration.
// The right squared bracket.
TokenSectionEnd
// TokenSectionName represents the section name between the squared brackets
TokenSectionName
// TokenSectionSubname represents a secondary name in the section represented
@@ -22,10 +21,21 @@ const (
// [section_name "section_subname"]
TokenSectionSubname
// TokenComment represents a comment, including the initial ';' or '#' until
// the end of the line.
// the end of the line
TokenComment
// TokenFieldKey represents a field name in a `key = value` entry
TokenFieldKey
// TokenFieldValue represents a field value in a `key = value` entry
TokenFieldValue
)
// A Token is an element from the document
type Token struct {
// Type tells what kind of element the Token is
Type TokenType
// Position is the parser's position recorded when the Token was
// created; pushString captures it before stepping past Value.
Position lexer.Position
// Value is the text of the element as emitted by the reader
Value string
}
// String renders the Token as `type:line:column: "value"`, with the
// value quoted.
func (t Token) String() string {
	pos := t.Position
	return fmt.Sprintf("%s:%v:%v: %q", t.Type, pos.Line, pos.Column, t.Value)
}
+7 -9
View File
@@ -9,18 +9,16 @@ func _() {
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[TokenUnknown-0]
_ = x[TokenSectionStart-1]
_ = x[TokenSectionEnd-2]
_ = x[TokenSectionName-3]
_ = x[TokenSectionSubname-4]
_ = x[TokenComment-5]
_ = x[TokenFieldKey-6]
_ = x[TokenFieldValue-7]
_ = x[TokenSectionName-1]
_ = x[TokenSectionSubname-2]
_ = x[TokenComment-3]
_ = x[TokenFieldKey-4]
_ = x[TokenFieldValue-5]
}
const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
const _TokenType_name = "TokenUnknownTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
var _TokenType_index = [...]uint8{0, 12, 28, 47, 59, 72, 87}
func (i TokenType) String() string {
if i >= TokenType(len(_TokenType_index)-1) {