22 Commits

Author SHA1 Message Date
amery 3bf20948c0 parser: Unescaped [WIP]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:39 +00:00
amery 0dd29272e9 build-sys: use local darvaza.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 7fab1a799a build-sys: use local asciigoat.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 16dfde1503 vscode: add Subname to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 41d7c6e04d vscode: add unescapes to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 48adaeb8a8 vscode: add asciigoat to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 99ca8d0b3b Merge branch 'pr-amery-basic' into next-amery 2023-09-05 13:22:01 +00:00
amery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9)
Reviewed-on: #9
2023-09-05 15:20:38 +02:00
amery d41cd781d9 parser: introduce SplitCommaArray to split comma separated strings
removing whitespace and respecting quoted literals.

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery 651fcb6215 parser: Unquoted(), AcceptQuotedString()
TODO: reduce quoted strings with escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery fa9a7b4735 basic: rename and document queue related methods
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 17:37:09 +00:00
amery cfd4a94559 basic: call executeFinal() when OnToken() fails
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 17:37:09 +00:00
amery d8af7821e4 Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8)
Reviewed-on: #8
2023-09-04 19:33:24 +02:00
amery 8f3e59ec36 parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery d316031c44 basic: cleanup using parser.NewError()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery c3883cbb0d parser: introduce NewError() to create lexer.Error using lexer.Position
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:16:43 +00:00
amery 314c004efd Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)
Reviewed-on: #7
2023-09-04 16:17:04 +02:00
amery 30a86e170b parser: use GetPositionalLength() on TextParser.Discard() and TextParser.Emit()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 13:32:27 +00:00
amery 8cc75da138 parser: introduce GetPositionalLength()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:43:46 +00:00
amery 01cd4139bd parser.Parser: refactor using TextParser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:01:26 +00:00
amery e34e8eda0a parser.TextParser: AcceptNewLine() and AcceptRune()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:01:26 +00:00
amery ecff6f6016 parser: introduce generic-ish TextParser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:01:26 +00:00
13 changed files with 521 additions and 122 deletions
+7
View File
@@ -0,0 +1,7 @@
{
"cSpell.words": [
"asciigoat",
"Subname",
"unescapes"
]
}
+3 -14
View File
@@ -4,6 +4,7 @@ import (
"errors" "errors"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
) )
var ( var (
@@ -11,23 +12,11 @@ var (
) )
func newErrInvalidToken(t *token) *lexer.Error { func newErrInvalidToken(t *token) *lexer.Error {
err := &lexer.Error{ return parser.NewError(t.pos, t.value, "", errInvalidToken)
Line: t.pos.Line,
Column: t.pos.Column,
Content: t.value,
Err: errInvalidToken,
}
return err
} }
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = &lexer.Error{ err = parser.NewError(pos, content, "", err)
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
dec.executeFinal() dec.executeFinal()
return err return err
} }
+31 -20
View File
@@ -32,21 +32,21 @@ func (dec *decoder) executeFinal() {
func (dec *decoder) execute(typ parser.TokenType) { func (dec *decoder) execute(typ parser.TokenType) {
switch typ { switch typ {
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
name1, ok1 := dec.getValue(1, parser.TokenSectionName) name1, ok1 := dec.queueValue(1, parser.TokenSectionName)
if ok1 { if ok1 {
name2, ok2 := dec.getValue(2, parser.TokenSectionSubname) name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname)
dec.addSection(name1, name2, ok2) dec.addSection(name1, name2, ok2)
} }
dec.reset() dec.queueReset()
case parser.TokenFieldValue: case parser.TokenFieldValue:
key, _ := dec.getValue(0, parser.TokenFieldKey) key, _ := dec.queueValue(0, parser.TokenFieldKey)
value, _ := dec.getValue(1, parser.TokenFieldValue) value, _ := dec.queueValue(1, parser.TokenFieldValue)
dec.addField(key, value) dec.addField(key, value)
dec.reset() dec.queueReset()
} }
} }
@@ -82,7 +82,8 @@ func (dec *decoder) addField(key, value string) {
} }
} }
func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) { // queueValue extracts the value of element on the queue if the type matches.
func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
switch { switch {
case idx < 0 || idx >= len(dec.queue): case idx < 0 || idx >= len(dec.queue):
// out of range // out of range
@@ -95,40 +96,48 @@ func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
} }
} }
func (dec *decoder) reset() { // queueReset removes all tokens from the queue
func (dec *decoder) queueReset() {
dec.queue = dec.queue[:0] dec.queue = dec.queue[:0]
} }
func (dec *decoder) depth(depth int) bool { // queueDepth confirms the current depth of the queue
func (dec *decoder) queueDepth(depth int) bool {
return len(dec.queue) == depth return len(dec.queue) == depth
} }
func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool { // queueDepthType confirms the current depth of the queue and the type of the last
_, ok := dec.getValue(depth-1, typ) // element.
if ok { func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool {
return len(dec.queue) == depth if dec.queueDepth(depth) {
return dec.queueType(depth-1, typ)
} }
return false return false
} }
// queueType tells if the specified element on the queue is of the required type.
func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
_, ok := dec.queueValue(idx, typ)
return ok
}
func (dec *decoder) typeOK(typ parser.TokenType) bool { func (dec *decoder) typeOK(typ parser.TokenType) bool {
switch typ { switch typ {
case parser.TokenSectionStart, parser.TokenFieldKey: case parser.TokenSectionStart, parser.TokenFieldKey:
// first token only // first token only
return dec.depth(0) return dec.queueDepth(0)
case parser.TokenSectionName: case parser.TokenSectionName:
// right after TokenSectionStart // right after TokenSectionStart
return dec.depthAfter(1, parser.TokenSectionStart) return dec.queueDepthType(1, parser.TokenSectionStart)
case parser.TokenSectionSubname: case parser.TokenSectionSubname:
// right after TokenSectionName // right after TokenSectionName
return dec.depthAfter(2, parser.TokenSectionName) return dec.queueDepthType(2, parser.TokenSectionName)
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
// only on a section with name // only on a section with name
_, ok := dec.getValue(1, parser.TokenSectionName) return dec.queueType(1, parser.TokenSectionName)
return ok
case parser.TokenFieldValue: case parser.TokenFieldValue:
// right after a TokenFieldKey // right after a TokenFieldKey
return dec.depthAfter(1, parser.TokenFieldKey) return dec.queueDepthType(1, parser.TokenFieldKey)
default: default:
// never // never
return false return false
@@ -149,6 +158,8 @@ func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value stri
return nil return nil
default: default:
// unacceptable // unacceptable
return newErrInvalidToken(t) err := newErrInvalidToken(t)
dec.executeFinal()
return err
} }
} }
+6 -1
View File
@@ -2,8 +2,13 @@ module asciigoat.org/ini
go 1.19 go 1.19
replace (
asciigoat.org/core => ../core
darvaza.org/core => ../../darvaza.org/core
)
require ( require (
asciigoat.org/core v0.3.7 asciigoat.org/core v0.3.9
github.com/mgechev/revive v1.3.3 github.com/mgechev/revive v1.3.3
golang.org/x/tools v0.12.0 golang.org/x/tools v0.12.0
) )
-2
View File
@@ -1,5 +1,3 @@
asciigoat.org/core v0.3.7 h1:tMasdvZgsMJJMVsZVfXXB5lqq82pFiCsyEmOEmcmAfI=
asciigoat.org/core v0.3.7/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc= github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
+88
View File
@@ -0,0 +1,88 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// commaArrayParser is the state machine behind SplitCommaArray.
type commaArrayParser struct {
// TextParser provides the rune reader and the position tracking.
TextParser
// out accumulates the elements found so far, in order.
out []string
}
// lexStart skips whitespace and new lines in front of an element
// and then hands over to the quoted or the unquoted element state.
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// nothing else to read
			return nil, err
		}

		if r == RuneQuotes {
			// the opening quote stays accepted for lexQuotedString
			return p.lexQuotedString, nil
		}

		if IsNewLine(r) {
			// new lines are fine between elements, but the
			// whole sequence has to be dropped together
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
			continue
		}

		if lexer.IsSpace(r) {
			// whitespace outside quotes is dropped
			p.Discard()
			continue
		}

		// first rune of an unquoted element, give it back
		p.UnreadRune()
		return p.lexWord, nil
	}
}
// lexWord consumes an unquoted element up to the next comma or until
// reading fails, and appends it to the output with its trailing
// whitespace removed. After a comma it returns to lexStart, otherwise
// it stops and returns the read error.
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// done. the last element gets the same trailing
			// whitespace treatment as those ended by a comma.
			_, s := p.Emit()
			s = strings.TrimRightFunc(s, IsSpace)
			p.out = append(p.out, s)
			return nil, err
		case r == ',':
			// done
			_, s := p.Emit()
			// remove comma, trim and append to output
			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
			p.out = append(p.out, s)
			return p.lexStart, nil
		}
	}
}
// lexQuotedString reads the remainder of a quoted element, whose
// opening quote has already been accepted, and stores the value
// produced by the package level lexQuotedString.
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
	unquoted, lexErr := lexQuotedString(&p.TextParser)
	if lexErr == nil {
		// drop the accepted text and keep the resulting value
		p.Discard()
		p.out = append(p.out, unquoted)
		return p.lexStart, nil
	}

	return nil, lexErr
}
// Run executes the state machine from lexStart and returns the
// elements collected together with the error reported by [lexer.Run].
func (p *commaArrayParser) Run() ([]string, error) {
err := lexer.Run(p.lexStart)
return p.out, err
}
// SplitCommaArray breaks a comma separated string into its elements,
// dropping whitespace around them and honouring quoted literals.
// An empty string gives no elements and no error.
func SplitCommaArray(s string) ([]string, error) {
	if s == "" {
		return nil, nil
	}

	var p commaArrayParser

	p.InitString(s)
	return p.Run()
}
+62
View File
@@ -0,0 +1,62 @@
package parser
import (
"io/fs"
"asciigoat.org/core/lexer"
)
// NewError builds a [lexer.Error] taking Line and Column from
// the given [lexer.Position], and the content, hint and wrapped
// error as they are.
func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	out := new(lexer.Error)

	out.Line = pos.Line
	out.Column = pos.Column
	out.Content = content
	out.Hint = hint
	out.Err = err

	return out
}
// ErrPlusPosition creates a new [lexer.Error] with the same Content,
// Hint and Err as the given one, located at pos after the Line/Column
// of the original error have been added to it.
func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
	offset := lexer.Position{
		Line:   e.Line,
		Column: e.Column,
	}

	// pos is our own copy, the caller's value isn't altered
	pos.Add(offset)
	return NewError(pos, e.Content, e.Hint, e.Err)
}
// NewErrIncompleteQuotedString creates the [lexer.Error] used
// when the source ends before the quoted string being parsed
// has been closed.
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"

	return newErrIncomplete(p, hint)
}
// NewErrIncompleteEscaped creates the [lexer.Error] used
// when the source ends in the middle of an escaped
// sequence.
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
	const hint = "incomplete escaped string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete emits what has been accepted so far and creates
// a [lexer.Error] wrapping [fs.ErrInvalid], with that text as content
// and located where the text ends.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	at, text := p.Emit()

	// Emit gives the start of the text, move to its end
	at.Add(GetPositionalLength(text))

	return NewError(at, text, hint, fs.ErrInvalid)
}
// NewErrInvalidEscapeSequence creates a [lexer.Error] wrapping
// [fs.ErrInvalid] for the given sequence, which is expected to be
// the tail of the accepted text. The error is located where the
// sequence begins and nothing is emitted.
func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
	at := p.Position()
	accepted := p.String()

	// everything accepted in front of the sequence
	before := accepted[:len(accepted)-len(seq)]
	at.Add(GetPositionalLength(before))

	return NewError(at, seq, "invalid escape character", fs.ErrInvalid)
}
+17 -46
View File
@@ -5,71 +5,42 @@ import "asciigoat.org/core/lexer"
// Run parses the source // Run parses the source
func (p *Parser) Run() error { func (p *Parser) Run() error {
p.setDefaults() p.setDefaults()
p.pos.Reset()
return lexer.Run(p.lexStart) return lexer.Run(p.lexStart)
} }
func (p *Parser) lexStart() (lexer.StateFn, error) { func (p *Parser) lexStart() (lexer.StateFn, error) {
for { for {
r, _, err := p.src.ReadRune() r, _, err := p.p.ReadRune()
switch { switch {
case err != nil: case err != nil:
return p.emitError("", err) return p.emitError("", err)
case IsNewLine(r): case IsNewLine(r):
// new line // new line
p.lexMoreNewLine(r) p.p.UnreadRune()
p.p.AcceptNewLine()
p.stepLine() p.stepLine()
case IsSpace(r): case IsSpace(r):
// whitespace // whitespace
p.stepRune() p.stepString()
case IsCommentStart(r): case IsCommentStart(r):
// switch to comment lexer // switch to comment lexer
p.src.UnreadRune() p.p.UnreadRune()
return p.lexComment, nil return p.lexComment, nil
case IsSectionStart(r): case IsSectionStart(r):
// section // section
return p.lexSectionStart, nil return p.lexSectionStart, nil
default: default:
// entry // entry
p.src.UnreadRune() p.p.UnreadRune()
return p.lexEntryStart, nil return p.lexEntryStart, nil
} }
} }
} }
func (p *Parser) lexMoreNewLine(r1 rune) {
// r1 is warrantied to be either '\r' or '\n'
r2, _, err := p.src.ReadRune()
switch r1 {
case '\n':
switch {
case r2 == '\r':
// LN CR
case err == nil:
// LN
p.src.UnreadRune()
default:
// LN EOF
}
case '\r':
switch {
case r2 == '\n':
// CR LN
case err == nil:
// CR
p.src.UnreadRune()
default:
// CR EOF
}
default:
panic("unreachable")
}
}
func (p *Parser) lexComment() (lexer.StateFn, error) { func (p *Parser) lexComment() (lexer.StateFn, error) {
// until the end of the line // until the end of the line
p.src.AcceptAll(IsNotNewLine) p.p.AcceptAll(IsNotNewLine)
err := p.emitString(TokenComment) err := p.emitString(TokenComment)
return p.lexStart, err return p.lexStart, err
@@ -81,11 +52,11 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
} }
// remove whitespace between `[` and the name // remove whitespace between `[` and the name
if p.src.AcceptAll(IsSpaceNotNewLine) { if p.p.AcceptAll(IsSpaceNotNewLine) {
p.stepString() p.stepString()
} }
if !p.src.AcceptAll(IsName) { if !p.p.AcceptAll(IsName) {
// no name // no name
return p.emitError("section name missing", lexer.ErrUnacceptableRune) return p.emitError("section name missing", lexer.ErrUnacceptableRune)
} }
@@ -94,12 +65,12 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
return nil, err return nil, err
} }
// remove whitespace between the name andthe closing `]` // remove whitespace between the name and the closing `]`
if p.src.AcceptAll(IsSpaceNotNewLine) { if p.p.AcceptAll(IsSpaceNotNewLine) {
p.stepString() p.stepString()
} }
r, _, err := p.src.ReadRune() r, _, err := p.p.ReadRune()
switch { switch {
case err != nil: case err != nil:
return p.emitError("", err) return p.emitError("", err)
@@ -112,17 +83,17 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
} }
func (p *Parser) lexEntryStart() (lexer.StateFn, error) { func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
p.src.AcceptAll(IsName) p.p.AcceptAll(IsName)
if err := p.emitString(TokenFieldKey); err != nil { if err := p.emitString(TokenFieldKey); err != nil {
return nil, err return nil, err
} }
// ignore whitespace between key and the '=' sign // ignore whitespace between key and the '=' sign
if p.src.AcceptAll(IsSpaceNotNewLine) { if p.p.AcceptAll(IsSpaceNotNewLine) {
p.stepString() p.stepString()
} }
r, _, err := p.src.ReadRune() r, _, err := p.p.ReadRune()
switch { switch {
case err != nil: case err != nil:
return p.emitError("", err) return p.emitError("", err)
@@ -131,11 +102,11 @@ func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
} }
// ignore whitespace between the '=' and the value // ignore whitespace between the '=' and the value
if p.src.AcceptAll(IsSpaceNotNewLine) { if p.p.AcceptAll(IsSpaceNotNewLine) {
p.stepString() p.stepString()
} }
p.src.AcceptAll(IsNotNewLine) p.p.AcceptAll(IsNotNewLine)
if err := p.emitString(TokenFieldValue); err != nil { if err := p.emitString(TokenFieldValue); err != nil {
return nil, err return nil, err
} }
+14 -5
View File
@@ -7,11 +7,13 @@ import (
) )
const ( const (
RuneComment = ';' // RuneComment is the standard dosini comment character RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
) )
var ( var (
@@ -27,6 +29,8 @@ var (
RuneSectionStart, RuneSectionStart,
RuneSectionEnd, RuneSectionEnd,
RuneFieldEqual, RuneFieldEqual,
RuneQuotes,
RuneEscape,
}) })
) )
@@ -43,6 +47,11 @@ var (
IsCommentStart = lexer.NewIsIn(RunesComment) IsCommentStart = lexer.NewIsIn(RunesComment)
) )
// IsAny accepts any rune
func IsAny(_ rune) bool {
return true
}
// IsSpaceNotNewLine indicates a rune is whitespace but not a new line // IsSpaceNotNewLine indicates a rune is whitespace but not a new line
func IsSpaceNotNewLine(r rune) bool { func IsSpaceNotNewLine(r rune) bool {
return IsSpace(r) && !IsNewLine(r) return IsSpace(r) && !IsNewLine(r)
+17 -34
View File
@@ -1,4 +1,4 @@
// Package parser parses dosini-style files // Package parser parses ini-style files
package parser package parser
import ( import (
@@ -8,10 +8,9 @@ import (
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
// Parser parses a dosini-style document // Parser parses a ini-style document
type Parser struct { type Parser struct {
src *lexer.Reader p TextParser
pos lexer.Position
// OnToken is called for each identified token. if it returns an error // OnToken is called for each identified token. if it returns an error
// parsing is interrupted. // parsing is interrupted.
@@ -32,13 +31,7 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
func defaultOnError(pos lexer.Position, content string, err error) error { func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err) log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{ return NewError(pos, content, "", err)
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
} }
func (p *Parser) setDefaults() { func (p *Parser) setDefaults() {
@@ -51,15 +44,13 @@ func (p *Parser) setDefaults() {
} }
func (p *Parser) emitString(typ TokenType) error { func (p *Parser) emitString(typ TokenType) error {
s := p.src.Emit() pos, s := p.p.Emit()
err := p.OnToken(p.pos, typ, s) return p.OnToken(pos, typ, s)
p.pos.StepN(len(s))
return err
} }
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) { func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
err2 := p.OnError(p.pos, content, err) pos := p.p.Position()
err2 := p.OnError(pos, content, err)
switch { switch {
case err2 != nil: case err2 != nil:
// return wrapped error // return wrapped error
@@ -77,33 +68,25 @@ func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
// stepLine discards the data and moves the position // stepLine discards the data and moves the position
// to the next line. // to the next line.
func (p *Parser) stepLine() { func (p *Parser) stepLine() {
p.src.Discard() p.p.StepLine()
p.pos.StepLine()
}
// stepRune discards the data and moves the position
// one rune forward on the same line.
func (p *Parser) stepRune() {
p.src.Discard()
p.pos.Step()
} }
// stepString discards the data and moves the position // stepString discards the data and moves the position
// forward on the same line the length of the discarded // forward on the same line the length of the discarded
// content. // content.
func (p *Parser) stepString() { func (p *Parser) stepString() {
s := p.src.Emit() p.p.Step()
p.pos.StepN(len(s))
} }
// NewParser creates a dosini-style parser using // NewParser creates a ini-style parser using
// an [io.Reader] as source // an [io.Reader] as source
func NewParser(r io.Reader) *Parser { func NewParser(r io.Reader) *Parser {
if r == nil { var p *Parser
return nil
if r != nil {
p = new(Parser)
p.p.Init(r)
} }
return &Parser{ return p
src: lexer.NewReader(r),
}
} }
+103
View File
@@ -0,0 +1,103 @@
package parser
import (
"bytes"
"io"
"strings"
"asciigoat.org/core/lexer"
)
// TextParser is a generic text parser. It combines a [lexer.Reader]
// with the position of the text being accepted.
type TextParser struct {
*lexer.Reader
// pos is the position of the first character of the accepted text.
pos lexer.Position
}
// Init prepares the [TextParser] to read from the given [io.Reader]
// and resets the position. It panics if the parser or the reader
// are nil, or if the parser has been initialized before.
func (p *TextParser) Init(r io.Reader) {
	if p == nil || r == nil {
		panic("invalid call")
	}

	if p.Reader != nil {
		panic("parser already initialized")
	}

	p.Reader = lexer.NewReader(r)
	p.pos.Reset()
}
// InitBytes prepares the [TextParser] to read from a byte slice.
// See Init for the conditions that make it panic.
func (p *TextParser) InitBytes(b []byte) {
	src := bytes.NewReader(b)
	p.Init(src)
}
// InitString prepares the [TextParser] to read from a string.
// See Init for the conditions that make it panic.
func (p *TextParser) InitString(s string) {
	src := strings.NewReader(s)
	p.Init(src)
}
// Discard shadows [lexer.Reader]'s. It drops the accepted text
// and advances the position by its positional length, so new
// lines within the dropped text are taken into account.
func (p *TextParser) Discard() {
	dropped := p.Reader.Emit()
	p.pos.Add(GetPositionalLength(dropped))
}
// Emit hands over the accepted text together with the position
// where it started, and advances the position of the parser by
// the positional length of that text.
func (p *TextParser) Emit() (lexer.Position, string) {
	start := p.pos
	text := p.Reader.Emit()

	p.pos.Add(GetPositionalLength(text))
	return start, text
}
// Step drops the accepted text and moves the position forward
// by its length via StepN, without looking for new lines in it.
func (p *TextParser) Step() {
	dropped := p.Reader.Emit()
	n := len(dropped)

	p.pos.StepN(n)
}
// StepLine discards what's been accepted and moves the
// position to the beginning of the next line
func (p *TextParser) StepLine() {
p.Reader.Discard()
p.pos.StepLine()
}
// Position returns the position of the first character
// of the accepted text. The position is only advanced by
// Discard, Emit, Step and StepLine, not by accepting runes.
func (p *TextParser) Position() lexer.Position {
return p.pos
}
// AcceptNewLine accepts a new line sequence if that's what
// comes next, and tells if it did.
// "\n", "\n\r", "\r" and "\r\n" are recognized.
func (p *TextParser) AcceptNewLine() bool {
	first, _, err := p.ReadRune()
	if err != nil {
		return false
	}

	switch first {
	case '\n':
		// optionally followed by CR
		p.AcceptRune('\r')
		return true
	case '\r':
		// optionally followed by LF
		p.AcceptRune('\n')
		return true
	}

	// not a new line, give it back
	p.UnreadRune()
	return false
}
// AcceptRune accepts the next rune only if it's the
// one specified, and tells if it did.
func (p *TextParser) AcceptRune(r rune) bool {
	match := func(next rune) bool {
		return next == r
	}

	return p.Accept(match)
}
+38
View File
@@ -0,0 +1,38 @@
package parser
import (
"io"
"asciigoat.org/core/lexer"
)
// positionLengthParser walks a text to measure it as
// a [lexer.Position] for GetPositionalLength.
type positionLengthParser struct {
// TextParser reads the text being measured.
TextParser
// Position accumulates the lines and columns walked so far.
lexer.Position
}
// lexStart walks the whole text, stepping to the next line on each
// new line sequence and one column on any other rune. It ends
// with [io.EOF] once nothing else can be accepted.
func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
	for {
		if p.AcceptNewLine() {
			p.Position.StepLine()
			continue
		}

		if p.Accept(IsAny) {
			p.Position.StepN(1)
			continue
		}

		return nil, io.EOF
	}
}
// GetPositionalLength calculates the [lexer.Position] at
// the end of a text, relative to its beginning. Each new line
// sequence steps to the next line, and every other rune steps one
// column. An empty text gives the zero [lexer.Position].
func GetPositionalLength(s string) lexer.Position {
	var p positionLengthParser

	// only a non-empty text has anything to walk, and the
	// TextParser must not be used without being initialized.
	if s != "" {
		p.InitString(s)
		// lexStart only ever ends with io.EOF, nothing to report.
		_ = lexer.Run(p.lexStart)
	}

	return p.Position
}
+135
View File
@@ -0,0 +1,135 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString accepts a quoted string if that's what comes
// next on the source, and returns its value without the quotes.
// The boolean tells if a quoted string was found, and the error
// reports a failed read or a malformed quoted string.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	r, _, err := p.ReadRune()
	if err != nil {
		// nothing to read
		return "", false, err
	}

	if r != RuneQuotes {
		// something else, give it back
		p.UnreadRune()
		return "", false, nil
	}

	// opening quote accepted, get the rest
	value, lexErr := lexQuotedString(p)
	if lexErr != nil {
		// malformed
		return "", false, lexErr
	}

	return value, true, nil
}
// lexQuotedString reads a quoted string whose opening quote has
// already been accepted. It takes the simple path first and only
// switches to lexQuotedStringEscaped once an escape character
// is found.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	value, done, err := lexQuotedStringNoEscape(p)
	if err != nil {
		return "", err
	}

	if done {
		return value, nil
	}

	// stopped in front of an escape character
	return lexQuotedStringEscaped(p)
}
// lexQuotedStringNoEscape reads a quoted string for as long as no
// escape character is found. On the closing quote it returns the
// accepted text without the quotes and true. On an escape character
// it leaves it unread and returns false without an error, so
// the caller can continue from there.
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// the source ended before the closing quote
			return "", false, NewErrIncompleteQuotedString(p)
		}

		if r == RuneQuotes {
			// closing quote, strip both of them
			quoted := p.String()
			return quoted[1 : len(quoted)-1], true, nil
		}

		if r == RuneEscape {
			// not that simple after all
			p.UnreadRune()
			return "", false, nil
		}

		if IsNewLine(r) {
			// new lines are fine within quotes, but take
			// the whole sequence at once
			p.UnreadRune()
			p.AcceptNewLine()
		}
	}
}
// lexQuotedStringEscaped continues reading a quoted string from the
// first escape character, building the value rune by rune on top of
// what had been accepted before it. Escaped new lines are skipped,
// and any other escaped rune is for now reported as invalid.
func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
	var buf strings.Builder

	// start from what's been accepted, minus the opening quote
	_, _ = buf.WriteString(p.String()[1:])

	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// the source ended before the closing quote
			return "", NewErrIncompleteQuotedString(p)
		}

		switch r {
		case RuneQuotes:
			// closing quote
			return buf.String(), nil
		case RuneEscape:
			next, _, err := p.ReadRune()
			if err != nil {
				// the source ended right after the escape character
				return "", NewErrIncompleteEscaped(p)
			}

			if !IsNewLine(next) {
				// TODO: check valid escape character and
				// append to result
				seq := string([]rune{r, next})
				return "", NewErrInvalidEscapeSequence(p, seq)
			}

			// escaped new line, skip the whole sequence
			p.UnreadRune()
			p.AcceptNewLine()
		default:
			// plain rune, keep it
			_, _ = buf.WriteRune(r)
		}
	}
}
// Unquoted returns the value of a quoted string, without the
// quotes. A string that doesn't start with a quote is returned
// as it came, and a malformed quoted string gives an error.
func Unquoted(s string) (string, error) {
	if s == "" {
		return "", nil
	}

	var p TextParser

	p.InitString(s)

	value, quoted, err := p.AcceptQuotedString()
	if err != nil {
		// malformed
		return "", err
	}

	if quoted {
		return value, nil
	}

	// not quoted, nothing to do
	return s, nil
}