8 Commits

Author SHA1 Message Date
amery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9)
Reviewed-on: #9
2023-09-05 15:20:38 +02:00
amery d41cd781d9 parser: introduce SplitCommaArray to split comma separated strings
removing whitespace and respecting quoted literals.

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery 651fcb6215 parser: Unquoted(), AcceptQuotedString()
TODO: reduce quoted strings with escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery d8af7821e4 Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8)
Reviewed-on: #8
2023-09-04 19:33:24 +02:00
amery 8f3e59ec36 parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery d316031c44 basic: cleanup using parser.NewError()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery c3883cbb0d parser: introduce NewError() to create lexer.Error using lexer.Position
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:16:43 +00:00
amery 314c004efd Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)
Reviewed-on: #7
2023-09-04 16:17:04 +02:00
9 changed files with 76 additions and 137 deletions
-7
View File
@@ -1,7 +0,0 @@
{
"cSpell.words": [
"asciigoat",
"Subname",
"unescapes"
]
}
+3 -12
View File
@@ -4,28 +4,19 @@ import (
"errors" "errors"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
) )
var ( var (
errInvalidToken = errors.New("invalid token") errInvalidToken = errors.New("invalid token")
) )
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Hint: hint,
Err: err,
}
}
func newErrInvalidToken(t *token) *lexer.Error { func newErrInvalidToken(t *token) *lexer.Error {
return newError(t.pos, t.value, "", errInvalidToken) return parser.NewError(t.pos, t.value, "", errInvalidToken)
} }
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = newError(pos, content, "", err) err = parser.NewError(pos, content, "", err)
dec.executeFinal() dec.executeFinal()
return err return err
} }
+20 -31
View File
@@ -32,21 +32,21 @@ func (dec *decoder) executeFinal() {
func (dec *decoder) execute(typ parser.TokenType) { func (dec *decoder) execute(typ parser.TokenType) {
switch typ { switch typ {
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
name1, ok1 := dec.queueValue(1, parser.TokenSectionName) name1, ok1 := dec.getValue(1, parser.TokenSectionName)
if ok1 { if ok1 {
name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname) name2, ok2 := dec.getValue(2, parser.TokenSectionSubname)
dec.addSection(name1, name2, ok2) dec.addSection(name1, name2, ok2)
} }
dec.queueReset() dec.reset()
case parser.TokenFieldValue: case parser.TokenFieldValue:
key, _ := dec.queueValue(0, parser.TokenFieldKey) key, _ := dec.getValue(0, parser.TokenFieldKey)
value, _ := dec.queueValue(1, parser.TokenFieldValue) value, _ := dec.getValue(1, parser.TokenFieldValue)
dec.addField(key, value) dec.addField(key, value)
dec.queueReset() dec.reset()
} }
} }
@@ -82,8 +82,7 @@ func (dec *decoder) addField(key, value string) {
} }
} }
// queueValue extracts the value of element on the queue if the type matches. func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
switch { switch {
case idx < 0 || idx >= len(dec.queue): case idx < 0 || idx >= len(dec.queue):
// out of range // out of range
@@ -96,48 +95,40 @@ func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
} }
} }
// queueReset removes all tokens from the queue func (dec *decoder) reset() {
func (dec *decoder) queueReset() {
dec.queue = dec.queue[:0] dec.queue = dec.queue[:0]
} }
// queueDepth confirms the current depth of the queue func (dec *decoder) depth(depth int) bool {
func (dec *decoder) queueDepth(depth int) bool {
return len(dec.queue) == depth return len(dec.queue) == depth
} }
// queueDepthType confirms the current depth of the queue and the type of the last func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool {
// element. _, ok := dec.getValue(depth-1, typ)
func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool { if ok {
if dec.queueDepth(depth) { return len(dec.queue) == depth
return dec.queueType(depth-1, typ)
} }
return false return false
} }
// queueType tells if the specified element on the queue is of the required type.
func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
_, ok := dec.queueValue(idx, typ)
return ok
}
func (dec *decoder) typeOK(typ parser.TokenType) bool { func (dec *decoder) typeOK(typ parser.TokenType) bool {
switch typ { switch typ {
case parser.TokenSectionStart, parser.TokenFieldKey: case parser.TokenSectionStart, parser.TokenFieldKey:
// first token only // first token only
return dec.queueDepth(0) return dec.depth(0)
case parser.TokenSectionName: case parser.TokenSectionName:
// right after TokenSectionStart // right after TokenSectionStart
return dec.queueDepthType(1, parser.TokenSectionStart) return dec.depthAfter(1, parser.TokenSectionStart)
case parser.TokenSectionSubname: case parser.TokenSectionSubname:
// right after TokenSectionName // right after TokenSectionName
return dec.queueDepthType(2, parser.TokenSectionName) return dec.depthAfter(2, parser.TokenSectionName)
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
// only on a section with name // only on a section with name
return dec.queueType(1, parser.TokenSectionName) _, ok := dec.getValue(1, parser.TokenSectionName)
return ok
case parser.TokenFieldValue: case parser.TokenFieldValue:
// right after a TokenFieldKey // right after a TokenFieldKey
return dec.queueDepthType(1, parser.TokenFieldKey) return dec.depthAfter(1, parser.TokenFieldKey)
default: default:
// never // never
return false return false
@@ -158,8 +149,6 @@ func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value stri
return nil return nil
default: default:
// unacceptable // unacceptable
err := newErrInvalidToken(t) return newErrInvalidToken(t)
dec.executeFinal()
return err
} }
} }
-5
View File
@@ -2,11 +2,6 @@ module asciigoat.org/ini
go 1.19 go 1.19
replace (
asciigoat.org/core => ../core
darvaza.org/core => ../../darvaza.org/core
)
require ( require (
asciigoat.org/core v0.3.9 asciigoat.org/core v0.3.9
github.com/mgechev/revive v1.3.3 github.com/mgechev/revive v1.3.3
+2
View File
@@ -1,3 +1,5 @@
asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc= github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
+11 -13
View File
@@ -1,6 +1,8 @@
package parser package parser
import ( import (
"strings"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
@@ -46,18 +48,13 @@ func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
_, s := p.Emit() _, s := p.Emit()
p.out = append(p.out, s) p.out = append(p.out, s)
return nil, err return nil, err
case r == RuneEscape: case r == ',':
// escaped // done
r2, _, err := p.ReadRune() _, s := p.Emit()
switch { // remove comma, trim and append to output
case err != nil: s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
// incomplete p.out = append(p.out, s)
return nil, NewErrIncompleteEscaped(&p.TextParser) return p.lexStart, nil
case IsNewLine(r2):
// escaped new line
p.UnreadRune()
p.AcceptNewLine()
}
} }
} }
} }
@@ -79,7 +76,8 @@ func (p *commaArrayParser) Run() ([]string, error) {
return p.out, err return p.out, err
} }
// SplitCommaArray splits // SplitCommaArray splits comma separated strings, removing whitespace
// and respecting quoted literals.
func SplitCommaArray(s string) ([]string, error) { func SplitCommaArray(s string) ([]string, error) {
if s != "" { if s != "" {
var p commaArrayParser var p commaArrayParser
+13 -21
View File
@@ -6,6 +6,17 @@ import (
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
// NewError creates a lexer.Error using a lexer.Position
func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Hint: hint,
Err: err,
}
}
// ErrPlusPosition returns a copy of the given [lexer.Error] // ErrPlusPosition returns a copy of the given [lexer.Error]
// offsetting the Line/Column information. // offsetting the Line/Column information.
func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error { func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
@@ -14,13 +25,7 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
Column: e.Column, Column: e.Column,
}) })
return &lexer.Error{ return NewError(pos, e.Content, e.Hint, e.Err)
Line: pos.Line,
Column: pos.Column,
Content: e.Content,
Hint: e.Hint,
Err: e.Err,
}
} }
// NewErrIncompleteQuotedString returns a [lexer.Error] // NewErrIncompleteQuotedString returns a [lexer.Error]
@@ -30,22 +35,9 @@ func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
return newErrIncomplete(p, "incomplete quoted string") return newErrIncomplete(p, "incomplete quoted string")
} }
// NewErrIncompleteEscaped returns a [lexer.Error]
// indicating the text being parsed wasn't correctly
// terminated
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
return newErrIncomplete(p, "incomplete escaped string")
}
func newErrIncomplete(p *TextParser, hint string) *lexer.Error { func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
pos, s := p.Emit() pos, s := p.Emit()
pos.Add(GetPositionalLength(s)) pos.Add(GetPositionalLength(s))
return &lexer.Error{ return NewError(pos, s, hint, fs.ErrInvalid)
Line: pos.Line,
Column: pos.Column,
Content: s,
Hint: hint,
Err: fs.ErrInvalid,
}
} }
+1 -7
View File
@@ -31,13 +31,7 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
func defaultOnError(pos lexer.Position, content string, err error) error { func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err) log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{ return NewError(pos, content, "", err)
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
} }
func (p *Parser) setDefaults() { func (p *Parser) setDefaults() {
+26 -41
View File
@@ -1,11 +1,13 @@
package parser package parser
import ( import (
"strings"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
// AcceptQuotedString consumes a quoted string from the source // AcceptQuotedString consumes a quoted string from the source
// and returns it unquoted. // and returns it unquoted and unescaped
func (p *TextParser) AcceptQuotedString() (string, bool, error) { func (p *TextParser) AcceptQuotedString() (string, bool, error) {
r, _, err := p.ReadRune() r, _, err := p.ReadRune()
switch { switch {
@@ -31,33 +33,22 @@ func (p *TextParser) AcceptQuotedString() (string, bool, error) {
} }
func lexQuotedString(p *TextParser) (string, *lexer.Error) { func lexQuotedString(p *TextParser) (string, *lexer.Error) {
s, ok, err := lexQuotedStringNoEscape(p)
switch {
case err != nil:
return "", err
case ok:
return s, nil
default:
// escape character detected
return lexQuotedStringEscaped(p)
}
}
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
for { for {
r, _, err := p.ReadRune() r, _, err := p.ReadRune()
switch { switch {
case err != nil: case err != nil:
// incomplete // incomplete
return "", false, NewErrIncompleteQuotedString(p) return "", NewErrIncompleteQuotedString(p)
case r == RuneQuotes: case r == RuneQuotes:
// end, just remove the quotes // end, remove quotes and process escaped characters
s := p.String() return lexReturnUnescapedQuotedString(p)
l := len(s)
return s[1 : l-2], true, nil
case r == RuneEscape: case r == RuneEscape:
// things just got complicated... // escaped, take another
return "", false, nil _, _, err := p.ReadRune()
if err != nil {
// incomplete
return "", NewErrIncompleteQuotedString(p)
}
case IsNewLine(r): case IsNewLine(r):
// new lines within quoted values are acceptable // new lines within quoted values are acceptable
p.UnreadRune() p.UnreadRune()
@@ -68,28 +59,22 @@ func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
} }
} }
func lexQuotedStringEscaped(*TextParser) (string, *lexer.Error) { func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
return "", nil // remove quotes
s := p.String()
l := len(s)
s = s[1 : l-1]
if strings.ContainsRune(s, RuneEscape) {
// TODO: implement unescaping
err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented)
return "", err
}
return s, nil
} }
// // escaped. append partial // Unquoted removes quotes and unescapes the content
// mark = lexQuotedAppendPartial(p, &buf, mark)
//
// r2, _, err := p.ReadRune()
// switch {
// case err != nil:
// // incomplete
// return "", NewErrIncompleteQuotedString(p)
// case IsNewLine(r2):
// // escaped new line
// p.UnreadRune()
// p.AcceptNewLine()
// mark = lexQuotedAppendNewLine(p, &buf, mark)
// default:
// // bad escaped
// }
// Unquoted removes quotes and unescapes the content
func Unquoted(s string) (string, error) { func Unquoted(s string) (string, error) {
var p TextParser var p TextParser
if s == "" { if s == "" {