10 Commits

Author SHA1 Message Date
amery 3bf20948c0 parser: Unescaped [WIP]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:39 +00:00
amery 0dd29272e9 build-sys: use local darvaza.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 7fab1a799a build-sys: use local asciigoat.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 16dfde1503 vscode: add Subname to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 41d7c6e04d vscode: add unescapes to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 48adaeb8a8 vscode: add asciigoat to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 99ca8d0b3b Merge branch 'pr-amery-basic' into next-amery 2023-09-05 13:22:01 +00:00
amery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9)
Reviewed-on: #9
2023-09-05 15:20:38 +02:00
amery d41cd781d9 parser: introduce SplitCommaArray to splits comma separated strings
removing whitespace and respecting quoted literals.

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery 651fcb6215 parser: Unquoted(), AcceptQuotedString()
TODO: reduce quoted strings with escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
7 changed files with 279 additions and 7 deletions
+7
View File
@@ -0,0 +1,7 @@
{
"cSpell.words": [
"asciigoat",
"Subname",
"unescapes"
]
}
+5
View File
@@ -2,6 +2,11 @@ module asciigoat.org/ini
go 1.19 go 1.19
replace (
asciigoat.org/core => ../core
darvaza.org/core => ../../darvaza.org/core
)
require ( require (
asciigoat.org/core v0.3.9 asciigoat.org/core v0.3.9
github.com/mgechev/revive v1.3.3 github.com/mgechev/revive v1.3.3
-2
View File
@@ -1,5 +1,3 @@
asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc= github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
+88
View File
@@ -0,0 +1,88 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
type commaArrayParser struct {
TextParser
out []string
}
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// EOF
return nil, err
case r == RuneQuotes:
// Quoted Value
return p.lexQuotedString, nil
case IsNewLine(r):
// new lines are acceptable when parsing a string for
// comma delimited arrays. but make sure we discard it
// complete
p.UnreadRune()
p.AcceptNewLine()
p.Discard()
case lexer.IsSpace(r):
// discard whitespace outside quotes
p.Discard()
default:
p.UnreadRune()
return p.lexWord, nil
}
}
}
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// done. store what we got and move on
_, s := p.Emit()
p.out = append(p.out, s)
return nil, err
case r == ',':
// done
_, s := p.Emit()
// remove comma, trim and append to output
s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
p.out = append(p.out, s)
return p.lexStart, nil
}
}
}
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
s, err := lexQuotedString(&p.TextParser)
if err != nil {
return nil, err
}
p.Discard()
p.out = append(p.out, s)
return p.lexStart, nil
}
func (p *commaArrayParser) Run() ([]string, error) {
err := lexer.Run(p.lexStart)
return p.out, err
}
// SplitCommaArray splits comma separated strings, removing whitespace
// and respecting quoted literals.
func SplitCommaArray(s string) ([]string, error) {
if s != "" {
var p commaArrayParser
p.InitString(s)
return p.Run()
}
return nil, nil
}
+35
View File
@@ -1,6 +1,8 @@
package parser package parser
import ( import (
"io/fs"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
@@ -25,3 +27,36 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
return NewError(pos, e.Content, e.Hint, e.Err) return NewError(pos, e.Content, e.Hint, e.Err)
} }
// NewErrIncompleteQuotedString returns a [lexer.Error]
// indicating the quoted string being parsed wasn't correctly
// terminated
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
return newErrIncomplete(p, "incomplete quoted string")
}
// NewErrIncompleteEscaped returns a [lexer.Error]
// indicating the text being parsed wasn't correctly
// terminated
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
return newErrIncomplete(p, "incomplete escaped string")
}
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
pos, s := p.Emit()
pos.Add(GetPositionalLength(s))
return NewError(pos, s, hint, fs.ErrInvalid)
}
// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating
// the specified sequence, at the end of the accepted buffer,
// is invalid
func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
pos, s := p.Position(), p.String()
s = s[:len(s)-len(seq)]
pos.Add(GetPositionalLength(s))
return NewError(pos, seq, "invalid escape character", fs.ErrInvalid)
}
+6 -2
View File
@@ -7,11 +7,13 @@ import (
) )
const ( const (
RuneComment = ';' // RuneComment is the standard dosini comment character RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
) )
var ( var (
@@ -27,6 +29,8 @@ var (
RuneSectionStart, RuneSectionStart,
RuneSectionEnd, RuneSectionEnd,
RuneFieldEqual, RuneFieldEqual,
RuneQuotes,
RuneEscape,
}) })
) )
+135
View File
@@ -0,0 +1,135 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString consumes a quoted string from the source
// and returns it unquoted and unescaped
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
r, _, err := p.ReadRune()
switch {
case err != nil:
// nothing here
return "", false, err
case r != RuneQuotes:
// not for us
p.UnreadRune()
return "", false, nil
default:
// let's roll
s, err := lexQuotedString(p)
switch {
case err != nil:
// bad quoted string
return "", false, err
default:
// success
return s, true, nil
}
}
}
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
s, ok, err := lexQuotedStringNoEscape(p)
switch {
case err != nil:
return "", err
case ok:
return s, nil
default:
// escape character detected
return lexQuotedStringEscaped(p)
}
}
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete
return "", false, NewErrIncompleteQuotedString(p)
case r == RuneQuotes:
// end, just remove the quotes
s := p.String()
l := len(s)
return s[1 : l-1], true, nil
case r == RuneEscape:
// things just got complicated...
p.UnreadRune()
return "", false, nil
case IsNewLine(r):
// new lines within quoted values are acceptable
p.UnreadRune()
p.AcceptNewLine()
default:
// continue
}
}
}
// Unquoted removes quotes the content and unescapes the content
func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
var result strings.Builder
// append what was accepted before the escape character
_, _ = result.WriteString(p.String()[1:])
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete quoted
return "", NewErrIncompleteQuotedString(p)
case r == RuneQuotes:
// end
return result.String(), nil
case r == RuneEscape:
// escaped
r2, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete escaped
return "", NewErrIncompleteEscaped(p)
case IsNewLine(r2):
// escaped new line, skip
p.UnreadRune()
p.AcceptNewLine()
default:
// TODO: check valid escape character and
// append to result
s := string([]rune{r, r2})
err := NewErrInvalidEscapeSequence(p, s)
return "", err
}
default:
// normal, append to result
_, _ = result.WriteRune(r)
}
}
}
// Unquoted removes quotes and unescapes the content
func Unquoted(s string) (string, error) {
var p TextParser
if s == "" {
return "", nil
}
p.InitString(s)
unquoted, ok, err := p.AcceptQuotedString()
switch {
case err != nil:
// bad string
return "", err
case ok:
// success
return unquoted, nil
default:
// not quoted
return s, nil
}
}