17 Commits

Author SHA1 Message Date
amery 3bf20948c0 parser: Unescaped [WIP]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:39 +00:00
amery 0dd29272e9 build-sys: use local darvaza.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 7fab1a799a build-sys: use local asciigoat.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 16dfde1503 vscode: add Subname to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 41d7c6e04d vscode: add unescapes to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 48adaeb8a8 vscode: add asciigoat to the dictionary
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-05 13:22:09 +00:00
amery 99ca8d0b3b Merge branch 'pr-amery-basic' into next-amery 2023-09-05 13:22:01 +00:00
amery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9)
Reviewed-on: #9
2023-09-05 15:20:38 +02:00
amery d41cd781d9 parser: introduce SplitCommaArray to split comma separated strings
removing whitespace and respecting quoted literals.

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery 651fcb6215 parser: Unquoted(), AcceptQuotedString()
TODO: reduce quoted strings with escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery fa9a7b4735 basic: rename and document queue related methods
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 17:37:09 +00:00
amery cfd4a94559 basic: call executeFinal() when OnToken() fails
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 17:37:09 +00:00
amery d8af7821e4 Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8)
Reviewed-on: #8
2023-09-04 19:33:24 +02:00
amery 8f3e59ec36 parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery d316031c44 basic: cleanup using parser.NewError()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery c3883cbb0d parser: introduce NewError() to create lexer.Error using lexer.Position
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:16:43 +00:00
amery 314c004efd Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)
Reviewed-on: #7
2023-09-04 16:17:04 +02:00
9 changed files with 298 additions and 106 deletions
+3 -12
View File
@@ -4,28 +4,19 @@ import (
"errors" "errors"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
) )
var ( var (
errInvalidToken = errors.New("invalid token") errInvalidToken = errors.New("invalid token")
) )
// newError builds a lexer.Error positioned at pos, carrying the
// given content, hint and underlying error.
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	e := new(lexer.Error)
	e.Line, e.Column = pos.Line, pos.Column
	e.Content = content
	e.Hint = hint
	e.Err = err
	return e
}
func newErrInvalidToken(t *token) *lexer.Error { func newErrInvalidToken(t *token) *lexer.Error {
return newError(t.pos, t.value, "", errInvalidToken) return parser.NewError(t.pos, t.value, "", errInvalidToken)
} }
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = newError(pos, content, "", err) err = parser.NewError(pos, content, "", err)
dec.executeFinal() dec.executeFinal()
return err return err
} }
-18
View File
@@ -53,12 +53,6 @@ func (field Field) String() string {
return buf.String() return buf.String()
} }
// GoString is the hook used by "%#v".
// It is a placeholder: nothing is written to the buffer, so the
// result is always the empty string.
func (*Field) GoString() string {
	return new(bytes.Buffer).String()
}
func writeSectionToBuffer(w *bytes.Buffer, sec *Section, nl string) int { func writeSectionToBuffer(w *bytes.Buffer, sec *Section, nl string) int {
var written, n int var written, n int
@@ -96,12 +90,6 @@ func (sec *Section) String() string {
return buf.String() return buf.String()
} }
// GoString is the hook used by "%#v".
// It is a placeholder: nothing is written to the buffer, so the
// result is always the empty string.
func (*Section) GoString() string {
	return new(bytes.Buffer).String()
}
// WriteTo writes a INI representation of the document // WriteTo writes a INI representation of the document
// onto the provided writer. // onto the provided writer.
func (doc *Document) WriteTo(w io.Writer) (int64, error) { func (doc *Document) WriteTo(w io.Writer) (int64, error) {
@@ -114,9 +102,3 @@ func (doc *Document) String() string {
buf := doc.AsBuffer(WriteNewLine) buf := doc.AsBuffer(WriteNewLine)
return buf.String() return buf.String()
} }
// GoString is the hook used by "%#v".
// It is a placeholder: nothing is written to the buffer, so the
// result is always the empty string.
func (Document) GoString() string {
	return new(bytes.Buffer).String()
}
-49
View File
@@ -1,49 +0,0 @@
package ini
import (
"bytes"
"io"
"strings"
"asciigoat.org/core"
"asciigoat.org/ini/parser"
)
// Decoder pairs a parser.Parser with the io.Closer of the
// source it reads from.
type Decoder struct {
io.Closer
// p is the parser run by the Decoder's methods
p *parser.Parser
}
// Decode runs the underlying parser and returns its error.
// The Decoder is closed before returning; the error returned
// by Close is discarded.
func (dec *Decoder) Decode() error {
defer dec.Close()
return dec.p.Run()
}
// NewDecoder creates a Decoder over the provided [io.Reader].
// The reader is wrapped using core.NewReadCloser, and nil is
// returned if that wrapper is nil.
func NewDecoder(r io.Reader) *Decoder {
	rc := core.NewReadCloser(r)
	if rc == nil {
		return nil
	}

	return &Decoder{
		p:      parser.NewParser(rc),
		Closer: rc,
	}
}
// NewDecoderBytes creates a Decoder that reads from the given byte slice.
func NewDecoderBytes(b []byte) *Decoder {
	r := bytes.NewReader(b)
	return NewDecoder(r)
}
// NewDecoderString creates a Decoder that reads from the given string.
func NewDecoderString(s string) *Decoder {
	r := strings.NewReader(s)
	return NewDecoder(r)
}
+88
View File
@@ -0,0 +1,88 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// commaArrayParser is the lexer state used by SplitCommaArray.
// It embeds a TextParser as its source of runes and collects
// the values found in out.
type commaArrayParser struct {
TextParser
// out holds the values in the order they were appended
out []string
}
// lexStart skips whitespace and new lines in front of a value and
// picks the state that will consume it: lexQuotedString when the
// value begins with a quote, lexWord for anything else.
// A read error, including the end of the input, ends the state machine.
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// nothing else to read
			return nil, err
		}

		if r == RuneQuotes {
			// quoted value, the opening quote stays accepted
			return p.lexQuotedString, nil
		}

		if IsNewLine(r) {
			// new lines are acceptable between values, but the
			// whole new line sequence has to be dropped, not only
			// its first rune.
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
			continue
		}

		if lexer.IsSpace(r) {
			// whitespace outside quotes is dropped
			p.Discard()
			continue
		}

		// first rune of an unquoted value, let lexWord read it again
		p.UnreadRune()
		return p.lexWord, nil
	}
}
// lexWord consumes an unquoted value up to the next comma or the end
// of the input, and appends it to the output with its trailing
// whitespace removed.
//
// On a comma the state machine goes back to lexStart. On a read
// error, including the end of the input, what was accepted so far is
// stored and the error is passed on.
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// done. store what we got, trimmed the same way comma
			// terminated values are, and move on
			_, s := p.Emit()
			s = strings.TrimRightFunc(s, IsSpace)
			p.out = append(p.out, s)
			return nil, err
		case r == ',':
			// done. the emitted text includes the comma itself
			_, s := p.Emit()
			// remove comma, trim and append to output
			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
			p.out = append(p.out, s)
			return p.lexStart, nil
		}
	}
}
// lexQuotedString consumes a quoted value, whose opening quote was
// already accepted by lexStart, and appends its content to the output.
//
// The whitespace and the comma following the closing quote are
// consumed here as well. Otherwise lexStart would hand that comma
// over to lexWord, which would store an empty value for it, and
// `"a","b"` would be split into three elements instead of two.
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
	s, err := lexQuotedString(&p.TextParser)
	if err != nil {
		return nil, err
	}

	p.Discard()
	p.out = append(p.out, s)

	// look for the separator that ends this value
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// nothing else to read
			return nil, err
		case r == ',':
			// separator, the next value starts after it
			p.Discard()
			return p.lexStart, nil
		case IsNewLine(r):
			// drop the whole new line sequence
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
		case lexer.IsSpace(r):
			// drop whitespace after the closing quote
			p.Discard()
		default:
			// no separator, let lexStart decide what to do
			// with whatever follows
			p.UnreadRune()
			return p.lexStart, nil
		}
	}
}
// Run drives the state machine through lexer.Run, starting at
// lexStart, and returns the values collected in p.out together
// with whatever error lexer.Run reported.
func (p *commaArrayParser) Run() ([]string, error) {
err := lexer.Run(p.lexStart)
return p.out, err
}
// SplitCommaArray splits comma separated strings, removing whitespace
// and respecting quoted literals.
// An empty string produces a nil slice and no error.
func SplitCommaArray(s string) ([]string, error) {
	if s == "" {
		// nothing to split
		return nil, nil
	}

	var p commaArrayParser
	p.InitString(s)
	return p.Run()
}
+62
View File
@@ -0,0 +1,62 @@
package parser
import (
"io/fs"
"asciigoat.org/core/lexer"
)
// NewError creates a [lexer.Error] taking its Line and Column from
// the given [lexer.Position], and storing the content, hint and
// underlying error as provided.
func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	e := lexer.Error{
		Content: content,
		Hint:    hint,
		Err:     err,
	}
	e.Line, e.Column = pos.Line, pos.Column
	return &e
}
// ErrPlusPosition returns a copy of the given [lexer.Error]
// offsetting the Line/Column information.
// The error's own Line and Column are added to pos, and the
// Content, Hint and Err fields are carried over unchanged.
func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
	offset := lexer.Position{
		Line:   e.Line,
		Column: e.Column,
	}
	pos.Add(offset)

	return NewError(pos, e.Content, e.Hint, e.Err)
}
// NewErrIncompleteQuotedString returns a [lexer.Error]
// indicating the quoted string being parsed wasn't correctly
// terminated. The text accepted so far is emitted from the
// parser and used as content of the error.
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"

	return newErrIncomplete(p, hint)
}
// NewErrIncompleteEscaped returns a [lexer.Error]
// indicating the text being parsed wasn't correctly
// terminated. The text accepted so far is emitted from the
// parser and used as content of the error.
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
	const hint = "incomplete escaped string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete emits the text accepted by the parser and
// returns a [lexer.Error] wrapping fs.ErrInvalid, with that text as
// content and the given hint. The reported position is the one
// returned by Emit advanced by the positional length of the text.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	at, text := p.Emit()
	at.Add(GetPositionalLength(text))

	return NewError(at, text, hint, fs.ErrInvalid)
}
// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating
// the specified sequence, at the end of the accepted buffer,
// is invalid.
// The reported position is the parser's position advanced by the
// positional length of the accepted text preceding the sequence.
func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
	at := p.Position()
	accepted := p.String()

	// what was accepted before the offending sequence
	before := accepted[:len(accepted)-len(seq)]
	at.Add(GetPositionalLength(before))

	return NewError(at, seq, "invalid escape character", fs.ErrInvalid)
}
+9 -5
View File
@@ -7,11 +7,13 @@ import (
) )
const ( const (
RuneComment = ';' // RuneComment is the standard dosini comment character RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
) )
var ( var (
@@ -27,6 +29,8 @@ var (
RuneSectionStart, RuneSectionStart,
RuneSectionEnd, RuneSectionEnd,
RuneFieldEqual, RuneFieldEqual,
RuneQuotes,
RuneEscape,
}) })
) )
+1 -7
View File
@@ -31,13 +31,7 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
func defaultOnError(pos lexer.Position, content string, err error) error { func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err) log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{ return NewError(pos, content, "", err)
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
} }
func (p *Parser) setDefaults() { func (p *Parser) setDefaults() {
+135
View File
@@ -0,0 +1,135 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString consumes a quoted string from the source
// and returns its content without the surrounding quotes.
//
// The boolean result tells if a quoted string was accepted. When
// the next rune isn't a quote it is put back and no error is
// returned. Read errors and malformed quoted strings are returned
// as error.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	r, _, err := p.ReadRune()
	if err != nil {
		// nothing here
		return "", false, err
	}

	if r != RuneQuotes {
		// not for us
		p.UnreadRune()
		return "", false, nil
	}

	// opening quote accepted, read the rest
	s, lexErr := lexQuotedString(p)
	if lexErr != nil {
		// bad quoted string
		return "", false, lexErr
	}

	return s, true, nil
}
// lexQuotedString reads the remainder of a quoted string whose
// opening quote was already accepted, and returns its content.
// The fast path without escape characters is attempted first,
// switching to the escape aware reader once one is found.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	s, done, err := lexQuotedStringNoEscape(p)
	if err != nil {
		return "", err
	}

	if !done {
		// escape character detected
		return lexQuotedStringEscaped(p)
	}

	return s, nil
}
// lexQuotedStringNoEscape reads a quoted string for as long as no
// escape character is found.
//
// On the closing quote it returns the accepted text without its
// first and last bytes, the quotes, and true.
// On an escape character the rune is put back and false is
// returned so the caller can continue with the escape aware reader.
// A read error before the closing quote is reported as an
// incomplete quoted string.
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// incomplete
			return "", false, NewErrIncompleteQuotedString(p)
		}

		switch {
		case r == RuneQuotes:
			// end, just remove the quotes
			accepted := p.String()
			return accepted[1 : len(accepted)-1], true, nil
		case r == RuneEscape:
			// things just got complicated...
			p.UnreadRune()
			return "", false, nil
		case IsNewLine(r):
			// new lines within quoted values are acceptable,
			// accept the sequence as a whole
			p.UnreadRune()
			p.AcceptNewLine()
		}
	}
}
// lexQuotedStringEscaped continues reading a quoted string after
// lexQuotedStringNoEscape stopped in front of an escape character,
// and returns the content up to the closing quote.
//
// The only escape sequence currently understood is an escaped new
// line, which is skipped. Any other sequence is reported as an
// invalid escape sequence, and reaching a read error before the
// closing quote as an incomplete string.
func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
	var out strings.Builder

	// start from what was accepted before the escape character,
	// without the opening quote
	_, _ = out.WriteString(p.String()[1:])

	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// incomplete quoted
			return "", NewErrIncompleteQuotedString(p)
		}

		if r == RuneQuotes {
			// end
			return out.String(), nil
		}

		if r != RuneEscape {
			// normal, append to result
			_, _ = out.WriteRune(r)
			continue
		}

		// escaped
		next, _, err := p.ReadRune()
		if err != nil {
			// incomplete escaped
			return "", NewErrIncompleteEscaped(p)
		}

		if !IsNewLine(next) {
			// TODO: check valid escape character and
			// append to result
			seq := string([]rune{r, next})
			return "", NewErrInvalidEscapeSequence(p, seq)
		}

		// escaped new line, skip
		p.UnreadRune()
		p.AcceptNewLine()
	}
}
// Unquoted removes quotes and unescapes the content.
// Strings not starting with a quote are returned as they are, and
// whatever follows the closing quote isn't examined.
func Unquoted(s string) (string, error) {
	if s == "" {
		// nothing to do
		return "", nil
	}

	var p TextParser
	p.InitString(s)

	content, quoted, err := p.AcceptQuotedString()
	if err != nil {
		// bad string
		return "", err
	}

	if !quoted {
		// not quoted
		return s, nil
	}

	return content, nil
}
-15
View File
@@ -1,15 +0,0 @@
package ini
import "io"
// ReadInto creates a Decoder over the given [io.Reader] and
// calls its Unmarshal method passing v.
func ReadInto(v any, r io.Reader) error {
	return NewDecoder(r).Unmarshal(v)
}
// Unmarshal runs the underlying parser and returns its error.
// The destination argument is currently ignored, nothing is
// stored into it.
func (dec *Decoder) Unmarshal(any) error {
return dec.p.Run()
}