10 Commits

Author SHA1 Message Date
amery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9)
Reviewed-on: #9
2023-09-05 15:20:38 +02:00
amery d41cd781d9 parser: introduce SplitCommaArray to split comma separated strings
removing whitespace and respecting quoted literals.

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery 651fcb6215 parser: Unquoted(), AcceptQuotedString()
TODO: reduce quoted strings with escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 18:58:06 +00:00
amery d8af7821e4 Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8)
Reviewed-on: #8
2023-09-04 19:33:24 +02:00
amery 8f3e59ec36 parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery d316031c44 basic: cleanup using parser.NewError()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:25:20 +00:00
amery c3883cbb0d parser: introduce NewError() to create lexer.Error using lexer.Position
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 15:16:43 +00:00
amery 314c004efd Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)
Reviewed-on: #7
2023-09-04 16:17:04 +02:00
amery 30a86e170b parser: use GetPositionalLength() on TextParser.Discard() and TextParser.Emit()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 13:32:27 +00:00
amery 8cc75da138 parser: introduce GetPositionalLength()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:43:46 +00:00
10 changed files with 293 additions and 34 deletions
+3 -14
View File
@@ -4,6 +4,7 @@ import (
"errors"
"asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
)
var (
@@ -11,23 +12,11 @@ var (
)
// newErrInvalidToken builds a lexer.Error for the given token, carrying
// the token's position and value and wrapping errInvalidToken.
//
// NOTE(review): this diff view appears to show the removed struct literal
// followed by the added parser.NewError call; presumably only one of the
// two return paths exists in either revision — confirm against the file.
func newErrInvalidToken(t *token) *lexer.Error {
err := &lexer.Error{
Line: t.pos.Line,
Column: t.pos.Column,
Content: t.value,
Err: errInvalidToken,
}
return err
return parser.NewError(t.pos, t.value, "", errInvalidToken)
}
// OnError wraps the given error in a lexer.Error carrying the position
// and content, calls dec.executeFinal(), and returns the wrapped error.
//
// NOTE(review): this diff view appears to show the removed struct literal
// followed by the added parser.NewError call; presumably only one of the
// two assignments exists in either revision — confirm against the file.
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
err = parser.NewError(pos, content, "", err)
// executeFinal runs before the wrapped error is handed back
dec.executeFinal()
return err
}
+1 -1
View File
@@ -3,7 +3,7 @@ module asciigoat.org/ini
go 1.19
require (
asciigoat.org/core v0.3.7
asciigoat.org/core v0.3.9
github.com/mgechev/revive v1.3.3
golang.org/x/tools v0.12.0
)
+2 -2
View File
@@ -1,5 +1,5 @@
asciigoat.org/core v0.3.7 h1:tMasdvZgsMJJMVsZVfXXB5lqq82pFiCsyEmOEmcmAfI=
asciigoat.org/core v0.3.7/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
+88
View File
@@ -0,0 +1,88 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// commaArrayParser is the lexer state used by SplitCommaArray. It embeds
// TextParser to read the input and collects the resulting elements in out.
type commaArrayParser struct {
TextParser
// out holds the elements appended by lexWord and lexQuotedString
out []string
}
// lexStart skips whitespace and new lines in front of an element and
// picks the state that will consume it: lexQuotedString when the element
// begins with RuneQuotes, lexWord for anything else. A ReadRune error
// is returned untouched and ends the run.
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// nothing left to read
			return nil, err
		}

		if r == RuneQuotes {
			// the opening quote stays accepted for lexQuotedString
			return p.lexQuotedString, nil
		}

		if IsNewLine(r) {
			// put the rune back so AcceptNewLine can take the
			// whole new line sequence before it is dropped
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
			continue
		}

		if lexer.IsSpace(r) {
			// whitespace outside quotes carries no meaning
			p.Discard()
			continue
		}

		// first rune of a bare word, hand it over unread
		p.UnreadRune()
		return p.lexWord, nil
	}
}
// lexWord consumes an unquoted element up to the next comma or the end
// of the input, and appends it to the output with trailing whitespace
// removed. On a comma it hands control back to lexStart; on a read
// error it stores what was accepted so far and returns the error.
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// done. store what we got, trimmed the same way as
			// comma terminated elements, and move on
			_, s := p.Emit()
			s = strings.TrimRightFunc(s, IsSpace)
			p.out = append(p.out, s)
			return nil, err
		case r == ',':
			// done
			_, s := p.Emit()
			// remove comma, trim and append to output
			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
			p.out = append(p.out, s)
			return p.lexStart, nil
		}
	}
}
// lexQuotedString consumes a quoted element using the package level
// lexQuotedString helper, drops the accepted text, stores the unquoted
// value and returns to lexStart. A malformed quoted string ends the run
// with the helper's error.
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
	value, qerr := lexQuotedString(&p.TextParser)
	if qerr != nil {
		return nil, qerr
	}

	// the raw quoted text is no longer needed
	p.Discard()

	p.out = append(p.out, value)
	return p.lexStart, nil
}
// Run drives the state machine starting at lexStart and returns the
// collected elements together with whatever error lexer.Run reported.
func (p *commaArrayParser) Run() ([]string, error) {
	if err := lexer.Run(p.lexStart); err != nil {
		return p.out, err
	}
	return p.out, nil
}
// SplitCommaArray breaks a comma separated string into its elements,
// dropping whitespace around them and honouring quoted literals.
// An empty input yields a nil slice and no error.
func SplitCommaArray(s string) ([]string, error) {
	if s == "" {
		// nothing to split
		return nil, nil
	}

	var p commaArrayParser
	p.InitString(s)
	return p.Run()
}
+43
View File
@@ -0,0 +1,43 @@
package parser
import (
"io/fs"
"asciigoat.org/core/lexer"
)
// NewError builds a lexer.Error from a lexer.Position, the offending
// content, a hint and the underlying error.
func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	out := new(lexer.Error)
	out.Line = pos.Line
	out.Column = pos.Column
	out.Content = content
	out.Hint = hint
	out.Err = err
	return out
}
// ErrPlusPosition returns a copy of the given [lexer.Error] whose
// Line/Column are the given position plus the error's own Line/Column.
// Content, Hint and Err are carried over unchanged.
func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
	// pos is a local copy, adding to it doesn't affect the caller
	offset := lexer.Position{
		Line:   e.Line,
		Column: e.Column,
	}
	pos.Add(offset)

	return NewError(pos, e.Content, e.Hint, e.Err)
}
// NewErrIncompleteQuotedString returns a [lexer.Error] reporting that
// the quoted string being parsed was never terminated. The text
// accepted so far is emitted from the parser to build the error.
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete emits the text accepted so far and returns a
// [lexer.Error] wrapping fs.ErrInvalid with the given hint. The
// reported position is the start of the emitted text advanced by
// its positional length.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	start, text := p.Emit()

	length := GetPositionalLength(text)
	start.Add(length)

	return NewError(start, text, hint, fs.ErrInvalid)
}
+14 -5
View File
@@ -7,11 +7,13 @@ import (
)
// Runes with a special meaning for the parser.
//
// NOTE(review): this diff view appears to list the removed declarations
// followed by the added ones, which is why several names show up twice —
// confirm against the file.
const (
RuneComment = ';' // RuneComment is the standard dosini comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
)
var (
@@ -27,6 +29,8 @@ var (
RuneSectionStart,
RuneSectionEnd,
RuneFieldEqual,
RuneQuotes,
RuneEscape,
})
)
@@ -43,6 +47,11 @@ var (
IsCommentStart = lexer.NewIsIn(RunesComment)
)
// IsAny is a rune predicate that matches every rune, whatever its value.
func IsAny(rune) bool {
	return true
}
// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
func IsSpaceNotNewLine(r rune) bool {
return IsSpace(r) && !IsNewLine(r)
+1 -7
View File
@@ -31,13 +31,7 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
// defaultOnError logs the error together with its position and content,
// and returns it wrapped with that same position and content.
//
// NOTE(review): this diff view appears to show the removed lexer.Error
// literal followed by the added NewError call; presumably only one of
// the two returns exists in either revision — confirm against the file.
func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
return NewError(pos, content, "", err)
}
func (p *Parser) setDefaults() {
+6 -5
View File
@@ -39,9 +39,10 @@ func (p *TextParser) InitString(s string) {
// Discard shadows [lexer.Reader]'s, and takes in consideration
// new lines on the discarded data when moving the position
// NOTE(review): this diff view appears to show the removed stub (which
// panicked with "not implemented") followed by the added implementation,
// hence the two function headers — confirm against the file.
func (*TextParser) Discard() {
// TODO: consider new lines
panic("not implemented")
func (p *TextParser) Discard() {
// take the accepted text out of the reader
s := p.Reader.Emit()
// and move the position by the positional length of that text
l := GetPositionalLength(s)
p.pos.Add(l)
}
// Emit returns the accepted text, its position, and
@@ -49,8 +50,8 @@ func (*TextParser) Discard() {
// NOTE(review): this diff view appears to show the removed StepN based
// position update followed by the added GetPositionalLength based one —
// confirm against the file.
func (p *TextParser) Emit() (lexer.Position, string) {
// remember where the emitted text started
pos := p.pos
s := p.Reader.Emit()
// TODO: consider new lines
p.pos.StepN(len(s))
// move the parser's position by the positional length of the text
l := GetPositionalLength(s)
p.pos.Add(l)
return pos, s
}
+38
View File
@@ -0,0 +1,38 @@
package parser
import (
"io"
"asciigoat.org/core/lexer"
)
// positionLengthParser is the lexer state used by GetPositionalLength.
// It reads the text through the embedded TextParser while lexStart
// records each step in the embedded lexer.Position.
type positionLengthParser struct {
TextParser
lexer.Position
}
// lexStart walks the whole text, stepping the embedded Position one
// line for every new line accepted and one column for any other rune.
// It ends with io.EOF once nothing else can be accepted.
func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
	for {
		if p.AcceptNewLine() {
			// new lines are tried first so they never count as columns
			p.Position.StepLine()
			continue
		}

		if p.Accept(IsAny) {
			p.Position.StepN(1)
			continue
		}

		// nothing left to accept
		return nil, io.EOF
	}
}
// GetPositionalLength calculates the [lexer.Position] at
// the end of a text, stepping a line for each new line and
// a column for every other rune. An empty text yields the
// zero Position.
func GetPositionalLength(s string) lexer.Position {
	var p positionLengthParser

	// only non-empty text needs scanning; the zero Position already
	// describes the empty string
	if s != "" {
		p.InitString(s)
		// lexStart only ever finishes with io.EOF, nothing to report
		_ = lexer.Run(p.lexStart)
	}

	return p.Position
}
+97
View File
@@ -0,0 +1,97 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString attempts to consume a quoted string from the
// source. It returns the unquoted content and true on success. When the
// next rune isn't RuneQuotes the rune is put back and it returns false
// with no error. Read errors and malformed quoted strings are returned
// as errors.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	r, _, err := p.ReadRune()
	if err != nil {
		// nothing to read
		return "", false, err
	}

	if r != RuneQuotes {
		// not a quoted string, leave the rune for someone else
		p.UnreadRune()
		return "", false, nil
	}

	// opening quote accepted, consume the rest
	unquoted, qerr := lexQuotedString(p)
	if qerr != nil {
		// bad quoted string
		return "", false, qerr
	}

	return unquoted, true, nil
}
// lexQuotedString consumes runes up to the closing RuneQuotes, assuming
// the opening one has already been accepted by the caller. The rune
// after a RuneEscape is taken without being inspected, and new lines
// are allowed inside the quotes. Running out of input before the
// closing quote yields an incomplete quoted string error.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// input ended before the closing quote
			return "", NewErrIncompleteQuotedString(p)
		}

		if r == RuneQuotes {
			// closing quote, strip quotes and handle escapes
			return lexReturnUnescapedQuotedString(p)
		}

		if r == RuneEscape {
			// the escaped rune is accepted whatever it is
			if _, _, err := p.ReadRune(); err != nil {
				// input ended right after the escape
				return "", NewErrIncompleteQuotedString(p)
			}
			continue
		}

		if IsNewLine(r) {
			// put the rune back so AcceptNewLine takes the
			// complete new line sequence
			p.UnreadRune()
			p.AcceptNewLine()
		}
	}
}
// lexReturnUnescapedQuotedString takes the text accepted so far, strips
// its first and last byte (the quotes) and returns the remainder.
// Content containing RuneEscape is rejected with
// lexer.ErrNotImplemented as unescaping isn't supported yet.
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
	quoted := p.String()
	// drop the surrounding quotes
	content := quoted[1 : len(quoted)-1]

	if !strings.ContainsRune(content, RuneEscape) {
		// nothing to unescape
		return content, nil
	}

	// TODO: implement unescaping
	return "", NewError(p.Position(), content, "escaped characters", lexer.ErrNotImplemented)
}
// Unquoted returns the content of a quoted string without its quotes.
// The empty string and strings that don't start with a quote are
// returned unchanged. Malformed quoted strings are reported as errors.
func Unquoted(s string) (string, error) {
	if s == "" {
		return "", nil
	}

	var p TextParser
	p.InitString(s)

	unquoted, ok, err := p.AcceptQuotedString()
	if err != nil {
		// bad string
		return "", err
	}

	if !ok {
		// not quoted, return as-is
		return s, nil
	}

	return unquoted, nil
}