parser: Unquoted(), AcceptQuotedString() and SplitCommaArray #9
@@ -0,0 +1,88 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"asciigoat.org/core/lexer"
|
||||
)
|
||||
|
||||
type commaArrayParser struct {
|
||||
TextParser
|
||||
|
||||
out []string
|
||||
}
|
||||
|
||||
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
|
||||
for {
|
||||
r, _, err := p.ReadRune()
|
||||
switch {
|
||||
case err != nil:
|
||||
// EOF
|
||||
return nil, err
|
||||
case r == RuneQuotes:
|
||||
// Quoted Value
|
||||
return p.lexQuotedString, nil
|
||||
case IsNewLine(r):
|
||||
// new lines are acceptable when parsing a string for
|
||||
// comma delimited arrays. but make sure we discard it
|
||||
// complete
|
||||
p.UnreadRune()
|
||||
p.AcceptNewLine()
|
||||
p.Discard()
|
||||
case lexer.IsSpace(r):
|
||||
// discard whitespace outside quotes
|
||||
p.Discard()
|
||||
default:
|
||||
p.UnreadRune()
|
||||
return p.lexWord, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
|
||||
for {
|
||||
r, _, err := p.ReadRune()
|
||||
switch {
|
||||
case err != nil:
|
||||
// done. store what we got and move on
|
||||
_, s := p.Emit()
|
||||
p.out = append(p.out, s)
|
||||
return nil, err
|
||||
case r == ',':
|
||||
// done
|
||||
_, s := p.Emit()
|
||||
// remove comma, trim and append to output
|
||||
s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
|
||||
p.out = append(p.out, s)
|
||||
return p.lexStart, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
|
||||
s, err := lexQuotedString(&p.TextParser)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p.Discard()
|
||||
p.out = append(p.out, s)
|
||||
return p.lexStart, nil
|
||||
}
|
||||
|
||||
func (p *commaArrayParser) Run() ([]string, error) {
|
||||
err := lexer.Run(p.lexStart)
|
||||
|
||||
return p.out, err
|
||||
}
|
||||
|
||||
// SplitCommaArray splits comma separated strings, removing whitespace
|
||||
// and respecting quoted literals.
|
||||
func SplitCommaArray(s string) ([]string, error) {
|
||||
if s != "" {
|
||||
var p commaArrayParser
|
||||
p.InitString(s)
|
||||
return p.Run()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
|
||||
"asciigoat.org/core/lexer"
|
||||
)
|
||||
|
||||
@@ -25,3 +27,17 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
|
||||
|
||||
return NewError(pos, e.Content, e.Hint, e.Err)
|
||||
}
|
||||
|
||||
// NewErrIncompleteQuotedString returns a [lexer.Error]
|
||||
// indicating the quoted string being parsed wasn't correctly
|
||||
// terminated
|
||||
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
|
||||
return newErrIncomplete(p, "incomplete quoted string")
|
||||
}
|
||||
|
||||
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
|
||||
pos, s := p.Emit()
|
||||
pos.Add(GetPositionalLength(s))
|
||||
|
||||
return NewError(pos, s, hint, fs.ErrInvalid)
|
||||
}
|
||||
|
||||
@@ -7,11 +7,13 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
RuneComment = ';' // RuneComment is the standard dosini comment character
|
||||
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
|
||||
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
|
||||
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration
|
||||
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
|
||||
RuneComment = ';' // RuneComment is the standard INI comment character
|
||||
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
|
||||
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
|
||||
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
|
||||
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
|
||||
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
|
||||
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -27,6 +29,8 @@ var (
|
||||
RuneSectionStart,
|
||||
RuneSectionEnd,
|
||||
RuneFieldEqual,
|
||||
RuneQuotes,
|
||||
RuneEscape,
|
||||
})
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"asciigoat.org/core/lexer"
|
||||
)
|
||||
|
||||
// AcceptQuotedString consumes a quoted string from the source
|
||||
// and returns it unquoted and unescaped
|
||||
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
|
||||
r, _, err := p.ReadRune()
|
||||
switch {
|
||||
case err != nil:
|
||||
// nothing here
|
||||
return "", false, err
|
||||
case r != RuneQuotes:
|
||||
// not for us
|
||||
p.UnreadRune()
|
||||
return "", false, nil
|
||||
default:
|
||||
// let's roll
|
||||
s, err := lexQuotedString(p)
|
||||
switch {
|
||||
case err != nil:
|
||||
// bad quoted string
|
||||
return "", false, err
|
||||
default:
|
||||
// success
|
||||
return s, true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
|
||||
for {
|
||||
r, _, err := p.ReadRune()
|
||||
switch {
|
||||
case err != nil:
|
||||
// incomplete
|
||||
return "", NewErrIncompleteQuotedString(p)
|
||||
case r == RuneQuotes:
|
||||
// end, remove quotes and process escaped characters
|
||||
return lexReturnUnescapedQuotedString(p)
|
||||
case r == RuneEscape:
|
||||
// escaped, take another
|
||||
_, _, err := p.ReadRune()
|
||||
if err != nil {
|
||||
// incomplete
|
||||
return "", NewErrIncompleteQuotedString(p)
|
||||
}
|
||||
case IsNewLine(r):
|
||||
// new lines within quoted values are acceptable
|
||||
p.UnreadRune()
|
||||
p.AcceptNewLine()
|
||||
default:
|
||||
// continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
|
||||
// remove quotes
|
||||
s := p.String()
|
||||
l := len(s)
|
||||
s = s[1 : l-1]
|
||||
|
||||
if strings.ContainsRune(s, RuneEscape) {
|
||||
// TODO: implement unescaping
|
||||
err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented)
|
||||
return "", err
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Unquoted removes quotes the content and unescapes the content
|
||||
func Unquoted(s string) (string, error) {
|
||||
var p TextParser
|
||||
if s == "" {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
p.InitString(s)
|
||||
unquoted, ok, err := p.AcceptQuotedString()
|
||||
switch {
|
||||
case err != nil:
|
||||
// bad string
|
||||
return "", err
|
||||
case ok:
|
||||
// success
|
||||
return unquoted, nil
|
||||
default:
|
||||
// not quoted
|
||||
return s, nil
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user