parser: Unquoted(), AcceptQuotedString() and SplitCommaArray #9
Merged
amery
merged 2 commits from pr-amery-parser-quoted
into main
1 year ago
4 changed files with 210 additions and 5 deletions
@ -0,0 +1,88 @@
|
||||
package parser |
||||
|
||||
import ( |
||||
"strings" |
||||
|
||||
"asciigoat.org/core/lexer" |
||||
) |
||||
|
||||
type commaArrayParser struct { |
||||
TextParser |
||||
|
||||
out []string |
||||
} |
||||
|
||||
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) { |
||||
for { |
||||
r, _, err := p.ReadRune() |
||||
switch { |
||||
case err != nil: |
||||
// EOF
|
||||
return nil, err |
||||
case r == RuneQuotes: |
||||
// Quoted Value
|
||||
return p.lexQuotedString, nil |
||||
case IsNewLine(r): |
||||
// new lines are acceptable when parsing a string for
|
||||
// comma delimited arrays. but make sure we discard it
|
||||
// complete
|
||||
p.UnreadRune() |
||||
p.AcceptNewLine() |
||||
p.Discard() |
||||
case lexer.IsSpace(r): |
||||
// discard whitespace outside quotes
|
||||
p.Discard() |
||||
default: |
||||
p.UnreadRune() |
||||
return p.lexWord, nil |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) { |
||||
for { |
||||
r, _, err := p.ReadRune() |
||||
switch { |
||||
case err != nil: |
||||
// done. store what we got and move on
|
||||
_, s := p.Emit() |
||||
p.out = append(p.out, s) |
||||
return nil, err |
||||
case r == ',': |
||||
// done
|
||||
_, s := p.Emit() |
||||
// remove comma, trim and append to output
|
||||
s = strings.TrimRightFunc(s[:len(s)-1], IsSpace) |
||||
p.out = append(p.out, s) |
||||
return p.lexStart, nil |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) { |
||||
s, err := lexQuotedString(&p.TextParser) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
p.Discard() |
||||
p.out = append(p.out, s) |
||||
return p.lexStart, nil |
||||
} |
||||
|
||||
func (p *commaArrayParser) Run() ([]string, error) { |
||||
err := lexer.Run(p.lexStart) |
||||
|
||||
return p.out, err |
||||
} |
||||
|
||||
// SplitCommaArray splits comma separated strings, removing whitespace
|
||||
// and respecting quoted literals.
|
||||
func SplitCommaArray(s string) ([]string, error) { |
||||
if s != "" { |
||||
var p commaArrayParser |
||||
p.InitString(s) |
||||
return p.Run() |
||||
} |
||||
return nil, nil |
||||
} |
@ -0,0 +1,97 @@
|
||||
package parser |
||||
|
||||
import ( |
||||
"strings" |
||||
|
||||
"asciigoat.org/core/lexer" |
||||
) |
||||
|
||||
// AcceptQuotedString consumes a quoted string from the source
|
||||
// and returns it unquoted and unescaped
|
||||
func (p *TextParser) AcceptQuotedString() (string, bool, error) { |
||||
r, _, err := p.ReadRune() |
||||
switch { |
||||
case err != nil: |
||||
// nothing here
|
||||
return "", false, err |
||||
case r != RuneQuotes: |
||||
// not for us
|
||||
p.UnreadRune() |
||||
return "", false, nil |
||||
default: |
||||
// let's roll
|
||||
s, err := lexQuotedString(p) |
||||
switch { |
||||
case err != nil: |
||||
// bad quoted string
|
||||
return "", false, err |
||||
default: |
||||
// success
|
||||
return s, true, nil |
||||
} |
||||
} |
||||
} |
||||
|
||||
func lexQuotedString(p *TextParser) (string, *lexer.Error) { |
||||
for { |
||||
r, _, err := p.ReadRune() |
||||
switch { |
||||
case err != nil: |
||||
// incomplete
|
||||
return "", NewErrIncompleteQuotedString(p) |
||||
case r == RuneQuotes: |
||||
// end, remove quotes and process escaped characters
|
||||
return lexReturnUnescapedQuotedString(p) |
||||
case r == RuneEscape: |
||||
// escaped, take another
|
||||
_, _, err := p.ReadRune() |
||||
if err != nil { |
||||
// incomplete
|
||||
return "", NewErrIncompleteQuotedString(p) |
||||
} |
||||
case IsNewLine(r): |
||||
// new lines within quoted values are acceptable
|
||||
p.UnreadRune() |
||||
p.AcceptNewLine() |
||||
default: |
||||
// continue
|
||||
} |
||||
} |
||||
} |
||||
|
||||
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) { |
||||
// remove quotes
|
||||
s := p.String() |
||||
l := len(s) |
||||
s = s[1 : l-1] |
||||
|
||||
if strings.ContainsRune(s, RuneEscape) { |
||||
// TODO: implement unescaping
|
||||
err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented) |
||||
return "", err |
||||
} |
||||
|
||||
return s, nil |
||||
} |
||||
|
||||
// Unquoted removes quotes the content and unescapes the content
|
||||
func Unquoted(s string) (string, error) { |
||||
var p TextParser |
||||
if s == "" { |
||||
return "", nil |
||||
} |
||||
|
||||
p.InitString(s) |
||||
unquoted, ok, err := p.AcceptQuotedString() |
||||
switch { |
||||
case err != nil: |
||||
// bad string
|
||||
return "", err |
||||
case ok: |
||||
// success
|
||||
return unquoted, nil |
||||
default: |
||||
// not quoted
|
||||
return s, nil |
||||
} |
||||
} |
Loading…
Reference in new issue