Browse Source

parser: Unescaped [WIP]

Signed-off-by: Alejandro Mery <amery@jpi.io>
dev-amery-parser
Alejandro Mery 1 year ago
parent
commit
3bf20948c0
  1. 19
      parser/error.go
  2. 78
      parser/text_quoted.go

19
parser/error.go

@ -35,9 +35,28 @@ func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
return newErrIncomplete(p, "incomplete quoted string")
}
// NewErrIncompleteEscaped builds the [lexer.Error] used to report
// that the escaped text being parsed was not correctly terminated.
// The error is produced by newErrIncomplete using a fixed hint.
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
	const hint = "incomplete escaped string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete is the shared constructor behind the
// NewErrIncomplete... helpers. It takes the position and text
// returned by p.Emit, adds the positional length of that text to
// the position, and passes both to NewError together with the
// given hint and [fs.ErrInvalid].
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	at, text := p.Emit()
	at.Add(GetPositionalLength(text))

	return NewError(at, text, hint, fs.ErrInvalid)
}
// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating
// the specified sequence, at the end of the accepted buffer,
// is invalid.
//
// The reported position is the one returned by p.Position plus the
// positional length of the accepted text that precedes seq. If seq
// is longer than the accepted text, nothing is considered to precede
// it, instead of slicing out of range.
func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
	pos, s := p.Position(), p.String()

	// drop seq from the tail of the accepted text; clamp the cut
	// point so an oversized seq can't trigger a slice bounds panic.
	n := len(s) - len(seq)
	if n < 0 {
		n = 0
	}
	s = s[:n]

	pos.Add(GetPositionalLength(s))
	return NewError(pos, seq, "invalid escape character", fs.ErrInvalid)
}

78
parser/text_quoted.go

@ -33,22 +33,34 @@ func (p *TextParser) AcceptQuotedString() (string, bool, error) {
}
// lexQuotedString reads a quoted string from the parser in two
// stages. It first tries lexQuotedStringNoEscape; if that reports
// an error the error is returned, and if it reports success its
// result is returned as-is. Otherwise an escape character was
// detected and the work is handed over to lexQuotedStringEscaped.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	text, done, err := lexQuotedStringNoEscape(p)
	if err != nil {
		return "", err
	}

	if done {
		return text, nil
	}

	// escape character detected
	return lexQuotedStringEscaped(p)
}
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete
return "", NewErrIncompleteQuotedString(p)
return "", false, NewErrIncompleteQuotedString(p)
case r == RuneQuotes:
// end, remove quotes and process escaped characters
return lexReturnUnescapedQuotedString(p)
// end, just remove the quotes
s := p.String()
l := len(s)
return s[1 : l-1], true, nil
case r == RuneEscape:
// escaped, take another
_, _, err := p.ReadRune()
if err != nil {
// incomplete
return "", NewErrIncompleteQuotedString(p)
}
// things just got complicated...
p.UnreadRune()
return "", false, nil
case IsNewLine(r):
// new lines within quoted values are acceptable
p.UnreadRune()
@ -59,22 +71,48 @@ func lexQuotedString(p *TextParser) (string, *lexer.Error) {
}
}
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
// remove quotes
s := p.String()
l := len(s)
s = s[1 : l-1]
// Unquoted removes quotes the content and unescapes the content
func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
var result strings.Builder
if strings.ContainsRune(s, RuneEscape) {
// TODO: implement unescaping
err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented)
// append what was accepted before the escape character
_, _ = result.WriteString(p.String()[1:])
for {
r, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete quoted
return "", NewErrIncompleteQuotedString(p)
case r == RuneQuotes:
// end
return result.String(), nil
case r == RuneEscape:
// escaped
r2, _, err := p.ReadRune()
switch {
case err != nil:
// incomplete escaped
return "", NewErrIncompleteEscaped(p)
case IsNewLine(r2):
// escaped new line, skip
p.UnreadRune()
p.AcceptNewLine()
default:
// TODO: check valid escape character and
// append to result
s := string([]rune{r, r2})
err := NewErrInvalidEscapeSequence(p, s)
return "", err
}
return s, nil
default:
// normal, append to result
_, _ = result.WriteRune(r)
}
}
}
// Unquoted removes quotes the content and unescapes the content
// Unquoted removes quotes and unescapes the content
func Unquoted(s string) (string, error) {
var p TextParser
if s == "" {

Loading…
Cancel
Save