diff --git a/parser/error.go b/parser/error.go index ca8f19d..da80e29 100644 --- a/parser/error.go +++ b/parser/error.go @@ -35,9 +35,28 @@ func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error { return newErrIncomplete(p, "incomplete quoted string") } +// NewErrIncompleteEscaped returns a [lexer.Error] +// indicating the text being parsed wasn't correctly +// terminated +func NewErrIncompleteEscaped(p *TextParser) *lexer.Error { + return newErrIncomplete(p, "incomplete escaped string") +} + func newErrIncomplete(p *TextParser, hint string) *lexer.Error { pos, s := p.Emit() pos.Add(GetPositionalLength(s)) return NewError(pos, s, hint, fs.ErrInvalid) } + +// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating +// the specified sequence, at the end of the accepted buffer, +// is invalid +func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error { + pos, s := p.Position(), p.String() + + s = s[:len(s)-len(seq)] + pos.Add(GetPositionalLength(s)) + + return NewError(pos, seq, "invalid escape character", fs.ErrInvalid) +} diff --git a/parser/text_quoted.go b/parser/text_quoted.go index 59f14b3..222a5d6 100644 --- a/parser/text_quoted.go +++ b/parser/text_quoted.go @@ -33,22 +33,34 @@ func (p *TextParser) AcceptQuotedString() (string, bool, error) { } func lexQuotedString(p *TextParser) (string, *lexer.Error) { + s, ok, err := lexQuotedStringNoEscape(p) + switch { + case err != nil: + return "", err + case ok: + return s, nil + default: + // escape character detected + return lexQuotedStringEscaped(p) + } +} + +func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) { for { r, _, err := p.ReadRune() switch { case err != nil: // incomplete - return "", NewErrIncompleteQuotedString(p) + return "", false, NewErrIncompleteQuotedString(p) case r == RuneQuotes: - // end, remove quotes and process escaped characters - return lexReturnUnescapedQuotedString(p) + // end, just remove the quotes + s := p.String() + l := len(s) + 
return s[1 : l-1], true, nil case r == RuneEscape: - // escaped, take another - _, _, err := p.ReadRune() - if err != nil { - // incomplete - return "", NewErrIncompleteQuotedString(p) - } + // things just got complicated... + p.UnreadRune() + return "", false, nil case IsNewLine(r): // new lines within quoted values are acceptable p.UnreadRune() @@ -59,22 +71,48 @@ func lexQuotedString(p *TextParser) (string, *lexer.Error) { } } -func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) { - // remove quotes - s := p.String() - l := len(s) - s = s[1 : l-1] +// lexQuotedStringEscaped resumes lexing a quoted string once an escape character has been found, returning the content without the quotes and with escaped new lines removed +func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) { + var result strings.Builder - if strings.ContainsRune(s, RuneEscape) { - // TODO: implement unescaping - err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented) - return "", err - } + // append what was accepted before the escape character + _, _ = result.WriteString(p.String()[1:]) - return s, nil + for { + r, _, err := p.ReadRune() + switch { + case err != nil: + // incomplete quoted + return "", NewErrIncompleteQuotedString(p) + case r == RuneQuotes: + // end + return result.String(), nil + case r == RuneEscape: + // escaped + r2, _, err := p.ReadRune() + switch { + case err != nil: + // incomplete escaped + return "", NewErrIncompleteEscaped(p) + case IsNewLine(r2): + // escaped new line, skip + p.UnreadRune() + p.AcceptNewLine() + default: + // TODO: check valid escape character and + // append to result + s := string([]rune{r, r2}) + err := NewErrInvalidEscapeSequence(p, s) + return "", err + } + default: + // normal, append to result + _, _ = result.WriteRune(r) + } + } } -// Unquoted removes quotes the content and unescapes the content +// Unquoted removes quotes and unescapes the content func Unquoted(s string) (string, error) { var p TextParser if s == "" {