From b16aa0214018a81b62b69beaa08eccca98ca1d16 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Mon, 4 Sep 2023 15:33:46 +0000 Subject: [PATCH] parser: Unquoted(), AcceptQuotedString() TODO: reduce quoted strings with escaped characters Signed-off-by: Alejandro Mery --- parser/error.go | 23 ++++++++++ parser/lexer_runes.go | 14 ++++--- parser/text_quoted.go | 97 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 5 deletions(-) create mode 100644 parser/text_quoted.go diff --git a/parser/error.go b/parser/error.go index 3cc5a53..01b1759 100644 --- a/parser/error.go +++ b/parser/error.go @@ -1,6 +1,8 @@ package parser import ( + "io/fs" + "asciigoat.org/core/lexer" ) @@ -25,3 +27,24 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error { return NewError(pos, e.Content, e.Hint, e.Err) } + +// NewErrIncompleteQuotedString returns a [lexer.Error] +// indicating the quoted string being parsed wasn't correctly +// terminated +func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error { + return newErrIncomplete(p, "incomplete quoted string") +} + +// NewErrIncompleteEscaped returns a [lexer.Error] +// indicating the escaped string being parsed wasn't correctly +// terminated +func NewErrIncompleteEscaped(p *TextParser) *lexer.Error { + return newErrIncomplete(p, "incomplete escaped string") +} + +func newErrIncomplete(p *TextParser, hint string) *lexer.Error { + pos, s := p.Emit() + pos.Add(GetPositionalLength(s)) + + return NewError(pos, s, hint, fs.ErrInvalid) +} diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go index 1d8d080..e410a71 100644 --- a/parser/lexer_runes.go +++ b/parser/lexer_runes.go @@ -7,11 +7,13 @@ import ( ) const ( - RuneComment = ';' // RuneComment is the standard dosini comment character - RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character - RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration - RuneSectionEnd = ']' // RuneSectionEnd 
indiciates the end of a section declaration - RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values + RuneComment = ';' // RuneComment is the standard INI comment character + RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character + RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration + RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration + RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values + RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value + RuneEscape = '\\' // RuneEscape indicates the next rune is escaped ) var ( @@ -27,6 +29,8 @@ var ( RuneSectionStart, RuneSectionEnd, RuneFieldEqual, + RuneQuotes, + RuneEscape, }) ) diff --git a/parser/text_quoted.go b/parser/text_quoted.go new file mode 100644 index 0000000..59f14b3 --- /dev/null +++ b/parser/text_quoted.go @@ -0,0 +1,97 @@ +package parser + +import ( + "strings" + + "asciigoat.org/core/lexer" +) + +// AcceptQuotedString consumes a quoted string from the source +// and returns it unquoted and unescaped +func (p *TextParser) AcceptQuotedString() (string, bool, error) { + r, _, err := p.ReadRune() + switch { + case err != nil: + // nothing here + return "", false, err + case r != RuneQuotes: + // not for us + p.UnreadRune() + return "", false, nil + default: + // let's roll + s, err := lexQuotedString(p) + switch { + case err != nil: + // bad quoted string + return "", false, err + default: + // success + return s, true, nil + } + } +} + +func lexQuotedString(p *TextParser) (string, *lexer.Error) { + for { + r, _, err := p.ReadRune() + switch { + case err != nil: + // incomplete + return "", NewErrIncompleteQuotedString(p) + case r == RuneQuotes: + // end, remove quotes and process escaped characters + return lexReturnUnescapedQuotedString(p) + case r == RuneEscape: + // escaped, take another + _, _, err := p.ReadRune() + if err != nil { + // 
incomplete + return "", NewErrIncompleteQuotedString(p) + } + case IsNewLine(r): + // new lines within quoted values are acceptable + p.UnreadRune() + p.AcceptNewLine() + default: + // continue + } + } +} + +func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) { + // remove quotes + s := p.String() + l := len(s) + s = s[1 : l-1] + + if strings.ContainsRune(s, RuneEscape) { + // TODO: implement unescaping + err := NewError(p.Position(), s, "escaped characters", lexer.ErrNotImplemented) + return "", err + } + + return s, nil +} + +// Unquoted removes the quotes from the content and unescapes it +func Unquoted(s string) (string, error) { + var p TextParser + if s == "" { + return "", nil + } + + p.InitString(s) + unquoted, ok, err := p.AcceptQuotedString() + switch { + case err != nil: + // bad string + return "", err + case ok: + // success + return unquoted, nil + default: + // not quoted + return s, nil + } +}