From b2b66035376d765482e60349d3ff2e5abc88f61f Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Mon, 4 Sep 2023 13:54:36 +0000 Subject: [PATCH] WIP Signed-off-by: Alejandro Mery --- parser/comma_array.go | 90 +++++++++++++++++++++++++++++++++ parser/error.go | 23 +++++++++ parser/lexer_runes.go | 14 ++++-- parser/text_quoted.go | 112 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 5 deletions(-) create mode 100644 parser/comma_array.go create mode 100644 parser/text_quoted.go diff --git a/parser/comma_array.go b/parser/comma_array.go new file mode 100644 index 0000000..ba8a3c6 --- /dev/null +++ b/parser/comma_array.go @@ -0,0 +1,90 @@ +package parser + +import ( + "asciigoat.org/core/lexer" +) + +type commaArrayParser struct { + TextParser + + out []string +} + +func (p *commaArrayParser) lexStart() (lexer.StateFn, error) { + for { + r, _, err := p.ReadRune() + switch { + case err != nil: + // EOF or any other read error + return nil, err + case r == RuneQuotes: + // Quoted Value + return p.lexQuotedString, nil + case IsNewLine(r): + // new lines are acceptable when parsing a string for + // comma delimited arrays, but make sure we discard it + // completely + p.UnreadRune() + p.AcceptNewLine() + p.Discard() + case lexer.IsSpace(r): + // discard whitespace outside quotes + p.Discard() + default: + p.UnreadRune() + return p.lexWord, nil + } + } +} + +func (p *commaArrayParser) lexWord() (lexer.StateFn, error) { + for { + r, _, err := p.ReadRune() + switch { + case err != nil: + // done. 
store what we got and move on + _, s := p.Emit() + p.out = append(p.out, s) + return nil, err + case r == RuneEscape: + // escaped + r2, _, err := p.ReadRune() + switch { + case err != nil: + // incomplete + return nil, NewErrIncompleteEscaped(&p.TextParser) + case IsNewLine(r2): + // escaped new line + p.UnreadRune() + p.AcceptNewLine() + } + } + } +} + +func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) { + s, err := lexQuotedString(&p.TextParser) + if err != nil { + return nil, err + } + + p.Discard() + p.out = append(p.out, s) + return p.lexStart, nil +} + +func (p *commaArrayParser) Run() ([]string, error) { + err := lexer.Run(p.lexStart) + + return p.out, err +} + +// SplitCommaArray runs a commaArrayParser over s and returns the values it collected; an empty s yields nil, nil. NOTE(review): lexWord has no comma handling yet (WIP). +func SplitCommaArray(s string) ([]string, error) { + if s != "" { + var p commaArrayParser + p.InitString(s) + return p.Run() + } + return nil, nil +} diff --git a/parser/error.go b/parser/error.go index 3cc5a53..01b1759 100644 --- a/parser/error.go +++ b/parser/error.go @@ -1,6 +1,8 @@ package parser import ( + "io/fs" + "asciigoat.org/core/lexer" ) @@ -25,3 +27,24 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error { return NewError(pos, e.Content, e.Hint, e.Err) } + +// NewErrIncompleteQuotedString returns a [lexer.Error] +// indicating the quoted string being parsed wasn't correctly +// terminated +func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error { + return newErrIncomplete(p, "incomplete quoted string") +} + +// NewErrIncompleteEscaped returns a [lexer.Error] +// indicating the escape sequence being parsed wasn't correctly +// terminated +func NewErrIncompleteEscaped(p *TextParser) *lexer.Error { + return newErrIncomplete(p, "incomplete escaped string") +} + +func newErrIncomplete(p *TextParser, hint string) *lexer.Error { + pos, s := p.Emit() + pos.Add(GetPositionalLength(s)) + + return NewError(pos, s, hint, fs.ErrInvalid) +} diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go index 1d8d080..e410a71 100644 --- 
a/parser/lexer_runes.go +++ b/parser/lexer_runes.go @@ -7,11 +7,13 @@ import ( ) const ( - RuneComment = ';' // RuneComment is the standard dosini comment character - RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character - RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration - RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration - RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values + RuneComment = ';' // RuneComment is the standard INI comment character + RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character + RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration + RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration + RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values + RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value + RuneEscape = '\\' // RuneEscape indicates the next rune is escaped ) var ( @@ -27,6 +29,8 @@ var ( RuneSectionStart, RuneSectionEnd, RuneFieldEqual, + RuneQuotes, + RuneEscape, }) ) diff --git a/parser/text_quoted.go b/parser/text_quoted.go new file mode 100644 index 0000000..3b177e6 --- /dev/null +++ b/parser/text_quoted.go @@ -0,0 +1,112 @@ +package parser + +import ( + "asciigoat.org/core/lexer" +) + +// AcceptQuotedString consumes a quoted string from the source +// and returns it unquoted; the bool is false when the next rune is not a quote. 
+func (p *TextParser) AcceptQuotedString() (string, bool, error) { + r, _, err := p.ReadRune() + switch { + case err != nil: + // nothing here + return "", false, err + case r != RuneQuotes: + // not for us + p.UnreadRune() + return "", false, nil + default: + // let's roll + s, err := lexQuotedString(p) + switch { + case err != nil: + // bad quoted string + return "", false, err + default: + // success + return s, true, nil + } + } +} + +func lexQuotedString(p *TextParser) (string, *lexer.Error) { + s, ok, err := lexQuotedStringNoEscape(p) + switch { + case err != nil: + return "", err + case ok: + return s, nil + default: + // escape character detected. NOTE(review): lexQuotedStringEscaped is still a stub + return lexQuotedStringEscaped(p) + } +} + +func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) { + for { + r, _, err := p.ReadRune() + switch { + case err != nil: + // incomplete + return "", false, NewErrIncompleteQuotedString(p) + case r == RuneQuotes: + // end, drop the opening and the closing quote (one byte each) + s := p.String() + l := len(s) + return s[1 : l-1], true, nil + case r == RuneEscape: + // things just got complicated... + return "", false, nil + case IsNewLine(r): + // new lines within quoted values are acceptable + p.UnreadRune() + p.AcceptNewLine() + default: + // continue + } + } +} + +func lexQuotedStringEscaped(*TextParser) (string, *lexer.Error) { + return "", nil +} + +// // escaped. 
append partial +// mark = lexQuotedAppendPartial(p, &buf, mark) +// +// r2, _, err := p.ReadRune() +// switch { +// case err != nil: +// // incomplete +// return "", NewErrIncompleteQuotedString(p) +// case IsNewLine(r2): +// // escaped new line +// p.UnreadRune() +// p.AcceptNewLine() +// mark = lexQuotedAppendNewLine(p, &buf, mark) +// default: +// // bad escaped +// } + +// Unquoted removes the surrounding quotes of s and is meant to unescape its content (NOTE(review): unescaping is still a stub); a string that does not start with a quote is returned as-is +func Unquoted(s string) (string, error) { + var p TextParser + if s == "" { + return "", nil + } + + p.InitString(s) + unquoted, ok, err := p.AcceptQuotedString() + switch { + case err != nil: + // bad string + return "", err + case ok: + // success + return unquoted, nil + default: + // not quoted + return s, nil + } +}