parser: Unquoted(), AcceptQuotedString() [WIP]

TODO: escaped characters

Signed-off-by: Alejandro Mery <amery@jpi.io>
This commit is contained in:
2023-09-04 15:33:46 +00:00
parent 1e57b62ff9
commit 7bed648af3
3 changed files with 127 additions and 5 deletions
+23
View File
@@ -1,6 +1,8 @@
package parser
import (
"io/fs"
"asciigoat.org/core/lexer"
)
@@ -25,3 +27,24 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
return NewError(pos, e.Content, e.Hint, e.Err)
}
// NewErrIncompleteQuotedString builds a [lexer.Error] reporting
// that the quoted string being parsed was never properly closed.
//
// The error itself is assembled by newErrIncomplete using the
// hint "incomplete quoted string".
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"

	return newErrIncomplete(p, hint)
}
// NewErrIncompleteEscaped builds a [lexer.Error] reporting
// that the escaped text being parsed was never properly
// terminated.
//
// The error itself is assembled by newErrIncomplete using the
// hint "incomplete escaped string".
func NewErrIncompleteEscaped(p *TextParser) *lexer.Error {
	const hint = "incomplete escaped string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete calls p.Emit to obtain a position and the
// associated text, applies Add to that position with the positional
// length of the text, and returns a [lexer.Error] carrying the text,
// the given hint and [fs.ErrInvalid].
//
// NOTE(review): the resulting position presumably points at the end
// of the emitted text, where the input stopped; confirm against
// lexer.Position.Add.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	at, text := p.Emit()

	// move the reported position by the length of what was emitted
	length := GetPositionalLength(text)
	at.Add(length)

	return NewError(at, text, hint, fs.ErrInvalid)
}
+9 -5
View File
@@ -7,11 +7,13 @@ import (
)
// Runes with a dedicated name in this package; each entry is
// described by its trailing comment.
const (
RuneComment = ';' // RuneComment is the standard dosini comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
)
var (
@@ -27,6 +29,8 @@ var (
RuneSectionStart,
RuneSectionEnd,
RuneFieldEqual,
RuneQuotes,
RuneEscape,
})
)
+95
View File
@@ -0,0 +1,95 @@
package parser
import (
"asciigoat.org/core/lexer"
)
// AcceptQuotedString attempts to consume a quoted string from the
// source and returns it unquoted.
//
// The boolean result tells if a quoted string was consumed. When the
// next rune isn't [RuneQuotes] the rune is unread and
// ("", false, nil) is returned. A failure to read the first rune, or
// a malformed quoted string, is reported through the error result.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	first, _, err := p.ReadRune()
	if err != nil {
		// nothing could be read
		return "", false, err
	}

	if first != RuneQuotes {
		// not a quoted string, leave the rune for the caller
		p.UnreadRune()
		return "", false, nil
	}

	// opening quote consumed, lex the remainder
	content, lexErr := lexQuotedString(p)
	if lexErr != nil {
		// only a non-nil *lexer.Error is ever converted to error,
		// so callers never see a typed-nil error value
		return "", false, lexErr
	}

	return content, true, nil
}
// lexQuotedString lexes a quoted string whose opening quote has
// already been read by the caller.
//
// It first tries lexQuotedStringNoEscape; if that reports neither an
// error nor a complete string, an escape character was found and the
// work is handed over to lexQuotedStringEscaped.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	content, done, err := lexQuotedStringNoEscape(p)
	if err != nil {
		return "", err
	}

	if !done {
		// escape character detected
		return lexQuotedStringEscaped(p)
	}

	return content, nil
}
// lexQuotedStringNoEscape reads runes until the closing [RuneQuotes]
// of a quoted string that contains no escape characters.
//
// Results:
//   - (content, true, nil) when the closing quote is found; content
//     is the accumulated text without its first and last byte.
//   - ("", false, nil) when a [RuneEscape] is found, so the caller
//     can switch to escape-aware lexing.
//   - ("", false, err) when reading fails before the closing quote;
//     err comes from NewErrIncompleteQuotedString.
//
// New lines inside the quoted value are accepted.
func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// incomplete
			return "", false, NewErrIncompleteQuotedString(p)
		case r == RuneQuotes:
			// end, just remove the quotes.
			//
			// assumes p.String() spans from the opening quote up to
			// and including the closing quote just read, so it is at
			// least two bytes long — TODO confirm.
			// Both quotes are single-byte runes, so the content is
			// s[1:len(s)-1]; the previous upper bound of len(s)-2
			// dropped the last content byte and panicked on `""`.
			s := p.String()
			return s[1 : len(s)-1], true, nil
		case r == RuneEscape:
			// things just got complicated...
			return "", false, nil
		case IsNewLine(r):
			// new lines within quoted values are acceptable.
			// unread the rune so AcceptNewLine sees the new line
			// from its first rune.
			p.UnreadRune()
			p.AcceptNewLine()
		default:
			// continue
		}
	}
}
// lexQuotedStringEscaped is the entry point for lexing quoted
// strings that contain escape characters.
//
// It is a placeholder: it ignores its argument and panics
// unconditionally.
func lexQuotedStringEscaped(_ *TextParser) (string, *lexer.Error) {
	panic("not implemented")
}
// Unquoted returns the content of a quoted string without its quotes.
//
// An empty s yields an empty result. Otherwise s is fed to a
// [TextParser] and AcceptQuotedString is called once: on error the
// error is returned, when s doesn't start with a quoted string s is
// returned unchanged, and on success the unquoted content is
// returned. Nothing in this function inspects what follows the
// quoted string.
func Unquoted(s string) (string, error) {
	if s == "" {
		return "", nil
	}

	var parser TextParser
	parser.InitString(s)

	content, quoted, err := parser.AcceptQuotedString()
	if err != nil {
		// bad string
		return "", err
	}

	if !quoted {
		// not quoted, hand it back untouched
		return s, nil
	}

	return content, nil
}