Compare commits

..

3 Commits

Author SHA1 Message Date
Alejandro Mery 986b6d1c6d Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9) 1 year ago
Alejandro Mery d41cd781d9 parser: introduce SplitCommaArray to split comma separated strings 1 year ago
Alejandro Mery 651fcb6215 parser: Unquoted(), AcceptQuotedString() 1 year ago
  1. 51
      basic/token.go
  2. 88
      parser/comma_array.go
  3. 16
      parser/error.go
  4. 14
      parser/lexer_runes.go
  5. 97
      parser/text_quoted.go

51
basic/token.go

@ -32,21 +32,21 @@ func (dec *decoder) executeFinal() {
func (dec *decoder) execute(typ parser.TokenType) { func (dec *decoder) execute(typ parser.TokenType) {
switch typ { switch typ {
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
name1, ok1 := dec.queueValue(1, parser.TokenSectionName) name1, ok1 := dec.getValue(1, parser.TokenSectionName)
if ok1 { if ok1 {
name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname) name2, ok2 := dec.getValue(2, parser.TokenSectionSubname)
dec.addSection(name1, name2, ok2) dec.addSection(name1, name2, ok2)
} }
dec.queueReset() dec.reset()
case parser.TokenFieldValue: case parser.TokenFieldValue:
key, _ := dec.queueValue(0, parser.TokenFieldKey) key, _ := dec.getValue(0, parser.TokenFieldKey)
value, _ := dec.queueValue(1, parser.TokenFieldValue) value, _ := dec.getValue(1, parser.TokenFieldValue)
dec.addField(key, value) dec.addField(key, value)
dec.queueReset() dec.reset()
} }
} }
@ -82,8 +82,7 @@ func (dec *decoder) addField(key, value string) {
} }
} }
// queueValue extracts the value of element on the queue if the type matches. func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
switch { switch {
case idx < 0 || idx >= len(dec.queue): case idx < 0 || idx >= len(dec.queue):
// out of range // out of range
@ -96,48 +95,40 @@ func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
} }
} }
// queueReset removes all tokens from the queue func (dec *decoder) reset() {
func (dec *decoder) queueReset() {
dec.queue = dec.queue[:0] dec.queue = dec.queue[:0]
} }
// queueDepth confirms the current depth of the queue func (dec *decoder) depth(depth int) bool {
func (dec *decoder) queueDepth(depth int) bool {
return len(dec.queue) == depth return len(dec.queue) == depth
} }
// queueDepthType confirms the current depth of the queue and the type of the last func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool {
// element. _, ok := dec.getValue(depth-1, typ)
func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool { if ok {
if dec.queueDepth(depth) { return len(dec.queue) == depth
return dec.queueType(depth-1, typ)
} }
return false return false
} }
// queueType tells if the specified element on the queue is of the required type.
func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
_, ok := dec.queueValue(idx, typ)
return ok
}
func (dec *decoder) typeOK(typ parser.TokenType) bool { func (dec *decoder) typeOK(typ parser.TokenType) bool {
switch typ { switch typ {
case parser.TokenSectionStart, parser.TokenFieldKey: case parser.TokenSectionStart, parser.TokenFieldKey:
// first token only // first token only
return dec.queueDepth(0) return dec.depth(0)
case parser.TokenSectionName: case parser.TokenSectionName:
// right after TokenSectionStart // right after TokenSectionStart
return dec.queueDepthType(1, parser.TokenSectionStart) return dec.depthAfter(1, parser.TokenSectionStart)
case parser.TokenSectionSubname: case parser.TokenSectionSubname:
// right after TokenSectionName // right after TokenSectionName
return dec.queueDepthType(2, parser.TokenSectionName) return dec.depthAfter(2, parser.TokenSectionName)
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
// only on a section with name // only on a section with name
return dec.queueType(1, parser.TokenSectionName) _, ok := dec.getValue(1, parser.TokenSectionName)
return ok
case parser.TokenFieldValue: case parser.TokenFieldValue:
// right after a TokenFieldKey // right after a TokenFieldKey
return dec.queueDepthType(1, parser.TokenFieldKey) return dec.depthAfter(1, parser.TokenFieldKey)
default: default:
// never // never
return false return false
@ -158,8 +149,6 @@ func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value stri
return nil return nil
default: default:
// unacceptable // unacceptable
err := newErrInvalidToken(t) return newErrInvalidToken(t)
dec.executeFinal()
return err
} }
} }

88
parser/comma_array.go

@ -0,0 +1,88 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// commaArrayParser holds the state used while splitting a comma
// separated string into its elements.
type commaArrayParser struct {
// TextParser is embedded and handed to lexQuotedString; the lexing
// states read their runes through it.
TextParser
// out accumulates the elements in the order they are found.
out []string
}
// lexStart skips the whitespace and new lines that precede an element
// and picks the state that will consume it: lexQuotedString when the
// element opens with a quote, lexWord for anything else. A read error
// ends the state machine and is returned as is.
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
	for {
		ch, _, err := p.ReadRune()
		if err != nil {
			// nothing left to read
			return nil, err
		}

		if ch == RuneQuotes {
			// quoted element, the opening quote stays consumed
			return p.lexQuotedString, nil
		}

		if IsNewLine(ch) {
			// new lines between elements are fine, but step back so
			// AcceptNewLine sees the whole sequence before dropping it
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
			continue
		}

		if lexer.IsSpace(ch) {
			// whitespace outside quotes is dropped
			p.Discard()
			continue
		}

		// first rune of a plain word, give it back to lexWord
		p.UnreadRune()
		return p.lexWord, nil
	}
}
// lexWord consumes an unquoted element, which runs up to the next comma
// or to the end of the input, and appends it to the output with its
// trailing whitespace removed. After a comma it hands control back to
// lexStart; on a read error it stores what was collected and returns
// the error.
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// done. store what we got, trimmed the same way as
			// comma terminated elements so the last element doesn't
			// keep its trailing whitespace, and move on
			_, s := p.Emit()
			s = strings.TrimRightFunc(s, IsSpace)
			p.out = append(p.out, s)
			return nil, err
		case r == ',':
			// done
			_, s := p.Emit()
			// the emitted text ends with the comma just read;
			// remove it, trim and append to output
			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
			p.out = append(p.out, s)
			return p.lexStart, nil
		}
	}
}
// lexQuotedString consumes the rest of a quoted element (lexStart has
// already read the opening quote) and appends its unquoted content to
// the output. It then skips any whitespace and consumes the comma that
// separates it from the next element; without that step the comma
// would reach lexWord through lexStart and be stored as a spurious
// empty element.
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
	s, err := lexQuotedString(&p.TextParser)
	if err != nil {
		return nil, err
	}

	p.Discard()
	p.out = append(p.out, s)

	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// EOF
			return nil, err
		case r == ',':
			// separator, drop it and look for the next element
			p.Discard()
			return p.lexStart, nil
		case IsNewLine(r):
			// same handling as lexStart: discard the complete new line
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
		case lexer.IsSpace(r):
			// discard whitespace outside quotes
			p.Discard()
		default:
			// no separator after the closing quote, let lexStart
			// decide what to do with this rune
			p.UnreadRune()
			return p.lexStart, nil
		}
	}
}
// Run executes the state machine starting at lexStart and returns the
// collected elements together with the error reported by lexer.Run.
func (p *commaArrayParser) Run() ([]string, error) {
// keep the call and the return as separate statements so p.out is
// read only after the states have finished appending to it.
err := lexer.Run(p.lexStart)
return p.out, err
}
// SplitCommaArray breaks a comma separated string into its elements,
// dropping the whitespace around them and honouring quoted literals.
// An empty string yields a nil slice and no error.
func SplitCommaArray(s string) ([]string, error) {
	if s == "" {
		// nothing to split
		return nil, nil
	}

	var parser commaArrayParser
	parser.InitString(s)
	return parser.Run()
}

16
parser/error.go

@ -1,6 +1,8 @@
package parser package parser
import ( import (
"io/fs"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
) )
@ -25,3 +27,17 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
return NewError(pos, e.Content, e.Hint, e.Err) return NewError(pos, e.Content, e.Hint, e.Err)
} }
// NewErrIncompleteQuotedString builds the [lexer.Error] used to report
// a quoted string that wasn't terminated before reading failed. The
// error is composed by newErrIncomplete from the text pending on the
// given parser.
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"

	return newErrIncomplete(p, hint)
}
// newErrIncomplete emits the text pending on the parser and returns a
// [lexer.Error] carrying that text, the given hint and fs.ErrInvalid.
// The position is advanced by the positional length of the emitted
// text before being attached to the error.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	at, content := p.Emit()
	at.Add(GetPositionalLength(content))

	return NewError(at, content, hint, fs.ErrInvalid)
}

14
parser/lexer_runes.go

@ -7,11 +7,13 @@ import (
) )
const ( const (
RuneComment = ';' // RuneComment is the standard dosini comment character RuneComment = ';' // RuneComment is the standard INI comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
) )
var ( var (
@ -27,6 +29,8 @@ var (
RuneSectionStart, RuneSectionStart,
RuneSectionEnd, RuneSectionEnd,
RuneFieldEqual, RuneFieldEqual,
RuneQuotes,
RuneEscape,
}) })
) )

97
parser/text_quoted.go

@ -0,0 +1,97 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString tries to consume a quoted string from the source.
//
// It returns the content without the surrounding quotes and true when
// a quoted string was read. If the next rune isn't a quote it is put
// back and the result is an empty string, false and no error. Read
// failures and malformed quoted strings are returned as errors.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	first, _, err := p.ReadRune()
	if err != nil {
		// nothing here
		return "", false, err
	}

	if first != RuneQuotes {
		// not a quoted string, leave the rune for the caller
		p.UnreadRune()
		return "", false, nil
	}

	content, lexErr := lexQuotedString(p)
	if lexErr != nil {
		// bad quoted string
		return "", false, lexErr
	}

	return content, true, nil
}
// lexQuotedString reads runes until the closing quote and returns the
// content via lexReturnUnescapedQuotedString. The rune following an
// escape character is consumed without being inspected, and new lines
// are accepted inside the quoted value. A read error before the closing
// quote produces an incomplete quoted string error.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	for {
		ch, _, err := p.ReadRune()
		if err != nil {
			// ran out of input before the closing quote
			return "", NewErrIncompleteQuotedString(p)
		}

		switch ch {
		case RuneQuotes:
			// closing quote, strip quotes and handle escapes
			return lexReturnUnescapedQuotedString(p)
		case RuneEscape:
			// the escaped rune is taken unconditionally
			if _, _, err := p.ReadRune(); err != nil {
				return "", NewErrIncompleteQuotedString(p)
			}
		default:
			if IsNewLine(ch) {
				// step back so AcceptNewLine sees the whole sequence
				p.UnreadRune()
				p.AcceptNewLine()
			}
		}
	}
}
// lexReturnUnescapedQuotedString takes the text accumulated on the
// parser, drops its first and last characters (the quotes) and returns
// what is left. Content including the escape character is rejected
// with lexer.ErrNotImplemented as unescaping isn't supported yet.
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
	raw := p.String()
	inner := raw[1 : len(raw)-1]

	if !strings.ContainsRune(inner, RuneEscape) {
		// nothing to unescape
		return inner, nil
	}

	// TODO: implement unescaping
	return "", NewError(p.Position(), inner, "escaped characters", lexer.ErrNotImplemented)
}
// Unquoted removes the quotes surrounding the content of s. A string
// that doesn't start with a quote is returned unchanged, and so is the
// empty string. Malformed quoted strings are reported as errors.
func Unquoted(s string) (string, error) {
	if s == "" {
		return "", nil
	}

	var p TextParser
	p.InitString(s)

	content, quoted, err := p.AcceptQuotedString()
	if err != nil {
		// bad string
		return "", err
	}

	if quoted {
		return content, nil
	}

	// not quoted, hand it back untouched
	return s, nil
}
Loading…
Cancel
Save