Compare commits

..

6 Commits

Author SHA1 Message Date
Alejandro Mery df0fe8b9c0 decoder [WIP] 1 year ago
Alejandro Mery 23a6bd090c build-sys: use local darvaza.org/core [DO-NOT-MERGE] 1 year ago
Alejandro Mery f8fa9d678a build-sys: use local asciigoat.org/core [DO-NOT-MERGE] 1 year ago
Alejandro Mery 7e2797607d Merge branch 'pr-amery-textparser' into next-amery 1 year ago
Alejandro Mery d75b2dbc78 basic: rename and document queue related methods 1 year ago
Alejandro Mery 46ba96d6b4 basic: refactor error handling 1 year ago
  1. 15
      basic/error.go
  2. 51
      basic/token.go
  3. 83
      decoder.go
  4. 32
      decoder_error.go
  5. 144
      decoder_token.go
  6. 7
      go.mod
  7. 2
      go.sum
  8. 88
      parser/comma_array.go
  9. 43
      parser/error.go
  10. 13
      parser/lexer_runes.go
  11. 8
      parser/parser.go
  12. 11
      parser/text.go
  13. 38
      parser/text_position.go
  14. 97
      parser/text_quoted.go

15
basic/error.go

@ -4,19 +4,28 @@ import (
"errors" "errors"
"asciigoat.org/core/lexer" "asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
) )
var ( var (
errInvalidToken = errors.New("invalid token") errInvalidToken = errors.New("invalid token")
) )
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Hint: hint,
Err: err,
}
}
func newErrInvalidToken(t *token) *lexer.Error { func newErrInvalidToken(t *token) *lexer.Error {
return parser.NewError(t.pos, t.value, "", errInvalidToken) return newError(t.pos, t.value, "", errInvalidToken)
} }
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = parser.NewError(pos, content, "", err) err = newError(pos, content, "", err)
dec.executeFinal() dec.executeFinal()
return err return err
} }

51
basic/token.go

@ -32,21 +32,21 @@ func (dec *decoder) executeFinal() {
func (dec *decoder) execute(typ parser.TokenType) { func (dec *decoder) execute(typ parser.TokenType) {
switch typ { switch typ {
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
name1, ok1 := dec.getValue(1, parser.TokenSectionName) name1, ok1 := dec.queueValue(1, parser.TokenSectionName)
if ok1 { if ok1 {
name2, ok2 := dec.getValue(2, parser.TokenSectionSubname) name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname)
dec.addSection(name1, name2, ok2) dec.addSection(name1, name2, ok2)
} }
dec.reset() dec.queueReset()
case parser.TokenFieldValue: case parser.TokenFieldValue:
key, _ := dec.getValue(0, parser.TokenFieldKey) key, _ := dec.queueValue(0, parser.TokenFieldKey)
value, _ := dec.getValue(1, parser.TokenFieldValue) value, _ := dec.queueValue(1, parser.TokenFieldValue)
dec.addField(key, value) dec.addField(key, value)
dec.reset() dec.queueReset()
} }
} }
@ -82,7 +82,8 @@ func (dec *decoder) addField(key, value string) {
} }
} }
func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) { // queueValue extracts the value of element on the queue if the type matches.
func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
switch { switch {
case idx < 0 || idx >= len(dec.queue): case idx < 0 || idx >= len(dec.queue):
// out of range // out of range
@ -95,40 +96,48 @@ func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
} }
} }
func (dec *decoder) reset() { // queueReset removes all tokens from the queue
func (dec *decoder) queueReset() {
dec.queue = dec.queue[:0] dec.queue = dec.queue[:0]
} }
func (dec *decoder) depth(depth int) bool { // queueDepth confirms the current depth of the queue
func (dec *decoder) queueDepth(depth int) bool {
return len(dec.queue) == depth return len(dec.queue) == depth
} }
func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool { // queueDepthType confirms the current depth of the queue and the type of the last
_, ok := dec.getValue(depth-1, typ) // element.
if ok { func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool {
return len(dec.queue) == depth if dec.queueDepth(depth) {
return dec.queueType(depth-1, typ)
} }
return false return false
} }
// queueType tells if the specified element on the queue is of the required type.
func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
_, ok := dec.queueValue(idx, typ)
return ok
}
func (dec *decoder) typeOK(typ parser.TokenType) bool { func (dec *decoder) typeOK(typ parser.TokenType) bool {
switch typ { switch typ {
case parser.TokenSectionStart, parser.TokenFieldKey: case parser.TokenSectionStart, parser.TokenFieldKey:
// first token only // first token only
return dec.depth(0) return dec.queueDepth(0)
case parser.TokenSectionName: case parser.TokenSectionName:
// right after TokenSectionStart // right after TokenSectionStart
return dec.depthAfter(1, parser.TokenSectionStart) return dec.queueDepthType(1, parser.TokenSectionStart)
case parser.TokenSectionSubname: case parser.TokenSectionSubname:
// right after TokenSectionName // right after TokenSectionName
return dec.depthAfter(2, parser.TokenSectionName) return dec.queueDepthType(2, parser.TokenSectionName)
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
// only on a section with name // only on a section with name
_, ok := dec.getValue(1, parser.TokenSectionName) return dec.queueType(1, parser.TokenSectionName)
return ok
case parser.TokenFieldValue: case parser.TokenFieldValue:
// right after a TokenFieldKey // right after a TokenFieldKey
return dec.depthAfter(1, parser.TokenFieldKey) return dec.queueDepthType(1, parser.TokenFieldKey)
default: default:
// never // never
return false return false
@ -149,6 +158,8 @@ func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value stri
return nil return nil
default: default:
// unacceptable // unacceptable
return newErrInvalidToken(t) err := newErrInvalidToken(t)
dec.executeFinal()
return err
} }
} }

83
decoder.go

@ -0,0 +1,83 @@
package ini
import (
"bytes"
"io"
"strings"
"asciigoat.org/core"
"asciigoat.org/core/reflective"
"asciigoat.org/ini/parser"
)
const (
// decPrefixLiteral is the full prefix used when customizing errors
// reported by Decode.
decPrefixLiteral = "ini: Decode"
)
var (
// decINIPrefix is the first three bytes of decPrefixLiteral, "ini".
decINIPrefix = decPrefixLiteral[0:3]
// decDecPrefix is decPrefixLiteral from byte 5 onwards, "Decode".
decDecPrefix = decPrefixLiteral[5:]
)
// Decoder drives a parser.Parser over a source and receives its tokens
// and errors through the parserOnToken and parserOnError callbacks.
type Decoder struct {
// Closer is the value obtained from core.NewReadCloser for the source;
// Decode closes it before returning.
io.Closer
// out is the reflection of the value passed to Decode.
out *reflective.Reflection
// p is the parser run by Decode.
p *parser.Parser
// queue holds the accepted tokens that have not been executed yet.
queue []*token
}
// Decode wraps v with reflective.New and, when that succeeds, runs the
// parser and returns its result. The Decoder is closed before returning.
//
// When reflective.New fails with a *reflective.InvalidUnmarshalError or a
// *reflective.UnmarshalTypeError the error is labelled with this package's
// prefix before being returned; any other error is returned untouched.
func (dec *Decoder) Decode(v any) error {
	defer dec.Close()

	out, err := reflective.New(v)
	if err == nil {
		// good reflection. Go!
		dec.out = out
		return dec.p.Run()
	}

	// customize the errors we know about
	switch e := err.(type) {
	case *reflective.InvalidUnmarshalError:
		e.Prefix = decINIPrefix
		e.Method = decDecPrefix
	case *reflective.UnmarshalTypeError:
		e.Prefix = decPrefixLiteral
	}
	return err
}
// NewDecoder creates a Decoder using the provided [io.Reader]
// as source. It returns nil when core.NewReadCloser gives nil for r.
func NewDecoder(r io.Reader) *Decoder {
	rc := core.NewReadCloser(r)
	if rc == nil {
		return nil
	}

	dec := &Decoder{
		Closer: rc,
		p:      parser.NewParser(rc),
	}

	// route the parser's callbacks to this Decoder
	dec.p.OnToken = dec.parserOnToken
	dec.p.OnError = dec.parserOnError
	return dec
}
// NewDecoderBytes creates a Decoder that reads from the given
// byte slice.
func NewDecoderBytes(b []byte) *Decoder {
	src := bytes.NewReader(b)
	return NewDecoder(src)
}
// NewDecoderString creates a Decoder that reads from the given string.
func NewDecoderString(s string) *Decoder {
	src := strings.NewReader(s)
	return NewDecoder(src)
}

32
decoder_error.go

@ -0,0 +1,32 @@
package ini
import (
"errors"
"log"
"asciigoat.org/core/lexer"
)
var (
// errInvalidToken is the underlying error wrapped by newErrInvalidToken.
errInvalidToken = errors.New("invalid token")
)
// newError builds a *lexer.Error carrying the line and column of pos,
// the offending content, a hint and the underlying error.
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	e := new(lexer.Error)
	e.Line = pos.Line
	e.Column = pos.Column
	e.Content = content
	e.Hint = hint
	e.Err = err
	return e
}
// newErrInvalidToken wraps errInvalidToken with the position and value
// of the rejected token. No hint is attached.
func (*Decoder) newErrInvalidToken(t *token) *lexer.Error {
	const noHint = ""
	return newError(t.pos, t.value, noHint, errInvalidToken)
}
// parserOnError is the callback the parser invokes on lexer errors.
// It logs the error and returns it wrapped, together with its position
// and content, as a *lexer.Error.
func (*Decoder) parserOnError(pos lexer.Position, content string, err error) error {
	log.Printf("%s: %s %s: %q: %v", "ini", pos, "error", content, err)

	wrapped := newError(pos, content, "", err)
	return wrapped
}

144
decoder_token.go

@ -0,0 +1,144 @@
package ini
import (
"fmt"
"log"
"asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
)
// token stores the arguments of one parserOnToken call so it can be kept
// on the Decoder's queue.
type token struct {
// pos is the position given for the token.
pos lexer.Position
// typ is the type of the token.
typ parser.TokenType
// value is the text of the token.
value string
}
// String renders the token as its position, its type and its quoted value.
func (t token) String() string {
	const layout = "%s %s: %q"
	return fmt.Sprintf(layout, t.pos, t.typ, t.value)
}
// queueValue returns the value of the queue element at idx, and true,
// provided idx is within range and the element is of the given type.
// Otherwise it returns "" and false.
func (dec *Decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
	if idx < 0 || idx >= len(dec.queue) {
		// out of range
		return "", false
	}

	if t := dec.queue[idx]; t.typ == typ {
		// match
		return t.value, true
	}

	// wrong type
	return "", false
}
// queueReset empties the queue by truncating it to zero length,
// which keeps the underlying array for reuse.
func (dec *Decoder) queueReset() {
	q := dec.queue
	dec.queue = q[:0]
}
// queueType reports whether the queue has an element at idx and
// that element is of the given type.
func (dec *Decoder) queueType(idx int, typ parser.TokenType) bool {
	_, matched := dec.queueValue(idx, typ)
	return matched
}
// queueDepth reports whether the queue holds exactly depth elements.
func (dec *Decoder) queueDepth(depth int) bool {
	n := len(dec.queue)
	return n == depth
}
// queueDepthType reports whether the queue holds exactly depth elements
// and the last of them is of the given type.
func (dec *Decoder) queueDepthType(depth int, typ parser.TokenType) bool {
	return dec.queueDepth(depth) && dec.queueType(depth-1, typ)
}
// typeOK reports whether a token of the given type can be appended
// to the queue in its current state.
//
// It panics when asked about parser.TokenComment; parserOnToken filters
// comments out before calling it.
func (dec *Decoder) typeOK(typ parser.TokenType) bool {
	switch typ {
	case parser.TokenSectionStart, parser.TokenFieldKey:
		// only as first token
		return dec.queueDepth(0)
	case parser.TokenSectionName:
		// right after the section start
		return dec.queueDepthType(1, parser.TokenSectionStart)
	case parser.TokenSectionSubname:
		// right after the section name
		return dec.queueDepthType(2, parser.TokenSectionName)
	case parser.TokenSectionEnd:
		// only when the second element is a section name
		return dec.queueType(1, parser.TokenSectionName)
	case parser.TokenFieldValue:
		// right after the field key
		return dec.queueDepthType(1, parser.TokenFieldKey)
	case parser.TokenComment:
		panic("unreachable")
	}

	// anything else is never acceptable
	return false
}
// execute is called after each acceptable token is appended to the queue.
// When the newest token closes a section header or carries a field value,
// the matching handler is invoked and the queue is reset afterwards.
// Any other token leaves the queue untouched and yields nil.
func (dec *Decoder) execute() error {
	n := len(dec.queue)
	if n == 0 {
		return nil
	}

	// decide based on the type of the last element
	switch last := dec.queue[n-1]; last.typ {
	case parser.TokenSectionEnd:
		name, _ := dec.queueValue(1, parser.TokenSectionName)
		id, hasID := dec.queueValue(2, parser.TokenSectionSubname)

		defer dec.queueReset()
		return dec.executeSection(name, id, hasID)
	case parser.TokenFieldValue:
		key, _ := dec.queueValue(0, parser.TokenFieldKey)
		value, _ := dec.queueValue(1, parser.TokenFieldValue)

		defer dec.queueReset()
		return dec.executeField(key, value)
	}

	return nil
}
// executeSection handles a completed section header. It currently only
// logs the section key, plus its id when hasID is set, and returns nil.
//
// revive:disable:flag-parameter
func (*Decoder) executeSection(key, id string, hasID bool) error {
	// revive:enable:flag-parameter
	if !hasID {
		log.Printf("%s: %s%s[%q]", "ini", "", "section", key)
		return nil
	}

	log.Printf("%s: %s%s[%q]: %q", "ini", "", "section", key, id)
	return nil
}
// executeField handles a completed key/value pair. It currently only
// logs the pair and returns nil.
func (*Decoder) executeField(key, value string) error {
	const layout = "%s: %s%s[%q]: %q"
	log.Printf(layout, "ini", " ", "field", key, value)
	return nil
}
// parserOnToken is the callback the parser invokes for every token.
// Comments are ignored. Tokens acceptable in the current state are
// queued and executed; anything else yields an invalid token error.
func (dec *Decoder) parserOnToken(pos lexer.Position, typ parser.TokenType, value string) error {
	if typ == parser.TokenComment {
		// ignore comments
		return nil
	}

	t := &token{pos: pos, typ: typ, value: value}
	if !dec.typeOK(typ) {
		// unacceptable
		return dec.newErrInvalidToken(t)
	}

	// acceptable token
	dec.queue = append(dec.queue, t)
	return dec.execute()
}

7
go.mod

@ -2,8 +2,13 @@ module asciigoat.org/ini
go 1.19 go 1.19
replace (
asciigoat.org/core => ../core
darvaza.org/core => ../../darvaza.org/core
)
require ( require (
asciigoat.org/core v0.3.9 asciigoat.org/core v0.3.7
github.com/mgechev/revive v1.3.3 github.com/mgechev/revive v1.3.3
golang.org/x/tools v0.12.0 golang.org/x/tools v0.12.0
) )

2
go.sum

@ -1,5 +1,3 @@
asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc= github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=

88
parser/comma_array.go

@ -1,88 +0,0 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// commaArrayParser is the lexer behind SplitCommaArray. It embeds a
// TextParser for reading the input.
type commaArrayParser struct {
TextParser
// out collects the elements in the order they are found.
out []string
}
// lexStart is the initial state. It discards whitespace and new lines
// and then hands over to lexQuotedString, when the next rune is a quote,
// or to lexWord for anything else. A read error ends the run.
func (p *commaArrayParser) lexStart() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// EOF
			return nil, err
		}

		if r == RuneQuotes {
			// quoted value
			return p.lexQuotedString, nil
		}

		if IsNewLine(r) {
			// new lines are fine between elements, but they have to
			// be accepted as a whole before being discarded.
			p.UnreadRune()
			p.AcceptNewLine()
			p.Discard()
			continue
		}

		if lexer.IsSpace(r) {
			// whitespace outside quotes is dropped
			p.Discard()
			continue
		}

		// start of an unquoted element
		p.UnreadRune()
		return p.lexWord, nil
	}
}
// lexWord reads an unquoted element up to the next comma or the end of
// the input, and appends it to the output with trailing whitespace
// removed. After a comma it returns to lexStart; on a read error it
// stores what was read so far and ends the run with that error.
func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
	for {
		r, _, err := p.ReadRune()
		switch {
		case err != nil:
			// done. the last element has no comma to remove but
			// must be trimmed just like the others.
			_, s := p.Emit()
			s = strings.TrimRightFunc(s, IsSpace)
			p.out = append(p.out, s)
			return nil, err
		case r == ',':
			// done
			_, s := p.Emit()
			// remove comma, trim and append to output
			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
			p.out = append(p.out, s)
			return p.lexStart, nil
		}
	}
}
// lexQuotedString reads a quoted element using the package level
// lexQuotedString, appends it to the output and returns to lexStart.
// A malformed quoted string ends the run with its error.
func (p *commaArrayParser) lexQuotedString() (lexer.StateFn, error) {
	value, lexErr := lexQuotedString(&p.TextParser)
	if lexErr == nil {
		p.Discard()
		p.out = append(p.out, value)
		return p.lexStart, nil
	}

	return nil, lexErr
}
// Run executes the state machine starting at lexStart and returns
// the elements collected together with the error from lexer.Run.
func (p *commaArrayParser) Run() ([]string, error) {
	runErr := lexer.Run(p.lexStart)
	return p.out, runErr
}
// SplitCommaArray splits comma separated strings, removing whitespace
// and respecting quoted literals. An empty string gives a nil slice
// and no error.
func SplitCommaArray(s string) ([]string, error) {
	if s == "" {
		return nil, nil
	}

	var p commaArrayParser
	p.InitString(s)
	return p.Run()
}

43
parser/error.go

@ -1,43 +0,0 @@
package parser
import (
"io/fs"
"asciigoat.org/core/lexer"
)
// NewError creates a [lexer.Error] taking the line and column from
// a [lexer.Position], along with the content, hint and underlying error.
func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	e := new(lexer.Error)
	e.Line = pos.Line
	e.Column = pos.Column
	e.Content = content
	e.Hint = hint
	e.Err = err
	return e
}
// ErrPlusPosition returns a copy of the given [lexer.Error] with its
// Line/Column added to pos. The content, hint and wrapped error are
// carried over unchanged.
func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
	offset := lexer.Position{
		Line:   e.Line,
		Column: e.Column,
	}

	// NOTE(review): relies on Add updating pos in place — confirm
	pos.Add(offset)
	return NewError(pos, e.Content, e.Hint, e.Err)
}
// NewErrIncompleteQuotedString returns a [lexer.Error] telling
// that the quoted string being parsed was not properly terminated.
func NewErrIncompleteQuotedString(p *TextParser) *lexer.Error {
	const hint = "incomplete quoted string"
	return newErrIncomplete(p, hint)
}
// newErrIncomplete emits the text accepted so far and reports it, with
// the given hint, as an fs.ErrInvalid positioned where that text ends.
func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
	start, text := p.Emit()
	start.Add(GetPositionalLength(text))
	return NewError(start, text, hint, fs.ErrInvalid)
}

13
parser/lexer_runes.go

@ -7,13 +7,11 @@ import (
) )
const ( const (
RuneComment = ';' // RuneComment is the standard INI comment character RuneComment = ';' // RuneComment is the standard dosini comment character
RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
RuneSectionEnd = ']' // RuneSectionEnd indicates the end of a section declaration RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration
RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values
RuneQuotes = '"' // RuneQuotes indicates the start and end of a quoted value
RuneEscape = '\\' // RuneEscape indicates the next rune is escaped
) )
var ( var (
@ -29,8 +27,6 @@ var (
RuneSectionStart, RuneSectionStart,
RuneSectionEnd, RuneSectionEnd,
RuneFieldEqual, RuneFieldEqual,
RuneQuotes,
RuneEscape,
}) })
) )
@ -47,11 +43,6 @@ var (
IsCommentStart = lexer.NewIsIn(RunesComment) IsCommentStart = lexer.NewIsIn(RunesComment)
) )
// IsAny accepts any rune
func IsAny(_ rune) bool {
return true
}
// IsSpaceNotNewLine indicates a rune is whitespace but not a new line // IsSpaceNotNewLine indicates a rune is whitespace but not a new line
func IsSpaceNotNewLine(r rune) bool { func IsSpaceNotNewLine(r rune) bool {
return IsSpace(r) && !IsNewLine(r) return IsSpace(r) && !IsNewLine(r)

8
parser/parser.go

@ -31,7 +31,13 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
func defaultOnError(pos lexer.Position, content string, err error) error { func defaultOnError(pos lexer.Position, content string, err error) error {
log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err) log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
return NewError(pos, content, "", err) return lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
} }
func (p *Parser) setDefaults() { func (p *Parser) setDefaults() {

11
parser/text.go

@ -39,10 +39,9 @@ func (p *TextParser) InitString(s string) {
// Discard shadows [lexer.Reader]'s, and takes in consideration // Discard shadows [lexer.Reader]'s, and takes in consideration
// new lines on the discarded data when moving the position // new lines on the discarded data when moving the position
func (p *TextParser) Discard() { func (*TextParser) Discard() {
s := p.Reader.Emit() // TODO: consider new lines
l := GetPositionalLength(s) panic("not implemented")
p.pos.Add(l)
} }
// Emit returns the accepted text, its position, and // Emit returns the accepted text, its position, and
@ -50,8 +49,8 @@ func (p *TextParser) Discard() {
func (p *TextParser) Emit() (lexer.Position, string) { func (p *TextParser) Emit() (lexer.Position, string) {
pos := p.pos pos := p.pos
s := p.Reader.Emit() s := p.Reader.Emit()
l := GetPositionalLength(s) // TODO: consider new lines
p.pos.Add(l) p.pos.StepN(len(s))
return pos, s return pos, s
} }

38
parser/text_position.go

@ -1,38 +0,0 @@
package parser
import (
"io"
"asciigoat.org/core/lexer"
)
// positionLengthParser is the lexer behind GetPositionalLength.
// It embeds a TextParser for reading the input.
type positionLengthParser struct {
TextParser
// Position is stepped by lexStart for every new line and rune accepted.
lexer.Position
}
// lexStart consumes the whole input, stepping the Position by a line for
// each new line accepted and by one for any other rune. It always ends
// with io.EOF once nothing else can be accepted.
func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
	for {
		if p.AcceptNewLine() {
			p.Position.StepLine()
			continue
		}

		if !p.Accept(IsAny) {
			// nothing left
			return nil, io.EOF
		}

		p.Position.StepN(1)
	}
}
// GetPositionalLength calculates the [lexer.Position] at
// the end of a text. An empty text gives the zero Position.
func GetPositionalLength(s string) lexer.Position {
	var p positionLengthParser

	// only non-empty text needs scanning. the previous check was
	// inverted and skipped the scan for every text worth measuring.
	if s != "" {
		p.InitString(s)
		// lexStart only ever finishes with io.EOF, nothing to handle.
		_ = lexer.Run(p.lexStart)
	}

	return p.Position
}

97
parser/text_quoted.go

@ -1,97 +0,0 @@
package parser
import (
"strings"
"asciigoat.org/core/lexer"
)
// AcceptQuotedString consumes a quoted string from the source
// and returns it without the quotes. The boolean tells if a quoted
// string was found; when the next rune isn't a quote it is put back and
// ("", false, nil) is returned. Read errors and malformed quoted strings
// are returned as error.
func (p *TextParser) AcceptQuotedString() (string, bool, error) {
	r, _, err := p.ReadRune()
	if err != nil {
		// nothing here
		return "", false, err
	}

	if r != RuneQuotes {
		// not for us
		p.UnreadRune()
		return "", false, nil
	}

	// let's roll
	s, lexErr := lexQuotedString(p)
	if lexErr != nil {
		// bad quoted string
		return "", false, lexErr
	}

	// success
	return s, true, nil
}
// lexQuotedString reads up to the closing quote of a quoted string whose
// opening quote has already been read, and returns the content via
// lexReturnUnescapedQuotedString. Running out of input before the closing
// quote, or right after an escape rune, gives an incomplete quoted
// string error.
func lexQuotedString(p *TextParser) (string, *lexer.Error) {
	for {
		r, _, err := p.ReadRune()
		if err != nil {
			// incomplete
			return "", NewErrIncompleteQuotedString(p)
		}

		switch r {
		case RuneQuotes:
			// end, remove quotes and process escaped characters
			return lexReturnUnescapedQuotedString(p)
		case RuneEscape:
			// escaped, take another
			if _, _, err := p.ReadRune(); err != nil {
				// incomplete
				return "", NewErrIncompleteQuotedString(p)
			}
		default:
			if IsNewLine(r) {
				// new lines within quoted values are acceptable
				p.UnreadRune()
				p.AcceptNewLine()
			}
		}
	}
}
// lexReturnUnescapedQuotedString strips the first and last byte (the
// quotes) from the accepted text and returns the rest. Content including
// the escape rune is rejected with lexer.ErrNotImplemented as unescaping
// isn't supported yet.
func lexReturnUnescapedQuotedString(p *TextParser) (string, *lexer.Error) {
	raw := p.String()
	inner := raw[1 : len(raw)-1]

	if !strings.ContainsRune(inner, RuneEscape) {
		return inner, nil
	}

	// TODO: implement unescaping
	return "", NewError(p.Position(), inner, "escaped characters", lexer.ErrNotImplemented)
}
// Unquoted removes the quotes around the content and unescapes it.
// Text that doesn't start with a quote, and the empty string, are
// returned as given. A malformed quoted string gives an error.
func Unquoted(s string) (string, error) {
	if s == "" {
		return "", nil
	}

	var p TextParser
	p.InitString(s)

	unquoted, ok, err := p.AcceptQuotedString()
	if err != nil {
		// bad string
		return "", err
	}

	if !ok {
		// not quoted
		return s, nil
	}

	// success
	return unquoted, nil
}
Loading…
Cancel
Save