3 Commits

Author SHA1 Message Date
amery 314c004efd Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)
Reviewed-on: #7
2023-09-04 16:17:04 +02:00
amery 30a86e170b parser: use GetPositionalLength() on TextParser.Discard() and TextParser.Emit()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-04 13:32:27 +00:00
amery 8cc75da138 parser: introduce GetPositionalLength()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-09-03 17:43:46 +00:00
10 changed files with 86 additions and 313 deletions
+14 -12
View File
@@ -10,22 +10,24 @@ var (
errInvalidToken = errors.New("invalid token") errInvalidToken = errors.New("invalid token")
) )
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
return &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Hint: hint,
Err: err,
}
}
func newErrInvalidToken(t *token) *lexer.Error { func newErrInvalidToken(t *token) *lexer.Error {
return newError(t.pos, t.value, "", errInvalidToken) err := &lexer.Error{
Line: t.pos.Line,
Column: t.pos.Column,
Content: t.value,
Err: errInvalidToken,
}
return err
} }
func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
err = newError(pos, content, "", err) err = &lexer.Error{
Line: pos.Line,
Column: pos.Column,
Content: content,
Err: err,
}
dec.executeFinal() dec.executeFinal()
return err return err
} }
+20 -31
View File
@@ -32,21 +32,21 @@ func (dec *decoder) executeFinal() {
func (dec *decoder) execute(typ parser.TokenType) { func (dec *decoder) execute(typ parser.TokenType) {
switch typ { switch typ {
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
name1, ok1 := dec.queueValue(1, parser.TokenSectionName) name1, ok1 := dec.getValue(1, parser.TokenSectionName)
if ok1 { if ok1 {
name2, ok2 := dec.queueValue(2, parser.TokenSectionSubname) name2, ok2 := dec.getValue(2, parser.TokenSectionSubname)
dec.addSection(name1, name2, ok2) dec.addSection(name1, name2, ok2)
} }
dec.queueReset() dec.reset()
case parser.TokenFieldValue: case parser.TokenFieldValue:
key, _ := dec.queueValue(0, parser.TokenFieldKey) key, _ := dec.getValue(0, parser.TokenFieldKey)
value, _ := dec.queueValue(1, parser.TokenFieldValue) value, _ := dec.getValue(1, parser.TokenFieldValue)
dec.addField(key, value) dec.addField(key, value)
dec.queueReset() dec.reset()
} }
} }
@@ -82,8 +82,7 @@ func (dec *decoder) addField(key, value string) {
} }
} }
// queueValue extracts the value of element on the queue if the type matches. func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
switch { switch {
case idx < 0 || idx >= len(dec.queue): case idx < 0 || idx >= len(dec.queue):
// out of range // out of range
@@ -96,48 +95,40 @@ func (dec *decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
} }
} }
// queueReset removes all tokens from the queue func (dec *decoder) reset() {
func (dec *decoder) queueReset() {
dec.queue = dec.queue[:0] dec.queue = dec.queue[:0]
} }
// queueDepth confirms the current depth of the queue func (dec *decoder) depth(depth int) bool {
func (dec *decoder) queueDepth(depth int) bool {
return len(dec.queue) == depth return len(dec.queue) == depth
} }
// queueDepthType confirms the current depth of the queue and the type of the last func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool {
// element. _, ok := dec.getValue(depth-1, typ)
func (dec *decoder) queueDepthType(depth int, typ parser.TokenType) bool { if ok {
if dec.queueDepth(depth) { return len(dec.queue) == depth
return dec.queueType(depth-1, typ)
} }
return false return false
} }
// queueType tells if the specified element on the queue is of the required type.
func (dec *decoder) queueType(idx int, typ parser.TokenType) bool {
_, ok := dec.queueValue(idx, typ)
return ok
}
func (dec *decoder) typeOK(typ parser.TokenType) bool { func (dec *decoder) typeOK(typ parser.TokenType) bool {
switch typ { switch typ {
case parser.TokenSectionStart, parser.TokenFieldKey: case parser.TokenSectionStart, parser.TokenFieldKey:
// first token only // first token only
return dec.queueDepth(0) return dec.depth(0)
case parser.TokenSectionName: case parser.TokenSectionName:
// right after TokenSectionStart // right after TokenSectionStart
return dec.queueDepthType(1, parser.TokenSectionStart) return dec.depthAfter(1, parser.TokenSectionStart)
case parser.TokenSectionSubname: case parser.TokenSectionSubname:
// right after TokenSectionName // right after TokenSectionName
return dec.queueDepthType(2, parser.TokenSectionName) return dec.depthAfter(2, parser.TokenSectionName)
case parser.TokenSectionEnd: case parser.TokenSectionEnd:
// only on a section with name // only on a section with name
return dec.queueType(1, parser.TokenSectionName) _, ok := dec.getValue(1, parser.TokenSectionName)
return ok
case parser.TokenFieldValue: case parser.TokenFieldValue:
// right after a TokenFieldKey // right after a TokenFieldKey
return dec.queueDepthType(1, parser.TokenFieldKey) return dec.depthAfter(1, parser.TokenFieldKey)
default: default:
// never // never
return false return false
@@ -158,8 +149,6 @@ func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value stri
return nil return nil
default: default:
// unacceptable // unacceptable
err := newErrInvalidToken(t) return newErrInvalidToken(t)
dec.executeFinal()
return err
} }
} }
-83
View File
@@ -1,83 +0,0 @@
package ini
import (
"bytes"
"io"
"strings"
"asciigoat.org/core"
"asciigoat.org/core/reflective"
"asciigoat.org/ini/parser"
)
// decPrefixLiteral is the complete prefix applied to decoding errors.
const (
decPrefixLiteral = "ini: Decode"
)
// decINIPrefix and decDecPrefix are the two parts of decPrefixLiteral,
// "ini" and "Decode", sliced around the ": " separator.
var (
decINIPrefix = decPrefixLiteral[0:3]
decDecPrefix = decPrefixLiteral[5:]
)
// Decoder couples a [parser.Parser] with the [reflective.Reflection]
// of the value being decoded into, and queues the tokens received
// from the parser until they can be executed.
type Decoder struct {
// Closer is the closer of the data source, set by NewDecoder.
io.Closer
// out is the reflection of the value passed to Decode.
out *reflective.Reflection
// p is the parser delivering tokens and errors via callbacks.
p *parser.Parser
// queue holds the accepted tokens pending execution.
queue []*token
}
// Decode builds a reflection of v and, if that succeeds, runs the
// parser, returning whatever error the parser reports. When the
// reflection fails, the error's prefix is customized for this package
// before being returned. The Decoder is closed on return either way.
func (dec *Decoder) Decode(v any) error {
	defer dec.Close()

	r, err := reflective.New(v)
	if err == nil {
		// good reflection. Go!
		dec.out = r
		return dec.p.Run()
	}

	// customize known error types
	switch e := err.(type) {
	case *reflective.InvalidUnmarshalError:
		e.Prefix = decINIPrefix
		e.Method = decDecPrefix
	case *reflective.UnmarshalTypeError:
		e.Prefix = decPrefixLiteral
	}
	return err
}
// NewDecoder creates a Decoder reading from the given [io.Reader].
// It returns nil if no ReadCloser could be obtained for the reader.
func NewDecoder(r io.Reader) *Decoder {
	rc := core.NewReadCloser(r)
	if rc == nil {
		return nil
	}

	dec := &Decoder{
		p:      parser.NewParser(rc),
		Closer: rc,
	}

	// hook the parser callbacks to this decoder
	dec.p.OnToken = dec.parserOnToken
	dec.p.OnError = dec.parserOnError
	return dec
}
// NewDecoderBytes creates a Decoder reading from the given
// byte slice.
func NewDecoderBytes(b []byte) *Decoder {
	src := bytes.NewReader(b)
	return NewDecoder(src)
}
// NewDecoderString creates a Decoder reading from the given string.
func NewDecoderString(s string) *Decoder {
	src := strings.NewReader(s)
	return NewDecoder(src)
}
-32
View File
@@ -1,32 +0,0 @@
package ini
import (
"errors"
"log"
"asciigoat.org/core/lexer"
)
var (
errInvalidToken = errors.New("invalid token")
)
// newError assembles a [lexer.Error] for the given position,
// offending content, hint, and underlying error.
func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
	e := new(lexer.Error)
	e.Line = pos.Line
	e.Column = pos.Column
	e.Content = content
	e.Hint = hint
	e.Err = err
	return e
}
// newErrInvalidToken reports the given token as invalid, using its
// position and value, with no hint.
func (*Decoder) newErrInvalidToken(t *token) *lexer.Error {
	const noHint = ""
	return newError(t.pos, t.value, noHint, errInvalidToken)
}
// parserOnError is the parser's error callback. It logs the error
// and returns it wrapped as a [lexer.Error] with the position and
// content attached.
func (*Decoder) parserOnError(pos lexer.Position, content string, err error) error {
	wrapped := newError(pos, content, "", err)
	log.Printf("%s: %s %s: %q: %v", "ini", pos, "error", content, err)
	return wrapped
}
-144
View File
@@ -1,144 +0,0 @@
package ini
import (
"fmt"
"log"
"asciigoat.org/core/lexer"
"asciigoat.org/ini/parser"
)
// token is one element received from the parser: where it was found,
// its type, and its text.
type token struct {
pos lexer.Position
typ parser.TokenType
value string
}
// String renders the token as its position and type, followed by the
// quoted value.
func (t token) String() string {
	const layout = "%s %s: %q"
	return fmt.Sprintf(layout, t.pos, t.typ, t.value)
}
// queueValue returns the value of the queued token at idx, and true,
// provided idx is within the queue and the token is of the given type.
// Otherwise it returns an empty string and false.
func (dec *Decoder) queueValue(idx int, typ parser.TokenType) (string, bool) {
	if idx < 0 || idx >= len(dec.queue) {
		// out of range
		return "", false
	}

	if t := dec.queue[idx]; t.typ == typ {
		// match
		return t.value, true
	}

	// wrong type
	return "", false
}
// queueReset empties the queue, truncating it to zero length
// while keeping the same backing slice.
func (dec *Decoder) queueReset() {
	q := dec.queue
	dec.queue = q[:0]
}
// queueType reports whether the queued token at idx exists and is of
// the given type.
func (dec *Decoder) queueType(idx int, typ parser.TokenType) bool {
	_, match := dec.queueValue(idx, typ)
	return match
}
// queueDepth reports whether the queue holds exactly depth tokens.
func (dec *Decoder) queueDepth(depth int) bool {
	n := len(dec.queue)
	return n == depth
}
// queueDepthType reports whether the queue holds exactly depth tokens
// and the last of them is of the given type.
func (dec *Decoder) queueDepthType(depth int, typ parser.TokenType) bool {
	return dec.queueDepth(depth) && dec.queueType(depth-1, typ)
}
// typeOK tells if a token of the specified type is acceptable
// at this time, based on the tokens already in the queue.
//
// Comments are filtered out by parserOnToken before reaching this
// point; should one arrive anyway it is rejected like any other
// unexpected type instead of panicking.
func (dec *Decoder) typeOK(typ parser.TokenType) bool {
	switch typ {
	case parser.TokenSectionStart, parser.TokenFieldKey:
		// first token only
		return dec.queueDepth(0)
	case parser.TokenSectionName:
		// right after TokenSectionStart
		return dec.queueDepthType(1, parser.TokenSectionStart)
	case parser.TokenSectionSubname:
		// right after TokenSectionName
		return dec.queueDepthType(2, parser.TokenSectionName)
	case parser.TokenSectionEnd:
		// only on a section with name
		return dec.queueType(1, parser.TokenSectionName)
	case parser.TokenFieldValue:
		// right after a TokenFieldKey
		return dec.queueDepthType(1, parser.TokenFieldKey)
	default:
		// never
		return false
	}
}
// execute is called after each acceptable token is appended to the
// queue. When the last token completes a section header or a field,
// the queued values are handed to the matching handler and the queue
// is reset; otherwise nothing happens.
func (dec *Decoder) execute() error {
	n := len(dec.queue)
	if n == 0 {
		return nil
	}

	// the type of the last element decides
	switch last := dec.queue[n-1]; last.typ {
	case parser.TokenSectionEnd:
		defer dec.queueReset()

		name, _ := dec.queueValue(1, parser.TokenSectionName)
		sub, hasSub := dec.queueValue(2, parser.TokenSectionSubname)
		return dec.executeSection(name, sub, hasSub)
	case parser.TokenFieldValue:
		defer dec.queueReset()

		k, _ := dec.queueValue(0, parser.TokenFieldKey)
		v, _ := dec.queueValue(1, parser.TokenFieldValue)
		return dec.executeField(k, v)
	default:
		return nil
	}
}
// executeSection logs a section header, including its id only when
// hasID is set. It always returns nil.
//
// revive:disable:flag-parameter
func (*Decoder) executeSection(key, id string, hasID bool) error {
	// revive:enable:flag-parameter
	if !hasID {
		log.Printf("%s: %s%s[%q]", "ini", "", "section", key)
		return nil
	}

	log.Printf("%s: %s%s[%q]: %q", "ini", "", "section", key, id)
	return nil
}
// executeField logs a field's key and value. It always returns nil.
func (*Decoder) executeField(key, value string) error {
	const layout = "%s: %s%s[%q]: %q"
	log.Printf(layout, "ini", " ", "field", key, value)
	return nil
}
// parserOnToken is the parser's token callback. Comments are ignored,
// acceptable tokens are queued and executed, and anything else is
// reported as an invalid token.
func (dec *Decoder) parserOnToken(pos lexer.Position, typ parser.TokenType, value string) error {
	if typ == parser.TokenComment {
		// ignore comments
		return nil
	}

	t := &token{pos: pos, typ: typ, value: value}
	if !dec.typeOK(typ) {
		// unacceptable
		return dec.newErrInvalidToken(t)
	}

	// acceptable token
	dec.queue = append(dec.queue, t)
	return dec.execute()
}
+1 -6
View File
@@ -2,13 +2,8 @@ module asciigoat.org/ini
go 1.19 go 1.19
replace (
asciigoat.org/core => ../core
darvaza.org/core => ../../darvaza.org/core
)
require ( require (
asciigoat.org/core v0.3.7 asciigoat.org/core v0.3.9
github.com/mgechev/revive v1.3.3 github.com/mgechev/revive v1.3.3
golang.org/x/tools v0.12.0 golang.org/x/tools v0.12.0
) )
+2
View File
@@ -1,3 +1,5 @@
asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc= github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
+5
View File
@@ -43,6 +43,11 @@ var (
IsCommentStart = lexer.NewIsIn(RunesComment) IsCommentStart = lexer.NewIsIn(RunesComment)
) )
// IsAny accepts any rune
func IsAny(_ rune) bool {
return true
}
// IsSpaceNotNewLine indicates a rune is whitespace but not a new line // IsSpaceNotNewLine indicates a rune is whitespace but not a new line
func IsSpaceNotNewLine(r rune) bool { func IsSpaceNotNewLine(r rune) bool {
return IsSpace(r) && !IsNewLine(r) return IsSpace(r) && !IsNewLine(r)
+6 -5
View File
@@ -39,9 +39,10 @@ func (p *TextParser) InitString(s string) {
// Discard shadows [lexer.Reader]'s, and takes in consideration // Discard shadows [lexer.Reader]'s, and takes in consideration
// new lines on the discarded data when moving the position // new lines on the discarded data when moving the position
func (*TextParser) Discard() { func (p *TextParser) Discard() {
// TODO: consider new lines s := p.Reader.Emit()
panic("not implemented") l := GetPositionalLength(s)
p.pos.Add(l)
} }
// Emit returns the accepted text, its position, and // Emit returns the accepted text, its position, and
@@ -49,8 +50,8 @@ func (*TextParser) Discard() {
func (p *TextParser) Emit() (lexer.Position, string) { func (p *TextParser) Emit() (lexer.Position, string) {
pos := p.pos pos := p.pos
s := p.Reader.Emit() s := p.Reader.Emit()
// TODO: consider new lines l := GetPositionalLength(s)
p.pos.StepN(len(s)) p.pos.Add(l)
return pos, s return pos, s
} }
+38
View File
@@ -0,0 +1,38 @@
package parser
import (
"io"
"asciigoat.org/core/lexer"
)
// positionLengthParser is a TextParser that keeps its own
// [lexer.Position], advanced by lexStart as the text is consumed.
type positionLengthParser struct {
TextParser
lexer.Position
}
// lexStart consumes the whole text, stepping the position one line
// for each accepted new line and one column for any other accepted
// rune. Once nothing more is accepted it ends with io.EOF.
func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
	for {
		if p.AcceptNewLine() {
			p.Position.StepLine()
			continue
		}

		if !p.Accept(IsAny) {
			// nothing left to consume
			return nil, io.EOF
		}

		p.Position.StepN(1)
	}
}
// GetPositionalLength calculates the [lexer.Position] at
// the end of a text.
func GetPositionalLength(s string) lexer.Position {
var p positionLengthParser
if s == "" {
p.InitString(s)
_ = lexer.Run(p.lexStart)
}
return p.Position
}