5 Commits

Author SHA1 Message Date
amery 62328d9e43 build-sys: use local asciigoat.org/core [DO-NOT-MERGE]
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:44 +00:00
amery 5be0785a55 parser: implement initial tokeniser
only logging position, errors and non-whitespace elements

Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 35b9d56b3d parser: add internal []Token queue to the Parser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 604ecfaed2 parser: introduce Token and TokenType enum
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:54:21 +00:00
amery 5288cd4537 parser: add placeholder for ini Parser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-30 20:47:12 +00:00
10 changed files with 309 additions and 2 deletions
+8 -2
View File
@@ -2,7 +2,13 @@ module asciigoat.org/ini
go 1.19 go 1.19
require github.com/mgechev/revive v1.3.3 replace asciigoat.org/core => ../core
require (
asciigoat.org/core v0.3.6
github.com/mgechev/revive v1.3.3
golang.org/x/tools v0.12.0
)
require ( require (
github.com/BurntSushi/toml v1.3.2 // indirect github.com/BurntSushi/toml v1.3.2 // indirect
@@ -16,6 +22,6 @@ require (
github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/pkg/errors v0.9.1 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/sys v0.11.0 // indirect golang.org/x/sys v0.11.0 // indirect
golang.org/x/tools v0.12.0 // indirect
) )
+2
View File
@@ -36,6 +36,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
+73
View File
@@ -0,0 +1,73 @@
package parser
import (
"asciigoat.org/core/lexer"
)
// Run tokenises the source attached to the Parser.
//
// Callbacks left unset are replaced by their defaults and the position
// tracker is reset before lexStart is handed to lexer.Run as the initial
// state. The error reported by lexer.Run is returned unchanged.
func (p *Parser) Run() error {
	p.setDefaults()
	p.pos.Reset()

	err := lexer.Run(p.lexStart)
	return err
}
// lexStart is the initial state of the tokeniser.
//
// It reads runes one at a time, stepping over line breaks and other
// whitespace while keeping the position up to date. The first rune that is
// neither is put back into the source and lexToken is returned as the next
// state. Any read error is handed to emitError, whose result is returned.
func (p *Parser) lexStart() (lexer.StateFn, error) {
	for {
		r, _, err := p.src.ReadRune()
		if err != nil {
			// read error
			return p.emitError("", err)
		}

		if IsNewLine(r) {
			// line break, possibly made of two runes
			p.lexNewLine(r)
			p.stepLine()
			continue
		}

		if IsSpace(r) {
			// whitespace within the line
			p.stepRune()
			continue
		}

		// first rune of a token; let lexToken read it again
		p.src.UnreadRune()
		return p.lexToken, nil
	}
}
// lexToken is the state lexStart switches to once it finds a rune that
// isn't whitespace. It asks the source to accept everything matching
// IsNotSpace, queues the result as a TokenUnknown and gives control back
// to lexStart.
func (p *Parser) lexToken() (lexer.StateFn, error) {
	p.src.AcceptAll(IsNotSpace)
	p.pushString(TokenUnknown)

	next := p.lexStart
	return next, nil
}
// lexNewLine completes the reading of a line break whose first rune, r1,
// has already been read by the caller.
//
// One more rune is read from the source. If it is the other half of a
// two-rune line break (CR LF or LF CR) it stays consumed; if it is anything
// else it is put back; if the read failed nothing is put back.
//
// r1 must be either '\r' or '\n', any other value panics.
func (p *Parser) lexNewLine(r1 rune) {
	next, _, err := p.src.ReadRune()

	// partner is the rune that, following r1, makes a two-rune line break
	var partner rune
	switch r1 {
	case '\r':
		partner = '\n'
	case '\n':
		partner = '\r'
	default:
		panic("unreachable")
	}

	if next == partner {
		// CR LF or LF CR, both runes consumed
		return
	}

	if err == nil {
		// lone CR or LF followed by something else
		p.src.UnreadRune()
	}

	// otherwise the read failed, e.g. CR or LF right before EOF
}
+15
View File
@@ -0,0 +1,15 @@
package parser
import "asciigoat.org/core/lexer"
// emitError reports err, together with the current position and the given
// content, to the OnError callback.
//
// The returned state is always nil. The returned error is the one produced
// by OnError when it is not nil, and the original err otherwise.
func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
	if replaced := p.OnError(p.pos, content, err); replaced != nil {
		// the callback provided its own error
		return nil, replaced
	}

	// the callback didn't override it, pass the original error through
	return nil, err
}
+44
View File
@@ -0,0 +1,44 @@
package parser
import "log"
// push appends a Token to the end of the queue and logs it along with
// the index it was stored at.
func (p *Parser) push(tok Token) {
	p.queue = append(p.queue, tok)

	last := len(p.queue) - 1
	log.Printf("queue[%v]: %s", last, tok)
}
// pushString takes the text emitted by the source, queues it as a Token of
// the given type located at the current position, and then moves the
// position past it on the same line.
func (p *Parser) pushString(typ TokenType) {
	s := p.src.Emit()
	tok := Token{
		Type:     typ,
		Value:    s,
		Position: p.pos,
	}

	// Columns are counted in runes, matching stepRune which advances one
	// column per rune read. len(s) would count bytes and push the column
	// too far after any token containing multi-byte UTF-8 characters.
	p.pos.StepN(len([]rune(s)))
	p.push(tok)
}
// stepLine is called once a line break has been read. It moves the
// position to the next line and discards the data read so far.
func (p *Parser) stepLine() {
	p.pos.StepLine()
	p.src.Discard()
}
// stepRune is called once a single rune has been read and is to be
// ignored. It moves the position one rune forward on the same line and
// discards the data read so far.
func (p *Parser) stepRune() {
	p.pos.Step()
	p.src.Discard()
}
// stepString returns the text emitted by the source and moves the
// position past it on the same line.
func (p *Parser) stepString() string {
	s := p.src.Emit()

	// Columns are counted in runes, matching stepRune which advances one
	// column per rune read. len(s) would count bytes and push the column
	// too far after any text containing multi-byte UTF-8 characters.
	p.pos.StepN(len([]rune(s)))
	return s
}
+19
View File
@@ -0,0 +1,19 @@
package parser
import "asciigoat.org/core/lexer"
// IsNewLine tells if a rune is '\n' or '\r', that is, a line break
// or the first half of a two-rune one.
var IsNewLine = lexer.NewIsIn("\n\r")

// IsSpace tells if a rune is whitespace, as decided by lexer.IsSpace.
var IsSpace = lexer.IsSpace

// IsNotNewLine is the negation of IsNewLine.
var IsNotNewLine = lexer.NewIsNot(IsNewLine)

// IsNotSpace is the negation of IsSpace.
var IsNotSpace = lexer.NewIsNot(IsSpace)
// IsSpaceNotNewLine tells if a rune is whitespace that doesn't take part
// in a line break, i.e. IsSpace accepts it and IsNewLine doesn't.
func IsSpaceNotNewLine(r rune) bool {
	if !IsSpace(r) {
		return false
	}
	return !IsNewLine(r)
}
+78
View File
@@ -0,0 +1,78 @@
// Package parser parses dosini-style files
package parser
import (
"io"
"asciigoat.org/core/lexer"
)
// Parser parses a dosini-style document.
//
// Use NewParser to create one, optionally set the On* callbacks, and
// then call Run. Callbacks left nil are replaced by defaults when Run
// starts.
type Parser struct {
// src is the rune source being tokenised, set by NewParser.
src *lexer.Reader
// pos is the position of the next data to be consumed from src. It is
// copied into each Token when it is queued.
pos lexer.Position
// queue holds the tokens produced so far, in order.
queue []Token
// OnSection is called after a [section] is parsed.
// Returning an error will abort the process.
OnSection func(pos lexer.Position, name, subname string, hasSubname bool) error
// OnField is called after a `key = value` entry is parsed.
// Returning an error will abort the process.
OnField func(pos lexer.Position, key, value string) error
// OnComment is called after a comment is parsed.
// Returning an error will abort the process.
OnComment func(pos lexer.Position, comment string) error
// OnError is called after each parsing error, giving you the chance to
// override it. If it returns a non-nil error, that error is used
// instead of the original one.
// OnError is called for EOF as well, but EOF isn't meant to be returned
// as such by Parser.Run(), which should report that the process
// terminated correctly instead.
// NOTE(review): this relies on how lexer.Run treats the EOF error it
// receives, wrapped or not — confirm.
OnError func(pos lexer.Position, content string, err error) error
}
// defaultOnSection is the OnSection callback used when none was set.
// It ignores its arguments and never fails.
func defaultOnSection(_ lexer.Position, _, _ string, _ bool) error {
	return nil
}

// defaultOnField is the OnField callback used when none was set.
// It ignores its arguments and never fails.
func defaultOnField(_ lexer.Position, _, _ string) error {
	return nil
}

// defaultOnComment is the OnComment callback used when none was set.
// It ignores its arguments and never fails.
func defaultOnComment(_ lexer.Position, _ string) error {
	return nil
}
// defaultOnError is the OnError callback used when none was set.
// It wraps the given error in a *lexer.Error carrying the line and column
// of the position and the offending content.
func defaultOnError(pos lexer.Position, content string, err error) error {
	wrapped := new(lexer.Error)
	wrapped.Line = pos.Line
	wrapped.Column = pos.Column
	wrapped.Content = content
	wrapped.Err = err
	return wrapped
}
// setDefaults fills every callback that is still nil with its default
// implementation, so the rest of the Parser can call them without
// checking. Callbacks already set are left untouched.
func (p *Parser) setDefaults() {
	// error reporting
	if p.OnError == nil {
		p.OnError = defaultOnError
	}

	// document elements
	if p.OnComment == nil {
		p.OnComment = defaultOnComment
	}
	if p.OnField == nil {
		p.OnField = defaultOnField
	}
	if p.OnSection == nil {
		p.OnSection = defaultOnSection
	}
}
// NewParser creates a dosini-style Parser that reads from
// the given [io.Reader]. It returns nil if r is nil.
func NewParser(r io.Reader) *Parser {
	if r == nil {
		return nil
	}

	p := new(Parser)
	p.src = lexer.NewReader(r)
	return p
}
+41
View File
@@ -0,0 +1,41 @@
package parser
//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
import (
"fmt"
"asciigoat.org/core/lexer"
)
// A TokenType is a type of Token.
// Its String method is generated by stringer, see the go:generate
// directive in this file.
type TokenType uint
// Values are assigned consecutively starting at zero. The generated
// String method depends on them, so stringer has to be run again
// after adding, removing or reordering entries.
const (
// TokenUnknown represents a Token that hasn't been identified
TokenUnknown TokenType = iota
// TokenSectionName represents the section name between the square brackets
TokenSectionName
// TokenSectionSubname represents a secondary name in the section represented
// between quotes after the section name.
// e.g.
// [section_name "section_subname"]
TokenSectionSubname
// TokenComment represents a comment, including the initial ';' or '#' until
// the end of the line
TokenComment
// TokenFieldKey represents a field name in a `key = value` entry
TokenFieldKey
// TokenFieldValue represents a field value in a `key = value` entry
TokenFieldValue
)
// A Token is an element from the document
type Token struct {
// Type tells what kind of element the Token is.
Type TokenType
// Position is where the Token was found in the document.
Position lexer.Position
// Value is the text of the Token.
Value string
}
// String renders the Token as `Type:Line:Column: "Value"`, with the
// value quoted, for logging and debugging.
func (t Token) String() string {
	const layout = "%s:%v:%v: %q"

	pos := t.Position
	return fmt.Sprintf(layout, t.Type, pos.Line, pos.Column, t.Value)
}
+28
View File
@@ -0,0 +1,28 @@
// Code generated by "stringer -type=TokenType"; DO NOT EDIT.
package parser
import "strconv"
// _ is never called. It only exists so the package stops compiling when
// the values of the TokenType constants differ from the ones this file
// was generated for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[TokenUnknown-0]
_ = x[TokenSectionName-1]
_ = x[TokenSectionSubname-2]
_ = x[TokenComment-3]
_ = x[TokenFieldKey-4]
_ = x[TokenFieldValue-5]
}
// _TokenType_name is the name of every TokenType constant, concatenated
// in value order.
const _TokenType_name = "TokenUnknownTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
// _TokenType_index holds the offset within _TokenType_name where each
// name starts, followed by a final entry marking the end of the last one.
var _TokenType_index = [...]uint8{0, 12, 28, 47, 59, 72, 87}
// String returns the name of the TokenType constant whose value is i,
// or "TokenType(N)", N being the value in decimal, when i doesn't
// correspond to any of the known constants.
func (i TokenType) String() string {
if i >= TokenType(len(_TokenType_index)-1) {
return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
}
+1
View File
@@ -4,4 +4,5 @@ package tools
import ( import (
_ "github.com/mgechev/revive" _ "github.com/mgechev/revive"
_ "golang.org/x/tools/cmd/stringer"
) )