From a15deb7e42265a57275b03a49ec6bab7b0165148 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Wed, 30 Aug 2023 23:00:16 +0000 Subject: [PATCH 1/3] tools: add stringer support Signed-off-by: Alejandro Mery --- go.mod | 7 +++++-- go.sum | 2 ++ tools/tools.go | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 534d3cd..efb45ff 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module asciigoat.org/ini go 1.19 -require github.com/mgechev/revive v1.3.3 +require ( + github.com/mgechev/revive v1.3.3 + golang.org/x/tools v0.12.0 +) require ( github.com/BurntSushi/toml v1.3.2 // indirect @@ -16,6 +19,6 @@ require ( github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pkg/errors v0.9.1 // indirect + golang.org/x/mod v0.12.0 // indirect golang.org/x/sys v0.11.0 // indirect - golang.org/x/tools v0.12.0 // indirect ) diff --git a/go.sum b/go.sum index 495d0a7..8ebe90d 100644 --- a/go.sum +++ b/go.sum @@ -36,6 +36,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= diff --git a/tools/tools.go b/tools/tools.go index a3379f0..a4d1df9 100644 --- a/tools/tools.go +++ b/tools/tools.go @@ -4,4 +4,5 @@ package tools import ( _ "github.com/mgechev/revive" + _ "golang.org/x/tools/cmd/stringer" ) From 1090a374f068b5936948e729713c8cc929d9bb58 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Thu, 31 Aug 2023 00:11:33 +0000 Subject: [PATCH 2/3] parser: add initial Parser emitting non-whitespace tokens Signed-off-by: Alejandro Mery --- go.mod | 1 + go.sum | 2 + parser/lexer.go | 67 +++++++++++++++++++++++ parser/lexer_runes.go | 16 ++++++ parser/parser.go | 105 +++++++++++++++++++++++++++++++++++++ parser/token.go | 31 +++++++++++ parser/tokentype_string.go | 30 +++++++++++ 7 files changed, 252 insertions(+) create mode 100644 parser/lexer.go create mode 100644 parser/lexer_runes.go create mode 100644 parser/parser.go create mode 100644 parser/token.go create mode 100644 parser/tokentype_string.go diff --git a/go.mod b/go.mod index efb45ff..ce1e7e0 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module asciigoat.org/ini go 1.19 require ( + asciigoat.org/core v0.3.6 github.com/mgechev/revive v1.3.3 golang.org/x/tools v0.12.0 ) diff --git a/go.sum b/go.sum index 8ebe90d..b76ff81 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI= +asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0= diff --git a/parser/lexer.go b/parser/lexer.go new file mode 100644 index 0000000..c735baa --- /dev/null +++ b/parser/lexer.go @@ -0,0 +1,67 @@ +package parser + +import "asciigoat.org/core/lexer" + +// Run parses the source +func (p *Parser) Run() error { + p.setDefaults() + p.pos.Reset() + + return lexer.Run(p.lexStart) +} + +func (p *Parser) lexStart() (lexer.StateFn, error) { + for { + r, _, err := p.src.ReadRune() + switch { + case err != nil: + return p.emitError("", err) + case IsNewLine(r): + // new line + p.lexMoreNewLine(r) + p.stepLine() + case IsSpace(r): + // whitespace + p.stepRune() + default: + p.src.UnreadRune() + return p.lexToken, nil + } + } +} + +func (p *Parser) lexMoreNewLine(r1 rune) { + // r1 is warrantied to be either '\r' or '\n' + r2, _, err := p.src.ReadRune() + switch r1 { + case '\n': + switch { + case r2 == '\r': + // LN CR + case err == nil: + // LN + p.src.UnreadRune() + default: + // LN EOF + } + case '\r': + switch { + case r2 == '\n': + // CR LN + case err == nil: + // CR + p.src.UnreadRune() + default: + // CR EOF + } + default: + panic("unreachable") + } +} + +func (p *Parser) lexToken() (lexer.StateFn, error) { + p.src.AcceptAll(IsNotSpace) + + err := p.emitString(TokenUnknown) + return p.lexStart, err +} diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go new file mode 100644 index 0000000..872d2be --- /dev/null +++ b/parser/lexer_runes.go @@ -0,0 +1,16 @@ +package parser + +import ( + "asciigoat.org/core/lexer" +) + +var ( + // IsNewLine tells if the rune indicates a line break or the start of one + IsNewLine = lexer.NewIsIn("\r\n") + // IsNotNewLine tells if the rune is not a line break nor the start of one + IsNotNewLine = lexer.NewIsNot(IsNewLine) + // IsSpace tells if the rune is considered whitespace by Unicode + IsSpace = lexer.IsSpace + // IsNotSpace tells if the rune is not considered whitespace by Unicode + IsNotSpace = lexer.NewIsNot(IsSpace) +) diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..04d9b06 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,105 @@ +// Package parser parses dosini-style files +package parser + +import ( + "io" + "log" + + "asciigoat.org/core/lexer" +) + +// Parser parses a dosini-style document +type Parser struct { + src *lexer.Reader + pos lexer.Position + + // OnToken is called for each identified token. if it returns an error + // parsing is interrupted. + OnToken func(pos lexer.Position, typ TokenType, value string) error + + // OnError is called in case of a parsing error, and it's allowed + // to replace the error returned by [Parser.Run]. + // OnError is called for io.EOF, but [Parser.Run] will consider it + // normal termination. + OnError func(pos lexer.Position, content string, err error) error +} + +func defaultOnToken(pos lexer.Position, typ TokenType, value string) error { + log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value) + return nil +} + +func defaultOnError(pos lexer.Position, content string, err error) error { + log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err) + + return lexer.Error{ + Line: pos.Line, + Column: pos.Column, + + Content: content, + Err: err, + } +} + +func (p *Parser) setDefaults() { + if p.OnToken == nil { + p.OnToken = defaultOnToken + } + if p.OnError == nil { + p.OnError = defaultOnError + } +} + +func (p *Parser) emitString(typ TokenType) error { + s := p.src.Emit() + err := p.OnToken(p.pos, typ, s) + p.pos.StepN(len(s)) + + return err +} + +func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) { + err2 := p.OnError(p.pos, content, err) + switch { + case err2 != nil: + // return wrapped error + return nil, err2 + default: + // return original error + return nil, err + } +} + +// stepLine discards the data and moves the position +// to the next line. +func (p *Parser) stepLine() { + p.src.Discard() + p.pos.StepLine() +} + +// stepRune discards the data and moves the position +// one rune forward on the same line. +func (p *Parser) stepRune() { + p.src.Discard() + p.pos.Step() +} + +// stepString discards the data and moves the position +// forward on the same line the length of the discarded +// content. +func (p *Parser) stepString() { + s := p.src.Emit() + p.pos.StepN(len(s)) +} + +// NewParser creates a dosini-style parser using +// an [io.Reader] as source +func NewParser(r io.Reader) *Parser { + if r == nil { + return nil + } + + return &Parser{ + src: lexer.NewReader(r), + } +} diff --git a/parser/token.go b/parser/token.go new file mode 100644 index 0000000..c1ddc7e --- /dev/null +++ b/parser/token.go @@ -0,0 +1,31 @@ +package parser + +//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType + +// A TokenType is a type of Token +type TokenType uint + +const ( + // TokenUnknown represents a Token that hasn't been identified + TokenUnknown TokenType = iota + // TokenSectionStart indicates the opening marker of a section declaration. + // The left squared bracket. + TokenSectionStart + // TokenSectionEnd indicates the closing marker of a section declaration. + // The right squared bracket. + TokenSectionEnd + // TokenSectionName represents the section name between the squared brackets + TokenSectionName + // TokenSectionSubname represents a secondary name in the section represented + // between quotes after the section name. + // e.g. + // [section_name "section_subname"] + TokenSectionSubname + // TokenComment represents a comment, including the initial ';' or '#' until + // the end of the line. + TokenComment + // TokenFieldKey represents a field name in a `key = value` entry + TokenFieldKey + // TokenFieldValue represents a field value in a `key = value` entry + TokenFieldValue +) diff --git a/parser/tokentype_string.go b/parser/tokentype_string.go new file mode 100644 index 0000000..455f555 --- /dev/null +++ b/parser/tokentype_string.go @@ -0,0 +1,30 @@ +// Code generated by "stringer -type=TokenType"; DO NOT EDIT. + +package parser + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[TokenUnknown-0] + _ = x[TokenSectionStart-1] + _ = x[TokenSectionEnd-2] + _ = x[TokenSectionName-3] + _ = x[TokenSectionSubname-4] + _ = x[TokenComment-5] + _ = x[TokenFieldKey-6] + _ = x[TokenFieldValue-7] +} + +const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue" + +var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119} + +func (i TokenType) String() string { + if i >= TokenType(len(_TokenType_index)-1) { + return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]] +} From eb36c195c0cc25f5883b3c8a909e46f390e7a528 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Thu, 31 Aug 2023 01:50:05 +0000 Subject: [PATCH 3/3] parser: implement basic dosini parsing Signed-off-by: Alejandro Mery --- parser/lexer.go | 85 +++++++++++++++++++++++++++++++++++++++++-- parser/lexer_runes.go | 51 ++++++++++++++++++++++++++ parser/parser.go | 4 ++ 3 files changed, 136 insertions(+), 4 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index c735baa..5ed5102 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -23,9 +23,17 @@ func (p *Parser) lexStart() (lexer.StateFn, error) { case IsSpace(r): // whitespace p.stepRune() + case IsCommentStart(r): + // switch to comment lexer + p.src.UnreadRune() + return p.lexComment, nil + case IsSectionStart(r): + // section + return p.lexSectionStart, nil default: + // entry p.src.UnreadRune() - return p.lexToken, nil + return p.lexEntryStart, nil } } } @@ -59,9 +67,78 @@ func (p *Parser) lexMoreNewLine(r1 rune) { } } -func (p *Parser) lexToken() (lexer.StateFn, error) { - p.src.AcceptAll(IsNotSpace) +func (p *Parser) lexComment() (lexer.StateFn, error) { + // until the end of the line + p.src.AcceptAll(IsNotNewLine) + + err := p.emitString(TokenComment) + return p.lexStart, err +} + +func (p *Parser) lexSectionStart() (lexer.StateFn, error) { + if err := p.emitString(TokenSectionStart); err != nil { + return nil, err + } + + // remove whitespace between `[` and the name + if p.src.AcceptAll(IsSpaceNotNewLine) { + p.stepString() + } + + if !p.src.AcceptAll(IsName) { + // no name + return p.emitError("section name missing", lexer.ErrUnacceptableRune) + } + + if err := p.emitString(TokenSectionName); err != nil { + return nil, err + } + + // remove whitespace between the name andthe closing `]` + if p.src.AcceptAll(IsSpaceNotNewLine) { + p.stepString() + } + + r, _, err := p.src.ReadRune() + switch { + case err != nil: + return p.emitError("", err) + case IsSectionEnd(r): + err := p.emitString(TokenSectionEnd) + return p.lexStart, err + default: + return p.emitInvalidRune(r) + } +} + +func (p *Parser) lexEntryStart() (lexer.StateFn, error) { + p.src.AcceptAll(IsName) + if err := p.emitString(TokenFieldKey); err != nil { + return nil, err + } + + // ignore whitespace between key and the '=' sign + if p.src.AcceptAll(IsSpaceNotNewLine) { + p.stepString() + } + + r, _, err := p.src.ReadRune() + switch { + case err != nil: + return p.emitError("", err) + case r != RuneFieldEqual: + return p.emitInvalidRune(r) + } + + // ignore whitespace between the '=' and the value + if p.src.AcceptAll(IsSpaceNotNewLine) { + p.stepString() + } + + p.src.AcceptAll(IsNotNewLine) + if err := p.emitString(TokenFieldValue); err != nil { + return nil, err + } - err := p.emitString(TokenUnknown) return p.lexStart, err } diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go index 872d2be..950f84c 100644 --- a/parser/lexer_runes.go +++ b/parser/lexer_runes.go @@ -1,9 +1,35 @@ package parser import ( + "strings" + "asciigoat.org/core/lexer" ) +const ( + RuneComment = ';' // RuneComment is the standard dosini comment character + RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character + RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration + RuneSectionEnd = ']' // RuneSectionEnd indiciates the end of a section declaration + RuneFieldEqual = '=' // RuneFieldEqual separates field keys from their values +) + +var ( + // RunesComment is a string containing all runes acceptable to start comments + RunesComment = string([]rune{ + RuneComment, + RuneCommentExtra, + }) + // RunesSpecial is a string containing all the runes with special meaning + RunesSpecial = string([]rune{ + RuneComment, + RuneCommentExtra, + RuneSectionStart, + RuneSectionEnd, + RuneFieldEqual, + }) +) + var ( // IsNewLine tells if the rune indicates a line break or the start of one IsNewLine = lexer.NewIsIn("\r\n") @@ -13,4 +39,29 @@ var ( IsSpace = lexer.IsSpace // IsNotSpace tells if the rune is not considered whitespace by Unicode IsNotSpace = lexer.NewIsNot(IsSpace) + // IsCommentStart ... + IsCommentStart = lexer.NewIsIn(RunesComment) ) + +// IsSpaceNotNewLine indicates a rune is whitespace but not a new line +func IsSpaceNotNewLine(r rune) bool { + return IsSpace(r) && !IsNewLine(r) +} + +// IsSectionStart indicates the rune starts the section declaration +func IsSectionStart(r rune) bool { return r == RuneSectionStart } + +// IsSectionEnd indicates the rune ends the section declaration +func IsSectionEnd(r rune) bool { return r == RuneSectionEnd } + +// IsName indicates a rune is acceptable for section or field names +func IsName(r rune) bool { + switch { + case IsSpace(r): + return false + case strings.ContainsRune(RunesSpecial, r): + return false + default: + return true + } +} diff --git a/parser/parser.go b/parser/parser.go index 04d9b06..b526b0c 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -70,6 +70,10 @@ func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) { } } +func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) { + return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune) +} + // stepLine discards the data and moves the position // to the next line. func (p *Parser) stepLine() {