diff --git a/go.mod b/go.mod
index efb45ff..ce1e7e0 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module asciigoat.org/ini
 go 1.19
 
 require (
+	asciigoat.org/core v0.3.6
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )
diff --git a/go.sum b/go.sum
index 8ebe90d..b76ff81 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+asciigoat.org/core v0.3.6 h1:b1vL090OxylmSOwLQryjrmC8FhhCtktMyeJSy1e6LwI=
+asciigoat.org/core v0.3.6/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/chavacava/garif v0.0.0-20230608123814-4bd63c2919ab h1:5JxePczlyGAtj6R1MUEFZ/UFud6FfsOejq7xLC2ZIb0=
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..c735baa
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,67 @@
+package parser
+
+import "asciigoat.org/core/lexer"
+
+// Run parses the source
+func (p *Parser) Run() error {
+	p.setDefaults()
+	p.pos.Reset()
+
+	return lexer.Run(p.lexStart)
+}
+
+func (p *Parser) lexStart() (lexer.StateFn, error) {
+	for {
+		r, _, err := p.src.ReadRune()
+		switch {
+		case err != nil:
+			return p.emitError("", err)
+		case IsNewLine(r):
+			// new line
+			p.lexMoreNewLine(r)
+			p.stepLine()
+		case IsSpace(r):
+			// whitespace
+			p.stepRune()
+		default:
+			p.src.UnreadRune()
+			return p.lexToken, nil
+		}
+	}
+}
+
+func (p *Parser) lexMoreNewLine(r1 rune) {
+	// r1 is guaranteed to be either '\r' or '\n'
+	r2, _, err := p.src.ReadRune()
+	switch r1 {
+	case '\n':
+		switch {
+		case r2 == '\r':
+			// LF CR
+		case err == nil:
+			// LF
+			p.src.UnreadRune()
+		default:
+			// LF EOF
+		}
+	case '\r':
+		switch {
+		case r2 == '\n':
+			// CR LF
+		case err == nil:
+			// CR
+			p.src.UnreadRune()
+		default:
+			// CR EOF
+		}
+	default:
+		panic("unreachable")
+	}
+}
+
+func (p *Parser) lexToken() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsNotSpace)
+
+	err := p.emitString(TokenUnknown)
+	return p.lexStart, err
+}
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
new file mode 100644
index 0000000..872d2be
--- /dev/null
+++ b/parser/lexer_runes.go
@@ -0,0 +1,16 @@
+package parser
+
+import (
+	"asciigoat.org/core/lexer"
+)
+
+var (
+	// IsNewLine tells if the rune indicates a line break or the start of one
+	IsNewLine = lexer.NewIsIn("\r\n")
+	// IsNotNewLine tells if the rune is not a line break nor the start of one
+	IsNotNewLine = lexer.NewIsNot(IsNewLine)
+	// IsSpace tells if the rune is considered whitespace by Unicode
+	IsSpace = lexer.IsSpace
+	// IsNotSpace tells if the rune is not considered whitespace by Unicode
+	IsNotSpace = lexer.NewIsNot(IsSpace)
+)
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..04d9b06
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,105 @@
+// Package parser parses dosini-style files
+package parser
+
+import (
+	"io"
+	"log"
+
+	"asciigoat.org/core/lexer"
+)
+
+// Parser parses a dosini-style document
+type Parser struct {
+	src *lexer.Reader
+	pos lexer.Position
+
+	// OnToken is called for each identified token. If it returns an
+	// error, parsing is interrupted.
+	OnToken func(pos lexer.Position, typ TokenType, value string) error
+
+	// OnError is called in case of a parsing error, and it's allowed
+	// to replace the error returned by [Parser.Run].
+	// OnError is called for io.EOF, but [Parser.Run] will consider it
+	// normal termination.
+	OnError func(pos lexer.Position, content string, err error) error
+}
+
+func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
+	log.Printf("%s:%v:%v: %q", typ, pos.Line, pos.Column, value)
+	return nil
+}
+
+func defaultOnError(pos lexer.Position, content string, err error) error {
+	log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
+
+	return lexer.Error{
+		Line:   pos.Line,
+		Column: pos.Column,
+
+		Content: content,
+		Err:     err,
+	}
+}
+
+func (p *Parser) setDefaults() {
+	if p.OnToken == nil {
+		p.OnToken = defaultOnToken
+	}
+	if p.OnError == nil {
+		p.OnError = defaultOnError
+	}
+}
+
+func (p *Parser) emitString(typ TokenType) error {
+	s := p.src.Emit()
+	err := p.OnToken(p.pos, typ, s)
+	p.pos.StepN(len(s))
+
+	return err
+}
+
+func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
+	err2 := p.OnError(p.pos, content, err)
+	switch {
+	case err2 != nil:
+		// return wrapped error
+		return nil, err2
+	default:
+		// return original error
+		return nil, err
+	}
+}
+
+// stepLine discards the data and moves the position
+// to the next line.
+func (p *Parser) stepLine() {
+	p.src.Discard()
+	p.pos.StepLine()
+}
+
+// stepRune discards the data and moves the position
+// one rune forward on the same line.
+func (p *Parser) stepRune() {
+	p.src.Discard()
+	p.pos.Step()
+}
+
+// stepString discards the data and moves the position
+// forward on the same line the length of the discarded
+// content.
+func (p *Parser) stepString() {
+	s := p.src.Emit()
+	p.pos.StepN(len(s))
+}
+
+// NewParser creates a dosini-style parser using
+// an [io.Reader] as source
+func NewParser(r io.Reader) *Parser {
+	if r == nil {
+		return nil
+	}
+
+	return &Parser{
+		src: lexer.NewReader(r),
+	}
+}
diff --git a/parser/token.go b/parser/token.go
new file mode 100644
index 0000000..c1ddc7e
--- /dev/null
+++ b/parser/token.go
@@ -0,0 +1,31 @@
+package parser
+
+//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
+
+// A TokenType is a type of Token
+type TokenType uint
+
+const (
+	// TokenUnknown represents a Token that hasn't been identified
+	TokenUnknown TokenType = iota
+	// TokenSectionStart indicates the opening marker of a section declaration.
+	// The left square bracket.
+	TokenSectionStart
+	// TokenSectionEnd indicates the closing marker of a section declaration.
+	// The right square bracket.
+	TokenSectionEnd
+	// TokenSectionName represents the section name between the square brackets
+	TokenSectionName
+	// TokenSectionSubname represents a secondary name in the section, written
+	// between quotes after the section name.
+	// e.g.
+	//   [section_name "section_subname"]
+	TokenSectionSubname
+	// TokenComment represents a comment, including the initial ';' or '#', until
+	// the end of the line.
+	TokenComment
+	// TokenFieldKey represents a field name in a `key = value` entry
+	TokenFieldKey
+	// TokenFieldValue represents a field value in a `key = value` entry
+	TokenFieldValue
+)
diff --git a/parser/tokentype_string.go b/parser/tokentype_string.go
new file mode 100644
index 0000000..455f555
--- /dev/null
+++ b/parser/tokentype_string.go
@@ -0,0 +1,30 @@
+// Code generated by "stringer -type=TokenType"; DO NOT EDIT.
+
+package parser
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[TokenUnknown-0]
+	_ = x[TokenSectionStart-1]
+	_ = x[TokenSectionEnd-2]
+	_ = x[TokenSectionName-3]
+	_ = x[TokenSectionSubname-4]
+	_ = x[TokenComment-5]
+	_ = x[TokenFieldKey-6]
+	_ = x[TokenFieldValue-7]
+}
+
+const _TokenType_name = "TokenUnknownTokenSectionStartTokenSectionEndTokenSectionNameTokenSectionSubnameTokenCommentTokenFieldKeyTokenFieldValue"
+
+var _TokenType_index = [...]uint8{0, 12, 29, 44, 60, 79, 91, 104, 119}
+
+func (i TokenType) String() string {
+	if i >= TokenType(len(_TokenType_index)-1) {
+		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
+}
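
Usage note (not part of the diff): a minimal sketch of how the parser introduced above could be driven. It relies only on the API added in this change (NewParser, the OnToken hook, Run, and the TokenType constants); at this stage lexToken emits every non-space run as TokenUnknown, so the callback below simply logs what it receives.

package main

import (
	"log"
	"strings"

	"asciigoat.org/core/lexer"
	"asciigoat.org/ini/parser"
)

func main() {
	// Feed the parser a small dosini-style document.
	p := parser.NewParser(strings.NewReader("[greetings]\nhello = world\n"))

	// Replace the default logging hook. Returning a non-nil error
	// here would interrupt parsing.
	p.OnToken = func(pos lexer.Position, typ parser.TokenType, value string) error {
		log.Printf("%v:%v: %s %q", pos.Line, pos.Column, typ, value)
		return nil
	}

	// Per the doc comment on OnError, Run treats io.EOF as normal
	// termination, so a fully consumed document should return nil here.
	if err := p.Run(); err != nil {
		log.Fatal(err)
	}
}

Leaving OnError unset keeps defaultOnError, which wraps failures in lexer.Error with the position and offending content while, per the hook's doc comment, io.EOF still ends Run normally.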