diff --git a/README.md b/README.md
index fe9bc8b..0310e45 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ of stricter parsers of similar form.
 
 [godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer
 [godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser
+[godoc-basic-parser]: https://pkg.go.dev/asciigoat.org/ini/basic#Decode
 
 [wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file
 
@@ -25,6 +26,15 @@ of stricter parsers of similar form.
 [`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style
 document emitting tokens and errors via callbacks.
 
+## Basic Parser
+
+[`basic.Decode()`][godoc-basic-parser] provides a one-shot decoder
+that returns a structured document for you to post-process.
+
+To allow repeated section and field names to be handled correctly downstream,
+it uses slices instead of maps, and makes almost no judgment
+about what section or field names are acceptable.
+
 ## Other Implementations
 
 Other implementations exist, and they are mature and feature-rich, but they
diff --git a/basic/basic.go b/basic/basic.go
new file mode 100644
index 0000000..9b40607
--- /dev/null
+++ b/basic/basic.go
@@ -0,0 +1,24 @@
+// Package basic provides a basic representation of dosini-style documents
+package basic
+
+// Document represents an INI-style document
+type Document struct {
+	Global []Field // fields appearing before any section header
+
+	Sections []Section
+}
+
+// Section represents an INI-style section with optional GIT-style IDs
+type Section struct {
+	Key     string
+	ID      string // optional GIT-style subsection name
+	EmptyID bool   // a subsection name was given but it was empty
+
+	Fields []Field
+}
+
+// Field represents a key = value entry in an INI-style document
+type Field struct {
+	Key   string
+	Value string
+}
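Before the decoder itself, a quick illustration of how these structures line up with an input document. This is a sketch, not part of the change: the git-style `[unit "one"]` subsection syntax is an assumption based on the parser's token types further down.

```go
package main

import (
	"fmt"

	"asciigoat.org/ini/basic"
)

// Expected result of decoding (assumed syntax):
//
//	key0 = global
//
//	[unit]
//	key1 = a
//
//	[unit "one"]
//	key2 = b
var example = basic.Document{
	Global: []basic.Field{
		{Key: "key0", Value: "global"},
	},
	Sections: []basic.Section{
		{Key: "unit", Fields: []basic.Field{{Key: "key1", Value: "a"}}},
		// the repeated "unit" key stays a separate entry;
		// this is why Sections is a slice rather than a map
		{Key: "unit", ID: "one", Fields: []basic.Field{{Key: "key2", Value: "b"}}},
	},
}

func main() { fmt.Printf("%+v\n", example) }
```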
diff --git a/basic/decoder.go b/basic/decoder.go
new file mode 100644
index 0000000..d6c09f5
--- /dev/null
+++ b/basic/decoder.go
@@ -0,0 +1,49 @@
+package basic
+
+import (
+	"bytes"
+	"io"
+	"io/fs"
+	"strings"
+
+	"asciigoat.org/ini/parser"
+)
+
+type decoder struct {
+	p   *parser.Parser
+	out *Document
+
+	queue   []*token // tokens of the entry currently being assembled
+	current *Section // destination for new fields; nil means global
+}
+
+// Decode attempts to decode an INI-style document from an [io.Reader] into a [Document]
+func Decode(r io.Reader) (*Document, error) {
+	var out Document
+
+	if r == nil {
+		return nil, fs.ErrNotExist
+	}
+
+	// parser
+	p := parser.NewParser(r)
+	// decoder
+	dec := decoder{p: p, out: &out}
+	// glue
+	p.OnToken = dec.OnToken
+	p.OnError = dec.OnError
+
+	// Go!
+	err := p.Run()
+	return &out, err
+}
+
+// DecodeBytes attempts to decode an INI-style document from a byte slice into a [Document]
+func DecodeBytes(b []byte) (*Document, error) {
+	return Decode(bytes.NewReader(b))
+}
+
+// DecodeString attempts to decode an INI-style document from a string into a [Document]
+func DecodeString(s string) (*Document, error) {
+	return Decode(strings.NewReader(s))
+}
diff --git a/basic/error.go b/basic/error.go
new file mode 100644
index 0000000..fec162d
--- /dev/null
+++ b/basic/error.go
@@ -0,0 +1,33 @@
+package basic
+
+import (
+	"errors"
+
+	"asciigoat.org/core/lexer"
+)
+
+var (
+	errInvalidToken = errors.New("invalid token")
+)
+
+func newErrInvalidToken(t *token) *lexer.Error {
+	err := &lexer.Error{
+		Line:    t.pos.Line,
+		Column:  t.pos.Column,
+		Content: t.value,
+		Err:     errInvalidToken,
+	}
+	return err
+}
+
+func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
+	err = &lexer.Error{
+		Line:    pos.Line,
+		Column:  pos.Column,
+		Content: content,
+		Err:     err,
+	}
+
+	dec.executeFinal() // commit any partially assembled entry first
+	return err
+}
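A hedged usage sketch for the decoder and its errors: it assumes the truncated `[bad` header actually makes the run fail, and that the returned error is (or wraps) the `*lexer.Error` built by `OnError` above, hence the `errors.As` with a fallback; none of this is asserted by the diff itself.

```go
package main

import (
	"errors"
	"fmt"
	"log"

	"asciigoat.org/core/lexer"
	"asciigoat.org/ini/basic"
)

func main() {
	doc, err := basic.DecodeString("key = value\n[bad")
	if err != nil {
		// the decoder annotates failures with position information
		var lexErr *lexer.Error
		if errors.As(err, &lexErr) {
			log.Fatalf("line %v, column %v: %v (%q)",
				lexErr.Line, lexErr.Column, lexErr.Err, lexErr.Content)
		}
		log.Fatal(err)
	}

	// walk whatever was decoded
	for _, f := range doc.Global {
		fmt.Printf("%s = %s\n", f.Key, f.Value)
	}
	for _, s := range doc.Sections {
		fmt.Printf("[%s]\n", s.Key)
		for _, f := range s.Fields {
			fmt.Printf("%s = %s\n", f.Key, f.Value)
		}
	}
}
```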
diff --git a/basic/token.go b/basic/token.go
new file mode 100644
index 0000000..4f2103b
--- /dev/null
+++ b/basic/token.go
@@ -0,0 +1,154 @@
+package basic
+
+import (
+	"fmt"
+
+	"asciigoat.org/core/lexer"
+	"asciigoat.org/ini/parser"
+)
+
+type token struct {
+	pos   lexer.Position
+	typ   parser.TokenType
+	value string
+}
+
+func (t token) String() string {
+	return fmt.Sprintf("%s %s: %q", t.pos, t.typ, t.value)
+}
+
+func (dec *decoder) executeFinal() {
+	if len(dec.queue) > 0 {
+		// we have unfinished business
+		switch dec.queue[0].typ {
+		case parser.TokenSectionStart:
+			dec.execute(parser.TokenSectionEnd)
+		case parser.TokenFieldKey:
+			dec.execute(parser.TokenFieldValue)
+		}
+	}
+}
+
+func (dec *decoder) execute(typ parser.TokenType) {
+	switch typ {
+	case parser.TokenSectionEnd:
+		name1, ok1 := dec.getValue(1, parser.TokenSectionName)
+
+		if ok1 {
+			name2, ok2 := dec.getValue(2, parser.TokenSectionSubname)
+
+			dec.addSection(name1, name2, ok2)
+		}
+
+		dec.reset()
+	case parser.TokenFieldValue:
+		key, _ := dec.getValue(0, parser.TokenFieldKey)
+		value, _ := dec.getValue(1, parser.TokenFieldValue)
+
+		dec.addField(key, value)
+		dec.reset()
+	}
+}
+
+func (dec *decoder) addSection(key, id string, allowEmptyID bool) {
+	emptyID := allowEmptyID && id == ""
+
+	// index for dec.current
+	n := len(dec.out.Sections)
+
+	// new section
+	dec.out.Sections = append(dec.out.Sections, Section{
+		Key:     key,
+		ID:      id,
+		EmptyID: emptyID,
+	})
+
+	// pointer to the latest section
+	dec.current = &dec.out.Sections[n]
+}
+
+func (dec *decoder) addField(key, value string) {
+	field := Field{
+		Key:   key,
+		Value: value,
+	}
+
+	if p := dec.current; p != nil {
+		// in section
+		p.Fields = append(p.Fields, field)
+	} else {
+		// global
+		dec.out.Global = append(dec.out.Global, field)
+	}
+}
+
+func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) {
+	switch {
+	case idx < 0 || idx >= len(dec.queue):
+		// out of range
+		return "", false
+	case dec.queue[idx].typ != typ:
+		// wrong type
+		return "", false
+	default:
+		return dec.queue[idx].value, true
+	}
+}
+
+func (dec *decoder) reset() {
+	dec.queue = dec.queue[:0]
+}
+
+func (dec *decoder) depth(depth int) bool {
+	return len(dec.queue) == depth
+}
+
+func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool {
+	_, ok := dec.getValue(depth-1, typ)
+	if ok {
+		return len(dec.queue) == depth
+	}
+	return false
+}
+
+func (dec *decoder) typeOK(typ parser.TokenType) bool {
+	switch typ {
+	case parser.TokenSectionStart, parser.TokenFieldKey:
+		// first token only
+		return dec.depth(0)
+	case parser.TokenSectionName:
+		// right after TokenSectionStart
+		return dec.depthAfter(1, parser.TokenSectionStart)
+	case parser.TokenSectionSubname:
+		// right after TokenSectionName
+		return dec.depthAfter(2, parser.TokenSectionName)
+	case parser.TokenSectionEnd:
+		// only on a section with a name
+		_, ok := dec.getValue(1, parser.TokenSectionName)
+		return ok
+	case parser.TokenFieldValue:
+		// right after a TokenFieldKey
+		return dec.depthAfter(1, parser.TokenFieldKey)
+	default:
+		// never
+		return false
+	}
+}
+
+func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value string) error {
+	t := &token{pos, typ, value}
+
+	switch {
+	case typ == parser.TokenComment:
+		// ignore comments
+		return nil
+	case dec.typeOK(typ):
+		// acceptable token
+		dec.queue = append(dec.queue, t)
+		dec.execute(typ)
+		return nil
+	default:
+		// unacceptable
+		return newErrInvalidToken(t)
+	}
+}
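The `typeOK` switch above is the whole token-ordering state machine: a section arrives as `TokenSectionStart, TokenSectionName[, TokenSectionSubname], TokenSectionEnd`, a field as `TokenFieldKey, TokenFieldValue`, and anything out of order is rejected via `newErrInvalidToken`. A minimal in-package test sketch of those rules (hypothetical `basic/token_test.go`, not part of the change):

```go
package basic

import (
	"testing"

	"asciigoat.org/ini/parser"
)

func TestTypeOKOrdering(t *testing.T) {
	var dec decoder

	// at depth 0 only TokenSectionStart or TokenFieldKey are acceptable
	if dec.typeOK(parser.TokenSectionName) {
		t.Error("TokenSectionName must not start a sequence")
	}
	if !dec.typeOK(parser.TokenSectionStart) {
		t.Error("TokenSectionStart should be acceptable at depth 0")
	}

	// after TokenSectionStart, only TokenSectionName is acceptable
	dec.queue = append(dec.queue, &token{typ: parser.TokenSectionStart})
	if !dec.typeOK(parser.TokenSectionName) {
		t.Error("TokenSectionName should follow TokenSectionStart")
	}
	if dec.typeOK(parser.TokenFieldValue) {
		t.Error("TokenFieldValue must not follow TokenSectionStart")
	}
}
```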