From 174f72c4cf4fd24aeed0b70c0c24b818b3076f74 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Fri, 1 Sep 2023 14:21:55 +0000 Subject: [PATCH 1/2] basic: introduce basic one-shot INI-style decoder Signed-off-by: Alejandro Mery --- README.md | 10 +++ basic/basic.go | 24 ++++++++ basic/decoder.go | 49 +++++++++++++++ basic/error.go | 33 ++++++++++ basic/token.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 270 insertions(+) create mode 100644 basic/basic.go create mode 100644 basic/decoder.go create mode 100644 basic/error.go create mode 100644 basic/token.go diff --git a/README.md b/README.md index fe9bc8b..0310e45 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ of stricter parsers of similar form. [godoc-lexer]: https://pkg.go.dev/asciigoat.org/core/lexer [godoc-parser-parser]: https://pkg.go.dev/asciigoat.org/ini/parser#Parser +[godoc-basic-parser]: https://pkg.go.dev/asciigoat.org/ini/basic#Decode [wikipedia-dosini]: https://en.wikipedia.org/wiki/INI_file @@ -25,6 +26,15 @@ of stricter parsers of similar form. [`asciigoat`'s lexer][godoc-lexer] to process an `INI`-style document emiting tokens and errors via callbacks. +## Basic Parser + +[`basic.Decode()`][godoc-basic-parser] provides a one-shot decoder +that returns a structured document for you to post-process. + +To allow for correct handling of repetition of section and field names downstream, +it uses arrays instead of maps, and makes almost no judgment +about what section or field names are acceptable. 
+ ## Other Implementations Other implementations exist, and they are mature and feature-rich, but they diff --git a/basic/basic.go b/basic/basic.go new file mode 100644 index 0000000..9b40607 --- /dev/null +++ b/basic/basic.go @@ -0,0 +1,24 @@ +// Package basic provides a basic representation of dosini-style documents +package basic + +// Document represents an INI-style document +type Document struct { + Global []Field + + Sections []Section +} + +// Section represents an INI-style section with optional GIT-style IDs +type Section struct { + Key string + ID string + EmptyID bool + + Fields []Field +} + +// Field represents a key = value entry in an INI-style document +type Field struct { + Key string + Value string +} diff --git a/basic/decoder.go b/basic/decoder.go new file mode 100644 index 0000000..d6c09f5 --- /dev/null +++ b/basic/decoder.go @@ -0,0 +1,49 @@ +package basic + +import ( + "bytes" + "io" + "io/fs" + "strings" + + "asciigoat.org/ini/parser" +) + +type decoder struct { + p *parser.Parser + out *Document + + queue []*token + current *Section +} + +// Decode attempts to decode an INI-style document from an [io.Reader] into a [Document] +func Decode(r io.Reader) (*Document, error) { + var out Document + + if r == nil { + return nil, fs.ErrNotExist + } + + // parser + p := parser.NewParser(r) + // decoder + dec := decoder{p: p, out: &out} + // glue + p.OnToken = dec.OnToken + p.OnError = dec.OnError + + // Go! 
+ err := p.Run() + return &out, err +} + +// DecodeBytes attempts to decode an INI-style bytes array into a [Document] +func DecodeBytes(b []byte) (*Document, error) { + return Decode(bytes.NewReader(b)) +} + +// DecodeString attempts to decode an INI-style string into a [Document] +func DecodeString(s string) (*Document, error) { + return Decode(strings.NewReader(s)) +} diff --git a/basic/error.go b/basic/error.go new file mode 100644 index 0000000..fec162d --- /dev/null +++ b/basic/error.go @@ -0,0 +1,33 @@ +package basic + +import ( + "errors" + + "asciigoat.org/core/lexer" +) + +var ( + errInvalidToken = errors.New("invalid token") +) + +func newErrInvalidToken(t *token) *lexer.Error { + err := &lexer.Error{ + Line: t.pos.Line, + Column: t.pos.Column, + Content: t.value, + Err: errInvalidToken, + } + return err +} + +func (dec *decoder) OnError(pos lexer.Position, content string, err error) error { + err = &lexer.Error{ + Line: pos.Line, + Column: pos.Column, + Content: content, + Err: err, + } + + dec.executeFinal() + return err +} diff --git a/basic/token.go b/basic/token.go new file mode 100644 index 0000000..4f2103b --- /dev/null +++ b/basic/token.go @@ -0,0 +1,154 @@ +package basic + +import ( + "fmt" + + "asciigoat.org/core/lexer" + "asciigoat.org/ini/parser" +) + +type token struct { + pos lexer.Position + typ parser.TokenType + value string +} + +func (t token) String() string { + return fmt.Sprintf("%s %s: %q", t.pos, t.typ, t.value) +} + +func (dec *decoder) executeFinal() { + if len(dec.queue) > 0 { + // we have unfinished businesses + switch dec.queue[0].typ { + case parser.TokenSectionStart: + dec.execute(parser.TokenSectionEnd) + case parser.TokenFieldKey: + dec.execute(parser.TokenFieldValue) + } + } +} + +func (dec *decoder) execute(typ parser.TokenType) { + switch typ { + case parser.TokenSectionEnd: + name1, ok1 := dec.getValue(1, parser.TokenSectionName) + + if ok1 { + name2, ok2 := dec.getValue(2, parser.TokenSectionSubname) + + 
dec.addSection(name1, name2, ok2) + } + + dec.reset() + case parser.TokenFieldValue: + key, _ := dec.getValue(0, parser.TokenFieldKey) + value, _ := dec.getValue(1, parser.TokenFieldValue) + + dec.addField(key, value) + dec.reset() + } +} + +func (dec *decoder) addSection(key, id string, allowEmptyID bool) { + emptyID := allowEmptyID && id == "" + + // index for dec.current + n := len(dec.out.Sections) + + // new section + dec.out.Sections = append(dec.out.Sections, Section{ + Key: key, + ID: id, + EmptyID: emptyID, + }) + + // pointer to the latest section + dec.current = &dec.out.Sections[n] +} + +func (dec *decoder) addField(key, value string) { + field := Field{ + Key: key, + Value: value, + } + + if p := dec.current; p != nil { + // in section + p.Fields = append(p.Fields, field) + } else { + // global + dec.out.Global = append(dec.out.Global, field) + } +} + +func (dec *decoder) getValue(idx int, typ parser.TokenType) (string, bool) { + switch { + case idx < 0 || idx >= len(dec.queue): + // out of range + return "", false + case dec.queue[idx].typ != typ: + // wrong type + return "", false + default: + return dec.queue[idx].value, true + } +} + +func (dec *decoder) reset() { + dec.queue = dec.queue[:0] +} + +func (dec *decoder) depth(depth int) bool { + return len(dec.queue) == depth +} + +func (dec *decoder) depthAfter(depth int, typ parser.TokenType) bool { + _, ok := dec.getValue(depth-1, typ) + if ok { + return len(dec.queue) == depth + } + return false +} + +func (dec *decoder) typeOK(typ parser.TokenType) bool { + switch typ { + case parser.TokenSectionStart, parser.TokenFieldKey: + // first token only + return dec.depth(0) + case parser.TokenSectionName: + // right after TokenSectionStart + return dec.depthAfter(1, parser.TokenSectionStart) + case parser.TokenSectionSubname: + // right after TokenSectionName + return dec.depthAfter(2, parser.TokenSectionName) + case parser.TokenSectionEnd: + // only on a section with name + _, ok := dec.getValue(1, 
parser.TokenSectionName) + return ok + case parser.TokenFieldValue: + // right after a TokenFieldKey + return dec.depthAfter(1, parser.TokenFieldKey) + default: + // never + return false + } +} + +func (dec *decoder) OnToken(pos lexer.Position, typ parser.TokenType, value string) error { + t := &token{pos, typ, value} + + switch { + case typ == parser.TokenComment: + // ignore comments + return nil + case dec.typeOK(typ): + // acceptable token + dec.queue = append(dec.queue, t) + dec.execute(typ) + return nil + default: + // unacceptable + return newErrInvalidToken(t) + } +} From a1e20fa3b6137c8c6112e69927f18bdfe6cabe75 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Fri, 1 Sep 2023 14:22:48 +0000 Subject: [PATCH 2/2] basic: introduce Document.WriteTo() and Document.String() producing an INI-style representation of the Document Signed-off-by: Alejandro Mery --- basic/write.go | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 basic/write.go diff --git a/basic/write.go b/basic/write.go new file mode 100644 index 0000000..d4a74e9 --- /dev/null +++ b/basic/write.go @@ -0,0 +1,88 @@ +package basic + +import ( + "bytes" + "fmt" + "io" + + "asciigoat.org/ini/parser" +) + +// WriteNewLine is the new line representation used by [Document.WriteTo] +const WriteNewLine = "\n" + +// AsBuffer returns an INI representation of the document on +// a memory buffer +func (doc *Document) AsBuffer(nl string) *bytes.Buffer { + var buf bytes.Buffer + + if len(doc.Global) > 0 { + _, _ = writeFieldsTo(&buf, doc.Global, nl) + } + + for _, sec := range doc.Sections { + if buf.Len() > 0 { + _, _ = buf.WriteString(nl) + } + + _ = writeSectionToBuffer(&buf, &sec, nl) + } + + return &buf +} + +func writeFieldsTo(w io.Writer, fields []Field, nl string) (int64, error) { + var written int + for _, field := range fields { + n, err := fmt.Fprintf(w, "%s = %q%s", field.Key, field.Value, nl) + switch { + case err != nil: + return int64(written), 
err + case n > 0: + written += n + } + } + return int64(written), nil +} + +func writeSectionToBuffer(w *bytes.Buffer, sec *Section, nl string) int { + var written, n int + + _, _ = w.WriteRune(parser.RuneSectionStart) + written++ + + n, _ = w.WriteString(sec.Key) + written += n + + switch { + case sec.EmptyID: + n, _ = w.WriteString(" \"\"") + written += n + case sec.ID != "": + _, _ = w.WriteRune(' ') + n, _ = fmt.Fprintf(w, "%q", sec.ID) + written += n + 1 + } + + _, _ = w.WriteRune(parser.RuneSectionEnd) + written++ + + n, _ = w.WriteString(nl) + written += n + + n64, _ := writeFieldsTo(w, sec.Fields, nl) + return written + int(n64) +} + +// WriteTo writes an INI representation of the document +// onto the provided writer. +func (doc *Document) WriteTo(w io.Writer) (int64, error) { + buf := doc.AsBuffer(WriteNewLine) + return buf.WriteTo(w) +} + +// String generates a string output for "%s" +func (doc *Document) String() string { + buf := doc.AsBuffer(WriteNewLine) + return buf.String() +}