From eb36c195c0cc25f5883b3c8a909e46f390e7a528 Mon Sep 17 00:00:00 2001
From: Alejandro Mery
Date: Thu, 31 Aug 2023 01:50:05 +0000
Subject: [PATCH] parser: implement basic dosini parsing

Signed-off-by: Alejandro Mery
---
Review notes (free-form text placed between the three-dash marker and the
diffstat; it is commentary only and does not alter any hunk):

 * lexStart now dispatches on the first non-space rune: a rune accepted by
   IsCommentStart is unread and handed to lexComment, a rune accepted by
   IsSectionStart is handed (still consumed) to lexSectionStart, and
   anything else is unread and handed to lexEntryStart.
 * lexComment accepts everything up to the next new-line rune and emits it
   as TokenComment. Because the comment rune was unread by lexStart, the
   emitted text presumably starts with the `;` or `#` marker -- confirm
   against emitString.
 * lexSectionStart emits TokenSectionStart, TokenSectionName and
   TokenSectionEnd in that order, stepping over non-new-line whitespace on
   both sides of the name. A missing name is reported as
   "section name missing" with lexer.ErrUnacceptableRune, and any rune other
   than `]` after the name goes through emitInvalidRune.
 * lexEntryStart emits TokenFieldKey, requires RuneFieldEqual, and emits
   the remainder of the line as TokenFieldValue.
    - The result of AcceptAll(IsName) is ignored here, unlike in
      lexSectionStart. Presumably a line starting with `=` produces an
      empty TokenFieldKey -- TODO confirm whether that is intended.
    - NOTE(review): the `=` consumed by ReadRune is only stepped over when
      whitespace follows it. For `key=value` it looks like the `=` may end
      up inside TokenFieldValue -- verify against stepString/emitString.
    - The final `return p.lexStart, err` refers to the err returned by
      ReadRune, which is always nil at that point.
 * IsName rejects whitespace and every rune listed in RunesSpecial; all
   other runes are accepted for section and field names.
 * emitInvalidRune wraps emitError, passing the offending rune as the
   content and lexer.ErrUnacceptableRune as the error.

 parser/lexer.go       | 85 +++++++++++++++++++++++++++++++++++++++++--
 parser/lexer_runes.go | 51 ++++++++++++++++++++++++++
 parser/parser.go      |  4 ++
 3 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/parser/lexer.go b/parser/lexer.go
index c735baa..5ed5102 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -23,9 +23,17 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
 		case IsSpace(r):
 			// whitespace
 			p.stepRune()
+		case IsCommentStart(r):
+			// switch to comment lexer
+			p.src.UnreadRune()
+			return p.lexComment, nil
+		case IsSectionStart(r):
+			// section
+			return p.lexSectionStart, nil
 		default:
+			// entry
 			p.src.UnreadRune()
-			return p.lexToken, nil
+			return p.lexEntryStart, nil
 		}
 	}
 }
@@ -59,9 +67,78 @@ func (p *Parser) lexMoreNewLine(r1 rune) {
 	}
 }
 
-func (p *Parser) lexToken() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsNotSpace)
+func (p *Parser) lexComment() (lexer.StateFn, error) {
+	// until the end of the line
+	p.src.AcceptAll(IsNotNewLine)
+
+	err := p.emitString(TokenComment)
+	return p.lexStart, err
+}
+
+func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
+	if err := p.emitString(TokenSectionStart); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between `[` and the name
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	if !p.src.AcceptAll(IsName) {
+		// no name
+		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
+	}
+
+	if err := p.emitString(TokenSectionName); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between the name and the closing `]`
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case IsSectionEnd(r):
+		err := p.emitString(TokenSectionEnd)
+		return p.lexStart, err
+	default:
+		return p.emitInvalidRune(r)
+	}
+}
+
+func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsName)
+	if err := p.emitString(TokenFieldKey); err != nil {
+		return nil, err
+	}
+
+	// ignore whitespace between key and the '=' sign
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case r != RuneFieldEqual:
+		return p.emitInvalidRune(r)
+	}
+
+	// ignore whitespace between the '=' and the value
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	p.src.AcceptAll(IsNotNewLine)
+	if err := p.emitString(TokenFieldValue); err != nil {
+		return nil, err
+	}
 
-	err := p.emitString(TokenUnknown)
 	return p.lexStart, err
 }
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
index 872d2be..950f84c 100644
--- a/parser/lexer_runes.go
+++ b/parser/lexer_runes.go
@@ -1,9 +1,35 @@
 package parser
 
 import (
+	"strings"
+
 	"asciigoat.org/core/lexer"
 )
 
+const (
+	RuneComment      = ';' // RuneComment is the standard dosini comment character
+	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
+	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
+	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
+	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
+)
+
+var (
+	// RunesComment is a string containing all runes acceptable to start comments
+	RunesComment = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+	})
+	// RunesSpecial is a string containing all the runes with special meaning
+	RunesSpecial = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+		RuneSectionStart,
+		RuneSectionEnd,
+		RuneFieldEqual,
+	})
+)
+
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
@@ -13,4 +39,29 @@ var (
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
+	// IsCommentStart tells if the rune is one of RunesComment and so starts a comment
+	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
+
+// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
+func IsSpaceNotNewLine(r rune) bool {
+	return IsSpace(r) && !IsNewLine(r)
+}
+
+// IsSectionStart indicates the rune starts the section declaration
+func IsSectionStart(r rune) bool { return r == RuneSectionStart }
+
+// IsSectionEnd indicates the rune ends the section declaration
+func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
+
+// IsName indicates a rune is acceptable for section or field names
+func IsName(r rune) bool {
+	switch {
+	case IsSpace(r):
+		return false
+	case strings.ContainsRune(RunesSpecial, r):
+		return false
+	default:
+		return true
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
index 04d9b06..b526b0c 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -70,6 +70,10 @@ func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	}
 }
 
+func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
+	return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
+}
+
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {