From eb36c195c0cc25f5883b3c8a909e46f390e7a528 Mon Sep 17 00:00:00 2001
From: Alejandro Mery <amery@jpi.io>
Date: Thu, 31 Aug 2023 01:50:05 +0000
Subject: [PATCH] parser: implement basic dosini parsing

Signed-off-by: Alejandro Mery <amery@jpi.io>
---
 parser/lexer.go       | 85 +++++++++++++++++++++++++++++++++++++++++--
 parser/lexer_runes.go | 51 ++++++++++++++++++++++++++
 parser/parser.go      |  4 ++
 3 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/parser/lexer.go b/parser/lexer.go
index c735baa..5ed5102 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -23,9 +23,17 @@ func (p *Parser) lexStart() (lexer.StateFn, error) {
 		case IsSpace(r):
 			// whitespace
 			p.stepRune()
+		case IsCommentStart(r):
+			// switch to comment lexer
+			p.src.UnreadRune()
+			return p.lexComment, nil
+		case IsSectionStart(r):
+			// section: `[` stays consumed so lexSectionStart can emit it
+			return p.lexSectionStart, nil
 		default:
+			// entry
 			p.src.UnreadRune()
-			return p.lexToken, nil
+			return p.lexEntryStart, nil
 		}
 	}
 }
@@ -59,9 +67,78 @@ func (p *Parser) lexMoreNewLine(r1 rune) {
 	}
 }
 
-func (p *Parser) lexToken() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsNotSpace)
+func (p *Parser) lexComment() (lexer.StateFn, error) {
+	// until the end of the line
+	p.src.AcceptAll(IsNotNewLine)
+
+	err := p.emitString(TokenComment)
+	return p.lexStart, err
+}
+
+func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
+	if err := p.emitString(TokenSectionStart); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between `[` and the name
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	if !p.src.AcceptAll(IsName) {
+		// no name
+		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
+	}
+
+	if err := p.emitString(TokenSectionName); err != nil {
+		return nil, err
+	}
+
+	// remove whitespace between the name and the closing `]`
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case IsSectionEnd(r):
+		err := p.emitString(TokenSectionEnd)
+		return p.lexStart, err
+	default:
+		return p.emitInvalidRune(r)
+	}
+}
+
+func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
+	p.src.AcceptAll(IsName)
+	if err := p.emitString(TokenFieldKey); err != nil {
+		return nil, err
+	}
+
+	// ignore whitespace between key and the '=' sign
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	r, _, err := p.src.ReadRune()
+	switch {
+	case err != nil:
+		return p.emitError("", err)
+	case r != RuneFieldEqual:
+		return p.emitInvalidRune(r)
+	}
+
+	// ignore whitespace between the '=' and the value
+	if p.src.AcceptAll(IsSpaceNotNewLine) {
+		p.stepString()
+	}
+
+	p.src.AcceptAll(IsNotNewLine)
+	if err := p.emitString(TokenFieldValue); err != nil {
+		return nil, err
+	}
 
-	err := p.emitString(TokenUnknown)
 	return p.lexStart, err
 }
diff --git a/parser/lexer_runes.go b/parser/lexer_runes.go
index 872d2be..950f84c 100644
--- a/parser/lexer_runes.go
+++ b/parser/lexer_runes.go
@@ -1,9 +1,35 @@
 package parser
 
 import (
+	"strings"
+
 	"asciigoat.org/core/lexer"
 )
 
+const (
+	RuneComment      = ';' // RuneComment is the standard dosini comment character
+	RuneCommentExtra = '#' // RuneCommentExtra is UNIX shell's comment character
+	RuneSectionStart = '[' // RuneSectionStart indicates the start of a section declaration
+	RuneSectionEnd   = ']' // RuneSectionEnd indicates the end of a section declaration
+	RuneFieldEqual   = '=' // RuneFieldEqual separates field keys from their values
+)
+
+var (
+	// RunesComment is a string containing all runes acceptable to start comments
+	RunesComment = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+	})
+	// RunesSpecial is a string containing all the runes with special meaning
+	RunesSpecial = string([]rune{
+		RuneComment,
+		RuneCommentExtra,
+		RuneSectionStart,
+		RuneSectionEnd,
+		RuneFieldEqual,
+	})
+)
+
 var (
 	// IsNewLine tells if the rune indicates a line break or the start of one
 	IsNewLine = lexer.NewIsIn("\r\n")
@@ -13,4 +39,29 @@ var (
 	IsSpace = lexer.IsSpace
 	// IsNotSpace tells if the rune is not considered whitespace by Unicode
 	IsNotSpace = lexer.NewIsNot(IsSpace)
+	// IsCommentStart tells if the rune is one of RunesComment and starts a comment
+	IsCommentStart = lexer.NewIsIn(RunesComment)
 )
+
+// IsSpaceNotNewLine indicates a rune is whitespace but not a new line
+func IsSpaceNotNewLine(r rune) bool {
+	return IsSpace(r) && !IsNewLine(r)
+}
+
+// IsSectionStart indicates the rune starts the section declaration
+func IsSectionStart(r rune) bool { return r == RuneSectionStart }
+
+// IsSectionEnd indicates the rune ends the section declaration
+func IsSectionEnd(r rune) bool { return r == RuneSectionEnd }
+
+// IsName indicates a rune is acceptable for section or field names
+func IsName(r rune) bool {
+	switch {
+	case IsSpace(r):
+		return false
+	case strings.ContainsRune(RunesSpecial, r):
+		return false
+	default:
+		return true
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
index 04d9b06..b526b0c 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -70,6 +70,10 @@ func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
 	}
 }
 
+func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
+	return p.emitError(string([]rune{r}), lexer.ErrUnacceptableRune)
+}
+
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {