Merge pull request 'parser: introduce TextParser and refactor Parser' (#7)

Reviewed-on: #7
2 years ago · 314c004efd
7 changed files with 182 additions and 76 deletions
--- a/go.mod
+++ b/go.mod
@ -3,7 +3,7 @@ module asciigoat.org/ini
 go 1.19

 require (
-	asciigoat.org/core v0.3.7
+	asciigoat.org/core v0.3.9
 	github.com/mgechev/revive v1.3.3
 	golang.org/x/tools v0.12.0
 )
--- a/go.sum
+++ b/go.sum
@ -1,5 +1,5 @@
-asciigoat.org/core v0.3.7 h1:tMasdvZgsMJJMVsZVfXXB5lqq82pFiCsyEmOEmcmAfI=
-asciigoat.org/core v0.3.7/go.mod h1:tXj+JUutxRbcO40ZQRuUVaZ4rnYz1kAZ0nblisV8u74=
+asciigoat.org/core v0.3.9 h1:hgDDz4ecm3ZvehX++m8A/IzAt+B5oDPiRtxatzfUHPQ=
+asciigoat.org/core v0.3.9/go.mod h1:CAaHwyw8MpAq4a1MYtN2dxJrsK+hmIdW50OndaQZYPI=
 github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/chavacava/garif v0.1.0 h1:2JHa3hbYf5D9dsgseMKAmc/MZ109otzgNFk5s87H9Pc=
--- a/parser/lexer.go
+++ b/parser/lexer.go
@ -5,71 +5,42 @@ import "asciigoat.org/core/lexer"
 // Run parses the source
 func (p *Parser) Run() error {
 	p.setDefaults()
-	p.pos.Reset()

 	return lexer.Run(p.lexStart)
 }

 func (p *Parser) lexStart() (lexer.StateFn, error) {
 	for {
-		r, _, err := p.src.ReadRune()
+		r, _, err := p.p.ReadRune()
 		switch {
 		case err != nil:
 			return p.emitError("", err)
 		case IsNewLine(r):
 			// new line
-			p.lexMoreNewLine(r)
+			p.p.UnreadRune()
+			p.p.AcceptNewLine()
 			p.stepLine()
 		case IsSpace(r):
 			// whitespace
-			p.stepRune()
+			p.stepString()
 		case IsCommentStart(r):
 			// switch to comment lexer
-			p.src.UnreadRune()
+			p.p.UnreadRune()
 			return p.lexComment, nil
 		case IsSectionStart(r):
 			// section
 			return p.lexSectionStart, nil
 		default:
 			// entry
-			p.src.UnreadRune()
+			p.p.UnreadRune()
 			return p.lexEntryStart, nil
 		}
 	}
 }

-func (p *Parser) lexMoreNewLine(r1 rune) {
-	// r1 is warrantied to be either '\r' or '\n'
-	r2, _, err := p.src.ReadRune()
-	switch r1 {
-	case '\n':
-		switch {
-		case r2 == '\r':
-			// LN CR
-		case err == nil:
-			// LN
-			p.src.UnreadRune()
-		default:
-			// LN EOF
-		}
-	case '\r':
-		switch {
-		case r2 == '\n':
-			// CR LN
-		case err == nil:
-			// CR
-			p.src.UnreadRune()
-		default:
-			// CR EOF
-		}
-	default:
-		panic("unreachable")
-	}
-}
-
 func (p *Parser) lexComment() (lexer.StateFn, error) {
 	// until the end of the line
-	p.src.AcceptAll(IsNotNewLine)
+	p.p.AcceptAll(IsNotNewLine)

 	err := p.emitString(TokenComment)
 	return p.lexStart, err
@ -81,11 +52,11 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
 	}

 	// remove whitespace between `[` and the name
-	if p.src.AcceptAll(IsSpaceNotNewLine) {
+	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}

-	if !p.src.AcceptAll(IsName) {
+	if !p.p.AcceptAll(IsName) {
 		// no name
 		return p.emitError("section name missing", lexer.ErrUnacceptableRune)
 	}
@ -94,12 +65,12 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
 		return nil, err
 	}

-	// remove whitespace between the name andthe closing `]`
-	if p.src.AcceptAll(IsSpaceNotNewLine) {
+	// remove whitespace between the name and the closing `]`
+	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}

-	r, _, err := p.src.ReadRune()
+	r, _, err := p.p.ReadRune()
 	switch {
 	case err != nil:
 		return p.emitError("", err)
@ -112,17 +83,17 @@ func (p *Parser) lexSectionStart() (lexer.StateFn, error) {
 }

 func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
-	p.src.AcceptAll(IsName)
+	p.p.AcceptAll(IsName)
 	if err := p.emitString(TokenFieldKey); err != nil {
 		return nil, err
 	}

 	// ignore whitespace between key and the '=' sign
-	if p.src.AcceptAll(IsSpaceNotNewLine) {
+	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}

-	r, _, err := p.src.ReadRune()
+	r, _, err := p.p.ReadRune()
 	switch {
 	case err != nil:
 		return p.emitError("", err)
@ -131,11 +102,11 @@ func (p *Parser) lexEntryStart() (lexer.StateFn, error) {
 	}

 	// ignore whitespace between the '=' and the value
-	if p.src.AcceptAll(IsSpaceNotNewLine) {
+	if p.p.AcceptAll(IsSpaceNotNewLine) {
 		p.stepString()
 	}

-	p.src.AcceptAll(IsNotNewLine)
+	p.p.AcceptAll(IsNotNewLine)
 	if err := p.emitString(TokenFieldValue); err != nil {
 		return nil, err
 	}
--- a/parser/lexer_runes.go
+++ b/parser/lexer_runes.go
@ -43,6 +43,11 @@ var (
 	IsCommentStart = lexer.NewIsIn(RunesComment)
 )

+// IsAny accepts any rune
+func IsAny(_ rune) bool {
+	return true
+}
+
 // IsSpaceNotNewLine indicates a rune is whitespace but not a new line
 func IsSpaceNotNewLine(r rune) bool {
 	return IsSpace(r) && !IsNewLine(r)
--- a/parser/parser.go
+++ b/parser/parser.go
@ -1,4 +1,4 @@
-// Package parser parses dosini-style files
+// Package parser parses ini-style files
 package parser

 import (
@ -8,10 +8,9 @@ import (
 	"asciigoat.org/core/lexer"
 )

-// Parser parses a dosini-style document
+// Parser parses an ini-style document
 type Parser struct {
-	src *lexer.Reader
-	pos lexer.Position
+	p TextParser

 	// OnToken is called for each identified token. if it returns an error
 	// parsing is interrupted.
@ -51,15 +50,13 @@ func (p *Parser) setDefaults() {
 }

 func (p *Parser) emitString(typ TokenType) error {
-	s := p.src.Emit()
-	err := p.OnToken(p.pos, typ, s)
-	p.pos.StepN(len(s))
-
-	return err
+	pos, s := p.p.Emit()
+	return p.OnToken(pos, typ, s)
 }

 func (p *Parser) emitError(content string, err error) (lexer.StateFn, error) {
-	err2 := p.OnError(p.pos, content, err)
+	pos := p.p.Position()
+	err2 := p.OnError(pos, content, err)
 	switch {
 	case err2 != nil:
 		// return wrapped error
@ -77,33 +74,25 @@ func (p *Parser) emitInvalidRune(r rune) (lexer.StateFn, error) {
 // stepLine discards the data and moves the position
 // to the next line.
 func (p *Parser) stepLine() {
-	p.src.Discard()
-	p.pos.StepLine()
-}
-
-// stepRune discards the data and moves the position
-// one rune forward on the same line.
-func (p *Parser) stepRune() {
-	p.src.Discard()
-	p.pos.Step()
+	p.p.StepLine()
 }

 // stepString discards the data and moves the position
 // forward on the same line the length of the discarded
 // content.
 func (p *Parser) stepString() {
-	s := p.src.Emit()
-	p.pos.StepN(len(s))
+	p.p.Step()
 }

-// NewParser creates a dosini-style parser using
+// NewParser creates an ini-style parser using
 // an [io.Reader] as source
 func NewParser(r io.Reader) *Parser {
-	if r == nil {
-		return nil
-	}
+	var p *Parser

-	return &Parser{
-		src: lexer.NewReader(r),
+	if r != nil {
+		p = new(Parser)
+		p.p.Init(r)
 	}
+
+	return p
 }
--- a/parser/text.go
+++ b/parser/text.go
@ -0,0 +1,103 @@
+package parser
+
+import (
+	"bytes"
+	"io"
+	"strings"
+
+	"asciigoat.org/core/lexer"
+)
+
+// TextParser is a generic text parser.
+type TextParser struct {
+	*lexer.Reader
+	pos lexer.Position
+}
+
+// Init initializes the [TextParser] with a non-nil [io.Reader].
+func (p *TextParser) Init(r io.Reader) {
+	switch {
+	case p == nil || r == nil:
+		panic("invalid call")
+	case p.Reader != nil:
+		panic("parser already initialized")
+	default:
+		p.Reader = lexer.NewReader(r)
+		p.pos.Reset()
+	}
+}
+
+// InitBytes initializes the [TextParser] with a byte slice
+func (p *TextParser) InitBytes(b []byte) {
+	p.Init(bytes.NewReader(b))
+}
+
+// InitString initializes the [TextParser] with a string
+func (p *TextParser) InitString(s string) {
+	p.Init(strings.NewReader(s))
+}
+
+// Discard shadows [lexer.Reader]'s Discard, and takes into account
+// new lines on the discarded data when moving the position
+func (p *TextParser) Discard() {
+	s := p.Reader.Emit()
+	l := GetPositionalLength(s)
+	p.pos.Add(l)
+}
+
+// Emit returns the accepted text, its position, and
+// moves the cursor position accordingly
+func (p *TextParser) Emit() (lexer.Position, string) {
+	pos := p.pos
+	s := p.Reader.Emit()
+	l := GetPositionalLength(s)
+	p.pos.Add(l)
+
+	return pos, s
+}
+
+// Step discards what's been accepted and increments the
+// position assuming they all increment the column counter
+func (p *TextParser) Step() {
+	s := p.Reader.Emit()
+	p.pos.StepN(len(s))
+}
+
+// StepLine discards what's been accepted and moves the
+// position to the beginning of the next line
+func (p *TextParser) StepLine() {
+	p.Reader.Discard()
+	p.pos.StepLine()
+}
+
+// Position returns the position of the first character
+// of the accepted text
+func (p *TextParser) Position() lexer.Position {
+	return p.pos
+}
+
+// AcceptNewLine checks if next is a new line.
+// It accepts "\n", "\n\r", "\r" and "\r\n".
+func (p *TextParser) AcceptNewLine() bool {
+	r1, _, err := p.ReadRune()
+	switch {
+	case err != nil:
+		return false
+	case r1 == '\n':
+		p.AcceptRune('\r')
+		return true
+	case r1 == '\r':
+		p.AcceptRune('\n')
+		return true
+	default:
+		p.UnreadRune()
+		return false
+	}
+}
+
+// AcceptRune checks if next is the specified rune
+func (p *TextParser) AcceptRune(r rune) bool {
+	return p.Accept(func(r2 rune) bool {
+		return r == r2
+	})
+}
--- a/parser/text_position.go
+++ b/parser/text_position.go
@ -0,0 +1,38 @@
+package parser
+
+import (
+	"io"
+
+	"asciigoat.org/core/lexer"
+)
+
+type positionLengthParser struct {
+	TextParser
+
+	lexer.Position
+}
+
+func (p *positionLengthParser) lexStart() (lexer.StateFn, error) {
+	for {
+		switch {
+		case p.AcceptNewLine():
+			p.Position.StepLine()
+		case p.Accept(IsAny):
+			p.Position.StepN(1)
+		default:
+			return nil, io.EOF
+		}
+	}
+}
+
+// GetPositionalLength calculates the [lexer.Position] reached at the
+// end of a text; an empty text yields the zero [lexer.Position].
+func GetPositionalLength(s string) lexer.Position {
+	var p positionLengthParser
+	if s != "" { // only non-empty text needs scanning
+		p.InitString(s)
+
+		_ = lexer.Run(p.lexStart) // lexStart only ever ends with io.EOF
+	}
+	return p.Position
+}