parser: Unescaped [WIP]

Signed-off-by: Alejandro Mery <amery@jpi.io>
build-sys: use local darvaza.org/core [DO-NOT-MERGE]
2023-09-05 13:22:39 +00:00 · 2023-09-05 13:22:09 +00:00 · 2023-09-05 13:22:09 +00:00 · 2023-09-05 13:22:09 +00:00 · 2023-09-05 13:22:09 +00:00 · 2023-09-05 13:22:09 +00:00
5 changed files with 84 additions and 67 deletions
@@ -4,28 +4,19 @@ import (
 	"errors"
 	"asciigoat.org/core/lexer"
 	"asciigoat.org/ini/parser"
 )
 var (
 	errInvalidToken = errors.New("invalid token")
 )
 func newError(pos lexer.Position, content, hint string, err error) *lexer.Error {
 	return &lexer.Error{
 		Line:    pos.Line,
 		Column:  pos.Column,
 		Content: content,
 		Hint:    hint,
 		Err:     err,
 	}
 }
 func newErrInvalidToken(t *token) *lexer.Error {
-	return newError(t.pos, t.value, "", errInvalidToken)
+	return parser.NewError(t.pos, t.value, "", errInvalidToken)
 }
 func (dec *decoder) OnError(pos lexer.Position, content string, err error) error {
-	err = newError(pos, content, "", err)
+	err = parser.NewError(pos, content, "", err)
 	dec.executeFinal()
 	return err
 }
@@ -1,6 +1,8 @@
 package parser
 import (
 	"strings"
 	"asciigoat.org/core/lexer"
 )
@@ -46,18 +48,13 @@ func (p *commaArrayParser) lexWord() (lexer.StateFn, error) {
 			_, s := p.Emit()
 			p.out = append(p.out, s)
 			return nil, err
-		case r == RuneEscape:
+		case r == ',':
-			// escaped
+			// done
-			r2, _, err := p.ReadRune()
+			_, s := p.Emit()
-			switch {
+			// remove comma, trim and append to output
-			case err != nil:
+			s = strings.TrimRightFunc(s[:len(s)-1], IsSpace)
-				// incomplete
+			p.out = append(p.out, s)
-				return nil, NewErrIncompleteEscaped(&p.TextParser)
+			return p.lexStart, nil
 			case IsNewLine(r2):
 				// escaped new line
 				p.UnreadRune()
 				p.AcceptNewLine()
 			}
 		}
 	}
 }
@@ -79,7 +76,8 @@ func (p *commaArrayParser) Run() ([]string, error) {
 	return p.out, err
 }
-// SplitCommaArray splits
+// SplitCommaArray splits comma separated strings, removing whitespace
 // and respecting quoted literals.
 func SplitCommaArray(s string) ([]string, error) {
 	if s != "" {
 		var p commaArrayParser
@@ -6,6 +6,17 @@ import (
 	"asciigoat.org/core/lexer"
 )
 // NewError creates a lexer.Error using a lexer.Position
 func NewError(pos lexer.Position, content, hint string, err error) *lexer.Error {
 	return &lexer.Error{
 		Line:    pos.Line,
 		Column:  pos.Column,
 		Content: content,
 		Hint:    hint,
 		Err:     err,
 	}
 }
 // ErrPlusPosition returns a copy of the given [lexer.Error]
 // offsetting the Line/Column information.
 func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
@@ -14,13 +25,7 @@ func ErrPlusPosition(pos lexer.Position, e *lexer.Error) *lexer.Error {
 		Column: e.Column,
 	})
-	return &lexer.Error{
+	return NewError(pos, e.Content, e.Hint, e.Err)
 		Line:    pos.Line,
 		Column:  pos.Column,
 		Content: e.Content,
 		Hint:    e.Hint,
 		Err:     e.Err,
 	}
 }
 // NewErrIncompleteQuotedString returns a [lexer.Error]
@@ -41,11 +46,17 @@ func newErrIncomplete(p *TextParser, hint string) *lexer.Error {
 	pos, s := p.Emit()
 	pos.Add(GetPositionalLength(s))
-	return &lexer.Error{
+	return NewError(pos, s, hint, fs.ErrInvalid)
-		Line:    pos.Line,
+}
-		Column:  pos.Column,
+
-		Content: s,
+// NewErrInvalidEscapeSequence returns a [lexer.Error] indicating
-		Hint:    hint,
+// the specified sequence, at the end of the accepted buffer,
-		Err:     fs.ErrInvalid,
+// is invalid.
-	}
+func NewErrInvalidEscapeSequence(p *TextParser, seq string) *lexer.Error {
 	pos, s := p.Position(), p.String()
 	s = s[:len(s)-len(seq)]
 	pos.Add(GetPositionalLength(s))
 	return NewError(pos, seq, "invalid escape character", fs.ErrInvalid)
 }
@@ -31,13 +31,7 @@ func defaultOnToken(pos lexer.Position, typ TokenType, value string) error {
 func defaultOnError(pos lexer.Position, content string, err error) error {
 	log.Printf("%s:%v:%v: %q: %s", "error", pos.Line, pos.Column, content, err)
-	return lexer.Error{
+	return NewError(pos, content, "", err)
 		Line:   pos.Line,
 		Column: pos.Column,
 		Content: content,
 		Err:     err,
 	}
 }
 func (p *Parser) setDefaults() {
@@ -1,11 +1,13 @@
 package parser
 import (
 	"strings"
 	"asciigoat.org/core/lexer"
 )
 // AcceptQuotedString consumes a quoted string from the source
-// and returns it unquoted.
+// and returns it unquoted and unescaped.
 func (p *TextParser) AcceptQuotedString() (string, bool, error) {
 	r, _, err := p.ReadRune()
 	switch {
@@ -54,9 +56,10 @@ func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
 			// end, just remove the quotes
 			s := p.String()
 			l := len(s)
-			return s[1 : l-2], true, nil
+			return s[1 : l-1], true, nil
 		case r == RuneEscape:
 			// things just got complicated...
 			p.UnreadRune()
 			return "", false, nil
 		case IsNewLine(r):
 			// new lines within quoted values are acceptable
@@ -68,26 +71,46 @@ func lexQuotedStringNoEscape(p *TextParser) (string, bool, *lexer.Error) {
 	}
 }
-func lexQuotedStringEscaped(*TextParser) (string, *lexer.Error) {
+// lexQuotedStringEscaped removes the quotes and unescapes the content
-	return "", nil
+func lexQuotedStringEscaped(p *TextParser) (string, *lexer.Error) {
-}
+	var result strings.Builder
-//	// escaped. append partial
+	// append what was accepted before the escape character
-//	mark = lexQuotedAppendPartial(p, &buf, mark)
+	_, _ = result.WriteString(p.String()[1:])
-//
+
-//	r2, _, err := p.ReadRune()
+	for {
-//	switch {
+		r, _, err := p.ReadRune()
-//	case err != nil:
+		switch {
-//		// incomplete
+		case err != nil:
-//		return "", NewErrIncompleteQuotedString(p)
+			// incomplete quoted
-//	case IsNewLine(r2):
+			return "", NewErrIncompleteQuotedString(p)
-//		// escaped new line
+		case r == RuneQuotes:
-//		p.UnreadRune()
+			// end
-//		p.AcceptNewLine()
+			return result.String(), nil
-//		mark = lexQuotedAppendNewLine(p, &buf, mark)
+		case r == RuneEscape:
-//	default:
+			// escaped
-//		// bad escaped
+			r2, _, err := p.ReadRune()
-//	}
+			switch {
 			case err != nil:
 				// incomplete escaped
 				return "", NewErrIncompleteEscaped(p)
 			case IsNewLine(r2):
 				// escaped new line, skip
 				p.UnreadRune()
 				p.AcceptNewLine()
 			default:
 				// TODO: check valid escape character and
 				// append to result
 				s := string([]rune{r, r2})
 				err := NewErrInvalidEscapeSequence(p, s)
 				return "", err
 			}
 		default:
 			// normal, append to result
 			_, _ = result.WriteRune(r)
 		}
 	}
 }
 // Unquoted removes quotes and unescapes the content
 func Unquoted(s string) (string, error) {
Author	SHA1	Message	Date
amery	3bf20948c0	parser: Unescaped [WIP] Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:39 +00:00
amery	0dd29272e9	build-sys: use local darvaza.org/core [DO-NOT-MERGE] Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:09 +00:00
amery	7fab1a799a	build-sys: use local asciigoat.org/core [DO-NOT-MERGE] Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:09 +00:00
amery	16dfde1503	vscode: add Subname to the dictionary Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:09 +00:00
amery	41d7c6e04d	vscode: add unescapes to the dictionary Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:09 +00:00
amery	48adaeb8a8	vscode: add asciigoat to the dictionary Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-05 13:22:09 +00:00
amery	99ca8d0b3b	Merge branch 'pr-amery-basic' into next-amery	2023-09-05 13:22:01 +00:00
amery	986b6d1c6d	Merge pull request 'parser: Unquoted(), AcceptQuotedString() and SplitCommaArray' (#9 ) Reviewed-on: #9	2023-09-05 15:20:38 +02:00
amery	d41cd781d9	parser: introduce SplitCommaArray to splits comma separated strings removing whitespace and respecting quoted literals. Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 18:58:06 +00:00
amery	651fcb6215	parser: Unquoted(), AcceptQuotedString() TODO: reduce quoted strings with escaped characters Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 18:58:06 +00:00
amery	fa9a7b4735	basic: rename and document queue related methods Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 17:37:09 +00:00
amery	cfd4a94559	basic: call executeFinal() when OnToken() fails Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 17:37:09 +00:00
amery	d8af7821e4	Merge pull request 'parser: introduce NewError() and ErrPlusPosition()' (#8 ) Reviewed-on: #8	2023-09-04 19:33:24 +02:00
amery	8f3e59ec36	parser: introduce ErrPlusPosition to apply a position offset to a lexer.Error Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:25:20 +00:00
amery	d316031c44	basic: cleanup using parser.NewError() Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:25:20 +00:00
amery	c3883cbb0d	parser: introduce NewError() to create lexer.Error using lexer.Position Signed-off-by: Alejandro Mery <amery@jpi.io>	2023-09-04 15:16:43 +00:00
amery	314c004efd	Merge pull request 'parser: introduce TextParser and refactor Parser' (#7 ) Reviewed-on: #7	2023-09-04 16:17:04 +02:00