13 Commits

Author SHA1 Message Date
amery f7e13e0978 Merge pull request 'lexer: introduce Error{}' (#6)
Reviewed-on: #6
2023-08-29 17:00:09 +02:00
amery f67d8a2443 lexer: introduce Error{}
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 13:54:23 +00:00
amery 76e6146e9e Merge pull request 'introduce NewReadCloser to allow byte and string buffers to offer io.ReadCloser' (#1)
Reviewed-on: #1
2023-08-29 15:24:36 +02:00
amery f79e2bee9e Merge pull request 'lexer: rename runes.Reader to lexer.Reader and implement UnreadRune() and PeekRune()' (#4)
Reviewed-on: #4
2023-08-29 15:23:15 +02:00
amery 6cca2996ca lexer: Implement Reader.UnreadRune() and Reader.PeekRune()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 02:00:43 +00:00
amery edcba80baa lexer: fix ReadRune() to actually move the cursor
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 02:00:38 +00:00
amery 7230a74f49 lexer: runes.Reader renamed to lexer.Reader
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-29 01:59:09 +00:00
amery 1b223e3751 introduce NewReadCloser to allow byte and string buffers to offer io.ReadCloser
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 22:06:52 +00:00
amery 07b652c414 chore: rename module to asciigoat.org/core
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 17:51:55 +00:00
amery eaa846b64b runes.Reader: simplify buffer handling
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 05:01:23 +00:00
amery fef0d81610 runes.Reader: introduce String(), Emit() and Discard()
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 04:45:25 +00:00
amery fad6357d91 runes: initial RuneReader implementation
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-28 04:33:44 +00:00
amery f6a391904d Initial commit
Signed-off-by: Alejandro Mery <amery@jpi.io>
2023-08-27 23:20:56 +00:00
27 changed files with 350 additions and 835 deletions
+13
View File
@@ -0,0 +1,13 @@
# http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 4
View File
+1 -1
View File
@@ -1,4 +1,4 @@
Copyright 2021 JPI Technologies Ltd <oss@jpi.io>
Copyright 2023 JPI Technologies Ltd <oss@jpi.io>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
-15
View File
@@ -1,15 +0,0 @@
.PHONY: all fmt build test
GO ?= go
all: fmt build
fmt:
$(GO) fmt ./...
$(GO) mod tidy || true
build:
$(GO) get -v ./...
test:
$(GO) test -v ./...
-4
View File
@@ -1,4 +0,0 @@
asciigoat.org/core
==================
helpers and general structs used by asciigoat parsers and generators
+1
View File
@@ -0,0 +1 @@
# asciigoat's core library
-37
View File
@@ -1,37 +0,0 @@
/*
Package ebnf implements an ISO/IEC 14977
Extended Backus-Naur Form parser, verifiers,
and additional related helpers for AsciiGoat
A syntax highlighter for vim and a copy of the final draft of the standard
are included in the doc/ directory. The official standard can be downloaded from
http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip
An uberly simplified version of the EBNF grammar looks like:
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" ;
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
| "'" | '"' | "=" | "|" | "." | "," | ";" ;
character = letter | digit | symbol | "_" ;
identifier = letter , { letter | digit | "_" } ;
terminal = "'" , character , { character } , "'"
| '"' , character , { character } , '"' ;
lhs = identifier ;
rhs = identifier
| terminal
| "[" , rhs , "]"
| "{" , rhs , "}"
| "(" , rhs , ")"
| rhs , "|" , rhs
| rhs , "," , rhs ;
rule = lhs , "=" , rhs , ";" ;
grammar = { rule } ;
*/
package ebnf
-36
View File
@@ -1,36 +0,0 @@
" Vim syntax file
" Language: EBNF
" Maintainer: Hans Fugal
" Last Change: $Date: 2003/01/28 14:42:09 $
" Version: $Id: ebnf.vim,v 1.1 2003/01/28 14:42:09 fugalh Exp $
" With thanks to Michael Brailsford for the BNF syntax file.
" Quit when a syntax file was already loaded
if version < 600
syntax clear
elseif exists("b:current_syntax")
finish
endif
syn match ebnfMetaIdentifier /[A-Za-z]/ skipwhite skipempty nextgroup=ebnfSeperator
syn match ebnfSeperator "=" contained nextgroup=ebnfProduction skipwhite skipempty
syn region ebnfProduction start=/\zs[^\.;]/ end=/[\.;]/me=e-1 contained contains=ebnfSpecial,ebnfDelimiter,ebnfTerminal,ebnfSpecialSequence,ebnfComment nextgroup=ebnfEndProduction skipwhite skipempty
syn match ebnfDelimiter #[,(|)\]}\[{/!]\|\(\*)\)\|\((\*\)\|\(/)\)\|\(:)\)\|\((/\)\|\((:\)# contained
syn match ebnfSpecial /[\-\*]/ contained
syn region ebnfSpecialSequence matchgroup=Delimiter start=/?/ end=/?/ contained
syn match ebnfEndProduction /[\.;]/ contained
syn region ebnfTerminal matchgroup=delimiter start=/"/ end=/"/ contained
syn region ebnfTerminal matchgroup=delimiter start=/'/ end=/'/ contained
syn region ebnfComment start="(\*" end="\*)"
hi link ebnfComment Comment
hi link ebnfMetaIdentifier Identifier
hi link ebnfSeperator ebnfSpecial
hi link ebnfEndProduction ebnfDelimiter
hi link ebnfDelimiter Delimiter
hi link ebnfSpecial Special
hi link ebnfSpecialSequence Statement
hi link ebnfTerminal Constant
Binary file not shown.
-230
View File
@@ -1,230 +0,0 @@
(* vim: set ft=ebnf: *)
(*
The syntax of Extended BNF can be defined using
itself. There are four parts in this example,
the first part names the characters, the second
part defines the removal of unnecessary non-
printing characters, the third part defines the
removal of textual comments, and the final part
defines the structure of Extended BNF itself.
Each syntax rule in this example starts with a
comment that identifies the corresponding clause
in the standard.
The meaning of special-sequences is not defined
in the standard. In this example (see the
reference to 7.6) they represent control
functions defined by ISO/IEC 6429:1992.
Another special-sequence defines a
syntactic-exception (see the reference to 4.7).
*)
(*
The first part of the lexical syntax defines the
characters in the 7-bit character set (ISO/IEC
646:1991) that represent each terminal-character
and gap-separator in Extended BNF.
*)
(* see 7.2 *) letter
= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h'
| 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
| 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
| 'y' | 'z'
| 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H'
| 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P'
| 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
| 'Y' | 'Z';
(* see 7.2 *) decimal digit
= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7'
| '8' | '9';
(*
The representation of the following
terminal-characters is defined in clauses 7.3,
7.4 and tables 1, 2.
*)
concatenate symbol = ',';
defining symbol = '=';
definition separator symbol = '|' | '/' | '!';
end comment symbol = '*)';
end group symbol = ')';
end option symbol = ']' | '/)';
end repeat symbol = '}' | ':)';
except symbol = '-';
first quote symbol = "'";
repetition symbol = '*';
second quote symbol = '"';
special sequence symbol = '?';
start comment symbol = '(*';
start group symbol = '(';
start option symbol = '[' | '(/';
start repeat symbol = '{' | '(:';
terminator symbol = ';' | '.';
(* see 7.5 *) other character
= ' ' | ':' | '+' | '_' | '%' | '@'
| '&' | '#' | '$' | '<' | '>' | '\'
| '^' | '`' | '~';
(* see 7.6 *) space character = ' ';
horizontal tabulation character
= ? ISO 6429 character Horizontal Tabulation ? ;
new line
= { ? ISO 6429 character Carriage Return ? },
? ISO 6429 character Line Feed ?,
{ ? ISO 6429 character Carriage Return ? };
vertical tabulation character
= ? ISO 6429 character Vertical Tabulation ? ;
form feed
= ? ISO 6429 character Form Feed ? ;
(*
The second part of the syntax defines the
removal of unnecessary non-printing characters
from a syntax.
*)
(* see 6.2 *) terminal character
= letter
| decimal digit
| concatenate symbol
| defining symbol
| definition separator symbol
| end comment symbol
| end group symbol
| end option symbol
| end repeat symbol
| except symbol
| first quote symbol
| repetition symbol
| second quote symbol
| special sequence symbol
| start comment symbol
| start group symbol
| start option symbol
| start repeat symbol
| terminator symbol
| other character;
(* see 6.3 *) gap free symbol
= terminal character
- (first quote symbol | second quote symbol)
| terminal string;
(* see 4.16 *) terminal string
= first quote symbol, first terminal character,
{first terminal character},
first quote symbol
| second quote symbol, second terminal character,
{second terminal character},
second quote symbol;
(* see 4.17 *) first terminal character
= terminal character - first quote symbol;
(* see 4.18 *) second terminal character
= terminal character - second quote symbol;
(* see 6.4 *) gap separator
= space character
| horizontal tabulation character
| new line
| vertical tabulation character
| form feed;
(* see 6.5 *) syntax
= {gap separator},
gap free symbol, {gap separator},
{gap free symbol, {gap separator}};
(*
The third part of the syntax defines the
removal of bracketed-textual-comments from
gap-free-symbols that form a syntax.
*)
(* see 6.6 *) commentless symbol
= terminal character
- (letter
| decimal digit
| first quote symbol
| second quote symbol
| start comment symbol
| end comment symbol
| special sequence symbol
| other character)
| meta identifier
| integer
| terminal string
| special sequence;
(* see 4.9 *) integer
= decimal digit, {decimal digit};
(* see 4.14 *) meta identifier
= letter, {meta identifier character};
(* see 4.15 *) meta identifier character
= letter
| decimal digit;
(* see 4.19 *) special sequence
= special sequence symbol,
{special sequence character},
special sequence symbol;
(* see 4.20 *) special sequence character
= terminal character - special sequence symbol;
(* see 6.7 *) comment symbol
= bracketed textual comment
| other character
| commentless symbol;
(* see 6.8 *) bracketed textual comment
= start comment symbol, {comment symbol},
end comment symbol;
(* see 6.9 *) syntax
= {bracketed textual comment},
commentless symbol,
{bracketed textual comment},
{commentless symbol,
{bracketed textual comment}};
(*
The final part of the syntax defines the
abstract syntax of Extended BNF, i.e. the
structure in terms of the commentless symbols.
*)
(* see 4.2 *) syntax
= syntax rule, {syntax rule};
(* see 4.3 *) syntax rule
= meta identifier, defining symbol,
definitions list, terminator symbol;
(* see 4.4 *) definitions list
= single definition,
{definition separator symbol,
single definition};
(* see 4.5 *) single definition
= syntactic term,
{concatenate symbol, syntactic term};
(* see 4.6 *) syntactic term
= syntactic factor,
[except symbol, syntactic exception];
(* see 4.7 *) syntactic exception
= ? a syntactic-factor that could be replaced
by a syntactic-factor containing no
meta-identifiers
? ;
(* see 4.8 *) syntactic factor
= [integer, repetition symbol],
syntactic primary;
(* see 4.10 *) syntactic primary
= optional sequence
| repeated sequence
| grouped sequence
| meta identifier
| terminal string
| special sequence
| empty sequence;
(* see 4.11 *) optional sequence
= start option symbol, definitions list,
end option symbol;
(* see 4.12 *) repeated sequence
= start repeat symbol, definitions list,
end repeat symbol;
(* see 4.13 *) grouped sequence
= start group symbol, definitions list,
end group symbol;
(* see 4.21 *) empty sequence
= ;
-1
View File
@@ -1 +0,0 @@
package ebnf
-20
View File
@@ -1,20 +0,0 @@
package token
// types of Token
type TokenType int

// Predefined token types. Values start at 1 so the zero
// value of TokenType remains "UNDEFINED".
const (
	TokenError TokenType = iota + 1
	TokenEOF
)

// String converts the TokenType into a human-friendly label.
// Unknown values stringify as "UNDEFINED".
func (typ TokenType) String() string {
	if typ == TokenError {
		return "ERROR"
	}
	if typ == TokenEOF {
		return "EOF"
	}
	return "UNDEFINED"
}
-25
View File
@@ -1,25 +0,0 @@
package token
import (
"fmt"
"testing"
)
func TestTokenTypeToString(t *testing.T) {
var foo TokenType
for _, o := range []struct {
typ TokenType
str string
}{
{foo, "UNDEFINED"},
{TokenError, "ERROR"},
{TokenEOF, "EOF"},
{1234, "UNDEFINED"},
} {
str := fmt.Sprintf("%s", o.typ)
if str != o.str {
t.Errorf("TokenType:%v stringified as %s instead of %s.", int(o.typ), str, o.str)
}
}
}
-1
View File
@@ -1 +0,0 @@
package core
+2
View File
@@ -0,0 +1,2 @@
// Package core provides the foundations of asciigoat packages
package core
+1 -1
View File
@@ -1,3 +1,3 @@
module asciigoat.org/core
go 1.16
go 1.19
View File
+45
View File
@@ -0,0 +1,45 @@
package lexer
import (
"fmt"
"strings"
)
// interface check
var _ error = (*Error)(nil)

// Error represents a generic parsing error
type Error struct {
	Filename string
	Line     int
	Column   int
	Content  string
	Err      error
}

// Error renders the error as "filename:line:column: cause: "content"",
// skipping every portion that isn't set.
func (err Error) Error() string {
	parts := make([]string, 0, 3)

	if err.Line > 0 || err.Column > 0 {
		// position known: always include filename:line:column
		parts = append(parts, fmt.Sprintf("%s:%v:%v", err.Filename, err.Line, err.Column))
	} else if err.Filename != "" {
		// only the filename is known
		parts = append(parts, err.Filename)
	}

	if err.Err != nil {
		parts = append(parts, err.Err.Error())
	}
	if err.Content != "" {
		parts = append(parts, fmt.Sprintf("%q", err.Content))
	}

	return strings.Join(parts, ": ")
}

// Unwrap returns the wrapped error, if any, so callers
// can use errors.Is and errors.As.
func (err Error) Unwrap() error {
	return err.Err
}
+2
View File
@@ -0,0 +1,2 @@
// Package lexer provides basic helpers to implement parsers
package lexer
+221
View File
@@ -0,0 +1,221 @@
package lexer
import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
const (
	// ReadBufferSize indicates the initial buffer size
	ReadBufferSize = 1 << 7 // 128B
	// DoublingBufferSizeLimit indicates when we stop doubling
	// and just add instead
	DoublingBufferSizeLimit = 1 << 17 // 128KiB
)

// implemented interfaces
var (
	_ io.RuneReader  = (*Reader)(nil)
	_ io.RuneScanner = (*Reader)(nil)
)

var (
	// ErrInvalidUnreadRune indicates UnreadRune() was called after an
	// action other than a successful ReadRune()
	ErrInvalidUnreadRune = errors.New("invalid UnreadRune() call")
)

// Reader is a RuneReader aimed at implementing text parsers
type Reader struct {
	src io.Reader // source of bytes

	buf    []byte // read-ahead buffer
	off    int    // buf offset where the pending (not yet emitted) region starts
	cursor int    // buf offset of the next byte to decode

	lastRuneSize int // size of the last rune read, or -1 when UnreadRune() isn't allowed
}

// String returns what's already Read but not yet emitted or discarded
func (b *Reader) String() string {
	return string(b.buf[b.off:b.cursor])
}

// Emit returns what's already being Read and discards it afterwards
func (b *Reader) Emit() string {
	s := b.String()
	b.Discard()
	return s
}

// Discard removes from the buffer everything that has been Read
func (b *Reader) Discard() {
	switch {
	case b.ready() == 0:
		// nothing pending to decode, reset the buffer
		b.buf = b.buf[:0]
		b.cursor = 0
		b.off = 0
	default:
		// step over what has been read so far
		b.off = b.cursor
	}
	// and prevent UnreadRune()
	b.lastRuneSize = -1
}

// ready tells how many bytes are ready to decode
func (b *Reader) ready() int {
	return len(b.buf) - b.cursor
}

// available tells how many free bytes remain at the end of the buffer
func (b *Reader) available() int {
	return cap(b.buf) - len(b.buf)
}

// needsBytes reads from the source until at least n bytes are ready
// to decode, growing the buffer as needed. It returns the first
// read error encountered, if any.
func (b *Reader) needsBytes(n int) error {
	for {
		if b.ready() >= n {
			// ready
			return nil
		}
		// make room
		b.prepareBuffer(n - b.ready())
		// and read more
		_, err := b.fill()
		if err != nil {
			return err
		}
	}
}

// rebuffer moves the pending region to the start of a buffer of at
// least the given size, reusing the current storage when it's big enough
func (b *Reader) rebuffer(size int) {
	var src, dst []byte

	if size > cap(b.buf) {
		// new buffer
		dst = make([]byte, size)
	} else {
		// same buffer, compact in place
		dst = b.buf
	}

	src = b.buf[b.off:]
	dst = dst[:len(src)]
	copy(dst, src)

	b.cursor -= b.off
	b.buf = dst
	b.off = 0
}

// prepareBuffer makes sure there is room to read at least n more
// bytes, compacting or growing the buffer when needed
func (b *Reader) prepareBuffer(n int) {
	if n > b.available() {
		// bytes needed once the consumed head is dropped
		needed := len(b.buf) + n - b.off

		size := cap(b.buf)
		if size == 0 {
			// zero-value Reader: seed the size, otherwise the
			// doubling loop below would spin forever on 0*2 == 0
			size = ReadBufferSize
		}
		for size < needed {
			switch {
			case size < DoublingBufferSizeLimit:
				// double
				size *= 2
			default:
				// grow linearly past the limit
				size += DoublingBufferSizeLimit
			}
		}

		b.rebuffer(size)
	}
}

// fill performs one Read() on the source, appending to the
// read-ahead buffer, and returns the byte count and error
func (b *Reader) fill() (int, error) {
	start := len(b.buf)
	n, err := b.src.Read(b.buf[start:cap(b.buf)])
	if n > 0 {
		b.buf = b.buf[:start+n]
	}
	return n, err
}

// ReadRune reads the next rune
func (b *Reader) ReadRune() (rune, int, error) {
	// we need at least one byte to start
	count := 1
	for {
		err := b.needsBytes(count)
		if err != nil {
			b.lastRuneSize = -1
			return 0, 0, err
		}
		if utf8.FullRune(b.buf[b.cursor:]) {
			// we have a full rune
			break
		}
		// more
		count = b.ready() + 1
	}

	// decode rune
	r, l := utf8.DecodeRune(b.buf[b.cursor:])
	// step over
	b.cursor += l
	// and remember for UnreadRune()
	b.lastRuneSize = l

	return r, l, nil
}

// UnreadRune moves the cursor where it was before the last call to ReadRune
func (b *Reader) UnreadRune() error {
	if b.lastRuneSize > 0 {
		b.cursor -= b.lastRuneSize
		b.lastRuneSize = -1
		return nil
	}
	return ErrInvalidUnreadRune
}

// PeekRune returns information about the next rune without moving the
// cursor
func (b *Reader) PeekRune() (rune, int, error) {
	r, l, err := b.ReadRune()
	if err != nil {
		return r, l, err
	}
	err = b.UnreadRune()
	return r, l, err
}

// NewReader creates a new runes [Reader] using the given [io.Reader]
func NewReader(r io.Reader) *Reader {
	if r == nil {
		return nil
	}

	return &Reader{
		src: r,
		buf: make([]byte, 0, ReadBufferSize),
	}
}

// NewReaderBytes creates a new runes [Reader] using the given bytes
func NewReaderBytes(b []byte) *Reader {
	return NewReader(bytes.NewReader(b))
}

// NewReaderString creates a new runes [Reader] using the given string
func NewReaderString(s string) *Reader {
	return NewReader(strings.NewReader(s))
}
+64
View File
@@ -0,0 +1,64 @@
package core
import (
"bytes"
"io"
"io/fs"
"strings"
)
// ReadCloser adds a Close() to Readers without one
type ReadCloser struct {
	r io.Reader
}

// Read delegates to the wrapped [io.Reader], or fails with
// [fs.ErrClosed] once the ReadCloser has been closed.
func (rc *ReadCloser) Read(b []byte) (int, error) {
	if rc.r == nil {
		return 0, fs.ErrClosed
	}
	return rc.r.Read(b)
}

// Close releases the wrapped [io.Reader]. Closing a second
// time fails with [fs.ErrClosed].
func (rc *ReadCloser) Close() error {
	if rc.r == nil {
		return fs.ErrClosed
	}
	rc.r = nil
	return nil
}

// NewReadCloser wraps a [io.Reader] to satisfy
// [io.ReadCloser] if needed
func NewReadCloser(r io.Reader) io.ReadCloser {
	if r == nil {
		return nil
	}
	if rc, ok := r.(io.ReadCloser); ok {
		// already a ReadCloser, use it as-is
		return rc
	}
	return &ReadCloser{r: r}
}

// NewReadCloserBytes wraps a bytes slice to implement
// a [io.ReadCloser]
func NewReadCloserBytes(b []byte) io.ReadCloser {
	return NewReadCloser(bytes.NewReader(b))
}

// NewReadCloserString wraps a string to implement
// a [io.ReadCloser]
func NewReadCloserString(s string) io.ReadCloser {
	return NewReadCloser(strings.NewReader(s))
}
-135
View File
@@ -1,135 +0,0 @@
package runes
import (
"bufio"
"bytes"
"io"
"strings"
"sync"
)
// Feeder buffers runes decoded from an input RuneReader and exposes them to consumers
type Feeder struct {
sync.Mutex
in io.RuneReader
out []rune
sz []int
err error
}
// NewFeederBytes creates a new Feeder using a slice of bytes as input
func NewFeederBytes(b []byte) *Feeder {
return NewFeeder(bytes.NewReader(b))
}
// NewFeederString creates a new Feeder using a string as input
func NewFeederString(s string) *Feeder {
return NewFeeder(strings.NewReader(s))
}
// NewFeeder creates a new Feeder using a Reader as input
func NewFeeder(in io.Reader) *Feeder {
rd, ok := in.(io.RuneReader)
if !ok {
rd = bufio.NewReader(in)
}
return &Feeder{in: rd}
}
// Skip drops n runes from the head of the buffer
func (f *Feeder) Skip(n int) (int, bool) {
f.Lock()
defer f.Unlock()
if l := f.skip(n); l > 0 {
return l, true
} else {
return 0, false
}
}
func (f *Feeder) skip(n int) int {
if l := len(f.out); l > n {
f.out = f.out[n:]
f.sz = f.sz[n:]
return l - n
} else {
f.out = f.out[:0]
f.sz = f.sz[:0]
return 0
}
}
// ReadRune returns the next rune
func (f *Feeder) ReadRune() (r rune, size int, err error) {
f.Lock()
defer f.Unlock()
if f.atLeast(1) {
r = f.out[0]
size = f.sz[0]
f.skip(1)
}
err = f.Err()
return
}
// AtLeast blocks until there are at least n runes on the buffer, or an error or EOF has occurred
func (f *Feeder) AtLeast(n int) (out []rune, err error) {
f.Lock()
defer f.Unlock()
if !f.atLeast(n) {
err = f.err
}
if len(f.out) > 0 {
out = f.out
}
return
}
func (f *Feeder) atLeast(n int) bool {
for len(f.out) < n {
r, size, err := f.in.ReadRune()
if err != nil && f.err == nil {
// store first error
f.err = err
}
if size > 0 {
f.out = append(f.out, r)
f.sz = append(f.sz, size)
} else if f.err != nil {
break
}
}
return len(f.out) >= n
}
// Currently buffered runes
func (f *Feeder) Buffered() []rune {
return f.out
}
// Count of currently buffered runes
func (f *Feeder) Len() int {
return len(f.out)
}
// Feeder has reached EOF
func (f *Feeder) EOF() bool {
return f.err == io.EOF
}
// Feeder encountered an error
func (f *Feeder) Err() error {
if f.err == io.EOF {
return nil
}
return f.err
}
-124
View File
@@ -1,124 +0,0 @@
package runes
import (
"unicode"
)
// Probe was borrowed from https://github.com/JamesOwenHall/json2.Scanner
//
// Probe is a func that returns a subset of the input and a success bool.
type Probe func([]rune) ([]rune, bool)
// If returns a probe that accepts a rune if it satisfies the condition.
func If(condition func(rune) bool) Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && condition(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// Rune returns a probe that accepts r.
func Rune(r rune) Probe {
return If(func(b rune) bool {
return r == b
})
}
// Space returns a probe that accepts whitespace as defined in the unicode
// package.
func Space() Probe {
return func(input []rune) ([]rune, bool) {
if len(input) > 0 && unicode.IsSpace(input[0]) {
return input[0:1], true
}
return nil, false
}
}
// And returns a probe that accepts all probes in sequence.
func And(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for _, s := range probes {
if read, ok := s(remaining); !ok {
return nil, false
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
return accumulated, true
}
}
// Or returns a probe that accepts the first successful probe in probes.
func Or(probes ...Probe) Probe {
return func(input []rune) ([]rune, bool) {
for _, s := range probes {
if read, ok := s(input); ok {
return read, true
}
}
return nil, false
}
}
// Maybe runs probe and returns true regardless of the output.
func Maybe(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
read, _ := probe(input)
return read, true
}
}
// Any returns a probe that accepts any number of occurrences of probe,
// including zero.
func Any(probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
remaining := input
accumulated := []rune{}
for {
if read, ok := probe(remaining); !ok {
return accumulated, true
} else {
accumulated = append(accumulated, read...)
remaining = remaining[len(read):]
}
}
}
}
// N returns a probe that accepts probe exactly n times.
func N(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n)
for i := 0; i < n; i++ {
probes[i] = probe
}
return And(probes...)(input)
}
}
// AtLeast returns a probe that accepts probe at least n times.
func AtLeast(n int, probe Probe) Probe {
return func(input []rune) ([]rune, bool) {
probes := make([]Probe, n, n+1)
for i := range probes {
probes[i] = probe
}
probes = append(probes, Any(probe))
return And(probes...)(input)
}
}
-58
View File
@@ -1,58 +0,0 @@
package runes
import (
"testing"
)
func TestProbe(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "a"},
{Space(), " "},
{Space(), "\t"},
{Space(), "\n"},
{And(Rune('1'), Rune('2'), Space()), "12 "},
{Or(Rune('r'), Space(), Rune('x')), "r"},
{Or(Rune('r'), Space(), Rune('x')), " "},
{Or(Rune('r'), Space(), Rune('x')), "x"},
{Any(Rune('w')), ""},
{Any(Rune('w')), "w"},
{Any(Rune('w')), "ww"},
{Any(Rune('w')), "www"},
{N(6, Rune('w')), "wwwwww"},
{Maybe(Rune('w')), ""},
{Maybe(Rune('w')), "w"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); !ok {
t.Errorf("Expected to read %s", string(test.input))
} else if string(read) != test.input {
t.Errorf("Mismatch of input %s and read %s", test.input, string(read))
}
}
}
func TestProbeFail(t *testing.T) {
type TestCase struct {
probe Probe
input string
}
tests := []TestCase{
{Rune('a'), "b"},
{Space(), "a"},
{And(Rune('1'), Rune('2'), Space()), "12"},
{Or(Rune('r'), Space(), Rune('x')), "4"},
}
for _, test := range tests {
if read, ok := test.probe([]rune(test.input)); ok {
t.Errorf("Unexpectedly read %s with input %s", string(read), test.input)
}
}
}
-5
View File
@@ -1,5 +0,0 @@
/*
Package scanner implements the low level functionality
of AsciiGoat lexers
*/
package scanner
-99
View File
@@ -1,99 +0,0 @@
package scanner
import (
"unicode/utf8"
)
const (
	// EOF is a dummy rune representing End-Of-File
	EOF = -1
)

// A Position in the input string and in the line-based document
type Position struct {
	Offset       uint // byte offset into the input string
	Line, Column uint // location in the document; both start at 1
}

// A Scanner represents the low level layer for text parsers
type Scanner struct {
	name   string   // label identifying the input
	input  string   // the whole document being scanned
	base   Position // start of the Terminal currently being detected
	cursor Position // position right after the last accepted rune
	runes  uint     // runes accounted since base
}

// NewScannerFromString instantiates a new Scanner to
// parse a given string
func NewScannerFromString(name, input string) *Scanner {
	return &Scanner{
		name:  name,
		input: input,
		// lines and columns start counting at 1
		base:   Position{0, 1, 1},
		cursor: Position{0, 1, 1},
		runes:  0,
	}
}

// Length returns the number of bytes and runes in the Terminal that is being detected
func (l *Scanner) Length() (uint, uint) {
	return l.cursor.Offset - l.base.Offset, l.runes
}

// Empty tells if there are no runes accounted for the next Terminal yet
func (l *Scanner) Empty() bool {
	return l.runes == 0
}

// StepForth moves the cursor forward by the given rune and byte counts
func (l *Scanner) StepForth(runes, bytes uint) {
	l.cursor.Offset += bytes
	l.cursor.Column += runes
	l.runes += runes
}

// StepBack moves the cursor backward by the given rune and byte counts
func (l *Scanner) StepBack(runes, bytes uint) {
	l.cursor.Offset -= bytes
	// FIXME: what if column goes < 1?
	l.cursor.Column -= runes
	l.runes -= runes
}

// Reset moves the cursor back to the base, dropping the current Terminal
func (l *Scanner) Reset() {
	l.cursor = l.base
	l.runes = 0
}

// Skip trashes everything up to the cursor, making it the new base
func (l *Scanner) Skip() {
	l.base = l.cursor
	l.runes = 0
}

// NewLine accounts a line break in the position of the cursor
func (l *Scanner) NewLine() {
	l.cursor.Line++
	l.cursor.Column = 1
}

// Peek returns the next rune and its byte size without moving the cursor.
// At the end of the input it returns (EOF, 0).
func (l *Scanner) Peek() (rune, uint) {
	if l.cursor.Offset == uint(len(l.input)) {
		// end of input
		return EOF, 0
	}
	r, bytes := utf8.DecodeRuneInString(l.input[l.cursor.Offset:])
	return r, uint(bytes)
}

// Next returns the next rune and its byte size, moving the cursor past it
func (l *Scanner) Next() (rune, uint) {
	r, bytes := l.Peek()
	if bytes > 0 {
		l.StepForth(1, bytes)
	}
	return r, bytes
}
-43
View File
@@ -1,43 +0,0 @@
package scanner
import (
"unicode/utf8"
)
// A Terminal represents a literal element within a document
type Terminal struct {
	val          string // literal text of the element
	bytes, runes uint   // length of val in bytes and in runes
	line, col    uint   // position of the element in the source document
}

// NewTerminalFull returns a new Terminal instance with all
// fields provided by the caller
func NewTerminalFull(val string, bytes, runes, line, col uint) *Terminal {
	return &Terminal{
		val:   val,
		bytes: bytes,
		runes: runes,
		line:  line,
		col:   col,
	}
}

// NewTerminal creates a Terminal instance without knowing its length,
// computing the byte and rune counts from val
func NewTerminal(val string, line, col uint) *Terminal {
	bytes := uint(len(val))
	runes := uint(utf8.RuneCountInString(val))
	return NewTerminalFull(val, bytes, runes, line, col)
}

// Position returns the position (line and column)
// of the Terminal in the source document
func (t *Terminal) Position() (uint, uint) {
	return t.line, t.col
}

// Value returns the string corresponding to
// this Terminal and its size in bytes and runes
func (t *Terminal) Value() (string, uint, uint) {
	return t.val, t.bytes, t.runes
}
}