diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index d27bb4a..02f20eb 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -79,7 +79,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) { c.Result += c.op * p.Result().Value(0).(int64) p.Handle(c.operatorOrEndOfFile) } else { - p.UnexpectedInput("integer number") + p.Expected("integer number") } } @@ -93,7 +93,7 @@ func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { c.op = -1 p.Handle(c.number) case !p.On(A.EndOfFile).Stay(): - p.UnexpectedInput("operator, '+' or '-'") + p.Expected("operator, '+' or '-'") default: p.ExpectEndOfFile() } diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index e5973b5..ebf08b8 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -140,11 +140,11 @@ func (calc *calculator) factor(p *parsekit.ParseAPI) { return } if !p.On(A.RightParen).Skip() { - p.UnexpectedInput("')'") + p.Expected("')'") return } default: - p.UnexpectedInput("factor or parenthesized expression") + p.Expected("factor or parenthesized expression") return } p.On(A.Blanks).Skip() diff --git a/examples/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go index df57fed..c7ece89 100644 --- a/examples/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -85,7 +85,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) { if p.On(a.StrNoCase("hello")).Skip() { p.Handle(h.comma) } else { - p.UnexpectedInput("hello") + p.Expected("hello") } } @@ -97,7 +97,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) { case p.On(a.Comma).Skip(): p.Handle(h.startName) default: - p.UnexpectedInput("comma") + p.Expected("comma") } } @@ -107,7 +107,7 @@ func (h *helloparser1) startName(p *parsekit.ParseAPI) { if p.On(a.AnyRune).Stay() { 
p.Handle(h.name) } else { - p.UnexpectedInput("name") + p.Expected("name") } } @@ -120,7 +120,7 @@ func (h *helloparser1) name(p *parsekit.ParseAPI) { h.greetee += p.Result().String() p.Handle(h.name) default: - p.UnexpectedInput("exclamation mark") + p.Expected("exclamation mark") } } @@ -129,7 +129,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { if p.On(a.Excl).Accept() { p.Handle(h.end) } else { - p.UnexpectedInput("exclamation") + p.Expected("exclamation") } } @@ -139,7 +139,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { func (h *helloparser1) end(p *parsekit.ParseAPI) { var a = parsekit.A if !p.On(a.EndOfFile).Stay() { - p.UnexpectedInput("end of greeting") + p.Expected("end of greeting") return } diff --git a/parseapi.go b/parseapi.go index d596b4f..3791e5b 100644 --- a/parseapi.go +++ b/parseapi.go @@ -5,10 +5,10 @@ import ( "io" ) -// ParseAPI holds the internal state of a parse run and provides an API to -// ParseHandler methods to communicate with the parser. +// ParseAPI holds the internal state of a parse run and provides an API that +// ParseHandler methods can use to communicate with the parser. type ParseAPI struct { - tokenAPI *TokenAPI // the input reader + tokenAPI *TokenAPI // the TokenAPI, used for communicating with TokenHandler functions loopCheck map[string]bool // used for parser loop detection result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept() err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored @@ -123,8 +123,9 @@ type ParseAPIOnAction struct { // found by a TokenHandler, and to make the TokenHandlerResult from the // TokenAPI available in the ParseAPI through the ParseAPI.Result() method. // -// Returns true in case a match was found. -// When no match was found, then no action is taken and false is returned. +// Returns true in case a match was found by On(). 
+// When no match was found, then no action is taken, no results are +// exposed and false is returned. func (a *ParseAPIOnAction) Accept() bool { if a.ok { a.forkedTokenAPI.Merge() @@ -145,7 +146,7 @@ func (a *ParseAPIOnAction) Accept() bool { // than the Accept() call and (more important if you ask me) the code // expresses more clearly that your intent is to skip the match. // -// Returns true in case a match was found. +// Returns true in case a match was found by On(). // When no match was found, then no action is taken and false is returned. func (a *ParseAPIOnAction) Skip() bool { if a.ok { @@ -167,7 +168,7 @@ func (a *ParseAPIOnAction) Skip() bool { // When a match is found, it hands off control to another ParseHandler // to take care of the actual token parsing. // -// Returns true in case a match was found, false otherwise. +// Returns true in case a match was found by On(), false otherwise. func (a *ParseAPIOnAction) Stay() bool { if a.ok { a.parseAPI.result = nil @@ -179,6 +180,9 @@ func (a *ParseAPIOnAction) Stay() bool { // Result returns a TokenHandlerResult struct, containing results as produced by the // last ParseAPI.On().Accept() call. +// +// When Result() is called without first doing a ParseAPI.On().Accept(), then no +// result will be available and the method will panic. func (p *ParseAPI) Result() *TokenHandlerResult { result := p.result if p.result == nil { @@ -213,7 +217,7 @@ func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { // When the parser implementation returns without stopping first (and // without running into an error), the Parser.Execute() will call // ParserAPI.ExpectEndOfFile() to check if the end of the file was reached. -// If not, then things will end in an UnexpectedError(). +// If not, then things will end in an unexpected input error. // Even though this fallback mechanism will work in a lot of cases, try to make // your parser explicit about things and call Stop() actively yourself. 
// @@ -238,25 +242,28 @@ func (p *ParseAPI) Error(format string, args ...interface{}) { // // When it finds that the end of the file was indeed reached, then the // parser will be stopped through ParseAPI.Stop(). Otherwise unexpected -// input is reported through ParseAPI.UnexpectedInput() with "end of file" +// input is reported through ParseAPI.Expected() with "end of file" // as the expectation. func (p *ParseAPI) ExpectEndOfFile() { p.panicWhenStoppedOrInError() if p.On(A.EndOfFile).Stay() { p.Stop() } else { - p.UnexpectedInput("end of file") + p.Expected("end of file") } } -// UnexpectedInput is used to set an error that tells the user that some -// unexpected input was encountered. +// Expected is used to set an error that tells the user that some +// unexpected input was encountered, and what input was expected instead. +// +// The 'expected' argument can be an empty string. In that case the error +// message will not contain a description of the expected input. // // It automatically produces an error message for a couple of situations: // 1) the input simply didn't match the expectation // 2) the end of the input was reached // 3) there was an error while reading the input. -func (p *ParseAPI) UnexpectedInput(expected string) { +func (p *ParseAPI) Expected(expected string) { p.panicWhenStoppedOrInError() _, err := p.tokenAPI.NextRune() switch { diff --git a/parser.go b/parser.go index e10cdf3..fc29824 100644 --- a/parser.go +++ b/parser.go @@ -1,9 +1,5 @@ package parsekit -import ( - "strings" -) - // Parser is the top-level struct that holds the configuration for a parser. // The Parser can be instantiated using the parsekit.NewParser() method. type Parser struct { @@ -34,10 +30,12 @@ func NewParser(startHandler ParseHandler) *Parser { } // Execute starts the parser for the provided input. -// When an error occurs during parsing, then this error is returned. Nil otherwise. 
-func (p *Parser) Execute(input string) *Error { +// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). +// +// When an error occurs during parsing, then this error is returned, nil otherwise. +func (p *Parser) Execute(input interface{}) *Error { api := &ParseAPI{ - tokenAPI: NewTokenAPI(strings.NewReader(input)), + tokenAPI: NewTokenAPI(input), loopCheck: map[string]bool{}, } if api.Handle(p.startHandler) { diff --git a/parser_test.go b/parser_test.go index 137695a..5d48b8b 100644 --- a/parser_test.go +++ b/parser_test.go @@ -44,9 +44,9 @@ func ExampleParser_usingTokens() { // Token values: RUNE("¡", value = (int32)161) RUNE("ö", value = (int32)246) RUNE("k", value = (int32)107) RUNE("!", value = (int32)33) } -func ExampleParseAPI_UnexpectedInput() { +func ExampleParseAPI_Expected() { parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.UnexpectedInput("a thing") + p.Expected("a thing") }) err := parser.Execute("Whatever, this parser will never be happy...") fmt.Println(err.Full()) diff --git a/reader/reader.go b/reader/reader.go index 44c9f70..95b30b0 100644 --- a/reader/reader.go +++ b/reader/reader.go @@ -43,13 +43,15 @@ import ( "bufio" "fmt" "io" + "strings" "unicode/utf8" ) -// Reader wraps around an io.Reader and provides buffering that allows us to read -// the same runes over and over again. This is useful for implementing a parser -// that must be able to do lookahead on the input, returning to the original -// input position after finishing that lookahead). +// Reader wraps around a bufio.Reader and provides an additional layer of +// buffering that allows us to read the same runes over and over again. +// This is useful for implementing a parser that must be able to do lookahead +// on the input, returning to the original input position after finishing +// that lookahead). 
// // To minimze memory use, it is also possible to flush the read buffer when there is // no more need to go back to previously read runes. @@ -62,14 +64,34 @@ type Reader struct { bufferLen int // Input size, the number of runes in the buffer } -// New initializes a new reader struct, wrapped around the provided io.Reader. -func New(r io.Reader) *Reader { +// New initializes a new reader struct, wrapped around the provided input. +// +// The input can be any one of the following types: +// - string +// - type implementing io.Reader +// - bufio.Reader +func New(input interface{}) *Reader { return &Reader{ - bufio: bufio.NewReader(r), + bufio: makeBufioReader(input), buffer: []rune{}, } } +func makeBufioReader(input interface{}) *bufio.Reader { + switch input := input.(type) { + case bufio.Reader: + return &input + case *bufio.Reader: + return input + case io.Reader: + return bufio.NewReader(input) + case string: + return bufio.NewReader(strings.NewReader(input)) + default: + panic(fmt.Sprintf("parsekit.reader.New(): no support for input of type %T", input)) + } +} + // RuneAt reads the rune at the provided rune offset. // // This offset is relative to the current starting position of the buffer in diff --git a/reader/reader_test.go b/reader/reader_test.go index 85df380..902320a 100644 --- a/reader/reader_test.go +++ b/reader/reader_test.go @@ -1,6 +1,7 @@ package reader_test import ( + "bufio" "fmt" "io" "strings" @@ -21,6 +22,34 @@ func ExampleNew() { // H! 
} +func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { + for _, test := range []struct { + name string + input interface{} + }{ + {"string", "Hello, world!"}, + {"io.Reader", strings.NewReader("Hello, world!")}, + {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, + {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, + } { + r := reader.New(test.input) + firstRune, _ := r.RuneAt(0) + if firstRune != 'H' { + t.Errorf("[%s] first rune not 'H'", test.name) + } + lastRune, _ := r.RuneAt(12) + if lastRune != '!' { + t.Errorf("[%s] last rune not '!'", test.name) + } + } +} + +func TestNew_UnhandledInputType_Panics(t *testing.T) { + assert.PanicsWithValue(t, + "parsekit.reader.New(): no support for input of type int", + func() { reader.New(12345) }) +} + func TestReader_RuneAt(t *testing.T) { r := reader.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } diff --git a/tokenapi.go b/tokenapi.go index ede53ab..d706af0 100644 --- a/tokenapi.go +++ b/tokenapi.go @@ -2,7 +2,6 @@ package parsekit import ( "fmt" - "io" "git.makaay.nl/mauricem/go-parsekit/reader" ) @@ -69,12 +68,11 @@ type TokenAPI struct { } // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. -func NewTokenAPI(r io.Reader) *TokenAPI { - input := &TokenAPI{ - reader: reader.New(r), +func NewTokenAPI(input interface{}) *TokenAPI { + return &TokenAPI{ + reader: reader.New(input), result: newTokenHandlerResult(), } - return input } // NextRune returns the rune at the current read offset. 
diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 3f90c47..07ea93d 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -394,7 +394,7 @@ func TestSequenceOfRunes(t *testing.T) { output = p.Result().String() p.Stop() } else { - p.UnexpectedInput("sequence of runes") + p.Expected("sequence of runes") } }) err := parser.Execute(input) diff --git a/tokenizer.go b/tokenizer.go index 9225aaf..c9530bb 100644 --- a/tokenizer.go +++ b/tokenizer.go @@ -30,16 +30,18 @@ func NewTokenizer(tokenHandler TokenHandler) *Tokenizer { tokenizer.result = p.Result() p.Stop() } else { - p.UnexpectedInput("") + p.Expected("") } }) return tokenizer } // Execute feeds the input to the wrapped TokenHandler function. +// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). +// // It returns the TokenHandler's TokenHandlerResult. When an error occurred // during parsing, the error will be set, nil otherwise. -func (t *Tokenizer) Execute(input string) (*TokenHandlerResult, *Error) { +func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *Error) { err := t.parser.Execute(input) return t.result, err }