diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index 1dfc5ca..a4a23ad 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -77,7 +77,7 @@ var int64Token = tokenize.T.Int64(nil, bareInteger) func (c *simpleCalculator) number(p *parse.API) { if p.Accept(int64Token) { - c.Result += c.op * p.Result.Tokens[0].Value.(int64) + c.Result += c.op * p.Result.Token(0).Value.(int64) p.Handle(c.operatorOrEndOfFile) } else { p.Expected("integer number") diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index a71b38d..8b97e45 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -98,7 +98,7 @@ func (calc *calculator) expr(p *parse.API) { var A = tokenize.A if p.Handle(calc.term) { for p.Accept(A.Add.Or(A.Subtract)) { - op := p.Result.Bytes[0] + op := p.Result.Byte(0) if !p.Handle(calc.term) { return } @@ -116,7 +116,7 @@ func (calc *calculator) term(p *parse.API) { var A = tokenize.A if p.Handle(calc.factor) { for p.Accept(A.Multiply.Or(A.Divide)) { - op := p.Result.Bytes[0] + op := p.Result.Byte(0) if !p.Handle(calc.factor) { return } @@ -134,7 +134,7 @@ func (calc *calculator) factor(p *parse.API) { p.Skip(A.Blanks) switch { case p.Accept(T.Float64(nil, A.Signed(A.Decimal))): - value := p.Result.Tokens[0].Value.(float64) + value := p.Result.Token(0).Value.(float64) calc.interpreter.pushValue(value) case p.Skip(A.LeftParen): if !p.Handle(calc.expr) { diff --git a/examples/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go index b40a3b3..560d750 100644 --- a/examples/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -29,7 +29,7 @@ func Example_dutchPostcodeUsingTokenizer() { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result) - for _, t := range result.Tokens { + for _, t := range result.Tokens() { fmt.Printf(" %s(%s)", t.Type, t.Value) } fmt.Printf("\n") diff --git a/parse/api.go b/parse/api.go index ab1928a..9e22363 100644 --- a/parse/api.go +++ b/parse/api.go @@ -16,10 +16,10 @@ import ( // // • call other parse.Handler functions, the core of recursive-descent parsing (Handle) type API struct { - tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions - Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call - err error // parse error, retrieved by Error(), using API methods is denied when set - stopped bool // a boolean set to true by Stop() + tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions + Result *tokenize.Result // a struct, providing access to the results of the last successful Peek() or Accept() call + err error // parse error, retrieved by Error(), using API methods is denied when set + stopped bool // a boolean set to true by Stop() } // Peek checks if the upcoming input data matches the provided tokenize.Handler. 
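// NOTE (reviewer annotation, not part of the patch): the example updates
// above show the new access pattern: matched data is read through Result
// accessor methods instead of the removed Tokens/Bytes slices. A minimal
// sketch of a hypothetical handler in that style, reusing only token
// handlers that appear in this patch (the "price" name is illustrative):
//
//	func price(p *parse.API) {
//		T, A := tokenize.T, tokenize.A
//		if p.Accept(T.Float64(nil, A.Signed(A.Decimal))) {
//			value := p.Result.Token(0).Value.(float64) // was: p.Result.Tokens[0].Value.(float64)
//			fmt.Printf("parsed %q as %f\n", p.Result.String(), value)
//		} else {
//			p.Expected("a floating point price")
//		}
//	}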
@@ -32,13 +32,8 @@ type API struct { func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool { tokenAPI := parseAPI.tokenAPI snap := tokenAPI.MakeSnapshot() - parseAPI.Result.Tokens = nil - parseAPI.Result.Bytes = nil ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler) - if ok { - parseAPI.Result.Tokens = tokenAPI.Output.Tokens() - parseAPI.Result.Bytes = tokenAPI.Output.Bytes() - } + tokenAPI.Result.Store() tokenAPI.RestoreSnapshot(snap) return ok } @@ -54,15 +49,9 @@ func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler) if ok { // Keep track of the results as produced by this child. - // TODO put in function and also in Peek() Record Cursor() / error too? - parseAPI.Result.Tokens = tokenAPI.Output.Tokens() - parseAPI.Result.Bytes = tokenAPI.Output.Bytes() + tokenAPI.Result.Store() - // Now the results are stored, we can flush the results. - // This does not empty the byte and token store, but it does move the - // pointers within those stores back to the start. By doing this, - // the stores will be reused for the upcoming calls, which saves on - // memory usage. + // Flush the output as initialization for the next token handler. tokenAPI.Output.Flush() // Also flush the input reader buffer. Accepting input means that we @@ -75,13 +64,14 @@ func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { func (parseAPI *API) Skip(tokenHandler tokenize.Handler) bool { tokenAPI := parseAPI.tokenAPI + tokenAPI.Output.Suspend() if !parseAPI.invokeTokenizeHandler("Skip", tokenHandler) { + tokenAPI.Output.Resume() return false } - parseAPI.Result.Tokens = nil - parseAPI.Result.Bytes = nil - tokenAPI.Output.Flush() + tokenAPI.Output.Resume() + tokenAPI.Result.Clear() tokenAPI.Input.Flush() return true } diff --git a/parse/parse.go b/parse/parse.go index cc8da66..6eb6a18 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -30,9 +30,10 @@ func New(startHandler Handler) Func { callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}") } return func(input interface{}) error { + tokenAPI := tokenize.NewAPI(input) api := &API{ - tokenAPI: tokenize.NewAPI(input), - // NOOPCHECK loopCheck: make(map[uintptr]bool), + tokenAPI: tokenAPI, + Result: &tokenAPI.Result, } if api.Handle(startHandler) { // Handle returned true, indicating that parsing could still continue. 
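// NOTE (reviewer annotation, not part of the patch): parse.New now wires a
// single shared Result between the parse and tokenize layers. Together with
// the Skip() change above, the contract becomes: Skip consumes input and
// clears p.Result, while Accept consumes input and stores the match. A
// sketch of that contract from the caller's side:
//
//	parser := parse.New(func(p *parse.API) {
//		a := tokenize.A
//		p.Skip(a.Blanks) // cursor advances, p.Result is cleared
//		if p.Accept(a.Digits) {
//			fmt.Printf("digits: %s\n", p.Result.String())
//		} else {
//			p.Expected("digits")
//		}
//	})
//	err := parser("   12345") // expected to print "digits: 12345" with err == nil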
diff --git a/parse/parse_test.go b/parse/parse_test.go index 5864b47..8809361 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -35,7 +35,8 @@ func ExampleNew_usingTokens() { if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) { fmt.Printf("Runes accepted: %q\n", p.Result.String()) fmt.Printf("Tokens:\n") - for i, token := range p.Result.Tokens { + tokens := p.Result.Tokens() + for i, token := range tokens { fmt.Printf("[%d] %s\n", i, token) } } diff --git a/tokenize/api.go b/tokenize/api.go index 6ec2872..46f6b52 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -76,7 +76,8 @@ type API struct { Input Input // access to a set of general input-related methods Byte InputByteMode // access to a set of byte-based input methods Rune InputRuneMode // access to a set of UTF8 rune-based input methods - Output Output // access to a set of output-related functionality + Output Output // access to a set of output-related methods + Result Result // access to a set of result retrieval methods outputTokens []Token // storage for accepted tokens outputBytes []byte // storage for accepted bytes } @@ -85,10 +86,10 @@ type stackFrame struct { offset int // the read offset, relative to the start of the reader buffer column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame) line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame) - bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level - bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level - tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level - tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level + bytesStart int // the starting point in the API.bytes slice for produced bytes + bytesEnd int // the end point in the API.bytes slice for produced bytes + tokenStart int // the starting point in the API.tokens slice for produced tokens + tokenEnd int // the end point in the API.tokens slice for produced tokens } // NewAPI initializes a new API struct, wrapped around the provided input. @@ -99,9 +100,10 @@ func NewAPI(input interface{}) *API { reader: read.New(input), } tokenAPI.Input = Input{api: tokenAPI} - tokenAPI.Byte = InputByteMode{api: tokenAPI} - tokenAPI.Rune = InputRuneMode{api: tokenAPI} + tokenAPI.Input.Byte = InputByteMode{api: tokenAPI} + tokenAPI.Input.Rune = InputRuneMode{api: tokenAPI} tokenAPI.Output = Output{api: tokenAPI} + tokenAPI.Result = Result{api: tokenAPI} return tokenAPI } diff --git a/tokenize/api_bytemode.go b/tokenize/api_bytemode.go index 196c13f..a004b9a 100644 --- a/tokenize/api_bytemode.go +++ b/tokenize/api_bytemode.go @@ -25,7 +25,10 @@ func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) { } func (byteMode InputByteMode) Accept(b byte) { - byteMode.api.Output.AddByte(b) + a := byteMode.api + if a.Output.suspended == 0 { + byteMode.api.Output.AddByte(b) + } byteMode.MoveCursor(b) } @@ -41,7 +44,10 @@ func (byteMode InputByteMode) Accept(b byte) { // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted bytes. func (byteMode InputByteMode) AcceptMulti(bytes ...byte) { - byteMode.api.Output.AddBytes(bytes...) + a := byteMode.api + if a.Output.suspended == 0 { + a.Output.AddBytes(bytes...) + } byteMode.MoveCursorMulti(bytes...) 
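// NOTE (reviewer annotation, not part of the patch): with the byte and rune
// modes nested under Input, a hand-written tokenize.Handler now reaches them
// as tokenAPI.Input.Byte and tokenAPI.Input.Rune. A sketch, assuming a
// single-byte Peek(offset) exists next to the PeekMulti shown above:
//
//	var matchDash tokenize.Handler = func(t *tokenize.API) bool {
//		b, err := t.Input.Byte.Peek(0)
//		if err != nil || b != '-' {
//			return false
//		}
//		t.Input.Byte.Accept(b) // records output (unless suspended) and moves the cursor
//		return true
//	}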
 }
diff --git a/tokenize/api_input.go b/tokenize/api_input.go
index 28b546b..c813ef9 100644
--- a/tokenize/api_input.go
+++ b/tokenize/api_input.go
@@ -7,7 +7,9 @@ import (
 // Input provides input-related functionality for the tokenize API,
 // which is not specifically bound to a specific read mode (byte, rune).
 type Input struct {
-	api *API
+	api  *API
+	Byte InputByteMode
+	Rune InputRuneMode
 }
 
 // Cursor returns a string that describes the current read cursor position.
diff --git a/tokenize/api_output.go b/tokenize/api_output.go
index 4b7d837..a621279 100644
--- a/tokenize/api_output.go
+++ b/tokenize/api_output.go
@@ -6,7 +6,16 @@ import (
 
 // Output provides output-related functionality for the tokenize API.
 type Output struct {
-	api *API
+	api       *API
+	suspended int // suspend nesting depth; output registration is disabled while this is > 0
+}
+
+func (o *Output) Suspend() { // disables output registration; calls can be nested
+	o.suspended++
+}
+
+func (o *Output) Resume() { // re-enables output registration after a matching Suspend
+	o.suspended--
 }
 
 func (o Output) Bytes() []byte {
@@ -30,23 +39,30 @@ func (o Output) Rune(offset int) rune {
 
 func (o Output) Flush() {
 	a := o.api
-	a.pointers.bytesStart = 0
-	a.pointers.bytesEnd = 0
-	a.pointers.tokenStart = 0
-	a.pointers.tokenEnd = 0
+	a.pointers.bytesStart = a.pointers.bytesEnd
+	a.pointers.tokenStart = a.pointers.tokenEnd
 }
 
 func (o Output) ClearData() {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	a.pointers.bytesEnd = a.pointers.bytesStart
 }
 
 func (o Output) SetBytes(bytes ...byte) {
+	if o.suspended > 0 {
+		return
+	}
 	o.ClearData()
 	o.AddBytes(bytes...)
 }
 
 func (o Output) AddByte(b byte) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	curBytesEnd := a.pointers.bytesEnd
 	a.growOutputData(curBytesEnd + 1)
@@ -55,11 +71,17 @@
 }
 
 func (o Output) SetRunes(runes ...rune) {
+	if o.suspended > 0 {
+		return
+	}
 	o.ClearData()
 	o.AddRunes(runes...)
 }
 
 func (o Output) AddBytes(bytes ...byte) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	curBytesEnd := a.pointers.bytesEnd
 	newBytesEnd := curBytesEnd + len(bytes)
@@ -69,6 +91,9 @@
 }
 
 func (o Output) AddRunes(runes ...rune) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	runesAsString := string(runes)
 	newBytesEnd := a.pointers.bytesEnd + len(runesAsString)
@@ -78,10 +103,16 @@
 }
 
 func (o Output) AddString(s string) {
+	if o.suspended > 0 {
+		return
+	}
 	o.AddBytes([]byte(s)...)
 }
 
 func (o Output) SetString(s string) {
+	if o.suspended > 0 {
+		return
+	}
 	o.ClearData()
 	o.AddBytes([]byte(s)...)
 }
@@ -102,16 +133,25 @@ func (o Output) TokenValue(offset int) interface{} {
 }
 
 func (o Output) ClearTokens() {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	a.pointers.tokenEnd = a.pointers.tokenStart
 }
 
 func (o Output) SetTokens(tokens ...Token) {
+	if o.suspended > 0 {
+		return
+	}
 	o.ClearTokens()
 	o.AddTokens(tokens...)
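// NOTE (reviewer annotation, not part of the patch): suspension is a nesting
// counter rather than a boolean, so wrapped handlers that suspend the output
// themselves only re-enable it at the outermost Resume. The intended calling
// discipline around an arbitrary handler looks like:
//
//	tokenAPI.Output.Suspend()
//	ok := handler(tokenAPI)  // any Add*/Set*/Clear* output calls are no-ops here
//	tokenAPI.Output.Resume() // output registration is active again from here on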
 }
 
 func (o Output) AddToken(token Token) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	tokenEnd := a.pointers.tokenEnd
 	a.growOutputTokens(tokenEnd + 1)
@@ -120,6 +160,9 @@
 }
 
 func (o Output) InsertTokenAtStart(token Token) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	tokenEnd := a.pointers.tokenEnd
 	tokenStart := a.pointers.tokenStart
@@ -134,6 +177,9 @@
 }
 
 func (o Output) AddTokens(tokens ...Token) {
+	if o.suspended > 0 {
+		return
+	}
 	a := o.api
 	a.growOutputTokens(a.pointers.tokenEnd + len(tokens))
 	for _, t := range tokens {
diff --git a/tokenize/api_result.go b/tokenize/api_result.go
index 88a2f14..50afa76 100644
--- a/tokenize/api_result.go
+++ b/tokenize/api_result.go
@@ -1,11 +1,57 @@
 package tokenize
 
+import "unicode/utf8"
+
 // Result holds the bytes and tokens as produced by the tokenizer.
 type Result struct {
-	Tokens []Token
-	Bytes  []byte
+	api        *API
+	bytesStart int // the starting point in the API.bytes slice for this result's bytes
+	bytesEnd   int // the end point in the API.bytes slice for this result's bytes
+	tokenStart int // the starting point in the API.tokens slice for this result's tokens
+	tokenEnd   int // the end point in the API.tokens slice for this result's tokens
+}
+
+func (result *Result) Store() {
+	p := result.api.pointers
+	result.bytesStart = p.bytesStart
+	result.bytesEnd = p.bytesEnd
+	result.tokenStart = p.tokenStart
+	result.tokenEnd = p.tokenEnd
+}
+
+func (result *Result) Clear() {
+	result.bytesStart = 0
+	result.bytesEnd = 0
+	result.tokenStart = 0
+	result.tokenEnd = 0
 }
 
 func (result *Result) String() string {
-	return string(result.Bytes)
+	return string(result.api.outputBytes[result.bytesStart:result.bytesEnd])
+}
+
+func (result *Result) Byte(offset int) byte {
+	return result.api.outputBytes[result.bytesStart+offset]
+}
+
+func (result *Result) Bytes() []byte {
+	return result.api.outputBytes[result.bytesStart:result.bytesEnd]
+}
+
+func (result *Result) Rune(offset int) rune {
+	r, _ := utf8.DecodeRune(result.api.outputBytes[result.bytesStart+offset:])
+	return r
+}
+
+func (result *Result) Runes() []rune {
+	return []rune(result.String())
+}
+
+func (result *Result) Token(offset int) Token {
+	a := result.api
+	return a.outputTokens[result.tokenStart+offset]
+}
+
+func (result *Result) Tokens() []Token {
+	return result.api.outputTokens[result.tokenStart:result.tokenEnd]
 }
diff --git a/tokenize/api_runemode.go b/tokenize/api_runemode.go
index 6d2b7f6..93ab2bd 100644
--- a/tokenize/api_runemode.go
+++ b/tokenize/api_runemode.go
@@ -43,6 +43,10 @@ func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
 // the first byte after the accepted rune.
 func (runeMode InputRuneMode) Accept(r rune) {
 	a := runeMode.api
+	if a.Output.suspended > 0 {
+		runeMode.MoveCursor(r)
+		return
+	}
 	curBytesEnd := a.pointers.bytesEnd
 	maxRequiredBytes := curBytesEnd + utf8.UTFMax
 	a.growOutputData(maxRequiredBytes)
@@ -64,10 +68,13 @@
 // the first byte after the accepted runes.
 func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
 	a := runeMode.api
+	if a.Output.suspended > 0 {
+		runeMode.MoveCursorMulti(runes...)
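// NOTE (reviewer annotation, not part of the patch): a Result is now a window
// into the API's shared buffers rather than a copy. A sketch of reading one
// through the accessors added above, using the Handler.Match helper that
// appears elsewhere in this patch:
//
//	t, a := tokenize.T, tokenize.A
//	number := t.Int("number", a.Digits)
//	if result, err := number.Match("123"); err == nil {
//		fmt.Println(result.String())       // the matched text
//		fmt.Println(result.Byte(0))        // the first matched byte
//		fmt.Println(result.Token(0).Value) // the parsed integer value
//	}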
+ return + } curBytesEnd := a.pointers.bytesEnd maxBytes := curBytesEnd + len(runes)*utf8.UTFMax a.growOutputData(maxBytes) - for _, r := range runes { w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r) curBytesEnd += w diff --git a/tokenize/assertions_test.go b/tokenize/assertions_test.go index ec758f7..41ef0f2 100644 --- a/tokenize/assertions_test.go +++ b/tokenize/assertions_test.go @@ -102,16 +102,17 @@ func AssertTokenMaker(t *testing.T, test TokenMakerT) { if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) } else { - if len(result.Tokens) != len(test.Expected) { - t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens)) + tokens := result.Tokens() + if len(tokens) != len(test.Expected) { + t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(tokens)) } for i, expected := range test.Expected { - actual := result.Tokens[i] + actual := tokens[i] if expected.Type != actual.Type { - t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type) + t.Errorf("Unexpected Type in result.Tokens, idx %d:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type) } if expected.Value != actual.Value { - t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value) + t.Errorf("Unexpected Value in result.Tokens, idx %d:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value) } } } diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go index c7058c2..786c8f3 100644 --- a/tokenize/handler_test.go +++ b/tokenize/handler_test.go @@ -62,7 +62,8 @@ func ExampleHandler_SeparatedBy() { csv := t.Int("number", a.Digits).SeparatedBy(a.Comma) r, _ := csv.Match("123,456,7,8,9") - for i, token := range r.Tokens { + tokens := r.Tokens() + for i, token := range tokens { fmt.Printf("[%d] %v\n", i, token) } // Output: diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index c1c1ce2..91620c3 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -1516,16 +1516,10 @@ func MatchIPv6Net(normalize bool) Handler { // In both cases, it would match the first form. func ModifyDrop(handler Handler) Handler { return func(tokenAPI *API) bool { - runeEnd := tokenAPI.pointers.bytesEnd - tokenEnd := tokenAPI.pointers.tokenEnd - if handler(tokenAPI) { - // We keep offset and cursor updates, but rollback any runes / tokens - // that were added by the handler. 
-			tokenAPI.pointers.bytesEnd = runeEnd
-			tokenAPI.pointers.tokenEnd = tokenEnd
-			return true
-		}
-		return false
+		tokenAPI.Output.Suspend()
+		ok := handler(tokenAPI)
+		tokenAPI.Output.Resume()
+		return ok
 	}
 }
diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go
index 21d1886..18c4e5c 100644
--- a/tokenize/handlers_builtin_test.go
+++ b/tokenize/handlers_builtin_test.go
@@ -459,7 +459,7 @@ func TestTokenGroup_Match(t *testing.T) {
 	api, err := tokenizer("xxxxx")
 
 	AssertTrue(t, err == nil, "Tokenizer result")
-	tokens := api.Tokens
+	tokens := api.Tokens()
 	AssertEqual(t, 1, len(tokens), "Length of tokens slice")
 	contained := tokens[0].Value.([]tokenize.Token)
 	AssertEqual(t, 3, len(contained), "Length of contained tokens")
@@ -475,7 +475,7 @@ func TestTokenGroup_Mismatch(t *testing.T) {
 	api, err := tokenizer("12345")
 
 	AssertTrue(t, err == nil, "Tokenizer result")
-	tokens := api.Tokens
+	tokens := api.Tokens()
 	AssertEqual(t, 0, len(tokens), "Length of tokens slice")
 }
 
diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go
index 5647b7a..7d8fa54 100644
--- a/tokenize/tokenize.go
+++ b/tokenize/tokenize.go
@@ -36,10 +36,8 @@ func New(tokenHandler Handler) Func {
 			err := fmt.Errorf("mismatch at %s", tokenAPI.Input.Cursor())
 			return nil, err
 		}
-		result := &Result{
-			Bytes:  tokenAPI.Output.Bytes(),
-			Tokens: tokenAPI.Output.Tokens(),
-		}
-		return result, nil
+
+		tokenAPI.Result.Store()
+		return &tokenAPI.Result, nil
 	}
 }
diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go
index 328e236..812b219 100644
--- a/tokenize/tokenizer_test.go
+++ b/tokenize/tokenizer_test.go
@@ -38,9 +38,10 @@ func ExampleNew() {
 	} {
 		// Execute returns a Result and an error, which is nil on success.
 		result, err := tokenizer(input)
 		if err == nil {
-			fmt.Printf("Result: %s\n", result.Tokens)
+			tokens := result.Tokens()
+			fmt.Printf("Result: %s\n", tokens)
 		} else {
 			fmt.Printf("Error: %s\n", err)
 		}
 	}
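// NOTE (reviewer annotation, not part of the patch): taken together, ModifyDrop
// is now expressed purely through Suspend/Resume, and tokenize.New returns a
// *Result that aliases the API's buffers instead of copying them out. A sketch
// of the user-visible effect, assuming the C.Seq combinator from this library:
// a dropped handler consumes input without emitting output.
//
//	c, a := tokenize.C, tokenize.A
//	digitsOnly := c.Seq(tokenize.ModifyDrop(a.Blanks), a.Digits)
//	tokenizer := tokenize.New(digitsOnly)
//	if result, err := tokenizer("   123"); err == nil {
//		fmt.Printf("%q\n", result.String()) // "123": leading blanks were consumed, not emitted
//	}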