From 070e6a13a7894d3aad32293e0f06e14ea029e7a2 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 22 Jul 2019 15:37:52 +0000 Subject: [PATCH] Made some nice steps, backup and continue! --- parse/api.go | 28 ++-- tokenize/api.go | 36 +++++- tokenize/api_byte.go | 92 +++++++++++++ tokenize/api_input.go | 150 +-------------------- tokenize/api_output.go | 76 +++-------- tokenize/api_test.go | 14 +- tokenize/cursor_test.go | 4 +- tokenize/handlers_builtin.go | 194 ++++++++++++++-------------- tokenize/tokenize.go | 2 +- tokenize/tokenizer_test.go | 12 +- tokenize/tokenizer_whitebox_test.go | 6 +- 11 files changed, 284 insertions(+), 330 deletions(-) create mode 100644 tokenize/api_byte.go diff --git a/parse/api.go b/parse/api.go index 431c7ce..3429332 100644 --- a/parse/api.go +++ b/parse/api.go @@ -28,11 +28,13 @@ type API struct { // will start from the same cursor position. func (p *API) Peek(tokenHandler tokenize.Handler) bool { forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) + t := p.tokenAPI if ok { - p.Result.Tokens = p.tokenAPI.Output.Tokens() - p.Result.Runes = p.tokenAPI.Output.Runes() + r := p.Result + r.Tokens = t.Output.Tokens() + r.Runes = t.Output.Runes() } - p.tokenAPI.Dispose(forkedAPI) + t.Dispose(forkedAPI) return ok } @@ -42,21 +44,25 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool { // and the read cursor will stay at the same position. // // After calling this method, you can retrieve the results using the Result() method. +// TODO lessen indirection by introducing a := p.tokenAPI (maybe some other parser funcs too?) +// TODO Eh why keep these results all the time? Same for Peek! func (p *API) Accept(tokenHandler tokenize.Handler) bool { + t := p.tokenAPI forkedAPI, ok := p.invokeHandler("Accept", tokenHandler) if ok { // Keep track of the results. 
- p.Result.Tokens = p.tokenAPI.Output.Tokens() - p.Result.Runes = p.tokenAPI.Output.Runes() + r := p.Result + r.Tokens = t.Output.Tokens() + r.Runes = t.Output.Runes() // Merge to the parent level. - p.tokenAPI.Merge(forkedAPI) - p.tokenAPI.Dispose(forkedAPI) + t.Merge(forkedAPI) + t.Dispose(forkedAPI) // And flush the input reader buffer. - p.tokenAPI.Input.Flush() + t.FlushInput() } else { - p.tokenAPI.Dispose(forkedAPI) + t.Dispose(forkedAPI) } return ok } @@ -159,7 +165,7 @@ func (p *API) Error(format string, data ...interface{}) { // No call to p.panicWhenStoppedOrInError(), to allow a parser to // set a different error message when needed. message := fmt.Sprintf(format, data...) - p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Input.Cursor()) + p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor()) } // ExpectEndOfFile can be used to check if the input is at end of file. @@ -191,7 +197,7 @@ func (p *API) ExpectEndOfFile() { // • there was an error while reading the input. func (p *API) Expected(expected string) { p.panicWhenStoppedOrInError("Expected") - _, err := p.tokenAPI.Input.PeekByte(0) + _, err := p.tokenAPI.Byte.Peek(0) switch { case err == nil: p.Error("unexpected input%s", fmtExpects(expected)) diff --git a/tokenize/api.go b/tokenize/api.go index a2a3a0d..7b1d243 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -1,12 +1,16 @@ package tokenize import ( + "fmt" + "git.makaay.nl/mauricem/go-parsekit/read" ) -// API holds the internal state of a tokenizer run. A run uses tokenize.Handler -// functions to move the tokenizer forward through the input and to provide -// tokenizer output. The API is used by these tokenize.Handler functions to: +// API holds the internal state of a tokenizer run. A tokenizer run uses +// tokenize.Handler functions to move the tokenizer forward through the +// input and to provide tokenizer output. 
+// +// The methods as provided by the API are used by tokenize.Handler functions to: // // • access and process runes / bytes from the input data // @@ -72,8 +76,9 @@ type API struct { stackFrames []stackFrame // the stack frames, containing stack level-specific data stackLevel int // the current stack level stackFrame *stackFrame // the current stack frame + Byte ByteMode // byte-mode operations Input *Input // provides input-related functionality - reader *read.Buffer // the input data reader + reader *read.Buffer // the buffered input reader Output *Output // provides output-related functionality outputTokens []Token // accepted tokens outputData []byte // accepted data @@ -104,6 +109,7 @@ func NewAPI(input interface{}) *API { reader: read.New(input), stackFrames: make([]stackFrame, initialStackDepth), } + api.Byte = ByteMode{api: api} api.Input = &Input{api: api} api.Output = &Output{api: api} api.stackFrame = &api.stackFrames[0] @@ -237,3 +243,25 @@ func (tokenAPI *API) Dispose(stackLevel int) { tokenAPI.stackLevel = stackLevel - 1 tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1] } + +// FlushInput flushes input data from the read.Buffer up to the current +// read offset of the parser. +// +// Note: +// When writing your own TokenHandler, you normally won't have to call this +// method yourself. It is automatically called by parsekit when possible. 
+func (api *API) FlushInput() bool { + if api.stackFrame.offset > 0 { + api.reader.Flush(api.stackFrame.offset) + api.stackFrame.offset = 0 + return true + } + return false +} + +func (api *API) Cursor() string { + if api.stackFrame.line == 0 && api.stackFrame.column == 0 { + return fmt.Sprintf("start of file") + } + return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1) +} diff --git a/tokenize/api_byte.go b/tokenize/api_byte.go new file mode 100644 index 0000000..a0f83b4 --- /dev/null +++ b/tokenize/api_byte.go @@ -0,0 +1,92 @@ +package tokenize + +// ByteMode provides byte-mode operations for the tokenize API. +type ByteMode struct { + api *API +} + +// Peek returns the byte at the provided byte offset. +// +// When an error occurs during reading the input, an error will be returned. +// When an offset is requested that is beyond the length of the available input +// data, then the error will be io.EOF. +func (byteMode ByteMode) Peek(offset int) (byte, error) { + a := byteMode.api + return a.reader.ByteAt(a.stackFrame.offset + offset) +} + +// Skip is used to skip over a single byte that was read from the input. +// This tells the tokenizer: "I've seen this byte. It is of no interest. +// I will now continue reading after this byte." +// +// This will merely update the position of the cursor (which keeps track of what +// line and column we are on in the input data). The byte is not added to +// the output. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the skipped byte. 
+func (byteMode ByteMode) Skip(b byte) { + f := byteMode.api.stackFrame + f.moveCursorByByte(b) + f.offset++ +} + +func (byteMode ByteMode) SkipMulti(bytes ...byte) { + f := byteMode.api.stackFrame + for _, b := range bytes { + f.moveCursorByByte(b) + f.offset++ + } +} + +func (byteMode ByteMode) Accept(b byte) { + byteMode.Append(b) + byteMode.Skip(b) +} + +func (byteMode ByteMode) Append(b byte) { + a := byteMode.api + f := a.stackFrame + a.growOutputData(f.bytesEnd + 1) + a.outputData[f.bytesEnd] = b + f.bytesEnd++ +} + +// AcceptMulti is used to accept one or more bytes that were read from the input. +// This tells the tokenizer: "I've seen these bytes. I want to make use of them +// for the final output, so please remember them for me. I will now continue +// reading after these bytes." +// +// This will update the position of the cursor (which keeps track of what line +// and column we are on in the input data) and add the bytes to the tokenizer +// output. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the accepted bytes. +func (byteMode ByteMode) AcceptMulti(bytes ...byte) { + byteMode.AppendMulti(bytes...) + byteMode.SkipMulti(bytes...) +} + +func (byteMode ByteMode) AppendMulti(bytes ...byte) { + a := byteMode.api + f := a.stackFrame + curBytesEnd := f.bytesEnd + newBytesEnd := curBytesEnd + len(bytes) + + a.growOutputData(newBytesEnd) + copy(a.outputData[curBytesEnd:], bytes) + f.bytesEnd = newBytesEnd +} + +func (api *API) dataAddByte(b byte) { + curBytesEnd := api.stackFrame.bytesEnd + api.growOutputData(curBytesEnd + 1) + api.outputData[curBytesEnd] = b + api.stackFrame.bytesEnd++ +} + +func (api *API) dataSetBytes(bytes ...byte) { + api.dataClear() + api.Byte.AppendMulti(bytes...) 
+} diff --git a/tokenize/api_input.go b/tokenize/api_input.go index cec1c09..a725a9c 100644 --- a/tokenize/api_input.go +++ b/tokenize/api_input.go @@ -1,7 +1,6 @@ package tokenize import ( - "fmt" "unicode/utf8" ) @@ -10,100 +9,6 @@ type Input struct { api *API } -// PeekByte returns the byte at the provided byte offset. -// -// When an error occurs during reading the input, an error will be returned. -// When an offset is requested that is beyond the length of the available input -// data, then the error will be io.EOF. -func (i *Input) PeekByte(offset int) (byte, error) { - return i.api.peekByte(offset) -} - -func (api *API) peekByte(offset int) (byte, error) { - return api.reader.ByteAt(api.stackFrame.offset + offset) -} - -// SkipByte is used to skip over a single bytes that was read from the input. -// This tells the tokenizer: "I've seen this byte. It is of no interest. -// I will now continue reading after this byte." -// -// This will merely update the position of the cursor (which keeps track of what -// line and column we are on in the input data). The byte is not added to -// the output. -// -// After the call, byte offset 0 for PeekByte() and PeekRune() will point at -// the first byte after the skipped byte. -func (i *Input) SkipByte(b byte) { - i.api.stackFrame.moveCursorByByte(b) - i.api.stackFrame.offset++ -} - -func (api *API) skipByte(b byte) { - api.stackFrame.moveCursorByByte(b) - api.stackFrame.offset++ -} - -// SkipBytes is used to skip over one or more bytes that were read from the input. -// This tells the tokenizer: "I've seen these bytes. They are of no interest. -// I will now continue reading after these bytes." -// -// This will merely update the position of the cursor (which keeps track of what -// line and column we are on in the input data). The bytes are not added to -// the output. -// -// After the call, byte offset 0 for PeekByte() and PeekRune() will point at -// the first byte after the skipped bytes. 
-func (i *Input) SkipBytes(bytes ...byte) { - i.api.skipBytes(bytes...) -} - -func (api *API) skipBytes(bytes ...byte) { - for _, b := range bytes { - api.stackFrame.moveCursorByByte(b) - api.stackFrame.offset++ - } -} - -// AcceptByte is used to accept a single byte that was read from the input. -// This tells the tokenizer: "I've seen this byte. I want to make use of it -// for the final output, so please remember it for me. I will now continue -// reading after this byte." -// -// This will update the position of the cursor (which keeps track of what line -// and column we are on in the input data) and add the byte to the tokenizer -// output. -// -// After the call, byte offset 0 for PeekByte() and PeekRune() will point at -// the first byte after the accepted byte. -func (i *Input) AcceptByte(b byte) { - i.api.acceptByte(b) -} - -func (api *API) acceptByte(b byte) { - api.dataAddByte(b) - api.skipByte(b) -} - -// AcceptBytes is used to accept one or more bytes that were read from the input. -// This tells the tokenizer: "I've seen these bytes. I want to make use of them -// for the final output, so please remember them for me. I will now continue -// reading after these bytes." -// -// This will update the position of the cursor (which keeps track of what line -// and column we are on in the input data) and add the bytes to the tokenizer -// output. -// -// After the call, byte offset 0 for PeekByte() and PeekRune() will point at -// the first byte after the accepted bytes. -func (i *Input) AcceptBytes(bytes ...byte) { - i.api.acceptBytes(bytes...) -} - -func (api *API) acceptBytes(bytes ...byte) { - api.dataAddBytes(bytes...) - api.skipBytes(bytes...) -} - // PeekRune returns the UTF8 rune at the provided byte offset, including its byte width. 
// // The byte width is useful to know what byte offset you'll have to use to peek @@ -184,17 +89,12 @@ func (api *API) acceptRune(r rune) { curBytesEnd := api.stackFrame.bytesEnd maxRequiredBytes := curBytesEnd + utf8.UTFMax - // Grow the runes capacity when needed. - if cap(api.outputData) < maxRequiredBytes { - newBytes := make([]byte, maxRequiredBytes*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - - api.stackFrame.moveCursorByRune(r) + api.growOutputData(maxRequiredBytes) w := utf8.EncodeRune(api.outputData[curBytesEnd:], r) api.stackFrame.bytesEnd += w api.stackFrame.offset += w + + api.stackFrame.moveCursorByRune(r) } // AcceptRunes is used to accept one or more runes that were read from the input. @@ -218,48 +118,12 @@ func (api *API) acceptRunes(runes ...rune) { curBytesEnd := api.stackFrame.bytesEnd newBytesEnd := curBytesEnd + byteLen - // Grow the runes capacity when needed. - if cap(api.outputData) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } + api.growOutputData(newBytesEnd) + copy(api.outputData[curBytesEnd:], runesAsString) + api.stackFrame.bytesEnd = newBytesEnd + api.stackFrame.offset += byteLen for _, r := range runes { api.stackFrame.moveCursorByRune(r) } - copy(api.outputData[curBytesEnd:], runesAsString) - - api.stackFrame.bytesEnd = newBytesEnd - api.stackFrame.offset += byteLen -} - -// Flush flushes input data from the read.Buffer up to the current -// read offset of the parser. -// -// Note: -// When writing your own TokenHandler, you normally won't have to call this -// method yourself. It is automatically called by parsekit when possible. 
-func (i *Input) Flush() bool { - return i.api.flushInput() -} - -func (api *API) flushInput() bool { - if api.stackFrame.offset > 0 { - api.reader.Flush(api.stackFrame.offset) - api.stackFrame.offset = 0 - return true - } - return false -} - -func (i *Input) Cursor() string { - return i.api.cursor() -} - -func (api *API) cursor() string { - if api.stackFrame.line == 0 && api.stackFrame.column == 0 { - return fmt.Sprintf("start of file") - } - return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1) } diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 38cb471..3908c9c 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -47,47 +47,12 @@ func (o *Output) SetBytes(bytes ...byte) { o.api.dataSetBytes(bytes...) } -func (api *API) dataSetBytes(bytes ...byte) { - api.dataClear() - api.dataAddBytes(bytes...) -} - func (o *Output) AddByte(b byte) { o.api.dataAddByte(b) } -func (api *API) dataAddByte(b byte) { - curBytesEnd := api.stackFrame.bytesEnd - newBytesEnd := curBytesEnd + 1 - - // Grow the bytes capacity when needed. - if cap(api.outputData) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - - api.stackFrame.bytesEnd++ - api.outputData[curBytesEnd] = b -} - func (o *Output) AddBytes(bytes ...byte) { - o.api.dataAddBytes(bytes...) -} - -func (api *API) dataAddBytes(bytes ...byte) { - curBytesEnd := api.stackFrame.bytesEnd - newBytesEnd := curBytesEnd + len(bytes) - - // Grow the runes capacity when needed. - if cap(api.outputData) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - - copy(api.outputData[curBytesEnd:], bytes) - api.stackFrame.bytesEnd = newBytesEnd + o.api.Byte.AppendMulti(bytes...) 
} func (o *Output) SetRunes(runes ...rune) { @@ -104,15 +69,9 @@ func (o *Output) AddRunes(runes ...rune) { } func (api *API) dataAddRunes(runes ...rune) { - // Grow the runes capacity when needed. runesAsString := string(runes) newBytesEnd := api.stackFrame.bytesEnd + len(runesAsString) - if cap(api.outputData) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - + api.growOutputData(newBytesEnd) copy(api.outputData[api.stackFrame.bytesEnd:], runesAsString) api.stackFrame.bytesEnd = newBytesEnd } @@ -122,7 +81,7 @@ func (o *Output) AddString(s string) { } func (api *API) dataAddString(s string) { - api.dataAddBytes([]byte(s)...) + api.Byte.AppendMulti([]byte(s)...) } func (o *Output) SetString(s string) { @@ -189,20 +148,25 @@ func (o *Output) AddTokens(tokens ...Token) { func (api *API) tokensAdd(tokens ...Token) { // Grow the tokens capacity when needed. newTokenEnd := api.stackFrame.tokenEnd + len(tokens) - if cap(api.outputTokens) < newTokenEnd { - type Func func(input interface{}) (*Result, error) - - // Result holds the runes and tokens as produced by the tokenizer. 
- type Result struct { - Tokens []Token - Runes []rune - } - newTokens := make([]Token, newTokenEnd*2) - copy(newTokens, api.outputTokens) - api.outputTokens = newTokens - } + api.growOutputTokens(newTokenEnd) for offset, t := range tokens { api.outputTokens[api.stackFrame.tokenEnd+offset] = t } api.stackFrame.tokenEnd = newTokenEnd } + +func (api *API) growOutputTokens(requiredTokens int) { + if cap(api.outputTokens) < requiredTokens { + newTokens := make([]Token, requiredTokens*2) + copy(newTokens, api.outputTokens) + api.outputTokens = newTokens + } +} + +func (api *API) growOutputData(requiredBytes int) { + if cap(api.outputData) < requiredBytes { + newBytes := make([]byte, requiredBytes*2) + copy(newBytes, api.outputData) + api.outputData = newBytes + } +} diff --git a/tokenize/api_test.go b/tokenize/api_test.go index d8830f4..adad8df 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -148,18 +148,18 @@ func ExampleAPI_Reset() { api.Input.AcceptRune(r) r, _, _ = api.Input.PeekRune(0) // read 'e' api.Input.AcceptRune(r) - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor()) + fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) // Reset clears the results. api.Reset() - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor()) + fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) // So then doing the same read operations, the same data are read. 
r, _, _ = api.Input.PeekRune(0) // read 'V' api.Input.AcceptRune(r) r, _, _ = api.Input.PeekRune(0) // read 'e' api.Input.AcceptRune(r) - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor()) + fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) // Output: // API results: "Ve" at line 1, column 3 @@ -262,7 +262,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { AssertEqual(t, 'c', r, "child4 rune 3") api.Input.AcceptRune(r) AssertEqual(t, "c", api.Output.String(), "child4 runes after rune 1") - AssertEqual(t, "line 1, column 4", api.Input.Cursor(), "cursor child4 rune 3") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") // Merge "c" from child4 to child3. api.Merge(child4) @@ -272,7 +272,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { // Child3 should now have the compbined results "abc" from child4's work. AssertEqual(t, "abc", api.Output.String(), "child3 after merge of child4") - AssertEqual(t, "line 1, column 4", api.Input.Cursor(), "cursor child3 rune 3, after merge of child4") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") // Now read some data from child3. r, _, _ = api.Input.PeekRune(0) @@ -308,7 +308,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { api.Dispose(child3) AssertEqual(t, "abcdef", api.Output.String(), "child2 total result after merge of child3") - AssertEqual(t, "line 1, column 7", api.Input.Cursor(), "cursor child2 after merge child3") + AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") // Merge child2 to child1 and dispose of it. 
api.Merge(child2) @@ -328,7 +328,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { api.Input.AcceptRune(r) AssertEqual(t, "abcdefg", api.Output.String(), "api string end result") - AssertEqual(t, "line 1, column 8", api.Input.Cursor(), "api cursor end result") + AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") } func TestClearData(t *testing.T) { diff --git a/tokenize/cursor_test.go b/tokenize/cursor_test.go index b19e9d3..971fd98 100644 --- a/tokenize/cursor_test.go +++ b/tokenize/cursor_test.go @@ -14,7 +14,7 @@ func TestMoveCursorByBytes(t *testing.T) { api.stackFrame.moveCursorByByte('a') api.stackFrame.moveCursorByByte('b') - AssertEqual(t, "line 2, column 3", api.Input.Cursor(), "Cursor position after moving by byte") + AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte") } func TestMoveCursorByRunes(t *testing.T) { @@ -26,7 +26,7 @@ func TestMoveCursorByRunes(t *testing.T) { api.stackFrame.moveCursorByRune('\n') api.stackFrame.moveCursorByRune('ǝ') - AssertEqual(t, "line 2, column 2", api.Input.Cursor(), "Cursor position after moving by rune") + AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune") } func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) { diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 0c29923..d76f111 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -350,9 +350,9 @@ var T = struct { // MatchByte creates a Handler function that matches against the provided byte. func MatchByte(expected byte) Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err == nil && b == expected { - t.acceptByte(b) + t.Byte.Accept(b) return true } return false @@ -378,13 +378,13 @@ func MatchRune(expected rune) Handler { // one of the provided bytes. The first match counts. 
func MatchBytes(expected ...byte) Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil { return false } for _, e := range expected { if b == e { - t.acceptByte(b) + t.Byte.Accept(b) return true } } @@ -434,9 +434,9 @@ func MatchByteRange(start byte, end byte) Handler { callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) } return func(t *API) bool { - r, err := t.peekByte(0) - if err == nil && r >= start && r <= end { - t.acceptByte(r) + b, err := t.Byte.Peek(0) + if err == nil && b >= start && b <= end { + t.Byte.Accept(b) return true } return false @@ -471,18 +471,18 @@ func MatchRuneRange(start rune, end rune) Handler { // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). func MatchNewline() Handler { return func(t *API) bool { - b1, err := t.peekByte(0) + b1, err := t.Byte.Peek(0) if err != nil { return false } if b1 == '\n' { - t.acceptBytes(b1) + t.Byte.AcceptMulti(b1) return true } if b1 == '\r' { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) if err == nil && b2 == '\n' { - t.acceptBytes(b1, b2) + t.Byte.AcceptMulti(b1, b2) return true } } @@ -497,9 +497,9 @@ func MatchNewline() Handler { // newlines, then take a look at MatchWhitespace(). func MatchBlank() Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err == nil && (b == ' ' || b == '\t') { - t.acceptByte(b) + t.Byte.Accept(b) return true } return false @@ -516,20 +516,20 @@ func MatchBlank() Handler { func MatchBlanks() Handler { return func(t *API) bool { // Match the first blank. - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || (b != ' ' && b != '\t') { return false } - t.acceptByte(b) + t.Byte.Accept(b) // Now match any number of followup blanks. We've already got // a successful match at this point, so we'll always return true at the end. 
for { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || (b != ' ' && b != '\t') { return true } - t.acceptByte(b) + t.Byte.Accept(b) } } } @@ -540,35 +540,35 @@ func MatchBlanks() Handler { func MatchWhitespace() Handler { return func(t *API) bool { // Match the first whitespace. - b1, err := t.peekByte(0) + b1, err := t.Byte.Peek(0) if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { return false } if b1 == '\r' { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) if err != nil || b2 != '\n' { return false } - t.acceptBytes(b1, b2) + t.Byte.AcceptMulti(b1, b2) } else { - t.acceptByte(b1) + t.Byte.Accept(b1) } // Now match any number of followup whitespace. We've already got // a successful match at this point, so we'll always return true at the end. for { - b1, err := t.peekByte(0) + b1, err := t.Byte.Peek(0) if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { return true } if b1 == '\r' { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) if err != nil || b2 != '\n' { return true } - t.acceptBytes(b1, b2) + t.Byte.AcceptMulti(b1, b2) } else { - t.acceptByte(b1) + t.Byte.Accept(b1) } } } @@ -588,9 +588,9 @@ func MatchUnicodeSpace() Handler { // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). func MatchByteByCallback(callback func(byte) bool) Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err == nil && callback(b) { - t.acceptByte(b) + t.Byte.Accept(b) return true } return false @@ -617,18 +617,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler { // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. 
func MatchEndOfLine() Handler { return func(t *API) bool { - b1, err := t.peekByte(0) + b1, err := t.Byte.Peek(0) if err != nil { return err == io.EOF } if b1 == '\n' { - t.acceptByte(b1) + t.Byte.Accept(b1) return true } if b1 == '\r' { - b2, _ := t.peekByte(1) + b2, _ := t.Byte.Peek(1) if b2 == '\n' { - t.acceptBytes(b1, b2) + t.Byte.AcceptMulti(b1, b2) return true } } @@ -644,7 +644,7 @@ func MatchStr(expected string) Handler { offset := 0 for _, e := range expectedRunes { if e <= '\x7F' { - b, err := t.peekByte(offset) + b, err := t.Byte.Peek(offset) if err != nil || b != byte(e) { return false } @@ -673,7 +673,7 @@ func MatchStrNoCase(expected string) Handler { i := 0 for _, e := range expected { if e <= '\x7F' { - b, err := t.peekByte(width) + b, err := t.Byte.Peek(width) if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) { return false } @@ -941,7 +941,7 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { func MakeInputFlusher(handler Handler) Handler { return func(t *API) bool { if handler(t) { - t.flushInput() + t.FlushInput() return true } return false @@ -956,13 +956,13 @@ func MakeInputFlusher(handler Handler) Handler { func MatchSigned(handler Handler) Handler { return func(t *API) bool { child := t.Fork() - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil { t.Dispose(child) return false } if b == '-' || b == '+' { - t.acceptByte(b) + t.Byte.Accept(b) } if handler(t) { t.Merge(child) @@ -1002,7 +1002,7 @@ func MatchIntegerBetween(min int64, max int64) Handler { func MatchEndOfFile() Handler { return func(t *API) bool { child := t.Fork() - _, err := t.peekByte(0) + _, err := t.Byte.Peek(0) t.Dispose(child) return err == io.EOF } @@ -1018,9 +1018,9 @@ func MatchUntilEndOfLine() Handler { // MatchAnyByte creates a Handler function that accepts any byte from the input. 
func MatchAnyByte() Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err == nil { - t.acceptByte(b) + t.Byte.Accept(b) return true } return false @@ -1078,19 +1078,19 @@ func MatchDigit() Handler { func MatchDigits() Handler { return func(t *API) bool { // Check if the first character is a digit. - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } - t.acceptByte(b) + t.Byte.Accept(b) // Continue accepting bytes as long as they are digits. for { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return true } - t.acceptByte(b) + t.Byte.Accept(b) } } } @@ -1109,7 +1109,7 @@ func MatchDigitNotZero() Handler { func MatchInteger(normalize bool) Handler { return func(t *API) bool { // Check if the first character is a digit. - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } @@ -1117,33 +1117,33 @@ func MatchInteger(normalize bool) Handler { // When normalization is requested, drop leading zeroes. if normalize && b == '0' { for { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.skipByte('0') + t.Byte.Skip('0') continue } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.acceptByte('0') + t.Byte.Accept('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.skipByte('0') - t.acceptByte(b2) + t.Byte.Skip('0') + t.Byte.Accept(b2) break } } // Continue accepting bytes as long as they are digits. 
for { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return true } - t.acceptByte(b) + t.Byte.Accept(b) } } } @@ -1158,7 +1158,7 @@ func MatchInteger(normalize bool) Handler { func MatchDecimal(normalize bool) Handler { return func(t *API) bool { // Check if the first character is a digit. - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } @@ -1166,58 +1166,58 @@ func MatchDecimal(normalize bool) Handler { // When normalization is requested, drop leading zeroes. if normalize && b == '0' { for { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.skipByte('0') + t.Byte.Skip('0') continue } // The next character is a dot, go with the zero before the dot and // let the upcoming code handle the dot. if err == nil && b2 == '.' { - t.acceptByte('0') + t.Byte.Accept('0') break } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.acceptByte('0') + t.Byte.Accept('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.skipByte('0') - t.acceptByte(b2) + t.Byte.Skip('0') + t.Byte.Accept(b2) break } } // Continue accepting bytes as long as they are digits. for { - b, err = t.peekByte(0) + b, err = t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { break } - t.acceptBytes(b) + t.Byte.AcceptMulti(b) } // No dot or no digit after a dot? Then we're done. if b != '.' { return true } - b, err = t.peekByte(1) + b, err = t.Byte.Peek(1) if err != nil || b < '0' || b > '9' { return true } // Continue accepting bytes as long as they are digits. 
- t.acceptBytes('.', b) + t.Byte.AcceptMulti('.', b) for { - b, err = t.peekByte(0) + b, err = t.Byte.Peek(0) if err != nil || b < '0' || b > '9' { break } - t.acceptByte(b) + t.Byte.Accept(b) } return true } @@ -1232,52 +1232,52 @@ func MatchDecimal(normalize bool) Handler { // False falues: false, FALSE, False, 0, f, F func MatchBoolean() Handler { return func(t *API) bool { - b1, err := t.peekByte(0) + b1, err := t.Byte.Peek(0) if err != nil { return false } if b1 == '1' || b1 == '0' { - t.acceptByte(b1) + t.Byte.Accept(b1) return true } if b1 == 't' || b1 == 'T' { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) if err != nil || (b2 != 'R' && b2 != 'r') { - t.acceptByte(b1) + t.Byte.Accept(b1) return true } - b3, _ := t.peekByte(2) - b4, err := t.peekByte(3) + b3, _ := t.Byte.Peek(2) + b4, err := t.Byte.Peek(3) if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' { - t.acceptBytes(b1, b2, b3, b4) + t.Byte.AcceptMulti(b1, b2, b3, b4) return true } if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' { - t.acceptBytes(b1, b2, b3, b4) + t.Byte.AcceptMulti(b1, b2, b3, b4) return true } - t.acceptByte(b1) + t.Byte.Accept(b1) return true } if b1 == 'f' || b1 == 'F' { - b2, err := t.peekByte(1) + b2, err := t.Byte.Peek(1) if err != nil || (b2 != 'A' && b2 != 'a') { - t.acceptByte(b1) + t.Byte.Accept(b1) return true } - b3, _ := t.peekByte(2) - b4, _ := t.peekByte(3) - b5, err := t.peekByte(4) + b3, _ := t.Byte.Peek(2) + b4, _ := t.Byte.Peek(3) + b5, err := t.Byte.Peek(4) if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' { - t.acceptBytes(b1, b2, b3, b4, b5) + t.Byte.AcceptMulti(b1, b2, b3, b4, b5) return true } if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' { - t.acceptBytes(b1, b2, b3, b4, b5) + t.Byte.AcceptMulti(b1, b2, b3, b4, b5) return true } - t.acceptByte(b1) + t.Byte.Accept(b1) return true } return false @@ -1324,9 +1324,9 @@ func MatchUnicodeLower() Handler { // digit can be read from the 
input. func MatchHexDigit() Handler { return func(t *API) bool { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { - t.acceptByte(b) + t.Byte.Accept(b) return true } return false @@ -1344,28 +1344,28 @@ func MatchHexDigit() Handler { func MatchOctet(normalize bool) Handler { return func(t *API) bool { // Digit 1 - b0, err := t.peekByte(0) + b0, err := t.Byte.Peek(0) if err != nil || b0 < '0' || b0 > '9' { return false } // Digit 2 - b1, err := t.peekByte(1) + b1, err := t.Byte.Peek(1) if err != nil || b1 < '0' || b1 > '9' { // Output 1-digit octet. - t.acceptByte(b0) + t.Byte.Accept(b0) return true } // Digit 3 - b2, err := t.peekByte(2) + b2, err := t.Byte.Peek(2) if err != nil || b2 < '0' || b2 > '9' { // Output 2-digit octet. if normalize && b0 == '0' { - t.skipByte(b0) - t.acceptByte(b1) + t.Byte.Skip(b0) + t.Byte.Accept(b1) } else { - t.acceptBytes(b0, b1) + t.Byte.AcceptMulti(b0, b1) } return true } @@ -1377,15 +1377,15 @@ func MatchOctet(normalize bool) Handler { // Output 3-digit octet. 
if normalize && b0 == '0' { - t.skipByte(b0) + t.Byte.Skip(b0) if b1 == '0' { - t.skipByte(b1) + t.Byte.Skip(b1) } else { - t.acceptByte(b1) + t.Byte.Accept(b1) } - t.acceptByte(b2) + t.Byte.Accept(b2) } else { - t.acceptBytes(b0, b1, b2) + t.Byte.AcceptMulti(b0, b1, b2) } return true } @@ -1586,7 +1586,7 @@ func ModifyDrop(handler Handler) Handler { func ModifyDropUntilEndOfLine() Handler { return func(t *API) bool { for { - b, err := t.peekByte(0) + b, err := t.Byte.Peek(0) if err != nil { if err == io.EOF { return true @@ -1596,7 +1596,7 @@ func ModifyDropUntilEndOfLine() Handler { if b == '\n' { return true } - t.skipByte(b) + t.Byte.Skip(b) } } } diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go index 18fe65f..96fc366 100644 --- a/tokenize/tokenize.go +++ b/tokenize/tokenize.go @@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func { ok := tokenHandler(api) if !ok { - err := fmt.Errorf("mismatch at %s", api.cursor()) + err := fmt.Errorf("mismatch at %s", api.Cursor()) return nil, err } result := &Result{ diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index 630315b..1d8a095 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -134,22 +134,22 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) { func TestAccept_UpdatesCursor(t *testing.T) { i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) - AssertEqual(t, "start of file", i.Input.Cursor(), "cursor 1") + AssertEqual(t, "start of file", i.Cursor(), "cursor 1") for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" r, _, _ := i.Input.PeekRune(0) i.Input.AcceptRune(r) } - AssertEqual(t, "line 1, column 7", i.Input.Cursor(), "cursor 2") + AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") r, _, _ := i.Input.PeekRune(0) // read "\n", cursor ends up at start of new line i.Input.AcceptRune(r) - AssertEqual(t, "line 2, column 1", i.Input.Cursor(), "cursor 3") + AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") for 
j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" - b, _ := i.Input.PeekByte(0) - i.Input.AcceptByte(b) + b, _ := i.Byte.Peek(0) + i.Byte.Accept(b) } - AssertEqual(t, "line 3, column 5", i.Input.Cursor(), "cursor 4") + AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") } func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) { diff --git a/tokenize/tokenizer_whitebox_test.go b/tokenize/tokenizer_whitebox_test.go index 0adb12e..abd97d8 100644 --- a/tokenize/tokenizer_whitebox_test.go +++ b/tokenize/tokenizer_whitebox_test.go @@ -63,17 +63,17 @@ func TestFlushInput(t *testing.T) { // Flushing without any read data is okay. FlushInput() will return // false in this case, and nothing else happens. - AssertTrue(t, i.Input.Flush() == false, "flush input at start") + AssertTrue(t, i.FlushInput() == false, "flush input at start") r, _, _ := i.Input.PeekRune(0) i.Input.AcceptRune(r) // c r, _, _ = i.Input.PeekRune(0) i.Input.AcceptRune(r) // o - AssertTrue(t, i.Input.Flush() == true, "flush input after reading some data") + AssertTrue(t, i.FlushInput() == true, "flush input after reading some data") AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input") - AssertTrue(t, i.Input.Flush() == false, "flush input after flush input") + AssertTrue(t, i.FlushInput() == false, "flush input after flush input") // Read offset is now zero, but reading should continue after "co". // The output so far isn't modified, so the following accept calls