Made some nice steps, backup and continue!

This commit is contained in:
Maurice Makaay 2019-07-22 15:37:52 +00:00
parent dd1159e309
commit 070e6a13a7
11 changed files with 284 additions and 330 deletions

View File

@ -28,11 +28,13 @@ type API struct {
// will start from the same cursor position.
func (p *API) Peek(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
t := p.tokenAPI
if ok {
p.Result.Tokens = p.tokenAPI.Output.Tokens()
p.Result.Runes = p.tokenAPI.Output.Runes()
r := p.Result
r.Tokens = t.Output.Tokens()
r.Runes = t.Output.Runes()
}
p.tokenAPI.Dispose(forkedAPI)
t.Dispose(forkedAPI)
return ok
}
@ -42,21 +44,25 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
// and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the results using the Result() method.
// TODO lessen indirection by introducing a := p.tokenAPI (maybe some other parser funcs too?)
// TODO Eh why keep these results all the time? Same for Peek!
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
t := p.tokenAPI
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
if ok {
// Keep track of the results.
p.Result.Tokens = p.tokenAPI.Output.Tokens()
p.Result.Runes = p.tokenAPI.Output.Runes()
r := p.Result
r.Tokens = t.Output.Tokens()
r.Runes = t.Output.Runes()
// Merge to the parent level.
p.tokenAPI.Merge(forkedAPI)
p.tokenAPI.Dispose(forkedAPI)
t.Merge(forkedAPI)
t.Dispose(forkedAPI)
// And flush the input reader buffer.
p.tokenAPI.Input.Flush()
t.FlushInput()
} else {
p.tokenAPI.Dispose(forkedAPI)
t.Dispose(forkedAPI)
}
return ok
}
@ -159,7 +165,7 @@ func (p *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, data...)
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Input.Cursor())
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.
@ -191,7 +197,7 @@ func (p *API) ExpectEndOfFile() {
// • there was an error while reading the input.
func (p *API) Expected(expected string) {
p.panicWhenStoppedOrInError("Expected")
_, err := p.tokenAPI.Input.PeekByte(0)
_, err := p.tokenAPI.Byte.Peek(0)
switch {
case err == nil:
p.Error("unexpected input%s", fmtExpects(expected))

View File

@ -1,12 +1,16 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
// API holds the internal state of a tokenizer run. A run uses tokenize.Handler
// functions to move the tokenizer forward through the input and to provide
// tokenizer output. The API is used by these tokenize.Handler functions to:
// API holds the internal state of a tokenizer run. A tokenizer run uses
// tokenize.Handler functions to move the tokenizer forward through the
// input and to provide tokenizer output.
//
// The methods as provided by the API are used by tokenize.Handler functions to:
//
// • access and process runes / bytes from the input data
//
@ -72,8 +76,9 @@ type API struct {
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
Byte ByteMode // byte-mode operations
Input *Input // provides input-related functionality
reader *read.Buffer // the input data reader
reader *read.Buffer // the buffered input reader
Output *Output // provides output-related functionality
outputTokens []Token // accepted tokens
outputData []byte // accepted data
@ -104,6 +109,7 @@ func NewAPI(input interface{}) *API {
reader: read.New(input),
stackFrames: make([]stackFrame, initialStackDepth),
}
api.Byte = ByteMode{api: api}
api.Input = &Input{api: api}
api.Output = &Output{api: api}
api.stackFrame = &api.stackFrames[0]
@ -237,3 +243,25 @@ func (tokenAPI *API) Dispose(stackLevel int) {
tokenAPI.stackLevel = stackLevel - 1
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
}
// FlushInput flushes input data from the read.Buffer up to the current
// read offset of the parser.
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when possible.
func (api *API) FlushInput() bool {
	frame := api.stackFrame
	if frame.offset == 0 {
		// Nothing was read since the last flush; report that no flush happened.
		return false
	}
	api.reader.Flush(frame.offset)
	frame.offset = 0
	return true
}
// Cursor returns a human-readable description of the current read cursor
// position: "start of file" when nothing has been read yet, otherwise
// "line N, column M" with 1-based line and column numbers.
func (api *API) Cursor() string {
	if api.stackFrame.line == 0 && api.stackFrame.column == 0 {
		// No format verbs needed here; a plain string avoids a needless
		// Sprintf call (flagged by go vet / staticcheck S1039).
		return "start of file"
	}
	return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1)
}

92
tokenize/api_byte.go Normal file
View File

@ -0,0 +1,92 @@
package tokenize
// ByteMode provides byte-oriented input and output functionality for the
// tokenize API. Its methods peek at, skip over, and accept single bytes or
// runs of bytes from the input.
type ByteMode struct {
api *API
}
// Peek returns the byte at the provided byte offset.
//
// When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (byteMode ByteMode) Peek(offset int) (byte, error) {
	api := byteMode.api
	return api.reader.ByteAt(api.stackFrame.offset + offset)
}
// Skip is used to skip over a single byte that was read from the input.
// This tells the tokenizer: "I've seen this byte. It is of no interest.
// I will now continue reading after this byte."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The byte is not added to
// the output.
//
// After the call, byte offset 0 for Peek() will point at the first byte
// after the skipped byte.
func (byteMode ByteMode) Skip(b byte) {
	frame := byteMode.api.stackFrame
	frame.moveCursorByByte(b)
	frame.offset++
}
// SkipMulti skips over multiple bytes that were read from the input,
// updating the cursor position for each of them without adding anything
// to the output. See Skip for details.
func (byteMode ByteMode) SkipMulti(bytes ...byte) {
	frame := byteMode.api.stackFrame
	for _, skipped := range bytes {
		frame.moveCursorByByte(skipped)
		frame.offset++
	}
}
// Accept is used to accept a single byte that was read from the input:
// the byte is appended to the output and the input cursor is moved past it.
// The Append must happen before the Skip, since Append writes the byte at
// the current output position before Skip advances the read offset.
func (byteMode ByteMode) Accept(b byte) {
byteMode.Append(b)
byteMode.Skip(b)
}
// Append adds a single byte to the tokenizer output, without moving the
// input cursor. Output storage is grown on demand.
func (byteMode ByteMode) Append(b byte) {
	api := byteMode.api
	frame := api.stackFrame
	end := frame.bytesEnd
	api.growOutputData(end + 1)
	api.outputData[end] = b
	frame.bytesEnd = end + 1
}
// AcceptMulti is used to accept one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these bytes."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the bytes to the tokenizer
// output.
//
// After the call, byte offset 0 for Peek() will point at the first byte
// after the accepted bytes.
func (byteMode ByteMode) AcceptMulti(bytes ...byte) {
byteMode.AppendMulti(bytes...)
byteMode.SkipMulti(bytes...)
}
// AppendMulti adds multiple bytes to the tokenizer output in one go,
// without moving the input cursor. Output storage is grown on demand.
func (byteMode ByteMode) AppendMulti(bytes ...byte) {
	api := byteMode.api
	frame := api.stackFrame
	start := frame.bytesEnd
	end := start + len(bytes)
	api.growOutputData(end)
	copy(api.outputData[start:], bytes)
	frame.bytesEnd = end
}
// dataAddByte appends a single byte to the accepted output data.
//
// It delegates to ByteMode.Append, which performs the identical
// grow/store/advance sequence; keeping the growth logic in one place
// matches the sibling helpers (e.g. dataSetBytes uses Byte.AppendMulti).
func (api *API) dataAddByte(b byte) {
	api.Byte.Append(b)
}
// dataSetBytes replaces the accepted output data with the provided bytes:
// it clears any previously accepted data, then appends the new bytes.
func (api *API) dataSetBytes(bytes ...byte) {
api.dataClear()
api.Byte.AppendMulti(bytes...)
}

View File

@ -1,7 +1,6 @@
package tokenize
import (
"fmt"
"unicode/utf8"
)
@ -10,100 +9,6 @@ type Input struct {
api *API
}
// PeekByte returns the byte at the provided byte offset.
//
// When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i *Input) PeekByte(offset int) (byte, error) {
return i.api.peekByte(offset)
}
func (api *API) peekByte(offset int) (byte, error) {
return api.reader.ByteAt(api.stackFrame.offset + offset)
}
// SkipByte is used to skip over a single bytes that was read from the input.
// This tells the tokenizer: "I've seen this byte. It is of no interest.
// I will now continue reading after this byte."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The byte is not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped byte.
func (i *Input) SkipByte(b byte) {
i.api.stackFrame.moveCursorByByte(b)
i.api.stackFrame.offset++
}
func (api *API) skipByte(b byte) {
api.stackFrame.moveCursorByByte(b)
api.stackFrame.offset++
}
// SkipBytes is used to skip over one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. They are of no interest.
// I will now continue reading after these bytes."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The bytes are not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped bytes.
func (i *Input) SkipBytes(bytes ...byte) {
i.api.skipBytes(bytes...)
}
func (api *API) skipBytes(bytes ...byte) {
for _, b := range bytes {
api.stackFrame.moveCursorByByte(b)
api.stackFrame.offset++
}
}
// AcceptByte is used to accept a single byte that was read from the input.
// This tells the tokenizer: "I've seen this byte. I want to make use of it
// for the final output, so please remember it for me. I will now continue
// reading after this byte."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the byte to the tokenizer
// output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted byte.
func (i *Input) AcceptByte(b byte) {
i.api.acceptByte(b)
}
func (api *API) acceptByte(b byte) {
api.dataAddByte(b)
api.skipByte(b)
}
// AcceptBytes is used to accept one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these bytes."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the bytes to the tokenizer
// output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes.
func (i *Input) AcceptBytes(bytes ...byte) {
i.api.acceptBytes(bytes...)
}
func (api *API) acceptBytes(bytes ...byte) {
api.dataAddBytes(bytes...)
api.skipBytes(bytes...)
}
// PeekRune returns the UTF8 rune at the provided byte offset, including its byte width.
//
// The byte width is useful to know what byte offset you'll have to use to peek
@ -184,17 +89,12 @@ func (api *API) acceptRune(r rune) {
curBytesEnd := api.stackFrame.bytesEnd
maxRequiredBytes := curBytesEnd + utf8.UTFMax
// Grow the runes capacity when needed.
if cap(api.outputData) < maxRequiredBytes {
newBytes := make([]byte, maxRequiredBytes*2)
copy(newBytes, api.outputData)
api.outputData = newBytes
}
api.stackFrame.moveCursorByRune(r)
api.growOutputData(maxRequiredBytes)
w := utf8.EncodeRune(api.outputData[curBytesEnd:], r)
api.stackFrame.bytesEnd += w
api.stackFrame.offset += w
api.stackFrame.moveCursorByRune(r)
}
// AcceptRunes is used to accept one or more runes that were read from the input.
@ -218,48 +118,12 @@ func (api *API) acceptRunes(runes ...rune) {
curBytesEnd := api.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + byteLen
// Grow the runes capacity when needed.
if cap(api.outputData) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, api.outputData)
api.outputData = newBytes
}
api.growOutputData(newBytesEnd)
copy(api.outputData[curBytesEnd:], runesAsString)
api.stackFrame.bytesEnd = newBytesEnd
api.stackFrame.offset += byteLen
for _, r := range runes {
api.stackFrame.moveCursorByRune(r)
}
copy(api.outputData[curBytesEnd:], runesAsString)
api.stackFrame.bytesEnd = newBytesEnd
api.stackFrame.offset += byteLen
}
// Flush flushes input data from the read.Buffer up to the current
// read offset of the parser.
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when possible.
func (i *Input) Flush() bool {
return i.api.flushInput()
}
func (api *API) flushInput() bool {
if api.stackFrame.offset > 0 {
api.reader.Flush(api.stackFrame.offset)
api.stackFrame.offset = 0
return true
}
return false
}
func (i *Input) Cursor() string {
return i.api.cursor()
}
func (api *API) cursor() string {
if api.stackFrame.line == 0 && api.stackFrame.column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1)
}

View File

@ -47,47 +47,12 @@ func (o *Output) SetBytes(bytes ...byte) {
o.api.dataSetBytes(bytes...)
}
func (api *API) dataSetBytes(bytes ...byte) {
api.dataClear()
api.dataAddBytes(bytes...)
}
func (o *Output) AddByte(b byte) {
o.api.dataAddByte(b)
}
func (api *API) dataAddByte(b byte) {
curBytesEnd := api.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + 1
// Grow the bytes capacity when needed.
if cap(api.outputData) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, api.outputData)
api.outputData = newBytes
}
api.stackFrame.bytesEnd++
api.outputData[curBytesEnd] = b
}
func (o *Output) AddBytes(bytes ...byte) {
o.api.dataAddBytes(bytes...)
}
func (api *API) dataAddBytes(bytes ...byte) {
curBytesEnd := api.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + len(bytes)
// Grow the runes capacity when needed.
if cap(api.outputData) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, api.outputData)
api.outputData = newBytes
}
copy(api.outputData[curBytesEnd:], bytes)
api.stackFrame.bytesEnd = newBytesEnd
o.api.Byte.AppendMulti(bytes...)
}
func (o *Output) SetRunes(runes ...rune) {
@ -104,15 +69,9 @@ func (o *Output) AddRunes(runes ...rune) {
}
func (api *API) dataAddRunes(runes ...rune) {
// Grow the runes capacity when needed.
runesAsString := string(runes)
newBytesEnd := api.stackFrame.bytesEnd + len(runesAsString)
if cap(api.outputData) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, api.outputData)
api.outputData = newBytes
}
api.growOutputData(newBytesEnd)
copy(api.outputData[api.stackFrame.bytesEnd:], runesAsString)
api.stackFrame.bytesEnd = newBytesEnd
}
@ -122,7 +81,7 @@ func (o *Output) AddString(s string) {
}
func (api *API) dataAddString(s string) {
api.dataAddBytes([]byte(s)...)
api.Byte.AppendMulti([]byte(s)...)
}
func (o *Output) SetString(s string) {
@ -189,20 +148,25 @@ func (o *Output) AddTokens(tokens ...Token) {
func (api *API) tokensAdd(tokens ...Token) {
// Grow the tokens capacity when needed.
newTokenEnd := api.stackFrame.tokenEnd + len(tokens)
if cap(api.outputTokens) < newTokenEnd {
type Func func(input interface{}) (*Result, error)
// Result holds the runes and tokens as produced by the tokenizer.
type Result struct {
Tokens []Token
Runes []rune
}
newTokens := make([]Token, newTokenEnd*2)
copy(newTokens, api.outputTokens)
api.outputTokens = newTokens
}
api.growOutputTokens(newTokenEnd)
for offset, t := range tokens {
api.outputTokens[api.stackFrame.tokenEnd+offset] = t
}
api.stackFrame.tokenEnd = newTokenEnd
}
// growOutputTokens makes sure the output token slice can hold at least
// requiredTokens entries, reallocating with headroom (double the required
// capacity) when it cannot.
func (api *API) growOutputTokens(requiredTokens int) {
	if cap(api.outputTokens) >= requiredTokens {
		return
	}
	grown := make([]Token, requiredTokens*2)
	copy(grown, api.outputTokens)
	api.outputTokens = grown
}
// growOutputData makes sure the output data slice can hold at least
// requiredBytes bytes, reallocating with headroom (double the required
// capacity) when it cannot.
func (api *API) growOutputData(requiredBytes int) {
	if cap(api.outputData) >= requiredBytes {
		return
	}
	grown := make([]byte, requiredBytes*2)
	copy(grown, api.outputData)
	api.outputData = grown
}

View File

@ -148,18 +148,18 @@ func ExampleAPI_Reset() {
api.Input.AcceptRune(r)
r, _, _ = api.Input.PeekRune(0) // read 'e'
api.Input.AcceptRune(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor())
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// Reset clears the results.
api.Reset()
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor())
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// So then doing the same read operations, the same data are read.
r, _, _ = api.Input.PeekRune(0) // read 'V'
api.Input.AcceptRune(r)
r, _, _ = api.Input.PeekRune(0) // read 'e'
api.Input.AcceptRune(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Input.Cursor())
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// Output:
// API results: "Ve" at line 1, column 3
@ -262,7 +262,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
AssertEqual(t, 'c', r, "child4 rune 3")
api.Input.AcceptRune(r)
AssertEqual(t, "c", api.Output.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Input.Cursor(), "cursor child4 rune 3")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
// Merge "c" from child4 to child3.
api.Merge(child4)
@ -272,7 +272,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// Child3 should now have the compbined results "abc" from child4's work.
AssertEqual(t, "abc", api.Output.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3.
r, _, _ = api.Input.PeekRune(0)
@ -308,7 +308,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api.Dispose(child3)
AssertEqual(t, "abcdef", api.Output.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Input.Cursor(), "cursor child2 after merge child3")
AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
// Merge child2 to child1 and dispose of it.
api.Merge(child2)
@ -328,7 +328,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api.Input.AcceptRune(r)
AssertEqual(t, "abcdefg", api.Output.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Input.Cursor(), "api cursor end result")
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
}
func TestClearData(t *testing.T) {

View File

@ -14,7 +14,7 @@ func TestMoveCursorByBytes(t *testing.T) {
api.stackFrame.moveCursorByByte('a')
api.stackFrame.moveCursorByByte('b')
AssertEqual(t, "line 2, column 3", api.Input.Cursor(), "Cursor position after moving by byte")
AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
}
func TestMoveCursorByRunes(t *testing.T) {
@ -26,7 +26,7 @@ func TestMoveCursorByRunes(t *testing.T) {
api.stackFrame.moveCursorByRune('\n')
api.stackFrame.moveCursorByRune('ǝ')
AssertEqual(t, "line 2, column 2", api.Input.Cursor(), "Cursor position after moving by rune")
AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
}
func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {

View File

@ -350,9 +350,9 @@ var T = struct {
// MatchByte creates a Handler function that matches against the provided byte.
func MatchByte(expected byte) Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err == nil && b == expected {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
return false
@ -378,13 +378,13 @@ func MatchRune(expected rune) Handler {
// one of the provided bytes. The first match counts.
func MatchBytes(expected ...byte) Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil {
return false
}
for _, e := range expected {
if b == e {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
}
@ -434,9 +434,9 @@ func MatchByteRange(start byte, end byte) Handler {
callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
}
return func(t *API) bool {
r, err := t.peekByte(0)
if err == nil && r >= start && r <= end {
t.acceptByte(r)
b, err := t.Byte.Peek(0)
if err == nil && b >= start && b <= end {
t.Byte.Accept(b)
return true
}
return false
@ -471,18 +471,18 @@ func MatchRuneRange(start rune, end rune) Handler {
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
func MatchNewline() Handler {
return func(t *API) bool {
b1, err := t.peekByte(0)
b1, err := t.Byte.Peek(0)
if err != nil {
return false
}
if b1 == '\n' {
t.acceptBytes(b1)
t.Byte.AcceptMulti(b1)
return true
}
if b1 == '\r' {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
if err == nil && b2 == '\n' {
t.acceptBytes(b1, b2)
t.Byte.AcceptMulti(b1, b2)
return true
}
}
@ -497,9 +497,9 @@ func MatchNewline() Handler {
// newlines, then take a look at MatchWhitespace().
func MatchBlank() Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err == nil && (b == ' ' || b == '\t') {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
return false
@ -516,20 +516,20 @@ func MatchBlank() Handler {
func MatchBlanks() Handler {
return func(t *API) bool {
// Match the first blank.
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') {
return false
}
t.acceptByte(b)
t.Byte.Accept(b)
// Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end.
for {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') {
return true
}
t.acceptByte(b)
t.Byte.Accept(b)
}
}
}
@ -540,35 +540,35 @@ func MatchBlanks() Handler {
func MatchWhitespace() Handler {
return func(t *API) bool {
// Match the first whitespace.
b1, err := t.peekByte(0)
b1, err := t.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return false
}
if b1 == '\r' {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
if err != nil || b2 != '\n' {
return false
}
t.acceptBytes(b1, b2)
t.Byte.AcceptMulti(b1, b2)
} else {
t.acceptByte(b1)
t.Byte.Accept(b1)
}
// Now match any number of followup whitespace. We've already got
// a successful match at this point, so we'll always return true at the end.
for {
b1, err := t.peekByte(0)
b1, err := t.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return true
}
if b1 == '\r' {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
if err != nil || b2 != '\n' {
return true
}
t.acceptBytes(b1, b2)
t.Byte.AcceptMulti(b1, b2)
} else {
t.acceptByte(b1)
t.Byte.Accept(b1)
}
}
}
@ -588,9 +588,9 @@ func MatchUnicodeSpace() Handler {
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchByteByCallback(callback func(byte) bool) Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err == nil && callback(b) {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
return false
@ -617,18 +617,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
func MatchEndOfLine() Handler {
return func(t *API) bool {
b1, err := t.peekByte(0)
b1, err := t.Byte.Peek(0)
if err != nil {
return err == io.EOF
}
if b1 == '\n' {
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
if b1 == '\r' {
b2, _ := t.peekByte(1)
b2, _ := t.Byte.Peek(1)
if b2 == '\n' {
t.acceptBytes(b1, b2)
t.Byte.AcceptMulti(b1, b2)
return true
}
}
@ -644,7 +644,7 @@ func MatchStr(expected string) Handler {
offset := 0
for _, e := range expectedRunes {
if e <= '\x7F' {
b, err := t.peekByte(offset)
b, err := t.Byte.Peek(offset)
if err != nil || b != byte(e) {
return false
}
@ -673,7 +673,7 @@ func MatchStrNoCase(expected string) Handler {
i := 0
for _, e := range expected {
if e <= '\x7F' {
b, err := t.peekByte(width)
b, err := t.Byte.Peek(width)
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
return false
}
@ -941,7 +941,7 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
func MakeInputFlusher(handler Handler) Handler {
return func(t *API) bool {
if handler(t) {
t.flushInput()
t.FlushInput()
return true
}
return false
@ -956,13 +956,13 @@ func MakeInputFlusher(handler Handler) Handler {
func MatchSigned(handler Handler) Handler {
return func(t *API) bool {
child := t.Fork()
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil {
t.Dispose(child)
return false
}
if b == '-' || b == '+' {
t.acceptByte(b)
t.Byte.Accept(b)
}
if handler(t) {
t.Merge(child)
@ -1002,7 +1002,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
func MatchEndOfFile() Handler {
return func(t *API) bool {
child := t.Fork()
_, err := t.peekByte(0)
_, err := t.Byte.Peek(0)
t.Dispose(child)
return err == io.EOF
}
@ -1018,9 +1018,9 @@ func MatchUntilEndOfLine() Handler {
// MatchAnyByte creates a Handler function that accepts any byte from the input.
func MatchAnyByte() Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err == nil {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
return false
@ -1078,19 +1078,19 @@ func MatchDigit() Handler {
func MatchDigits() Handler {
return func(t *API) bool {
// Check if the first character is a digit.
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
return false
}
t.acceptByte(b)
t.Byte.Accept(b)
// Continue accepting bytes as long as they are digits.
for {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
return true
}
t.acceptByte(b)
t.Byte.Accept(b)
}
}
}
@ -1109,7 +1109,7 @@ func MatchDigitNotZero() Handler {
func MatchInteger(normalize bool) Handler {
return func(t *API) bool {
// Check if the first character is a digit.
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
return false
}
@ -1117,33 +1117,33 @@ func MatchInteger(normalize bool) Handler {
// When normalization is requested, drop leading zeroes.
if normalize && b == '0' {
for {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
// The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b {
t.skipByte('0')
t.Byte.Skip('0')
continue
}
// The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' {
t.acceptByte('0')
t.Byte.Accept('0')
return true
}
// The next character is a digit. Skip the leading zero and go with the digit.
t.skipByte('0')
t.acceptByte(b2)
t.Byte.Skip('0')
t.Byte.Accept(b2)
break
}
}
// Continue accepting bytes as long as they are digits.
for {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
return true
}
t.acceptByte(b)
t.Byte.Accept(b)
}
}
}
@ -1158,7 +1158,7 @@ func MatchInteger(normalize bool) Handler {
func MatchDecimal(normalize bool) Handler {
return func(t *API) bool {
// Check if the first character is a digit.
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
return false
}
@ -1166,58 +1166,58 @@ func MatchDecimal(normalize bool) Handler {
// When normalization is requested, drop leading zeroes.
if normalize && b == '0' {
for {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
// The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b {
t.skipByte('0')
t.Byte.Skip('0')
continue
}
// The next character is a dot, go with the zero before the dot and
// let the upcoming code handle the dot.
if err == nil && b2 == '.' {
t.acceptByte('0')
t.Byte.Accept('0')
break
}
// The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' {
t.acceptByte('0')
t.Byte.Accept('0')
return true
}
// The next character is a digit. Skip the leading zero and go with the digit.
t.skipByte('0')
t.acceptByte(b2)
t.Byte.Skip('0')
t.Byte.Accept(b2)
break
}
}
// Continue accepting bytes as long as they are digits.
for {
b, err = t.peekByte(0)
b, err = t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
break
}
t.acceptBytes(b)
t.Byte.AcceptMulti(b)
}
// No dot or no digit after a dot? Then we're done.
if b != '.' {
return true
}
b, err = t.peekByte(1)
b, err = t.Byte.Peek(1)
if err != nil || b < '0' || b > '9' {
return true
}
// Continue accepting bytes as long as they are digits.
t.acceptBytes('.', b)
t.Byte.AcceptMulti('.', b)
for {
b, err = t.peekByte(0)
b, err = t.Byte.Peek(0)
if err != nil || b < '0' || b > '9' {
break
}
t.acceptByte(b)
t.Byte.Accept(b)
}
return true
}
@ -1232,52 +1232,52 @@ func MatchDecimal(normalize bool) Handler {
// False values: false, FALSE, False, 0, f, F
func MatchBoolean() Handler {
return func(t *API) bool {
b1, err := t.peekByte(0)
b1, err := t.Byte.Peek(0)
if err != nil {
return false
}
if b1 == '1' || b1 == '0' {
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
if b1 == 't' || b1 == 'T' {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
if err != nil || (b2 != 'R' && b2 != 'r') {
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
b3, _ := t.peekByte(2)
b4, err := t.peekByte(3)
b3, _ := t.Byte.Peek(2)
b4, err := t.Byte.Peek(3)
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
t.acceptBytes(b1, b2, b3, b4)
t.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
t.acceptBytes(b1, b2, b3, b4)
t.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
if b1 == 'f' || b1 == 'F' {
b2, err := t.peekByte(1)
b2, err := t.Byte.Peek(1)
if err != nil || (b2 != 'A' && b2 != 'a') {
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
b3, _ := t.peekByte(2)
b4, _ := t.peekByte(3)
b5, err := t.peekByte(4)
b3, _ := t.Byte.Peek(2)
b4, _ := t.Byte.Peek(3)
b5, err := t.Byte.Peek(4)
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
t.acceptBytes(b1, b2, b3, b4, b5)
t.Byte.AcceptMulti(b1, b2, b3, b4, b5)
return true
}
if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' {
t.acceptBytes(b1, b2, b3, b4, b5)
t.Byte.AcceptMulti(b1, b2, b3, b4, b5)
return true
}
t.acceptByte(b1)
t.Byte.Accept(b1)
return true
}
return false
@ -1324,9 +1324,9 @@ func MatchUnicodeLower() Handler {
// digit can be read from the input.
func MatchHexDigit() Handler {
return func(t *API) bool {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
t.acceptByte(b)
t.Byte.Accept(b)
return true
}
return false
@ -1344,28 +1344,28 @@ func MatchHexDigit() Handler {
func MatchOctet(normalize bool) Handler {
return func(t *API) bool {
// Digit 1
b0, err := t.peekByte(0)
b0, err := t.Byte.Peek(0)
if err != nil || b0 < '0' || b0 > '9' {
return false
}
// Digit 2
b1, err := t.peekByte(1)
b1, err := t.Byte.Peek(1)
if err != nil || b1 < '0' || b1 > '9' {
// Output 1-digit octet.
t.acceptByte(b0)
t.Byte.Accept(b0)
return true
}
// Digit 3
b2, err := t.peekByte(2)
b2, err := t.Byte.Peek(2)
if err != nil || b2 < '0' || b2 > '9' {
// Output 2-digit octet.
if normalize && b0 == '0' {
t.skipByte(b0)
t.acceptByte(b1)
t.Byte.Skip(b0)
t.Byte.Accept(b1)
} else {
t.acceptBytes(b0, b1)
t.Byte.AcceptMulti(b0, b1)
}
return true
}
@ -1377,15 +1377,15 @@ func MatchOctet(normalize bool) Handler {
// Output 3-digit octet.
if normalize && b0 == '0' {
t.skipByte(b0)
t.Byte.Skip(b0)
if b1 == '0' {
t.skipByte(b1)
t.Byte.Skip(b1)
} else {
t.acceptByte(b1)
t.Byte.Accept(b1)
}
t.acceptByte(b2)
t.Byte.Accept(b2)
} else {
t.acceptBytes(b0, b1, b2)
t.Byte.AcceptMulti(b0, b1, b2)
}
return true
}
@ -1586,7 +1586,7 @@ func ModifyDrop(handler Handler) Handler {
func ModifyDropUntilEndOfLine() Handler {
return func(t *API) bool {
for {
b, err := t.peekByte(0)
b, err := t.Byte.Peek(0)
if err != nil {
if err == io.EOF {
return true
@ -1596,7 +1596,7 @@ func ModifyDropUntilEndOfLine() Handler {
if b == '\n' {
return true
}
t.skipByte(b)
t.Byte.Skip(b)
}
}
}

View File

@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func {
ok := tokenHandler(api)
if !ok {
err := fmt.Errorf("mismatch at %s", api.cursor())
err := fmt.Errorf("mismatch at %s", api.Cursor())
return nil, err
}
result := &Result{

View File

@ -134,22 +134,22 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Input.Cursor(), "cursor 1")
AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r)
}
AssertEqual(t, "line 1, column 7", i.Input.Cursor(), "cursor 2")
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
r, _, _ := i.Input.PeekRune(0) // read "\n", cursor ends up at start of new line
i.Input.AcceptRune(r)
AssertEqual(t, "line 2, column 1", i.Input.Cursor(), "cursor 3")
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
b, _ := i.Input.PeekByte(0)
i.Input.AcceptByte(b)
b, _ := i.Byte.Peek(0)
i.Byte.Accept(b)
}
AssertEqual(t, "line 3, column 5", i.Input.Cursor(), "cursor 4")
AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
}
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {

View File

@ -63,17 +63,17 @@ func TestFlushInput(t *testing.T) {
// Flushing without any read data is okay. FlushInput() will return
// false in this case, and nothing else happens.
AssertTrue(t, i.Input.Flush() == false, "flush input at start")
AssertTrue(t, i.FlushInput() == false, "flush input at start")
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r) // c
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // o
AssertTrue(t, i.Input.Flush() == true, "flush input after reading some data")
AssertTrue(t, i.FlushInput() == true, "flush input after reading some data")
AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input")
AssertTrue(t, i.Input.Flush() == false, "flush input after flush input")
AssertTrue(t, i.FlushInput() == false, "flush input after flush input")
// Read offset is now zero, but reading should continue after "co".
// The output so far isn't modified, so the following accept calls