Fixing some naming inconsistencies.

This commit is contained in:
Maurice Makaay 2019-07-23 17:55:13 +00:00
parent a968f22d45
commit 7037c6d24a
14 changed files with 750 additions and 700 deletions

View File

@ -22,64 +22,99 @@ type API struct {
stopped bool // a boolean set to true by Stop() stopped bool // a boolean set to true by Stop()
} }
// Peek checks if the upcoming input data matches the provided tokenize.Handler. // PeekWithResult checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned, false otherwise. The read cursor // If it does, then true will be returned, false otherwise. The read cursor
// will be kept at the same position, so the next call to Peek() or Accept() // will be kept at the same position, so the next call to Peek() or Accept()
// will start from the same cursor position. // will start from the same cursor position.
func (p *API) PeekWithResult(tokenHandler tokenize.Handler) bool { //
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) // On a successful peek, the results (data + tokens) are returned by the peek.
t := p.tokenAPI // They are availablel (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
if ok { if ok {
p.Result.Tokens = t.Output.Tokens() parseAPI.Result.Tokens = t.Output.Tokens()
p.Result.Runes = t.Output.Runes() parseAPI.Result.Runes = t.Output.Runes()
} }
t.Dispose(forkedAPI) t.Dispose(forkedAPI)
return ok return ok
} }
func (p *API) Peek(tokenHandler tokenize.Handler) bool { // Peek checks if the upcoming input data matches the provided tokenize.Handler.
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) // If it does, then true will be returned, false otherwise. The read cursor
t := p.tokenAPI // will be kept at the same position, so the next call to Peek() or Accept()
p.Result.Tokens = nil // will start from the same cursor position.
p.Result.Runes = nil //
// No results (data + tokens) are returned by Peek(). If you want access to the data
// through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
t.Dispose(forkedAPI) t.Dispose(forkedAPI)
return ok return ok
} }
// PeekChars is a very lightweight peek command, which takes a look at one or
// more upcoming characters on the input data.
//
// If you need more complex logic for checking the upcoming input data, then
// make use of the Peek() method with a tokenize.Handler function instead.
func (parseAPI *API) PeekChars(chars ...rune) bool {
	at := 0
	for _, expected := range chars {
		if expected <= 0x1F {
			// Control characters are single-byte in UTF8, so a cheap
			// byte-wise comparison is enough for these.
			actual, err := parseAPI.tokenAPI.Byte.Peek(at)
			if err != nil || actual != byte(expected) {
				return false
			}
			at++
			continue
		}
		actual, width, err := parseAPI.tokenAPI.Rune.Peek(at)
		if err != nil || actual != expected {
			return false
		}
		at += width
	}
	return true
}
// Accept checks if the upcoming input data matches the provided tokenize.Handler. // Accept checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned and the read cursor will be moved // If it does, then true will be returned and the read cursor will be moved
// forward to beyond the match that was found. Otherwise false will be // forward to beyond the match that was found. Otherwise false will be
// and the read cursor will stay at the same position. // and the read cursor will stay at the same position.
// //
// After calling this method, you can retrieve the results using the Result() method. // After calling this method, you can retrieve the results using the Result() method.
func (p *API) Accept(tokenHandler tokenize.Handler) bool { func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
t := p.tokenAPI t := parseAPI.tokenAPI
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler) forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler)
if ok { if ok {
// Keep track of the results as produced by this child. // Keep track of the results as produced by this child.
p.Result.Tokens = t.Output.Tokens() parseAPI.Result.Tokens = t.Output.Tokens()
p.Result.Runes = t.Output.Runes() parseAPI.Result.Runes = t.Output.Runes()
// Merge to the parent level. // Merge to the parent level.
t.Merge(forkedAPI) t.Merge(forkedAPI)
t.Dispose(forkedAPI) t.Dispose(forkedAPI)
// And flush the input reader buffer. // And flush the input reader buffer.
t.FlushInput() t.Input.Flush()
} else { } else {
t.Dispose(forkedAPI) t.Dispose(forkedAPI)
} }
return ok return ok
} }
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) { func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
p.panicWhenStoppedOrInError(name) parseAPI.panicWhenStoppedOrInError(name)
if tokenHandler == nil { if tokenHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
} }
child := p.tokenAPI.Fork() child := parseAPI.tokenAPI.Fork()
ok := tokenHandler(p.tokenAPI) ok := tokenHandler(parseAPI.tokenAPI)
return child, ok return child, ok
} }
@ -91,13 +126,13 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bo
// Basically, this guard helps with proper coding of parsers, making sure // Basically, this guard helps with proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime // that clean routes are followed. You can consider this check a runtime
// unit test. // unit test.
func (p *API) panicWhenStoppedOrInError(name string) { func (parseAPI *API) panicWhenStoppedOrInError(name string) {
if !p.IsStoppedOrInError() { if !parseAPI.IsStoppedOrInError() {
return return
} }
after := "Error()" after := "Error()"
if p.stopped { if parseAPI.stopped {
after = "Stop()" after = "Stop()"
} }
callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+ callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+
@ -107,8 +142,8 @@ func (p *API) panicWhenStoppedOrInError(name string) {
// IsStoppedOrInError checks if the parser has stopped or if an error was set. // IsStoppedOrInError checks if the parser has stopped or if an error was set.
// When true, then the parser can no longer continue. If your parser tries to // When true, then the parser can no longer continue. If your parser tries to
// call parse.API methods when true is returned, this will result in a panic. // call parse.API methods when true is returned, this will result in a panic.
func (p *API) IsStoppedOrInError() bool { func (parseAPI *API) IsStoppedOrInError() bool {
return p.stopped || p.err != nil return parseAPI.stopped || parseAPI.err != nil
} }
// Handle executes other parse.Handler functions from within the active // Handle executes other parse.Handler functions from within the active
@ -118,27 +153,21 @@ func (p *API) IsStoppedOrInError() bool {
// It will be false when either an error was set using Error(), or the // It will be false when either an error was set using Error(), or the
// parser was stopped using Stop(). // parser was stopped using Stop().
// //
// When multiple parse.Handler functions are provided as arguments, they
// will be executed in the provided order. When one of those handlers stops
// the parser or sets an error, then the following handlers will not be called.
//
// Instead of calling another handler using this method, you can also call // Instead of calling another handler using this method, you can also call
// that other handler directly. However, it is generally advised to make use // that other handler directly. However, it is generally advised to make use
// of this method, because it performs some sanity checks and it will return // of this method, because it performs some sanity checks and it will return
// an easy to use boolean indicating whether the parser can continue or not. // an easy to use boolean indicating whether the parser can continue or not.
func (p *API) Handle(parseHandler ...Handler) bool { func (parseAPI *API) Handle(handler Handler) bool {
p.panicWhenStoppedOrInError("Handle") parseAPI.panicWhenStoppedOrInError("Handle")
for _, handler := range parseHandler { parseAPI.panicWhenHandlerNil("Handle", handler)
p.panicWhenHandlerNil("Handle", handler) handler(parseAPI)
handler(p) if parseAPI.IsStoppedOrInError() {
if p.IsStoppedOrInError() {
return false return false
} }
}
return true return true
} }
func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) { func (parseAPI *API) panicWhenHandlerNil(name string, parseHandler Handler) {
if parseHandler == nil { if parseHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}") callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}")
} }
@ -157,8 +186,8 @@ func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) {
// //
// After stopping, no more calls to API methods are allowed. // After stopping, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic. // Calling a method in this state will result in a panic.
func (p *API) Stop() { func (parseAPI *API) Stop() {
p.stopped = true parseAPI.stopped = true
} }
// Error sets the error message in the API. // Error sets the error message in the API.
@ -166,11 +195,11 @@ func (p *API) Stop() {
// After setting an error, no more calls to API methods are allowed. // After setting an error, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic. // Calling a method in this state will result in a panic.
// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example? // TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
func (p *API) Error(format string, data ...interface{}) { func (parseAPI *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to // No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed. // set a different error message when needed.
message := fmt.Sprintf(format, data...) message := fmt.Sprintf(format, data...)
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor()) parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor())
} }
// ExpectEndOfFile can be used to check if the input is at end of file. // ExpectEndOfFile can be used to check if the input is at end of file.
@ -178,12 +207,12 @@ func (p *API) Error(format string, data ...interface{}) {
// When it finds that the end of the file was indeed reached, then the parser // When it finds that the end of the file was indeed reached, then the parser
// will be stopped through Stop(). Otherwise, the unexpected input is reported // will be stopped through Stop(). Otherwise, the unexpected input is reported
// using Expected("end of file"). // using Expected("end of file").
func (p *API) ExpectEndOfFile() { func (parseAPI *API) ExpectEndOfFile() {
p.panicWhenStoppedOrInError("ExpectEndofFile") parseAPI.panicWhenStoppedOrInError("ExpectEndofFile")
if p.Peek(tokenize.A.EndOfFile) { if parseAPI.Peek(tokenize.A.EndOfFile) {
p.Stop() parseAPI.Stop()
} else { } else {
p.Expected("end of file") parseAPI.Expected("end of file")
} }
} }
@ -200,16 +229,16 @@ func (p *API) ExpectEndOfFile() {
// • the end of the input was reached // • the end of the input was reached
// //
// • there was an error while reading the input. // • there was an error while reading the input.
func (p *API) Expected(expected string) { func (parseAPI *API) Expected(expected string) {
p.panicWhenStoppedOrInError("Expected") parseAPI.panicWhenStoppedOrInError("Expected")
_, err := p.tokenAPI.Byte.Peek(0) _, err := parseAPI.tokenAPI.Byte.Peek(0)
switch { switch {
case err == nil: case err == nil:
p.Error("unexpected input%s", fmtExpects(expected)) parseAPI.Error("unexpected input%s", fmtExpects(expected))
case err == io.EOF: case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(expected)) parseAPI.Error("unexpected end of file%s", fmtExpects(expected))
default: default:
p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected))
} }
} }

View File

@ -1,8 +1,6 @@
package tokenize package tokenize
import ( import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read" "git.makaay.nl/mauricem/go-parsekit/read"
) )
@ -76,12 +74,15 @@ type API struct {
stackFrames []stackFrame // the stack frames, containing stack level-specific dat stackFrames []stackFrame // the stack frames, containing stack level-specific dat
stackLevel int // the current stack level stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame stackFrame *stackFrame // the current stack frame
reader *read.Buffer // the buffered input reader reader *read.Buffer // the buffered input reader
Byte ByteMode // access to a set of byte-based input methods Input Input // provides input-related functionality
Rune RuneMode // access to a set of rune-based input methods Byte InputByteMode // access to a set of byte-based input methods
Rune InputRuneMode // access to a set of rune-based input methods
Output Output // provides output-related functionality Output Output // provides output-related functionality
outputTokens []Token // accepted tokens outputTokens []Token // accepted tokens
outputData []byte // accepted data outputBytes []byte // accepted bytes
} }
type stackFrame struct { type stackFrame struct {
@ -106,16 +107,19 @@ const initialByteStoreLength = 1024
// for parsekit.read.New(). // for parsekit.read.New().
func NewAPI(input interface{}) *API { func NewAPI(input interface{}) *API {
reader := read.New(input) reader := read.New(input)
api := &API{ tokenAPI := &API{
stackFrames: make([]stackFrame, initialStackDepth), stackFrames: make([]stackFrame, initialStackDepth),
outputBytes: make([]byte, initialByteStoreLength),
outputTokens: make([]Token, initialTokenStoreLength),
reader: reader, reader: reader,
} }
api.Byte = ByteMode{api: api, reader: reader} tokenAPI.Input = Input{api: tokenAPI, reader: reader}
api.Rune = RuneMode{api: api, reader: reader} tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader}
api.Output = Output{api: api} tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
api.stackFrame = &api.stackFrames[0] tokenAPI.Output = Output{api: tokenAPI}
tokenAPI.stackFrame = &tokenAPI.stackFrames[0]
return api return tokenAPI
} }
// Fork forks off a child of the API struct. It will reuse the same // Fork forks off a child of the API struct. It will reuse the same
@ -214,14 +218,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
// Reset moves the read cursor back to the beginning for the currently active API child. // Reset moves the read cursor back to the beginning for the currently active API child.
// Aditionally, all output (bytes and tokens) that was emitted from the API child is // Aditionally, all output (bytes and tokens) that was emitted from the API child is
// cleared as well. // cleared as well.
func (api *API) Reset() { func (tokenAPI *API) Reset() {
f := api.stackFrame f := tokenAPI.stackFrame
if api.stackLevel == 0 { if tokenAPI.stackLevel == 0 {
f.column = 0 f.column = 0
f.line = 0 f.line = 0
f.offset = 0 f.offset = 0
} else { } else {
parent := api.stackFrames[api.stackLevel-1] parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
f.column = parent.column f.column = parent.column
f.line = parent.line f.line = parent.line
f.offset = parent.offset f.offset = parent.offset
@ -245,25 +249,3 @@ func (tokenAPI *API) Dispose(stackLevel int) {
tokenAPI.stackLevel = stackLevel - 1 tokenAPI.stackLevel = stackLevel - 1
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1] tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
} }
// FlushInput flushes input data from the read.Buffer up to the current
// read offset of the parser.
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when possible.
func (api *API) FlushInput() bool {
if api.stackFrame.offset > 0 {
api.reader.Flush(api.stackFrame.offset)
api.stackFrame.offset = 0
return true
}
return false
}
func (api *API) Cursor() string {
if api.stackFrame.line == 0 && api.stackFrame.column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1)
}

View File

@ -2,8 +2,8 @@ package tokenize
import "git.makaay.nl/mauricem/go-parsekit/read" import "git.makaay.nl/mauricem/go-parsekit/read"
// ByteMode provides byte-driven input/output functionality for the tokenize API. // InputByteMode provides byte-driven input/output functionality for the tokenize API.
type ByteMode struct { type InputByteMode struct {
api *API api *API
reader *read.Buffer // the buffered input reader reader *read.Buffer // the buffered input reader
} }
@ -13,11 +13,11 @@ type ByteMode struct {
// When an error occurs during reading the input, an error will be returned. // When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input // When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF. // data, then the error will be io.EOF.
func (byteMode ByteMode) Peek(offset int) (byte, error) { func (byteMode InputByteMode) Peek(offset int) (byte, error) {
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset) return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
} }
func (byteMode ByteMode) Accept(b byte) { func (byteMode InputByteMode) Accept(b byte) {
byteMode.api.Output.AddByte(b) byteMode.api.Output.AddByte(b)
byteMode.MoveCursor(b) byteMode.MoveCursor(b)
} }
@ -33,7 +33,7 @@ func (byteMode ByteMode) Accept(b byte) {
// //
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes. // the first byte after the accepted bytes.
func (byteMode ByteMode) AcceptMulti(bytes ...byte) { func (byteMode InputByteMode) AcceptMulti(bytes ...byte) {
byteMode.api.Output.AddBytes(bytes...) byteMode.api.Output.AddBytes(bytes...)
byteMode.MoveCursorMulti(bytes...) byteMode.MoveCursorMulti(bytes...)
} }
@ -44,7 +44,7 @@ func (byteMode ByteMode) AcceptMulti(bytes ...byte) {
// //
// After the call, byte offset 0 for Peek() and PeekMulti() will point at // After the call, byte offset 0 for Peek() and PeekMulti() will point at
// the first byte at the new cursor position. // the first byte at the new cursor position.
func (byteMode ByteMode) MoveCursor(b byte) { func (byteMode InputByteMode) MoveCursor(b byte) {
f := byteMode.api.stackFrame f := byteMode.api.stackFrame
if b == '\n' { if b == '\n' {
f.column = 0 f.column = 0
@ -62,7 +62,7 @@ func (byteMode ByteMode) MoveCursor(b byte) {
// //
// After the call, byte offset 0 for Peek() and PeekMulti() will point at // After the call, byte offset 0 for Peek() and PeekMulti() will point at
// the first byte at the new cursor position. // the first byte at the new cursor position.
func (byteMode ByteMode) MoveCursorMulti(bytes ...byte) { func (byteMode InputByteMode) MoveCursorMulti(bytes ...byte) {
for _, b := range bytes { for _, b := range bytes {
byteMode.MoveCursor(b) byteMode.MoveCursor(b)
} }

39
tokenize/api_input.go Normal file
View File

@ -0,0 +1,39 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
// Input provides input-related functionality for the tokenize API,
// which is not specifically bound to a specific read mode (byte, rune).
type Input struct {
	api    *API         // the tokenize API that this Input provides functionality for
	reader *read.Buffer // the buffered input reader
}
// Cursor returns a string that describes the current read cursor position.
// Before any cursor movement it reports "start of file"; afterwards it
// reports the 1-based line and column of the cursor.
func (i Input) Cursor() string {
	f := i.api.stackFrame
	if f.line == 0 && f.column == 0 {
		// A constant string needs no Sprintf (go vet / staticcheck S1039).
		return "start of file"
	}
	// line and column are stored zero-based; report them one-based.
	return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
}
// Flush flushes input data from the read buffer up to the current
// read cursor position of the tokenizer.
//
// Note: in most cases, you won't have to call this method yourself.
// Parsekit will call this method at points where it knows it is a
// safe thing to do.
func (i Input) Flush() bool {
	frame := i.api.stackFrame
	if frame.offset == 0 {
		// Nothing was read since the last flush, so there is nothing to do.
		return false
	}
	i.reader.Flush(frame.offset)
	frame.offset = 0
	return true
}

View File

@ -0,0 +1,65 @@
package tokenize
import (
"testing"
)
// TestMoveCursorByBytes moves the cursor byte-by-byte over input containing
// a \r\n line break and checks the resulting cursor position and offset.
func TestMoveCursorByBytes(t *testing.T) {
	tokenAPI := NewAPI("")
	for _, b := range []byte("abc\r\nab") {
		tokenAPI.Byte.MoveCursor(b)
	}
	AssertEqual(t, "line 2, column 3", tokenAPI.Input.Cursor(), "Cursor position after moving by byte")
	AssertEqual(t, 7, tokenAPI.stackFrame.offset, "Offset after moving by byte")
}
// TestMoveCursorByRunes moves the cursor rune-by-rune over input containing
// multi-byte UTF8 runes and a \r\n line break, then checks the resulting
// cursor position and byte offset.
func TestMoveCursorByRunes(t *testing.T) {
	tokenAPI := NewAPI("")
	for _, r := range "ɹnu\r\nǝ" {
		tokenAPI.Rune.MoveCursor(r)
	}
	AssertEqual(t, "line 2, column 2", tokenAPI.Input.Cursor(), "Cursor position after moving by rune")
	AssertEqual(t, 8, tokenAPI.stackFrame.offset, "Offset after moving by rune")
}
// TestWhenMovingCursor_CursorPositionIsUpdated feeds various inputs
// (ASCII, newlines, carriage returns, multi-byte UTF8 and multiple
// sequential strings) through Rune.MoveCursor and verifies that the
// line/column bookkeeping in the stack frame is updated correctly.
func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
	for _, test := range []struct {
		name string
		input []string
		byte int // NOTE(review): never asserted below — confirm whether a byte offset check is missing
		rune int // NOTE(review): never asserted below — confirm whether a rune count check is missing
		line int // expected zero-based line after moving over all input
		column int // expected zero-based column after moving over all input
	}{
		{"No input at all", []string{""}, 0, 0, 0, 0},
		{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
		{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
		{"One newline", []string{"\n"}, 1, 1, 1, 0},
		{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
		{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
	} {
		// The API input is irrelevant here; the cursor is moved manually,
		// so the reader never consumes test.input.
		tokenAPI := NewAPI("")
		for _, s := range test.input {
			for _, r := range s {
				tokenAPI.Rune.MoveCursor(r)
			}
		}
		if tokenAPI.stackFrame.line != test.line {
			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, tokenAPI.stackFrame.line, test.line)
		}
		if tokenAPI.stackFrame.column != test.column {
			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, tokenAPI.stackFrame.column, test.column)
		}
	}
}

View File

@ -12,7 +12,7 @@ type Output struct {
func (o Output) String() string { func (o Output) String() string {
a := o.api a := o.api
f := a.stackFrame f := a.stackFrame
bytes := a.outputData[f.bytesStart:f.bytesEnd] bytes := a.outputBytes[f.bytesStart:f.bytesEnd]
return string(bytes) return string(bytes)
} }
@ -22,7 +22,7 @@ func (o Output) Runes() []rune {
func (o Output) Rune(offset int) rune { func (o Output) Rune(offset int) rune {
a := o.api a := o.api
r, _ := utf8.DecodeRune(a.outputData[a.stackFrame.bytesStart+offset:]) r, _ := utf8.DecodeRune(a.outputBytes[a.stackFrame.bytesStart+offset:])
return r return r
} }
@ -41,7 +41,7 @@ func (o Output) AddByte(b byte) {
f := a.stackFrame f := a.stackFrame
curBytesEnd := f.bytesEnd curBytesEnd := f.bytesEnd
a.growOutputData(curBytesEnd + 1) a.growOutputData(curBytesEnd + 1)
a.outputData[curBytesEnd] = b a.outputBytes[curBytesEnd] = b
f.bytesEnd++ f.bytesEnd++
} }
@ -56,7 +56,7 @@ func (o Output) AddBytes(bytes ...byte) {
curBytesEnd := f.bytesEnd curBytesEnd := f.bytesEnd
newBytesEnd := curBytesEnd + len(bytes) newBytesEnd := curBytesEnd + len(bytes)
a.growOutputData(newBytesEnd) a.growOutputData(newBytesEnd)
copy(a.outputData[curBytesEnd:], bytes) copy(a.outputBytes[curBytesEnd:], bytes)
f.bytesEnd = newBytesEnd f.bytesEnd = newBytesEnd
} }
@ -66,7 +66,7 @@ func (o Output) AddRunes(runes ...rune) {
runesAsString := string(runes) runesAsString := string(runes)
newBytesEnd := f.bytesEnd + len(runesAsString) newBytesEnd := f.bytesEnd + len(runesAsString)
a.growOutputData(newBytesEnd) a.growOutputData(newBytesEnd)
copy(a.outputData[f.bytesEnd:], runesAsString) copy(a.outputBytes[f.bytesEnd:], runesAsString)
f.bytesEnd = newBytesEnd f.bytesEnd = newBytesEnd
} }
@ -133,9 +133,9 @@ func (api *API) growOutputTokens(requiredTokens int) {
} }
func (api *API) growOutputData(requiredBytes int) { func (api *API) growOutputData(requiredBytes int) {
if cap(api.outputData) < requiredBytes { if cap(api.outputBytes) < requiredBytes {
newBytes := make([]byte, requiredBytes*2) newBytes := make([]byte, requiredBytes*2)
copy(newBytes, api.outputData) copy(newBytes, api.outputBytes)
api.outputData = newBytes api.outputBytes = newBytes
} }
} }

View File

@ -6,8 +6,8 @@ import (
"git.makaay.nl/mauricem/go-parsekit/read" "git.makaay.nl/mauricem/go-parsekit/read"
) )
// RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API. // InputRuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
type RuneMode struct { type InputRuneMode struct {
api *API api *API
reader *read.Buffer // the buffered input reader reader *read.Buffer // the buffered input reader
} }
@ -25,7 +25,7 @@ type RuneMode struct {
// When an error occurs during reading the input, an error will be returned. // When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input // When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF. // data, then the error will be io.EOF.
func (runeMode RuneMode) Peek(offset int) (rune, int, error) { func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset) return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
} }
@ -40,14 +40,14 @@ func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
// //
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune. // the first byte after the accepted rune.
func (runeMode RuneMode) Accept(r rune) { func (runeMode InputRuneMode) Accept(r rune) {
a := runeMode.api a := runeMode.api
f := a.stackFrame f := a.stackFrame
curBytesEnd := f.bytesEnd curBytesEnd := f.bytesEnd
maxRequiredBytes := curBytesEnd + utf8.UTFMax maxRequiredBytes := curBytesEnd + utf8.UTFMax
a.growOutputData(maxRequiredBytes) a.growOutputData(maxRequiredBytes)
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r) w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
f.bytesEnd += w f.bytesEnd += w
runeMode.MoveCursor(r) runeMode.MoveCursor(r)
@ -64,7 +64,7 @@ func (runeMode RuneMode) Accept(r rune) {
// //
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted runes. // the first byte after the accepted runes.
func (runeMode RuneMode) AcceptMulti(runes ...rune) { func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
a := runeMode.api a := runeMode.api
f := a.stackFrame f := a.stackFrame
@ -73,7 +73,7 @@ func (runeMode RuneMode) AcceptMulti(runes ...rune) {
a.growOutputData(maxBytes) a.growOutputData(maxBytes)
for _, r := range runes { for _, r := range runes {
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r) w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
curBytesEnd += w curBytesEnd += w
runeMode.MoveCursor(r) runeMode.MoveCursor(r)
} }
@ -86,7 +86,7 @@ func (runeMode RuneMode) AcceptMulti(runes ...rune) {
// //
// After the call, byte offset 0 for Peek() and PeekMulti() will point at // After the call, byte offset 0 for Peek() and PeekMulti() will point at
// the first rune at the new cursor position. // the first rune at the new cursor position.
func (runeMode RuneMode) MoveCursor(r rune) int { func (runeMode InputRuneMode) MoveCursor(r rune) int {
f := runeMode.api.stackFrame f := runeMode.api.stackFrame
if r == '\n' { if r == '\n' {
f.column = 0 f.column = 0
@ -106,7 +106,7 @@ func (runeMode RuneMode) MoveCursor(r rune) int {
// //
// After the call, byte offset 0 for Peek() and PeekMulti() will point at // After the call, byte offset 0 for Peek() and PeekMulti() will point at
// the first rune at the new cursor position. // the first rune at the new cursor position.
func (runeMode RuneMode) MoveCursorMulti(runes ...rune) { func (runeMode InputRuneMode) MoveCursorMulti(runes ...rune) {
for _, r := range runes { for _, r := range runes {
runeMode.MoveCursor(r) runeMode.MoveCursor(r)
} }

View File

@ -13,12 +13,12 @@ func ExampleNewAPI() {
} }
func ExampleAPI_PeekByte() { func ExampleAPI_PeekByte() {
api := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
r1, _, err := api.Rune.Peek(19) // 'A', r1, _, err := tokenAPI.Rune.Peek(19) // 'A',
r2, _, err := api.Rune.Peek(20) // 'P' r2, _, err := tokenAPI.Rune.Peek(20) // 'P'
r3, _, err := api.Rune.Peek(21) // 'I' r3, _, err := tokenAPI.Rune.Peek(21) // 'I'
_, _, err = api.Rune.Peek(100) // EOF _, _, err = tokenAPI.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -27,12 +27,12 @@ func ExampleAPI_PeekByte() {
} }
func ExampleAPI_PeekRune() { func ExampleAPI_PeekRune() {
api := tokenize.NewAPI("The input that the ДPI will handle") tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
r1, _, err := api.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21 r1, _, err := tokenAPI.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21
r2, _, err := api.Rune.Peek(21) // 'P' r2, _, err := tokenAPI.Rune.Peek(21) // 'P'
r3, _, err := api.Rune.Peek(22) // 'I' r3, _, err := tokenAPI.Rune.Peek(22) // 'I'
_, _, err = api.Rune.Peek(100) // EOF _, _, err = tokenAPI.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -41,49 +41,49 @@ func ExampleAPI_PeekRune() {
} }
func ExampleAPI_AcceptRune() { func ExampleAPI_AcceptRune() {
api := tokenize.NewAPI("The input that the ДPI will handle") tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
// Reads 'T' and accepts it to the API output data. // Reads 'T' and accepts it to the API output data.
r, _, _ := api.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
// Reads 'h' and accepts it to the API output data. // Reads 'h' and accepts it to the API output data.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
// Reads 'e', but does not accept it to the API output data. // Reads 'e', but does not accept it to the API output data.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
fmt.Printf("API results: %q\n", api.Output.String()) fmt.Printf("API results: %q\n", tokenAPI.Output.String())
// Output: // Output:
// API results: "Th" // API results: "Th"
} }
func ExampleAPI_AcceptRunes() { func ExampleAPI_AcceptRunes() {
api := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
// Peeks at the first two runes 'T' and 'h'. // Peeks at the first two runes 'T' and 'h'.
r0, _, _ := api.Rune.Peek(0) r0, _, _ := tokenAPI.Rune.Peek(0)
r1, _, _ := api.Rune.Peek(1) r1, _, _ := tokenAPI.Rune.Peek(1)
// Peeks at the third rune 'e'. // Peeks at the third rune 'e'.
api.Rune.Peek(2) tokenAPI.Rune.Peek(2)
// Accepts only 'T' and 'h' into the API results. // Accepts only 'T' and 'h' into the API results.
api.Rune.AcceptMulti(r0, r1) tokenAPI.Rune.AcceptMulti(r0, r1)
fmt.Printf("API results: %q\n", api.Output.String()) fmt.Printf("API results: %q\n", tokenAPI.Output.String())
// Output: // Output:
// API results: "Th" // API results: "Th"
} }
func ExampleAPI_SkipRune() { func ExampleAPI_SkipRune() {
api := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
for { for {
r, _, err := api.Rune.Peek(0) r, _, err := tokenAPI.Rune.Peek(0)
// EOF reached. // EOF reached.
if err != nil { if err != nil {
@ -92,44 +92,44 @@ func ExampleAPI_SkipRune() {
// Only accept runes that are vowels. // Only accept runes that are vowels.
if strings.ContainsRune("aeiouAEIOU", r) { if strings.ContainsRune("aeiouAEIOU", r) {
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
} else { } else {
api.Rune.MoveCursor(r) tokenAPI.Rune.MoveCursor(r)
} }
} }
fmt.Printf("API results: %q\n", api.Output.String()) fmt.Printf("API results: %q\n", tokenAPI.Output.String())
// Output: // Output:
// API results: "eiuaeAIiae" // API results: "eiuaeAIiae"
} }
func ExampleAPI_modifyingResults() { func ExampleAPI_modifyingResults() {
api := tokenize.NewAPI("") tokenAPI := tokenize.NewAPI("")
api.Output.AddString("Some runes") tokenAPI.Output.AddString("Some runes")
api.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd') tokenAPI.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
api.Output.AddRunes(' ', 'i', 'n', ' ') tokenAPI.Output.AddRunes(' ', 'i', 'n', ' ')
api.Output.AddString("various ways") tokenAPI.Output.AddString("various ways")
fmt.Printf("API result first 10 runes: %q\n", api.Output.Runes()[0:10]) fmt.Printf("API result first 10 runes: %q\n", tokenAPI.Output.Runes()[0:10])
fmt.Printf("API result runes as string: %q\n", api.Output.String()) fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
api.Output.SetString("new ") tokenAPI.Output.SetString("new ")
api.Output.AddString("set ") tokenAPI.Output.AddString("set ")
api.Output.AddString("of ") tokenAPI.Output.AddString("of ")
api.Output.AddRunes('r', 'u', 'n', 'e', 's') tokenAPI.Output.AddRunes('r', 'u', 'n', 'e', 's')
fmt.Printf("API result runes as string: %q\n", api.Output.String()) fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
fmt.Printf("API result runes: %q\n", api.Output.Runes()) fmt.Printf("API result runes: %q\n", tokenAPI.Output.Runes())
fmt.Printf("API third rune: %q\n", api.Output.Rune(2)) fmt.Printf("API third rune: %q\n", tokenAPI.Output.Rune(2))
api.Output.AddToken(tokenize.Token{ tokenAPI.Output.AddToken(tokenize.Token{
Type: 42, Type: 42,
Value: "towel"}) Value: "towel"})
api.Output.AddToken(tokenize.Token{ tokenAPI.Output.AddToken(tokenize.Token{
Type: 73, Type: 73,
Value: "Zaphod"}) Value: "Zaphod"})
fmt.Printf("API result tokens: %v\n", api.Output.Tokens()) fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens())
fmt.Printf("API second result token: %v\n", api.Output.Token(1)) fmt.Printf("API second result token: %v\n", tokenAPI.Output.Token(1))
// Output: // Output:
// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's'] // API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
@ -142,24 +142,24 @@ func ExampleAPI_modifyingResults() {
} }
func ExampleAPI_Reset() { func ExampleAPI_Reset() {
api := tokenize.NewAPI("Very important input!") tokenAPI := tokenize.NewAPI("Very important input!")
r, _, _ := api.Rune.Peek(0) // read 'V' r, _, _ := tokenAPI.Rune.Peek(0) // read 'V'
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) // read 'e' r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// Reset clears the results. // Reset clears the results.
api.Reset() tokenAPI.Reset()
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// So then doing the same read operations, the same data are read. // So then doing the same read operations, the same data are read.
r, _, _ = api.Rune.Peek(0) // read 'V' r, _, _ = tokenAPI.Rune.Peek(0) // read 'V'
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) // read 'e' r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// Output: // Output:
// API results: "Ve" at line 1, column 3 // API results: "Ve" at line 1, column 3
@ -233,155 +233,155 @@ func ExampleAPI_Merge() {
} }
func TestMultipleLevelsOfForksAndMerges(t *testing.T) { func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz") tokenAPI := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// Fork a few levels. // Fork a few levels.
child1 := api.Fork() child1 := tokenAPI.Fork()
child2 := api.Fork() child2 := tokenAPI.Fork()
child3 := api.Fork() child3 := tokenAPI.Fork()
child4 := api.Fork() child4 := tokenAPI.Fork()
// Read a rune 'a' from child4. // Read a rune 'a' from child4.
r, _, _ := api.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
AssertEqual(t, 'a', r, "child4 rune 1") AssertEqual(t, 'a', r, "child4 rune 1")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "a", api.Output.String(), "child4 runes after rune 1") AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")
// Read another rune 'b' from child4. // Read another rune 'b' from child4.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
AssertEqual(t, 'b', r, "child4 rune 2") AssertEqual(t, 'b', r, "child4 rune 2")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "ab", api.Output.String(), "child4 runes after rune 2") AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")
// Merge "ab" from child4 to child3. // Merge "ab" from child4 to child3.
api.Merge(child4) tokenAPI.Merge(child4)
AssertEqual(t, "", api.Output.String(), "child4 runes after first merge") AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")
// Read some more from child4. // Read some more from child4.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
AssertEqual(t, 'c', r, "child4 rune 3") AssertEqual(t, 'c', r, "child4 rune 3")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "c", api.Output.String(), "child4 runes after rune 1") AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")
// Merge "c" from child4 to child3. // Merge "c" from child4 to child3.
api.Merge(child4) tokenAPI.Merge(child4)
// And dispose of child4, making child3 the active stack level. // And dispose of child4, making child3 the active stack level.
api.Dispose(child4) tokenAPI.Dispose(child4)
// Child3 should now have the compbined results "abc" from child4's work. // Child3 should now have the compbined results "abc" from child4's work.
AssertEqual(t, "abc", api.Output.String(), "child3 after merge of child4") AssertEqual(t, "abc", tokenAPI.Output.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3. // Now read some data from child3.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
AssertEqual(t, 'd', r, "child3 rune 5") AssertEqual(t, 'd', r, "child3 rune 5")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
AssertEqual(t, 'e', r, "child3 rune 5") AssertEqual(t, 'e', r, "child3 rune 5")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
AssertEqual(t, 'f', r, "child3 rune 5") AssertEqual(t, 'f', r, "child3 rune 5")
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "abcdef", api.Output.String(), "child3 total result after rune 6") AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")
// Temporarily go some new forks from here, but don't use their outcome. // Temporarily go some new forks from here, but don't use their outcome.
child3sub1 := api.Fork() child3sub1 := tokenAPI.Fork()
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
child3sub2 := api.Fork() child3sub2 := tokenAPI.Fork()
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
api.Merge(child3sub2) // do merge sub2 down to sub1 tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1
api.Dispose(child3sub2) // and dispose of sub2 tokenAPI.Dispose(child3sub2) // and dispose of sub2
api.Dispose(child3sub1) // but dispose of sub1 without merging tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging
// Instead merge the results from before this forking segway from child3 to child2 // Instead merge the results from before this forking segway from child3 to child2
// and dispose of it. // and dispose of it.
api.Merge(child3) tokenAPI.Merge(child3)
api.Dispose(child3) tokenAPI.Dispose(child3)
AssertEqual(t, "abcdef", api.Output.String(), "child2 total result after merge of child3") AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor child2 after merge child3")
// Merge child2 to child1 and dispose of it. // Merge child2 to child1 and dispose of it.
api.Merge(child2) tokenAPI.Merge(child2)
api.Dispose(child2) tokenAPI.Dispose(child2)
// Merge child1 a few times to the top level api. // Merge child1 a few times to the top level api.
api.Merge(child1) tokenAPI.Merge(child1)
api.Merge(child1) tokenAPI.Merge(child1)
api.Merge(child1) tokenAPI.Merge(child1)
api.Merge(child1) tokenAPI.Merge(child1)
// And dispose of it. // And dispose of it.
api.Dispose(child1) tokenAPI.Dispose(child1)
// Read some data from the top level api. // Read some data from the top level api.
r, _, _ = api.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
api.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "abcdefg", api.Output.String(), "api string end result") AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
} }
func TestClearData(t *testing.T) { func TestClearData(t *testing.T) {
api := tokenize.NewAPI("Laphroaig") tokenAPI := tokenize.NewAPI("Laphroaig")
r, _, _ := api.Rune.Peek(0) // Read 'L' r, _, _ := tokenAPI.Rune.Peek(0) // Read 'L'
api.Rune.Accept(r) // Add to runes tokenAPI.Rune.Accept(r) // Add to runes
r, _, _ = api.Rune.Peek(0) // Read 'a' r, _, _ = tokenAPI.Rune.Peek(0) // Read 'a'
api.Rune.Accept(r) // Add to runes tokenAPI.Rune.Accept(r) // Add to runes
api.Output.ClearData() // Clear the runes, giving us a fresh start. tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start.
r, _, _ = api.Rune.Peek(0) // Read 'p' r, _, _ = tokenAPI.Rune.Peek(0) // Read 'p'
api.Rune.Accept(r) // Add to runes tokenAPI.Rune.Accept(r) // Add to runes
r, _, _ = api.Rune.Peek(0) // Read 'r' r, _, _ = tokenAPI.Rune.Peek(0) // Read 'r'
api.Rune.Accept(r) // Add to runes tokenAPI.Rune.Accept(r) // Add to runes
AssertEqual(t, "ph", api.Output.String(), "api string end result") AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
} }
func TestMergeScenariosForTokens(t *testing.T) { func TestMergeScenariosForTokens(t *testing.T) {
api := tokenize.NewAPI("") tokenAPI := tokenize.NewAPI("")
token1 := tokenize.Token{Value: 1} token1 := tokenize.Token{Value: 1}
token2 := tokenize.Token{Value: 2} token2 := tokenize.Token{Value: 2}
token3 := tokenize.Token{Value: 3} token3 := tokenize.Token{Value: 3}
token4 := tokenize.Token{Value: 4} token4 := tokenize.Token{Value: 4}
api.Output.SetTokens(token1) tokenAPI.Output.SetTokens(token1)
tokens := api.Output.Tokens() tokens := tokenAPI.Output.Tokens()
AssertEqual(t, 1, len(tokens), "Tokens 1") AssertEqual(t, 1, len(tokens), "Tokens 1")
child := api.Fork() child := tokenAPI.Fork()
tokens = api.Output.Tokens() tokens = tokenAPI.Output.Tokens()
AssertEqual(t, 0, len(tokens), "Tokens 2") AssertEqual(t, 0, len(tokens), "Tokens 2")
api.Output.AddToken(token2) tokenAPI.Output.AddToken(token2)
api.Merge(child) tokenAPI.Merge(child)
api.Dispose(child) tokenAPI.Dispose(child)
tokens = api.Output.Tokens() tokens = tokenAPI.Output.Tokens()
AssertEqual(t, 2, len(tokens), "Tokens 3") AssertEqual(t, 2, len(tokens), "Tokens 3")
child = api.Fork() child = tokenAPI.Fork()
api.Output.AddToken(token3) tokenAPI.Output.AddToken(token3)
api.Reset() tokenAPI.Reset()
api.Output.AddToken(token4) tokenAPI.Output.AddToken(token4)
api.Merge(child) tokenAPI.Merge(child)
api.Dispose(child) tokenAPI.Dispose(child)
tokens = api.Output.Tokens() tokens = tokenAPI.Output.Tokens()
AssertEqual(t, 3, len(tokens), "Tokens 4") AssertEqual(t, 3, len(tokens), "Tokens 4")
AssertEqual(t, 1, api.Output.TokenValue(0).(int), "Tokens 4, value 0") AssertEqual(t, 1, tokenAPI.Output.TokenValue(0).(int), "Tokens 4, value 0")
AssertEqual(t, 2, api.Output.TokenValue(1).(int), "Tokens 4, value 1") AssertEqual(t, 2, tokenAPI.Output.TokenValue(1).(int), "Tokens 4, value 1")
AssertEqual(t, 4, api.Output.TokenValue(2).(int), "Tokens 4, value 2") AssertEqual(t, 4, tokenAPI.Output.TokenValue(2).(int), "Tokens 4, value 2")
} }

View File

@ -1,65 +0,0 @@
package tokenize
import (
"testing"
)
func TestMoveCursorByBytes(t *testing.T) {
api := NewAPI("")
api.Byte.MoveCursor('a')
api.Byte.MoveCursor('b')
api.Byte.MoveCursor('c')
api.Byte.MoveCursor('\r')
api.Byte.MoveCursor('\n')
api.Byte.MoveCursor('a')
api.Byte.MoveCursor('b')
AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
AssertEqual(t, 7, api.stackFrame.offset, "Offset after moving by byte")
}
func TestMoveCursorByRunes(t *testing.T) {
api := NewAPI("")
api.Rune.MoveCursor('ɹ')
api.Rune.MoveCursor('n')
api.Rune.MoveCursor('u')
api.Rune.MoveCursor('\r')
api.Rune.MoveCursor('\n')
api.Rune.MoveCursor('ǝ')
AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
AssertEqual(t, 8, api.stackFrame.offset, "Offset after moving by rune")
}
func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
for _, test := range []struct {
name string
input []string
byte int
rune int
line int
column int
}{
{"No input at all", []string{""}, 0, 0, 0, 0},
{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
{"One newline", []string{"\n"}, 1, 1, 1, 0},
{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
api := NewAPI("")
for _, s := range test.input {
for _, r := range s {
api.Rune.MoveCursor(r)
}
}
if api.stackFrame.line != test.line {
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
}
if api.stackFrame.column != test.column {
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column)
}
}
}

View File

@ -7,7 +7,7 @@ package tokenize
// A Handler function gets an API as its input and returns a boolean to // A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used // indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results. // for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool type Handler func(tokenAPI *API) bool
// Match is syntactic sugar that allows you to write a construction like // Match is syntactic sugar that allows you to write a construction like
// NewTokenizer(handler).Execute(input) as handler.Match(input). // NewTokenizer(handler).Execute(input) as handler.Match(input).

File diff suppressed because it is too large Load Diff

View File

@ -39,16 +39,16 @@ func (result *Result) String() string {
// look at the documentation for parsekit.read.New(). // look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func { func New(tokenHandler Handler) Func {
return func(input interface{}) (*Result, error) { return func(input interface{}) (*Result, error) {
api := NewAPI(input) tokenAPI := NewAPI(input)
ok := tokenHandler(api) ok := tokenHandler(tokenAPI)
if !ok { if !ok {
err := fmt.Errorf("mismatch at %s", api.Cursor()) err := fmt.Errorf("mismatch at %s", tokenAPI.Input.Cursor())
return nil, err return nil, err
} }
result := &Result{ result := &Result{
Runes: api.Output.Runes(), Runes: tokenAPI.Output.Runes(),
Tokens: api.Output.Tokens(), Tokens: tokenAPI.Output.Tokens(),
} }
return result, nil return result, nil
} }

View File

@ -54,29 +54,29 @@ func ExampleNew() {
} }
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) { func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
api := makeTokenizeAPI() tokenizeAPI := makeTokenizeAPI()
r, _, _ := api.Rune.Peek(0) r, _, _ := tokenizeAPI.Rune.Peek(0)
AssertEqual(t, 'T', r, "first rune") AssertEqual(t, 'T', r, "first rune")
} }
func TestInputCanAcceptRunesFromReader(t *testing.T) { func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
r0, _, _ := i.Rune.Peek(0) r0, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r0) tokenAPI.Rune.Accept(r0)
r1, _, _ := i.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls. r1, _, _ := tokenAPI.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls.
r2, _, _ := i.Rune.Peek(1) r2, _, _ := tokenAPI.Rune.Peek(1)
i.Rune.AcceptMulti(r1, r2) tokenAPI.Rune.AcceptMulti(r1, r2)
AssertEqual(t, "Tes", i.Output.String(), "i.String()") AssertEqual(t, "Tes", tokenAPI.Output.String(), "i.String()")
} }
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) { func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
Function: func() { Function: func() {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
i.Merge(0) tokenAPI.Merge(0)
}, },
Regexp: true, Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`}) Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
@ -85,10 +85,10 @@ func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) { func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
Function: func() { Function: func() {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
child := i.Fork() child := tokenAPI.Fork()
i.Fork() tokenAPI.Fork()
i.Merge(child) tokenAPI.Merge(child)
}, },
Regexp: true, Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
@ -98,8 +98,8 @@ func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) { func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
Function: func() { Function: func() {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
i.Dispose(0) tokenAPI.Dispose(0)
}, },
Regexp: true, Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`}) Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
@ -108,10 +108,10 @@ func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) { func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
Function: func() { Function: func() {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
child := i.Fork() child := tokenAPI.Fork()
i.Fork() tokenAPI.Fork()
i.Dispose(child) tokenAPI.Dispose(child)
}, },
Regexp: true, Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` + Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
@ -121,11 +121,11 @@ func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) { func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
Function: func() { Function: func() {
i := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
i.Fork() tokenAPI.Fork()
g := i.Fork() g := tokenAPI.Fork()
i.Fork() tokenAPI.Fork()
i.Merge(g) tokenAPI.Merge(g)
}, },
Regexp: true, Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
@ -133,30 +133,30 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
} }
func TestAccept_UpdatesCursor(t *testing.T) { func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Cursor(), "cursor 1") AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
r, _, _ := i.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) tokenAPI.Rune.Accept(r)
} }
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2")
r, _, _ := i.Rune.Peek(0) // read "\n", cursor ends up at start of new line r, _, _ := tokenAPI.Rune.Peek(0) // read "\n", cursor ends up at start of new line
i.Rune.Accept(r) tokenAPI.Rune.Accept(r)
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
b, _ := i.Byte.Peek(0) b, _ := tokenAPI.Byte.Peek(0)
i.Byte.Accept(b) tokenAPI.Byte.Accept(b)
} }
AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4")
} }
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) { func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("X")) tokenAPI := tokenize.NewAPI(strings.NewReader("X"))
r, _, _ := i.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) tokenAPI.Rune.Accept(r)
r, _, err := i.Rune.Peek(0) r, _, err := tokenAPI.Rune.Peek(0)
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")

View File

@ -6,84 +6,84 @@ import (
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune. // Create input, accept the first rune.
i := NewAPI("Testing") tokenAPI := NewAPI("Testing")
r, _, _ := i.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // T tokenAPI.Rune.Accept(r) // T
AssertEqual(t, "T", i.Output.String(), "accepted rune in input") AssertEqual(t, "T", tokenAPI.Output.String(), "accepted rune in input")
// Fork // Fork
child := i.Fork() child := tokenAPI.Fork()
AssertEqual(t, 1, i.stackFrame.offset, "parent offset") AssertEqual(t, 1, tokenAPI.stackFrame.offset, "parent offset")
AssertEqual(t, 1, i.stackFrame.offset, "child offset") AssertEqual(t, 1, tokenAPI.stackFrame.offset, "child offset")
// Accept two runes via fork. // Accept two runes via fork.
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // e tokenAPI.Rune.Accept(r) // e
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // s tokenAPI.Rune.Accept(r) // s
AssertEqual(t, "es", i.Output.String(), "result runes in fork") AssertEqual(t, "es", tokenAPI.Output.String(), "result runes in fork")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset") AssertEqual(t, 1, tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset, "parent offset")
AssertEqual(t, 3, i.stackFrame.offset, "child offset") AssertEqual(t, 3, tokenAPI.stackFrame.offset, "child offset")
// Merge fork back into parent // Merge fork back into parent
i.Merge(child) tokenAPI.Merge(child)
i.Dispose(child) tokenAPI.Dispose(child)
AssertEqual(t, "Tes", i.Output.String(), "result runes in parent Input after Merge()") AssertEqual(t, "Tes", tokenAPI.Output.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset") AssertEqual(t, 3, tokenAPI.stackFrame.offset, "parent offset")
} }
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewAPI("Testing") tokenAPI := NewAPI("Testing")
r, _, _ := i.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // T tokenAPI.Rune.Accept(r) // T
f1 := i.Fork() f1 := tokenAPI.Fork()
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // e tokenAPI.Rune.Accept(r) // e
f2 := i.Fork() f2 := tokenAPI.Fork()
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // s tokenAPI.Rune.Accept(r) // s
AssertEqual(t, "s", i.Output.String(), "f2 String()") AssertEqual(t, "s", tokenAPI.Output.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A") AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f2.offset A")
i.Merge(f2) tokenAPI.Merge(f2)
i.Dispose(f2) tokenAPI.Dispose(f2)
AssertEqual(t, "es", i.Output.String(), "f1 String()") AssertEqual(t, "es", tokenAPI.Output.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f1.offset A")
i.Merge(f1) tokenAPI.Merge(f1)
i.Dispose(f1) tokenAPI.Dispose(f1)
AssertEqual(t, "Tes", i.Output.String(), "top-level API String()") AssertEqual(t, "Tes", tokenAPI.Output.String(), "top-level API String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f1.offset A")
} }
func TestFlushInput(t *testing.T) { func TestFlushInput(t *testing.T) {
i := NewAPI("cool") tokenAPI := NewAPI("cool")
// Flushing without any read data is okay. FlushInput() will return // Flushing without any read data is okay. FlushInput() will return
// false in this case, and nothing else happens. // false in this case, and nothing else happens.
AssertTrue(t, i.FlushInput() == false, "flush input at start") AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input at start")
r, _, _ := i.Rune.Peek(0) r, _, _ := tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // c tokenAPI.Rune.Accept(r) // c
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // o tokenAPI.Rune.Accept(r) // o
AssertTrue(t, i.FlushInput() == true, "flush input after reading some data") AssertTrue(t, tokenAPI.Input.Flush() == true, "flush input after reading some data")
AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input") AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after flush input")
AssertTrue(t, i.FlushInput() == false, "flush input after flush input") AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input after flush input")
// Read offset is now zero, but reading should continue after "co". // Read offset is now zero, but reading should continue after "co".
// The output so far isn't modified, so the following accept calls // The output so far isn't modified, so the following accept calls
// will add their runes to the already accepted string "co". // will add their runes to the already accepted string "co".
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // o tokenAPI.Rune.Accept(r) // o
r, _, _ = i.Rune.Peek(0) r, _, _ = tokenAPI.Rune.Peek(0)
i.Rune.Accept(r) // o tokenAPI.Rune.Accept(r) // o
AssertEqual(t, "cool", i.Output.String(), "end result") AssertEqual(t, "cool", tokenAPI.Output.String(), "end result")
} }
func TestInputFlusherWrapper(t *testing.T) { func TestInputFlusherWrapper(t *testing.T) {