Backup work for next refactoring step.

This commit is contained in:
Maurice Makaay 2019-07-22 22:16:28 +00:00
parent 070e6a13a7
commit cf679b2225
11 changed files with 277 additions and 330 deletions

View File

@ -30,9 +30,8 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
t := p.tokenAPI
if ok {
r := p.Result
r.Tokens = t.Output.Tokens()
r.Runes = t.Output.Runes()
p.Result.Tokens = t.Output.Tokens()
p.Result.Runes = t.Output.Runes()
}
t.Dispose(forkedAPI)
return ok
@ -44,16 +43,13 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
// and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the results using the Result() method.
// TODO lessen indirection by introducing a := p.tokenAPI (maybe some other parser funcs too?)
// TODO Eh why keep these results all the time? Same for Peek!
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
t := p.tokenAPI
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
if ok {
// Keep track of the results.
r := p.Result
r.Tokens = t.Output.Tokens()
r.Runes = t.Output.Runes()
// Keep track of the results as produced by this child.
p.Result.Tokens = t.Output.Tokens()
p.Result.Runes = t.Output.Runes()
// Merge to the parent level.
t.Merge(forkedAPI)

View File

@ -76,10 +76,10 @@ type API struct {
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
Byte ByteMode // byte-mode operations
Input *Input // provides input-related functionality
reader *read.Buffer // the buffered input reader
Output *Output // provides output-related functionality
Byte ByteMode // access to a set of byte-based input methods
Rune RuneMode // access to a set of rune-based input methods
Output Output // provides output-related functionality
outputTokens []Token // accepted tokens
outputData []byte // accepted data
}
@ -110,8 +110,8 @@ func NewAPI(input interface{}) *API {
stackFrames: make([]stackFrame, initialStackDepth),
}
api.Byte = ByteMode{api: api}
api.Input = &Input{api: api}
api.Output = &Output{api: api}
api.Rune = RuneMode{api: api}
api.Output = Output{api: api}
api.stackFrame = &api.stackFrames[0]
return api

View File

@ -1,6 +1,6 @@
package tokenize
// Input provides input-related functionality for the tokenize API.
// ByteMode provides byte-driven input/output functionality for the tokenize API.
type ByteMode struct {
// api is the tokenize API that this ByteMode is attached to; NewAPI wires it up.
api *API
}
@ -87,6 +87,6 @@ func (api *API) dataAddByte(b byte) {
}
func (api *API) dataSetBytes(bytes ...byte) {
api.dataClear()
api.Output.ClearData()
api.Byte.AppendMulti(bytes...)
}

View File

@ -9,152 +9,110 @@ type Output struct {
api *API
}
func (o *Output) String() string {
return o.api.dataAsString()
}
func (api *API) dataAsString() string {
bytes := api.outputData[api.stackFrame.bytesStart:api.stackFrame.bytesEnd]
// String returns the output data of the current stack frame, i.e. the bytes
// between the frame's bytesStart and bytesEnd, converted to a string.
func (o Output) String() string {
	frame := o.api.stackFrame
	return string(o.api.outputData[frame.bytesStart:frame.bytesEnd])
}
func (o *Output) Runes() []rune {
return o.api.dataAsRunes()
// Runes returns the output data of the current stack frame as a slice of
// runes. The bytes of the frame are converted to a string first, and that
// string is then converted to runes.
func (o Output) Runes() []rune {
	frame := o.api.stackFrame
	data := o.api.outputData[frame.bytesStart:frame.bytesEnd]
	return []rune(string(data))
}
func (api *API) dataAsRunes() []rune {
bytes := api.outputData[api.stackFrame.bytesStart:api.stackFrame.bytesEnd]
return []rune(string(bytes))
}
func (o *Output) Rune(offset int) rune {
return o.api.dataRune(offset)
}
func (api *API) dataRune(offset int) rune {
r, _ := utf8.DecodeRune(api.outputData[api.stackFrame.bytesStart+offset:])
// Rune decodes and returns the UTF8 rune that starts at the provided byte
// offset, counted from the start of the output data of the current stack frame.
//
// NOTE(review): the decoded slice is not capped at the frame's bytesEnd, so an
// offset at or beyond the end of the frame's data decodes whatever bytes happen
// to follow in the output buffer. Confirm that callers never rely on this.
func (o Output) Rune(offset int) rune {
	start := o.api.stackFrame.bytesStart + offset
	// Only the rune is of interest here, the decoded byte width is dropped.
	decoded, _ := utf8.DecodeRune(o.api.outputData[start:])
	return decoded
}
func (o *Output) ClearData() {
o.api.dataClear()
// ClearData discards the output data of the current stack frame. The buffer
// itself is left untouched; only the end of the frame's data range is moved
// back to the start of that range.
func (o Output) ClearData() {
	o.api.stackFrame.bytesEnd = o.api.stackFrame.bytesStart
}
func (api *API) dataClear() {
api.stackFrame.bytesEnd = api.stackFrame.bytesStart
}
func (o *Output) SetBytes(bytes ...byte) {
// SetBytes replaces the output data of the current stack frame with the
// provided bytes.
func (o Output) SetBytes(bytes ...byte) {
// Drop the data that the current stack frame holds so far.
o.ClearData()
// dataSetBytes clears the data once more and then appends the new bytes.
o.api.dataSetBytes(bytes...)
}
func (o *Output) AddByte(b byte) {
// AddByte hands a single byte to the API's dataAddByte helper, which
// presumably appends it to the output data of the current stack frame
// (the helper's body is defined elsewhere; verify there).
func (o Output) AddByte(b byte) {
o.api.dataAddByte(b)
}
func (o *Output) AddBytes(bytes ...byte) {
// AddBytes passes the provided bytes on to the AppendMulti method of the
// API's byte mode, which presumably appends them to the output data of the
// current stack frame (AppendMulti is defined elsewhere; verify there).
func (o Output) AddBytes(bytes ...byte) {
o.api.Byte.AppendMulti(bytes...)
}
func (o *Output) SetRunes(runes ...rune) {
o.api.dataSetRunes(runes...)
// SetRunes replaces the output data of the current stack frame with the
// UTF8 encoding of the provided runes.
func (o Output) SetRunes(runes ...rune) {
	// Drop the existing data by collapsing the data range of the frame.
	frame := o.api.stackFrame
	frame.bytesEnd = frame.bytesStart
	// Then add the new runes to the now empty data.
	o.AddRunes(runes...)
}
func (api *API) dataSetRunes(runes ...rune) {
api.dataClear()
api.dataAddRunes(runes...)
}
func (o *Output) AddRunes(runes ...rune) {
o.api.dataAddRunes(runes...)
}
func (api *API) dataAddRunes(runes ...rune) {
func (o Output) AddRunes(runes ...rune) {
a := o.api
f := a.stackFrame
runesAsString := string(runes)
newBytesEnd := api.stackFrame.bytesEnd + len(runesAsString)
api.growOutputData(newBytesEnd)
copy(api.outputData[api.stackFrame.bytesEnd:], runesAsString)
api.stackFrame.bytesEnd = newBytesEnd
newBytesEnd := f.bytesEnd + len(runesAsString)
a.growOutputData(newBytesEnd)
copy(a.outputData[f.bytesEnd:], runesAsString)
f.bytesEnd = newBytesEnd
}
func (o *Output) AddString(s string) {
o.api.dataAddString(s)
// AddString converts the provided string to bytes and passes those on to
// the output in the same way as AddBytes does.
func (o Output) AddString(s string) {
	data := []byte(s)
	o.AddBytes(data...)
}
func (api *API) dataAddString(s string) {
api.Byte.AppendMulti([]byte(s)...)
// SetString replaces the output data of the current stack frame with the
// bytes of the provided string.
func (o Output) SetString(s string) {
	// Start out with empty data for the current stack frame.
	o.ClearData()
	// Feed the bytes of the string to the byte mode of the API.
	o.api.Byte.AppendMulti([]byte(s)...)
}
func (o *Output) SetString(s string) {
o.api.dataSetString(s)
// Tokens returns the tokens of the current stack frame.
//
// The returned slice is a view on the token buffer of the API, not a copy.
// Code that needs to hold on to the tokens should copy them first.
func (o Output) Tokens() []Token {
	frame := o.api.stackFrame
	return o.api.outputTokens[frame.tokenStart:frame.tokenEnd]
}
func (api *API) dataSetString(s string) {
api.dataClear()
api.dataSetBytes([]byte(s)...)
// Token returns the token at the provided offset, counted from the first
// token of the current stack frame.
func (o Output) Token(offset int) Token {
	index := o.api.stackFrame.tokenStart + offset
	return o.api.outputTokens[index]
}
func (o *Output) Tokens() []Token {
return o.api.tokens()
// TokenValue returns the Value field of the token at the provided offset,
// counted from the first token of the current stack frame.
func (o Output) TokenValue(offset int) interface{} {
	index := o.api.stackFrame.tokenStart + offset
	token := o.api.outputTokens[index]
	return token.Value
}
func (api *API) tokens() []Token {
return api.outputTokens[api.stackFrame.tokenStart:api.stackFrame.tokenEnd]
// ClearTokens discards the tokens of the current stack frame. The token
// buffer itself is left untouched; only the end of the frame's token range
// is moved back to the start of that range.
func (o Output) ClearTokens() {
	o.api.stackFrame.tokenEnd = o.api.stackFrame.tokenStart
}
func (o *Output) Token(offset int) Token {
return o.api.token(offset)
// SetTokens replaces the tokens of the current stack frame with the
// provided tokens.
func (o Output) SetTokens(tokens ...Token) {
	// Drop the existing tokens by collapsing the token range of the frame.
	frame := o.api.stackFrame
	frame.tokenEnd = frame.tokenStart
	// Then add the new tokens to the now empty token range.
	o.AddTokens(tokens...)
}
func (api *API) token(offset int) Token {
return api.outputTokens[api.stackFrame.tokenStart+offset]
// AddToken appends a single token to the tokens of the current stack frame.
func (o Output) AddToken(token Token) {
	frame := o.api.stackFrame
	// Remember the slot for the new token before growing the buffer.
	slot := frame.tokenEnd
	// Make sure that the token buffer can hold one more token.
	o.api.growOutputTokens(slot + 1)
	o.api.outputTokens[slot] = token
	frame.tokenEnd++
}
func (o *Output) TokenValue(offset int) interface{} {
return o.api.tokenValue(offset)
}
func (api *API) tokenValue(offset int) interface{} {
return api.outputTokens[api.stackFrame.tokenStart+offset].Value
}
func (o *Output) ClearTokens() {
o.api.tokensClear()
}
func (api *API) tokensClear() {
api.stackFrame.tokenEnd = api.stackFrame.tokenStart
}
func (o *Output) SetTokens(tokens ...Token) {
o.api.tokensSet(tokens...)
}
func (api *API) tokensSet(tokens ...Token) {
api.tokensClear()
api.tokensAdd(tokens...)
type Func func(input interface{}) (*Result, error)
// Result holds the runes and tokens as produced by the tokenizer.
type Result struct {
Tokens []Token
Runes []rune
// AddTokens appends the provided tokens, in order, to the tokens of the
// current stack frame.
func (o Output) AddTokens(tokens ...Token) {
	api := o.api
	frame := api.stackFrame
	// Grow the token buffer once, so it can hold all the new tokens.
	api.growOutputTokens(frame.tokenEnd + len(tokens))
	// Store the tokens one by one, advancing the end of the token range.
	for i := range tokens {
		api.outputTokens[frame.tokenEnd] = tokens[i]
		frame.tokenEnd++
	}
}
func (o *Output) AddTokens(tokens ...Token) {
o.api.tokensAdd(tokens...)
}
func (api *API) tokensAdd(tokens ...Token) {
// Grow the tokens capacity when needed.
newTokenEnd := api.stackFrame.tokenEnd + len(tokens)
api.growOutputTokens(newTokenEnd)
for offset, t := range tokens {
api.outputTokens[api.stackFrame.tokenEnd+offset] = t
}
api.stackFrame.tokenEnd = newTokenEnd
}
func (api *API) growOutputTokens(requiredTokens int) {
if cap(api.outputTokens) < requiredTokens {
newTokens := make([]Token, requiredTokens*2)

View File

@ -4,12 +4,12 @@ import (
"unicode/utf8"
)
// Input provides input-related functionality for the tokenize API.
type Input struct {
// RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
type RuneMode struct {
// api is the tokenize API whose reader, stack frame and output data the
// rune-based methods operate on.
api *API
}
// PeekRune returns the UTF8 rune at the provided byte offset, including its byte width.
// Peek returns the UTF8 rune at the provided byte offset, including its byte width.
//
// The byte width is useful to know what byte offset you'll have to use to peek
// the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the
@ -22,15 +22,12 @@ type Input struct {
// When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i *Input) PeekRune(offset int) (rune, int, error) {
return i.api.peekRune(offset)
func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
	api := runeMode.api
	// The requested offset is relative to the read offset of the current
	// stack frame, so translate it to a position for the reader.
	position := api.stackFrame.offset + offset
	return api.reader.RuneAt(position)
}
func (api *API) peekRune(offset int) (rune, int, error) {
return api.reader.RuneAt(api.stackFrame.offset + offset)
}
// SkipRune is used to skip over a single rune that was read from the input.
// Skip is used to skip over a single rune that was read from the input.
// This tells the tokenizer: "I've seen this rune. It is of no interest.
// I will now continue reading after this rune."
//
@ -40,16 +37,13 @@ func (api *API) peekRune(offset int) (rune, int, error) {
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped rune.
func (i *Input) SkipRune(r rune) {
i.api.skipRune(r)
func (runeMode *RuneMode) Skip(r rune) {
	frame := runeMode.api.stackFrame
	// The number of bytes that the UTF8 encoding of the rune takes up.
	// NOTE(review): utf8.RuneLen returns -1 for a value that is not a valid
	// rune, which would move the read offset backwards. Confirm that callers
	// only pass runes that were obtained from Peek.
	width := utf8.RuneLen(r)
	// Update the cursor position and move the read offset past the rune.
	frame.moveCursorByRune(r)
	frame.offset += width
}
func (api *API) skipRune(r rune) {
api.stackFrame.moveCursorByRune(r)
api.stackFrame.offset += utf8.RuneLen(r)
}
// SkipRunes is used to skip over one or more runes that were read from the input.
// SkipMulti is used to skip over one or more runes that were read from the input.
// This tells the tokenizer: "I've seen these runes. They are of no interest.
// I will now continue reading after these runes."
//
@ -59,18 +53,15 @@ func (api *API) skipRune(r rune) {
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped runes.
func (i *Input) SkipRunes(runes ...rune) {
i.api.skipRunes(runes...)
}
func (api *API) skipRunes(runes ...rune) {
func (runeMode *RuneMode) SkipMulti(runes ...rune) {
f := runeMode.api.stackFrame
for _, r := range runes {
api.stackFrame.moveCursorByRune(r)
api.stackFrame.offset += utf8.RuneLen(r)
f.moveCursorByRune(r)
f.offset += utf8.RuneLen(r)
}
}
// AcceptRune is used to accept a single rune that was read from the input.
// Accept is used to accept a single rune that was read from the input.
// This tells the tokenizer: "I've seen this rune. I want to make use of it
// for the final output, so please remember it for me. I will now continue
// reading after this rune."
@ -81,23 +72,21 @@ func (api *API) skipRunes(runes ...rune) {
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune.
func (i *Input) AcceptRune(r rune) {
i.api.acceptRune(r)
}
func (api *API) acceptRune(r rune) {
curBytesEnd := api.stackFrame.bytesEnd
func (runeMode *RuneMode) Accept(r rune) {
a := runeMode.api
f := a.stackFrame
curBytesEnd := f.bytesEnd
maxRequiredBytes := curBytesEnd + utf8.UTFMax
api.growOutputData(maxRequiredBytes)
w := utf8.EncodeRune(api.outputData[curBytesEnd:], r)
api.stackFrame.bytesEnd += w
api.stackFrame.offset += w
a.growOutputData(maxRequiredBytes)
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
f.bytesEnd += w
f.offset += w
api.stackFrame.moveCursorByRune(r)
f.moveCursorByRune(r)
}
// AcceptRunes is used to accept one or more runes that were read from the input.
// AcceptMulti is used to accept one or more runes that were read from the input.
// This tells the tokenizer: "I've seen these runes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these runes."
@ -108,22 +97,20 @@ func (api *API) acceptRune(r rune) {
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted runes.
func (i *Input) AcceptRunes(runes ...rune) {
i.api.acceptRunes(runes...)
}
func (api *API) acceptRunes(runes ...rune) {
func (runeMode *RuneMode) AcceptMulti(runes ...rune) {
a := runeMode.api
f := a.stackFrame
runesAsString := string(runes)
byteLen := len(runesAsString)
curBytesEnd := api.stackFrame.bytesEnd
curBytesEnd := f.bytesEnd
newBytesEnd := curBytesEnd + byteLen
api.growOutputData(newBytesEnd)
copy(api.outputData[curBytesEnd:], runesAsString)
api.stackFrame.bytesEnd = newBytesEnd
api.stackFrame.offset += byteLen
a.growOutputData(newBytesEnd)
copy(a.outputData[curBytesEnd:], runesAsString)
f.bytesEnd = newBytesEnd
f.offset += byteLen
for _, r := range runes {
api.stackFrame.moveCursorByRune(r)
f.moveCursorByRune(r)
}
}

View File

@ -15,10 +15,10 @@ func ExampleNewAPI() {
func ExampleAPI_PeekByte() {
api := tokenize.NewAPI("The input that the API will handle")
r1, _, err := api.Input.PeekRune(19) // 'A',
r2, _, err := api.Input.PeekRune(20) // 'P'
r3, _, err := api.Input.PeekRune(21) // 'I'
_, _, err = api.Input.PeekRune(100) // EOF
r1, _, err := api.Rune.Peek(19) // 'A',
r2, _, err := api.Rune.Peek(20) // 'P'
r3, _, err := api.Rune.Peek(21) // 'I'
_, _, err = api.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -29,10 +29,10 @@ func ExampleAPI_PeekByte() {
func ExampleAPI_PeekRune() {
api := tokenize.NewAPI("The input that the ДPI will handle")
r1, _, err := api.Input.PeekRune(19) // 'Д', 2 bytes so next rune starts at 21
r2, _, err := api.Input.PeekRune(21) // 'P'
r3, _, err := api.Input.PeekRune(22) // 'I'
_, _, err = api.Input.PeekRune(100) // EOF
r1, _, err := api.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21
r2, _, err := api.Rune.Peek(21) // 'P'
r3, _, err := api.Rune.Peek(22) // 'I'
_, _, err = api.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -44,15 +44,15 @@ func ExampleAPI_AcceptRune() {
api := tokenize.NewAPI("The input that the ДPI will handle")
// Reads 'T' and accepts it to the API output data.
r, _, _ := api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ := api.Rune.Peek(0)
api.Rune.Accept(r)
// Reads 'h' and accepts it to the API output data.
r, _, _ = api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ = api.Rune.Peek(0)
api.Rune.Accept(r)
// Reads 'e', but does not accept it to the API output data.
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
fmt.Printf("API results: %q\n", api.Output.String())
@ -64,14 +64,14 @@ func ExampleAPI_AcceptRunes() {
api := tokenize.NewAPI("The input that the API will handle")
// Peeks at the first two runes 'T' and 'h'.
r0, _, _ := api.Input.PeekRune(0)
r1, _, _ := api.Input.PeekRune(1)
r0, _, _ := api.Rune.Peek(0)
r1, _, _ := api.Rune.Peek(1)
// Peeks at the third rune 'e'.
api.Input.PeekRune(2)
api.Rune.Peek(2)
// Accepts only 'T' and 'h' into the API results.
api.Input.AcceptRunes(r0, r1)
api.Rune.AcceptMulti(r0, r1)
fmt.Printf("API results: %q\n", api.Output.String())
@ -83,7 +83,7 @@ func ExampleAPI_SkipRune() {
api := tokenize.NewAPI("The input that the API will handle")
for {
r, _, err := api.Input.PeekRune(0)
r, _, err := api.Rune.Peek(0)
// EOF reached.
if err != nil {
@ -92,9 +92,9 @@ func ExampleAPI_SkipRune() {
// Only accept runes that are vowels.
if strings.ContainsRune("aeiouAEIOU", r) {
api.Input.AcceptRune(r)
api.Rune.Accept(r)
} else {
api.Input.SkipRune(r)
api.Rune.Skip(r)
}
}
@ -122,10 +122,10 @@ func ExampleAPI_modifyingResults() {
fmt.Printf("API result runes: %q\n", api.Output.Runes())
fmt.Printf("API third rune: %q\n", api.Output.Rune(2))
api.Output.AddTokens(tokenize.Token{
api.Output.AddToken(tokenize.Token{
Type: 42,
Value: "towel"})
api.Output.AddTokens(tokenize.Token{
api.Output.AddToken(tokenize.Token{
Type: 73,
Value: "Zaphod"})
fmt.Printf("API result tokens: %v\n", api.Output.Tokens())
@ -144,10 +144,10 @@ func ExampleAPI_modifyingResults() {
func ExampleAPI_Reset() {
api := tokenize.NewAPI("Very important input!")
r, _, _ := api.Input.PeekRune(0) // read 'V'
api.Input.AcceptRune(r)
r, _, _ = api.Input.PeekRune(0) // read 'e'
api.Input.AcceptRune(r)
r, _, _ := api.Rune.Peek(0) // read 'V'
api.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) // read 'e'
api.Rune.Accept(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// Reset clears the results.
@ -155,10 +155,10 @@ func ExampleAPI_Reset() {
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// So then doing the same read operations, the same data are read.
r, _, _ = api.Input.PeekRune(0) // read 'V'
api.Input.AcceptRune(r)
r, _, _ = api.Input.PeekRune(0) // read 'e'
api.Input.AcceptRune(r)
r, _, _ = api.Rune.Peek(0) // read 'V'
api.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0) // read 'e'
api.Rune.Accept(r)
fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor())
// Output:
@ -210,15 +210,15 @@ func ExampleAPI_Fork() {
func ExampleAPI_Merge() {
tokenHandler := func(t *tokenize.API) bool {
child1 := t.Fork()
r0, _, _ := t.Input.PeekRune(0) // reads 'H'
r1, _, _ := t.Input.PeekRune(1) // reads 'i'
t.Input.AcceptRunes(r0, r1) // these runes are accepted in the API results for child1
r0, _, _ := t.Rune.Peek(0) // reads 'H'
r1, _, _ := t.Rune.Peek(1) // reads 'i'
t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1
child2 := t.Fork()
r0, _, _ = t.Input.PeekRune(0) // reads ' '
r1, _, _ = t.Input.PeekRune(1) // reads 'm'
t.Input.AcceptRunes(r0, r1) // these runes are accepted in the API results for child2
t.Dispose(child2) // but they are not merged and thefore not used by child1
r0, _, _ = t.Rune.Peek(0) // reads ' '
r1, _, _ = t.Rune.Peek(1) // reads 'm'
t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2
t.Dispose(child2) // but they are not merged and therefore not used by child1
t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
t.Dispose(child1) // and clean up child1 to return to the parent
@ -242,15 +242,15 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
child4 := api.Fork()
// Read a rune 'a' from child4.
r, _, _ := api.Input.PeekRune(0)
r, _, _ := api.Rune.Peek(0)
AssertEqual(t, 'a', r, "child4 rune 1")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
AssertEqual(t, "a", api.Output.String(), "child4 runes after rune 1")
// Read another rune 'b' from child4.
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
AssertEqual(t, 'b', r, "child4 rune 2")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
AssertEqual(t, "ab", api.Output.String(), "child4 runes after rune 2")
// Merge "ab" from child4 to child3.
@ -258,9 +258,9 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
AssertEqual(t, "", api.Output.String(), "child4 runes after first merge")
// Read some more from child4.
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
AssertEqual(t, 'c', r, "child4 rune 3")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
AssertEqual(t, "c", api.Output.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
@ -275,29 +275,29 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3.
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
AssertEqual(t, 'd', r, "child3 rune 5")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
AssertEqual(t, 'e', r, "child3 rune 5")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
r, _, _ = api.Input.PeekRune(0)
r, _, _ = api.Rune.Peek(0)
AssertEqual(t, 'f', r, "child3 rune 5")
api.Input.AcceptRune(r)
api.Rune.Accept(r)
AssertEqual(t, "abcdef", api.Output.String(), "child3 total result after rune 6")
// Temporarily go some new forks from here, but don't use their outcome.
child3sub1 := api.Fork()
r, _, _ = api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ = api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ = api.Rune.Peek(0)
api.Rune.Accept(r)
r, _, _ = api.Rune.Peek(0)
api.Rune.Accept(r)
child3sub2 := api.Fork()
r, _, _ = api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ = api.Rune.Peek(0)
api.Rune.Accept(r)
api.Merge(child3sub2) // do merge sub2 down to sub1
api.Dispose(child3sub2) // and dispose of sub2
api.Dispose(child3sub1) // but dispose of sub1 without merging
@ -324,8 +324,8 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api.Dispose(child1)
// Read some data from the top level api.
r, _, _ = api.Input.PeekRune(0)
api.Input.AcceptRune(r)
r, _, _ = api.Rune.Peek(0)
api.Rune.Accept(r)
AssertEqual(t, "abcdefg", api.Output.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
@ -333,15 +333,15 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
func TestClearData(t *testing.T) {
api := tokenize.NewAPI("Laphroaig")
r, _, _ := api.Input.PeekRune(0) // Read 'L'
api.Input.AcceptRune(r) // Add to runes
r, _, _ = api.Input.PeekRune(0) // Read 'a'
api.Input.AcceptRune(r) // Add to runes
api.Output.ClearData() // Clear the runes, giving us a fresh start.
r, _, _ = api.Input.PeekRune(0) // Read 'p'
api.Input.AcceptRune(r) // Add to runes
r, _, _ = api.Input.PeekRune(0) // Read 'r'
api.Input.AcceptRune(r) // Add to runes
r, _, _ := api.Rune.Peek(0) // Read 'L'
api.Rune.Accept(r) // Add to runes
r, _, _ = api.Rune.Peek(0) // Read 'a'
api.Rune.Accept(r) // Add to runes
api.Output.ClearData() // Clear the runes, giving us a fresh start.
r, _, _ = api.Rune.Peek(0) // Read 'p'
api.Rune.Accept(r) // Add to runes
r, _, _ = api.Rune.Peek(0) // Read 'r'
api.Rune.Accept(r) // Add to runes
AssertEqual(t, "ph", api.Output.String(), "api string end result")
}
@ -363,7 +363,7 @@ func TestMergeScenariosForTokens(t *testing.T) {
tokens = api.Output.Tokens()
AssertEqual(t, 0, len(tokens), "Tokens 2")
api.Output.AddTokens(token2)
api.Output.AddToken(token2)
api.Merge(child)
api.Dispose(child)
@ -372,9 +372,9 @@ func TestMergeScenariosForTokens(t *testing.T) {
AssertEqual(t, 2, len(tokens), "Tokens 3")
child = api.Fork()
api.Output.AddTokens(token3)
api.Output.AddToken(token3)
api.Reset()
api.Output.AddTokens(token4)
api.Output.AddToken(token4)
api.Merge(child)
api.Dispose(child)

View File

@ -365,9 +365,9 @@ func MatchRune(expected rune) Handler {
return MatchByte(byte(expected))
}
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil && r == expected {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -408,13 +408,13 @@ func MatchRunes(expected ...rune) Handler {
return MatchBytes(expectedBytes...)
}
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err != nil {
return false
}
for _, e := range expected {
if r == e {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
}
@ -458,9 +458,9 @@ func MatchRuneRange(start rune, end rune) Handler {
return MatchByteRange(byte(start), byte(end))
}
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil && r >= start && r <= end {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -605,9 +605,9 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil && callback(r) {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -650,14 +650,14 @@ func MatchStr(expected string) Handler {
}
offset++
} else {
r, w, err := t.peekRune(offset)
r, w, err := t.Rune.Peek(offset)
if err != nil || e != r {
return false
}
offset += w
}
}
t.acceptRunes(expectedRunes...)
t.Rune.AcceptMulti(expectedRunes...)
return true
}
}
@ -680,7 +680,7 @@ func MatchStrNoCase(expected string) Handler {
matches[i] = rune(b)
width++
} else {
r, w, err := t.peekRune(width)
r, w, err := t.Rune.Peek(width)
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
return false
}
@ -689,7 +689,7 @@ func MatchStrNoCase(expected string) Handler {
}
i++
}
t.acceptRunes(matches...)
t.Rune.AcceptMulti(matches...)
return true
}
}
@ -762,9 +762,9 @@ func MatchNot(handler Handler) Handler {
return false
}
t.Dispose(child)
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -988,7 +988,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
if !digits(t) {
return false
}
value, _ := strconv.ParseInt(t.dataAsString(), 10, 64)
value, _ := strconv.ParseInt(t.Output.String(), 10, 64)
if value < min || value > max {
return false
}
@ -1032,9 +1032,9 @@ func MatchAnyByte() Handler {
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
func MatchAnyRune() Handler {
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -1045,9 +1045,9 @@ func MatchAnyRune() Handler {
// UTF8 rune can be read from the input.
func MatchValidRune() Handler {
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil && r != utf8.RuneError {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -1058,9 +1058,9 @@ func MatchValidRune() Handler {
// UTF8 rune can be read from the input.
func MatchInvalidRune() Handler {
return func(t *API) bool {
r, _, err := t.peekRune(0)
r, _, err := t.Rune.Peek(0)
if err == nil && r == utf8.RuneError {
t.acceptRune(r)
t.Rune.Accept(r)
return true
}
return false
@ -1427,13 +1427,14 @@ func MatchIPv4Netmask(normalize bool) Handler {
}
// Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes).
mask := net.IPv4Mask(t.tokenValue(0).(byte), t.tokenValue(1).(byte), t.tokenValue(2).(byte), t.tokenValue(3).(byte))
val := t.Output.TokenValue
mask := net.IPv4Mask(val(0).(byte), val(1).(byte), val(2).(byte), val(3).(byte))
ones, bits := mask.Size()
if ones == 0 && bits == 0 {
return false
}
t.tokensClear()
t.Output.ClearTokens()
return true
}
}
@ -1462,18 +1463,19 @@ func MatchIPv4Net(normalize bool) Handler {
return true
}
maskToken := t.token(1)
maskToken := t.Output.Token(1)
val := t.Output.TokenValue
if maskToken.Type == "cidr" {
t.dataSetString(fmt.Sprintf("%s/%d", t.tokenValue(0), t.tokenValue(1).(uint8)))
t.Output.SetString(fmt.Sprintf("%s/%d", val(0), val(1).(uint8)))
} else {
o := strings.Split(t.tokenValue(1).(string), ".")
o := strings.Split(val(1).(string), ".")
b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) }
mask := net.IPv4Mask(b(0), b(1), b(2), b(3))
bits, _ := mask.Size()
t.dataSetString(fmt.Sprintf("%s/%d", t.tokenValue(0), bits))
t.Output.SetString(fmt.Sprintf("%s/%d", val(0), bits))
}
t.tokensClear()
t.Output.ClearTokens()
return true
}
}
@ -1502,13 +1504,13 @@ func MatchIPv6(normalize bool) Handler {
}
// Invalid IPv6, when net.ParseIP() cannot handle it.
parsed := net.ParseIP(t.dataAsString())
parsed := net.ParseIP(t.Output.String())
if parsed == nil {
return false
}
if normalize {
t.dataSetString(parsed.String())
t.Output.SetString(parsed.String())
}
return true
}
@ -1531,8 +1533,8 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
if !mask(t) {
return false
}
bits, _ := strconv.Atoi(t.dataAsString())
t.dataSetString(fmt.Sprintf("%d", bits))
bits, _ := strconv.Atoi(t.Output.String())
t.Output.SetString(fmt.Sprintf("%d", bits))
return true
}
}
@ -1673,8 +1675,11 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
return func(t *API) bool {
child := t.Fork()
if handler(t) {
s := modfunc(t.dataAsString())
t.dataSetString(s)
origS := t.Output.String()
s := modfunc(origS)
if s != origS {
t.Output.SetString(s)
}
t.Merge(child)
t.Dispose(child)
return true
@ -1691,7 +1696,7 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
literal := t.dataAsString()
literal := t.Output.String()
return literal
})
}
@ -1703,7 +1708,7 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING
interpreted, _ := interpretString(t.dataAsString())
interpreted, _ := interpretString(t.Output.String())
return interpreted
})
}
@ -1727,7 +1732,7 @@ func interpretString(str string) (string, error) {
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING --- not a 1 rune input
return t.dataRune(0)
return t.Output.Rune(0)
})
}
@ -1737,7 +1742,7 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler {
func MakeByteToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING --- not a 1 byte input
return byte(t.dataRune(0))
return byte(t.Output.Rune(0))
})
}
@ -1942,7 +1947,7 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
value, err := convert(t.dataAsString())
value, err := convert(t.Output.String())
if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
panic(fmt.Sprintf("%s token invalid (%s)", name, err))
@ -1973,7 +1978,7 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
// token to the child here, the order would have been "year", "month", "day", "date".
token := Token{Type: toktype, Value: makeValue(t)}
t.tokensAdd(token)
t.Output.AddToken(token)
t.Merge(child)
t.Dispose(child)
@ -1990,10 +1995,10 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(t *API) bool {
child := t.Fork()
if handler(t) {
tokens := t.tokens()
tokens := t.Output.Tokens()
tokensCopy := make([]Token, len(tokens))
copy(tokensCopy, tokens)
t.tokensSet(Token{Type: toktype, Value: tokensCopy})
t.Output.SetTokens(Token{Type: toktype, Value: tokensCopy})
t.Merge(child)
t.Dispose(child)
return true

View File

@ -7,6 +7,7 @@ import (
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// TODO cleanup when I'm finished with these.
func TestCombinatorsTempDebug(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{

View File

@ -47,8 +47,8 @@ func New(tokenHandler Handler) Func {
return nil, err
}
result := &Result{
Runes: api.dataAsRunes(),
Tokens: api.tokens(),
Runes: api.Output.Runes(),
Tokens: api.Output.Tokens(),
}
return result, nil
}

View File

@ -55,19 +55,19 @@ func ExampleNew() {
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
api := makeTokenizeAPI()
r, _, _ := api.Input.PeekRune(0)
r, _, _ := api.Rune.Peek(0)
AssertEqual(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := makeTokenizeAPI()
r0, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r0)
r0, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r0)
r1, _, _ := i.Input.PeekRune(0) // 0, because read offset resets to 0 after Accept* calls.
r2, _, _ := i.Input.PeekRune(1)
i.Input.AcceptRunes(r1, r2)
r1, _, _ := i.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls.
r2, _, _ := i.Rune.Peek(1)
i.Rune.AcceptMulti(r1, r2)
AssertEqual(t, "Tes", i.Output.String(), "i.String()")
}
@ -136,13 +136,13 @@ func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r)
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r)
}
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
r, _, _ := i.Input.PeekRune(0) // read "\n", cursor ends up at start of new line
i.Input.AcceptRune(r)
r, _, _ := i.Rune.Peek(0) // read "\n", cursor ends up at start of new line
i.Rune.Accept(r)
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
@ -154,9 +154,9 @@ func TestAccept_UpdatesCursor(t *testing.T) {
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("X"))
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r)
r, _, err := i.Input.PeekRune(0)
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r)
r, _, err := i.Rune.Peek(0)
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
@ -167,9 +167,9 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T
child := i.Fork()
// Go to the EOF.
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r)
r, _, err := i.Input.PeekRune(0)
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r)
r, _, err := i.Rune.Peek(0)
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()")
@ -177,7 +177,7 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T
i.Dispose(child)
// So here we should see the same input data as before.
r, _, err = i.Input.PeekRune(0)
r, _, err = i.Rune.Peek(0)
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}

View File

@ -7,9 +7,9 @@ import (
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := NewAPI("Testing")
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r) // T
AssertEqual(t, "T", i.dataAsString(), "accepted rune in input")
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r) // T
AssertEqual(t, "T", i.Output.String(), "accepted rune in input")
// Fork
child := i.Fork()
@ -17,44 +17,44 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// Accept two runes via fork.
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // e
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // s
AssertEqual(t, "es", i.dataAsString(), "result runes in fork")
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // e
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // s
AssertEqual(t, "es", i.Output.String(), "result runes in fork")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// Merge fork back into parent
i.Merge(child)
i.Dispose(child)
AssertEqual(t, "Tes", i.dataAsString(), "result runes in parent Input after Merge()")
AssertEqual(t, "Tes", i.Output.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}
// TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
// accepts one rune at the top level and one rune in each of two nested forks
// (f1, then f2) of an API created for the input "Testing". It then merges and
// disposes the forks innermost-first and asserts, after each step, the output
// string of the current level ("s", "es", "Tes") and that the current stack
// frame offset is 3.
//
// NOTE(review): this listing shows each peek/accept call and each output
// assertion in two spellings (i.Input.* / i.dataAsString() and i.Rune.* /
// i.Output.String()); presumably these are the old and the new form of the
// same statements from the refactoring — confirm which ones the resulting
// file keeps.
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewAPI("Testing")
// Top level: accept the first rune.
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r) // T
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r) // T
// First fork: accept the next rune.
f1 := i.Fork()
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // e
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // e
// Second fork, created while f1 is active: accept one more rune.
f2 := i.Fork()
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // s
// At this level only the rune accepted since the fork is expected in the output.
AssertEqual(t, "s", i.dataAsString(), "f2 String()")
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // s
AssertEqual(t, "s", i.Output.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
// Merge f2 back and dispose it: its rune is expected to be appended to f1's output.
i.Merge(f2)
i.Dispose(f2)
AssertEqual(t, "es", i.dataAsString(), "f1 String()")
AssertEqual(t, "es", i.Output.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// Merge f1 back and dispose it: the top level is expected to hold all three runes.
i.Merge(f1)
i.Dispose(f1)
AssertEqual(t, "Tes", i.dataAsString(), "top-level API String()")
AssertEqual(t, "Tes", i.Output.String(), "top-level API String()")
// Checked after f1 has been merged and disposed; the assertion label still reads "f1.offset A".
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
}
@ -65,10 +65,10 @@ func TestFlushInput(t *testing.T) {
// false in this case, and nothing else happens.
AssertTrue(t, i.FlushInput() == false, "flush input at start")
r, _, _ := i.Input.PeekRune(0)
i.Input.AcceptRune(r) // c
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // o
r, _, _ := i.Rune.Peek(0)
i.Rune.Accept(r) // c
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // o
AssertTrue(t, i.FlushInput() == true, "flush input after reading some data")
AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input")
@ -78,12 +78,12 @@ func TestFlushInput(t *testing.T) {
// Read offset is now zero, but reading should continue after "co".
// The output so far isn't modified, so the following accept calls
// will add their runes to the already accepted string "co".
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // o
r, _, _ = i.Input.PeekRune(0)
i.Input.AcceptRune(r) // o
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // o
r, _, _ = i.Rune.Peek(0)
i.Rune.Accept(r) // o
AssertEqual(t, "cool", i.dataAsString(), "end result")
AssertEqual(t, "cool", i.Output.String(), "end result")
}
func TestInputFlusherWrapper(t *testing.T) {
@ -92,19 +92,19 @@ func TestInputFlusherWrapper(t *testing.T) {
api := NewAPI("abaab")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", api.dataAsString(), "runes after 1 read")
AssertEqual(t, "a", api.Output.String(), "runes after 1 read")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", api.dataAsString(), "runes after 2 reads")
AssertEqual(t, "ab", api.Output.String(), "runes after 2 reads")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", api.dataAsString(), "runes after 3 reads")
AssertEqual(t, "aba", api.Output.String(), "runes after 3 reads")
runeA(api)
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", api.dataAsString(), "runes after 4 reads")
AssertEqual(t, "abaa", api.Output.String(), "runes after 4 reads")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", api.dataAsString(), "runes after 5 reads")
AssertEqual(t, "abaab", api.Output.String(), "runes after 5 reads")
}
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {