Adding documentation and getting the interactions between ParseAPI and TokenAPI cleaned up a bit.

This commit is contained in:
Maurice Makaay 2019-06-07 07:26:41 +00:00
parent c0389283bd
commit 3094b09284
10 changed files with 438 additions and 298 deletions

View File

@ -8,12 +8,12 @@ import (
// ParseAPI holds the internal state of a parse run and provides an API to // ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser. // ParseHandler methods to communicate with the parser.
type ParseAPI struct { type ParseAPI struct {
tokenAPI *TokenAPI // the input reader tokenAPI *TokenAPI // the input reader
loopCheck map[string]bool // used for parser loop detection loopCheck map[string]bool // used for parser loop detection
expecting string // a description of what the current state expects to find (see Expects()) expecting string // a description of what the current state expects to find (see Expects())
result *TokenResult // Last TokenHandler result as retrieved by On(...).Accept() result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
} }
// panicWhenStoppedOrInError will panic when the parser has produced an error // panicWhenStoppedOrInError will panic when the parser has produced an error
@ -99,13 +99,13 @@ func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
} }
p.result = nil p.result = nil
p.tokenAPI.result = newTokenResult() p.tokenAPI.clearResults()
fork := p.tokenAPI.Fork() child := p.tokenAPI.Fork()
ok := tokenHandler(fork) ok := tokenHandler(child)
return &ParseAPIOnAction{ return &ParseAPIOnAction{
parseAPI: p, parseAPI: p,
tokenAPI: fork, tokenAPI: child,
ok: ok, ok: ok,
} }
} }
@ -119,7 +119,7 @@ type ParseAPIOnAction struct {
} }
// Accept tells the parser to move the read cursor past a match that was // Accept tells the parser to move the read cursor past a match that was
// found, and to make the TokenResult from the TokenAPI available in the // found, and to make the TokenHandlerResult from the TokenAPI available in the
// ParseAPI through the ParseAPI.Result() method. // ParseAPI through the ParseAPI.Result() method.
// //
// Returns true in case a match was found. // Returns true in case a match was found.
@ -127,10 +127,8 @@ type ParseAPIOnAction struct {
func (a *ParseAPIOnAction) Accept() bool { func (a *ParseAPIOnAction) Accept() bool {
if a.ok { if a.ok {
a.tokenAPI.Merge() a.tokenAPI.Merge()
a.flushReader()
a.parseAPI.result = a.tokenAPI.root.result a.parseAPI.result = a.tokenAPI.root.result
a.flushTokenAPI()
a.flushReader() //a.flush()
} }
return a.ok return a.ok
} }
@ -147,9 +145,9 @@ func (a *ParseAPIOnAction) Accept() bool {
// When no match was found, then no action is taken and false is returned. // When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Skip() bool { func (a *ParseAPIOnAction) Skip() bool {
if a.ok { if a.ok {
a.tokenAPI.root.cursor = a.tokenAPI.cursor a.tokenAPI.syncCursor(a.tokenAPI.root)
a.parseAPI.result = nil a.tokenAPI.clearResults()
a.flushTokenAPI() a.tokenAPI.detachChilds()
a.flushReader() a.flushReader()
} }
return a.ok return a.ok
@ -166,30 +164,26 @@ func (a *ParseAPIOnAction) Skip() bool {
func (a *ParseAPIOnAction) Stay() bool { func (a *ParseAPIOnAction) Stay() bool {
if a.ok { if a.ok {
a.parseAPI.result = nil a.parseAPI.result = nil
a.flushTokenAPI() a.tokenAPI.clearResults()
a.tokenAPI.detachChilds()
} }
return a.ok return a.ok
} }
func (a *ParseAPIOnAction) flushTokenAPI() {
a.tokenAPI.root.result = newTokenResult()
a.tokenAPI.root.detachChilds()
}
func (a *ParseAPIOnAction) flushReader() { func (a *ParseAPIOnAction) flushReader() {
if a.tokenAPI.offset > 0 { if a.tokenAPI.result.offset > 0 {
a.tokenAPI.root.reader.flush(a.tokenAPI.offset) a.tokenAPI.root.reader.flush(a.tokenAPI.root.result.offset)
a.tokenAPI.root.offset = 0 a.tokenAPI.root.result.offset = 0
a.parseAPI.initLoopCheck() a.parseAPI.initLoopCheck()
} }
} }
// Result returns a TokenResult struct, containing results as produced by the // Result returns a TokenHandlerResult struct, containing results as produced by the
// last ParseAPI.On().Accept() call. // last ParseAPI.On().Accept() call.
func (p *ParseAPI) Result() *TokenResult { func (p *ParseAPI) Result() *TokenHandlerResult {
result := p.result result := p.result
if p.result == nil { if p.result == nil {
callerPanic(1, "parsekit.ParseAPI.TokenResult(): TokenResult() called "+ callerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
"at {caller} without calling ParseAPI.Accept() on beforehand") "at {caller} without calling ParseAPI.Accept() on beforehand")
} }
return result return result
@ -251,7 +245,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to // No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed. // set a different error message when needed.
message := fmt.Sprintf(format, args...) message := fmt.Sprintf(format, args...)
p.err = &Error{message, p.tokenAPI.Cursor()} p.err = &Error{message, *p.tokenAPI.result.cursor}
} }
// ExpectEndOfFile can be used to check if the input is at end of file. // ExpectEndOfFile can be used to check if the input is at end of file.

View File

@ -41,7 +41,7 @@ func ExampleParser_usingTokens() {
// Output: // Output:
// Runes accepted: "¡ök!" // Runes accepted: "¡ök!"
// Token values: RUNE(int32:161) RUNE(int32:246) RUNE(int32:107) RUNE(int32:33) // Token values: RUNE("¡", value = (int32)161) RUNE("ö", value = (int32)246) RUNE("k", value = (int32)107) RUNE("!", value = (int32)33)
} }
func ExampleParseAPI_UnexpectedInput() { func ExampleParseAPI_UnexpectedInput() {
@ -244,7 +244,7 @@ func TestGivenParserWithoutCallToAccept_ResultPanics(t *testing.T) {
parsekit.AssertPanic(t, parsekit.PanicT{ parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") }, Function: func() { p.Execute("") },
Regexp: true, Regexp: true,
Expect: `parsekit\.ParseAPI\.TokenResult\(\): TokenResult\(\) called at ` + Expect: `parsekit\.ParseAPI\.TokenHandlerResult\(\): TokenHandlerResult\(\) called at ` +
`/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`}) `/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`})
} }

View File

@ -26,7 +26,7 @@ import (
// //
// Next to adding runes to the output, it is also possible to modify the // Next to adding runes to the output, it is also possible to modify the
// already collected runes or to produce lexical Tokens. For all things // already collected runes or to produce lexical Tokens. For all things
// concerning results, take a look at the Result struct, which can be // concerning results, take a look at the TokenHandlerResult struct, which can be
// accessed though the method Result(). // accessed though the method Result().
// //
// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT: // FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT:
@ -42,14 +42,15 @@ import (
// forked parent. // forked parent.
// //
// After forking, the same interface as described for BASIC OPERATION can be // After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then // used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result // Merge() can be called on the forked child to append the child's results
// buffer to the parent's result buffer, and to move the read cursor position // to the parent's results, and to move the read cursor position to that
// to that of the child. // of the child.
// //
// When the lookahead was unsuccessful, then the forked child TokenAPI can // When the lookahead was unsuccessful or when the results of the forked child
// simply be discarded. The parent TokenAPI was never modified, so it can // are not to be used, then the forked child TokenAPI can simply be discarded.
// safely be used as if the lookahead never happened. // The parent TokenAPI was never modified, so it can safely be used as if the
// lookahead never happened.
// //
// Note: // Note:
// Many tokenizers/parsers take a different approach on lookaheads by using // Many tokenizers/parsers take a different approach on lookaheads by using
@ -58,22 +59,19 @@ import (
// efficient, however, in my opinion, not very intuitive to read. // efficient, however, in my opinion, not very intuitive to read.
type TokenAPI struct { type TokenAPI struct {
reader *reader reader *reader
cursor *Cursor // current read cursor position, rel. to the input start root *TokenAPI // the root TokenAPI
offset int // current rune offset rel. to the Reader's sliding window parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
result *TokenResult // results as produced by a TokenHandler (runes, Tokens) child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
root *TokenAPI // the root TokenAPI result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens)
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
} }
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(r io.Reader) *TokenAPI { func NewTokenAPI(r io.Reader) *TokenAPI {
input := &TokenAPI{ input := &TokenAPI{
reader: newReader(r), reader: newReader(r),
cursor: &Cursor{}, result: newTokenHandlerResult(),
result: newTokenResult(),
} }
input.root = input input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
return input return input
} }
@ -93,7 +91,7 @@ func (i *TokenAPI) NextRune() (rune, error) {
} }
i.detachChilds() i.detachChilds()
readRune, err := i.reader.runeAt(i.offset) readRune, err := i.reader.runeAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err} i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err return readRune, err
} }
@ -110,8 +108,8 @@ func (i *TokenAPI) Accept() {
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
} }
i.result.runes = append(i.result.runes, i.result.lastRune.r) i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r)) i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
i.offset++ i.result.offset++
i.result.lastRune = nil i.result.lastRune = nil
} }
@ -122,65 +120,65 @@ func (i *TokenAPI) Accept() {
// affecting the parent TokenAPI. This is for example useful when you must perform // affecting the parent TokenAPI. This is for example useful when you must perform
// some form of lookahead. // some form of lookahead.
// //
// When such lookahead turned out successful and you want to accept the results // When processing of the TokenHandler was successful and you want to add the results
// into the parent TokenAPI, you can call TokenAPIold.Merge() on the forked // to the parent TokenAPI, you can call TokenAPIold.Merge() on the forked
// child. This will add the runes in the result buffer to the result buffer of // child. This will add the runes in the result buffer to the result buffer of
// the parent. It also updates the read cursor position of the parent to that // the parent. It also updates the read cursor position of the parent to that
// of the child. // of the child.
// //
// When the lookahead failed, or you don't the results as produced by that // When processing failed, or you don't want to use the results as produced by that
// lookahead, the forked child can simply be discarded. You can continue to work // lookahead, the forked child can simply be discarded. You can continue to work
// with the parent TokenAPI as if nothing ever happened. // with the parent TokenAPI as if nothing ever happened.
func (i *TokenAPI) Fork() *TokenAPI { func (i *TokenAPI) Fork() *TokenAPI {
// Cleanup current forking / reading state.
i.detachChilds() i.detachChilds()
i.result.lastRune = nil
// Create the new fork. // Create the new fork.
child := &TokenAPI{ child := &TokenAPI{
reader: i.reader, reader: i.reader,
cursor: &Cursor{},
offset: i.offset,
root: i.root, root: i.root,
parent: i, parent: i,
} }
child.result = newTokenResult() child.result = newTokenHandlerResult()
*child.cursor = *i.cursor i.syncCursor(child)
i.child = child i.child = child
i.result.lastRune = nil
return child return child
} }
// Merge appends the Result of a forked child TokenAPI to the Result of its // Merge appends the TokenHandlerResult of a forked child TokenAPI to the TokenHandlerResult
// parent. The read cursor position of the parent is also updated to that of // of its parent. The read cursor position of the parent is also updated to
// the forked child. // that of the forked child.
// //
// After the merge operation, the child is reset so it can immediately be // After the merge operation, the child is reset so it can immediately be
// reused for performing another match. This means that all Result data are // reused for performing another match. This means that all TokenHandlerResult data are
// cleared, but the read cursor position is kept at its current position. // cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent. // This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() { func (i *TokenAPI) Merge() {
if i.parent == nil { if i.parent == nil {
callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
} }
i.addResultsToParent()
i.syncCursor(i.parent)
i.clearResults()
i.detachChilds()
}
func (i *TokenAPI) addResultsToParent() {
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...) i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
i.parent.offset = i.offset
i.parent.cursor = i.cursor
i.detachChilds()
i.result = newTokenResult()
} }
// Result returns the TokenResult data for the TokenAPI. The returned struct func (i *TokenAPI) syncCursor(to *TokenAPI) {
// can be used to retrieve and to modify result data. to.result.offset = i.result.offset
func (i *TokenAPI) Result() *TokenResult { *to.result.cursor = *i.result.cursor
return i.result
} }
// Cursor retrieves the current read cursor data. func (i *TokenAPI) clearResults() {
// TODO make this and offset part of Result struct? i.result.lastRune = nil
func (i *TokenAPI) Cursor() Cursor { i.result.runes = []rune{}
return *i.cursor i.result.tokens = []*Token{}
i.result.err = nil
} }
func (i *TokenAPI) detachChilds() { func (i *TokenAPI) detachChilds() {
@ -197,3 +195,9 @@ func (i *TokenAPI) detachChildsRecurse() {
i.child = nil i.child = nil
i.parent = nil i.parent = nil
} }
// Result returns the TokenHandlerResult data for the TokenAPI. The returned struct
// can be used to retrieve and to modify result data.
//
// The pointer that is returned is the TokenAPI's own result field, not a copy,
// so changes made through it are visible to the TokenAPI.
func (i *TokenAPI) Result() *TokenHandlerResult {
	return i.result
}

169
tokenhandlerresult.go Normal file
View File

@ -0,0 +1,169 @@
package parsekit
import (
"fmt"
"strings"
)
// TokenHandlerResult is a struct that is used for holding and managing tokenizing results as
// produced by a TokenHandler.
//
// Next to the collected runes and Tokens, it also holds the read position
// (cursor and offset) and an optional error.
type TokenHandlerResult struct {
	lastRune *runeInfo // Information about the last rune read using NextRune()
	runes    []rune    // the runes collected so far (see AddRunes(), SetRunes(), ClearRunes())
	tokens   []*Token  // the Tokens collected so far (see AddToken(), SetTokens(), ClearTokens())
	cursor   *Cursor   // current read cursor position, relative to the start of the file
	offset   int       // current rune offset relative to the Reader's sliding window
	err      *Error    // can be used by a TokenHandler to report a specific issue with the input
}
// runeInfo describes the outcome of reading a single rune: the rune itself
// and the error, if any, that accompanied the read.
type runeInfo struct {
	r   rune  // the rune that was read
	err error // the error that accompanied the read, if any
}
// Token is a lexical token as produced by TokenHandlers.
//
// Only the Runes are mandatory. Type and Value are optional and can be
// filled with whatever data the parser author needs:
//
// Type lets a tokenizer tell the parser what kind of token it is handling.
//
// Value can carry any kind of data along with the token. The built-in token
// maker functions such as MakeInt8Token() use it to store an interpreted
// version of the input string.
type Token struct {
	Runes []rune      // the runes that make up the token
	Type  interface{} // optional token type, can be any type that a parser author sees fit
	Value interface{} // optional token value, of any type as well
}

// String formats the Token for testing / debugging purposes as
// TYPE("runes", value = (gotype)value).
//
// The TYPE part is empty when Type is nil, and the ", value = ..." part is
// omitted when Value is nil.
func (t Token) String() string {
	var typeLabel, valueLabel string
	if t.Type != nil {
		typeLabel = fmt.Sprintf("%v", t.Type)
	}
	if t.Value != nil {
		valueLabel = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value)
	}
	quotedRunes := string(t.Runes)
	return fmt.Sprintf("%v(%q%s)", typeLabel, quotedRunes, valueLabel)
}
// newTokenHandlerResult creates a TokenHandlerResult that holds no runes and
// no tokens (both as empty, non-nil slices) and a fresh zero-value Cursor.
func newTokenHandlerResult() *TokenHandlerResult {
	result := new(TokenHandlerResult)
	result.runes = make([]rune, 0)
	result.tokens = make([]*Token, 0)
	result.cursor = &Cursor{}
	return result
}
// ClearRunes drops all runes from the TokenHandlerResult by replacing them
// with a new, empty (non-nil) slice.
func (r *TokenHandlerResult) ClearRunes() {
	r.runes = make([]rune, 0)
}
// SetRunes replaces the runes in the TokenHandlerResult with the provided
// input. The input must be of a type that addRunes supports (string, []rune
// or rune).
func (r *TokenHandlerResult) SetRunes(s interface{}) {
	// Start from a fresh slice rather than truncating the old one, so the
	// input may safely be a (sub)slice of the runes that are being replaced.
	r.runes = []rune{}
	r.addRunes(s)
}
// AddRunes is used to add runes to the TokenHandlerResult.
//
// All provided arguments are passed on, in order, to addRunes, which does the
// actual work of appending them.
func (r *TokenHandlerResult) AddRunes(set ...interface{}) {
	r.addRunes(set...)
}
// addRunes appends the provided items, in order, to the runes of the
// TokenHandlerResult. Supported item types are rune, []rune and string.
// For any other type, callerPanic is invoked.
func (r *TokenHandlerResult) addRunes(set ...interface{}) {
	for _, item := range set {
		switch v := item.(type) {
		case rune:
			r.runes = append(r.runes, v)
		case []rune:
			r.runes = append(r.runes, v...)
		case string:
			r.runes = append(r.runes, []rune(v)...)
		default:
			// NOTE(review): the depth of 2 presumably makes {caller} point at
			// the code that called the exported wrapper (AddRunes / SetRunes)
			// instead of at the wrapper itself; confirm against callerPanic.
			callerPanic(2, "parsekit.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", v)
		}
	}
}
// Runes retrieves the Runes from the TokenHandlerResult.
//
// The internal slice is returned as-is; no copy is made.
func (r *TokenHandlerResult) Runes() []rune {
	return r.runes
}
// Rune retrieves a single rune from the TokenHandlerResult at the specified index.
//
// The index is not checked: an index outside the range of the collected
// runes results in a runtime panic.
func (r *TokenHandlerResult) Rune(idx int) rune {
	return r.runes[idx]
}
// String returns the Runes from the TokenHandlerResult as a string.
// Tokens are not part of the returned string.
func (r *TokenHandlerResult) String() string {
	return string(r.runes)
}
// ClearTokens drops all tokens from the TokenHandlerResult by replacing them
// with a new, empty (non-nil) slice.
func (r *TokenHandlerResult) ClearTokens() {
	r.tokens = make([]*Token, 0)
}
// SetTokens replaces the Tokens in the TokenHandlerResult with the provided
// input. The Token pointers are copied into a new slice, so the slice that
// was passed in is not retained.
func (r *TokenHandlerResult) SetTokens(tokens []*Token) {
	replacement := make([]*Token, len(tokens))
	copy(replacement, tokens)
	r.tokens = replacement
}
// AddToken is used to add a Token to the TokenHandlerResult.
//
// The Token pointer is appended as-is, after the Tokens that were already
// collected.
func (r *TokenHandlerResult) AddToken(t *Token) {
	r.tokens = append(r.tokens, t)
}
// SliceOfTokens is a named type for a []*Token slice. The method Tokens()
// returns this type. It has a String() method, which makes it easy to format
// the tokens as a string for testing / debugging purposes.
type SliceOfTokens []*Token

// String formats every Token in the slice using Token.String() and joins the
// results using a single space. An empty slice yields an empty string.
func (ts SliceOfTokens) String() string {
	var sb strings.Builder
	for idx, tok := range ts {
		if idx > 0 {
			sb.WriteString(" ")
		}
		sb.WriteString(tok.String())
	}
	return sb.String()
}
// Tokens retrieves the Tokens from the TokenHandlerResult.
//
// The internal slice is returned as a SliceOfTokens; no copy is made.
func (r *TokenHandlerResult) Tokens() SliceOfTokens {
	return r.tokens
}
// Token retrieves a single Token from the TokenHandlerResult at the specified index.
//
// The index is not checked: an index outside the range of the collected
// Tokens results in a runtime panic.
func (r *TokenHandlerResult) Token(idx int) *Token {
	return r.tokens[idx]
}
// Values collects the Value field of every Token in the TokenHandlerResult
// into a new slice, keeping the order of the Tokens.
func (r *TokenHandlerResult) Values() []interface{} {
	collected := make([]interface{}, 0, len(r.tokens))
	for idx := range r.tokens {
		collected = append(collected, r.tokens[idx].Value)
	}
	return collected
}
// Value retrieves a single Value from the TokenHandlerResult Token at the specified index.
//
// The index is not checked: an index outside the range of the collected
// Tokens results in a runtime panic.
func (r *TokenHandlerResult) Value(idx int) interface{} {
	return r.tokens[idx].Value
}
// Cursor returns the read cursor position that is held by the
// TokenHandlerResult. The internal pointer is returned; no copy is made.
func (r *TokenHandlerResult) Cursor() *Cursor {
	return r.cursor
}

View File

@ -210,11 +210,11 @@ var A = struct {
HexDigit: MatchHexDigit(), HexDigit: MatchHexDigit(),
Octet: MatchOctet(false), Octet: MatchOctet(false),
IPv4: MatchIPv4(true), IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(), IPv4CIDRMask: MatchIPv4CIDRMask(true),
IPv4Netmask: MatchIPv4Netmask(), IPv4Netmask: MatchIPv4Netmask(true),
IPv4Net: MatchIPv4Net(true), IPv4Net: MatchIPv4Net(true),
IPv6: MatchIPv6(true), IPv6: MatchIPv6(true),
IPv6CIDRMask: MatchIPv6CIDRMask(), IPv6CIDRMask: MatchIPv6CIDRMask(true),
IPv6Net: MatchIPv6Net(true), IPv6Net: MatchIPv6Net(true),
} }
@ -417,9 +417,11 @@ func MatchSeq(handlers ...TokenHandler) TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
child := t.Fork() child := t.Fork()
for _, handler := range handlers { for _, handler := range handlers {
if !handler(child) { subchild := child.Fork()
if !handler(subchild) {
return false return false
} }
subchild.Merge()
} }
child.Merge() child.Merge()
return true return true
@ -589,15 +591,13 @@ func MatchIntegerBetween(min int64, max int64) TokenHandler {
} }
digits := MatchSigned(MatchDigits()) digits := MatchSigned(MatchDigits())
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
fork := t.Fork() if !digits(t) {
if !digits(fork) {
return false return false
} }
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64) value, _ := strconv.ParseInt(t.Result().String(), 10, 64)
if value < min || value > max { if value < min || value > max {
return false return false
} }
fork.Merge()
return true return true
} }
} }
@ -607,8 +607,8 @@ func MatchIntegerBetween(min int64, max int64) TokenHandler {
// a successful or a failing match through its boolean return value. // a successful or a failing match through its boolean return value.
func MatchEndOfFile() TokenHandler { func MatchEndOfFile() TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
fork := t.Fork() child := t.Fork()
_, err := fork.NextRune() _, err := child.NextRune()
return err == io.EOF return err == io.EOF
} }
} }
@ -659,7 +659,7 @@ func MatchDigitNotZero() TokenHandler {
} }
// MatchInteger creates a TokenHandler function that checks if a valid integer // MatchInteger creates a TokenHandler function that checks if a valid integer
// can be read from the input. In line with Go, a integer cannot start with // can be read from the input. In line with Go, an integer cannot start with
// a zero. Starting with a zero is used to indicate other bases, like octal or // a zero. Starting with a zero is used to indicate other bases, like octal or
// hexadecimal. // hexadecimal.
func MatchInteger() TokenHandler { func MatchInteger() TokenHandler {
@ -724,22 +724,20 @@ func MatchHexDigit() TokenHandler {
func MatchOctet(normalize bool) TokenHandler { func MatchOctet(normalize bool) TokenHandler {
max3Digits := MatchMinMax(1, 3, MatchDigit()) max3Digits := MatchMinMax(1, 3, MatchDigit())
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
fork := t.Fork() if !max3Digits(t) {
if !max3Digits(fork) {
return false return false
} }
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16) value, _ := strconv.ParseInt(t.Result().String(), 10, 16)
if value > 255 { if value > 255 {
return false return false
} }
if normalize { if normalize {
runes := fork.Result().Runes() runes := t.Result().Runes()
for len(runes) > 1 && runes[0] == '0' { for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:] runes = runes[1:]
} }
fork.Result().SetRunes(runes) t.Result().SetRunes(runes)
} }
fork.Merge()
return true return true
} }
} }
@ -757,19 +755,20 @@ func MatchIPv4(normalize bool) TokenHandler {
// MatchIPv4CIDRMask creates a TokenHandler function that checks if a // MatchIPv4CIDRMask creates a TokenHandler function that checks if a
// valid IPv4 CIDR mask (0 - 32) value can be read from the input. // valid IPv4 CIDR mask (0 - 32) value can be read from the input.
func MatchIPv4CIDRMask() TokenHandler { func MatchIPv4CIDRMask(normalize bool) TokenHandler {
return MatchIntegerBetween(0, 32) return matchCIDRMask(32, normalize)
} }
// MatchIPv4Netmask creates a TokenHandler function that checks if a valid // MatchIPv4Netmask creates a TokenHandler function that checks if a valid
// IPv4 netmask can be read from input (e.g. 255.255.255.0). // IPv4 netmask can be read from input (e.g. 255.255.255.0).
// Only a netmask in canonical form are accepted (meaning that in binary form // Only a netmask in canonical form is accepted (meaning that in binary form
// it start with zero or more 1-bits, followed by only 0-bits up to the // it start with zero or more 1-bits, followed by only 0-bits up to the
// 32 bit length). // 32 bit length).
// //
// Netmasks that look like "255.255.192.000" will be normalized to "255.255.192.0". // When the normalize parameter is true, netmasks that look like
func MatchIPv4Netmask() TokenHandler { // "255.255.192.000" will be normalized to "255.255.192.0".
octet := MakeUint8Token(nil, MatchOctet(true)) func MatchIPv4Netmask(normalize bool) TokenHandler {
octet := MakeUint8Token(nil, MatchOctet(normalize))
dot := MatchRune('.') dot := MatchRune('.')
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
@ -802,8 +801,8 @@ func MatchIPv4Net(normalize bool) TokenHandler {
ip := MakeStrLiteralToken("ip", MatchIPv4(normalize)) ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
slash := MatchRune('/') slash := MatchRune('/')
mask := MatchAny( mask := MatchAny(
MakeStrLiteralToken("mask", MatchIPv4Netmask()), MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
MakeUint8Token("cidr", MatchIPv4CIDRMask())) MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
ipnet := MatchSeq(ip, slash, mask) ipnet := MatchSeq(ip, slash, mask)
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
@ -841,13 +840,12 @@ func MatchIPv6(normalize bool) TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
nrOfHextets := 0 nrOfHextets := 0
fork := t.Fork()
for nrOfHextets < 8 { for nrOfHextets < 8 {
if hextet(fork) { if hextet(t) {
nrOfHextets++ nrOfHextets++
} else if empty(fork) { } else if empty(t) {
nrOfHextets += 2 nrOfHextets += 2
} else if !colon(fork) { } else if !colon(t) {
break break
} }
} }
@ -857,34 +855,51 @@ func MatchIPv6(normalize bool) TokenHandler {
} }
// Invalid IPv6, when net.ParseIP() cannot handle it. // Invalid IPv6, when net.ParseIP() cannot handle it.
parsed := net.ParseIP(fork.Result().String()) parsed := net.ParseIP(t.Result().String())
if parsed == nil { if parsed == nil {
return false return false
} }
if normalize { if normalize {
fork.Result().SetRunes(parsed.String()) t.Result().SetRunes(parsed.String())
} }
fork.Merge()
return true return true
} }
} }
// MatchIPv6CIDRMask creates a TokenHandler function that checks if a // MatchIPv6CIDRMask creates a TokenHandler function that checks if a
// valid IPv6 CIDR mask (0 - 128) value can be read from the input. // valid IPv6 CIDR mask (0 - 128) value can be read from the input.
func MatchIPv6CIDRMask() TokenHandler { func MatchIPv6CIDRMask(normalize bool) TokenHandler {
return MatchIntegerBetween(0, 128) return matchCIDRMask(128, normalize)
}
func matchCIDRMask(bits int64, normalize bool) TokenHandler {
mask := MatchIntegerBetween(0, bits)
if !normalize {
return mask
}
return func(t *TokenAPI) bool {
if !mask(t) {
return false
}
r := t.Result()
bits, _ := strconv.Atoi(r.String())
t.Result().SetRunes(fmt.Sprintf("%d", bits))
return true
}
} }
// MatchIPv6Net creates a TokenHandler function that checks the input for an // MatchIPv6Net creates a TokenHandler function that checks the input for an
// IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64. // IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
// //
// When the normalize parameter is true, then the IP address and the mask are // When the normalize parameter is true, then the IP address and the mask are
// normalized. // normalized. The above example would be normalized to fe80::216:3eff:fe96:2/64.
func MatchIPv6Net(normalize bool) TokenHandler { func MatchIPv6Net(normalize bool) TokenHandler {
ip := MatchIPv6(normalize) ip := MatchIPv6(normalize)
slash := MatchRune('/') slash := MatchRune('/')
mask := MatchIPv6CIDRMask() mask := MatchIPv6CIDRMask(normalize)
return MatchSeq(ip, slash, mask) return MatchSeq(ip, slash, mask)
} }
@ -991,6 +1006,11 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa
} }
} }
// MakeStrLiteralToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a string-typed
// representation of the read Runes. This string is literal, meaning that an
// escape sequence like "\n" is kept as-is (a backslash character, followed by
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler { func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
literal := t.Result().String() literal := t.Result().String()
@ -998,6 +1018,10 @@ func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler
}) })
} }
// MakeStrInterpretedToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a string-typed
// representation of the read Runes. This string is interpreted, meaning that an
// escape sequence like "\n" is translated to an actual newline control character.
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler { func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING // TODO ERROR HANDLING
@ -1006,20 +1030,6 @@ func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHan
}) })
} }
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 rune input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
})
}
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 byte input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
})
}
func interpretString(str string) (string, error) { func interpretString(str string) (string, error) {
var sb strings.Builder var sb strings.Builder
for len(str) > 0 { for len(str) > 0 {
@ -1033,12 +1043,38 @@ func interpretString(str string) (string, error) {
return sb.String(), nil return sb.String(), nil
} }
// MakeRuneToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to the first rune of
// the runes that were read by the provided handler. The Token.Runes are set
// to all runes that were read.
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
	makeToken := func(t *TokenAPI) *Token {
		// TODO ERROR HANDLING --- not a 1 rune input
		result := t.Result()
		return &Token{Type: toktype, Runes: result.Runes(), Value: result.Rune(0)}
	}
	return MakeTokenByCallback(handler, makeToken)
}
// MakeByteToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult. The Token.Runes are the runes that were read by the
// provided handler and the Token.Value is the first of those runes,
// converted to a byte.
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
	byteToken := func(t *TokenAPI) *Token {
		// TODO ERROR HANDLING --- not a 1 byte input
		res := t.Result()
		tok := &Token{Type: toktype, Runes: res.Runes(), Value: byte(res.Rune(0))}
		return tok
	}
	return MakeTokenByCallback(handler, byteToken)
}
// MakeIntToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int-representation
// of the read Runes.
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler { func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
return strconv.Atoi(s) return strconv.Atoi(s)
}) })
} }
// MakeInt8Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int8-representation
// of the read Runes.
// TODO allow other Go types for oct and hex too. // TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
@ -1051,6 +1087,9 @@ func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeInt16Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int16-representation
// of the read Runes.
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1062,6 +1101,9 @@ func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeInt32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int32-representation
// of the read Runes.
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1073,6 +1115,9 @@ func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeInt64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int64-representation
// of the read Runes.
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1084,6 +1129,9 @@ func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeUintToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint-representation
// of the read Runes.
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler { func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1095,6 +1143,9 @@ func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeUint8Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint8-representation
// of the read Runes.
// TODO allow other Go types for oct and hex too. // TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
@ -1107,6 +1158,9 @@ func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeUint16Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint16-representation
// of the read Runes.
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1118,6 +1172,9 @@ func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeUint32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint32-representation
// of the read Runes.
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1129,6 +1186,9 @@ func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeUint64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint64-representation
// of the read Runes.
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1140,6 +1200,9 @@ func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeFloat32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a float32-representation
// of the read Runes.
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1151,6 +1214,9 @@ func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeFloat64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a float64-representation
// of the read Runes.
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler { func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1162,6 +1228,9 @@ func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
}) })
} }
// MakeBooleanToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a bool-representation
// of the read Runes.
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler { func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
@ -1191,12 +1260,16 @@ func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s
}) })
} }
// MakeTokenByCallback creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token is to be generated by the provided
// callback function. The function gets the current TokenAPI as its input and
// must return a complete Token.
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler { func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
fork := t.Fork() child := t.Fork()
if handler(fork) { if handler(child) {
t.Result().AddToken(callback(fork)) t.Result().AddToken(callback(child))
fork.Merge() child.Merge()
return true return true
} }
return false return false

View File

@ -259,7 +259,11 @@ func TestIPv6Atoms(t *testing.T) {
{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"}, {"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"}, {"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"}, {"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"}, {"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
{"0", a.IPv6CIDRMask, true, "0"}, {"0", a.IPv6CIDRMask, true, "0"},
{"128", a.IPv6CIDRMask, true, "128"}, {"128", a.IPv6CIDRMask, true, "128"},
{"129", a.IPv6CIDRMask, false, ""}, {"129", a.IPv6CIDRMask, false, ""},

View File

@ -6,7 +6,7 @@ package parsekit
// method. // method.
type Tokenizer struct { type Tokenizer struct {
parser *Parser parser *Parser
result *TokenResult result *TokenHandlerResult
} }
// TokenHandler is the function type that is involved in turning a low level // TokenHandler is the function type that is involved in turning a low level
@ -41,9 +41,9 @@ func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer {
} }
// Execute feeds the input to the wrapped TokenHandler function. // Execute feeds the input to the wrapped TokenHandler function.
// It returns the TokenHandler's TokenResult. When an error occurred // It returns the TokenHandler's TokenHandlerResult. When an error occurred
// during parsing, the error will be set, nil otherwise. // during parsing, the error will be set, nil otherwise.
func (t *Tokenizer) Execute(input string) (*TokenResult, *Error) { func (t *Tokenizer) Execute(input string) (*TokenHandlerResult, *Error) {
err := t.parser.Execute(input) err := t.parser.Execute(input)
return t.result, err return t.result, err
} }

View File

@ -33,7 +33,7 @@ func ExampleTokenizer_Execute() {
"10.0.300.1/24", "10.0.300.1/24",
"not an IPv4 CIDR", "not an IPv4 CIDR",
} { } {
// Execute returns a TokenResult and an error, which is nil on success. // Execute returns a TokenHandlerResult and an error, which is nil on success.
result, err := tokenizer.Execute(input) result, err := tokenizer.Execute(input)
if err == nil { if err == nil {
@ -43,9 +43,9 @@ func ExampleTokenizer_Execute() {
} }
} }
// Output: // Output:
// Result: ip(string:0.0.0.0) mask(int8:0) // Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
// Result: ip(string:192.168.0.1) mask(int8:24) // Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
// Result: ip(string:255.255.255.255) mask(int8:32) // Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
// Error: unexpected input (expected cidr) // Error: unexpected input (expected cidr)
// Error: unexpected input (expected cidr) // Error: unexpected input (expected cidr)
} }
@ -173,11 +173,11 @@ func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *test
i := mkInput() i := mkInput()
r, _ := i.NextRune() r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()") AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "Input.lastRune after NextRune() is not nil") AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
i.Accept() i.Accept()
AssertTrue(t, i.result.lastRune == nil, "Input.lastRune after Accept() is nil") AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.offset, "Input.offset") AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
AssertEqual(t, 'T', i.reader.buffer[0], "Input.buffer[0]") AssertEqual(t, 'T', i.reader.buffer[0], "TokenAPI.reader.buffer[0]")
r, _ = i.NextRune() r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
} }
@ -194,21 +194,20 @@ func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
func TestAccept_UpdatesCursor(t *testing.T) { func TestAccept_UpdatesCursor(t *testing.T) {
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines")) i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.cursor.String(), "cursor 1") AssertEqual(t, "start of file", i.result.cursor.String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune() i.NextRune()
i.Accept() i.Accept()
} }
AssertEqual(t, "line 1, column 7", i.cursor.String(), "cursor 2") AssertEqual(t, "line 1, column 7", i.result.cursor.String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept() i.Accept()
AssertEqual(t, "line 2, column 1", i.cursor.String(), "cursor 3") AssertEqual(t, "line 2, column 1", i.result.cursor.String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune() i.NextRune()
i.Accept() i.Accept()
} }
AssertEqual(t, "line 3, column 5", i.cursor.String(), "cursor 4") AssertEqual(t, "line 3, column 5", i.result.cursor.String(), "cursor 4")
AssertEqual(t, *i.cursor, i.Cursor(), "i.Cursor()")
} }
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
@ -221,20 +220,20 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
f := i.Fork() f := i.Fork()
AssertEqual(t, f, i.child, "Input.child (must be f)") AssertEqual(t, f, i.child, "Input.child (must be f)")
AssertEqual(t, i, f.parent, "Input.parent (must be i)") AssertEqual(t, i, f.parent, "Input.parent (must be i)")
AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte") AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte") AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Accept two runes via fork. // Accept two runes via fork.
f.NextRune() f.NextRune()
f.Accept() // e f.Accept() // e
f.NextRune() f.NextRune()
f.Accept() // s f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork") AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte") AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte") AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Merge fork back into parent // Merge fork back into parent
f.Merge() f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()") AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.cursor.Byte, "i.child.cursor.Byte") AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
} }
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@ -248,25 +247,25 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
f2.NextRune() f2.NextRune()
f2.Accept() f2.Accept()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()") AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.offset, "i.offset") AssertEqual(t, 1, i.result.offset, "i.offset A")
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()") AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 2, f1.offset, "f1.offset") AssertEqual(t, 2, f1.result.offset, "f1.offset A")
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()") AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset") AssertEqual(t, 3, f2.result.offset, "f2.offset A")
f2.Merge() f2.Merge()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()") AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.offset, "i.offset") AssertEqual(t, 1, i.result.offset, "i.offset B")
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()") AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.offset, "f1.offset") AssertEqual(t, 3, f1.result.offset, "f1.offset B")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset") AssertEqual(t, 3, f2.result.offset, "f2.offset B")
f1.Merge() f1.Merge()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()") AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, 3, i.offset, "i.offset") AssertEqual(t, 3, i.result.offset, "i.offset C")
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()") AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.offset, "f1.offset") AssertEqual(t, 3, f1.result.offset, "f1.offset C")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset") AssertEqual(t, 3, f2.result.offset, "f2.offset C")
} }
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {

View File

@ -1,136 +0,0 @@
package parsekit
import (
"fmt"
"strings"
)
// TokenResult holds the results as produced by a TokenHandler: the runes
// that were collected and the Tokens that were added.
type TokenResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
// runes holds the collected runes, see Runes(), AddRunes() and SetRunes().
runes []rune
// tokens holds the collected Tokens, see Tokens(), AddToken() and SetTokens().
tokens []*Token
}
// runeInfo pairs a rune with an error. It is the type of the
// TokenResult.lastRune field, which describes the last rune that was read.
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by TokenHandlers. A Token
// combines a Type that is chosen by the parser author with the Runes that
// make up the token and an optional Value.
type Token struct {
Type interface{} // token type, can be any type that a parser author sees fit
Runes []rune // the runes that make up the token
Value interface{} // an optional value of any type
}
// newTokenResult creates a TokenResult that starts out with empty, non-nil
// slices for both its runes and its tokens.
func newTokenResult() *TokenResult {
	result := new(TokenResult)
	result.runes = []rune{}
	result.tokens = []*Token{}
	return result
}
// ClearRunes clears the runes in the TokenResult by replacing them with a
// new, empty (non-nil) slice. The tokens in the TokenResult are not touched.
func (r *TokenResult) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the TokenResult with the provided input.
// The existing runes are dropped first, after which the input is added in
// the same way as AddRunes would add it.
func (r *TokenResult) SetRunes(s interface{}) {
	// Start from an empty rune slice, then add the new input. addRunes is
	// called directly from here on purpose, so the call depth towards its
	// error reporting stays the same.
	r.runes = []rune{}
	r.addRunes(s)
}
// AddRunes is used to add runes to the TokenResult. All provided arguments
// are passed on to addRunes, which appends them to the existing runes.
//
// NOTE(review): the work is delegated to addRunes, presumably so that
// AddRunes and SetRunes reach the callerPanic call in addRunes at the same
// call depth — confirm before inlining.
func (r *TokenResult) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
// addRunes appends the provided values to the runes of the TokenResult, in
// the order in which they are provided. A value can be a single rune, a
// []rune or a string. For a value of any other type, callerPanic is invoked
// with an "unsupported type" message.
func (r *TokenResult) addRunes(set ...interface{}) {
	for _, item := range set {
		switch v := item.(type) {
		case rune:
			r.runes = append(r.runes, v)
		case []rune:
			r.runes = append(r.runes, v...)
		case string:
			r.runes = append(r.runes, []rune(v)...)
		default:
			// Keep this call directly inside addRunes: the first argument
			// is a call depth, used to fill in {caller}.
			callerPanic(2, "parsekit.TokenResult.AddRunes(): unsupported type '%T' used at {caller}", v)
		}
	}
}
// Runes retrieves the Runes from the TokenResult. The returned slice is the
// slice that is held by the TokenResult itself, not a copy of it.
func (r *TokenResult) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the TokenResult at the specified index.
// The index is not checked: an index outside the range of the available
// runes results in a runtime panic.
func (r *TokenResult) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the TokenResult as a string. The tokens in
// the TokenResult play no role in the returned string.
func (r *TokenResult) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the TokenResult by replacing them with a
// new, empty (non-nil) slice. The runes in the TokenResult are not touched.
func (r *TokenResult) ClearTokens() {
r.tokens = []*Token{}
}
// SetTokens replaces the Tokens from the TokenResult with the provided input.
// The Token pointers are copied one by one into a fresh slice, so the
// TokenResult does not hold on to the slice that was passed in.
func (r *TokenResult) SetTokens(tokens []*Token) {
	r.tokens = []*Token{}
	for i := range tokens {
		r.tokens = append(r.tokens, tokens[i])
	}
}
// AddToken is used to add a Token to the TokenResult. The Token is appended
// after the Tokens that are already in the TokenResult.
func (r *TokenResult) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// SliceOfTokens is a named type for a []*Token. The method Tokens() returns
// this type. A String() method is defined for it, to make it easy to
// format the tokens as a string for testing / debugging purposes.
type SliceOfTokens []*Token

// String formats every Token in the slice as "Type(T:Value)", in which T is
// the Go type of the Token's Value, and returns the formatted Tokens
// separated by single spaces.
func (ts SliceOfTokens) String() string {
	var sb strings.Builder
	for i, t := range ts {
		if i > 0 {
			sb.WriteString(" ")
		}
		fmt.Fprintf(&sb, "%v(%T:%v)", t.Type, t.Value, t.Value)
	}
	return sb.String()
}
// Tokens retrieves the Tokens from the TokenResult. They are returned as a
// SliceOfTokens, which shares its underlying data with the TokenResult
// (no copy is made).
func (r *TokenResult) Tokens() SliceOfTokens {
return r.tokens
}
// Token retrieves a single Token from the TokenResult at the specified index.
// The index is not checked: an index outside the range of the available
// Tokens results in a runtime panic.
func (r *TokenResult) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the TokenResult
// Tokens. The returned slice has one entry per Token, in Token order.
func (r *TokenResult) Values() []interface{} {
	values := make([]interface{}, 0, len(r.tokens))
	for _, tok := range r.tokens {
		values = append(values, tok.Value)
	}
	return values
}
// Value retrieves a single Value from the TokenResult Token at the specified
// index. The index is not checked: an index outside the range of the
// available Tokens results in a runtime panic.
func (r *TokenResult) Value(idx int) interface{} {
return r.tokens[idx].Value
}

View File

@ -1,10 +1,43 @@
package parsekit package parsekit
import ( import (
"fmt"
"strings" "strings"
"testing" "testing"
) )
// ExampleToken prints four Tokens that differ in whether a Type and a Value
// are set, to show how each combination is formatted as a string.
func ExampleToken() {
	const TName = 1

	tokens := []Token{
		{Runes: []rune("10.1.2.3")},
		{Runes: []rune("two hundred and twenty four"), Type: "Number", Value: 224},
		{Runes: []rune("John"), Type: TName},
		{Runes: []rune("The answer"), Value: 42},
	}

	for _, token := range tokens {
		fmt.Printf("%s\n", token)
	}

	// Output:
	// ("10.1.2.3")
	// Number("two hundred and twenty four", value = (int)224)
	// 1("John")
	// ("The answer", value = (int)42)
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := NewTokenAPI(strings.NewReader("Testing")) i := NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string") i.Result().SetRunes("string")
@ -22,6 +55,6 @@ func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
i.Result().SetRunes(1234567) i.Result().SetRunes(1234567)
}, },
Regexp: true, Regexp: true,
Expect: `parsekit\.TokenResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`, Expect: `parsekit\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
}) })
} }