Adding documentation and getting the interactions between ParseAPI and TokenAPI cleaned up a bit.

This commit is contained in:
Maurice Makaay 2019-06-07 07:26:41 +00:00
parent c0389283bd
commit 3094b09284
10 changed files with 438 additions and 298 deletions

View File

@ -11,7 +11,7 @@ type ParseAPI struct {
tokenAPI *TokenAPI // the input reader
loopCheck map[string]bool // used for parser loop detection
expecting string // a description of what the current state expects to find (see Expects())
result *TokenResult // Last TokenHandler result as retrieved by On(...).Accept()
result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
}
@ -99,13 +99,13 @@ func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
}
p.result = nil
p.tokenAPI.result = newTokenResult()
fork := p.tokenAPI.Fork()
ok := tokenHandler(fork)
p.tokenAPI.clearResults()
child := p.tokenAPI.Fork()
ok := tokenHandler(child)
return &ParseAPIOnAction{
parseAPI: p,
tokenAPI: fork,
tokenAPI: child,
ok: ok,
}
}
@ -119,7 +119,7 @@ type ParseAPIOnAction struct {
}
// Accept tells the parser to move the read cursor past a match that was
// found, and to make the TokenResult from the TokenAPI available in the
// found, and to make the TokenHandlerResult from the TokenAPI available in the
// ParseAPI through the ParseAPI.Result() method.
//
// Returns true in case a match was found.
@ -127,10 +127,8 @@ type ParseAPIOnAction struct {
func (a *ParseAPIOnAction) Accept() bool {
if a.ok {
a.tokenAPI.Merge()
a.flushReader()
a.parseAPI.result = a.tokenAPI.root.result
a.flushTokenAPI()
a.flushReader() //a.flush()
}
return a.ok
}
@ -147,9 +145,9 @@ func (a *ParseAPIOnAction) Accept() bool {
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Skip() bool {
if a.ok {
a.tokenAPI.root.cursor = a.tokenAPI.cursor
a.parseAPI.result = nil
a.flushTokenAPI()
a.tokenAPI.syncCursor(a.tokenAPI.root)
a.tokenAPI.clearResults()
a.tokenAPI.detachChilds()
a.flushReader()
}
return a.ok
@ -166,30 +164,26 @@ func (a *ParseAPIOnAction) Skip() bool {
func (a *ParseAPIOnAction) Stay() bool {
if a.ok {
a.parseAPI.result = nil
a.flushTokenAPI()
a.tokenAPI.clearResults()
a.tokenAPI.detachChilds()
}
return a.ok
}
func (a *ParseAPIOnAction) flushTokenAPI() {
a.tokenAPI.root.result = newTokenResult()
a.tokenAPI.root.detachChilds()
}
func (a *ParseAPIOnAction) flushReader() {
if a.tokenAPI.offset > 0 {
a.tokenAPI.root.reader.flush(a.tokenAPI.offset)
a.tokenAPI.root.offset = 0
if a.tokenAPI.result.offset > 0 {
a.tokenAPI.root.reader.flush(a.tokenAPI.root.result.offset)
a.tokenAPI.root.result.offset = 0
a.parseAPI.initLoopCheck()
}
}
// Result returns a TokenResult struct, containing results as produced by the
// Result returns a TokenHandlerResult struct, containing results as produced by the
// last ParseAPI.On().Accept() call.
func (p *ParseAPI) Result() *TokenResult {
func (p *ParseAPI) Result() *TokenHandlerResult {
result := p.result
if p.result == nil {
callerPanic(1, "parsekit.ParseAPI.TokenResult(): TokenResult() called "+
callerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
"at {caller} without calling ParseAPI.Accept() on beforehand")
}
return result
@ -251,7 +245,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = &Error{message, p.tokenAPI.Cursor()}
p.err = &Error{message, *p.tokenAPI.result.cursor}
}
// ExpectEndOfFile can be used to check if the input is at end of file.
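
A small usage sketch of the flow described above, written as if inside the parsekit package (like the test files in this commit); the handler choice and the surrounding parser wiring are illustrative and not part of this diff:

package parsekit

import "fmt"

// parseIP sketches the On()/Skip()/Accept()/Result()/Error() flow: On() runs a
// TokenHandler against a fork of the TokenAPI, Skip() moves the cursor past a
// match without keeping its result, Accept() merges the match and exposes its
// TokenHandlerResult through Result(), and Error() reports a parse error at the
// current cursor position.
func parseIP(p *ParseAPI) {
    p.On(MatchRune(' ')).Skip() // optionally skip a single leading space
    if p.On(A.IPv4).Accept() {
        fmt.Println("found IP:", p.Result().String())
        return
    }
    p.Error("expected an IPv4 address")
}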

View File

@ -41,7 +41,7 @@ func ExampleParser_usingTokens() {
// Output:
// Runes accepted: "¡ök!"
// Token values: RUNE(int32:161) RUNE(int32:246) RUNE(int32:107) RUNE(int32:33)
// Token values: RUNE("¡", value = (int32)161) RUNE("ö", value = (int32)246) RUNE("k", value = (int32)107) RUNE("!", value = (int32)33)
}
func ExampleParseAPI_UnexpectedInput() {
@ -244,7 +244,7 @@ func TestGivenParserWithoutCallToAccept_ResultPanics(t *testing.T) {
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.TokenResult\(\): TokenResult\(\) called at ` +
Expect: `parsekit\.ParseAPI\.TokenHandlerResult\(\): TokenHandlerResult\(\) called at ` +
`/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`})
}

View File

@ -26,7 +26,7 @@ import (
//
// Next to adding runes to the output, it is also possible to modify the
// already collected runes or to produce lexical Tokens. For all things
// concerning results, take a look at the Result struct, which can be
// concerning results, take a look at the TokenHandlerResult struct, which can be
// accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
@ -42,14 +42,15 @@ import (
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result
// buffer to the parent's result buffer, and to move the read cursor position
// to that of the child.
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child TokenAPI can
// simply be discarded. The parent TokenAPI was never modified, so it can
// safely be used as if the lookahead never happened.
// When the lookahead was unsuccessful or when the results of the forked child
// are not to be used, then the forked child TokenAPI can simply be discarded.
// The parent TokenAPI was never modified, so it can safely be used as if the
// lookahead never happened.
//
// Note:
// Many tokenizers/parsers take a different approach on lookaheads by using
@ -58,22 +59,19 @@ import (
// efficient, however, in my opinion, not very intuitive to read.
type TokenAPI struct {
reader *reader
cursor *Cursor // current read cursor position, rel. to the input start
offset int // current rune offset rel. to the Reader's sliding window
result *TokenResult // results as produced by a TokenHandler (runes, Tokens)
root *TokenAPI // the root TokenAPI
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens)
}
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(r io.Reader) *TokenAPI {
input := &TokenAPI{
reader: newReader(r),
cursor: &Cursor{},
result: newTokenResult(),
result: newTokenHandlerResult(),
}
input.root = input
input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
return input
}
@ -93,7 +91,7 @@ func (i *TokenAPI) NextRune() (rune, error) {
}
i.detachChilds()
readRune, err := i.reader.runeAt(i.offset)
readRune, err := i.reader.runeAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err
}
@ -110,8 +108,8 @@ func (i *TokenAPI) Accept() {
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
i.offset++
i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
i.result.offset++
i.result.lastRune = nil
}
@ -122,65 +120,65 @@ func (i *TokenAPI) Accept() {
// affecting the parent TokenAPI. This is for example useful when you must perform
// some form of lookahead.
//
// When such lookahead turned out successful and you want to accept the results
// into the parent TokenAPI, you can call TokenAPIold.Merge() on the forked
// When processing of the TokenHandler was successful and you want to add the results
// to the parent TokenAPI, you can call TokenAPI.Merge() on the forked
// child. This will add the runes in the result buffer to the result buffer of
// the parent. It also updates the read cursor position of the parent to that
// of the child.
//
// When the lookahead failed, or you don't the results as produced by that
// When processing failed, or you don't want to use the results as produced by that
// lookahead, the forked child can simply be discarded. You can continue to work
// with the parent TokenAPI as if nothing ever happened.
func (i *TokenAPI) Fork() *TokenAPI {
// Cleanup current forking / reading state.
i.detachChilds()
i.result.lastRune = nil
// Create the new fork.
child := &TokenAPI{
reader: i.reader,
cursor: &Cursor{},
offset: i.offset,
root: i.root,
parent: i,
}
child.result = newTokenResult()
*child.cursor = *i.cursor
child.result = newTokenHandlerResult()
i.syncCursor(child)
i.child = child
i.result.lastRune = nil
return child
}
// Merge appends the Result of a forked child TokenAPI to the Result of its
// parent. The read cursor position of the parent is also updated to that of
// the forked child.
// Merge appends the TokenHandlerResult of a forked child TokenAPI to the TokenHandlerResult
// of its parent. The read cursor position of the parent is also updated to
// that of the forked child.
//
// After the merge operation, the child is reset so it can immediately be
// reused for performing another match. This means that all Result data are
// reused for performing another match. This means that all TokenHandlerResult data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() {
if i.parent == nil {
callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
}
i.addResultsToParent()
i.syncCursor(i.parent)
i.clearResults()
i.detachChilds()
}
func (i *TokenAPI) addResultsToParent() {
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
i.parent.offset = i.offset
i.parent.cursor = i.cursor
i.detachChilds()
i.result = newTokenResult()
}
// Result returns the TokenResult data for the TokenAPI. The returned struct
// can be used to retrieve and to modify result data.
func (i *TokenAPI) Result() *TokenResult {
return i.result
func (i *TokenAPI) syncCursor(to *TokenAPI) {
to.result.offset = i.result.offset
*to.result.cursor = *i.result.cursor
}
// Cursor retrieves the current read cursor data.
// TODO make this and offset part of Result struct?
func (i *TokenAPI) Cursor() Cursor {
return *i.cursor
func (i *TokenAPI) clearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []*Token{}
i.result.err = nil
}
func (i *TokenAPI) detachChilds() {
@ -197,3 +195,9 @@ func (i *TokenAPI) detachChildsRecurse() {
i.child = nil
i.parent = nil
}
// Result returns the TokenHandlerResult data for the TokenAPI. The returned struct
// can be used to retrieve and to modify result data.
func (i *TokenAPI) Result() *TokenHandlerResult {
return i.result
}
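
A minimal sketch of the fork/merge flow documented at the top of this file, using only TokenAPI methods that appear in this commit (the "Test" input mirrors the tests further down):

package parsekit

import "strings"

// forkingSketch performs a one-rune lookahead on a forked child. On success
// the child's result is merged into the parent; on failure the child would
// simply be discarded, leaving the parent untouched.
func forkingSketch() string {
    i := NewTokenAPI(strings.NewReader("Test"))
    child := i.Fork()
    if r, err := child.NextRune(); err == nil && r == 'T' {
        child.Accept() // add 'T' to the child's result
        child.Merge()  // push the child's runes and cursor to the parent
    }
    return i.Result().String() // "T"
}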

tokenhandlerresult.go Normal file
View File

@ -0,0 +1,169 @@
package parsekit
import (
"fmt"
"strings"
)
// TokenHandlerResult is a struct that is used for holding and managing tokenizing results as
// produced by a TokenHandler.
type TokenHandlerResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
cursor *Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err *Error // can be used by a TokenHandler to report a specific issue with the input
}
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by TokenHandlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
Runes []rune // the runes that make up the token
Type interface{} // optional token type, can be any type that a parser author sees fit
Value interface{} // optional token value, of any type as well
}
func (t Token) String() string {
tokenType := ""
if t.Type != nil {
tokenType = fmt.Sprintf("%v", t.Type)
}
value := ""
if t.Value != nil {
value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value)
}
return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value)
}
// newTokenHandlerResult initializes an empty TokenHandlerResult struct.
func newTokenHandlerResult() *TokenHandlerResult {
return &TokenHandlerResult{
runes: []rune{},
tokens: []*Token{},
cursor: &Cursor{},
}
}
// ClearRunes clears the runes in the TokenHandlerResult.
func (r *TokenHandlerResult) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the TokenHandlerResult with the provided input.
func (r *TokenHandlerResult) SetRunes(s interface{}) {
r.ClearRunes()
r.addRunes(s)
}
// AddRunes is used to add runes to the TokenHandlerResult.
func (r *TokenHandlerResult) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
// AddRunes is used to add runes to the TokenHandlerResult.
func (r *TokenHandlerResult) addRunes(set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(2, "parsekit.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
// Runes retrieves the Runes from the TokenHandlerResult.
func (r *TokenHandlerResult) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the TokenHandlerResult at the specified index.
func (r *TokenHandlerResult) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the TokenHandlerResult as a string.
func (r *TokenHandlerResult) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the TokenHandlerResult.
func (r *TokenHandlerResult) ClearTokens() {
r.tokens = []*Token{}
}
// SetTokens replaces the Tokens from the TokenHandlerResult with the provided input.
func (r *TokenHandlerResult) SetTokens(tokens []*Token) {
r.ClearTokens()
for _, t := range tokens {
r.AddToken(t)
}
}
// AddToken is used to add a Token to the TokenHandlerResult.
func (r *TokenHandlerResult) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// SliceOfTokens is an alias for []*Token type. The method Tokens() returns
// this type. A String() method is defined for it, to make it easy to
// format the tokens as a string for testing / debugging purposes.
type SliceOfTokens []*Token
func (ts SliceOfTokens) String() string {
parts := make([]string, len(ts))
for i, t := range ts {
parts[i] = t.String()
}
return strings.Join(parts, " ")
}
// Tokens retrieves the Tokens from the TokenHandlerResult.
func (r *TokenHandlerResult) Tokens() SliceOfTokens {
return r.tokens
}
// Token retrieves a single Token from the TokenHandlerResult at the specified index.
func (r *TokenHandlerResult) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the TokenHandlerResult Tokens.
func (r *TokenHandlerResult) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the TokenHandlerResult Token at the specified index.
func (r *TokenHandlerResult) Value(idx int) interface{} {
return r.tokens[idx].Value
}
func (r *TokenHandlerResult) Cursor() *Cursor {
return r.cursor
}
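
A short sketch of the accessors defined in this new file (the empty reader is just a way to get hold of a TokenHandlerResult; the "word" token type is illustrative):

package parsekit

import (
    "fmt"
    "strings"
)

// resultSketch adds runes and a Token to a TokenHandlerResult and prints them
// using the String() formats defined above.
func resultSketch() {
    r := NewTokenAPI(strings.NewReader("")).Result()
    r.AddRunes("ab", 'c', []rune("de")) // string, rune and []rune are all accepted
    r.AddToken(&Token{Type: "word", Runes: r.Runes(), Value: r.String()})
    fmt.Println(r.String())          // abcde
    fmt.Println(r.Tokens().String()) // word("abcde", value = (string)abcde)
}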

View File

@ -210,11 +210,11 @@ var A = struct {
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(),
IPv4Netmask: MatchIPv4Netmask(),
IPv4CIDRMask: MatchIPv4CIDRMask(true),
IPv4Netmask: MatchIPv4Netmask(true),
IPv4Net: MatchIPv4Net(true),
IPv6: MatchIPv6(true),
IPv6CIDRMask: MatchIPv6CIDRMask(),
IPv6CIDRMask: MatchIPv6CIDRMask(true),
IPv6Net: MatchIPv6Net(true),
}
@ -417,9 +417,11 @@ func MatchSeq(handlers ...TokenHandler) TokenHandler {
return func(t *TokenAPI) bool {
child := t.Fork()
for _, handler := range handlers {
if !handler(child) {
subchild := child.Fork()
if !handler(subchild) {
return false
}
subchild.Merge()
}
child.Merge()
return true
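
A tiny MatchSeq sketch. With the change above, every handler in the sequence runs against its own sub-fork, so a handler that inspects its Result() directly (as MatchOctet and MatchIntegerBetween now do) only sees its own runes, not those matched by earlier handlers in the sequence:

package parsekit

// matchArrow matches the two-character sequence "->"; either both runes match
// or nothing is consumed.
var matchArrow TokenHandler = MatchSeq(MatchRune('-'), MatchRune('>'))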
@ -589,15 +591,13 @@ func MatchIntegerBetween(min int64, max int64) TokenHandler {
}
digits := MatchSigned(MatchDigits())
return func(t *TokenAPI) bool {
fork := t.Fork()
if !digits(fork) {
if !digits(t) {
return false
}
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64)
value, _ := strconv.ParseInt(t.Result().String(), 10, 64)
if value < min || value > max {
return false
}
fork.Merge()
return true
}
}
@ -607,8 +607,8 @@ func MatchIntegerBetween(min int64, max int64) TokenHandler {
// a successful or a failing match through its boolean return value.
func MatchEndOfFile() TokenHandler {
return func(t *TokenAPI) bool {
fork := t.Fork()
_, err := fork.NextRune()
child := t.Fork()
_, err := child.NextRune()
return err == io.EOF
}
}
@ -659,7 +659,7 @@ func MatchDigitNotZero() TokenHandler {
}
// MatchInteger creates a TokenHandler function that checks if a valid integer
// can be read from the input. In line with Go, a integer cannot start with
// can be read from the input. In line with Go, an integer cannot start with
// a zero. Starting with a zero is used to indicate other bases, like octal or
// hexadecimal.
func MatchInteger() TokenHandler {
@ -724,22 +724,20 @@ func MatchHexDigit() TokenHandler {
func MatchOctet(normalize bool) TokenHandler {
max3Digits := MatchMinMax(1, 3, MatchDigit())
return func(t *TokenAPI) bool {
fork := t.Fork()
if !max3Digits(fork) {
if !max3Digits(t) {
return false
}
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
value, _ := strconv.ParseInt(t.Result().String(), 10, 16)
if value > 255 {
return false
}
if normalize {
runes := fork.Result().Runes()
runes := t.Result().Runes()
for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:]
}
fork.Result().SetRunes(runes)
t.Result().SetRunes(runes)
}
fork.Merge()
return true
}
}
@ -757,19 +755,20 @@ func MatchIPv4(normalize bool) TokenHandler {
// MatchIPv4CIDRMask creates a TokenHandler function that checks if a
// valid IPv4 CIDR mask (0 - 32) value can be read from the input.
func MatchIPv4CIDRMask() TokenHandler {
return MatchIntegerBetween(0, 32)
func MatchIPv4CIDRMask(normalize bool) TokenHandler {
return matchCIDRMask(32, normalize)
}
// MatchIPv4Netmask creates a TokenHandler function that checks if a valid
// IPv4 netmask can be read from input (e.g. 255.255.255.0).
// Only a netmask in canonical form are accepted (meaning that in binary form
// Only a netmask in canonical form is accepted (meaning that in binary form
// it starts with zero or more 1-bits, followed by only 0-bits up to the
// 32 bit length).
//
// Netmasks that look like "255.255.192.000" will be normalized to "255.255.192.0".
func MatchIPv4Netmask() TokenHandler {
octet := MakeUint8Token(nil, MatchOctet(true))
// When the normalize parameter is true, netmasks that look like
// "255.255.192.000" will be normalized to "255.255.192.0".
func MatchIPv4Netmask(normalize bool) TokenHandler {
octet := MakeUint8Token(nil, MatchOctet(normalize))
dot := MatchRune('.')
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
@ -802,8 +801,8 @@ func MatchIPv4Net(normalize bool) TokenHandler {
ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
slash := MatchRune('/')
mask := MatchAny(
MakeStrLiteralToken("mask", MatchIPv4Netmask()),
MakeUint8Token("cidr", MatchIPv4CIDRMask()))
MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
ipnet := MatchSeq(ip, slash, mask)
return func(t *TokenAPI) bool {
@ -841,13 +840,12 @@ func MatchIPv6(normalize bool) TokenHandler {
return func(t *TokenAPI) bool {
nrOfHextets := 0
fork := t.Fork()
for nrOfHextets < 8 {
if hextet(fork) {
if hextet(t) {
nrOfHextets++
} else if empty(fork) {
} else if empty(t) {
nrOfHextets += 2
} else if !colon(fork) {
} else if !colon(t) {
break
}
}
@ -857,34 +855,51 @@ func MatchIPv6(normalize bool) TokenHandler {
}
// Invalid IPv6, when net.ParseIP() cannot handle it.
parsed := net.ParseIP(fork.Result().String())
parsed := net.ParseIP(t.Result().String())
if parsed == nil {
return false
}
if normalize {
fork.Result().SetRunes(parsed.String())
t.Result().SetRunes(parsed.String())
}
fork.Merge()
return true
}
}
// MatchIPv6CIDRMask creates a TokenHandler function that checks if a
// valid IPv6 CIDR mask (0 - 128) value can be read from the input.
func MatchIPv6CIDRMask() TokenHandler {
return MatchIntegerBetween(0, 128)
func MatchIPv6CIDRMask(normalize bool) TokenHandler {
return matchCIDRMask(128, normalize)
}
func matchCIDRMask(bits int64, normalize bool) TokenHandler {
mask := MatchIntegerBetween(0, bits)
if !normalize {
return mask
}
return func(t *TokenAPI) bool {
if !mask(t) {
return false
}
r := t.Result()
bits, _ := strconv.Atoi(r.String())
t.Result().SetRunes(fmt.Sprintf("%d", bits))
return true
}
}
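
The normalize path above re-renders the matched digits through Atoi and Sprintf, so leading zeros are stripped. A sketch of the effect, calling the handler directly on a fresh TokenAPI (for illustration only):

package parsekit

import (
    "fmt"
    "strings"
)

// cidrNormalizationSketch matches a CIDR mask with normalization enabled;
// the leading zero in "024" is dropped from the result.
func cidrNormalizationSketch() {
    t := NewTokenAPI(strings.NewReader("024"))
    if MatchIPv4CIDRMask(true)(t) {
        fmt.Println(t.Result().String()) // 24
    }
}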
// MatchIPv6Net creates a TokenHandler function that checks the input for an
// IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
//
// When the normalize parameter is true, then the IP address and the mask are
// normalized.
// normalized. The above example would be normalized to fe80::216:3eff:fe96:2/64.
func MatchIPv6Net(normalize bool) TokenHandler {
ip := MatchIPv6(normalize)
slash := MatchRune('/')
mask := MatchIPv6CIDRMask()
mask := MatchIPv6CIDRMask(normalize)
return MatchSeq(ip, slash, mask)
}
@ -991,6 +1006,11 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa
}
}
// MakeStrLiteralToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a string-typed
// representation of the read Runes. This string is literal, meaning that an
// escape sequence like "\n" is kept as-is (a backslash character, followed by
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
literal := t.Result().String()
@ -998,6 +1018,10 @@ func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler
})
}
// MakeStrInterpretedToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a string-typed
// representation of the read Runes. This string is interpreted, meaning that an
// escape sequence like "\n" is translated to an actual newline control character.
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING
@ -1006,20 +1030,6 @@ func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHan
})
}
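
A sketch contrasting the two string token makers documented above. The handler is illustrative and matches the two input characters '\' and 'n'; per the doc comments, the literal maker keeps them as-is while the interpreted maker translates them into a newline:

package parsekit

import (
    "fmt"
    "strings"
)

// strTokenSketch runs the same two-character input through both token makers
// and prints the resulting Token values.
func strTokenSketch() {
    esc := MatchSeq(MatchRune('\\'), MatchRune('n'))

    t1 := NewTokenAPI(strings.NewReader(`\n`))
    MakeStrLiteralToken("lit", esc)(t1)
    fmt.Printf("%q\n", t1.Result().Value(0)) // "\\n" (backslash + 'n', kept as-is)

    t2 := NewTokenAPI(strings.NewReader(`\n`))
    MakeStrInterpretedToken("int", esc)(t2)
    fmt.Printf("%q\n", t2.Result().Value(0)) // "\n" (an actual newline)
}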
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 rune input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
})
}
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 byte input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
})
}
func interpretString(str string) (string, error) {
var sb strings.Builder
for len(str) > 0 {
@ -1033,12 +1043,38 @@ func interpretString(str string) (string, error) {
return sb.String(), nil
}
// MakeRuneToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a Rune-representation
// of the read Rune.
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 rune input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
})
}
// MakeByteToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a Byte-representation
// of the read Rune.
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 byte input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
})
}
// MakeIntToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int-representation
// of the read Rune.
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
return strconv.Atoi(s)
})
}
// MakeInt8Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int8-representation
// of the read Rune.
// TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
@ -1051,6 +1087,9 @@ func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeInt16Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int16-representation
// of the read Rune.
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1062,6 +1101,9 @@ func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeInt32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int32-representation
// of the read Rune.
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1073,6 +1115,9 @@ func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeInt64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to an int64-representation
// of the read Rune.
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1084,6 +1129,9 @@ func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeUintToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint-representation
// of the read Rune.
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1095,6 +1143,9 @@ func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeUint8Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint8-representation
// of the read Rune.
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
@ -1107,6 +1158,9 @@ func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeUint16Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint16-representation
// of the read Rune.
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1118,6 +1172,9 @@ func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeUint32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint32-representation
// of the read Rune.
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1129,6 +1186,9 @@ func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeUint64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a uint64-representation
// of the read Rune.
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1140,6 +1200,9 @@ func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeFloat32Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a float32-representation
// of the read Rune.
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1151,6 +1214,9 @@ func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeFloat64Token creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a float64-representation
// of the read Rune.
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1162,6 +1228,9 @@ func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
})
}
// MakeBooleanToken creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token.Value is set to a bool-representation
// of the read Rune.
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
@ -1191,12 +1260,16 @@ func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s
})
}
// MakeTokenByCallback creates a TokenHandler that will add a Token to the
// TokenHandlerResult, for which the Token is to be generated by the provided
// callback function. The function gets the current TokenAPI as its input and
// must return a complete Token.
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
return func(t *TokenAPI) bool {
fork := t.Fork()
if handler(fork) {
t.Result().AddToken(callback(fork))
fork.Merge()
child := t.Fork()
if handler(child) {
t.Result().AddToken(callback(child))
child.Merge()
return true
}
return false

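A sketch of MakeTokenByCallback as documented above: the callback receives the forked TokenAPI on which the handler ran and builds the Token itself (the token type and value here are illustrative):

package parsekit

// digitCountToken produces a Token whose Value is the number of digits that
// were matched, built entirely inside the callback.
var digitCountToken TokenHandler = MakeTokenByCallback(MatchDigits(), func(t *TokenAPI) *Token {
    runes := t.Result().Runes()
    return &Token{Type: "digits", Runes: runes, Value: len(runes)}
})
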
View File

@ -259,7 +259,11 @@ func TestIPv6Atoms(t *testing.T) {
{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
{"0", a.IPv6CIDRMask, true, "0"},
{"128", a.IPv6CIDRMask, true, "128"},
{"129", a.IPv6CIDRMask, false, ""},

View File

@ -6,7 +6,7 @@ package parsekit
// method.
type Tokenizer struct {
parser *Parser
result *TokenResult
result *TokenHandlerResult
}
// TokenHandler is the function type that is involved in turning a low level
@ -41,9 +41,9 @@ func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer {
}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the TokenHandler's TokenResult. When an error occurred
// It returns the TokenHandler's TokenHandlerResult. When an error occurred
// during parsing, the error will be set, nil otherwise.
func (t *Tokenizer) Execute(input string) (*TokenResult, *Error) {
func (t *Tokenizer) Execute(input string) (*TokenHandlerResult, *Error) {
err := t.parser.Execute(input)
return t.result, err
}

View File

@ -33,7 +33,7 @@ func ExampleTokenizer_Execute() {
"10.0.300.1/24",
"not an IPv4 CIDR",
} {
// Execute returns a TokenResult and an error, which is nil on success.
// Execute returns a TokenHandlerResult and an error, which is nil on success.
result, err := tokenizer.Execute(input)
if err == nil {
@ -43,9 +43,9 @@ func ExampleTokenizer_Execute() {
}
}
// Output:
// Result: ip(string:0.0.0.0) mask(int8:0)
// Result: ip(string:192.168.0.1) mask(int8:24)
// Result: ip(string:255.255.255.255) mask(int8:32)
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
// Error: unexpected input (expected cidr)
// Error: unexpected input (expected cidr)
}
@ -173,11 +173,11 @@ func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *test
i := mkInput()
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "Input.lastRune after NextRune() is not nil")
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
i.Accept()
AssertTrue(t, i.result.lastRune == nil, "Input.lastRune after Accept() is nil")
AssertEqual(t, 1, i.offset, "Input.offset")
AssertEqual(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
AssertEqual(t, 'T', i.reader.buffer[0], "TokenAPI.reader.buffer[0]")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
@ -194,21 +194,20 @@ func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
func TestAccept_UpdatesCursor(t *testing.T) {
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.cursor.String(), "cursor 1")
AssertEqual(t, "start of file", i.result.cursor.String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.cursor.String(), "cursor 2")
AssertEqual(t, "line 1, column 7", i.result.cursor.String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.cursor.String(), "cursor 3")
AssertEqual(t, "line 2, column 1", i.result.cursor.String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.cursor.String(), "cursor 4")
AssertEqual(t, *i.cursor, i.Cursor(), "i.Cursor()")
AssertEqual(t, "line 3, column 5", i.result.cursor.String(), "cursor 4")
}
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
@ -221,20 +220,20 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
f := i.Fork()
AssertEqual(t, f, i.child, "Input.child (must be f)")
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Merge fork back into parent
f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@ -248,25 +247,25 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
f2.NextRune()
f2.Accept()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.offset, "i.offset")
AssertEqual(t, 1, i.result.offset, "i.offset A")
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 2, f1.offset, "f1.offset")
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset")
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
f2.Merge()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.offset, "i.offset")
AssertEqual(t, 1, i.result.offset, "i.offset B")
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.offset, "f1.offset")
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset")
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
f1.Merge()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, 3, i.offset, "i.offset")
AssertEqual(t, 3, i.result.offset, "i.offset C")
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.offset, "f1.offset")
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.offset, "f2.offset")
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {

View File

@ -1,136 +0,0 @@
package parsekit
import (
"fmt"
"strings"
)
// Result holds results as produced by a TokenHandler.
type TokenResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
}
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
Type interface{} // token type, can be any type that a parser author sees fit
Runes []rune // the runes that make up the token
Value interface{} // an optional value of any type
}
// newTokenResult initializes an empty result struct.
func newTokenResult() *TokenResult {
return &TokenResult{
runes: []rune{},
tokens: []*Token{},
}
}
// ClearRunes clears the runes in the TokenResult.
func (r *TokenResult) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the TokenResult with the provided input.
func (r *TokenResult) SetRunes(s interface{}) {
r.ClearRunes()
r.addRunes(s)
}
// AddRunes is used to add runes to the TokenResult.
func (r *TokenResult) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
// AddRunes is used to add runes to the TokenResult.
func (r *TokenResult) addRunes(set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(2, "parsekit.TokenResult.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
// Runes retrieves the Runes from the TokenResult.
func (r *TokenResult) Runes() []rune {
return r.runes
}
// Rune retrieve a single rune from the TokenResult at the specified index.
func (r *TokenResult) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the TokenResult as a string.
func (r *TokenResult) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the TokenResult.
func (r *TokenResult) ClearTokens() {
r.tokens = []*Token{}
}
// SetTokens replaces the Tokens from the TokenResult with the provided input.
func (r *TokenResult) SetTokens(tokens []*Token) {
r.ClearTokens()
for _, t := range tokens {
r.AddToken(t)
}
}
// AddToken is used to add a Token to the TokenResult.
func (r *TokenResult) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// SliceOfTokens is an alias for []*Token type. The method Tokens() returns
// this type. A String() method is defined for it, to make it easy to
// format the tokens as a string for testing / debugging purposes.
type SliceOfTokens []*Token
func (ts SliceOfTokens) String() string {
parts := make([]string, len(ts))
for i, t := range ts {
str := fmt.Sprintf("%v(%T:%v)", t.Type, t.Value, t.Value)
parts[i] = str
}
return strings.Join(parts, " ")
}
// Tokens retrieves the Tokens from the TokenResult.
func (r *TokenResult) Tokens() SliceOfTokens {
return r.tokens
}
// Token retrieves a single Token from the TokenResult at the specified index.
func (r *TokenResult) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the TokenResult Tokens.
func (r *TokenResult) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the TokenResult Token at the specified index.
func (r *TokenResult) Value(idx int) interface{} {
return r.tokens[idx].Value
}

View File

@ -1,10 +1,43 @@
package parsekit
import (
"fmt"
"strings"
"testing"
)
func ExampleToken() {
t0 := Token{
Runes: []rune("10.1.2.3"),
}
t1 := Token{
Runes: []rune("two hundred and twenty four"),
Type: "Number",
Value: 224,
}
const TName = 1
t2 := Token{
Runes: []rune("John"),
Type: TName,
}
t3 := Token{
Runes: []rune("The answer"),
Value: 42,
}
fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)
// Output:
// ("10.1.2.3")
// Number("two hundred and twenty four", value = (int)224)
// 1("John")
// ("The answer", value = (int)42)
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string")
@ -22,6 +55,6 @@ func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
i.Result().SetRunes(1234567)
},
Regexp: true,
Expect: `parsekit\.TokenResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
Expect: `parsekit\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
})
}