go-parsekit/tokenize/tokenize.go

// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// Result struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*Result, error)

// Result holds the runes and tokens as produced by the tokenizer.
type Result struct {
	Tokens []Token
	Runes  []rune
}

// String returns the runes collected by the tokenizer run as a string.
func (result *Result) String() string {
	return string(result.Runes)
}

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler function
// that must be executed at the start of the tokenizing process. From there on
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
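//
// A minimal usage sketch (handleDigit here stands in for any Handler value;
// it is an assumed name, not something defined in this file):
//
//	tokenizeDigits := New(handleDigit)
//	result, err := tokenizeDigits("123")
//	if err != nil {
//		// handleDigit did not match the start of the input
//		return
//	}
//	fmt.Println(result) // prints the matched runes via Result.String()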
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*Result, error) {
		// Wrap the input in a tokenize.API and run the handler against it.
		api := NewAPI(input)
		ok := tokenHandler(api)
		if !ok {
			err := fmt.Errorf("mismatch at %s", Cursor{})
			return nil, err
		}
		// The handler matched; collect the runes and tokens it produced.
		result := &Result{
			Runes:  api.Runes(),
			Tokens: api.Tokens(),
		}
		return result, nil
	}
}
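
// For illustration, a hand-written Handler for the sketch above could look
// roughly like this. The *API parameter and the NextRune/Accept calls are
// assumptions about the tokenize API made for this sketch; check the API
// type for the actual method set:
//
//	func handleDigit(t *API) bool {
//		r, err := t.NextRune()
//		if err != nil || r < '0' || r > '9' {
//			return false
//		}
//		t.Accept()
//		return true
//	}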