go-parsekit/tokenize/tokenize.go


// Package tokenize provides tooling to build a tokenizer in a
// parser/combinator style, used to feed data to the parser.
package tokenize

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// Result struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*Result, error)

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler function
// that must be executed at the start of the tokenizing process. From there on
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*Result, error) {
		tokenAPI := NewAPI(input)
		if ok := tokenHandler(tokenAPI); !ok {
			// The handler reported a mismatch; include the input
			// position at which tokenizing failed in the error.
			return nil, fmt.Errorf("mismatch at %s", tokenAPI.Input.Cursor())
		}
		result := &Result{
			Bytes:  tokenAPI.Output.Bytes(),
			Tokens: tokenAPI.Output.Tokens(),
		}
		return result, nil
	}
}
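
// The function below is a minimal usage sketch, not part of the package
// API. It assumes that a Handler has the signature implied by its use in
// New, i.e. a function that takes the value returned by NewAPI and reports
// success as a bool. The stand-in handler here matches without consuming
// any input; real code would pass a Handler built from this package's
// building blocks instead.
func exampleNewUsage() {
	// A hypothetical stand-in handler: report a match without
	// consuming or inspecting any input.
	matchNothing := func(t *API) bool {
		return true
	}

	tokenizer := New(matchNothing)
	result, err := tokenizer("some input to tokenize")
	if err != nil {
		fmt.Println("tokenizing failed:", err)
		return
	}
	fmt.Println("tokenizing succeeded:", result)
}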