package parsekit

import (
	"fmt"
)

// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the match.
//
// A TokenHandler is to be used in conjunction with parsekit.P.On() or
// parsekit.Matcher().
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool

// TokenAPI is used by TokenHandler functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the TokenHandler function can call
// the TokenAPI.NextRune() method.
//
// The TokenHandler function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
// the rune will not be added to the output. It is mandatory for a TokenHandler
// to call either Accept() or Skip() after retrieving a rune, before calling
// NextRune() again.
//
// Eventually, the TokenHandler function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the TokenAPI's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the TokenAPI must be brought back to the original state.
//
// The way in which this is supported, is by forking a TokenAPI by calling
// TokenAPI.Fork().
// This will return a child TokenAPI, with an empty
// output buffer, but using the same input cursor position as the forked parent.
//
// The TokenHandler function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the resulting
// output. When the TokenHandler function decides that the lookahead was successful,
// then the method TokenAPI.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input cursor position to that of the child.
//
// When the TokenHandler function decides that the lookahead was unsuccessful,
// then it can simply discard the forked child. The parent TokenAPI was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type TokenAPI struct {
	p           *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
	inputOffset int       // the byte offset into the input; advanced by Accept() and Skip()
	input       []rune    // the valid runes that were retrieved from the input through NextRune()
	output      []rune    // the runes that were accepted into the resulting output through Accept()
	currRune    *runeInfo // the rune last read by NextRune(); nil once it has been accepted or skipped
	parent      *TokenAPI // the parent TokenAPI, in case this one was forked; nil otherwise
}

// runeInfo describes a single rune and its metadata.
//
// NOTE: NextRune() fills this struct using a positional composite literal,
// so the order of the fields must not be changed.
type runeInfo struct {
	Rune     rune // a UTF8 rune
	ByteSize int  // the number of bytes in the rune
	OK       bool // false when the rune represents an invalid UTF8 rune or EOF
}

// NextRune retrieves the next rune from the input.
//
// It returns the rune and a boolean. The boolean will be false in case an
// invalid UTF8 rune or the end of the file was encountered.
//
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
// to respectively add the rune to the TokenAPI's resulting output or to
// fully ignore it.
This way, a TokenHandler has full control over what runes are // significant for the resulting output of that TokenHandler. // // After using NextRune(), this method can not be reinvoked, until the last read // rune is explicitly accepted or skipped as described above. func (t *TokenAPI) NextRune() (rune, bool) { if t.currRune != nil { panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune") } r, w, ok := t.p.peek(t.inputOffset) t.currRune = &runeInfo{r, w, ok} if ok { t.input = append(t.input, r) } return r, ok } // Fork splits off a child TokenAPI, containing the same input cursor position // as the parent TokenAPI, but with all other data in a fresh state. // // By forking, a TokenHandler function can freely work with a TokenAPI, without // affecting the parent TokenAPI. This is for example useful when the // TokenHandler function must perform some form of lookahead. // // When a successful match was found, the TokenHandler function can call // TokenAPI.Merge() on the forked child to have the resulting output added // to the parent TokenAPI. // // When no match was found, the forked child can simply be discarded. // // Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'. // This is done in 4 steps and only after finishing all steps, the TokenHandler // function can confirm a successful match. 
The TokenHandler function for this // case could look like this (yes, it's naive, but it shows the point): // TODO make proper tested example // // func MatchAbcd(t *TokenAPI) bool { // child := t.Fork() // fork to keep m from input untouched // for _, letter := []rune {'a', 'b', 'c', 'd'} { // if r, ok := t.NextRune(); !ok || r != letter { // return false // report mismatch, t is left untouched // } // child.Accept() // add rune to child output // } // child.Merge() // we have a match, add resulting output to parent // return true // and report the successful match // } func (t *TokenAPI) Fork() *TokenAPI { return &TokenAPI{ p: t.p, inputOffset: t.inputOffset, parent: t, } } // Accept will add the last rune as read by TokenAPI.NextRune() to the resulting // output of the TokenAPI. func (t *TokenAPI) Accept() { t.checkAllowedCall("Accept()") t.output = append(t.output, t.currRune.Rune) t.inputOffset += t.currRune.ByteSize t.currRune = nil } // Skip will ignore the last rune as read by NextRune(). func (t *TokenAPI) Skip() { t.checkAllowedCall("Skip()") t.inputOffset += t.currRune.ByteSize t.currRune = nil } func (t *TokenAPI) checkAllowedCall(name string) { if t.currRune == nil { panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name)) } if !t.currRune.OK { panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name)) } } // Merge merges the resulting output from a forked child TokenAPI back into // its parent: The runes that are accepted in the child are added to the parent // runes and the parent's input cursor position is advanced to the child's // cursor position. // // After the merge, the child TokenAPI is reset so it can immediately be // reused for performing another match (all data are cleared, except for the // input offset which is kept at its current position). 
func (t *TokenAPI) Merge() bool { if t.parent == nil { panic("internal parser error: Cannot call Merge a a non-forked MatchDialog") } t.parent.input = append(t.parent.input, t.input...) t.parent.output = append(t.parent.output, t.output...) t.parent.inputOffset = t.inputOffset t.ClearOutput() t.ClearInput() return true } // ClearOutput clears the resulting output for the TokenAPI, but it keeps // the input and input offset as-is. func (t *TokenAPI) ClearOutput() { t.output = []rune{} } // ClearInput clears the input for the TokenAPI, but it keeps the output // and input offset as-is. func (t *TokenAPI) ClearInput() { t.input = []rune{} }