package parsekit

import (
	"fmt"
	"io"
)

// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and
// to report back results. For easy lookahead support, a forking strategy is
// provided.
//
// BASIC OPERATION:
//
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result buffer of the TokenAPI struct.
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
// with as many runes as needed.
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
// brought back to the original state, so we can try a different route.
//
// The way in which this is supported, is by forking a TokenAPI struct by calling
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
// but using the same input cursor position as the forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result
// buffer to the parent's result buffer, and to move the input cursor position
// to that of the child.
//
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
// be discarded. The parent TokenAPI was never modified, so it can safely be used
// as if the lookahead never happened.
//
// Note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the input cursor position back and forth, or by putting
// read input back on the input stream.
That often leads to code that is // efficient, however, in my opinion, not very untuitive to read. type TokenAPI struct { reader *Reader cursor *Cursor // current read cursor position, rel. to the input start offset int // current rune offset rel. to the Reader's sliding window result *Result // results as produced by a TokenHandler (runes, Tokens) root *TokenAPI // the root TokenAPI parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent } // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. func NewTokenAPI(r io.Reader) *TokenAPI { input := &TokenAPI{ reader: NewReader(r), cursor: &Cursor{}, result: NewResult(), } input.root = input return input } // NextRune returns the rune at the current read offset. // // When an invalid UTF8 rune is encountered on the input, it is replaced with // the utf.RuneError rune. It's up to the caller to handle this as an error // when needed. // // After reading a rune it must be Accept()-ed to move the read cursor forward // to the next rune. Doing so is mandatory. When doing a second call to NextRune() // without explicitly accepting, this method will panic. func (i *TokenAPI) NextRune() (rune, error) { if i.result.lastRune != nil { caller, linepos := getCaller(1) panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+ "to Accept() from %s at %s", caller, linepos)) } i.detachChilds() readRune, err := i.reader.RuneAt(i.offset) i.result.lastRune = &runeInfo{r: readRune, err: err} return readRune, err } // Accept the last rune as read by NextRune() into the result buffer and move // the cursor forward. // // It is not allowed to call Accept() when the previous call to NextRune() // returned an error. Calling Accept() in such case will result in a panic. 
func (i *TokenAPI) Accept() { if i.result.lastRune == nil { caller, linepos := getCaller(1) panic(fmt.Sprintf( "parsekit.TokenAPI.Accept(): Accept() called without first "+ "calling NextRune() from %s at %s", caller, linepos)) } else if i.result.lastRune.err != nil { caller, linepos := getCaller(1) panic(fmt.Sprintf( "parsekit.TokenAPI.Accept(): Accept() called while the previous "+ "call to NextRune() failed from %s at %s", caller, linepos)) } i.result.runes = append(i.result.runes, i.result.lastRune.r) i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r)) i.offset++ i.result.lastRune = nil } // Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and // read cursor position, but for the rest this is a fresh TokenAPI. func (i *TokenAPI) Fork() *TokenAPI { i.detachChilds() // Create the new fork. child := &TokenAPI{ reader: i.reader, cursor: &Cursor{}, offset: i.offset, root: i.root, parent: i, } child.result = NewResult() *child.cursor = *i.cursor i.child = child i.result.lastRune = nil return child } // Merge adds the data of the forked child TokenAPI that Merge() is called on to the // data of its parent (results and read cursor position). func (i *TokenAPI) Merge() { if i.parent == nil { panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI") } i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...) i.parent.offset = i.offset i.parent.cursor = i.cursor i.detachChilds() i.result = NewResult() } // Result returns the result data for the TokenAPI. The returned struct // can be used to retrieve and modify the result data. func (i *TokenAPI) Result() *Result { return i.result } // Cursor retrieves the current read cursor data. // TODO make this and offset part of Result struct? 
func (i *TokenAPI) Cursor() Cursor {
	// Dereference the pointer, so the caller receives a copy of the value.
	position := *i.cursor
	return position
}

// FlushReaderBuffer delegates to the Flush() method of the contained
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
// reader cache.
//
// This method may only be called on the root TokenAPI; calling it on a forked
// child results in a panic. Any fork child that is still attached to the root
// is detached, and the read offset is reset to zero.
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
	isRoot := i.root == i
	if !isRoot {
		panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
	}

	i.detachChilds()
	i.reader.Flush(numberOfRunes)

	// NOTE(review): the offset is reset to 0 regardless of numberOfRunes.
	// Presumably callers always flush exactly the number of runes read so
	// far - confirm against Reader.Flush() and the callers.
	i.offset = 0
}

// detachChilds cuts the chain of fork children loose from this TokenAPI.
// The TokenAPI itself keeps the link to its own parent.
func (i *TokenAPI) detachChilds() {
	if i.child == nil {
		return
	}
	i.child.detachChildsRecurse()
	i.child = nil
}

// detachChildsRecurse clears the parent and child links of this TokenAPI and
// of every TokenAPI below it in the fork chain.
func (i *TokenAPI) detachChildsRecurse() {
	for node := i; node != nil; {
		next := node.child
		node.child = nil
		node.parent = nil
		node = next
	}
}