Lowering the number of forks required.
This commit is contained in:
parent
99b0abc490
commit
bc9e718e47
|
@ -144,7 +144,7 @@ func (h *helloparser1) end(p *parse.API) {
|
|||
|
||||
h.greetee = strings.TrimSpace(h.greetee)
|
||||
if h.greetee == "" {
|
||||
p.Error("The name cannot be empty")
|
||||
p.SetError("The name cannot be empty")
|
||||
} else {
|
||||
p.Stop()
|
||||
}
|
||||
|
|
|
@ -82,27 +82,27 @@ func (h *helloparser2) Parse(input string) (string, error) {
|
|||
func (h *helloparser2) start(p *parse.API) {
|
||||
c, a, m := tokenize.C, tokenize.A, tokenize.M
|
||||
if !p.Accept(a.StrNoCase("hello")) {
|
||||
p.Error("the greeting is not being friendly")
|
||||
p.SetError("the greeting is not being friendly")
|
||||
return
|
||||
}
|
||||
if !p.Accept(c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks))) {
|
||||
p.Error("the greeting is not properly separated")
|
||||
p.SetError("the greeting is not properly separated")
|
||||
return
|
||||
}
|
||||
if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
|
||||
h.greetee = p.Result.String()
|
||||
if h.greetee == "" {
|
||||
p.Error("the name cannot be empty")
|
||||
p.SetError("the name cannot be empty")
|
||||
return
|
||||
}
|
||||
} else {
|
||||
p.Error("the greeting is targeted at thin air")
|
||||
p.SetError("the greeting is targeted at thin air")
|
||||
return
|
||||
}
|
||||
if !p.Accept(a.Excl) {
|
||||
p.Error("the greeting is not loud enough")
|
||||
p.SetError("the greeting is not loud enough")
|
||||
} else if !p.Accept(a.EndOfFile) {
|
||||
p.Error("too much stuff going on after the closing '!'")
|
||||
p.SetError("too much stuff going on after the closing '!'")
|
||||
} else {
|
||||
p.Stop()
|
||||
}
|
||||
|
|
60
parse/api.go
60
parse/api.go
|
@ -30,13 +30,14 @@ type API struct {
|
|||
// On a successful peek, the results (data + tokens) are returned by the peek.
|
||||
// They are available (as with Accept()) through parse.API.Result.
|
||||
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
|
||||
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
|
||||
tokenAPI := parseAPI.tokenAPI
|
||||
if ok {
|
||||
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
|
||||
parseAPI.Result.Runes = tokenAPI.Output.Runes()
|
||||
}
|
||||
tokenAPI.Dispose(child)
|
||||
tokenAPI.Input.Reset()
|
||||
tokenAPI.Output.Reset()
|
||||
return ok
|
||||
}
|
||||
|
||||
|
@ -48,11 +49,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
|
|||
// No results (data + tokens) are returned by Peek(). If you want access to the data
|
||||
// through parse.API.Result, make use of PeekWithResult() instead.
|
||||
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
|
||||
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
t := parseAPI.tokenAPI
|
||||
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
|
||||
tokenAPI := parseAPI.tokenAPI
|
||||
parseAPI.Result.Tokens = nil
|
||||
parseAPI.Result.Runes = nil
|
||||
t.Dispose(child)
|
||||
tokenAPI.Input.Reset()
|
||||
tokenAPI.Output.Reset()
|
||||
return ok
|
||||
}
|
||||
|
||||
|
@ -61,37 +63,42 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
|
|||
// forward to beyond the match that was found. Otherwise false will be
|
||||
// returned and the read cursor will stay at the same position.
|
||||
//
|
||||
// After calling this method, you can retrieve the results using the Result() method.
|
||||
// After calling this method, you can retrieve the results through the API.Result field.
|
||||
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
|
||||
t := parseAPI.tokenAPI
|
||||
child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
|
||||
tokenAPI := parseAPI.tokenAPI
|
||||
_, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler)
|
||||
if ok {
|
||||
// Keep track of the results as produced by this child.
|
||||
parseAPI.Result.Tokens = t.Output.Tokens()
|
||||
parseAPI.Result.Runes = t.Output.Runes()
|
||||
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
|
||||
parseAPI.Result.Runes = tokenAPI.Output.Runes()
|
||||
|
||||
// Merge to the parent level.
|
||||
t.Merge(child)
|
||||
t.Dispose(child)
|
||||
// Now the results are stored, we can reset the results for the next handler.
|
||||
tokenAPI.Output.Reset()
|
||||
|
||||
// And flush the input reader buffer.
|
||||
t.Input.Flush()
|
||||
tokenAPI.Input.Flush()
|
||||
} else {
|
||||
t.Dispose(child)
|
||||
// No match, so reset the tokenize.API for the next handler.
|
||||
// This moves back the read cursor to the start and clears all results.
|
||||
tokenAPI.Input.Reset()
|
||||
tokenAPI.Output.Reset()
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
|
||||
// invokeTokenizeHandler invokes the tokenize.Handler directly on the
|
||||
// tokenize.API, without forking it. The returned stack level is always 0;
|
||||
// the caller resets or flushes the tokenize.API based on the returned bool.
|
||||
func (parseAPI *API) invokeTokenizeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
|
||||
parseAPI.panicWhenStoppedOrInError(name)
|
||||
if tokenHandler == nil {
|
||||
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
|
||||
}
|
||||
|
||||
child := parseAPI.tokenAPI.Fork()
|
||||
//child := parseAPI.tokenAPI.Fork()
|
||||
ok := tokenHandler(parseAPI.tokenAPI)
|
||||
|
||||
return child, ok
|
||||
return 0, ok
|
||||
}
|
||||
|
||||
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
||||
|
@ -165,19 +172,18 @@ func (parseAPI *API) Stop() {
|
|||
parseAPI.stopped = true
|
||||
}
|
||||
|
||||
// Error sets the error message in the API.
|
||||
// SetError sets the error message in the API.
|
||||
//
|
||||
// After setting an error, no more calls to API methods are allowed.
|
||||
// Calling a method in this state will result in a panic.
|
||||
// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
|
||||
func (parseAPI *API) Error(format string, data ...interface{}) {
|
||||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||
// set a different error message when needed.
|
||||
// You can still call SetError() though, to set a different error message
|
||||
// if you feel the need to do so.
|
||||
func (parseAPI *API) SetError(format string, data ...interface{}) {
|
||||
message := fmt.Sprintf(format, data...)
|
||||
parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor())
|
||||
}
|
||||
|
||||
// ExpectEndOfFile can be used to check if the input is at end of file.
|
||||
// ExpectEndOfFile checks if the end of the input file has been reached.
|
||||
//
|
||||
// When it finds that the end of the file was indeed reached, then the parser
|
||||
// will be stopped through Stop(). Otherwise, the unexpected input is reported
|
||||
|
@ -209,11 +215,11 @@ func (parseAPI *API) Expected(expected string) {
|
|||
_, err := parseAPI.tokenAPI.Byte.Peek(0)
|
||||
switch {
|
||||
case err == nil:
|
||||
parseAPI.Error("unexpected input%s", fmtExpects(expected))
|
||||
parseAPI.SetError("unexpected input%s", fmtExpects(expected))
|
||||
case err == io.EOF:
|
||||
parseAPI.Error("unexpected end of file%s", fmtExpects(expected))
|
||||
parseAPI.SetError("unexpected end of file%s", fmtExpects(expected))
|
||||
default:
|
||||
parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected))
|
||||
parseAPI.SetError("unexpected error '%s'%s", err, fmtExpects(expected))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ func New(startHandler Handler) Func {
|
|||
// and try to make the best of it.
|
||||
api.ExpectEndOfFile()
|
||||
}
|
||||
|
||||
return api.err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -245,7 +245,7 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
|
|||
panic("This is not the handler you're looking for")
|
||||
}
|
||||
p := parse.New(func(p *parse.API) {
|
||||
p.Error("It ends here")
|
||||
p.SetError("It ends here")
|
||||
p.Handle(otherHandler)
|
||||
})
|
||||
parse.AssertPanic(t, parse.PanicT{
|
||||
|
|
|
@ -213,6 +213,7 @@ func (buf *Buffer) fill(minBytes int) {
|
|||
}
|
||||
|
||||
const defaultBufferSize = 1024
|
||||
const runeCacheSize = 128
|
||||
|
||||
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
|
||||
var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
||||
|
|
|
@ -86,13 +86,14 @@ type API struct {
|
|||
}
|
||||
|
||||
type stackFrame struct {
|
||||
offset int // the read offset (relative to the start of the reader buffer) for this stack frame
|
||||
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
|
||||
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||
offsetLocal int // the read offset, relative to the start of this stack frame
|
||||
offset int // the read offset, relative to the start of the reader buffer
|
||||
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
|
||||
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||
|
||||
// TODO
|
||||
err error // can be used by a Handler to report a specific issue with the input
|
||||
|
@ -177,16 +178,7 @@ func (tokenAPI *API) Fork() int {
|
|||
// Once the child is no longer needed, it can be disposed of by using the
|
||||
// method Dispose(), which will return the tokenizer to the parent.
|
||||
func (tokenAPI *API) Merge(stackLevel int) {
|
||||
if stackLevel == 0 {
|
||||
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on the top-level API stack level 0")
|
||||
}
|
||||
if stackLevel != tokenAPI.stackLevel {
|
||||
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on API stack level %d, but the current stack level is %d "+
|
||||
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
|
||||
}
|
||||
|
||||
tokenAPI.checkStackLevelForMethod("Merge", stackLevel)
|
||||
parent := &tokenAPI.stackFrames[stackLevel-1]
|
||||
f := tokenAPI.stackFrame
|
||||
|
||||
|
@ -206,6 +198,7 @@ func (tokenAPI *API) Merge(stackLevel int) {
|
|||
f.tokenStart = f.tokenEnd
|
||||
|
||||
// Update the parent read offset.
|
||||
parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
|
||||
parent.offset = f.offset
|
||||
|
||||
// Update the parent cursor position.
|
||||
|
@ -221,24 +214,20 @@ func (tokenAPI *API) Merge(stackLevel int) {
|
|||
f.err = nil
|
||||
}
|
||||
|
||||
// Reset moves the read cursor back to the beginning for the currently active API child.
|
||||
// Aditionally, all output (bytes and tokens) that was emitted from the API child is
|
||||
// cleared as well.
|
||||
func (tokenAPI *API) Reset() {
|
||||
f := tokenAPI.stackFrame
|
||||
f.bytesEnd = f.bytesStart
|
||||
f.tokenEnd = f.tokenStart
|
||||
f.column = 0
|
||||
f.line = 0
|
||||
if tokenAPI.stackLevel == 0 {
|
||||
f.offset = 0
|
||||
} else {
|
||||
f.offset = tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset
|
||||
}
|
||||
f.err = nil
|
||||
}
|
||||
|
||||
func (tokenAPI *API) Dispose(stackLevel int) {
|
||||
tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
|
||||
tokenAPI.stackLevel = stackLevel - 1
|
||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
|
||||
}
|
||||
|
||||
func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
|
||||
if stackLevel == 0 {
|
||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on the top-level API stack level 0")
|
||||
}
|
||||
if stackLevel != tokenAPI.stackLevel {
|
||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on API stack level %d, but the current stack level is %d "+
|
||||
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ func (byteMode InputByteMode) Accept(b byte) {
|
|||
byteMode.MoveCursor(b)
|
||||
}
|
||||
|
||||
// AcceptMulti is used to accept one or more bytes that were read from the input.
|
||||
// AcceptMulti accepts one or more bytes that were read from the input.
|
||||
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
|
||||
// for the final output, so please remember them for me. I will now continue
|
||||
// reading after these bytes."
|
||||
|
@ -62,6 +62,7 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
|
|||
}
|
||||
|
||||
f.offset++
|
||||
f.offsetLocal++
|
||||
}
|
||||
|
||||
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
||||
|
|
|
@ -30,6 +30,16 @@ func (i Input) Cursor() string {
|
|||
return fmt.Sprintf("line %d, column %d", line+1, column+1)
|
||||
}
|
||||
|
||||
func (i Input) Reset() {
|
||||
f := i.api.stackFrame
|
||||
if f.offsetLocal > 0 {
|
||||
f.column = 0
|
||||
f.line = 0
|
||||
f.offset -= f.offsetLocal
|
||||
f.offsetLocal = 0
|
||||
}
|
||||
}
|
||||
|
||||
// Flush flushes input data from the read buffer up to the current
|
||||
// read cursor position of the tokenizer.
|
||||
//
|
||||
|
@ -41,6 +51,7 @@ func (i Input) Flush() bool {
|
|||
if f.offset > 0 {
|
||||
i.reader.Flush(f.offset)
|
||||
f.offset = 0
|
||||
f.offsetLocal = 0
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
|
@ -26,6 +26,13 @@ func (o Output) Rune(offset int) rune {
|
|||
return r
|
||||
}
|
||||
|
||||
func (o Output) Reset() {
|
||||
f := o.api.stackFrame
|
||||
f.bytesEnd = f.bytesStart
|
||||
f.tokenEnd = f.tokenStart
|
||||
f.err = nil
|
||||
}
|
||||
|
||||
func (o Output) ClearData() {
|
||||
f := o.api.stackFrame
|
||||
f.bytesEnd = f.bytesStart
|
||||
|
|
|
@ -97,6 +97,7 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
|
|||
|
||||
width := utf8.RuneLen(r)
|
||||
f.offset += width
|
||||
f.offsetLocal += width
|
||||
return width
|
||||
}
|
||||
|
||||
|
|
|
@ -663,23 +663,23 @@ func MatchStrNoCase(expected string) Handler {
|
|||
|
||||
return func(tokenAPI *API) bool {
|
||||
matches := make([]rune, l)
|
||||
width := 0
|
||||
offset := 0
|
||||
i := 0
|
||||
for _, e := range expected {
|
||||
if e <= '\x7F' {
|
||||
b, err := tokenAPI.Byte.Peek(width)
|
||||
b, err := tokenAPI.Byte.Peek(offset)
|
||||
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
|
||||
return false
|
||||
}
|
||||
matches[i] = rune(b)
|
||||
width++
|
||||
offset++
|
||||
} else {
|
||||
r, w, err := tokenAPI.Rune.Peek(width)
|
||||
r, w, err := tokenAPI.Rune.Peek(offset)
|
||||
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
|
||||
return false
|
||||
}
|
||||
matches[i] = r
|
||||
width += w
|
||||
offset += w
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
@ -737,7 +737,8 @@ func MatchAny(handlers ...Handler) Handler {
|
|||
tokenAPI.Dispose(child)
|
||||
return true
|
||||
}
|
||||
tokenAPI.Reset()
|
||||
tokenAPI.Input.Reset()
|
||||
tokenAPI.Output.Reset()
|
||||
}
|
||||
tokenAPI.Dispose(child)
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ func (result *Result) String() string {
|
|||
// other tokenize.Handler functions can be invoked recursively to implement the
|
||||
// tokenizing process.
|
||||
//
|
||||
// THis function returns a function that can be invoked to run the tokenizer
|
||||
// This function returns a function that can be invoked to run the tokenizer
|
||||
// against the provided input data. For an overview of allowed inputs, take a
|
||||
// look at the documentation for parsekit.read.New().
|
||||
func New(tokenHandler Handler) Func {
|
||||
|
|
Loading…
Reference in New Issue