Lowering the number of forks required.

Maurice Makaay 2019-07-24 22:42:40 +00:00
parent 99b0abc490
commit bc9e718e47
13 changed files with 95 additions and 77 deletions

View File

@@ -144,7 +144,7 @@ func (h *helloparser1) end(p *parse.API) {
     h.greetee = strings.TrimSpace(h.greetee)
     if h.greetee == "" {
-        p.Error("The name cannot be empty")
+        p.SetError("The name cannot be empty")
     } else {
         p.Stop()
     }

View File

@@ -82,27 +82,27 @@ func (h *helloparser2) Parse(input string) (string, error) {
 func (h *helloparser2) start(p *parse.API) {
     c, a, m := tokenize.C, tokenize.A, tokenize.M
     if !p.Accept(a.StrNoCase("hello")) {
-        p.Error("the greeting is not being friendly")
+        p.SetError("the greeting is not being friendly")
         return
     }
     if !p.Accept(c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks))) {
-        p.Error("the greeting is not properly separated")
+        p.SetError("the greeting is not properly separated")
         return
     }
     if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
         h.greetee = p.Result.String()
         if h.greetee == "" {
-            p.Error("the name cannot be empty")
+            p.SetError("the name cannot be empty")
             return
         }
     } else {
-        p.Error("the greeting is targeted at thin air")
+        p.SetError("the greeting is targeted at thin air")
         return
     }
     if !p.Accept(a.Excl) {
-        p.Error("the greeting is not loud enough")
+        p.SetError("the greeting is not loud enough")
     } else if !p.Accept(a.EndOfFile) {
-        p.Error("too much stuff going on after the closing '!'")
+        p.SetError("too much stuff going on after the closing '!'")
     } else {
         p.Stop()
     }
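For orientation (not part of the diff): the Parse() method named in the hunk header above drives this start() handler, so after the Error() to SetError() rename the example is still used the same way. A minimal usage sketch, assuming the zero value of helloparser2 is ready to use:

    // Hypothetical usage; the surrounding example program may differ.
    p := &helloparser2{}
    greetee, err := p.Parse("Hello, World!")
    if err != nil {
        fmt.Println("error:", err)
        return
    }
    fmt.Println("the greeting was for", greetee) // World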

View File

@@ -30,13 +30,14 @@ type API struct {
 // On a successful peek, the results (data + tokens) are returned by the peek.
 // They are availablel (as with Accept()) through parse.API.Result.
 func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
-    child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
+    _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
     tokenAPI := parseAPI.tokenAPI
     if ok {
         parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
         parseAPI.Result.Runes = tokenAPI.Output.Runes()
     }
-    tokenAPI.Dispose(child)
+    tokenAPI.Input.Reset()
+    tokenAPI.Output.Reset()
     return ok
 }
@@ -48,11 +49,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
 // No results (data + tokens) are returned by Peek(). If want access to the data
 // through parse.API.Result, make use of PeekWithResult() instead.
 func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
-    child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
-    t := parseAPI.tokenAPI
+    _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
+    tokenAPI := parseAPI.tokenAPI
     parseAPI.Result.Tokens = nil
     parseAPI.Result.Runes = nil
-    t.Dispose(child)
+    tokenAPI.Input.Reset()
+    tokenAPI.Output.Reset()
     return ok
 }
@@ -61,37 +63,42 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
 // forward to beyond the match that was found. Otherwise false will be
 // and the read cursor will stay at the same position.
 //
-// After calling this method, you can retrieve the results using the Result() method.
+// After calling this method, you can retrieve the results through the API.Result field.
 func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
-    t := parseAPI.tokenAPI
-    child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
+    tokenAPI := parseAPI.tokenAPI
+    _, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler)
     if ok {
         // Keep track of the results as produced by this child.
-        parseAPI.Result.Tokens = t.Output.Tokens()
-        parseAPI.Result.Runes = t.Output.Runes()
-        // Merge to the parent level.
-        t.Merge(child)
-        t.Dispose(child)
+        parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
+        parseAPI.Result.Runes = tokenAPI.Output.Runes()
+        // Now the results are stored, we can reset the results for the next handler.
+        tokenAPI.Output.Reset()
         // And flush the input reader buffer.
-        t.Input.Flush()
+        tokenAPI.Input.Flush()
     } else {
-        t.Dispose(child)
+        // No match, so reset the tokenize.API for the next handler.
+        // This moves back the read cursor to the start and clears all results.
+        tokenAPI.Input.Reset()
+        tokenAPI.Output.Reset()
     }
     return ok
 }

-func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
+// invokeTokenizeHandler forks the tokenize.API, and invokes the tokenize.Handler
+// in the context of the created child. The child is returned, so the caller
+// has full control over merging and disposing the child.
+func (parseAPI *API) invokeTokenizeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
     parseAPI.panicWhenStoppedOrInError(name)
     if tokenHandler == nil {
         callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
     }
-    child := parseAPI.tokenAPI.Fork()
+    //child := parseAPI.tokenAPI.Fork()
     ok := tokenHandler(parseAPI.tokenAPI)
-    return child, ok
+    return 0, ok
 }

 // panicWhenStoppedOrInError will panic when the parser has produced an error
@@ -165,19 +172,18 @@ func (parseAPI *API) Stop() {
     parseAPI.stopped = true
 }

-// Error sets the error message in the API.
+// SetError sets the error message in the API.
 //
 // After setting an error, no more calls to API methods are allowed.
 // Calling a method in this state will result in a panic.
-// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
-func (parseAPI *API) Error(format string, data ...interface{}) {
-    // No call to p.panicWhenStoppedOrInError(), to allow a parser to
-    // set a different error message when needed.
+// You can still call SetError() though, to set a different error message
+// if you feel the need to do so.
+func (parseAPI *API) SetError(format string, data ...interface{}) {
     message := fmt.Sprintf(format, data...)
     parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor())
 }

-// ExpectEndOfFile can be used to check if the input is at end of file.
+// ExpectEndOfFile checks if the end of the input file has been reached.
 //
 // When it finds that the end of the file was indeed reached, then the parser
 // will be stopped through Stop(). Otherwise, the unexpected input is reported
@@ -209,11 +215,11 @@ func (parseAPI *API) Expected(expected string) {
     _, err := parseAPI.tokenAPI.Byte.Peek(0)
     switch {
     case err == nil:
-        parseAPI.Error("unexpected input%s", fmtExpects(expected))
+        parseAPI.SetError("unexpected input%s", fmtExpects(expected))
     case err == io.EOF:
-        parseAPI.Error("unexpected end of file%s", fmtExpects(expected))
+        parseAPI.SetError("unexpected end of file%s", fmtExpects(expected))
     default:
-        parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected))
+        parseAPI.SetError("unexpected error '%s'%s", err, fmtExpects(expected))
     }
 }
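One behavioral detail worth spelling out: because Accept() stores its results and then immediately calls Output.Reset(), parse.API.Result is overwritten on every successful Accept(), just as it was under the old Merge/Dispose scheme. A hedged fragment (a.StrNoCase and a.Comma are taken from the example parsers above):

    // Hypothetical handler fragment.
    if p.Accept(a.StrNoCase("hello")) {
        greeting := p.Result.String() // e.g. "hello" or "HELLO"
        if p.Accept(a.Comma) {
            // p.Result now describes only the comma; the earlier match is
            // gone from p.Result, so keep your own copy if you need it.
        }
        fmt.Println("greeting word was", greeting)
    }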

View File

@@ -41,6 +41,7 @@ func New(startHandler Handler) Func {
             // and try to make the best of it.
             api.ExpectEndOfFile()
         }
         return api.err
     }
 }

View File

@@ -245,7 +245,7 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
         panic("This is not the handler you're looking for")
     }
     p := parse.New(func(p *parse.API) {
-        p.Error("It ends here")
+        p.SetError("It ends here")
         p.Handle(otherHandler)
     })
     parse.AssertPanic(t, parse.PanicT{

View File

@@ -213,6 +213,7 @@ func (buf *Buffer) fill(minBytes int) {
 }

 const defaultBufferSize = 1024
+const runeCacheSize = 128

 // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
 var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")

View File

@@ -86,7 +86,8 @@ type API struct {
 }

 type stackFrame struct {
-    offset      int // the read offset (relative to the start of the reader buffer) for this stack frame
+    offsetLocal int // the read offset, relative to the start if this stack frame
+    offset      int // the read offset, relative to the start of the reader buffer
     column      int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
     line        int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
     bytesStart  int // the starting point in the API.bytes slice for runes produced by this stack level
@@ -177,16 +178,7 @@ func (tokenAPI *API) Fork() int {
 // Once the child is no longer needed, it can be disposed of by using the
 // method Dispose(), which will return the tokenizer to the parent.
 func (tokenAPI *API) Merge(stackLevel int) {
-    if stackLevel == 0 {
-        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "on the top-level API stack level 0")
-    }
-    if stackLevel != tokenAPI.stackLevel {
-        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "on API stack level %d, but the current stack level is %d "+
-            "(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
-    }
+    tokenAPI.checkStackLevelForMethod("Merge", stackLevel)

     parent := &tokenAPI.stackFrames[stackLevel-1]
     f := tokenAPI.stackFrame
@@ -206,6 +198,7 @@ func (tokenAPI *API) Merge(stackLevel int) {
     f.tokenStart = f.tokenEnd

     // Update the parent read offset.
+    parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
     parent.offset = f.offset

     // Update the parent cursor position.
@@ -221,24 +214,20 @@ func (tokenAPI *API) Merge(stackLevel int) {
     f.err = nil
 }

-// Reset moves the read cursor back to the beginning for the currently active API child.
-// Aditionally, all output (bytes and tokens) that was emitted from the API child is
-// cleared as well.
-func (tokenAPI *API) Reset() {
-    f := tokenAPI.stackFrame
-    f.bytesEnd = f.bytesStart
-    f.tokenEnd = f.tokenStart
-    f.column = 0
-    f.line = 0
-    if tokenAPI.stackLevel == 0 {
-        f.offset = 0
-    } else {
-        f.offset = tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset
-    }
-    f.err = nil
-}
-
 func (tokenAPI *API) Dispose(stackLevel int) {
+    tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
     tokenAPI.stackLevel = stackLevel - 1
     tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
 }
+
+func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
+    if stackLevel == 0 {
+        callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on the top-level API stack level 0")
+    }
+    if stackLevel != tokenAPI.stackLevel {
+        callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on API stack level %d, but the current stack level is %d "+
+            "(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
+    }
+}
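Factoring the duplicated guard into checkStackLevelForMethod() means Merge() and Dispose() now enforce the same contract: they must be handed the stack level returned by the matching Fork(), and forked children have to be unwound innermost-first. A minimal sketch of that discipline inside a tokenize.Handler (the combinator itself is hypothetical; the Fork/Merge/Dispose calls are the ones guarded here):

    // Hypothetical combinator, for illustration only.
    func matchPair(first, second Handler) Handler {
        return func(tokenAPI *API) bool {
            child := tokenAPI.Fork() // remember the stack level
            if first(tokenAPI) && second(tokenAPI) {
                tokenAPI.Merge(child)   // OK: called with the level returned by Fork()
                tokenAPI.Dispose(child) // OK: same level, innermost child
                return true
            }
            tokenAPI.Dispose(child) // no Merge: the child's progress is discarded
            return false
        }
    }

Calling Merge() or Dispose() with level 0, or with a stale level while a deeper child is still open, panics through the shared check.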

View File

@@ -30,7 +30,7 @@ func (byteMode InputByteMode) Accept(b byte) {
     byteMode.MoveCursor(b)
 }

-// AcceptMulti is used to accept one or more bytes that were read from the input.
+// AcceptMulti accepts one or more bytes that were read from the input.
 // This tells the tokenizer: "I've seen these bytes. I want to make use of them
 // for the final output, so please remember them for me. I will now continue
 // reading after these bytes."
@@ -62,6 +62,7 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
     }

     f.offset++
+    f.offsetLocal++
 }

 // MoveCursorMulti updates the position of the read cursor, based on the provided bytes.

View File

@@ -30,6 +30,16 @@ func (i Input) Cursor() string {
     return fmt.Sprintf("line %d, column %d", line+1, column+1)
 }

+func (i Input) Reset() {
+    f := i.api.stackFrame
+    if f.offsetLocal > 0 {
+        f.column = 0
+        f.line = 0
+        f.offset -= f.offsetLocal
+        f.offsetLocal = 0
+    }
+}
+
 // Flush flushes input data from the read buffer up to the current
 // read cursor position of the tokenizer.
 //
@@ -41,6 +51,7 @@ func (i Input) Flush() bool {
     if f.offset > 0 {
         i.reader.Flush(f.offset)
         f.offset = 0
+        f.offsetLocal = 0
         return true
     }
     return false
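The split between offset and offsetLocal is what lets Input.Reset() rewind a handler's progress without a forked stack frame: offset tracks the position in the shared reader buffer, offsetLocal only the bytes consumed since the current frame started. An illustrative walk-through (numbers invented, not code from this commit):

    // frame starts:            offset = 10, offsetLocal = 0
    // Byte.MoveCursor() x 3:   offset = 13, offsetLocal = 3
    // Input.Reset():           offset = 13 - 3 = 10, offsetLocal = 0  (back at the frame start)
    // accept 3 bytes again, then
    // Input.Flush():           reader.Flush(13), offset = 0, offsetLocal = 0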

View File

@@ -26,6 +26,13 @@ func (o Output) Rune(offset int) rune {
     return r
 }

+func (o Output) Reset() {
+    f := o.api.stackFrame
+    f.bytesEnd = f.bytesStart
+    f.tokenEnd = f.tokenStart
+    f.err = nil
+}
+
 func (o Output) ClearData() {
     f := o.api.stackFrame
     f.bytesEnd = f.bytesStart

View File

@@ -97,6 +97,7 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
     width := utf8.RuneLen(r)
     f.offset += width
+    f.offsetLocal += width
     return width
 }

View File

@@ -663,23 +663,23 @@ func MatchStrNoCase(expected string) Handler {
     return func(tokenAPI *API) bool {
         matches := make([]rune, l)
-        width := 0
+        offset := 0
         i := 0
         for _, e := range expected {
             if e <= '\x7F' {
-                b, err := tokenAPI.Byte.Peek(width)
+                b, err := tokenAPI.Byte.Peek(offset)
                 if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
                     return false
                 }
                 matches[i] = rune(b)
-                width++
+                offset++
             } else {
-                r, w, err := tokenAPI.Rune.Peek(width)
+                r, w, err := tokenAPI.Rune.Peek(offset)
                 if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
                     return false
                 }
                 matches[i] = r
-                width += w
+                offset += w
             }
             i++
         }
@@ -737,7 +737,8 @@ func MatchAny(handlers ...Handler) Handler {
                 tokenAPI.Dispose(child)
                 return true
             }
-            tokenAPI.Reset()
+            tokenAPI.Input.Reset()
+            tokenAPI.Output.Reset()
         }
         tokenAPI.Dispose(child)
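This is where the commit title pays off in the handler combinators: MatchAny() forks once and then rewinds the shared child with Input.Reset()/Output.Reset() between alternatives, instead of forking per alternative. Roughly, the whole function now looks like this (a reconstruction for illustration; the lines outside this hunk are assumptions and may differ):

    func MatchAny(handlers ...Handler) Handler {
        return func(tokenAPI *API) bool {
            child := tokenAPI.Fork() // a single fork for all alternatives
            for _, handler := range handlers {
                if handler(tokenAPI) {
                    tokenAPI.Merge(child) // assumed: keep the successful match
                    tokenAPI.Dispose(child)
                    return true
                }
                // This alternative failed: rewind the shared child and try the next one.
                tokenAPI.Input.Reset()
                tokenAPI.Output.Reset()
            }
            tokenAPI.Dispose(child)
            return false
        }
    }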

View File

@@ -34,7 +34,7 @@ func (result *Result) String() string {
 // other tokenize.Handler functions can be invoked recursively to implement the
 // tokenizing process.
 //
-// THis function returns a function that can be invoked to run the tokenizer
+// This function returns a function that can be invoked to run the tokenizer
 // against the provided input data. For an overview of allowed inputs, take a
 // look at the documentation for parsekit.read.New().
 func New(tokenHandler Handler) Func {