Lowering the number of forks required.

This commit is contained in:
Maurice Makaay 2019-07-24 22:42:40 +00:00
parent 99b0abc490
commit bc9e718e47
13 changed files with 95 additions and 77 deletions

View File

@ -144,7 +144,7 @@ func (h *helloparser1) end(p *parse.API) {
h.greetee = strings.TrimSpace(h.greetee)
if h.greetee == "" {
p.Error("The name cannot be empty")
p.SetError("The name cannot be empty")
} else {
p.Stop()
}

View File

@ -82,27 +82,27 @@ func (h *helloparser2) Parse(input string) (string, error) {
func (h *helloparser2) start(p *parse.API) {
c, a, m := tokenize.C, tokenize.A, tokenize.M
if !p.Accept(a.StrNoCase("hello")) {
p.Error("the greeting is not being friendly")
p.SetError("the greeting is not being friendly")
return
}
if !p.Accept(c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks))) {
p.Error("the greeting is not properly separated")
p.SetError("the greeting is not properly separated")
return
}
if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
h.greetee = p.Result.String()
if h.greetee == "" {
p.Error("the name cannot be empty")
p.SetError("the name cannot be empty")
return
}
} else {
p.Error("the greeting is targeted at thin air")
p.SetError("the greeting is targeted at thin air")
return
}
if !p.Accept(a.Excl) {
p.Error("the greeting is not loud enough")
p.SetError("the greeting is not loud enough")
} else if !p.Accept(a.EndOfFile) {
p.Error("too much stuff going on after the closing '!'")
p.SetError("too much stuff going on after the closing '!'")
} else {
p.Stop()
}

View File

@ -30,13 +30,14 @@ type API struct {
// On a successful peek, the results (data + tokens) are returned by the peek.
// They are available (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
if ok {
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
}
tokenAPI.Dispose(child)
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
return ok
}
@ -48,11 +49,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
// No results (data + tokens) are returned by Peek(). If want access to the data
// through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
t.Dispose(child)
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
return ok
}
@ -61,37 +63,42 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
forward to beyond the match that was found. Otherwise false will be
returned and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the results using the Result() method.
// After calling this method, you can retrieve the results through the API.Result field.
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
t := parseAPI.tokenAPI
child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
tokenAPI := parseAPI.tokenAPI
_, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler)
if ok {
// Keep track of the results as produced by this child.
parseAPI.Result.Tokens = t.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes()
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
// Merge to the parent level.
t.Merge(child)
t.Dispose(child)
// Now the results are stored, we can reset the results for the next handler.
tokenAPI.Output.Reset()
// And flush the input reader buffer.
t.Input.Flush()
tokenAPI.Input.Flush()
} else {
t.Dispose(child)
// No match, so reset the tokenize.API for the next handler.
// This moves back the read cursor to the start and clears all results.
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
}
return ok
}
func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
// invokeTokenizeHandler invokes the tokenize.Handler directly against the
// tokenize.API (no fork is created anymore). The int return value is a
// placeholder stack level (always 0), kept during the transition away from forks.
func (parseAPI *API) invokeTokenizeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
parseAPI.panicWhenStoppedOrInError(name)
if tokenHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
}
child := parseAPI.tokenAPI.Fork()
//child := parseAPI.tokenAPI.Fork()
ok := tokenHandler(parseAPI.tokenAPI)
return child, ok
return 0, ok
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
@ -165,19 +172,18 @@ func (parseAPI *API) Stop() {
parseAPI.stopped = true
}
// Error sets the error message in the API.
// SetError sets the error message in the API.
//
// After setting an error, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
func (parseAPI *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
// You can still call SetError() though, to set a different error message
// if you feel the need to do so.
func (parseAPI *API) SetError(format string, data ...interface{}) {
message := fmt.Sprintf(format, data...)
parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.
// ExpectEndOfFile checks if the end of the input file has been reached.
//
// When it finds that the end of the file was indeed reached, then the parser
// will be stopped through Stop(). Otherwise, the unexpected input is reported
@ -209,11 +215,11 @@ func (parseAPI *API) Expected(expected string) {
_, err := parseAPI.tokenAPI.Byte.Peek(0)
switch {
case err == nil:
parseAPI.Error("unexpected input%s", fmtExpects(expected))
parseAPI.SetError("unexpected input%s", fmtExpects(expected))
case err == io.EOF:
parseAPI.Error("unexpected end of file%s", fmtExpects(expected))
parseAPI.SetError("unexpected end of file%s", fmtExpects(expected))
default:
parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected))
parseAPI.SetError("unexpected error '%s'%s", err, fmtExpects(expected))
}
}

View File

@ -41,6 +41,7 @@ func New(startHandler Handler) Func {
// and try to make the best of it.
api.ExpectEndOfFile()
}
return api.err
}
}

View File

@ -245,7 +245,7 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
panic("This is not the handler you're looking for")
}
p := parse.New(func(p *parse.API) {
p.Error("It ends here")
p.SetError("It ends here")
p.Handle(otherHandler)
})
parse.AssertPanic(t, parse.PanicT{

View File

@ -213,6 +213,7 @@ func (buf *Buffer) fill(minBytes int) {
}
const defaultBufferSize = 1024
const runeCacheSize = 128
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")

View File

@ -86,13 +86,14 @@ type API struct {
}
type stackFrame struct {
offset int // the read offset (relative to the start of the reader buffer) for this stack frame
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
offsetLocal int // the read offset, relative to the start of this stack frame
offset int // the read offset, relative to the start of the reader buffer
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
// TODO
err error // can be used by a Handler to report a specific issue with the input
@ -177,16 +178,7 @@ func (tokenAPI *API) Fork() int {
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (tokenAPI *API) Merge(stackLevel int) {
if stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != tokenAPI.stackLevel {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
tokenAPI.checkStackLevelForMethod("Merge", stackLevel)
parent := &tokenAPI.stackFrames[stackLevel-1]
f := tokenAPI.stackFrame
@ -206,6 +198,7 @@ func (tokenAPI *API) Merge(stackLevel int) {
f.tokenStart = f.tokenEnd
// Update the parent read offset.
parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
parent.offset = f.offset
// Update the parent cursor position.
@ -221,24 +214,20 @@ func (tokenAPI *API) Merge(stackLevel int) {
f.err = nil
}
// Reset moves the read cursor back to the beginning for the currently active API child.
// Additionally, all output (bytes and tokens) that was emitted from the API child is
// cleared as well.
func (tokenAPI *API) Reset() {
f := tokenAPI.stackFrame
f.bytesEnd = f.bytesStart
f.tokenEnd = f.tokenStart
f.column = 0
f.line = 0
if tokenAPI.stackLevel == 0 {
f.offset = 0
} else {
f.offset = tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset
}
f.err = nil
}
func (tokenAPI *API) Dispose(stackLevel int) {
tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
tokenAPI.stackLevel = stackLevel - 1
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
}
func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
if stackLevel == 0 {
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != tokenAPI.stackLevel {
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
}

View File

@ -30,7 +30,7 @@ func (byteMode InputByteMode) Accept(b byte) {
byteMode.MoveCursor(b)
}
// AcceptMulti is used to accept one or more bytes that were read from the input.
// AcceptMulti accepts one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these bytes."
@ -62,6 +62,7 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
}
f.offset++
f.offsetLocal++
}
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.

View File

@ -30,6 +30,16 @@ func (i Input) Cursor() string {
return fmt.Sprintf("line %d, column %d", line+1, column+1)
}
func (i Input) Reset() {
f := i.api.stackFrame
if f.offsetLocal > 0 {
f.column = 0
f.line = 0
f.offset -= f.offsetLocal
f.offsetLocal = 0
}
}
// Flush flushes input data from the read buffer up to the current
// read cursor position of the tokenizer.
//
@ -41,6 +51,7 @@ func (i Input) Flush() bool {
if f.offset > 0 {
i.reader.Flush(f.offset)
f.offset = 0
f.offsetLocal = 0
return true
}
return false

View File

@ -26,6 +26,13 @@ func (o Output) Rune(offset int) rune {
return r
}
func (o Output) Reset() {
f := o.api.stackFrame
f.bytesEnd = f.bytesStart
f.tokenEnd = f.tokenStart
f.err = nil
}
func (o Output) ClearData() {
f := o.api.stackFrame
f.bytesEnd = f.bytesStart

View File

@ -97,6 +97,7 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
width := utf8.RuneLen(r)
f.offset += width
f.offsetLocal += width
return width
}

View File

@ -663,23 +663,23 @@ func MatchStrNoCase(expected string) Handler {
return func(tokenAPI *API) bool {
matches := make([]rune, l)
width := 0
offset := 0
i := 0
for _, e := range expected {
if e <= '\x7F' {
b, err := tokenAPI.Byte.Peek(width)
b, err := tokenAPI.Byte.Peek(offset)
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
return false
}
matches[i] = rune(b)
width++
offset++
} else {
r, w, err := tokenAPI.Rune.Peek(width)
r, w, err := tokenAPI.Rune.Peek(offset)
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
return false
}
matches[i] = r
width += w
offset += w
}
i++
}
@ -737,7 +737,8 @@ func MatchAny(handlers ...Handler) Handler {
tokenAPI.Dispose(child)
return true
}
tokenAPI.Reset()
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
}
tokenAPI.Dispose(child)

View File

@ -34,7 +34,7 @@ func (result *Result) String() string {
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// THis function returns a function that can be invoked to run the tokenizer
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {