Code cleanup: make the byte and rune inputs look as similar as possible and get rid of some unneeded functionality.
This commit is contained in:
parent
93d2cfa6f1
commit
a968f22d45
|
@ -73,7 +73,7 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
stackFrame *stackFrame // the current stack frame
|
||||
reader *read.Buffer // the buffered input reader
|
||||
|
@ -105,12 +105,13 @@ const initialByteStoreLength = 1024
|
|||
// For an overview of allowed inputs, take a look at the documentation
|
||||
// for parsekit.read.New().
|
||||
func NewAPI(input interface{}) *API {
|
||||
reader := read.New(input)
|
||||
api := &API{
|
||||
reader: read.New(input),
|
||||
stackFrames: make([]stackFrame, initialStackDepth),
|
||||
reader: reader,
|
||||
}
|
||||
api.Byte = ByteMode{api: api}
|
||||
api.Rune = RuneMode{api: api}
|
||||
api.Byte = ByteMode{api: api, reader: reader}
|
||||
api.Rune = RuneMode{api: api, reader: reader}
|
||||
api.Output = Output{api: api}
|
||||
api.stackFrame = &api.stackFrames[0]
|
||||
|
||||
|
@ -210,8 +211,8 @@ func (tokenAPI *API) Merge(stackLevel int) {
|
|||
f.err = nil
|
||||
}
|
||||
|
||||
// Reset moves the input cursor back to the beginning for the currently active API child.
|
||||
// Additionally, any output (bytes and tokens) that was emitted from the API child are
|
||||
// Reset moves the read cursor back to the beginning for the currently active API child.
|
||||
// Additionally, all output (bytes and tokens) that was emitted from the API child is
|
||||
// cleared as well.
|
||||
func (api *API) Reset() {
|
||||
f := api.stackFrame
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
package tokenize
|
||||
|
||||
import "git.makaay.nl/mauricem/go-parsekit/read"
|
||||
|
||||
// ByteMode provides byte-driven input/output functionality for the tokenize API.
|
||||
type ByteMode struct {
|
||||
api *API
|
||||
api *API
|
||||
reader *read.Buffer // the buffered input reader
|
||||
}
|
||||
|
||||
// Peek returns the byte at the provided byte offset.
|
||||
|
@ -11,37 +14,12 @@ type ByteMode struct {
|
|||
// When an offset is requested that is beyond the length of the available input
|
||||
// data, then the error will be io.EOF.
|
||||
func (byteMode ByteMode) Peek(offset int) (byte, error) {
|
||||
a := byteMode.api
|
||||
return a.reader.ByteAt(a.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
// Skip is used to skip over one or more bytes that were read from the input.
|
||||
// This tells the tokenizer: "I've seen these bytes. They are of no interest.
|
||||
// I will now continue reading after these bytes."
|
||||
//
|
||||
// This will merely update the position of the cursor (which keeps track of what
|
||||
// line and column we are on in the input data). The bytes are not added to
|
||||
// the output.
|
||||
//
|
||||
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
||||
// the first byte after the skipped bytes.
|
||||
func (byteMode ByteMode) Skip(b byte) {
|
||||
f := byteMode.api.stackFrame
|
||||
f.moveCursorByByte(b)
|
||||
f.offset++
|
||||
}
|
||||
|
||||
func (byteMode ByteMode) SkipMulti(bytes ...byte) {
|
||||
f := byteMode.api.stackFrame
|
||||
for _, b := range bytes {
|
||||
f.moveCursorByByte(b)
|
||||
f.offset++
|
||||
}
|
||||
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
func (byteMode ByteMode) Accept(b byte) {
|
||||
byteMode.api.Output.AddByte(b)
|
||||
byteMode.Skip(b)
|
||||
byteMode.MoveCursor(b)
|
||||
}
|
||||
|
||||
// AcceptMulti is used to accept one or more bytes that were read from the input.
|
||||
|
@ -57,5 +35,35 @@ func (byteMode ByteMode) Accept(b byte) {
|
|||
// the first byte after the accepted bytes.
|
||||
func (byteMode ByteMode) AcceptMulti(bytes ...byte) {
|
||||
byteMode.api.Output.AddBytes(bytes...)
|
||||
byteMode.SkipMulti(bytes...)
|
||||
byteMode.MoveCursorMulti(bytes...)
|
||||
}
|
||||
|
||||
// MoveCursor updates the position of the read cursor, based on the provided byte.
|
||||
// This method takes newlines into account to keep track of line numbers and
|
||||
// column positions for the input cursor.
|
||||
//
|
||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first byte at the new cursor position.
|
||||
func (byteMode ByteMode) MoveCursor(b byte) {
|
||||
f := byteMode.api.stackFrame
|
||||
if b == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
} else {
|
||||
f.column++
|
||||
}
|
||||
|
||||
f.offset++
|
||||
}
|
||||
|
||||
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
||||
// This method takes newlines into account to keep track of line numbers and
|
||||
// column positions for the input cursor.
|
||||
//
|
||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first byte at the new cursor position.
|
||||
func (byteMode ByteMode) MoveCursorMulti(bytes ...byte) {
|
||||
for _, b := range bytes {
|
||||
byteMode.MoveCursor(b)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,14 @@ package tokenize
|
|||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||
)
|
||||
|
||||
// RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
|
||||
type RuneMode struct {
|
||||
api *API
|
||||
api *API
|
||||
reader *read.Buffer // the buffered input reader
|
||||
}
|
||||
|
||||
// Peek returns the UTF8 rune at the provided byte offset, including its byte width.
|
||||
|
@ -23,42 +26,7 @@ type RuneMode struct {
|
|||
// When an offset is requested that is beyond the length of the available input
|
||||
// data, then the error will be io.EOF.
|
||||
func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
|
||||
a := runeMode.api
|
||||
return a.reader.RuneAt(a.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
// Skip is used to skip over a single rune that was read from the input.
|
||||
// This tells the tokenizer: "I've seen this rune. It is of no interest.
|
||||
// I will now continue reading after this rune."
|
||||
//
|
||||
// This will merely update the position of the cursor (which keeps track of what
|
||||
// line and column we are on in the input data). The rune is not added to
|
||||
// the output.
|
||||
//
|
||||
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
||||
// the first byte after the skipped rune.
|
||||
func (runeMode RuneMode) Skip(r rune) {
|
||||
f := runeMode.api.stackFrame
|
||||
f.moveCursorByRune(r)
|
||||
f.offset += utf8.RuneLen(r)
|
||||
}
|
||||
|
||||
// SkipMulti is used to skip over one or more runes that were read from the input.
|
||||
// This tells the tokenizer: "I've seen these runes. They are of no interest.
|
||||
// I will now continue reading after these runes."
|
||||
//
|
||||
// This will merely update the position of the cursor (which keeps track of what
|
||||
// line and column we are on in the input data). The runes are not added to
|
||||
// the output.
|
||||
//
|
||||
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
||||
// the first byte after the skipped runes.
|
||||
func (runeMode RuneMode) SkipMulti(runes ...rune) {
|
||||
f := runeMode.api.stackFrame
|
||||
for _, r := range runes {
|
||||
f.moveCursorByRune(r)
|
||||
f.offset += utf8.RuneLen(r)
|
||||
}
|
||||
return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
// Accept is used to accept a single rune that was read from the input.
|
||||
|
@ -67,7 +35,7 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
|
|||
// reading after this rune."
|
||||
//
|
||||
// This will update the position of the cursor (which keeps track of what line
|
||||
// and column we are on in the input data) and add the rune to the tokenizer
|
||||
// and column we are on in the input data) and add the rune to the tokenizer
|
||||
// output.
|
||||
//
|
||||
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
||||
|
@ -75,15 +43,14 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
|
|||
func (runeMode RuneMode) Accept(r rune) {
|
||||
a := runeMode.api
|
||||
f := a.stackFrame
|
||||
|
||||
curBytesEnd := f.bytesEnd
|
||||
maxRequiredBytes := curBytesEnd + utf8.UTFMax
|
||||
|
||||
a.growOutputData(maxRequiredBytes)
|
||||
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
|
||||
f.bytesEnd += w
|
||||
f.offset += w
|
||||
|
||||
f.moveCursorByRune(r)
|
||||
runeMode.MoveCursor(r)
|
||||
}
|
||||
|
||||
// AcceptMulti is used to accept one or more runes that were read from the input.
|
||||
|
@ -100,17 +67,47 @@ func (runeMode RuneMode) Accept(r rune) {
|
|||
func (runeMode RuneMode) AcceptMulti(runes ...rune) {
|
||||
a := runeMode.api
|
||||
f := a.stackFrame
|
||||
runesAsString := string(runes)
|
||||
byteLen := len(runesAsString)
|
||||
curBytesEnd := f.bytesEnd
|
||||
newBytesEnd := curBytesEnd + byteLen
|
||||
|
||||
a.growOutputData(newBytesEnd)
|
||||
copy(a.outputData[curBytesEnd:], runesAsString)
|
||||
f.bytesEnd = newBytesEnd
|
||||
f.offset += byteLen
|
||||
curBytesEnd := f.bytesEnd
|
||||
maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
|
||||
a.growOutputData(maxBytes)
|
||||
|
||||
for _, r := range runes {
|
||||
f.moveCursorByRune(r)
|
||||
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
|
||||
curBytesEnd += w
|
||||
runeMode.MoveCursor(r)
|
||||
}
|
||||
f.bytesEnd = curBytesEnd
|
||||
}
|
||||
|
||||
// MoveCursor updates the position of the read cursor, based on the provided rune.
|
||||
// This method takes newlines into account to keep track of line numbers and
|
||||
// column positions for the input cursor.
|
||||
//
|
||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first rune at the new cursor position.
|
||||
func (runeMode RuneMode) MoveCursor(r rune) int {
|
||||
f := runeMode.api.stackFrame
|
||||
if r == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
} else {
|
||||
f.column++
|
||||
}
|
||||
|
||||
width := utf8.RuneLen(r)
|
||||
f.offset += width
|
||||
return width
|
||||
}
|
||||
|
||||
// MoveCursorMulti updates the position of the read cursor, based on the provided runes.
|
||||
// This method takes newlines into account to keep track of line numbers and
|
||||
// column positions for the input cursor.
|
||||
//
|
||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first rune at the new cursor position.
|
||||
func (runeMode RuneMode) MoveCursorMulti(runes ...rune) {
|
||||
for _, r := range runes {
|
||||
runeMode.MoveCursor(r)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ func ExampleAPI_SkipRune() {
|
|||
if strings.ContainsRune("aeiouAEIOU", r) {
|
||||
api.Rune.Accept(r)
|
||||
} else {
|
||||
api.Rune.Skip(r)
|
||||
api.Rune.MoveCursor(r)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
package tokenize
|
||||
|
||||
// move updates the position of the cursor, based on the provided input string.
|
||||
// The input string represents the runes that the cursor must be moved over.
|
||||
// This method will take newlines into account to keep track of line numbers and
|
||||
// column positions automatically.
|
||||
func (f *stackFrame) moveCursor(input string) *stackFrame {
|
||||
for _, r := range input {
|
||||
f.moveCursorByRune(r)
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
func (f *stackFrame) moveCursorByRune(r rune) {
|
||||
if r == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
} else {
|
||||
f.column++
|
||||
}
|
||||
}
|
||||
|
||||
func (f *stackFrame) moveCursorByByte(b byte) {
|
||||
if b == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
} else {
|
||||
f.column++
|
||||
}
|
||||
}
|
|
@ -6,27 +6,29 @@ import (
|
|||
|
||||
func TestMoveCursorByBytes(t *testing.T) {
|
||||
api := NewAPI("")
|
||||
api.stackFrame.moveCursorByByte('a')
|
||||
api.stackFrame.moveCursorByByte('b')
|
||||
api.stackFrame.moveCursorByByte('c')
|
||||
api.stackFrame.moveCursorByByte('\r')
|
||||
api.stackFrame.moveCursorByByte('\n')
|
||||
api.stackFrame.moveCursorByByte('a')
|
||||
api.stackFrame.moveCursorByByte('b')
|
||||
api.Byte.MoveCursor('a')
|
||||
api.Byte.MoveCursor('b')
|
||||
api.Byte.MoveCursor('c')
|
||||
api.Byte.MoveCursor('\r')
|
||||
api.Byte.MoveCursor('\n')
|
||||
api.Byte.MoveCursor('a')
|
||||
api.Byte.MoveCursor('b')
|
||||
|
||||
AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
|
||||
AssertEqual(t, 7, api.stackFrame.offset, "Offset after moving by byte")
|
||||
}
|
||||
|
||||
func TestMoveCursorByRunes(t *testing.T) {
|
||||
api := NewAPI("")
|
||||
api.stackFrame.moveCursorByRune('ɹ')
|
||||
api.stackFrame.moveCursorByRune('n')
|
||||
api.stackFrame.moveCursorByRune('u')
|
||||
api.stackFrame.moveCursorByRune('\r')
|
||||
api.stackFrame.moveCursorByRune('\n')
|
||||
api.stackFrame.moveCursorByRune('ǝ')
|
||||
api.Rune.MoveCursor('ɹ')
|
||||
api.Rune.MoveCursor('n')
|
||||
api.Rune.MoveCursor('u')
|
||||
api.Rune.MoveCursor('\r')
|
||||
api.Rune.MoveCursor('\n')
|
||||
api.Rune.MoveCursor('ǝ')
|
||||
|
||||
AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
|
||||
AssertEqual(t, 8, api.stackFrame.offset, "Offset after moving by rune")
|
||||
}
|
||||
|
||||
func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
|
||||
|
@ -49,7 +51,9 @@ func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
|
|||
} {
|
||||
api := NewAPI("")
|
||||
for _, s := range test.input {
|
||||
api.stackFrame.moveCursor(s)
|
||||
for _, r := range s {
|
||||
api.Rune.MoveCursor(r)
|
||||
}
|
||||
}
|
||||
if api.stackFrame.line != test.line {
|
||||
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
|
||||
|
|
|
@ -476,7 +476,7 @@ func MatchNewline() Handler {
|
|||
return false
|
||||
}
|
||||
if b1 == '\n' {
|
||||
t.Byte.AcceptMulti(b1)
|
||||
t.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
if b1 == '\r' {
|
||||
|
@ -1121,7 +1121,7 @@ func MatchInteger(normalize bool) Handler {
|
|||
|
||||
// The next character is a zero, skip the leading zero and check again.
|
||||
if err == nil && b2 == b {
|
||||
t.Byte.Skip('0')
|
||||
t.Byte.MoveCursor('0')
|
||||
continue
|
||||
}
|
||||
// The next character is not a zero, nor a digit at all.
|
||||
|
@ -1131,7 +1131,7 @@ func MatchInteger(normalize bool) Handler {
|
|||
return true
|
||||
}
|
||||
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||
t.Byte.Skip('0')
|
||||
t.Byte.MoveCursor('0')
|
||||
t.Byte.Accept(b2)
|
||||
break
|
||||
}
|
||||
|
@ -1170,7 +1170,7 @@ func MatchDecimal(normalize bool) Handler {
|
|||
|
||||
// The next character is a zero, skip the leading zero and check again.
|
||||
if err == nil && b2 == b {
|
||||
t.Byte.Skip('0')
|
||||
t.Byte.MoveCursor('0')
|
||||
continue
|
||||
}
|
||||
// The next character is a dot, go with the zero before the dot and
|
||||
|
@ -1186,7 +1186,7 @@ func MatchDecimal(normalize bool) Handler {
|
|||
return true
|
||||
}
|
||||
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||
t.Byte.Skip('0')
|
||||
t.Byte.MoveCursor('0')
|
||||
t.Byte.Accept(b2)
|
||||
break
|
||||
}
|
||||
|
@ -1198,7 +1198,7 @@ func MatchDecimal(normalize bool) Handler {
|
|||
if err != nil || b < '0' || b > '9' {
|
||||
break
|
||||
}
|
||||
t.Byte.AcceptMulti(b)
|
||||
t.Byte.Accept(b)
|
||||
}
|
||||
|
||||
// No dot or no digit after a dot? Then we're done.
|
||||
|
@ -1266,6 +1266,7 @@ func MatchBoolean() Handler {
|
|||
t.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
// TODO Multibyte peeks (also useful for strings)
|
||||
b3, _ := t.Byte.Peek(2)
|
||||
b4, _ := t.Byte.Peek(3)
|
||||
b5, err := t.Byte.Peek(4)
|
||||
|
@ -1362,7 +1363,7 @@ func MatchOctet(normalize bool) Handler {
|
|||
if err != nil || b2 < '0' || b2 > '9' {
|
||||
// Output 2-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
t.Byte.Skip(b0)
|
||||
t.Byte.MoveCursor(b0)
|
||||
t.Byte.Accept(b1)
|
||||
} else {
|
||||
t.Byte.AcceptMulti(b0, b1)
|
||||
|
@ -1377,9 +1378,9 @@ func MatchOctet(normalize bool) Handler {
|
|||
|
||||
// Output 3-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
t.Byte.Skip(b0)
|
||||
t.Byte.MoveCursor(b0)
|
||||
if b1 == '0' {
|
||||
t.Byte.Skip(b1)
|
||||
t.Byte.MoveCursor(b1)
|
||||
} else {
|
||||
t.Byte.Accept(b1)
|
||||
}
|
||||
|
@ -1598,7 +1599,7 @@ func ModifyDropUntilEndOfLine() Handler {
|
|||
if b == '\n' {
|
||||
return true
|
||||
}
|
||||
t.Byte.Skip(b)
|
||||
t.Byte.MoveCursor(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue