go-parsekit/tokenhandlers_builtin.go

package parsekit

import (
	"fmt"
	"io"
	"runtime"
	"strconv"
	"strings"
	"unicode"
)

// C provides convenient access to a range of parser/combinators that can be
// used to construct TokenHandler functions.
//
// Parser/combinators are so called higher order functions that take in one
// or more other TokenHandlers and output a new TokenHandler. They can be
// used to combine TokenHandlers in useful ways to create new more complex
// TokenHandlers.
//
// Every field is bound to the Match* function of the same name (for example
// C.Seq is MatchSeq), so the documentation of those functions applies.
//
// When using C in your own parser, then it is advised to create a variable
// to reference it:
//
//     var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
	Any        func(...TokenHandler) TokenHandler
	Not        func(TokenHandler) TokenHandler
	Opt        func(TokenHandler) TokenHandler
	Seq        func(...TokenHandler) TokenHandler
	Rep        func(times int, handler TokenHandler) TokenHandler
	Min        func(min int, handler TokenHandler) TokenHandler
	Max        func(max int, handler TokenHandler) TokenHandler
	ZeroOrMore func(TokenHandler) TokenHandler
	OneOrMore  func(TokenHandler) TokenHandler
	MinMax     func(min int, max int, handler TokenHandler) TokenHandler
	Separated  func(separator TokenHandler, separated TokenHandler) TokenHandler // TODO reverse args for consistency, use string?
	Except     func(except TokenHandler, handler TokenHandler) TokenHandler
}{
	Any:        MatchAny,
	Not:        MatchNot,
	Opt:        MatchOpt,
	Seq:        MatchSeq,
	Rep:        MatchRep,
	Min:        MatchMin,
	Max:        MatchMax,
	ZeroOrMore: MatchZeroOrMore,
	OneOrMore:  MatchOneOrMore,
	MinMax:     MatchMinMax,
	Separated:  MatchSeparated,
	Except:     MatchExcept,
}

// A provides convenient access to a range of atoms or functions to build atoms.
//
// Atoms are ready-made TokenHandlers for commonly used input, such as single
// punctuation runes, whitespace, digits and numbers. Fields that have a func
// type (Rune, Runes, RuneRange, Str, StrNoCase, Signed and IntegerBetween)
// build a TokenHandler from their arguments, all other fields are
// TokenHandlers that can be used directly.
//
// Several atoms are available under two names (for example RoundOpen and
// LeftParen, or Asterisk and Multiply), so the name that reads best in the
// context of a parser can be used.
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
//     var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
	Rune                  func(rune) TokenHandler
	Runes                 func(...rune) TokenHandler
	RuneRange             func(rune, rune) TokenHandler
	Str                   func(string) TokenHandler
	StrNoCase             func(string) TokenHandler
	EndOfFile             TokenHandler
	AnyRune               TokenHandler
	Space                 TokenHandler
	Tab                   TokenHandler
	CR                    TokenHandler
	LF                    TokenHandler
	CRLF                  TokenHandler
	Excl                  TokenHandler
	DoubleQuote           TokenHandler
	Hash                  TokenHandler
	Dollar                TokenHandler
	Percent               TokenHandler
	Amp                   TokenHandler
	SingleQuote           TokenHandler
	RoundOpen             TokenHandler
	LeftParen             TokenHandler
	RoundClose            TokenHandler
	RightParen            TokenHandler
	Asterisk              TokenHandler
	Multiply              TokenHandler
	Plus                  TokenHandler
	Add                   TokenHandler
	Comma                 TokenHandler
	Minus                 TokenHandler
	Subtract              TokenHandler
	Dot                   TokenHandler
	Slash                 TokenHandler
	Divide                TokenHandler
	Colon                 TokenHandler
	Semicolon             TokenHandler
	AngleOpen             TokenHandler
	LessThan              TokenHandler
	Equal                 TokenHandler
	AngleClose            TokenHandler
	GreaterThan           TokenHandler
	Question              TokenHandler
	At                    TokenHandler
	SquareOpen            TokenHandler
	Backslash             TokenHandler
	SquareClose           TokenHandler
	Caret                 TokenHandler
	Underscore            TokenHandler
	Backquote             TokenHandler
	CurlyOpen             TokenHandler
	Pipe                  TokenHandler
	CurlyClose            TokenHandler
	Tilde                 TokenHandler
	Newline               TokenHandler
	Whitespace            TokenHandler
	WhitespaceAndNewlines TokenHandler
	EndOfLine             TokenHandler
	Digit                 TokenHandler
	DigitNotZero          TokenHandler
	Digits                TokenHandler
	Float                 TokenHandler
	Boolean               TokenHandler
	Integer               TokenHandler
	Signed                func(TokenHandler) TokenHandler
	IntegerBetween        func(min int64, max int64) TokenHandler
	ASCII                 TokenHandler
	ASCIILower            TokenHandler
	ASCIIUpper            TokenHandler
	HexDigit              TokenHandler
	Octet                 TokenHandler
	IPv4                  TokenHandler
	IPv4MaskBits          TokenHandler
}{
	Rune:                  MatchRune,
	Runes:                 MatchRunes,
	RuneRange:             MatchRuneRange,
	Str:                   MatchStr,
	StrNoCase:             MatchStrNoCase,
	EndOfFile:             MatchEndOfFile(),
	AnyRune:               MatchAnyRune(),
	Space:                 MatchRune(' '),
	Tab:                   MatchRune('\t'),
	CR:                    MatchRune('\r'),
	LF:                    MatchRune('\n'),
	CRLF:                  MatchStr("\r\n"),
	Excl:                  MatchRune('!'),
	DoubleQuote:           MatchRune('"'),
	Hash:                  MatchRune('#'),
	Dollar:                MatchRune('$'),
	Percent:               MatchRune('%'),
	Amp:                   MatchRune('&'),
	SingleQuote:           MatchRune('\''),
	RoundOpen:             MatchRune('('),
	LeftParen:             MatchRune('('),
	RoundClose:            MatchRune(')'),
	RightParen:            MatchRune(')'),
	Asterisk:              MatchRune('*'),
	Multiply:              MatchRune('*'),
	Plus:                  MatchRune('+'),
	Add:                   MatchRune('+'),
	Comma:                 MatchRune(','),
	Minus:                 MatchRune('-'),
	Subtract:              MatchRune('-'),
	Dot:                   MatchRune('.'),
	Slash:                 MatchRune('/'),
	Divide:                MatchRune('/'),
	Colon:                 MatchRune(':'),
	Semicolon:             MatchRune(';'),
	AngleOpen:             MatchRune('<'),
	LessThan:              MatchRune('<'),
	Equal:                 MatchRune('='),
	AngleClose:            MatchRune('>'),
	GreaterThan:           MatchRune('>'),
	Question:              MatchRune('?'),
	At:                    MatchRune('@'),
	SquareOpen:            MatchRune('['),
	Backslash:             MatchRune('\\'),
	SquareClose:           MatchRune(']'),
	Caret:                 MatchRune('^'),
	Underscore:            MatchRune('_'),
	Backquote:             MatchRune('`'),
	CurlyOpen:             MatchRune('{'),
	Pipe:                  MatchRune('|'),
	CurlyClose:            MatchRune('}'),
	Tilde:                 MatchRune('~'),
	// Newline used to be declared but never initialized, which left a nil
	// TokenHandler in the struct. It matches the same line endings that
	// WhitespaceAndNewlines and EndOfLine recognize: "\r\n" or "\n".
	Newline:               MatchAny(MatchStr("\r\n"), MatchRune('\n')),
	Whitespace:            MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
	WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
	EndOfLine:             MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
	Digit:                 MatchDigit(),
	DigitNotZero:          MatchDigitNotZero(),
	Digits:                MatchDigits(),
	Integer:               MatchInteger(),
	Signed:                MatchSigned,
	IntegerBetween:        MatchIntegerBetween,
	Float:                 MatchFloat(),
	Boolean:               MatchBoolean(),
	ASCII:                 MatchRuneRange('\x00', '\x7F'),
	ASCIILower:            MatchRuneRange('a', 'z'),
	ASCIIUpper:            MatchRuneRange('A', 'Z'),
	HexDigit:              MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
	Octet:                 MatchOctet(false),
	IPv4:                  MatchIPv4(),
	IPv4MaskBits:          MatchIntegerBetween(0, 32),
}

// T provides convenient access to a range of Token producers (which in their
// nature are parser/combinators) that can be used when creating TokenHandler
// functions.
//
// Every field is bound to a Make*Token function from this file. Except for
// ByCallback, the producers take a token type (stored in Token.Type) and the
// TokenHandler to wrap. When the wrapped TokenHandler matches, a Token is
// added to the result of the TokenAPI on which the producer was invoked:
//
//   - Str stores the matched output as a string in Token.Value.
//   - StrInterpreted stores the output after interpreting Go-style escape
//     sequences (see interpretString).
//   - Byte and Rune store the first rune of the output (as byte or rune).
//   - Int .. Uint64, Float32, Float64 and Boolean convert the output using
//     the strconv package. These producers panic when the conversion fails,
//     so only wrap handlers that validate the input on beforehand.
//   - ByCallback lets a callback function construct the Token.
//
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
//     var t = parsekit.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct {
	Str            func(interface{}, TokenHandler) TokenHandler
	StrInterpreted func(interface{}, TokenHandler) TokenHandler
	Byte           func(interface{}, TokenHandler) TokenHandler
	Rune           func(interface{}, TokenHandler) TokenHandler
	Int            func(interface{}, TokenHandler) TokenHandler
	Int8           func(interface{}, TokenHandler) TokenHandler
	Int16          func(interface{}, TokenHandler) TokenHandler
	Int32          func(interface{}, TokenHandler) TokenHandler
	Int64          func(interface{}, TokenHandler) TokenHandler
	Uint           func(interface{}, TokenHandler) TokenHandler
	Uint8          func(interface{}, TokenHandler) TokenHandler
	Uint16         func(interface{}, TokenHandler) TokenHandler
	Uint32         func(interface{}, TokenHandler) TokenHandler
	Uint64         func(interface{}, TokenHandler) TokenHandler
	Float32        func(interface{}, TokenHandler) TokenHandler
	Float64        func(interface{}, TokenHandler) TokenHandler
	Boolean        func(interface{}, TokenHandler) TokenHandler
	ByCallback     func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
}{
	Str:            MakeStrLiteralToken,
	StrInterpreted: MakeStrInterpretedToken,
	Byte:           MakeByteToken,
	Rune:           MakeRuneToken,
	Int:            MakeIntToken,
	Int8:           MakeInt8Token,
	Int16:          MakeInt16Token,
	Int32:          MakeInt32Token,
	Int64:          MakeInt64Token,
	Uint:           MakeUintToken,
	Uint8:          MakeUint8Token,
	Uint16:         MakeUint16Token,
	Uint32:         MakeUint32Token,
	Uint64:         MakeUint64Token,
	Float32:        MakeFloat32Token,
	Float64:        MakeFloat64Token,
	Boolean:        MakeBooleanToken,
	ByCallback:     MakeTokenByCallback,
}

// MatchRune builds a TokenHandler that reads the next rune from the input and
// compares it to the expected rune. On a match the rune is accepted and true
// is returned. When reading fails or another rune is found, the handler
// returns false without accepting anything.
func MatchRune(expected rune) TokenHandler {
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		if err != nil || r != expected {
			return false
		}
		t.Accept()
		return true
	}
}

// MatchRunes builds a TokenHandler that reads the next rune from the input and
// checks whether it is one of the expected runes. On a match the rune is
// accepted and true is returned, otherwise the handler returns false.
func MatchRunes(expected ...rune) TokenHandler {
	// The set of runes is converted to a string once, so every invocation of
	// the handler can do its lookup using strings.ContainsRune.
	allowed := string(expected)
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		if err != nil || !strings.ContainsRune(allowed, r) {
			return false
		}
		t.Accept()
		return true
	}
}

// MatchRuneRange creates a TokenHandler function that checks if the next rune
// from the input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
//
//     MatchRuneRange('g', 'k')
//
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// The end rune must not be smaller than the start rune. Violating this is
// a definition error, which makes MatchRuneRange panic.
func MatchRuneRange(start rune, end rune) TokenHandler {
	if end < start {
		// The message used to claim that start must not be < end, which is
		// the opposite of the condition that is actually enforced here.
		panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be > end %q", start, end))
	}
	return func(t *TokenAPI) bool {
		input, err := t.NextRune()
		if err == nil && input >= start && input <= end {
			t.Accept()
			return true
		}
		return false
	}
}

// MatchStr builds a TokenHandler that checks if the upcoming runes on the
// input are equal to the runes of the expected string. It is implemented as
// a MatchSeq of one MatchRune handler per rune of the expected string.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) TokenHandler {
	// The byte length is an upper bound for the number of runes.
	sequence := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		sequence = append(sequence, MatchRune(r))
	}
	return MatchSeq(sequence...)
}

// MatchStrNoCase builds a TokenHandler that checks if the upcoming runes on
// the input are equal to the expected string, ignoring case. For every rune
// of the expected string, both its upper case and its lower case form (as
// produced by unicode.ToUpper and unicode.ToLower) are considered a match.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) TokenHandler {
	// The byte length is an upper bound for the number of runes.
	perRune := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSeq(perRune...)
}

// MatchOpt builds a TokenHandler that makes the provided TokenHandler
// optional. The provided handler is tried on a fork of the TokenAPI. When it
// applies, the fork is merged back so its output is used. When it does not
// apply, the fork is not merged. In both cases a successful match is
// reported.
func MatchOpt(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if attempt := t.Fork(); handler(attempt) {
			attempt.Merge()
		}
		return true
	}
}

// MatchSeq builds a TokenHandler that applies the provided TokenHandlers one
// after the other, in the order in which they were provided. All handlers are
// run against the same fork of the TokenAPI. The fork is only merged back
// when every handler applies. As soon as one handler fails, a mismatch is
// reported and the fork is left unmerged.
func MatchSeq(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		for i := range handlers {
			if matched := handlers[i](fork); !matched {
				return false
			}
		}
		fork.Merge()
		return true
	}
}

// MatchAny builds a TokenHandler that tries the provided TokenHandlers in the
// order in which they were provided, each one on a fresh fork of the
// TokenAPI. The fork of the first handler that applies is merged back and
// a successful match is reported. When none of the handlers applies,
// a mismatch is reported.
func MatchAny(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		for i := 0; i < len(handlers); i++ {
			attempt := t.Fork()
			if !handlers[i](attempt) {
				continue
			}
			attempt.Merge()
			return true
		}
		return false
	}
}

// MatchNot builds a TokenHandler that inverts the provided TokenHandler. The
// provided handler is probed on a fork of the TokenAPI, which is never merged
// back. When the probe applies, a mismatch is reported. When it does not
// apply, the next rune is read from the input and accepted as the match. If
// no rune can be read, a mismatch is reported as well.
func MatchNot(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if handler(t.Fork()) {
			return false
		}
		if _, err := t.NextRune(); err != nil {
			return false
		}
		t.Accept()
		return true
	}
}

// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
// applied exactly the provided amount of times.
//
// Note that the input can contain more than the provided number of matches, e.g.:
//
//     MatchRep(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input.
//
// The number of times must not be negative. Violating this is a definition
// error, which makes MatchRep panic.
func MatchRep(times int, handler TokenHandler) TokenHandler {
	// matchMinMax treats a negative max as "no upper limit". Without this
	// check, a negative times would silently behave like MatchZeroOrMore.
	if times < 0 {
		panic("TokenHandler bug: MatchRep definition error: times must be >= 0")
	}
	return matchMinMax(times, times, handler, "MatchRep")
}

// MatchMin builds a TokenHandler that requires the provided TokenHandler to
// apply at least min times in a row. Matches beyond the minimum are included
// in the output as well, without an upper limit.
//
// A negative min is a definition error, which makes MatchMin panic.
func MatchMin(min int, handler TokenHandler) TokenHandler {
	// A negative max tells matchMinMax that there is no upper limit.
	const unlimited = -1
	if min >= 0 {
		return matchMinMax(min, unlimited, handler, "MatchMin")
	}
	panic("TokenHandler bug: MatchMin definition error: min must be >= 0")
}

// MatchMax builds a TokenHandler that applies the provided TokenHandler as
// often as possible, but at most max times. Zero matches are considered
// a successful match. When the input holds more than max matches, the
// handler is not applied to the additional ones, so they are not part of
// the output.
//
// A negative max is a definition error, which makes MatchMax panic.
func MatchMax(max int, handler TokenHandler) TokenHandler {
	if max >= 0 {
		return matchMinMax(0, max, handler, "MatchMax")
	}
	panic("TokenHandler bug: MatchMax definition error: max must be >= 0")
}

// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
// be applied zero or more times. All matches will be included in the output.
// Zero matches are considered a successful match.
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
	// The last argument is the name that matchMinMax uses in its definition
	// error message (it was misspelled as "MatchZeroOfMore" before).
	return matchMinMax(0, -1, handler, "MatchZeroOrMore")
}

// MatchOneOrMore builds a TokenHandler that requires the provided
// TokenHandler to apply at least once. All consecutive matches are included
// in the output.
func MatchOneOrMore(handler TokenHandler) TokenHandler {
	// A negative max tells matchMinMax that there is no upper limit.
	const atLeast, unlimited = 1, -1
	return matchMinMax(atLeast, unlimited, handler, "MatchOneOrMore")
}

// MatchMinMax builds a TokenHandler that requires the provided TokenHandler
// to apply at least min times, and that includes at most max consecutive
// matches in the output (both boundaries are inclusive).
//
// A negative min or max is a definition error, just like a max that is
// smaller than min (checked by matchMinMax). Definition errors result in
// a panic.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
	switch {
	case max < 0:
		panic("TokenHandler bug: MatchMinMax definition error: max must be >= 0")
	case min < 0:
		panic("TokenHandler bug: MatchMinMax definition error: min must be >= 0")
	}
	return matchMinMax(min, max, handler, "MatchMinMax")
}

// matchMinMax is the shared implementation for the repetition combinators
// (MatchRep, MatchMin, MatchMax, MatchZeroOrMore, MatchOneOrMore and
// MatchMinMax). It builds a TokenHandler that requires the provided handler
// to apply at least min times and that includes at most max matches in the
// output. A negative max means that there is no upper limit.
//
// The name is the name of the calling combinator. It is only used in the
// panic message for the definition error where max is smaller than min.
//
// NOTE: when there is no upper limit, the repetition only stops when the
// handler reports a mismatch. A handler that never fails (for example one
// created by MatchOpt) therefore results in an endless loop.
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
	if max >= 0 && min > max {
		panic(fmt.Sprintf("TokenHandler bug: %s definition error: max %d must not be < min %d", name, max, min))
	}
	unbounded := max < 0
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		count := 0
		// The required matches: every single one of them must succeed.
		for ; count < min; count++ {
			if !handler(fork) {
				return false
			}
		}
		fork.Merge()
		// The remaining available matches, up to the maximum if there is one.
		// Every successful match is merged right away, so a failing attempt
		// does not undo the matches that were found before it.
		for unbounded || count < max {
			if !handler(fork) {
				return true
			}
			fork.Merge()
			count++
		}
		return true
	}
}

// MatchSeparated builds a TokenHandler that checks for one or more
// occurrences of one TokenHandler (the separated), with an occurrence of
// another TokenHandler (the separator) between each two of them. All matches
// (separated and separator) are included in the output.
//
// Note the argument order: the separator comes first, the separated second.
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
	following := MatchZeroOrMore(MatchSeq(separator, separated))
	return MatchSeq(separated, following)
}

// MatchExcept builds a TokenHandler that applies the provided handler, unless
// the except TokenHandler applies to the upcoming input. The except handler
// is checked first, on a fork of the TokenAPI that is never merged back. When
// it applies, the match as a whole is a mismatch and the handler is not
// invoked. Otherwise, the result of the handler is reported.
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		excluded := except(t.Fork())
		return !excluded && handler(t)
	}
}

// MatchSigned builds a TokenHandler that checks for an optional '+' or '-'
// sign, followed by a match for the provided TokenHandler. This can be used
// to turn numeric atoms into a signed version, e.g.
//
//     A.Signed(A.Integer)
func MatchSigned(handler TokenHandler) TokenHandler {
	plus, minus := MatchRune('+'), MatchRune('-')
	return MatchSeq(MatchOpt(MatchAny(plus, minus)), handler)
}

// MatchIntegerBetween creates a TokenHandler that checks for an integer
// value between the provided min and max boundaries (inclusive).
// It uses an int64 for checking internally, so you can check values
// ranging from -9223372036854775808 to 9223372036854775807.
//
// The integer on the input is an optional '+' or '-' sign, followed by one
// or more digits. Input that does not fit in an int64 is a mismatch.
//
// The max boundary must not be smaller than the min boundary. Violating this
// is a definition error, which makes MatchIntegerBetween panic.
func MatchIntegerBetween(min int64, max int64) TokenHandler {
	if max < min {
		panic(fmt.Sprintf("TokenHandler bug: MatchIntegerBetween definition error: max %d must not be < min %d", max, min))
	}
	digits := MatchSigned(MatchDigits())
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !digits(fork) {
			return false
		}
		// On overflow, ParseInt returns an error together with a value that
		// is clamped to the int64 range. Ignoring that error would accept
		// out-of-range input when a boundary equals the int64 limit.
		value, err := strconv.ParseInt(fork.Result().String(), 10, 64)
		if err != nil {
			return false
		}
		if value < min || value > max {
			return false
		}
		fork.Merge()
		return true
	}
}

// MatchEndOfFile builds a TokenHandler that checks if the end of the input
// has been reached, meaning that reading the next rune yields io.EOF. The
// rune is read from a fork of the TokenAPI that is never merged back, so
// this TokenHandler never produces output. It only reports a successful or
// a failing match through its boolean return value.
func MatchEndOfFile() TokenHandler {
	return func(t *TokenAPI) bool {
		_, err := t.Fork().NextRune()
		return err == io.EOF
	}
}

// MatchAnyRune builds a TokenHandler that matches any rune that can be read
// from the input. When reading the next rune succeeds, the rune is accepted
// and a successful match is reported. When reading fails (NextRune returns
// an error), a mismatch is reported.
func MatchAnyRune() TokenHandler {
	return func(t *TokenAPI) bool {
		if _, err := t.NextRune(); err != nil {
			return false
		}
		t.Accept()
		return true
	}
}

// MatchDigit builds a TokenHandler that matches a single decimal digit
// ('0' up to and including '9') on the input.
func MatchDigit() TokenHandler {
	const first, last = '0', '9'
	return MatchRuneRange(first, last)
}

// MatchDigits builds a TokenHandler that matches one or more consecutive
// decimal digits ('0' up to and including '9') on the input.
func MatchDigits() TokenHandler {
	const first, last = '0', '9'
	digit := MatchRuneRange(first, last)
	return MatchOneOrMore(digit)
}

// MatchDigitNotZero builds a TokenHandler that matches a single decimal digit
// other than zero ('1' up to and including '9') on the input.
func MatchDigitNotZero() TokenHandler {
	const first, last = '1', '9'
	return MatchRuneRange(first, last)
}

// MatchInteger builds a TokenHandler that matches an integer on the input:
// either a non-zero digit followed by zero or more digits, or a single '0'.
// In line with Go, an integer cannot start with a zero, since a leading zero
// is used to indicate other bases, like octal or hexadecimal. For input like
// "0123", only the leading "0" is matched.
func MatchInteger() TokenHandler {
	nonZeroLed := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
	return MatchAny(nonZeroLed, MatchRune('0'))
}

// MatchFloat builds a TokenHandler that matches a float value on the input:
// one or more digits, optionally followed by a dot and one or more digits.
// Since the fractional part is optional, both "123" and "123.123" match.
func MatchFloat() TokenHandler {
	digits := MatchDigits()
	fraction := MatchSeq(MatchRune('.'), digits)
	return MatchSeq(digits, MatchOpt(fraction))
}

// MatchBoolean builds a TokenHandler that matches a boolean value on the
// input. The recognized values are the ones that are understood by Go's
// strconv.ParseBool() function: "true", "TRUE", "True", "1", "t", "T" and
// "false", "FALSE", "False", "0", "f", "F".
func MatchBoolean() TokenHandler {
	// The words are tried before the single runes, so "true" is matched as
	// a whole, instead of only its first rune 't'.
	anyOf := func(words []string, runes ...rune) TokenHandler {
		alternatives := make([]TokenHandler, 0, len(words)+len(runes))
		for _, word := range words {
			alternatives = append(alternatives, MatchStr(word))
		}
		for _, r := range runes {
			alternatives = append(alternatives, MatchRune(r))
		}
		return MatchAny(alternatives...)
	}
	truthy := anyOf([]string{"true", "TRUE", "True"}, '1', 't', 'T')
	falsy := anyOf([]string{"false", "FALSE", "False"}, '0', 'f', 'F')
	return MatchAny(truthy, falsy)
}

// MatchOctet builds a TokenHandler that matches an octet value on the input
// (octet = byte value representation, with a value between 0 and 255
// inclusive). It only looks at the first 1 to 3 upcoming digits, not at what
// follows them, meaning that "123255" is a valid sequence of two octets.
// When these digits represent a value above 255, a mismatch is reported.
//
// When the normalize parameter is set to true, then leading zeroes are
// stripped from the octet in the output (always leaving at least one rune).
func MatchOctet(normalize bool) TokenHandler {
	upToThreeDigits := MatchMinMax(1, 3, MatchDigit())
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !upToThreeDigits(fork) {
			return false
		}
		// The conversion error is ignored: the handler above has matched
		// one up to three decimal digits, which is expected to be input
		// that ParseInt can always handle.
		value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
		if value > 255 {
			return false
		}
		if normalize {
			runes := fork.Result().Runes()
			// Skip leading zeroes, but never the last rune.
			start := 0
			for start < len(runes)-1 && runes[start] == '0' {
				start++
			}
			fork.Result().SetRunes(runes[start:])
		}
		fork.Merge()
		return true
	}
}

// MatchIPv4 builds a TokenHandler that matches an IPv4 address on the input:
// four octets, separated by dots. The octets are normalized, so an IP address
// that looks like "192.168.001.012" results in the output "192.168.1.12".
func MatchIPv4() TokenHandler {
	octet, dot := MatchOctet(true), MatchRune('.')
	return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
}

// M provides convenient access to a range of modifiers (which in their nature are
// parser/combinators) that can be used when creating TokenHandler functions.
//
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
// resulting output of another TokenHandler in some way. It does not do any matching
// against input of its own.
//
// Every field is bound to the Modify* function of the same name (for example
// M.Drop is ModifyDrop), so the documentation of those functions applies.
// All modifiers are built on top of ModifyByCallback: when the wrapped
// TokenHandler does not apply, the modifier reports a mismatch as well.
//
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
//     var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
	Drop       func(TokenHandler) TokenHandler
	Trim       func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimLeft   func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimRight  func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimSpace  func(handler TokenHandler) TokenHandler
	ToLower    func(TokenHandler) TokenHandler
	ToUpper    func(TokenHandler) TokenHandler
	Replace    func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
	ByCallback func(TokenHandler, func(string) string) TokenHandler
}{
	Drop:       ModifyDrop,
	Trim:       ModifyTrim,
	TrimLeft:   ModifyTrimLeft,
	TrimRight:  ModifyTrimRight,
	TrimSpace:  ModifyTrimSpace,
	ToLower:    ModifyToLower,
	ToUpper:    ModifyToUpper,
	Replace:    ModifyReplace,
	ByCallback: ModifyByCallback,
}

// ModifyDrop builds a TokenHandler that checks if the provided TokenHandler
// applies. If it does, then its output is replaced by an empty string,
// effectively discarding it.
//
// Note that if the TokenHandler does not apply, a mismatch will be reported
// back, even though the output would have been dropped anyway. So if you
// would like to drop optional whitespace, then use something like:
//
//     M.Drop(C.Opt(A.Whitespace))
//
// instead of:
//
//     M.Drop(A.Whitespace)
//
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler TokenHandler) TokenHandler {
	discard := func(string) string { return "" }
	return ModifyByCallback(handler, discard)
}

// ModifyTrim builds a TokenHandler that checks if the provided TokenHandler
// applies. If it does, then all leading and trailing characters that are
// contained in the cutset are removed from its output.
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, true
	return modifyTrim(handler, cutset, left, right)
}

// ModifyTrimLeft builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then all leading characters that are
// contained in the cutset are removed from its output.
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, false
	return modifyTrim(handler, cutset, left, right)
}

// ModifyTrimRight builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then all trailing characters that are
// contained in the cutset are removed from its output.
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
	const left, right = false, true
	return modifyTrim(handler, cutset, left, right)
}

// modifyTrim is the shared implementation for ModifyTrim, ModifyTrimLeft and
// ModifyTrimRight. It builds a modifier that removes the characters from the
// cutset from the left side of the output (when trimLeft is true) and/or
// from the right side of the output (when trimRight is true).
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
	// The trim steps are selected once, at definition time. The left side
	// is always trimmed before the right side.
	var steps []func(string, string) string
	if trimLeft {
		steps = append(steps, strings.TrimLeft)
	}
	if trimRight {
		steps = append(steps, strings.TrimRight)
	}
	return ModifyByCallback(handler, func(s string) string {
		for _, trim := range steps {
			s = trim(s, cutset)
		}
		return s
	})
}

// ModifyTrimSpace builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then all leading and trailing white
// space characters, as defined by Unicode, are removed from its output
// using strings.TrimSpace.
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
	return ModifyByCallback(handler, func(s string) string {
		return strings.TrimSpace(s)
	})
}

// ModifyToUpper builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then its output is converted into upper
// case using strings.ToUpper.
func ModifyToUpper(handler TokenHandler) TokenHandler {
	return ModifyByCallback(handler, func(s string) string {
		return strings.ToUpper(s)
	})
}

// ModifyToLower builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then its output is converted into lower
// case using strings.ToLower.
func ModifyToLower(handler TokenHandler) TokenHandler {
	return ModifyByCallback(handler, func(s string) string {
		return strings.ToLower(s)
	})
}

// ModifyReplace builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then its output is replaced as a whole
// by the replaceWith string, regardless of what the output was.
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
	replace := func(string) string { return replaceWith }
	return ModifyByCallback(handler, replace)
}

// ModifyByCallback builds a TokenHandler that checks if the provided
// TokenHandler applies, using a fork of the TokenAPI. If it does, then the
// string output of the fork is fed to the provided modfunc, and the string
// that modfunc returns replaces that output before the fork is merged back.
// If the handler does not apply, a mismatch is reported and modfunc is not
// called.
//
// The modfunc is a simple function that takes a string on input and returns
// a possibly modified string on output.
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !handler(fork) {
			return false
		}
		modified := modfunc(fork.Result().String())
		fork.Result().SetRunes(modified)
		fork.Merge()
		return true
	}
}

// MakeStrLiteralToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result. The Value of the Token is the literal string
// output of the handler (no escape sequences are interpreted), its Runes
// are the runes of that same output.
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		token := &Token{Type: toktype}
		token.Value = t.Result().String()
		token.Runes = t.Result().Runes()
		return token
	}
	return MakeTokenByCallback(handler, build)
}

// MakeStrInterpretedToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result. The Value of the Token is the string output of the
// handler, after running it through interpretString to interpret its escape
// sequences. The Runes of the Token are the runes of the uninterpreted
// output.
//
// An interpretation error is currently ignored. In that case, the Value is
// whatever interpretString returned along with the error.
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		token := &Token{Type: toktype}
		// TODO ERROR HANDLING
		token.Value, _ = interpretString(t.Result().String())
		token.Runes = t.Result().Runes()
		return token
	}
	return MakeTokenByCallback(handler, build)
}

// MakeRuneToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result. The Value of the Token is the first rune of the
// handler's output, its Runes are all runes of that output.
//
// There is no check on the output consisting of exactly one rune.
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		token := &Token{Type: toktype}
		token.Runes = t.Result().Runes()
		// TODO ERROR HANDLING --- not a 1 rune input
		token.Value = t.Result().Rune(0)
		return token
	}
	return MakeTokenByCallback(handler, build)
}

// MakeByteToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result. The Value of the Token is the first rune of the
// handler's output, converted to a byte. Its Runes are all runes of that
// output.
//
// There is no check on the output consisting of exactly one rune, nor on
// that rune fitting in a byte (the conversion keeps the lowest 8 bits).
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		token := &Token{Type: toktype}
		token.Runes = t.Result().Runes()
		// TODO ERROR HANDLING --- not a 1 byte input
		token.Value = byte(t.Result().Rune(0))
		return token
	}
	return MakeTokenByCallback(handler, build)
}

// interpretString interprets the provided string like the body of a double
// quoted Go string literal (the text between the quotes, so without the
// quotes themselves). Escape sequences like \t, \n, \\, \", \xNN, \NNN,
// \uNNNN and \UNNNNNNNN are replaced by the value that they represent.
//
// Interpretation stops at the first error, which is the case for an invalid
// escape sequence and for an unescaped double quote. The part that was
// interpreted successfully so far is then returned together with the error.
func interpretString(str string) (string, error) {
	var sb strings.Builder
	// The interpreted string is never longer than its escaped form.
	sb.Grow(len(str))
	for len(str) > 0 {
		r, multibyte, remainder, err := strconv.UnquoteChar(str, '"')
		if err != nil {
			return sb.String(), err
		}
		str = remainder
		// When multibyte is false, the value is a single byte (plain ASCII,
		// or a \xNN or octal escape). Such a value must be written as one
		// raw byte. Writing it as a rune would UTF-8 encode values >= 0x80
		// into two bytes, breaking input like `\xe4\xb8\x96`.
		if multibyte {
			sb.WriteRune(r)
		} else {
			sb.WriteByte(byte(r))
		}
	}
	return sb.String(), nil
}

// MakeIntToken builds a TokenHandler that checks if the provided TokenHandler
// applies. If it does, then a Token of the provided token type is added to
// the result, with the handler's output converted to an int (using
// strconv.Atoi) as its Value. The TokenHandler panics when the output cannot
// be converted.
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt := func(s string) (interface{}, error) {
		return strconv.Atoi(s)
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, toInt)
}

// MakeInt8Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to an int8
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
// TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseInt(s, 10, 8)
		if err != nil {
			return wide, err
		}
		return int8(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeInt16Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to an int16
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseInt(s, 10, 16)
		if err != nil {
			return wide, err
		}
		return int16(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeInt32Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to an int32
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseInt(s, 10, 32)
		if err != nil {
			return wide, err
		}
		return int32(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeInt64Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to an int64
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		// ParseInt already returns an int64, no narrowing is needed.
		return strconv.ParseInt(s, 10, 64)
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeUintToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a uint
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		// Bit size 0 makes ParseUint use the size of the uint type.
		wide, err := strconv.ParseUint(s, 10, 0)
		if err != nil {
			return wide, err
		}
		return uint(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeUint8Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a uint8
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseUint(s, 10, 8)
		if err != nil {
			return wide, err
		}
		return uint8(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeUint16Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a uint16
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseUint(s, 10, 16)
		if err != nil {
			return wide, err
		}
		return uint16(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeUint32Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a uint32
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseUint(s, 10, 32)
		if err != nil {
			return wide, err
		}
		return uint32(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeUint64Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a uint64
// (parsed as a base 10 number) as its Value. The TokenHandler panics when
// the output cannot be converted.
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		// ParseUint already returns a uint64, no narrowing is needed.
		return strconv.ParseUint(s, 10, 64)
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeFloat32Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a float32
// (using strconv.ParseFloat) as its Value. The TokenHandler panics when the
// output cannot be converted.
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		wide, err := strconv.ParseFloat(s, 32)
		if err != nil {
			return wide, err
		}
		return float32(wide), nil
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeFloat64Token builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a float64
// (using strconv.ParseFloat) as its Value. The TokenHandler panics when the
// output cannot be converted.
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		// ParseFloat already returns a float64, no narrowing is needed.
		return strconv.ParseFloat(s, 64)
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// MakeBooleanToken builds a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then a Token of the provided token type
// is added to the result, with the handler's output converted to a bool
// (using strconv.ParseBool) as its Value. The TokenHandler panics when the
// output cannot be converted.
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
	parse := func(s string) (interface{}, error) {
		return strconv.ParseBool(s)
	}
	// Keep this a direct call: makeStrconvToken derives the name for its
	// error message from the function that calls it.
	return makeStrconvToken(toktype, handler, parse)
}

// makeStrconvToken is the shared implementation for the token makers that
// convert the string output of a TokenHandler into another Go type. It
// builds a TokenHandler that checks if the provided handler applies. If it
// does, then the string output is passed to the convert function and the
// converted value is used as the Value of a new Token of the provided token
// type.
//
// When the convert function returns an error, the TokenHandler panics. The
// panic message mentions the name of the function that called
// makeStrconvToken (e.g. "MakeIntToken"), so token makers must call this
// function directly from their own function body.
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
	// Look up the name of the calling token maker for the error message.
	// Both lookups can fail, in which case the name of this function is
	// used instead of dereferencing a nil *runtime.Func.
	name := "makeStrconvToken"
	if pc, _, _, ok := runtime.Caller(1); ok {
		if fn := runtime.FuncForPC(pc); fn != nil {
			// Strip the package path: keep what follows the last dot.
			fullName := fn.Name()
			name = fullName[strings.LastIndex(fullName, ".")+1:]
		}
	}
	return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
		value, err := convert(t.Result().String())
		if err != nil {
			panic(fmt.Sprintf(
				"TokenHandler error: %s cannot handle input %q: %s "+
					"(only use a type conversion token maker, when the input has been "+
					"validated on beforehand)", name, t.Result().String(), err))
		}
		return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
	})
}

// MakeTokenByCallback builds a TokenHandler that checks if the provided
// TokenHandler applies, using a fork of the TokenAPI. If it does, then the
// callback is invoked with that fork to construct a Token. The Token is
// added to the result of the TokenAPI on which the handler was invoked,
// after which the fork is merged back. If the handler does not apply,
// a mismatch is reported and the callback is not invoked.
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !handler(fork) {
			return false
		}
		// The token is added before the merge, like it has always been. The
		// order of these two steps is kept as-is on purpose.
		parent := t.Result()
		parent.AddToken(callback(fork))
		fork.Merge()
		return true
	}
}