From 34fa8d64a865836fc24b2189262aab916f62fcf0 Mon Sep 17 00:00:00 2001 From: chenzhuoyu Date: Thu, 10 Jun 2021 16:36:04 +0800 Subject: [PATCH] feat: CPU feature detection support --- .licenserc.yaml | 18 +- Makefile | 113 +- ast/node.go | 174 +- ast/parser.go | 85 +- ast/search.go | 38 +- ast/utils.go | 7 +- decoder/assembler_amd64.go | 39 +- decoder/assembler_test.go | 38 +- decoder/errors.go | 8 +- decoder/errors_test.go | 4 +- decoder/generic_amd64.go | 21 +- decoder/generic_amd64.s | 22 +- decoder/generic_test.go | 6 +- decoder/pools.go | 4 +- encoder/assembler_amd64.go | 221 +- encoder/primitives.go | 6 +- go.mod | 3 +- go.sum | 6 +- internal/cpu/features.go | 38 + internal/jit/assembler_amd64.go | 4 +- internal/jit/backend_test.go | 34 + internal/native/avx/fastfloat_amd64_test.go | 66 + internal/native/avx/fastint_amd64_test.go | 135 + internal/native/avx/native_amd64.go | 100 + internal/native/avx/native_amd64.s | 6246 +++++++++++++++++ internal/native/avx/native_amd64_test.go | 426 ++ internal/native/avx/native_export_amd64.go | 45 + .../native/{ => avx}/native_subr_amd64.go | 30 +- internal/native/avx2/fastfloat_amd64_test.go | 66 + internal/native/avx2/fastint_amd64_test.go | 135 + internal/native/avx2/native_amd64.go | 100 + internal/native/{ => avx2}/native_amd64.s | 3593 +++++----- internal/native/avx2/native_amd64_test.go | 426 ++ internal/native/avx2/native_export_amd64.go | 45 + internal/native/avx2/native_subr_amd64.go | 50 + internal/native/dispatch_amd64.go | 123 + internal/native/dispatch_amd64.s | 55 + ...loat_test.go => fastfloat_amd64_test.tmpl} | 2 +- ...astint_test.go => fastint_amd64_test.tmpl} | 2 +- .../{native_amd64.go => native_amd64.tmpl} | 20 +- ...{native_test.go => native_amd64_test.tmpl} | 127 +- internal/native/native_export_amd64.tmpl | 43 + internal/native/{native.go => types/types.go} | 53 +- native/fastbytes.c | 401 +- native/fastint.c | 32 + native/parsing.c | 96 +- tools/asm2asm | 2 +- 47 files changed, 10838 
insertions(+), 2470 deletions(-) create mode 100644 internal/cpu/features.go create mode 100644 internal/jit/backend_test.go create mode 100644 internal/native/avx/fastfloat_amd64_test.go create mode 100644 internal/native/avx/fastint_amd64_test.go create mode 100644 internal/native/avx/native_amd64.go create mode 100644 internal/native/avx/native_amd64.s create mode 100644 internal/native/avx/native_amd64_test.go create mode 100644 internal/native/avx/native_export_amd64.go rename internal/native/{ => avx}/native_subr_amd64.go (89%) create mode 100644 internal/native/avx2/fastfloat_amd64_test.go create mode 100644 internal/native/avx2/fastint_amd64_test.go create mode 100644 internal/native/avx2/native_amd64.go rename internal/native/{ => avx2}/native_amd64.s (73%) create mode 100644 internal/native/avx2/native_amd64_test.go create mode 100644 internal/native/avx2/native_export_amd64.go create mode 100644 internal/native/avx2/native_subr_amd64.go create mode 100644 internal/native/dispatch_amd64.go create mode 100644 internal/native/dispatch_amd64.s rename internal/native/{fastfloat_test.go => fastfloat_amd64_test.tmpl} (99%) rename internal/native/{fastint_test.go => fastint_amd64_test.tmpl} (99%) rename internal/native/{native_amd64.go => native_amd64.tmpl} (77%) rename internal/native/{native_test.go => native_amd64_test.tmpl} (73%) create mode 100644 internal/native/native_export_amd64.tmpl rename internal/native/{native.go => types/types.go} (68%) diff --git a/.licenserc.yaml b/.licenserc.yaml index 303ddec..804a2b7 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -8,13 +8,15 @@ header: - '**/*.s' paths-ignore: - - 'ast/asm.s' # empty file - - 'encoder/asm.s' # empty file - - 'internal/caching/asm.s' # empty file - - 'internal/jit/asm.s' # empty file - - 'internal/native/native_amd64.s' # auto-generated by asm2asm - - 'internal/native/native_subr_amd64.go' # auto-generated by asm2asm - - 'internal/resolver/asm.s' # empty file - - 'internal/rt/asm.s' # 
empty file + - 'ast/asm.s' # empty file + - 'encoder/asm.s' # empty file + - 'internal/caching/asm.s' # empty file + - 'internal/jit/asm.s' # empty file + - 'internal/native/avx/native_amd64.s' # auto-generated by asm2asm + - 'internal/native/avx/native_subr_amd64.go' # auto-generated by asm2asm + - 'internal/native/avx2/native_amd64.s' # auto-generated by asm2asm + - 'internal/native/avx2/native_subr_amd64.go' # auto-generated by asm2asm + - 'internal/resolver/asm.s' # empty file + - 'internal/rt/asm.s' # empty file comment: on-failure \ No newline at end of file diff --git a/Makefile b/Makefile index 631dcf9..9ca68a9 100644 --- a/Makefile +++ b/Makefile @@ -14,39 +14,92 @@ # limitations under the License. # -.PHONY: all clean +ARCH := avx avx2 +TMP_DIR := output +OUT_DIR := internal/native +SRC_FILE := native/native.c -CFLAGS := -mavx -CFLAGS += -mavx2 -CFLAGS += -mbmi -CFLAGS += -mbmi2 -CFLAGS += -mfma -CFLAGS += -msse -CFLAGS += -msse2 -CFLAGS += -msse3 -CFLAGS += -msse4 -CFLAGS += -mssse3 -CFLAGS += -mno-red-zone -CFLAGS += -ffast-math -CFLAGS += -fno-asynchronous-unwind-tables -CFLAGS += -fno-builtin -CFLAGS += -fno-exceptions -CFLAGS += -fno-rtti -CFLAGS += -fno-stack-protector -CFLAGS += -nostdlib -CFLAGS += -O3 +CPU_avx := amd64 +CPU_avx2 := amd64 -NATIVE_ASM := $(wildcard native/*.S) -NATIVE_SRC := $(wildcard native/*.h) -NATIVE_SRC += $(wildcard native/*.c) +TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64 +TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64 -all: internal/native/native_amd64.s +CFLAGS_avx := -msse2 -mavx -mno-avx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=0 +CFLAGS_avx2 := -msse2 -mavx -mavx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=1 + +CC_amd64 := clang +ASM2ASM_amd64 := tools/asm2asm/asm2asm.py + +CFLAGS := -mno-red-zone +CFLAGS += -fno-asynchronous-unwind-tables +CFLAGS += -fno-builtin +CFLAGS += -fno-exceptions +CFLAGS += -fno-rtti +CFLAGS += -fno-stack-protector 
+CFLAGS += -nostdlib +CFLAGS += -O3 + +NATIVE_SRC := $(wildcard native/*.h) +NATIVE_SRC += $(wildcard native/*.c) + +.PHONY: all clean ${ARCH} + +define build_tmpl + $(eval @arch := $(1)) + $(eval @tmpl := $(2)) + $(eval @dest := $(3)) + +${@dest}: ${@tmpl} + mkdir -p $(dir ${@dest}) + echo '// Code generated by Makefile, DO NOT EDIT.' > ${@dest} + echo >> ${@dest} + sed -e 's/{{PACKAGE}}/${@arch}/g' ${@tmpl} >> ${@dest} +endef + +define build_arch + $(eval @cpu := $(value CPU_$(1))) + $(eval @deps := $(foreach tmpl,$(value TMPL_$(1)),${OUT_DIR}/$(1)/${tmpl}.go)) + $(eval @asmin := ${TMP_DIR}/$(1)/native.s) + $(eval @asmout := ${OUT_DIR}/$(1)/native_${@cpu}.s) + $(eval @stubin := ${OUT_DIR}/native_${@cpu}.tmpl) + $(eval @stubout := ${OUT_DIR}/$(1)/native_${@cpu}.go) + +$(1): ${@asmout} ${@deps} + +${@asmout}: ${@stubout} ${NATIVE_SRC} + mkdir -p ${TMP_DIR}/$(1) + $${CC_${@cpu}} $${CFLAGS} $${CFLAGS_$(1)} -S -o ${TMP_DIR}/$(1)/native.s ${SRC_FILE} + python3 $${ASM2ASM_${@cpu}} ${@asmout} ${TMP_DIR}/$(1)/native.s + asmfmt -w ${@asmout} + +$(eval $(call \ + build_tmpl, \ + $(1), \ + ${@stubin}, \ + ${@stubout} \ +)) + +$(foreach \ + tmpl, \ + $(value TMPL_$(1)), \ + $(eval $(call \ + build_tmpl, \ + $(1), \ + ${OUT_DIR}/${tmpl}.tmpl, \ + ${OUT_DIR}/$(1)/${tmpl}.go \ + )) \ +) +endef + +all: ${ARCH} clean: - rm -vf internal/native/native_amd64.s output/*.s + rm -vfr ${TMP_DIR}/{sse,avx,avx2} + rm -vfr ${OUT_DIR}/{sse,avx,avx2} -internal/native/native_amd64.s: ${NATIVE_SRC} ${NATIVE_ASM} internal/native/native_amd64.go - mkdir -p output - clang ${CFLAGS} -S -o output/native.s native/native.c - python3 tools/asm2asm/asm2asm.py internal/native/native_amd64.s output/native.s ${NATIVE_ASM} - asmfmt -w internal/native/native_amd64.s +$(foreach \ + arch, \ + ${ARCH}, \ + $(eval $(call build_arch,${arch})) \ +) diff --git a/ast/node.go b/ast/node.go index 634486a..1af30fb 100644 --- a/ast/node.go +++ b/ast/node.go @@ -21,7 +21,7 @@ import ( `fmt` `unsafe` - 
`github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` ) @@ -36,23 +36,23 @@ const ( ) const ( - V_RAW native.ValueType = 1 << 4 - V_NUMBER native.ValueType = 10 - V_ARRAY_RAW = V_RAW | native.V_ARRAY - V_OBJECT_RAW = V_RAW | native.V_OBJECT - MASK_RAW = V_RAW - 1 + V_RAW types.ValueType = 1 << 4 + V_NUMBER types.ValueType = 10 + V_ARRAY_RAW = V_RAW | types.V_ARRAY + V_OBJECT_RAW = V_RAW | types.V_OBJECT + MASK_RAW = V_RAW - 1 ) type Node struct { v int64 - t native.ValueType + t types.ValueType p unsafe.Pointer m map[string]unsafe.Pointer } /** Node Type Accessor **/ -func (self *Node) Type() native.ValueType { +func (self *Node) Type() types.ValueType { return self.t & MASK_RAW } @@ -69,12 +69,12 @@ func (self *Node) Raw() string { // Bool returns bool value represented by this node // -// If node type is not native.V_TRUE or native.V_FALSE, or V_RAW (must be a bool json value) +// If node type is not types.V_TRUE or types.V_FALSE, or V_RAW (must be a bool json value) // it will panic func (self *Node) Bool() bool { switch self.t { - case native.V_TRUE : return true - case native.V_FALSE : return false + case types.V_TRUE : return true + case types.V_FALSE : return false case V_RAW : n := self.parseRaw() return n.Bool() @@ -86,9 +86,9 @@ func (self *Node) Bool() bool { func (self *Node) Int64() int64 { switch self.t { case V_NUMBER : return numberToInt64(self) - case native.V_TRUE : return 1 - case native.V_FALSE : return 0 - case V_RAW : + case types.V_TRUE : return 1 + case types.V_FALSE : return 0 + case V_RAW : n := self.parseRaw() return n.Int64() default : panic("value cannot be represented as an integer") @@ -109,36 +109,36 @@ func (self *Node) Number() json.Number { // String as above. 
func (self *Node) String() string { switch self.t { - case V_NUMBER : return toNumber(self).String() - case native.V_NULL : return "null" - case native.V_TRUE : return "true" - case native.V_FALSE : return "false" - case native.V_STRING : return addr2str(self.p, self.v) + case V_NUMBER : return toNumber(self).String() + case types.V_NULL : return "null" + case types.V_TRUE : return "true" + case types.V_FALSE : return "false" + case types.V_STRING : return addr2str(self.p, self.v) case V_RAW : n := self.parseRaw() return n.String() - default : panic("value cannot be represented as a simple string") + default : panic("value cannot be represented as a simple string") } } // Float64 as above. func (self *Node) Float64() float64 { switch self.t { - case V_NUMBER : return numberToFloat64(self) - case native.V_TRUE : return 1.0 - case native.V_FALSE : return 0.0 + case V_NUMBER : return numberToFloat64(self) + case types.V_TRUE : return 1.0 + case types.V_FALSE : return 0.0 case V_RAW : n := self.parseRaw() return n.Float64() - default : panic("value cannot be represented as an integer") + default : panic("value cannot be represented as an integer") } } // IsRaw returns true if the node is type of below three: // // 1. V_RAW (never parsed) -// 2. native.V_Object_RAW (partially parsed) -// 3. native.V_Array_RAW (partially parsed) +// 2. types.V_Object_RAW (partially parsed) +// 3. 
types.V_Array_RAW (partially parsed) func (self *Node) IsRaw() bool { return self.t&V_RAW != 0 } @@ -148,9 +148,9 @@ func (self *Node) IsRaw() bool { // Len returns children count of a array|object|string node // For partially loaded node, it also works but only counts the parsed children func (self *Node) Len() int { - if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { + if self.t == types.V_ARRAY || self.t == types.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { return int(self.v & _LEN_MASK) - } else if self.t == native.V_STRING { + } else if self.t == types.V_STRING { return int(self.v) } else { panic("value does not have a length") @@ -159,7 +159,7 @@ func (self *Node) Len() int { // Cap returns malloc capacity of a array|object node for children func (self *Node) Cap() int { - if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { + if self.t == types.V_ARRAY || self.t == types.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { return int(self.v >> _CAP_BITS) } else { panic("value does not have a capacity") @@ -205,7 +205,7 @@ func (self *Node) SetByIndex(index int, node Node) { // Add appends the given node under array node func (self *Node) Add(node Node) { - self.must(native.V_ARRAY, "an array") + self.must(types.V_ARRAY, "an array") self.loadAllIndex() l := self.Len() c := self.Cap() @@ -240,26 +240,26 @@ func (self *Node) GetByPath(path ...interface{}) *Node { // Get loads given key of an object node on demands func (self *Node) Get(key string) *Node { - self.must(native.V_OBJECT, "an object") + self.must(types.V_OBJECT, "an object") return self.loadKey(key) } // Index loads given index of an array node on demands func (self *Node) Index(idx int) *Node { - self.must(native.V_ARRAY, "an array") + self.must(types.V_ARRAY, "an array") return self.loadIndex(idx) } // Values returns iterator for array's children traversal 
func (self *Node) Values() ListIterator { - self.must(native.V_ARRAY, "an array") + self.must(types.V_ARRAY, "an array") self.loadAllIndex() return ListIterator{Iterator{p: self}} } -// Values returns iterator for object's children traversal +// Properties returns iterator for object's children traversal func (self *Node) Properties() ObjectIterator { - self.must(native.V_OBJECT, "an object") + self.must(types.V_OBJECT, "an object") self.loadAllKey() return ObjectIterator{Iterator{p: self}} } @@ -268,28 +268,28 @@ func (self *Node) Properties() ObjectIterator { // Map loads all keys of an object node func (self *Node) Map() map[string]interface{} { - self.must(native.V_OBJECT, "an object") + self.must(types.V_OBJECT, "an object") self.loadAllKey() return self.toGenericObject() } // MapUseNumber loads all keys of an object node, with numeric nodes casted to json.Number func (self *Node) MapUseNumber() map[string]interface{} { - self.must(native.V_OBJECT, "an object") + self.must(types.V_OBJECT, "an object") self.loadAllKey() return self.toGenericObjectUseNumber() } // Array loads all indexes of an array node func (self *Node) Array() []interface{} { - self.must(native.V_ARRAY, "an array") + self.must(types.V_ARRAY, "an array") self.loadAllIndex() return self.toGenericArray() } -// Array loads all indexes of an array node, with numeric nodes casted to json.Number +// ArrayUseNumber loads all indexes of an array node, with numeric nodes casted to json.Number func (self *Node) ArrayUseNumber() []interface{} { - self.must(native.V_ARRAY, "an array") + self.must(types.V_ARRAY, "an array") self.loadAllIndex() return self.toGenericArrayUseNumber() } @@ -299,14 +299,14 @@ func (self *Node) ArrayUseNumber() []interface{} { // all numberic nodes are casted to float64 func (self *Node) Interface() interface{} { switch self.t { - case native.V_EOF : panic("invalid value") - case native.V_NULL : return nil - case native.V_TRUE : return true - case native.V_FALSE : return false - 
case native.V_ARRAY : return self.toGenericArray() - case native.V_OBJECT : return self.toGenericObject() - case native.V_STRING : return addr2str(self.p, self.v) - case V_NUMBER : + case types.V_EOF : panic("invalid value") + case types.V_NULL : return nil + case types.V_TRUE : return true + case types.V_FALSE : return false + case types.V_ARRAY : return self.toGenericArray() + case types.V_OBJECT : return self.toGenericObject() + case types.V_STRING : return addr2str(self.p, self.v) + case V_NUMBER : return numberToFloat64(self) case V_ARRAY_RAW: self.loadAllIndex() @@ -317,7 +317,7 @@ func (self *Node) Interface() interface{} { case V_RAW : n := self.parseRaw() return n.Interface() - default : panic("not gonna happen") + default : panic("not gonna happen") } } @@ -325,14 +325,14 @@ func (self *Node) Interface() interface{} { // except numberic nodes are casted to json.Number func (self *Node) InterfaceUseNumber() interface{} { switch self.t { - case native.V_EOF : panic("invalid value") - case native.V_NULL : return nil - case native.V_TRUE : return true - case native.V_FALSE : return false - case native.V_ARRAY : return self.toGenericArrayUseNumber() - case native.V_OBJECT : return self.toGenericObjectUseNumber() - case native.V_STRING : return addr2str(self.p, self.v) - case V_NUMBER : + case types.V_EOF : panic("invalid value") + case types.V_NULL : return nil + case types.V_TRUE : return true + case types.V_FALSE : return false + case types.V_ARRAY : return self.toGenericArrayUseNumber() + case types.V_OBJECT : return self.toGenericObjectUseNumber() + case types.V_STRING : return addr2str(self.p, self.v) + case V_NUMBER : return toNumber(self) case V_ARRAY_RAW: self.loadAllIndex() @@ -340,10 +340,10 @@ func (self *Node) InterfaceUseNumber() interface{} { case V_OBJECT_RAW: self.loadAllKey() return self.toGenericObjectUseNumber() - case V_RAW : + case V_RAW : n := self.parseRaw() return n.InterfaceUseNumber() - default : panic("not gonna happen") + default : 
panic("not gonna happen") } } @@ -355,7 +355,7 @@ var ( ) func (self *Node) setCapAndLen(cap int, len int) { - if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { + if self.t == types.V_ARRAY || self.t == types.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW { self.v = int64(len&_LEN_MASK | cap<<_CAP_BITS) } else { panic("value does not have a length") @@ -370,7 +370,7 @@ func (self *Pair) unsafe_next() *Pair { return (*Pair)(unsafe.Pointer(uintptr(unsafe.Pointer(self)) + _PAIR_SIZE)) } -func (self *Node) must(t native.ValueType, s string) { +func (self *Node) must(t types.ValueType, s string) { if self.t == V_RAW { *self = self.parseRaw() } @@ -470,7 +470,7 @@ func (self *Node) loadAllIndex() { if !self.IsRaw() { return } - var err native.ParsingError + var err types.ParsingError stack := (*parseArrayStack)(self.p) parser := &stack.parser old := parser.noLazy @@ -486,7 +486,7 @@ func (self *Node) loadAllKey() { if !self.IsRaw() { return } - var err native.ParsingError + var err types.ParsingError stack := (*parseObjectStack)(self.p) parser := &stack.parser old := parser.noLazy @@ -523,7 +523,7 @@ func (self *Node) loadIndex(index int) *Node { return nil } -func (self *Node) loadNextNode() (*Node, native.ParsingError) { +func (self *Node) loadNextNode() (*Node, types.ParsingError) { stack := (*parseArrayStack)(self.p) ret := stack.v parser := &stack.parser @@ -532,7 +532,7 @@ func (self *Node) loadNextNode() (*Node, native.ParsingError) { /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { - return nil, native.ERR_EOF + return nil, types.ERR_EOF } /* check for empty array */ @@ -543,7 +543,7 @@ func (self *Node) loadNextNode() (*Node, native.ParsingError) { } var val Node - var err native.ParsingError + var err types.ParsingError /* decode the value */ parser.noLazy = true @@ -558,7 +558,7 @@ func (self *Node) loadNextNode() (*Node, native.ParsingError) { /* check for EOF */ if 
parser.p >= ns { - return &ret[len(ret)-1], native.ERR_EOF + return &ret[len(ret)-1], types.ERR_EOF } /* check for the next character */ @@ -572,7 +572,7 @@ func (self *Node) loadNextNode() (*Node, native.ParsingError) { self.setArray(ret) return &ret[len(ret)-1], 0 default: - return &ret[len(ret)-1], native.ERR_INVALID_CHAR + return &ret[len(ret)-1], types.ERR_INVALID_CHAR } } @@ -596,7 +596,7 @@ func (self *Node) loadKey(key string) *Node { return nil } -func (self *Node) loadNextPair() (*Pair, native.ParsingError) { +func (self *Node) loadNextPair() (*Pair, types.ParsingError) { stack := (*parseObjectStack)(self.p) ret := stack.v parser := &stack.parser @@ -605,7 +605,7 @@ func (self *Node) loadNextPair() (*Pair, native.ParsingError) { /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { - return nil, native.ERR_EOF + return nil, types.ERR_EOF } /* check for empty object */ @@ -617,12 +617,12 @@ func (self *Node) loadNextPair() (*Pair, native.ParsingError) { /* decode one pair */ var val Node - var njs native.JsonState - var err native.ParsingError + var njs types.JsonState + var err types.ParsingError /* decode the key */ - if njs = parser.decodeValue(); njs.Vt != native.V_STRING { - return nil, native.ERR_INVALID_CHAR + if njs = parser.decodeValue(); njs.Vt != types.V_STRING { + return nil, types.ERR_INVALID_CHAR } /* extract the key */ @@ -654,7 +654,7 @@ func (self *Node) loadNextPair() (*Pair, native.ParsingError) { /* check for EOF */ if parser.p >= ns { - return &ret[len(ret)-1], native.ERR_EOF + return &ret[len(ret)-1], types.ERR_EOF } /* check for the next character */ @@ -668,7 +668,7 @@ func (self *Node) loadNextPair() (*Pair, native.ParsingError) { self.setObject(ret) return &ret[len(ret)-1], 0 default: - return &ret[len(ret)-1], native.ERR_INVALID_CHAR + return &ret[len(ret)-1], types.ERR_INVALID_CHAR } } @@ -752,12 +752,12 @@ func (self *Node) toGenericObjectUseNumber() map[string]interface{} { /** Internal Factory Methods **/ var 
( - nullNode = Node{t: native.V_NULL} - trueNode = Node{t: native.V_TRUE} - falseNode = Node{t: native.V_FALSE} + nullNode = Node{t: types.V_NULL} + trueNode = Node{t: types.V_TRUE} + falseNode = Node{t: types.V_FALSE} - emptyArrayNode = Node{t: native.V_ARRAY} - emptyObjectNode = Node{t: native.V_OBJECT} + emptyArrayNode = Node{t: types.V_ARRAY} + emptyObjectNode = Node{t: types.V_OBJECT} ) func newNumber(v string) Node { @@ -790,7 +790,7 @@ func numberToInt64(node *Node) int64 { func newBytes(v []byte) Node { return Node{ - t: native.V_STRING, + t: types.V_STRING, p: mem2ptr(v), v: int64(len(v)), } @@ -798,7 +798,7 @@ func newBytes(v []byte) Node { func newString(v string) Node { return Node{ - t: native.V_STRING, + t: types.V_STRING, p: str2ptr(v), v: int64(len(v)), } @@ -806,28 +806,28 @@ func newString(v string) Node { func newArray(v []Node) Node { return Node{ - t: native.V_ARRAY, + t: types.V_ARRAY, v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS), p: *(*unsafe.Pointer)(unsafe.Pointer(&v)), } } func (self *Node) setArray(v []Node) { - self.t = native.V_ARRAY + self.t = types.V_ARRAY self.setCapAndLen(cap(v), len(v)) self.p = *(*unsafe.Pointer)(unsafe.Pointer(&v)) } func newObject(v []Pair) Node { return Node{ - t: native.V_OBJECT, + t: types.V_OBJECT, v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS), p: *(*unsafe.Pointer)(unsafe.Pointer(&v)), } } func (self *Node) setObject(v []Pair) { - self.t = native.V_OBJECT + self.t = types.V_OBJECT self.setCapAndLen(cap(v), len(v)) self.p = *(*unsafe.Pointer)(unsafe.Pointer(&v)) } diff --git a/ast/parser.go b/ast/parser.go index 04c73a1..85536f7 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -21,6 +21,7 @@ import ( `unsafe` `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` ) @@ -34,24 +35,24 @@ type Parser struct { var stackPool = sync.Pool{ New: func()interface{}{ - return &native.StateMachine{} + return &types.StateMachine{} }, } 
/** Parser Private Methods **/ -func (self *Parser) delim() native.ParsingError { +func (self *Parser) delim() types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { - return native.ERR_EOF + return types.ERR_EOF } /* check for the delimtier */ if self.s[p] != ':' { - return native.ERR_INVALID_CHAR + return types.ERR_INVALID_CHAR } /* update the read pointer */ @@ -59,18 +60,18 @@ func (self *Parser) delim() native.ParsingError { return 0 } -func (self *Parser) object() native.ParsingError { +func (self *Parser) object() types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { - return native.ERR_EOF + return types.ERR_EOF } /* check for the delimtier */ if self.s[p] != '{' { - return native.ERR_INVALID_CHAR + return types.ERR_INVALID_CHAR } /* update the read pointer */ @@ -78,18 +79,18 @@ func (self *Parser) object() native.ParsingError { return 0 } -func (self *Parser) array() native.ParsingError { +func (self *Parser) array() types.ParsingError { n := len(self.s) p := self.lspace(self.p) /* check for EOF */ if p >= n { - return native.ERR_EOF + return types.ERR_EOF } /* check for the delimtier */ if self.s[p] != '[' { - return native.ERR_INVALID_CHAR + return types.ERR_INVALID_CHAR } /* update the read pointer */ @@ -102,19 +103,19 @@ func (self *Parser) lspace(sp int) int { return native.Lspace(sv.Ptr, sv.Len, sp) } -func (self *Parser) decodeValue() (val native.JsonState) { +func (self *Parser) decodeValue() (val types.JsonState) { sv := (*rt.GoString)(unsafe.Pointer(&self.s)) self.p = native.Value(sv.Ptr, sv.Len, self.p, &val) return } -func (self *Parser) decodeArray(ret []Node) (Node, native.ParsingError) { +func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) { sp := self.p ns := len(self.s) /* check for EOF */ if self.p = self.lspace(sp); self.p >= ns { - return Node{}, native.ERR_EOF + return Node{}, types.ERR_EOF } /* check for empty array */ @@ -126,7 
+127,7 @@ func (self *Parser) decodeArray(ret []Node) (Node, native.ParsingError) { /* allocate array space and parse every element */ for { var val Node - var err native.ParsingError + var err types.ParsingError /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -139,7 +140,7 @@ func (self *Parser) decodeArray(ret []Node) (Node, native.ParsingError) { /* check for EOF */ if self.p >= ns { - return Node{}, native.ERR_EOF + return Node{}, types.ERR_EOF } /* check for the next character */ @@ -150,18 +151,18 @@ func (self *Parser) decodeArray(ret []Node) (Node, native.ParsingError) { if val.IsRaw() { return newRawArray(self, ret), 0 } - return Node{}, native.ERR_INVALID_CHAR + return Node{}, types.ERR_INVALID_CHAR } } } -func (self *Parser) decodeObject(ret []Pair) (Node, native.ParsingError) { +func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) { sp := self.p ns := len(self.s) /* check for EOF */ if self.p = self.lspace(sp); self.p >= ns { - return Node{}, native.ERR_EOF + return Node{}, types.ERR_EOF } /* check for empty object */ @@ -173,12 +174,12 @@ func (self *Parser) decodeObject(ret []Pair) (Node, native.ParsingError) { /* decode each pair */ for { var val Node - var njs native.JsonState - var err native.ParsingError + var njs types.JsonState + var err types.ParsingError /* decode the key */ - if njs = self.decodeValue(); njs.Vt != native.V_STRING { - return Node{}, native.ERR_INVALID_CHAR + if njs = self.decodeValue(); njs.Vt != types.V_STRING { + return Node{}, types.ERR_INVALID_CHAR } /* extract the key */ @@ -208,7 +209,7 @@ func (self *Parser) decodeObject(ret []Pair) (Node, native.ParsingError) { /* check for EOF */ if self.p >= ns { - return Node{}, native.ERR_EOF + return Node{}, types.ERR_EOF } /* check for the next character */ @@ -219,12 +220,12 @@ func (self *Parser) decodeObject(ret []Pair) (Node, native.ParsingError) { if val.IsRaw() { return newRawObject(self, ret), 0 } - return Node{}, native.ERR_INVALID_CHAR 
+ return Node{}, types.ERR_INVALID_CHAR } } } -func (self *Parser) decodeString(iv int64, ep int) (Node, native.ParsingError) { +func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) { p := self.p - 1 s := self.s[iv:p] @@ -251,36 +252,36 @@ func (self *Parser) Pos() int { return self.p } -func (self *Parser) Parse() (Node, native.ParsingError) { +func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { - case native.V_EOF : return Node{}, native.ERR_EOF - case native.V_NULL : return nullNode, 0 - case native.V_TRUE : return trueNode, 0 - case native.V_FALSE : return falseNode, 0 - case native.V_STRING : return self.decodeString(val.Iv, val.Ep) - case native.V_ARRAY: + case types.V_EOF : return Node{}, types.ERR_EOF + case types.V_NULL : return nullNode, 0 + case types.V_TRUE : return trueNode, 0 + case types.V_FALSE : return falseNode, 0 + case types.V_STRING : return self.decodeString(val.Iv, val.Ep) + case types.V_ARRAY: if self.noLazy { return self.decodeArray(make([]Node, 0, _DEFAULT_NODE_CAP)) } return newRawArray(self, make([]Node, 0, _DEFAULT_NODE_CAP)), 0 - case native.V_OBJECT: + case types.V_OBJECT: if self.noLazy { return self.decodeObject(make([]Pair, 0, _DEFAULT_NODE_CAP)) } return newRawObject(self, make([]Pair, 0, _DEFAULT_NODE_CAP)), 0 - case native.V_DOUBLE : return newNumber(self.s[val.Ep:self.p]), 0 - case native.V_INTEGER : return newNumber(self.s[val.Ep:self.p]), 0 - default : return Node{}, native.ParsingError(-val.Vt) + case types.V_DOUBLE : return newNumber(self.s[val.Ep:self.p]), 0 + case types.V_INTEGER : return newNumber(self.s[val.Ep:self.p]), 0 + default : return Node{}, types.ParsingError(-val.Vt) } } -func (self *Parser) skip() (int, native.ParsingError) { - fsm := stackPool.Get().(*native.StateMachine) +func (self *Parser) skip() (int, types.ParsingError) { + fsm := stackPool.Get().(*types.StateMachine) start := native.SkipOne(&self.s, &self.p, fsm) stackPool.Put(fsm) 
if start < 0 { - return self.p, native.ParsingError(-start) + return self.p, types.ParsingError(-start) } return start, 0 } @@ -288,7 +289,7 @@ func (self *Parser) skip() (int, native.ParsingError) { /** Parser Factory **/ // Loads parse all json into interface{} -func Loads(src string) (int, interface{}, native.ParsingError) { +func Loads(src string) (int, interface{}, types.ParsingError) { ps := &Parser{s: src} np, err := ps.Parse() @@ -300,8 +301,8 @@ func Loads(src string) (int, interface{}, native.ParsingError) { } } -// Loads parse all json into interface{}, with numeric nodes casted to json.Number -func LoadsUseNumber(src string) (int, interface{}, native.ParsingError) { +// LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number +func LoadsUseNumber(src string) (int, interface{}, types.ParsingError) { ps := &Parser{s: src} np, err := ps.Parse() diff --git a/ast/search.go b/ast/search.go index 93784ae..17c4537 100644 --- a/ast/search.go +++ b/ast/search.go @@ -19,7 +19,7 @@ package ast import ( `fmt` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` ) type Searcher struct { @@ -50,7 +50,7 @@ func (self *Parser) printNear(start int) string { func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { self.parser.p = 0 - var err native.ParsingError + var err types.ParsingError for _, p := range path { switch p.(type) { case int: @@ -80,7 +80,7 @@ func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { return newRawNode(self.parser.s[start:self.parser.p]), nil } -func (self *Parser) searchKey(match string) native.ParsingError { +func (self *Parser) searchKey(match string) types.ParsingError { ns := len(self.s) if err := self.object(); err != 0 { return err @@ -88,23 +88,23 @@ func (self *Parser) searchKey(match string) native.ParsingError { /* check for EOF */ if self.p = self.lspace(self.p); self.p >= ns { - return native.ERR_EOF + return types.ERR_EOF } /* 
check for empty object */ if self.s[self.p] == '}' { self.p++ - return native.ERR_EOF + return types.ERR_EOF } - var njs native.JsonState - var err native.ParsingError + var njs types.JsonState + var err types.ParsingError /* decode each pair */ for { /* decode the key */ - if njs = self.decodeValue(); njs.Vt != native.V_STRING { - return native.ERR_INVALID_CHAR + if njs = self.decodeValue(); njs.Vt != types.V_STRING { + return types.ERR_INVALID_CHAR } /* extract the key */ @@ -135,7 +135,7 @@ func (self *Parser) searchKey(match string) native.ParsingError { /* check for EOF */ self.p = self.lspace(self.p) if self.p >= ns { - return native.ERR_EOF + return types.ERR_EOF } /* check for the next character */ @@ -144,14 +144,14 @@ func (self *Parser) searchKey(match string) native.ParsingError { self.p++ case '}': self.p++ - return native.ERR_EOF + return types.ERR_EOF default: - return native.ERR_INVALID_CHAR + return types.ERR_INVALID_CHAR } } } -func (self *Parser) searchIndex(idx int) native.ParsingError { +func (self *Parser) searchIndex(idx int) types.ParsingError { ns := len(self.s) if err := self.array(); err != 0 { return err @@ -159,16 +159,16 @@ func (self *Parser) searchIndex(idx int) native.ParsingError { /* check for EOF */ if self.p = self.lspace(self.p); self.p >= ns { - return native.ERR_EOF + return types.ERR_EOF } /* check for empty array */ if self.s[self.p] == ']' { self.p++ - return native.ERR_EOF + return types.ERR_EOF } - var err native.ParsingError + var err types.ParsingError /* allocate array space and parse every element */ for i := 0; i < idx; i++ { @@ -180,7 +180,7 @@ func (self *Parser) searchIndex(idx int) native.ParsingError { /* check for EOF */ self.p = self.lspace(self.p) if self.p >= ns { - return native.ERR_EOF + return types.ERR_EOF } /* check for the next character */ @@ -189,9 +189,9 @@ func (self *Parser) searchIndex(idx int) native.ParsingError { self.p++ case ']': self.p++ - return native.ERR_EOF + return types.ERR_EOF 
default: - return native.ERR_INVALID_CHAR + return types.ERR_INVALID_CHAR } } diff --git a/ast/utils.go b/ast/utils.go index 80a3ba7..fa5fa7e 100644 --- a/ast/utils.go +++ b/ast/utils.go @@ -20,6 +20,7 @@ import ( `unsafe` `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` ) @@ -50,7 +51,7 @@ func addr2str(p unsafe.Pointer, n int64) (s string) { return } -func unquoteBytes(s string, m *[]byte) native.ParsingError { +func unquoteBytes(s string, m *[]byte) types.ParsingError { pos := -1 slv := (*rt.GoSlice)(unsafe.Pointer(m)) str := (*rt.GoString)(unsafe.Pointer(&s)) @@ -58,7 +59,7 @@ func unquoteBytes(s string, m *[]byte) native.ParsingError { /* check for errors */ if ret < 0 { - return native.ParsingError(-ret) + return types.ParsingError(-ret) } /* update the length */ @@ -66,7 +67,7 @@ func unquoteBytes(s string, m *[]byte) native.ParsingError { return 0 } -func UnquoteString(s string) (ret string, err native.ParsingError) { +func UnquoteString(s string) (ret string, err types.ParsingError) { mm := make([]byte, 0, len(s)) err = unquoteBytes(s, &mm) ret = rt.Mem2Str(mm) diff --git a/decoder/assembler_amd64.go b/decoder/assembler_amd64.go index 9f5b21e..3e85b81 100644 --- a/decoder/assembler_amd64.go +++ b/decoder/assembler_amd64.go @@ -25,6 +25,7 @@ import ( `github.com/bytedance/sonic/internal/caching` `github.com/bytedance/sonic/internal/jit` `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/twitchyliquid64/golang-asm/obj` ) @@ -424,7 +425,7 @@ func (self *_Assembler) base64_error() { func (self *_Assembler) parsing_error() { self.Link(_LB_eof_error) // _eof_error: self.Emit("MOVQ" , _IL, _IC) // MOVQ IL, IC - self.Emit("MOVL" , jit.Imm(int64(native.ERR_EOF)), _EP) // MOVL ${native.ERR_EOF}, EP + self.Emit("MOVL" , jit.Imm(int64(types.ERR_EOF)), _EP) // MOVL ${types.ERR_EOF}, 
EP self.Sjmp("JMP" , _LB_parsing_error) // JMP _parsing_error self.Link(_LB_unquote_error) // _unquote_error: self.Emit("SUBQ" , _VAR_sr, _SI) // SUBQ sr, SI @@ -457,7 +458,7 @@ func (self *_Assembler) parsing_error() { self.Link(_LB_char_1_error) // _char_1_error: self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC self.Link(_LB_char_0_error) // _char_0_error: - self.Emit("MOVL" , jit.Imm(int64(native.ERR_INVALID_CHAR)), _EP) // MOVL ${native.ERR_INVALID_CHAR}, EP + self.Emit("MOVL" , jit.Imm(int64(types.ERR_INVALID_CHAR)), _EP) // MOVL ${types.ERR_INVALID_CHAR}, EP self.Link(_LB_parsing_error) // _parsing_error: self.Emit("MOVOU", _ARG_s, _X0) // MOVOU s, X0 self.Emit("MOVOU", _X0, jit.Ptr(_SP, 0)) // MOVOU X0, (SP) @@ -570,15 +571,15 @@ func init() { } func (self *_Assembler) range_single() { - self.Emit("VMOVSD" , _VAR_st_Dv, _X0) // VMOVSD st.Dv, X0 + self.Emit("MOVSD" , _VAR_st_Dv, _X0) // MOVSD st.Dv, X0 self.Emit("MOVQ" , _V_max_f32, _AX) // MOVQ _max_f32, AX self.Emit("MOVQ" , jit.Gitab(_I_float32), _ET) // MOVQ ${itab(float32)}, ET self.Emit("MOVQ" , jit.Gtype(_T_float32), _EP) // MOVQ ${type(float32)}, EP - self.Emit("VUCOMISD", jit.Ptr(_AX, 0), _X0) // VUCOMISD (AX), X0 + self.Emit("UCOMISD" , jit.Ptr(_AX, 0), _X0) // UCOMISD (AX), X0 self.Sjmp("JA" , _LB_range_error) // JA _range_error self.Emit("MOVQ" , _V_min_f32, _AX) // MOVQ _min_f32, AX - self.Emit("VMOVSD" , jit.Ptr(_AX, 0), _X1) // VMOVSD (AX), X1 - self.Emit("VUCOMISD", _X0, _X1) // VUCOMISD X0, X1 + self.Emit("MOVSD" , jit.Ptr(_AX, 0), _X1) // MOVSD (AX), X1 + self.Emit("UCOMISD" , _X0, _X1) // UCOMISD X0, X1 self.Sjmp("JA" , _LB_range_error) // JA _range_error self.Emit("CVTSD2SS", _X0, _X0) // CVTSD2SS X0, X0 } @@ -634,7 +635,7 @@ func (self *_Assembler) unquote_once(p obj.Addr, n obj.Addr) { self.Emit("XORL" , _R8, _R8) // XORL R8, R8 self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, fv self.Emit("SETCC", _R8) // SETCC R8 - self.Emit("SHLQ" , 
jit.Imm(native.B_UNICODE_REPLACE), _R8) // SHLQ ${native.B_UNICODE_REPLACE}, R8 + self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _R8) // SHLQ ${types.B_UNICODE_REPLACE}, R8 self.call(_F_unquote) // CALL unquote self.Emit("MOVQ" , n, _SI) // MOVQ ${n}, SI self.Emit("ADDQ" , jit.Imm(1), _SI) // ADDQ $1, SI @@ -661,11 +662,11 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) { self.Emit("MOVQ" , n, _SI) // MOVQ ${n}, SI self.Emit("MOVQ" , _DX, p) // MOVQ DX, ${p} self.Emit("LEAQ" , _VAR_sr, _CX) // LEAQ sr, CX - self.Emit("MOVL" , jit.Imm(native.F_DOUBLE_UNQUOTE), _R8) // MOVL ${native.F_DOUBLE_UNQUOTE}, R8 + self.Emit("MOVL" , jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8 self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, AX self.Emit("XORL" , _AX, _AX) // XORL AX, AX self.Emit("SETCC", _AX) // SETCC AX - self.Emit("SHLQ" , jit.Imm(native.B_UNICODE_REPLACE), _AX) // SHLQ ${native.B_UNICODE_REPLACE}, AX + self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _AX) // SHLQ ${types.B_UNICODE_REPLACE}, AX self.Emit("ORQ" , _AX, _R8) // ORQ AX, R8 self.call(_F_unquote) // CALL unquote self.Emit("MOVQ" , n, _SI) // MOVQ ${n}, SI @@ -1013,15 +1014,15 @@ func (self *_Assembler) _asm_OP_u64(_ *_Instr) { } func (self *_Assembler) _asm_OP_f32(_ *_Instr) { - self.parse_number() // PARSE NUMBER - self.range_single() // RANGE float32 - self.Emit("VMOVSS", _X0, jit.Ptr(_VP, 0)) // VMOVSS X0, (VP) + self.parse_number() // PARSE NUMBER + self.range_single() // RANGE float32 + self.Emit("MOVSS", _X0, jit.Ptr(_VP, 0)) // MOVSS X0, (VP) } func (self *_Assembler) _asm_OP_f64(_ *_Instr) { - self.parse_number() // PARSE NUMBER - self.Emit("VMOVSD", _VAR_st_Dv, _X0) // VMOVSD st.Dv, X0 - self.Emit("VMOVSD", _X0, jit.Ptr(_VP, 0)) // VMOVSD X0, (VP) + self.parse_number() // PARSE NUMBER + self.Emit("MOVSD", _VAR_st_Dv, _X0) // MOVSD st.Dv, X0 + self.Emit("MOVSD", _X0, jit.Ptr(_VP, 0)) // MOVSD X0, (VP) } func (self 
*_Assembler) _asm_OP_unquote(_ *_Instr) { @@ -1041,13 +1042,13 @@ func (self *_Assembler) _asm_OP_nil_1(_ *_Instr) { } func (self *_Assembler) _asm_OP_nil_2(_ *_Instr) { - self.Emit("VPXOR", _X0, _X0, _X0) // VPXOR X0, X0, X0 + self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) } func (self *_Assembler) _asm_OP_nil_3(_ *_Instr) { self.Emit("XORL" , _AX, _AX) // XORL AX, AX - self.Emit("VPXOR", _X0, _X0, _X0) // VPXOR X0, X0, X0 + self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 16)) // MOVOU X0, 16(VP) } @@ -1133,7 +1134,7 @@ func (self *_Assembler) _asm_OP_map_key_u64(p *_Instr) { func (self *_Assembler) _asm_OP_map_key_f32(p *_Instr) { self.parse_number() // PARSE NUMBER self.range_single() // RANGE float32 - self.Emit("VMOVSS", _X0, _VAR_st_Dv) // VMOVSS X0, st.Dv + self.Emit("MOVSS", _X0, _VAR_st_Dv) // MOVSS X0, st.Dv self.mapassign_std(p.vt(), _VAR_st_Dv) // MAPASSIGN ${p.vt()}, mapassign, st.Dv } @@ -1371,7 +1372,7 @@ func (self *_Assembler) _asm_OP_drop_2(_ *_Instr) { self.Emit("SUBQ" , jit.Imm(16), _AX) // SUBQ $16, AX self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _VP) // MOVQ 8(ST)(AX), VP self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("VPXOR", _X0, _X0, _X0) // VPXOR X0, X0, X0 + self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 8)) // MOVOU X0, 8(ST)(AX) } diff --git a/decoder/assembler_test.go b/decoder/assembler_test.go index 634bc2a..1bf6e68 100644 --- a/decoder/assembler_test.go +++ b/decoder/assembler_test.go @@ -25,7 +25,7 @@ import ( `github.com/bytedance/sonic/internal/caching` `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/stretchr/testify/assert` `github.com/stretchr/testify/require` @@ -182,26 
+182,26 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_str/error_eof", ins: []_Instr{newInsOp(_OP_str)}, src: `12345`, - err: SyntaxError{Src: `12345`, Pos: 5, Code: native.ERR_EOF}, + err: SyntaxError{Src: `12345`, Pos: 5, Code: types.ERR_EOF}, val: new(string), }, { key: "_OP_str/error_invalid_escape", ins: []_Instr{newInsOp(_OP_str)}, src: `12\g345"`, - err: SyntaxError{Src: `12\g345"`, Pos: 3, Code: native.ERR_INVALID_ESCAPE}, + err: SyntaxError{Src: `12\g345"`, Pos: 3, Code: types.ERR_INVALID_ESCAPE}, val: new(string), }, { key: "_OP_str/error_invalid_unicode", ins: []_Instr{newInsOp(_OP_str)}, src: `hello\ud800world"`, opt: 1 << _F_disable_urc, - err: SyntaxError{Src: `hello\ud800world"`, Pos: 7, Code: native.ERR_INVALID_UNICODE}, + err: SyntaxError{Src: `hello\ud800world"`, Pos: 7, Code: types.ERR_INVALID_UNICODE}, val: new(string), }, { key: "_OP_str/error_invalid_char", ins: []_Instr{newInsOp(_OP_str)}, src: `12\u1ggg345"`, - err: SyntaxError{Src: `12\u1ggg345"`, Pos: 5, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `12\u1ggg345"`, Pos: 5, Code: types.ERR_INVALID_CHAR}, val: new(string), }, { key: "_OP_bin", @@ -213,7 +213,7 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_bin/error_eof", ins: []_Instr{newInsOp(_OP_bin)}, src: `aGVsbG8sIHdvcmxk`, - err: SyntaxError{Src: `aGVsbG8sIHdvcmxk`, Pos: 16, Code: native.ERR_EOF}, + err: SyntaxError{Src: `aGVsbG8sIHdvcmxk`, Pos: 16, Code: types.ERR_EOF}, val: new([]byte), }, { key: "_OP_bin/error_corrupt_input", @@ -243,25 +243,25 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_bool/error_eof_1", ins: []_Instr{newInsOp(_OP_bool)}, src: "tru", - err: SyntaxError{Src: `tru`, Pos: 3, Code: native.ERR_EOF}, + err: SyntaxError{Src: `tru`, Pos: 3, Code: types.ERR_EOF}, val: new(bool), }, { key: "_OP_bool/error_eof_2", ins: []_Instr{newInsOp(_OP_bool)}, src: "fals", - err: SyntaxError{Src: `fals`, Pos: 4, Code: native.ERR_EOF}, + err: SyntaxError{Src: `fals`, Pos: 4, Code: types.ERR_EOF}, 
val: new(bool), }, { key: "_OP_bool/error_invalid_char_1", ins: []_Instr{newInsOp(_OP_bool)}, src: "falxe", - err: SyntaxError{Src: `falxe`, Pos: 3, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `falxe`, Pos: 3, Code: types.ERR_INVALID_CHAR}, val: new(bool), }, { key: "_OP_bool/error_invalid_char_2", ins: []_Instr{newInsOp(_OP_bool)}, src: "falsx", - err: SyntaxError{Src: `falsx`, Pos: 4, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `falsx`, Pos: 4, Code: types.ERR_INVALID_CHAR}, val: new(bool), }, { key: "_OP_num/positive", @@ -279,13 +279,13 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_num/error_eof", ins: []_Instr{newInsOp(_OP_num)}, src: "-", - err: SyntaxError{Src: `-`, Pos: 1, Code: native.ERR_EOF}, + err: SyntaxError{Src: `-`, Pos: 1, Code: types.ERR_EOF}, val: new(json.Number), }, { key: "_OP_num/error_invalid_char", ins: []_Instr{newInsOp(_OP_num)}, src: "xxx", - err: SyntaxError{Src: `xxx`, Pos: 0, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `xxx`, Pos: 0, Code: types.ERR_INVALID_CHAR}, val: new(json.Number), }, { key: "_OP_i8", @@ -303,7 +303,7 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_i8/error_wrong_type", ins: []_Instr{newInsOp(_OP_i8)}, src: "12.34", - err: SyntaxError{Src: `12.34`, Pos: 2, Code: native.ERR_INVALID_NUMBER_FMT}, + err: SyntaxError{Src: `12.34`, Pos: 2, Code: types.ERR_INVALID_NUMBER_FMT}, val: new(int8), }, { key: "_OP_u8", @@ -321,13 +321,13 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_u8/error_underflow", ins: []_Instr{newInsOp(_OP_u8)}, src: "-123", - err: SyntaxError{Src: `-123`, Pos: 0, Code: native.ERR_INVALID_NUMBER_FMT}, + err: SyntaxError{Src: `-123`, Pos: 0, Code: types.ERR_INVALID_NUMBER_FMT}, val: new(uint8), }, { key: "_OP_u8/error_wrong_type", ins: []_Instr{newInsOp(_OP_u8)}, src: "12.34", - err: SyntaxError{Src: `12.34`, Pos: 2, Code: native.ERR_INVALID_NUMBER_FMT}, + err: SyntaxError{Src: `12.34`, Pos: 2, Code: types.ERR_INVALID_NUMBER_FMT}, val: 
new(uint8), }, { key: "_OP_f32", @@ -369,7 +369,7 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_unquote/error_invalid_end", ins: []_Instr{newInsOp(_OP_unquote)}, src: `\"te\\\"st"`, - err: SyntaxError{Src: `\"te\\\"st"`, Pos: 8, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `\"te\\\"st"`, Pos: 8, Code: types.ERR_INVALID_CHAR}, val: new(string), }, { key: "_OP_nil_1", @@ -382,7 +382,7 @@ func TestAssembler_OpCode(t *testing.T) { ins: []_Instr{newInsOp(_OP_nil_2)}, src: "", exp: error(nil), - val: (func() *error { v := new(error); *v = native.ERR_EOF; return v })(), + val: (func() *error { v := new(error); *v = types.ERR_EOF; return v })(), }, { key: "_OP_nil_3", ins: []_Instr{newInsOp(_OP_nil_3)}, @@ -623,7 +623,7 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_lspace/error", ins: []_Instr{newInsOp(_OP_lspace)}, src: "", - err: SyntaxError{Src: ``, Pos: 0, Code: native.ERR_EOF}, + err: SyntaxError{Src: ``, Pos: 0, Code: types.ERR_EOF}, val: nil, }, { key: "_OP_match_char/correct", @@ -635,7 +635,7 @@ func TestAssembler_OpCode(t *testing.T) { key: "_OP_match_char/error", ins: []_Instr{newInsVb(_OP_match_char, 'b')}, src: "a", - err: SyntaxError{Src: `a`, Pos: 0, Code: native.ERR_INVALID_CHAR}, + err: SyntaxError{Src: `a`, Pos: 0, Code: types.ERR_INVALID_CHAR}, val: nil, }, { key: "_OP_switch", diff --git a/decoder/errors.go b/decoder/errors.go index e4783fc..6cb3369 100644 --- a/decoder/errors.go +++ b/decoder/errors.go @@ -24,13 +24,13 @@ import ( `strconv` `strings` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` ) type SyntaxError struct { Pos int Src string - Code native.ParsingError + Code types.ParsingError } func (self SyntaxError) Error() string { @@ -86,7 +86,7 @@ func clamp_zero(v int) int { } } -func error_wrap(src string, pos int, code native.ParsingError) error { +func error_wrap(src string, pos int, code types.ParsingError) error { return SyntaxError { Pos : pos, Src : src, @@ 
-111,6 +111,6 @@ func error_value(value string, vtype reflect.Type) error { } //go:nosplit -func throw_invalid_type(vt native.ValueType) { +func throw_invalid_type(vt types.ValueType) { throw(fmt.Sprintf("invalid value type: %d", vt)) } diff --git a/decoder/errors_test.go b/decoder/errors_test.go index 259b328..3f6d51f 100644 --- a/decoder/errors_test.go +++ b/decoder/errors_test.go @@ -19,14 +19,14 @@ package decoder import ( `testing` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` ) func make_err(src string, pos int) SyntaxError { return SyntaxError { Src : src, Pos : pos, - Code : native.ERR_INVALID_CHAR, + Code : types.ERR_INVALID_CHAR, } } diff --git a/decoder/generic_amd64.go b/decoder/generic_amd64.go index 5703317..a0f4a6d 100644 --- a/decoder/generic_amd64.go +++ b/decoder/generic_amd64.go @@ -22,6 +22,7 @@ import ( `github.com/bytedance/sonic/internal/jit` `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/twitchyliquid64/golang-asm/obj` ) @@ -112,8 +113,8 @@ var ( ) var ( - _V_max = jit.Imm(int64(native.V_MAX)) - _V_eof = jit.Imm(int64(native.ERR_EOF)) + _V_max = jit.Imm(int64(types.V_MAX)) + _V_eof = jit.Imm(int64(types.ERR_EOF)) _F_value = jit.Imm(int64(native.S_value)) ) @@ -293,7 +294,7 @@ func (self *_ValueDecoder) instrs() { self.Emit("XORL" , _R8, _R8) // XORL R8, R8 self.Emit("BTQ" , jit.Imm(_F_disable_urc), _VP) // BTQ ${_F_disable_urc}, VP self.Emit("SETCC", _R8) // SETCC R8 - self.Emit("SHLQ" , jit.Imm(native.B_UNICODE_REPLACE), _R8) // SHLQ ${native.B_UNICODE_REPLACE}, R8 + self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _R8) // SHLQ ${types.B_UNICODE_REPLACE}, R8 self.call(_F_unquote) // CALL unquote self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JS" , _LB_esc_error) // JS _esc_error @@ -309,14 +310,14 @@ func (self *_ValueDecoder) instrs() { /* V_DOUBLE */ 
self.Link(_SW_case_V_DOUBLE) - self.Emit("BTQ" , jit.Imm(_F_use_number), _VP) // BTQ ${_F_use_number}, VP - self.Sjmp("JC" , "_use_number") // JC _use_number - self.Emit("VMOVSD", _VAR_vv_Dv, _X0) // VMOVSD st.Dv, X0 - self.Emit("VMOVSD", _X0, jit.Ptr(_SP, 0)) // VMOVSD X0, (SP) + self.Emit("BTQ" , jit.Imm(_F_use_number), _VP) // BTQ ${_F_use_number}, VP + self.Sjmp("JC" , "_use_number") // JC _use_number + self.Emit("MOVSD", _VAR_vv_Dv, _X0) // MOVSD st.Dv, X0 + self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP) self.call_go(_F_convT64) // CALL_GO convT64 - self.Emit("MOVQ" , _T_float64, _RT) // MOVQ ${type(float64)}, RT - self.Emit("MOVQ" , jit.Ptr(_SP, 8), _RV) // MOVQ 8(SP), RV - self.Sjmp("JMP" , _LB_done) // JMP _done + self.Emit("MOVQ" , _T_float64, _RT) // MOVQ ${type(float64)}, RT + self.Emit("MOVQ" , jit.Ptr(_SP, 8), _RV) // MOVQ 8(SP), RV + self.Sjmp("JMP" , _LB_done) // JMP _done /* V_INTEGER */ self.Link(_SW_case_V_INTEGER) diff --git a/decoder/generic_amd64.s b/decoder/generic_amd64.s index 9aabc73..505a16e 100644 --- a/decoder/generic_amd64.s +++ b/decoder/generic_amd64.s @@ -39,15 +39,15 @@ #define ERR_INVALID_CHAR $-2 #define lspace(to) \ - MOVQ PS, DI \ - MOVQ PN, SI \ - MOVQ PI, DX \ - MOVQ github·com∕bytedance∕sonic∕internal∕native·_subr__lspace(SB), AX \ - CALL AX \ - MOVQ AX, PI \ - TESTQ AX, AX \ - JNS to \ - RET \ + MOVQ PS, DI \ + MOVQ PN, SI \ + MOVQ PI, DX \ + MOVQ github·com∕bytedance∕sonic∕internal∕native·S_lspace(SB), AX \ + CALL AX \ + MOVQ AX, PI \ + TESTQ AX, AX \ + JNS to \ + RET \ to: #define match_eof(to) \ @@ -337,7 +337,7 @@ TEXT decodeObjectKey(SB), NOSPLIT, $120 - 0 LEAQ VAR_in, DI LEAQ VAR_in_PI, SI LEAQ VAR_vv, DX - MOVQ github·com∕bytedance∕sonic∕internal∕native·_subr__vstring(SB), AX + MOVQ github·com∕bytedance∕sonic∕internal∕native·S_vstring(SB), AX CALL AX MOVQ VAR_in_PI, PI @@ -397,7 +397,7 @@ _unquote: BTQ F_DISABLE_URC, FL SETCC R8 SHLQ B_UNICODE_REPLACE, R8 - MOVQ 
github·com∕bytedance∕sonic∕internal∕native·_subr__unquote(SB), AX + MOVQ github·com∕bytedance∕sonic∕internal∕native·S_unquote(SB), AX CALL AX TESTQ AX, AX JS _escape_error diff --git a/decoder/generic_test.go b/decoder/generic_test.go index 51b1abf..e64c1d7 100644 --- a/decoder/generic_test.go +++ b/decoder/generic_test.go @@ -23,7 +23,7 @@ import ( `unsafe` `github.com/bytedance/sonic/ast` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/davecgh/go-spew/spew` `github.com/stretchr/testify/require` @@ -32,7 +32,7 @@ import ( //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func decodeInterface(s string, f uint64) (int, interface{}, native.ParsingError) +func decodeInterface(s string, f uint64) (int, interface{}, types.ParsingError) //go:nosplit //go:noescape @@ -54,7 +54,7 @@ func TestGeneric_DecodeInterface(t *testing.T) { func TestGeneric_DecodeObjectKeyString(t *testing.T) { r := decodeObjectKeyString(` "hello\u2333world"`, 4) if v := (*rt.GoString)(unsafe.Pointer(&r)).Len; v < 0 { - require.NoError(t, native.ParsingError(-v)) + require.NoError(t, types.ParsingError(-v)) } spew.Dump(r) } diff --git a/decoder/pools.go b/decoder/pools.go index cd02687..850c777 100644 --- a/decoder/pools.go +++ b/decoder/pools.go @@ -21,7 +21,7 @@ import ( `unsafe` `github.com/bytedance/sonic/internal/caching` - `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` ) @@ -40,7 +40,7 @@ var ( type _Stack struct { sp uintptr sb [_MaxStack]unsafe.Pointer - mm native.StateMachine + mm types.StateMachine } type _Decoder func( diff --git a/encoder/assembler_amd64.go b/encoder/assembler_amd64.go index 3f188da..a826d41 100644 --- a/encoder/assembler_amd64.go +++ b/encoder/assembler_amd64.go @@ -22,6 +22,7 @@ import ( `strconv` `unsafe` + `github.com/bytedance/sonic/internal/cpu` 
`github.com/bytedance/sonic/internal/jit` `github.com/twitchyliquid64/golang-asm/obj` `github.com/twitchyliquid64/golang-asm/obj/x86` @@ -115,10 +116,6 @@ var ( var ( _X0 = jit.Reg("X0") - _X1 = jit.Reg("X1") -) - -var ( _Y0 = jit.Reg("Y0") _Y1 = jit.Reg("Y1") _Y2 = jit.Reg("Y2") @@ -411,16 +408,16 @@ func (self *_Assembler) save_state() { } func (self *_Assembler) drop_state(decr int64) { - self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("SUBQ" , jit.Imm(decr), _AX) // SUBQ $decr, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _SP_x) // MOVQ 8(ST)(AX), SP.x - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 16), _SP_f) // MOVQ 16(ST)(AX), SP.f - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 24), _SP_p) // MOVQ 24(ST)(AX), SP.p - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 32), _SP_q) // MOVQ 32(ST)(AX), SP.q - self.Emit("VPXOR" , _Y0, _Y0, _Y0) // VPXOR Y0, Y0, Y0 - self.Emit("VMOVDQU", _Y0, jit.Sib(_ST, _AX, 1, 8)) // VMOVDQU Y0, 8(ST)(AX) - self.Emit("VZEROUPPER") // VZEROUPPER + self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX + self.Emit("SUBQ" , jit.Imm(decr), _AX) // SUBQ $decr, AX + self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) + self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _SP_x) // MOVQ 8(ST)(AX), SP.x + self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 16), _SP_f) // MOVQ 16(ST)(AX), SP.f + self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 24), _SP_p) // MOVQ 24(ST)(AX), SP.p + self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 32), _SP_q) // MOVQ 32(ST)(AX), SP.q + self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 + self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 8)) // MOVOU X0, 8(ST)(AX) + self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 24)) // MOVOU X0, 24(ST)(AX) } /** Buffer Helpers **/ @@ -654,6 +651,106 @@ func (self *_Assembler) encode_string(fn obj.Addr, doubleQuote bool) { self.close_quote(doubleQuote) // QCLOSE $doubleQuote } +/** Zero Value Check Routine **/ + +func (self *_Assembler) check_zero(nb 
int, dest int) { + i := int64(0) + e := int64(nb) + + /* special case: zero-sized value, always empty */ + if e == 0 { + return + } + + /* default instructions for AVX2 */ + vclear := func(v obj.Addr) { self.Emit("VPXOR" , v, v, v) } + vset1a := func(a, b obj.Addr) { self.Emit("VPCMPEQB", a, a, b) } + vandpb := func(b, a, r obj.Addr) { self.Emit("VPAND" , b, a, r) } + vcmpeq := func(b, a, r obj.Addr) { self.Emit("VPCMPEQB", b, a, r) } + + /* fall-back instructions for AVX */ + if !cpu.HasAVX2 { + vclear = func(v obj.Addr) { self.Emit("VXORPS", v, v, v) } + vset1a = func(a, b obj.Addr) { self.Emit("VCMPPS", a, a, b, jit.Imm(0x0f)) } + vandpb = func(b, a, r obj.Addr) { self.Emit("VANDPS", b, a, r) } + vcmpeq = func(b, a, r obj.Addr) { self.Emit("VCMPPS", b, a, r, jit.Imm(0x00)) } + } + + /* if n is less than 32 byte, only scalar code will be used; + * otherwise AVX is used, so clear Y0, and set Y1 to all 1s */ + if e >= 32 { + vclear(_Y0) // CLEAR Y0 + vset1a(_Y0, _Y1) // SET1A Y0, Y1 + } + + /* 128-byte tests */ + for i <= e - 128 { + vcmpeq(jit.Ptr(_SP_p, i + 0), _Y0, _Y2) // CMPEQ i+0(SP.p), Y0, Y2 + vcmpeq(jit.Ptr(_SP_p, i + 32), _Y0, _Y3) // CMPEQ i+32(SP.p), Y0, Y3 + vcmpeq(jit.Ptr(_SP_p, i + 64), _Y0, _Y4) // CMPEQ i+64(SP.p), Y0, Y4 + vcmpeq(jit.Ptr(_SP_p, i + 96), _Y0, _Y5) // CMPEQ i+96(SP.p), Y0, Y5 + vandpb(_Y3, _Y2, _Y2) // ANDPB Y3, Y2, Y2 + vandpb(_Y5, _Y4, _Y3) // ANDPB Y5, Y4, Y3 + vandpb(_Y2, _Y3, _Y3) // ANDPB Y2, Y3, Y3 + self.Emit("VPTEST", _Y1, _Y3) // VPTEST Y1, Y3 + self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n} + i += 128 + } + + /* 32-byte tests */ + for i <= e - 32 { + vcmpeq(jit.Ptr(_SP_p, i), _Y0, _Y2) // CMPEQ i(SP.p), Y0, Y2 + self.Emit("VPTEST", _Y1, _Y2) // VPTEST Y1, Y2 + self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n} + i += 32 + } + + /* VZEROUPPER to avoid AVX-SSE transition penalty */ + if e >= 32 { + self.Emit("VZEROUPPER") + } + + /* 8-byte tests */ + for i <= e - 8 { + self.Emit("CMPQ", jit.Ptr(_SP_p, 
i), jit.Imm(0)) // CMPQ i(SP.p), $0 + self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n} + i += 8 + } + + /* 4 byte test */ + if i <= e - 4 { + self.Emit("CMPL", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPL i(SP.p), $0 + self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n} + i += 4 + } + + /* 2 byte test */ + if i <= e - 2 { + self.Emit("CMPW", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPW i(SP.p), $0 + self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n} + i += 2 + } + + /* the last byte */ + if i < e { + self.Emit("CMPB", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPB i(SP.p), $0 + self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n} + } + + /* every test passed: value is zero, jump to dest */ + if e < 32 { + self.Xjmp("JMP", dest) + self.Link("_not_zero_{n}") + return + } + + /* VZEROUPPER to avoid AVX-SSE transition penalty */ + self.Xjmp("JMP", dest) + self.Link("_not_zero_z_{n}") + self.Emit("VZEROUPPER") + self.Link("_not_zero_{n}") +} + /** OpCode Assembler Functions **/ var ( @@ -671,7 +768,7 @@ var ( var ( _F_memmove = jit.Func(memmove) - _F_isZeroSafe = jit.Func(isZeroSafe) + _F_isZeroTyped = jit.Func(isZeroTyped) _F_mapiternext = jit.Func(mapiternext) _F_mapiterinit = jit.Func(mapiterinit) _F_error_number = jit.Func(error_number) @@ -960,103 +1057,15 @@ func (self *_Assembler) _asm_OP_is_zero_map(p *_Instr) { } func (self *_Assembler) _asm_OP_is_zero_mem(p *_Instr) { - i := int64(0) - e := int64(p.vlen()) - - /* increse the offset, and decide what jumps to use */ - incr_jump := func(np string, op string, d int64, avx2 bool) { - if i += d; avx2 && e >= 32 { - self.Sjmp(np, "not_zero_z_{n}") - } else if i < e || e >= 32 { - self.Sjmp(np, "not_zero_{n}") - } else { - self.Xjmp(op, p.vi()) - } - } - - /* if n is less than 32 byte, only SSE2 will be used; - * otherwise AVX2 is used, so clear Y0, and set Y1 to all 0xff */ - if e < 32 { - self.Emit("VPXOR", _X0, _X0, _X0) // VPXOR X0, X0, X0 - } else { - self.Emit("VPXOR" , _Y0, _Y0, _Y0) // VPXOR Y0, Y0, Y0 - self.Emit("VPCMPEQB", _Y1, _Y1,
_Y1) // VPCMPEQB Y1, Y1, Y1 - } - - /* 128-byte tests */ - for i <= e - 128 { - self.Emit("VPCMPEQB", jit.Ptr(_SP_p, i + 0), _Y0, _Y2) // VPCMPEQB i+0(SP.p), Y0, Y2 - self.Emit("VPCMPEQB", jit.Ptr(_SP_p, i + 32), _Y0, _Y3) // VPCMPEQB i+32(SP.p), Y0, Y3 - self.Emit("VPCMPEQB", jit.Ptr(_SP_p, i + 64), _Y0, _Y4) // VPCMPEQB i+64(SP.p), Y0, Y4 - self.Emit("VPCMPEQB", jit.Ptr(_SP_p, i + 96), _Y0, _Y5) // VPCMPEQB i+96(SP.p), Y0, Y5 - self.Emit("VPAND" , _Y2, _Y3, _Y2) // VPAND Y2, Y3, Y2 - self.Emit("VPAND" , _Y4, _Y5, _Y3) // VPAND Y4, Y5, Y3 - self.Emit("VPXOR" , _Y3, _Y1, _Y3) // VPXOR Y3, Y1, Y3 - self.Emit("VPTEST" , _Y2, _Y3) // VPTEST Y2, Y3 - incr_jump("JNC" , "JC", 128, true) // JNC not_zero_z_{n} - } - - /* 32-byte tests */ - for i <= e - 32 { - self.Emit("VPCMPEQB", jit.Ptr(_SP_p, i), _Y0, _Y2) // VPCMPEQB i(SP.p), Y0, Y2 - self.Emit("VPTEST" , _Y2, _Y1) // VPTEST Y2, Y1 - incr_jump("JNC" , "JC", 32, true) // JNC not_zero_z_{n} - } - - /* VZEROUPPER to avoid AVX-SSE transition penalty */ - if e >= 32 { - self.Emit("VZEROUPPER") - } - - /* 16 bytes test */ - if i <= e - 16 { - self.Emit("PCMPEQB" , jit.Ptr(_SP_p, i), _X0, _X1) // PCMPEQB i(SP.p), X0, X1 - self.Emit("PMOVMSKB", _X1, _AX) // PMOVMSKB X1, AX - self.Emit("CMPQ" , jit.Imm(-1), _AX) // CMPQ $-1, AX - incr_jump("JNE" , "JE", 16, false) // JNE not_zero_{n} - } - - /* 8-byte tests */ - if i <= e - 8 { - self.Emit("CMPQ", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPQ i(SP.p), $0 - incr_jump("JNE" , "JE", 8, false) // JNE not_zero_{n} - } - - /* 4 byte test */ - if i <= e - 4 { - self.Emit("CMPL", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPL i(SP.p), $0 - incr_jump("JNE" , "JE", 4, false) // JNE not_zero_{n} - } - - /* 2 byte test */ - if i <= e - 2 { - self.Emit("CMPW", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPW i(SP.p), $0 - incr_jump("JNE" , "JE", 2, false) // JNE not_zero_{n} - } - - /* the last byte */ - if i < e { - self.Emit("CMPB", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPB i(SP.p), $0 - incr_jump("JNE" , "JE", 1, 
false) // JNE not_zero_{n} - } - - /* VZEROUPPER to avoid AVX-SSE transition penalty */ - if e >= 32 { - self.Xjmp("JMP", p.vi()) - self.Link("not_zero_z_{n}") - self.Emit("VZEROUPPER") - } - - /* value is not zero */ - self.Link("not_zero_{n}") - self.NOP() + self.check_zero(p.vlen(), p.vi()) } func (self *_Assembler) _asm_OP_is_zero_safe(p *_Instr) { + self.check_zero(p.vlen(), p.vi()) // CHECKZ $p.vlen(), p.vi() self.Emit("MOVQ", jit.Type(p.vt()), _AX) // MOVQ $p.vt(), AX self.Emit("MOVQ", _SP_p, jit.Ptr(_SP, 0)) // MOVQ SP.p, (SP) self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.call_go(_F_isZeroSafe) // CALL_GO isZeroSafe + self.call_go(_F_isZeroTyped) // CALL_GO isZeroTyped self.Emit("CMPQ", jit.Ptr(_SP, 16), jit.Imm(0)) // CMPQ 16(SP), $0 self.Xjmp("JNE" , p.vi()) // JNE p.vi() } diff --git a/encoder/primitives.go b/encoder/primitives.go index b164e95..b0b3af3 100644 --- a/encoder/primitives.go +++ b/encoder/primitives.go @@ -176,12 +176,8 @@ func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler) error { } } -func isZeroMem(p unsafe.Pointer, nb int) bool { - return native.Lzero(p, nb) == 0 -} - func isZeroSafe(p unsafe.Pointer, vt *rt.GoType) bool { - if isZeroMem(p, vt.Size()) { + if native.Lzero(p, vt.Size()) == 0 { return true } else { return isZeroTyped(p, vt) diff --git a/go.mod b/go.mod index 7f177ef..d1c0696 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,10 @@ module github.com/bytedance/sonic go 1.15 require ( - github.com/chenzhuoyu/base64x v0.0.0-20210528150155-e775e1ff0f56 + github.com/chenzhuoyu/base64x v0.0.0-20210528162528-3c6c11c43ee5 github.com/davecgh/go-spew v1.1.1 github.com/json-iterator/go v1.1.10 + github.com/klauspost/cpuid/v2 v2.0.6 github.com/stretchr/testify v1.7.0 github.com/tidwall/gjson v1.8.0 github.com/twitchyliquid64/golang-asm v0.15.1 diff --git a/go.sum b/go.sum index a1684a3..7a0b19e 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,13 @@ -github.com/chenzhuoyu/base64x v0.0.0-20210528150155-e775e1ff0f56 
h1:dwu5dS6rUvkgozhG4LYv4PPs8wu7T9L6D7A2Y1nwHwo= -github.com/chenzhuoyu/base64x v0.0.0-20210528150155-e775e1ff0f56/go.mod h1:NfDzX8KeqVNX62apij1OkqoeDdq1VR3g0TRZo99kkBA= +github.com/chenzhuoyu/base64x v0.0.0-20210528162528-3c6c11c43ee5 h1:7AStn2tanqGY99xzW+Ve1p6YYqnRr1m/yswJ4h0BhcY= +github.com/chenzhuoyu/base64x v0.0.0-20210528162528-3c6c11c43ee5/go.mod h1:NfDzX8KeqVNX62apij1OkqoeDdq1VR3g0TRZo99kkBA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI= +github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= diff --git a/internal/cpu/features.go b/internal/cpu/features.go new file mode 100644 index 0000000..504b0a7 --- /dev/null +++ b/internal/cpu/features.go @@ -0,0 +1,38 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cpu + +import ( + `fmt` + `os` + + `github.com/klauspost/cpuid/v2` +) + +var ( + HasAVX = cpuid.CPU.Has(cpuid.AVX) + HasAVX2 = cpuid.CPU.Has(cpuid.AVX2) +) + +func init() { + switch v := os.Getenv("SONIC_MODE"); v { + case "" : break + case "auto" : break + case "noavx2" : HasAVX2 = false + default : panic(fmt.Sprintf("invalid mode: '%s', should be one of 'auto', 'noavx2'", v)) + } +} diff --git a/internal/jit/assembler_amd64.go b/internal/jit/assembler_amd64.go index 0ae0a09..0a409f0 100644 --- a/internal/jit/assembler_amd64.go +++ b/internal/jit/assembler_amd64.go @@ -182,7 +182,9 @@ func (self *BaseAssembler) assignOperands(p *obj.Prog, args []obj.Addr) { case 0 : case 1 : p.To = args[0] case 2 : p.To, p.From = args[1], args[0] - default : p.To, p.From, p.RestArgs = args[1], args[0], args[2:] + case 3 : p.To, p.From, p.RestArgs = args[2], args[0], args[1:2] + case 4 : p.To, p.From, p.RestArgs = args[2], args[3], args[:2] + default : panic("invalid operands") } } diff --git a/internal/jit/backend_test.go b/internal/jit/backend_test.go new file mode 100644 index 0000000..8ddcd2e --- /dev/null +++ b/internal/jit/backend_test.go @@ -0,0 +1,34 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package jit + +import ( + `testing` + + `github.com/davecgh/go-spew/spew` + `github.com/twitchyliquid64/golang-asm/obj` + `github.com/twitchyliquid64/golang-asm/obj/x86` +) + +func TestBackend(t *testing.T) { + e := newBackend("amd64") + p := e.New() + p.As = x86.AVPTEST + (*BaseAssembler)(nil).assignOperands(p, []obj.Addr{Reg("Y2"), Reg("Y1")}) + e.Append(p) + spew.Dump(e.Assemble()) +} diff --git a/internal/native/avx/fastfloat_amd64_test.go b/internal/native/avx/fastfloat_amd64_test.go new file mode 100644 index 0000000..d4345d9 --- /dev/null +++ b/internal/native/avx/fastfloat_amd64_test.go @@ -0,0 +1,66 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package avx + +import ( + `math` + `strconv` + `testing` + + `github.com/stretchr/testify/assert` +) + +func TestFastFloat_Encode(t *testing.T) { + var buf [64]byte + assert.Equal(t, "0" , string(buf[:__f64toa(&buf[0], 0)])) + assert.Equal(t, "0" , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))])) + assert.Equal(t, "12340000000" , string(buf[:__f64toa(&buf[0], 1234e7)])) + assert.Equal(t, "12.34" , string(buf[:__f64toa(&buf[0], 1234e-2)])) + assert.Equal(t, "0.001234" , string(buf[:__f64toa(&buf[0], 1234e-6)])) + assert.Equal(t, "1e30" , string(buf[:__f64toa(&buf[0], 1e30)])) + assert.Equal(t, "1.234e33" , string(buf[:__f64toa(&buf[0], 1234e30)])) + assert.Equal(t, "1.234e308" , string(buf[:__f64toa(&buf[0], 1234e305)])) + assert.Equal(t, "1.234e-317" , string(buf[:__f64toa(&buf[0], 1234e-320)])) + assert.Equal(t, "1.7976931348623157e308" , string(buf[:__f64toa(&buf[0], 1.7976931348623157e308)])) + assert.Equal(t, "-12340000000" , string(buf[:__f64toa(&buf[0], -1234e7)])) + assert.Equal(t, "-12.34" , string(buf[:__f64toa(&buf[0], -1234e-2)])) + assert.Equal(t, "-0.001234" , string(buf[:__f64toa(&buf[0], -1234e-6)])) + assert.Equal(t, "-1e30" , string(buf[:__f64toa(&buf[0], -1e30)])) + assert.Equal(t, "-1.234e33" , string(buf[:__f64toa(&buf[0], -1234e30)])) + assert.Equal(t, "-1.234e308" , string(buf[:__f64toa(&buf[0], -1234e305)])) + assert.Equal(t, "-1.234e-317" , string(buf[:__f64toa(&buf[0], -1234e-320)])) + assert.Equal(t, "-2.2250738585072014e-308" , string(buf[:__f64toa(&buf[0], -2.2250738585072014e-308)])) +} + +func BenchmarkFastFloat_Encode(b *testing.B) { + val := -2.2250738585072014e-308 + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib", + test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:], val, 'g', -1, 64) }}, + }, { + name: "FastFloat", + test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) 
}}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} diff --git a/internal/native/avx/fastint_amd64_test.go b/internal/native/avx/fastint_amd64_test.go new file mode 100644 index 0000000..6467dcf --- /dev/null +++ b/internal/native/avx/fastint_amd64_test.go @@ -0,0 +1,135 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx + +import ( + `strconv` + `testing` + + `github.com/stretchr/testify/assert` +) + +func TestFastInt_IntToString(t *testing.T) { + var buf [32]byte + assert.Equal(t, "0" , string(buf[:__i64toa(&buf[0], 0)])) + assert.Equal(t, "1" , string(buf[:__i64toa(&buf[0], 1)])) + assert.Equal(t, "12" , string(buf[:__i64toa(&buf[0], 12)])) + assert.Equal(t, "123" , string(buf[:__i64toa(&buf[0], 123)])) + assert.Equal(t, "1234" , string(buf[:__i64toa(&buf[0], 1234)])) + assert.Equal(t, "12345" , string(buf[:__i64toa(&buf[0], 12345)])) + assert.Equal(t, "123456" , string(buf[:__i64toa(&buf[0], 123456)])) + assert.Equal(t, "1234567" , string(buf[:__i64toa(&buf[0], 1234567)])) + assert.Equal(t, "12345678" , string(buf[:__i64toa(&buf[0], 12345678)])) + assert.Equal(t, "123456789" , string(buf[:__i64toa(&buf[0], 123456789)])) + assert.Equal(t, "1234567890" , string(buf[:__i64toa(&buf[0], 1234567890)])) + assert.Equal(t, "12345678901" , string(buf[:__i64toa(&buf[0], 12345678901)])) + assert.Equal(t, "123456789012" , 
string(buf[:__i64toa(&buf[0], 123456789012)])) + assert.Equal(t, "1234567890123" , string(buf[:__i64toa(&buf[0], 1234567890123)])) + assert.Equal(t, "12345678901234" , string(buf[:__i64toa(&buf[0], 12345678901234)])) + assert.Equal(t, "123456789012345" , string(buf[:__i64toa(&buf[0], 123456789012345)])) + assert.Equal(t, "1234567890123456" , string(buf[:__i64toa(&buf[0], 1234567890123456)])) + assert.Equal(t, "12345678901234567" , string(buf[:__i64toa(&buf[0], 12345678901234567)])) + assert.Equal(t, "123456789012345678" , string(buf[:__i64toa(&buf[0], 123456789012345678)])) + assert.Equal(t, "1234567890123456789" , string(buf[:__i64toa(&buf[0], 1234567890123456789)])) + assert.Equal(t, "9223372036854775807" , string(buf[:__i64toa(&buf[0], 9223372036854775807)])) + assert.Equal(t, "-1" , string(buf[:__i64toa(&buf[0], -1)])) + assert.Equal(t, "-12" , string(buf[:__i64toa(&buf[0], -12)])) + assert.Equal(t, "-123" , string(buf[:__i64toa(&buf[0], -123)])) + assert.Equal(t, "-1234" , string(buf[:__i64toa(&buf[0], -1234)])) + assert.Equal(t, "-12345" , string(buf[:__i64toa(&buf[0], -12345)])) + assert.Equal(t, "-123456" , string(buf[:__i64toa(&buf[0], -123456)])) + assert.Equal(t, "-1234567" , string(buf[:__i64toa(&buf[0], -1234567)])) + assert.Equal(t, "-12345678" , string(buf[:__i64toa(&buf[0], -12345678)])) + assert.Equal(t, "-123456789" , string(buf[:__i64toa(&buf[0], -123456789)])) + assert.Equal(t, "-1234567890" , string(buf[:__i64toa(&buf[0], -1234567890)])) + assert.Equal(t, "-12345678901" , string(buf[:__i64toa(&buf[0], -12345678901)])) + assert.Equal(t, "-123456789012" , string(buf[:__i64toa(&buf[0], -123456789012)])) + assert.Equal(t, "-1234567890123" , string(buf[:__i64toa(&buf[0], -1234567890123)])) + assert.Equal(t, "-12345678901234" , string(buf[:__i64toa(&buf[0], -12345678901234)])) + assert.Equal(t, "-123456789012345" , string(buf[:__i64toa(&buf[0], -123456789012345)])) + assert.Equal(t, "-1234567890123456" , string(buf[:__i64toa(&buf[0], 
-1234567890123456)])) + assert.Equal(t, "-12345678901234567" , string(buf[:__i64toa(&buf[0], -12345678901234567)])) + assert.Equal(t, "-123456789012345678" , string(buf[:__i64toa(&buf[0], -123456789012345678)])) + assert.Equal(t, "-1234567890123456789" , string(buf[:__i64toa(&buf[0], -1234567890123456789)])) + assert.Equal(t, "-9223372036854775808" , string(buf[:__i64toa(&buf[0], -9223372036854775808)])) +} + +func TestFastInt_UintToString(t *testing.T) { + var buf [32]byte + assert.Equal(t, "0" , string(buf[:__u64toa(&buf[0], 0)])) + assert.Equal(t, "1" , string(buf[:__u64toa(&buf[0], 1)])) + assert.Equal(t, "12" , string(buf[:__u64toa(&buf[0], 12)])) + assert.Equal(t, "123" , string(buf[:__u64toa(&buf[0], 123)])) + assert.Equal(t, "1234" , string(buf[:__u64toa(&buf[0], 1234)])) + assert.Equal(t, "12345" , string(buf[:__u64toa(&buf[0], 12345)])) + assert.Equal(t, "123456" , string(buf[:__u64toa(&buf[0], 123456)])) + assert.Equal(t, "1234567" , string(buf[:__u64toa(&buf[0], 1234567)])) + assert.Equal(t, "12345678" , string(buf[:__u64toa(&buf[0], 12345678)])) + assert.Equal(t, "123456789" , string(buf[:__u64toa(&buf[0], 123456789)])) + assert.Equal(t, "1234567890" , string(buf[:__u64toa(&buf[0], 1234567890)])) + assert.Equal(t, "12345678901" , string(buf[:__u64toa(&buf[0], 12345678901)])) + assert.Equal(t, "123456789012" , string(buf[:__u64toa(&buf[0], 123456789012)])) + assert.Equal(t, "1234567890123" , string(buf[:__u64toa(&buf[0], 1234567890123)])) + assert.Equal(t, "12345678901234" , string(buf[:__u64toa(&buf[0], 12345678901234)])) + assert.Equal(t, "123456789012345" , string(buf[:__u64toa(&buf[0], 123456789012345)])) + assert.Equal(t, "1234567890123456" , string(buf[:__u64toa(&buf[0], 1234567890123456)])) + assert.Equal(t, "12345678901234567" , string(buf[:__u64toa(&buf[0], 12345678901234567)])) + assert.Equal(t, "123456789012345678" , string(buf[:__u64toa(&buf[0], 123456789012345678)])) + assert.Equal(t, "1234567890123456789" , string(buf[:__u64toa(&buf[0], 
1234567890123456789)])) + assert.Equal(t, "12345678901234567890" , string(buf[:__u64toa(&buf[0], 12345678901234567890)])) + assert.Equal(t, "18446744073709551615" , string(buf[:__u64toa(&buf[0], 18446744073709551615)])) +} + +func BenchmarkFastInt_IntToString(b *testing.B) { + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib-Positive", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendInt(buf[:], int64(i), 10) }}, + }, { + name: "StdLib-Negative", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendInt(buf[:], -int64(i), 10) }}, + }, { + name: "FastInt-Positive", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __i64toa(&buf[0], int64(i)) }}, + }, { + name: "FastInt-Negative", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __i64toa(&buf[0], -int64(i)) }}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} + +func BenchmarkFastInt_UintToString(b *testing.B) { + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendUint(buf[:], uint64(i), 10) }}, + }, { + name: "FastInt", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __u64toa(&buf[0], uint64(i)) }}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} diff --git a/internal/native/avx/native_amd64.go b/internal/native/avx/native_amd64.go new file mode 100644 index 0000000..b8c4d35 --- /dev/null +++ b/internal/native/avx/native_amd64.go @@ -0,0 +1,100 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx + +import ( + `unsafe` + + `github.com/bytedance/sonic/internal/native/types` +) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __i64toa(out *byte, val int64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __u64toa(out *byte, val uint64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __f64toa(out *byte, val float64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lzero(p unsafe.Pointer, n int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lquote(buf *string, off int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lspace(sp unsafe.Pointer, nb int, off int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __value(s unsafe.Pointer, n int, p int, v *types.JsonState) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vstring(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vnumber(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vsigned(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vunsigned(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func 
__skip_one(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __skip_array(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __skip_object(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int) diff --git a/internal/native/avx/native_amd64.s b/internal/native/avx/native_amd64.s new file mode 100644 index 0000000..6706b85 --- /dev/null +++ b/internal/native/avx/native_amd64.s @@ -0,0 +1,6246 @@ +// +build !noasm !appengine +// Code generated by asm2asm, DO NOT EDIT. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___(SB), NOSPLIT, $0 +_lzero: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + CMPQ SI, $128 + JB LBB0_4 + LONG $0xc057f8c5 // vxorps %xmm0, %xmm0, %xmm0 + LONG $0xc8c2fcc5; BYTE $0x0f // vcmptrueps %ymm0, %ymm0, %ymm1 + +LBB0_2: + LONG $0x17c2fcc5; BYTE $0x00 // vcmpeqps (%rdi), %ymm0, %ymm2 + LONG $0x5fc2fcc5; WORD $0x0020 // vcmpeqps $32(%rdi), %ymm0, %ymm3 + LONG $0xd254e4c5 // vandps %ymm2, %ymm3, %ymm2 + LONG $0x5fc2fcc5; WORD $0x0040 // vcmpeqps $64(%rdi), %ymm0, %ymm3 + LONG $0x67c2fcc5; WORD $0x0060 // vcmpeqps $96(%rdi), %ymm0, %ymm4 + LONG $0xdc54e4c5 // vandps %ymm4, %ymm3, %ymm3 + LONG $0xd354ecc5 // vandps %ymm3, %ymm2, %ymm2 + LONG $0x177de2c4; BYTE $0xd1 // vptest %ymm1, %ymm2 + JAE LBB0_14 + SUBQ $-128, DI + ADDQ $-128, SI + CMPQ SI, $127 + JA LBB0_2 + +LBB0_4: + CMPQ SI, $32 + JB LBB0_8 + LONG $0xc057f8c5 // vxorps %xmm0, %xmm0, %xmm0 + LONG $0xc8c2fcc5; BYTE $0x0f // vcmptrueps %ymm0, %ymm0, %ymm1 + +LBB0_6: + LONG $0x17c2fcc5; BYTE $0x00 // vcmpeqps (%rdi), %ymm0, %ymm2 + LONG $0x177de2c4; BYTE $0xd1 // vptest %ymm1, %ymm2 + JAE LBB0_14 + 
ADDQ $32, DI + ADDQ $-32, SI + CMPQ SI, $31 + JA LBB0_6 + +LBB0_8: + CMPQ SI, $8 + JB LBB0_11 + +LBB0_9: + CMPQ 0(DI), $0 + JNE LBB0_14 + ADDQ $8, DI + ADDQ $-8, SI + CMPQ SI, $7 + JA LBB0_9 + +LBB0_11: + CMPQ SI, $4 + JB LBB0_18 + MOVL $1, AX + CMPL 0(DI), $0 + JNE LBB0_13 + ADDQ $4, DI + ADDQ $-4, SI + +LBB0_18: + CMPQ SI, $2 + JB LBB0_21 + MOVL $1, AX + CMPW 0(DI), $0 + JNE LBB0_13 + ADDQ $2, DI + ADDQ $-2, SI + +LBB0_21: + XORL AX, AX + TESTQ SI, SI + JE LBB0_13 + CMPB 0(DI), $0 + SETNE AX + BYTE $0x5d // popq %rbp + WORD $0xf8c5; BYTE $0x77 // vzeroupper + RET + +LBB0_14: + MOVL $1, AX + BYTE $0x5d // popq %rbp + WORD $0xf8c5; BYTE $0x77 // vzeroupper + RET + +LBB0_13: + BYTE $0x5d // popq %rbp + WORD $0xf8c5; BYTE $0x77 // vzeroupper + RET + +LCPI1_0: + QUAD $0x2020202020202020; QUAD $0x2020202020202020 // .space 16, ' ' + +LCPI1_1: + QUAD $0x2222222222222222; QUAD $0x2222222222222222 // .space 16, '""""""""""""""""' + +LCPI1_2: + QUAD $0x5c5c5c5c5c5c5c5c; QUAD $0x5c5c5c5c5c5c5c5c // .space 16, '\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + +LCPI1_3: + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0x00 // .space 1, '\x00' + BYTE $0xff // .byte 255 + +_lquote: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ 8(DI), R8 + MOVQ R8, R10 + SUBQ SI, R10 + JE LBB1_1 + MOVQ 0(DI), R11 + LEAQ 0(R11)(SI*1), CX + CMPQ R10, $16 + JAE LBB1_3 + LONG $0x763941c4; BYTE $0xc0 // vpcmpeqd %xmm8, %xmm8, %xmm8 + DECQ R10 + LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 + CMPQ R10, $14 + JA LBB1_24 + LONG $0xb4058d48; WORD $0x0001; BYTE $0x00 // leaq $436(%rip), %rax /* LJTI1_0(%rip) */ + MOVLQSX 0(AX)(R10*4), DX + ADDQ AX, DX + LONG 
$0x763141c4; BYTE $0xc9 // vpcmpeqd %xmm9, %xmm9, %xmm9 + LONG $0x762941c4; BYTE $0xd2 // vpcmpeqd %xmm10, %xmm10, %xmm10 + LONG $0x762141c4; BYTE $0xdb // vpcmpeqd %xmm11, %xmm11, %xmm11 + LONG $0x761941c4; BYTE $0xe4 // vpcmpeqd %xmm12, %xmm12, %xmm12 + LONG $0x761141c4; BYTE $0xed // vpcmpeqd %xmm13, %xmm13, %xmm13 + LONG $0x760941c4; BYTE $0xf6 // vpcmpeqd %xmm14, %xmm14, %xmm14 + LONG $0xff76c1c5 // vpcmpeqd %xmm7, %xmm7, %xmm7 + LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 + LONG $0xc976f1c5 // vpcmpeqd %xmm1, %xmm1, %xmm1 + LONG $0xd276e9c5 // vpcmpeqd %xmm2, %xmm2, %xmm2 + LONG $0xdb76e1c5 // vpcmpeqd %xmm3, %xmm3, %xmm3 + LONG $0xe476d9c5 // vpcmpeqd %xmm4, %xmm4, %xmm4 + LONG $0xed76d1c5 // vpcmpeqd %xmm5, %xmm5, %xmm5 + LONG $0xf676c9c5 // vpcmpeqd %xmm6, %xmm6, %xmm6 + JMP DX + +LBB1_9: + QUAD $0xffffff5f056ffac5 // vmovdqu $-161(%rip), %xmm0 /* LCPI1_3(%rip) */ + LONG $0x207963c4; WORD $0x0e49; BYTE $0x0e // vpinsrb $14, $14(%rcx), %xmm0, %xmm9 + +LBB1_10: + LONG $0x203163c4; WORD $0x0d51; BYTE $0x0d // vpinsrb $13, $13(%rcx), %xmm9, %xmm10 + +LBB1_11: + LONG $0x202963c4; WORD $0x0c59; BYTE $0x0c // vpinsrb $12, $12(%rcx), %xmm10, %xmm11 + +LBB1_12: + LONG $0x202163c4; WORD $0x0b61; BYTE $0x0b // vpinsrb $11, $11(%rcx), %xmm11, %xmm12 + +LBB1_13: + LONG $0x201963c4; WORD $0x0a69; BYTE $0x0a // vpinsrb $10, $10(%rcx), %xmm12, %xmm13 + +LBB1_14: + LONG $0x201163c4; WORD $0x0971; BYTE $0x09 // vpinsrb $9, $9(%rcx), %xmm13, %xmm14 + +LBB1_15: + LONG $0x2009e3c4; WORD $0x0879; BYTE $0x08 // vpinsrb $8, $8(%rcx), %xmm14, %xmm7 + +LBB1_16: + LONG $0x2041e3c4; WORD $0x0741; BYTE $0x07 // vpinsrb $7, $7(%rcx), %xmm7, %xmm0 + +LBB1_17: + LONG $0x2079e3c4; WORD $0x0649; BYTE $0x06 // vpinsrb $6, $6(%rcx), %xmm0, %xmm1 + +LBB1_18: + LONG $0x2071e3c4; WORD $0x0551; BYTE $0x05 // vpinsrb $5, $5(%rcx), %xmm1, %xmm2 + +LBB1_19: + LONG $0x2069e3c4; WORD $0x0459; BYTE $0x04 // vpinsrb $4, $4(%rcx), %xmm2, %xmm3 + +LBB1_20: + LONG $0x2061e3c4; WORD $0x0361; BYTE 
$0x03 // vpinsrb $3, $3(%rcx), %xmm3, %xmm4 + +LBB1_21: + LONG $0x2059e3c4; WORD $0x0269; BYTE $0x02 // vpinsrb $2, $2(%rcx), %xmm4, %xmm5 + +LBB1_22: + LONG $0x2051e3c4; WORD $0x0171; BYTE $0x01 // vpinsrb $1, $1(%rcx), %xmm5, %xmm6 + +LBB1_23: + LONG $0x2049e3c4; WORD $0x0001 // vpinsrb $0, (%rcx), %xmm6, %xmm0 + +LBB1_24: + QUAD $0xfffffebf0d6ffac5 // vmovdqu $-321(%rip), %xmm1 /* LCPI1_0(%rip) */ + QUAD $0xfffffec71574f9c5 // vpcmpeqb $-313(%rip), %xmm0, %xmm2 /* LCPI1_1(%rip) */ + QUAD $0xfffffecf1d74f9c5 // vpcmpeqb $-305(%rip), %xmm0, %xmm3 /* LCPI1_2(%rip) */ + LONG $0xc864f1c5 // vpcmpgtb %xmm0, %xmm1, %xmm1 + LONG $0xd2ebe1c5 // vpor %xmm2, %xmm3, %xmm2 + LONG $0x6479c1c4; BYTE $0xc0 // vpcmpgtb %xmm8, %xmm0, %xmm0 + LONG $0xc0dbf1c5 // vpand %xmm0, %xmm1, %xmm0 + LONG $0xc0ebe9c5 // vpor %xmm0, %xmm2, %xmm0 + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + ORL $-65536, AX + BSFL AX, AX + +LBB1_25: + ADDQ SI, AX + BYTE $0x5d // popq %rbp + RET + +LBB1_1: + XORL AX, AX + ADDQ SI, AX + BYTE $0x5d // popq %rbp + RET + +LBB1_3: + LEAQ -16(R10), R9 + ANDQ $-16, R9 + XORL AX, AX + QUAD $0xfffffe70056ffac5 // vmovdqu $-400(%rip), %xmm0 /* LCPI1_0(%rip) */ + QUAD $0xfffffe780d6ffac5 // vmovdqu $-392(%rip), %xmm1 /* LCPI1_1(%rip) */ + QUAD $0xfffffe80156ffac5 // vmovdqu $-384(%rip), %xmm2 /* LCPI1_2(%rip) */ + LONG $0xdb76e1c5 // vpcmpeqd %xmm3, %xmm3, %xmm3 + MOVQ R10, DX + +LBB1_4: + LONG $0x216ffac5 // vmovdqu (%rcx), %xmm4 + LONG $0xec64f9c5 // vpcmpgtb %xmm4, %xmm0, %xmm5 + LONG $0xf174d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm6 + LONG $0xfa74d9c5 // vpcmpeqb %xmm2, %xmm4, %xmm7 + LONG $0xf6ebc1c5 // vpor %xmm6, %xmm7, %xmm6 + LONG $0xe364d9c5 // vpcmpgtb %xmm3, %xmm4, %xmm4 + LONG $0xe4dbd1c5 // vpand %xmm4, %xmm5, %xmm4 + LONG $0xe4ebc9c5 // vpor %xmm4, %xmm6, %xmm4 + LONG $0xfcd7f9c5 // vpmovmskb %xmm4, %edi + ORL $-65536, DI + BSFL DI, DI + ADDQ DI, AX + CMPL DI, $16 + JB LBB1_25 + ADDQ $16, CX + ADDQ $-16, DX + CMPQ DX, $15 + JA LBB1_4 + LONG $0x6f7a81c4; WORD 
$0x034c; BYTE $0xf0 // vmovdqu $-16(%r11,%r8), %xmm1 + QUAD $0xfffffe171574f1c5 // vpcmpeqb $-489(%rip), %xmm1, %xmm2 /* LCPI1_1(%rip) */ + QUAD $0xfffffe1f1d74f1c5 // vpcmpeqb $-481(%rip), %xmm1, %xmm3 /* LCPI1_2(%rip) */ + LONG $0xc164f9c5 // vpcmpgtb %xmm1, %xmm0, %xmm0 + LONG $0xd2ebe1c5 // vpor %xmm2, %xmm3, %xmm2 + LONG $0xdb76e1c5 // vpcmpeqd %xmm3, %xmm3, %xmm3 + LONG $0xcb64f1c5 // vpcmpgtb %xmm3, %xmm1, %xmm1 + LONG $0xc1dbf9c5 // vpand %xmm1, %xmm0, %xmm0 + LONG $0xc0ebe9c5 // vpor %xmm0, %xmm2, %xmm0 + LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx + ORL $-65536, CX + BSFL CX, CX + SUBQ R9, R10 + ADDQ AX, R10 + LEAQ -32(CX)(R10*1), AX + ADDQ SI, AX + BYTE $0x5d // popq %rbp + RET + +// .set L1_0_set_23, LBB1_23-LJTI1_0 +// .set L1_0_set_22, LBB1_22-LJTI1_0 +// .set L1_0_set_21, LBB1_21-LJTI1_0 +// .set L1_0_set_20, LBB1_20-LJTI1_0 +// .set L1_0_set_19, LBB1_19-LJTI1_0 +// .set L1_0_set_18, LBB1_18-LJTI1_0 +// .set L1_0_set_17, LBB1_17-LJTI1_0 +// .set L1_0_set_16, LBB1_16-LJTI1_0 +// .set L1_0_set_15, LBB1_15-LJTI1_0 +// .set L1_0_set_14, LBB1_14-LJTI1_0 +// .set L1_0_set_13, LBB1_13-LJTI1_0 +// .set L1_0_set_12, LBB1_12-LJTI1_0 +// .set L1_0_set_11, LBB1_11-LJTI1_0 +// .set L1_0_set_10, LBB1_10-LJTI1_0 +// .set L1_0_set_9, LBB1_9-LJTI1_0 +LJTI1_0: + LONG $0xfffffefd // .long L1_0_set_23 + LONG $0xfffffef6 // .long L1_0_set_22 + LONG $0xfffffeef // .long L1_0_set_21 + LONG $0xfffffee8 // .long L1_0_set_20 + LONG $0xfffffee1 // .long L1_0_set_19 + LONG $0xfffffeda // .long L1_0_set_18 + LONG $0xfffffed3 // .long L1_0_set_17 + LONG $0xfffffecc // .long L1_0_set_16 + LONG $0xfffffec5 // .long L1_0_set_15 + LONG $0xfffffebe // .long L1_0_set_14 + LONG $0xfffffeb7 // .long L1_0_set_13 + LONG $0xfffffeb0 // .long L1_0_set_12 + LONG $0xfffffea9 // .long L1_0_set_11 + LONG $0xfffffea2 // .long L1_0_set_10 + LONG $0xfffffe93 // .long L1_0_set_9 + +LCPI2_0: + QUAD $0x2020202020202020; QUAD $0x2020202020202020 // .space 16, ' ' + +LCPI2_1: + QUAD $0x0909090909090909; 
QUAD $0x0909090909090909 // .space 16, '\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t' + +LCPI2_2: + QUAD $0x0a0a0a0a0a0a0a0a; QUAD $0x0a0a0a0a0a0a0a0a // .space 16, '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n' + +LCPI2_3: + QUAD $0x0d0d0d0d0d0d0d0d; QUAD $0x0d0d0d0d0d0d0d0d // .space 16, '\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r' + +LCPI2_4: + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0x00 // .space 1, '\x00' + BYTE $0xff // .byte 255 + +_lspace: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ SI, R8 + SUBQ DX, R8 + JE LBB2_1 + LEAQ 0(DI)(DX*1), R10 + CMPQ R8, $16 + JAE LBB2_3 + LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 + DECQ R8 + CMPQ R8, $14 + JA LBB2_24 + LONG $0x72058d48; WORD $0x0001; BYTE $0x00 // leaq $370(%rip), %rax /* LJTI2_0(%rip) */ + MOVLQSX 0(AX)(R8*4), CX + ADDQ AX, CX + JMP CX + +LBB2_9: + QUAD $0xffffffa9056ffac5 // vmovdqu $-87(%rip), %xmm0 /* LCPI2_4(%rip) */ + LONG $0x2079c3c4; WORD $0x0e42; BYTE $0x0e // vpinsrb $14, $14(%r10), %xmm0, %xmm0 + +LBB2_10: + LONG $0x2079c3c4; WORD $0x0d42; BYTE $0x0d // vpinsrb $13, $13(%r10), %xmm0, %xmm0 + +LBB2_11: + LONG $0x2079c3c4; WORD $0x0c42; BYTE $0x0c // vpinsrb $12, $12(%r10), %xmm0, %xmm0 + +LBB2_12: + LONG $0x2079c3c4; WORD $0x0b42; BYTE $0x0b // vpinsrb $11, $11(%r10), %xmm0, %xmm0 + +LBB2_13: + LONG $0x2079c3c4; WORD $0x0a42; BYTE $0x0a // vpinsrb $10, $10(%r10), %xmm0, %xmm0 + +LBB2_14: + LONG $0x2079c3c4; WORD $0x0942; BYTE $0x09 // vpinsrb $9, $9(%r10), %xmm0, %xmm0 + +LBB2_15: + LONG $0x2079c3c4; WORD $0x0842; BYTE $0x08 // vpinsrb $8, $8(%r10), %xmm0, %xmm0 + +LBB2_16: + LONG $0x2079c3c4; WORD $0x0742; BYTE $0x07 // vpinsrb $7, $7(%r10), %xmm0, %xmm0 + +LBB2_17: 
+ LONG $0x2079c3c4; WORD $0x0642; BYTE $0x06 // vpinsrb $6, $6(%r10), %xmm0, %xmm0 + +LBB2_18: + LONG $0x2079c3c4; WORD $0x0542; BYTE $0x05 // vpinsrb $5, $5(%r10), %xmm0, %xmm0 + +LBB2_19: + LONG $0x2079c3c4; WORD $0x0442; BYTE $0x04 // vpinsrb $4, $4(%r10), %xmm0, %xmm0 + +LBB2_20: + LONG $0x2079c3c4; WORD $0x0342; BYTE $0x03 // vpinsrb $3, $3(%r10), %xmm0, %xmm0 + +LBB2_21: + LONG $0x2079c3c4; WORD $0x0242; BYTE $0x02 // vpinsrb $2, $2(%r10), %xmm0, %xmm0 + +LBB2_22: + LONG $0x2079c3c4; WORD $0x0142; BYTE $0x01 // vpinsrb $1, $1(%r10), %xmm0, %xmm0 + +LBB2_23: + LONG $0x2079c3c4; WORD $0x0002 // vpinsrb $0, (%r10), %xmm0, %xmm0 + +LBB2_24: + QUAD $0xfffffef90d74f9c5 // vpcmpeqb $-263(%rip), %xmm0, %xmm1 /* LCPI2_0(%rip) */ + QUAD $0xffffff011574f9c5 // vpcmpeqb $-255(%rip), %xmm0, %xmm2 /* LCPI2_1(%rip) */ + QUAD $0xffffff091d74f9c5 // vpcmpeqb $-247(%rip), %xmm0, %xmm3 /* LCPI2_2(%rip) */ + LONG $0xcaebf1c5 // vpor %xmm2, %xmm1, %xmm1 + QUAD $0xffffff0d0574f9c5 // vpcmpeqb $-243(%rip), %xmm0, %xmm0 /* LCPI2_3(%rip) */ + LONG $0xc3ebf9c5 // vpor %xmm3, %xmm0, %xmm0 + LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + NOTL AX + BSFL AX, AX + +LBB2_25: + ADDQ DX, AX + BYTE $0x5d // popq %rbp + RET + +LBB2_1: + XORL AX, AX + ADDQ DX, AX + BYTE $0x5d // popq %rbp + RET + +LBB2_3: + LEAQ -16(R8), R9 + ANDQ $-16, R9 + XORL AX, AX + QUAD $0xfffffeae056ffac5 // vmovdqu $-338(%rip), %xmm0 /* LCPI2_0(%rip) */ + QUAD $0xfffffeb60d6ffac5 // vmovdqu $-330(%rip), %xmm1 /* LCPI2_1(%rip) */ + QUAD $0xfffffebe156ffac5 // vmovdqu $-322(%rip), %xmm2 /* LCPI2_2(%rip) */ + QUAD $0xfffffec61d6ffac5 // vmovdqu $-314(%rip), %xmm3 /* LCPI2_3(%rip) */ + MOVQ R8, R11 + +LBB2_4: + LONG $0x6f7ac1c4; BYTE $0x22 // vmovdqu (%r10), %xmm4 + LONG $0xe874d9c5 // vpcmpeqb %xmm0, %xmm4, %xmm5 + LONG $0xf174d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm6 + LONG $0xeeebd1c5 // vpor %xmm6, %xmm5, %xmm5 + LONG $0xf274d9c5 // vpcmpeqb %xmm2, %xmm4, %xmm6 + LONG $0xe374d9c5 
// vpcmpeqb %xmm3, %xmm4, %xmm4 + LONG $0xe6ebd9c5 // vpor %xmm6, %xmm4, %xmm4 + LONG $0xe5ebd9c5 // vpor %xmm5, %xmm4, %xmm4 + LONG $0xccd7f9c5 // vpmovmskb %xmm4, %ecx + NOTL CX + BSFL CX, CX + ADDQ CX, AX + CMPL CX, $16 + JB LBB2_25 + ADDQ $16, R10 + ADDQ $-16, R11 + CMPQ R11, $15 + JA LBB2_4 + LONG $0x446ffac5; WORD $0xf037 // vmovdqu $-16(%rdi,%rsi), %xmm0 + QUAD $0xfffffe450d74f9c5 // vpcmpeqb $-443(%rip), %xmm0, %xmm1 /* LCPI2_0(%rip) */ + QUAD $0xfffffe4d1574f9c5 // vpcmpeqb $-435(%rip), %xmm0, %xmm2 /* LCPI2_1(%rip) */ + LONG $0xcaebf1c5 // vpor %xmm2, %xmm1, %xmm1 + QUAD $0xfffffe511574f9c5 // vpcmpeqb $-431(%rip), %xmm0, %xmm2 /* LCPI2_2(%rip) */ + QUAD $0xfffffe590574f9c5 // vpcmpeqb $-423(%rip), %xmm0, %xmm0 /* LCPI2_3(%rip) */ + LONG $0xc2ebf9c5 // vpor %xmm2, %xmm0, %xmm0 + LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 + LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx + NOTL CX + BSFL CX, CX + SUBQ R9, R8 + ADDQ AX, R8 + LEAQ -32(CX)(R8*1), AX + ADDQ DX, AX + BYTE $0x5d // popq %rbp + RET + +// .set L2_0_set_23, LBB2_23-LJTI2_0 +// .set L2_0_set_22, LBB2_22-LJTI2_0 +// .set L2_0_set_21, LBB2_21-LJTI2_0 +// .set L2_0_set_20, LBB2_20-LJTI2_0 +// .set L2_0_set_19, LBB2_19-LJTI2_0 +// .set L2_0_set_18, LBB2_18-LJTI2_0 +// .set L2_0_set_17, LBB2_17-LJTI2_0 +// .set L2_0_set_16, LBB2_16-LJTI2_0 +// .set L2_0_set_15, LBB2_15-LJTI2_0 +// .set L2_0_set_14, LBB2_14-LJTI2_0 +// .set L2_0_set_13, LBB2_13-LJTI2_0 +// .set L2_0_set_12, LBB2_12-LJTI2_0 +// .set L2_0_set_11, LBB2_11-LJTI2_0 +// .set L2_0_set_10, LBB2_10-LJTI2_0 +// .set L2_0_set_9, LBB2_9-LJTI2_0 +LJTI2_0: + LONG $0xffffff01 // .long L2_0_set_23 + LONG $0xfffffefa // .long L2_0_set_22 + LONG $0xfffffef3 // .long L2_0_set_21 + LONG $0xfffffeec // .long L2_0_set_20 + LONG $0xfffffee5 // .long L2_0_set_19 + LONG $0xfffffede // .long L2_0_set_18 + LONG $0xfffffed7 // .long L2_0_set_17 + LONG $0xfffffed0 // .long L2_0_set_16 + LONG $0xfffffec9 // .long L2_0_set_15 + LONG $0xfffffec2 // .long L2_0_set_14 + LONG 
$0xfffffebb // .long L2_0_set_13 + LONG $0xfffffeb4 // .long L2_0_set_12 + LONG $0xfffffead // .long L2_0_set_11 + LONG $0xfffffea6 // .long L2_0_set_10 + LONG $0xfffffe97 // .long L2_0_set_9 + +_strchr1: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ 8(DI), R10 + SUBQ SI, R10 + MOVQ 0(DI), R11 + ADDQ SI, R11 + LONG $0xc26ef9c5 // vmovd %edx, %xmm0 + LONG $0xc9eff1c5 // vpxor %xmm1, %xmm1, %xmm1 + LONG $0x0079e2c4; BYTE $0xc1 // vpshufb %xmm1, %xmm0, %xmm0 + MOVQ R11, CX + MOVQ R10, AX + MOVQ R11, DX + ANDQ $15, CX + JE LBB3_5 + MOVQ R11, AX + ANDQ $-16, AX + LONG $0x0874f9c5 // vpcmpeqb (%rax), %xmm0, %xmm1 + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + SHRQ CX, AX + TESTQ AX, AX + JE LBB3_4 + ORQ $65536, AX + BSFQ AX, CX + CMPQ CX, R10 + MOVQ $-1, AX + LONG $0xc14c0f48 // cmovlq %rcx, %rax + JMP LBB3_3 + +LBB3_4: + MOVL $16, DI + SUBQ CX, DI + LEAQ 0(R11)(DI*1), DX + MOVQ R10, AX + SUBQ DI, AX + +LBB3_5: + CMPQ AX, $64 + JL LBB3_6 + +LBB3_12: + LONG $0x2274f9c5 // vpcmpeqb (%rdx), %xmm0, %xmm4 + LONG $0x5a74f9c5; BYTE $0x10 // vpcmpeqb $16(%rdx), %xmm0, %xmm3 + LONG $0x5274f9c5; BYTE $0x20 // vpcmpeqb $32(%rdx), %xmm0, %xmm2 + LONG $0x4a74f9c5; BYTE $0x30 // vpcmpeqb $48(%rdx), %xmm0, %xmm1 + LONG $0xecebe1c5 // vpor %xmm4, %xmm3, %xmm5 + LONG $0xf1ebe9c5 // vpor %xmm1, %xmm2, %xmm6 + LONG $0xeeebd1c5 // vpor %xmm6, %xmm5, %xmm5 + LONG $0xcdd7f9c5 // vpmovmskb %xmm5, %ecx + TESTW CX, CX + JNE LBB3_14 + ADDQ $64, DX + LEAQ -64(AX), CX + CMPQ AX, $127 + MOVQ CX, AX + JG LBB3_12 + MOVQ R10, AX + TESTQ CX, CX + JNS LBB3_8 + JMP LBB3_3 + +LBB3_6: + MOVQ AX, CX + MOVQ R10, AX + TESTQ CX, CX + JS LBB3_3 + +LBB3_8: + LONG $0x0a74f9c5 // vpcmpeqb (%rdx), %xmm0, %xmm1 + LONG $0xc9d779c5 // vpmovmskb %xmm1, %r9d + TESTW R9, R9 + JE LBB3_21 + +LBB3_9: + MOVQ DX, R8 + +LBB3_10: + MOVWLZX R9, AX + ORQ $65536, AX + BSFQ AX, DX + MOVQ $-1, AX + CMPQ DX, CX + JGE LBB3_3 + SUBQ R11, R8 + ADDQ DX, R8 + MOVQ R8, AX + JMP LBB3_3 + +LBB3_14: + LONG 
$0xc4d7f9c5 // vpmovmskb %xmm4, %eax + TESTW AX, AX + JE LBB3_16 + MOVWLZX AX, AX + SUBQ R11, DX + ORQ $65536, AX + BSFQ AX, AX + ADDQ DX, AX + JMP LBB3_3 + +LBB3_21: + MOVQ R10, AX + CMPQ CX, $15 + JLE LBB3_3 + LEAQ 16(DX), R8 + LONG $0x7479c1c4; BYTE $0x08 // vpcmpeqb (%r8), %xmm0, %xmm1 + LONG $0xc9d779c5 // vpmovmskb %xmm1, %r9d + TESTW R9, R9 + JE LBB3_24 + ADDQ $-16, CX + JMP LBB3_10 + +LBB3_16: + LONG $0xc3d7f9c5 // vpmovmskb %xmm3, %eax + TESTW AX, AX + JE LBB3_18 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + SUBQ R11, DX + LEAQ 16(DX)(AX*1), AX + JMP LBB3_3 + +LBB3_18: + LONG $0xc2d7f9c5 // vpmovmskb %xmm2, %eax + SUBQ R11, DX + TESTW AX, AX + JE LBB3_20 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + LEAQ 32(DX)(AX*1), AX + JMP LBB3_3 + +LBB3_24: + MOVQ R10, AX + CMPQ CX, $32 + JL LBB3_3 + LEAQ 32(DX), R8 + LONG $0x7479c1c4; BYTE $0x08 // vpcmpeqb (%r8), %xmm0, %xmm1 + LONG $0xc9d779c5 // vpmovmskb %xmm1, %r9d + TESTW R9, R9 + JE LBB3_27 + ADDQ $-32, CX + JMP LBB3_10 + +LBB3_20: + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + ORQ $65536, AX + BSFQ AX, AX + LEAQ 48(DX)(AX*1), AX + +LBB3_3: + ADDQ AX, SI + CMPQ AX, R10 + MOVQ $-1, AX + LONG $0xc6420f48 // cmovbq %rsi, %rax + BYTE $0x5d // popq %rbp + RET + +LBB3_27: + MOVQ R10, AX + CMPQ CX, $48 + JL LBB3_3 + ADDQ $48, DX + LONG $0x0274f9c5 // vpcmpeqb (%rdx), %xmm0, %xmm0 + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + MOVQ R10, AX + TESTW R9, R9 + JE LBB3_3 + ADDQ $-48, CX + JMP LBB3_9 + +LCPI4_0: + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0xff // .byte 255 + BYTE $0x00 // .space 1, '\x00' + BYTE $0xff // .byte 255 + +_strchr2: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ 
8(DI), R8 + MOVQ R8, R10 + SUBQ SI, R10 + JE LBB4_1 + MOVQ 0(DI), R9 + LEAQ 0(R9)(SI*1), DI + CMPQ R10, $16 + JAE LBB4_3 + LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 + LEAQ -1(R10), AX + CMPQ AX, $14 + JA LBB4_24 + LONG $0x55058d4c; WORD $0x0001; BYTE $0x00 // leaq $341(%rip), %r8 /* LJTI4_0(%rip) */ + MOVLQSX 0(R8)(AX*4), AX + ADDQ R8, AX + JMP AX + +LBB4_9: + QUAD $0xffffffa1056ffac5 // vmovdqu $-95(%rip), %xmm0 /* LCPI4_0(%rip) */ + LONG $0x2079e3c4; WORD $0x0e47; BYTE $0x0e // vpinsrb $14, $14(%rdi), %xmm0, %xmm0 + +LBB4_10: + LONG $0x2079e3c4; WORD $0x0d47; BYTE $0x0d // vpinsrb $13, $13(%rdi), %xmm0, %xmm0 + +LBB4_11: + LONG $0x2079e3c4; WORD $0x0c47; BYTE $0x0c // vpinsrb $12, $12(%rdi), %xmm0, %xmm0 + +LBB4_12: + LONG $0x2079e3c4; WORD $0x0b47; BYTE $0x0b // vpinsrb $11, $11(%rdi), %xmm0, %xmm0 + +LBB4_13: + LONG $0x2079e3c4; WORD $0x0a47; BYTE $0x0a // vpinsrb $10, $10(%rdi), %xmm0, %xmm0 + +LBB4_14: + LONG $0x2079e3c4; WORD $0x0947; BYTE $0x09 // vpinsrb $9, $9(%rdi), %xmm0, %xmm0 + +LBB4_15: + LONG $0x2079e3c4; WORD $0x0847; BYTE $0x08 // vpinsrb $8, $8(%rdi), %xmm0, %xmm0 + +LBB4_16: + LONG $0x2079e3c4; WORD $0x0747; BYTE $0x07 // vpinsrb $7, $7(%rdi), %xmm0, %xmm0 + +LBB4_17: + LONG $0x2079e3c4; WORD $0x0647; BYTE $0x06 // vpinsrb $6, $6(%rdi), %xmm0, %xmm0 + +LBB4_18: + LONG $0x2079e3c4; WORD $0x0547; BYTE $0x05 // vpinsrb $5, $5(%rdi), %xmm0, %xmm0 + +LBB4_19: + LONG $0x2079e3c4; WORD $0x0447; BYTE $0x04 // vpinsrb $4, $4(%rdi), %xmm0, %xmm0 + +LBB4_20: + LONG $0x2079e3c4; WORD $0x0347; BYTE $0x03 // vpinsrb $3, $3(%rdi), %xmm0, %xmm0 + +LBB4_21: + LONG $0x2079e3c4; WORD $0x0247; BYTE $0x02 // vpinsrb $2, $2(%rdi), %xmm0, %xmm0 + +LBB4_22: + LONG $0x2079e3c4; WORD $0x0147; BYTE $0x01 // vpinsrb $1, $1(%rdi), %xmm0, %xmm0 + +LBB4_23: + LONG $0x2079e3c4; WORD $0x0007 // vpinsrb $0, (%rdi), %xmm0, %xmm0 + +LBB4_24: + MOVBLZX DX, AX + LONG $0xc86ef9c5 // vmovd %eax, %xmm1 + LONG $0xd2efe9c5 // vpxor %xmm2, %xmm2, %xmm2 + LONG $0x0071e2c4; BYTE 
$0xca // vpshufb %xmm2, %xmm1, %xmm1 + LONG $0xc874f1c5 // vpcmpeqb %xmm0, %xmm1, %xmm1 + MOVBLZX CX, AX + LONG $0xd86ef9c5 // vmovd %eax, %xmm3 + LONG $0x0061e2c4; BYTE $0xd2 // vpshufb %xmm2, %xmm3, %xmm2 + LONG $0xc074e9c5 // vpcmpeqb %xmm0, %xmm2, %xmm0 + LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + ORL $-65536, AX + BSFL AX, CX + JMP LBB4_25 + +LBB4_1: + XORL CX, CX + JMP LBB4_25 + +LBB4_3: + MOVBLZX DX, AX + LONG $0xc06ef9c5 // vmovd %eax, %xmm0 + LONG $0xc9eff1c5 // vpxor %xmm1, %xmm1, %xmm1 + LONG $0x0079e2c4; BYTE $0xc1 // vpshufb %xmm1, %xmm0, %xmm0 + MOVBLZX CX, AX + LONG $0xd06ef9c5 // vmovd %eax, %xmm2 + LONG $0x0069e2c4; BYTE $0xc9 // vpshufb %xmm1, %xmm2, %xmm1 + LEAQ -16(R10), R11 + ANDQ $-16, R11 + XORL CX, CX + MOVQ R10, AX + +LBB4_4: + LONG $0x176ffac5 // vmovdqu (%rdi), %xmm2 + LONG $0xda74f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm3 + LONG $0xd274f1c5 // vpcmpeqb %xmm2, %xmm1, %xmm2 + LONG $0xd3ebe9c5 // vpor %xmm3, %xmm2, %xmm2 + LONG $0xd2d7f9c5 // vpmovmskb %xmm2, %edx + ORL $-65536, DX + BSFL DX, DX + ADDQ DX, CX + CMPL DX, $16 + JB LBB4_25 + ADDQ $16, DI + ADDQ $-16, AX + CMPQ AX, $15 + JA LBB4_4 + LONG $0x6f7a81c4; WORD $0x0154; BYTE $0xf0 // vmovdqu $-16(%r9,%r8), %xmm2 + LONG $0xc274f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm0 + LONG $0xca74f1c5 // vpcmpeqb %xmm2, %xmm1, %xmm1 + LONG $0xc0ebf1c5 // vpor %xmm0, %xmm1, %xmm0 + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + ORL $-65536, AX + BSFL AX, AX + MOVQ R10, DX + SUBQ R11, DX + ADDQ CX, DX + LEAQ -32(AX)(DX*1), CX + +LBB4_25: + ADDQ CX, SI + CMPQ CX, R10 + MOVQ $-1, AX + LONG $0xc6420f48 // cmovbq %rsi, %rax + BYTE $0x5d // popq %rbp + RET + +// .set L4_0_set_23, LBB4_23-LJTI4_0 +// .set L4_0_set_22, LBB4_22-LJTI4_0 +// .set L4_0_set_21, LBB4_21-LJTI4_0 +// .set L4_0_set_20, LBB4_20-LJTI4_0 +// .set L4_0_set_19, LBB4_19-LJTI4_0 +// .set L4_0_set_18, LBB4_18-LJTI4_0 +// .set L4_0_set_17, LBB4_17-LJTI4_0 +// .set L4_0_set_16, LBB4_16-LJTI4_0 +// .set 
L4_0_set_15, LBB4_15-LJTI4_0 +// .set L4_0_set_14, LBB4_14-LJTI4_0 +// .set L4_0_set_13, LBB4_13-LJTI4_0 +// .set L4_0_set_12, LBB4_12-LJTI4_0 +// .set L4_0_set_11, LBB4_11-LJTI4_0 +// .set L4_0_set_10, LBB4_10-LJTI4_0 +// .set L4_0_set_9, LBB4_9-LJTI4_0 +LJTI4_0: + LONG $0xffffff1e // .long L4_0_set_23 + LONG $0xffffff17 // .long L4_0_set_22 + LONG $0xffffff10 // .long L4_0_set_21 + LONG $0xffffff09 // .long L4_0_set_20 + LONG $0xffffff02 // .long L4_0_set_19 + LONG $0xfffffefb // .long L4_0_set_18 + LONG $0xfffffef4 // .long L4_0_set_17 + LONG $0xfffffeed // .long L4_0_set_16 + LONG $0xfffffee6 // .long L4_0_set_15 + LONG $0xfffffedf // .long L4_0_set_14 + LONG $0xfffffed8 // .long L4_0_set_13 + LONG $0xfffffed1 // .long L4_0_set_12 + LONG $0xfffffeca // .long L4_0_set_11 + LONG $0xfffffec3 // .long L4_0_set_10 + LONG $0xfffffeb4 // .long L4_0_set_9 + +LCPI5_0: + QUAD $0x8000000000000000 // .quad 0x8000000000000000 + QUAD $0x8000000000000000 // .quad 0x8000000000000000 + +LCPI5_1: + QUAD $0x3fd34413509f79fe // .quad 0x3fd34413509f79fe + +LCPI5_2: + QUAD $0x4075b00000000000 // .quad 0x4075b00000000000 + +LCPI5_3: + QUAD $0x3030303030303030; QUAD $0x3030303030303030 // .space 16, '0000000000000000' + QUAD $0x3030303030303030; QUAD $0x3030303030303030 // .space 16, '0000000000000000' + +_f64toa: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + SUBQ $96, SP + MOVQ DI, R15 + LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 + LONG $0xc12ef9c5 // vucomisd %xmm1, %xmm0 + JNE LBB5_2 + JP LBB5_2 + MOVB $48, 0(R15) + MOVL $1, AX + JMP LBB5_89 + +LBB5_2: + LONG $0xc82ef9c5 // vucomisd %xmm0, %xmm1 + JBE LBB5_4 + QUAD $0xffffff840557f9c5 // vxorpd $-124(%rip), %xmm0, %xmm0 /* LCPI5_0(%rip) */ + MOVB $45, 0(R15) + INCQ R15 + MOVL $1, -44(BP) + JMP LBB5_5 + +LBB5_4: + MOVL $0, -44(BP) + +LBB5_5: + LONG 
$0x7ef9e1c4; BYTE $0xc1 // vmovq %xmm0, %rcx + MOVQ $4503599627370496, DX + LEAQ -1(DX), SI + ANDQ CX, SI + SHRQ $52, CX + ANDL $2047, CX + LEAQ 0(SI)(DX*1), AX + LEAL -1075(CX), DI + TESTL CX, CX + LONG $0xc6440f48 // cmoveq %rsi, %rax + MOVL $-1074, SI + WORD $0x450f; BYTE $0xf7 // cmovnel %edi, %esi + XORL CX, CX + CMPQ AX, DX + SETEQ CX + INCL CX + LEAQ 1(AX)(AX*1), R10 + BSRQ R10, DX + XORQ $63, DX + MOVL DX, R12 + NOTL R12 + ADDL SI, R12 + SUBL CX, SI + MOVQ AX, DI + SHLQ CX, DI + DECQ DI + MOVL DX, CX + SHLQ CX, R10 + SUBL R12, SI + MOVL SI, CX + SHLQ CX, DI + MOVL $-61, CX + SUBL R12, CX + LONG $0xc12aebc5 // vcvtsi2sd %ecx, %xmm2, %xmm0 + QUAD $0xfffffef90559fbc5 // vmulsd $-263(%rip), %xmm0, %xmm0 /* LCPI5_1(%rip) */ + QUAD $0xfffffef90558fbc5 // vaddsd $-263(%rip), %xmm0, %xmm0 /* LCPI5_2(%rip) */ + LONG $0xc82cfbc5 // vcvttsd2si %xmm0, %ecx + LONG $0xc8e6f9c5 // vcvttpd2dq %xmm0, %xmm1 + LONG $0xc9e6fac5 // vcvtdq2pd %xmm1, %xmm1 + LONG $0xc15cfbc5 // vsubsd %xmm1, %xmm0, %xmm0 + LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 + XORL DX, DX + LONG $0xc12ef9c5 // vucomisd %xmm1, %xmm0 + SETHI DX + ADDL CX, DX + SARL $3, DX + LEAL 8(DX*8), R14 + INCL DX + MOVL $348, CX + MOVQ CX, -64(BP) + LONG $0x940d8d48; WORD $0x0034; BYTE $0x00 // leaq $13460(%rip), %rcx /* _TabPowE(%rip) */ + MOVBLSX 0(CX)(DX*2), SI + LONG $0x370d8d48; WORD $0x0035; BYTE $0x00 // leaq $13623(%rip), %rcx /* _TabPowF(%rip) */ + MOVQ 0(CX)(DX*8), R8 + BSRQ AX, CX + XORL $63, CX + SHLQ CX, AX + MULQ R8 + MOVQ DX, R9 + MOVQ AX, CX + MOVQ R10, AX + MULQ R8 + MOVQ AX, BX + MOVQ DX, R11 + SARQ $63, CX + SHRQ $63, BX + MOVQ DI, AX + MULQ R8 + ADDL R12, SI + SHRQ $63, AX + ADDQ DX, AX + MOVQ AX, -88(BP) + NOTQ AX + LEAQ -1(R11)(BX*1), R12 + MOVQ CX, -120(BP) + MOVQ CX, DX + MOVL $-64, R10 + SUBL SI, R10 + NEGL SI + MOVL $1, DI + MOVL SI, CX + SHLQ CX, DI + MOVQ R9, -112(BP) + SUBQ R9, DX + MOVQ R12, R9 + SHRQ CX, R9 + MOVQ R11, -104(BP) + MOVQ BX, -96(BP) + ADDQ BX, R11 + MOVQ DI, -136(BP) + 
DECQ DI + MOVL $1, SI + CMPQ R9, $10 + JB LBB5_14 + MOVL $2, SI + CMPQ R9, $100 + JB LBB5_14 + MOVL $3, SI + CMPQ R9, $1000 + JB LBB5_14 + MOVL $4, SI + CMPQ R9, $10000 + JB LBB5_14 + MOVL $5, SI + CMPQ R9, $100000 + JB LBB5_14 + MOVL $6, SI + CMPQ R9, $1000000 + JB LBB5_14 + MOVL $7, SI + CMPQ R9, $10000000 + JB LBB5_14 + MOVL $8, SI + CMPQ R9, $100000000 + JB LBB5_14 + CMPQ R9, $1000000000 + MOVL $10, SI + SBBL $0, SI + +LBB5_14: + MOVQ -64(BP), CX + MOVQ R14, -72(BP) + SUBL R14, CX + MOVQ CX, -64(BP) + LEAQ -1(AX)(R11*1), R8 + LEAQ -1(DX)(R11*1), AX + MOVQ AX, -80(BP) + MOVQ DI, -128(BP) + ANDQ DI, R12 + LONG $0x813d8d48; WORD $0x0009; BYTE $0x00 // leaq $2433(%rip), %rdi /* LJTI5_0(%rip) */ + MOVQ R15, -56(BP) + JMP LBB5_17 + +LBB5_15: + ADDB $48, R9 + MOVB R9, 0(R15) + INCQ R15 + +LBB5_16: + MOVQ R13, BX + MOVL R10, CX + SHLQ CX, BX + LEAQ 0(BX)(R12*1), AX + MOVQ R8, R14 + MOVQ R13, R9 + SUBQ AX, R14 + JAE LBB5_43 + +LBB5_17: + TESTL SI, SI + JLE LBB5_31 + DECL SI + MOVLQSX 0(DI)(SI*4), AX + ADDQ DI, AX + XORL R13, R13 + JMP AX + +LBB5_19: + MOVQ R9, AX + MOVQ $-3689348814741910323, CX + MULQ CX + SHRQ $3, DX + LEAQ 0(DX)(DX*1), AX + LEAQ 0(AX)(AX*4), AX + JMP LBB5_28 + +LBB5_20: + MOVQ R9, AX + SHRQ $2, AX + MOVQ $2951479051793528259, CX + MULQ CX + SHRQ $2, DX + IMUL3Q $100, DX, AX + JMP LBB5_28 + +LBB5_21: + MOVQ R9, AX + SHRQ $3, AX + MOVQ $2361183241434822607, CX + MULQ CX + SHRQ $4, DX + IMUL3Q $1000, DX, AX + JMP LBB5_28 + +LBB5_22: + MOVQ R9, AX + MOVQ $3777893186295716171, CX + MULQ CX + SHRQ $11, DX + IMUL3Q $10000, DX, AX + JMP LBB5_28 + +LBB5_23: + MOVQ R9, AX + SHRQ $5, AX + MOVQ $755578637259143235, CX + MULQ CX + SHRQ $7, DX + IMUL3Q $100000, DX, AX + JMP LBB5_28 + +LBB5_24: + MOVQ R9, AX + MOVQ $4835703278458516699, CX + MULQ CX + SHRQ $18, DX + IMUL3Q $1000000, DX, AX + JMP LBB5_28 + +LBB5_25: + MOVQ R9, AX + MOVQ $-2972493582642298179, CX + MULQ CX + SHRQ $23, DX + IMUL3Q $10000000, DX, AX + JMP LBB5_28 + +LBB5_26: + MOVQ R9, AX + MOVQ 
$-6067343680855748867, CX + MULQ CX + SHRQ $26, DX + IMUL3Q $100000000, DX, AX + JMP LBB5_28 + +LBB5_27: + MOVQ R9, AX + SHRQ $9, AX + MOVQ $19342813113834067, CX + MULQ CX + SHRQ $11, DX + IMUL3Q $1000000000, DX, AX + +LBB5_28: + SUBQ AX, R9 + MOVQ R9, R13 + MOVQ DX, R9 + +LBB5_29: + CMPQ R15, -56(BP) + JA LBB5_15 + TESTL R9, R9 + JE LBB5_16 + JMP LBB5_15 + +LBB5_31: + XORL SI, SI + MOVQ -56(BP), R14 + MOVQ -136(BP), R9 + MOVQ -128(BP), DI + JMP LBB5_34 + +LBB5_32: + ADDB $48, DX + MOVB DX, 0(R15) + INCQ R15 + +LBB5_33: + DECL SI + MOVQ R8, CX + SUBQ R12, CX + JA LBB5_36 + +LBB5_34: + MOVQ R8, AX + ADDQ R12, R12 + LEAQ 0(R12)(R12*4), R12 + MOVQ R12, DX + MOVL R10, CX + SHRQ CX, DX + ADDQ R8, AX + LEAQ 0(AX)(AX*4), R8 + ANDQ DI, R12 + CMPQ R15, R14 + JA LBB5_32 + MOVBLZX DX, CX + TESTQ CX, CX + JE LBB5_33 + JMP LBB5_32 + +LBB5_36: + CMPQ CX, R9 + JB LBB5_50 + MOVL SI, CX + NEGL CX + MOVLQSX CX, CX + LONG $0xdd158d48; WORD $0x0034; BYTE $0x00 // leaq $13533(%rip), %rdx /* _TabPow10(%rip) */ + MOVQ -80(BP), DI + IMULQ 0(DX)(CX*8), DI + CMPQ R12, DI + MOVQ -64(BP), R10 + JAE LBB5_53 + LEAQ 0(AX)(AX*4), AX + LEAQ 0(R9)(R12*1), CX + SUBQ CX, AX + MOVQ DI, R8 + NEGQ R8 + MOVQ DI, BX + SUBQ R12, BX + +LBB5_39: + CMPQ CX, DI + JB LBB5_41 + LEAQ 0(R8)(CX*1), DX + CMPQ BX, DX + JBE LBB5_53 + +LBB5_41: + DECB -1(R15) + SUBQ R9, AX + SETCS DX + CMPQ CX, DI + JAE LBB5_53 + ADDQ R9, CX + SUBQ R9, BX + TESTB DX, DX + JE LBB5_39 + JMP LBB5_53 + +LBB5_43: + MOVL SI, CX + LONG $0x73158d48; WORD $0x0034; BYTE $0x00 // leaq $13427(%rip), %rdx /* _TabPow10(%rip) */ + MOVQ 0(DX)(CX*8), DI + MOVL R10, CX + SHLQ CX, DI + MOVQ -80(BP), R8 + CMPQ AX, R8 + JAE LBB5_51 + CMPQ R14, DI + MOVQ -64(BP), R10 + JB LBB5_52 + SUBQ -88(BP), R11 + ADDQ DI, R12 + LEAQ 0(R12)(BX*1), CX + SUBQ CX, R11 + ADDQ $-2, R11 + MOVQ -112(BP), R9 + ADDQ R9, R12 + MOVQ -120(BP), DX + SUBQ DX, R12 + MOVQ -96(BP), R13 + SUBQ R13, R12 + MOVQ -104(BP), R14 + SUBQ R14, R12 + LEAQ 1(R12)(BX*1), BX + ADDQ R13, DX + ADDQ 
R14, DX + SUBQ R9, DX + NOTQ AX + ADDQ DX, AX + MOVQ -56(BP), R14 + +LBB5_46: + CMPQ CX, R8 + JB LBB5_48 + CMPQ AX, BX + JBE LBB5_53 + +LBB5_48: + DECB -1(R15) + SUBQ DI, R11 + SETCS DX + CMPQ CX, R8 + JAE LBB5_53 + ADDQ DI, CX + ADDQ DI, BX + SUBQ DI, AX + TESTB DX, DX + JE LBB5_46 + JMP LBB5_53 + +LBB5_50: + MOVQ -64(BP), R10 + JMP LBB5_53 + +LBB5_51: + MOVQ -56(BP), R14 + MOVQ -64(BP), R10 + JMP LBB5_53 + +LBB5_52: + MOVQ -56(BP), R14 + +LBB5_53: + MOVQ R15, R12 + SUBQ R14, R12 + ADDL SI, R10 + LEAL 0(R10)(R12*1), R11 + TESTL R10, R10 + JS LBB5_59 + CMPL R11, $21 + JG LBB5_59 + TESTL R10, R10 + JE LBB5_86 + MOVLQSX R12, R9 + ADDQ R14, R9 + SUBL -72(BP), SI + ADDL $347, SI + CMPL SI, $127 + JB LBB5_84 + INCQ SI + MOVQ SI, R8 + ANDQ $-128, R8 + LEAQ -128(R8), AX + MOVQ AX, BX + SHRQ $7, BX + INCQ BX + MOVL BX, CX + ANDL $3, CX + CMPQ AX, $384 + JAE LBB5_77 + XORL DX, DX + JMP LBB5_79 + +LBB5_59: + LEAL -1(R11), DI + CMPL DI, $20 + JA LBB5_63 + LEAL 1(R12), AX + CMPL AX, R11 + JLE LBB5_72 + MOVLQSX AX, DX + MOVL R11, CX + +LBB5_62: + MOVBLZX -2(R14)(DX*1), BX + MOVB BX, -1(R14)(DX*1) + LEAQ -1(DX), SI + MOVQ SI, DX + CMPQ SI, CX + JG LBB5_62 + JMP LBB5_73 + +LBB5_63: + LEAL 5(R11), AX + CMPL AX, $5 + JA LBB5_74 + MOVQ R10, R15 + MOVL $2, AX + MOVL $2, DX + SUBL R11, DX + LEAL 0(DX)(R12*1), CX + TESTL CX, CX + JLE LBB5_68 + MOVL CX, BX + MOVL DX, R10 + CMPL CX, $128 + JAE LBB5_96 + +LBB5_66: + INCQ BX + MOVQ R14, CX + SUBQ R10, CX + +LBB5_67: + MOVBLZX -2(CX)(BX*1), DX + MOVB DX, -2(R14)(BX*1) + DECQ BX + CMPQ BX, $1 + JG LBB5_67 + +LBB5_68: + SUBL R15, AX + MOVW $11824, 0(R14) + TESTL R11, R11 + JE LBB5_87 + NEGL R11 + LEAQ 2(R14), DI + MOVQ -72(BP), DX + SUBL SI, DX + SUBL R12, DX + ADDL $-349, DX + CMPL DX, $127 + JB LBB5_121 + INCQ DX + MOVQ DX, R8 + ANDQ $-128, DX + LEAQ -128(DX), BX + MOVQ BX, CX + SHRQ $7, CX + INCQ CX + MOVL CX, SI + ANDL $3, SI + CMPQ BX, $384 + JAE LBB5_113 + XORL BX, BX + JMP LBB5_115 + +LBB5_72: + MOVL R11, CX + +LBB5_73: + MOVL -44(BP), 
BX + MOVB $46, 0(R14)(CX*1) + JMP LBB5_88 + +LBB5_74: + CMPL R12, $1 + JNE LBB5_90 + MOVB $101, 1(R14) + LEAQ 2(R14), DX + TESTL DI, DI + JS LBB5_103 + MOVQ DX, R14 + MOVL DI, CX + MOVL -44(BP), BX + JMP LBB5_105 + +LBB5_77: + MOVLQSX R12, AX + LEAQ 480(AX)(R14*1), AX + ANDQ $-4, BX + NEGQ BX + XORL DX, DX + QUAD $0xfffff8f60528fdc5 // vmovapd $-1802(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_78: + QUAD $0xfffe20108411fdc5; BYTE $0xff // vmovupd %ymm0, $-480(%rax,%rdx) + QUAD $0xfffe40108411fdc5; BYTE $0xff // vmovupd %ymm0, $-448(%rax,%rdx) + QUAD $0xfffe60108411fdc5; BYTE $0xff // vmovupd %ymm0, $-416(%rax,%rdx) + QUAD $0xfffe80108411fdc5; BYTE $0xff // vmovupd %ymm0, $-384(%rax,%rdx) + QUAD $0xfffea0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-352(%rax,%rdx) + QUAD $0xfffec0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-320(%rax,%rdx) + QUAD $0xfffee0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-288(%rax,%rdx) + QUAD $0xffff00108411fdc5; BYTE $0xff // vmovupd %ymm0, $-256(%rax,%rdx) + QUAD $0xffff20108411fdc5; BYTE $0xff // vmovupd %ymm0, $-224(%rax,%rdx) + QUAD $0xffff40108411fdc5; BYTE $0xff // vmovupd %ymm0, $-192(%rax,%rdx) + QUAD $0xffff60108411fdc5; BYTE $0xff // vmovupd %ymm0, $-160(%rax,%rdx) + LONG $0x4411fdc5; WORD $0x8010 // vmovupd %ymm0, $-128(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xa010 // vmovupd %ymm0, $-96(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xc010 // vmovupd %ymm0, $-64(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xe010 // vmovupd %ymm0, $-32(%rax,%rdx) + LONG $0x0411fdc5; BYTE $0x10 // vmovupd %ymm0, (%rax,%rdx) + ADDQ $512, DX + ADDQ $4, BX + JNE LBB5_78 + +LBB5_79: + TESTQ CX, CX + JE LBB5_82 + SUBL R14, R15 + MOVLQSX R15, AX + ADDQ AX, DX + LEAQ 96(R14)(DX*1), AX + NEGQ CX + QUAD $0xfffff8470528fdc5 // vmovapd $-1977(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_81: + LONG $0x4011fdc5; BYTE $0xa0 // vmovupd %ymm0, $-96(%rax) + LONG $0x4011fdc5; BYTE $0xc0 // vmovupd %ymm0, $-64(%rax) + LONG $0x4011fdc5; BYTE $0xe0 // vmovupd %ymm0, $-32(%rax) + LONG $0x0011fdc5 
// vmovupd %ymm0, (%rax) + SUBQ $-128, AX + INCQ CX + JNE LBB5_81 + +LBB5_82: + CMPQ SI, R8 + JE LBB5_86 + SUBL R8, R10 + ADDQ R8, R9 + +LBB5_84: + MOVL R10, AX + XORL CX, CX + +LBB5_85: + MOVB $48, 0(R9)(CX*1) + INCQ CX + CMPL AX, CX + JNE LBB5_85 + +LBB5_86: + MOVL R11, AX + +LBB5_87: + MOVL -44(BP), BX + +LBB5_88: + ADDL BX, AX + +LBB5_89: + ADDQ $96, SP + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + WORD $0xf8c5; BYTE $0x77 // vzeroupper + RET + +LBB5_90: + LEAL 1(R12), AX + CMPL AX, $2 + JL LBB5_93 + MOVL AX, AX + INCQ AX + +LBB5_92: + MOVBLZX -3(R14)(AX*1), CX + MOVB CX, -2(R14)(AX*1) + DECQ AX + CMPQ AX, $2 + JG LBB5_92 + +LBB5_93: + MOVB $46, 1(R14) + MOVLQSX R12, AX + MOVB $101, 1(R14)(AX*1) + LEAQ 2(R14)(AX*1), DX + TESTL DI, DI + MOVL -44(BP), BX + JS LBB5_108 + MOVQ DX, CX + JMP LBB5_109 + +LBB5_96: + LEAQ -1(BX), DI + LEAQ -1(R14)(BX*1), DX + CMPQ DI, DX + JA LBB5_66 + MOVQ R10, R8 + NOTQ R8 + LEAQ 0(R8)(BX*1), DX + ADDQ R14, DX + CMPQ DI, DX + JA LBB5_66 + TESTL CX, CX + MOVL $1, CX + LONG $0xcb440f48 // cmoveq %rbx, %rcx + LEAQ -1(CX)(R14*1), DX + MOVQ BX, DI + SUBQ R10, DI + ADDQ R14, DI + CMPQ DX, DI + JAE LBB5_100 + LEAQ 0(R14)(BX*1), DX + ADDQ R8, CX + ADDQ R14, CX + CMPQ CX, DX + JB LBB5_66 + +LBB5_100: + MOVL BX, R8 + ANDL $-128, R8 + LEAQ -128(R8), CX + MOVQ CX, R9 + SHRQ $7, R9 + INCQ R9 + TESTQ CX, CX + JE LBB5_125 + LEAQ -32(BX)(R14*1), CX + MOVQ R10, DI + NEGQ DI + MOVQ R9, DX + ANDQ $-2, DX + NEGQ DX + MOVQ $-1, R14 + +LBB5_102: + LONG $0x4410fcc5; WORD $0xa039 // vmovups $-96(%rcx,%rdi), %ymm0 + LONG $0x4c10fcc5; WORD $0xc039 // vmovups $-64(%rcx,%rdi), %ymm1 + LONG $0x5410fcc5; WORD $0xe039 // vmovups $-32(%rcx,%rdi), %ymm2 + LONG $0x1c10fcc5; BYTE $0x39 // vmovups (%rcx,%rdi), %ymm3 + LONG $0x1911fcc5 // vmovups %ymm3, (%rcx) + LONG $0x5111fcc5; BYTE $0xe0 // vmovups %ymm2, $-32(%rcx) + LONG $0x4911fcc5; BYTE $0xc0 // 
vmovups %ymm1, $-64(%rcx) + LONG $0x4111fcc5; BYTE $0xa0 // vmovups %ymm0, $-96(%rcx) + QUAD $0xffff20398410fdc5; BYTE $0xff // vmovupd $-224(%rcx,%rdi), %ymm0 + QUAD $0xffff40398c10fdc5; BYTE $0xff // vmovupd $-192(%rcx,%rdi), %ymm1 + QUAD $0xffff60399410fcc5; BYTE $0xff // vmovups $-160(%rcx,%rdi), %ymm2 + LONG $0x5c10fcc5; WORD $0x8039 // vmovups $-128(%rcx,%rdi), %ymm3 + LONG $0x5911fcc5; BYTE $0x80 // vmovups %ymm3, $-128(%rcx) + QUAD $0xffffff609111fcc5 // vmovups %ymm2, $-160(%rcx) + QUAD $0xffffff408911fdc5 // vmovupd %ymm1, $-192(%rcx) + QUAD $0xffffff208111fdc5 // vmovupd %ymm0, $-224(%rcx) + ADDQ $-256, R14 + ADDQ $-256, CX + ADDQ $2, DX + JNE LBB5_102 + JMP LBB5_126 + +LBB5_103: + MOVL DI, CX + NEGL CX + MOVB $45, 2(R14) + ADDQ $3, R14 + CMPL DI, $-9 + MOVL -44(BP), BX + JL LBB5_105 + ADDB $48, CX + MOVB CX, 0(R14) + MOVL $4, AX + JMP LBB5_88 + +LBB5_105: + MOVL R14, AX + SUBL DX, AX + CMPL CX, $99 + JG LBB5_107 + ADDL $4, AX + MOVL CX, CX + LONG $0xe2358d48; WORD $0x002f; BYTE $0x00 // leaq $12258(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(CX*2), DX + ADDQ CX, CX + MOVB DX, 0(R14) + ORL $1, CX + MOVB 0(CX)(SI*1), CX + MOVB CX, 1(R14) + JMP LBB5_88 + +LBB5_107: + ADDL $5, AX + MOVL CX, DX + IMUL3Q $1374389535, DX, DX + SHRQ $37, DX + LEAL 48(DX), SI + MOVB SI, 0(R14) + WORD $0xd26b; BYTE $0x64 // imull $100, %edx, %edx + SUBL DX, CX + LONG $0xa8358d48; WORD $0x002f; BYTE $0x00 // leaq $12200(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(CX*2), DX + MOVB 1(SI)(CX*2), CX + MOVB DX, 1(R14) + MOVB CX, 2(R14) + JMP LBB5_88 + +LBB5_108: + NEGL DI + LEAQ 1(DX), CX + MOVB $45, 0(DX) + +LBB5_109: + CMPL DI, $9 + JG LBB5_111 + MOVL CX, AX + SUBL DX, AX + LEAL 3(R12)(AX*1), AX + ADDB $48, DI + MOVB DI, 0(CX) + JMP LBB5_88 + +LBB5_111: + LEAL 2(R12), SI + MOVL CX, AX + SUBL DX, AX + ADDL SI, AX + CMPL DI, $99 + JG LBB5_124 + ADDL $2, AX + MOVL DI, DX + LONG $0x51358d48; WORD $0x002f; BYTE $0x00 // leaq $12113(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(DX*2), DI 
+ ADDQ DX, DX + MOVB DI, 0(CX) + ORL $1, DX + MOVB 0(DX)(SI*1), DX + MOVB DX, 1(CX) + JMP LBB5_88 + +LBB5_113: + ANDQ $-4, CX + NEGQ CX + XORL BX, BX + QUAD $0xfffff5a30528fdc5 // vmovapd $-2653(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_114: + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x02 // vmovupd %ymm0, $2(%r14,%rbx) + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x22 // vmovupd %ymm0, $34(%r14,%rbx) + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x42 // vmovupd %ymm0, $66(%r14,%rbx) + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x62 // vmovupd %ymm0, $98(%r14,%rbx) + QUAD $0x00821e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $130(%r14,%rbx) + QUAD $0x00a21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $162(%r14,%rbx) + QUAD $0x00c21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $194(%r14,%rbx) + QUAD $0x00e21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $226(%r14,%rbx) + QUAD $0x01021e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $258(%r14,%rbx) + QUAD $0x01221e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $290(%r14,%rbx) + QUAD $0x01421e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $322(%r14,%rbx) + QUAD $0x01621e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $354(%r14,%rbx) + QUAD $0x01821e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $386(%r14,%rbx) + QUAD $0x01a21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $418(%r14,%rbx) + QUAD $0x01c21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $450(%r14,%rbx) + QUAD $0x01e21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $482(%r14,%rbx) + ADDQ $512, BX + ADDQ $4, CX + JNE LBB5_114 + +LBB5_115: + TESTQ SI, SI + JE LBB5_118 + LEAQ 98(BX)(R14*1), CX + NEGQ SI + QUAD $0xfffff4e90528fdc5 // vmovapd $-2839(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_117: + LONG $0x4111fdc5; BYTE $0xa0 // vmovupd %ymm0, $-96(%rcx) + LONG $0x4111fdc5; BYTE $0xc0 // vmovupd %ymm0, $-64(%rcx) + LONG $0x4111fdc5; BYTE $0xe0 // vmovupd %ymm0, $-32(%rcx) + LONG $0x0111fdc5 // vmovupd %ymm0, (%rcx) + SUBQ $-128, CX + INCQ SI + JNE LBB5_117 + +LBB5_118: + CMPQ R8, DX + JE LBB5_87 + SUBL DX, R11 + ADDQ DX, DI 
+ +LBB5_121: + MOVL R11, CX + XORL DX, DX + +LBB5_122: + MOVB $48, 0(DI)(DX*1) + INCQ DX + CMPL CX, DX + JNE LBB5_122 + JMP LBB5_87 + +LBB5_124: + ADDL $3, AX + MOVL DI, DX + IMUL3Q $1374389535, DX, DX + SHRQ $37, DX + LEAL 48(DX), SI + MOVB SI, 0(CX) + WORD $0xd26b; BYTE $0x64 // imull $100, %edx, %edx + SUBL DX, DI + LONG $0x0c158d48; WORD $0x002e; BYTE $0x00 // leaq $11788(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(DX)(DI*2), SI + MOVB 1(DX)(DI*2), DX + MOVB SI, 1(CX) + MOVB DX, 2(CX) + JMP LBB5_88 + +LBB5_125: + MOVQ $-1, R14 + +LBB5_126: + TESTB $1, R9 + JE LBB5_128 + ADDQ BX, R14 + MOVQ R14, CX + SUBQ R10, CX + MOVQ -56(BP), DX + LONG $0x4410fdc5; WORD $0x810a // vmovupd $-127(%rdx,%rcx), %ymm0 + LONG $0x4c10fdc5; WORD $0xa10a // vmovupd $-95(%rdx,%rcx), %ymm1 + LONG $0x5410fcc5; WORD $0xc10a // vmovups $-63(%rdx,%rcx), %ymm2 + LONG $0x5c10fcc5; WORD $0xe10a // vmovups $-31(%rdx,%rcx), %ymm3 + LONG $0x117ca1c4; WORD $0x325c; BYTE $0xe1 // vmovups %ymm3, $-31(%rdx,%r14) + LONG $0x117ca1c4; WORD $0x3254; BYTE $0xc1 // vmovups %ymm2, $-63(%rdx,%r14) + LONG $0x117da1c4; WORD $0x324c; BYTE $0xa1 // vmovupd %ymm1, $-95(%rdx,%r14) + LONG $0x117da1c4; WORD $0x3244; BYTE $0x81 // vmovupd %ymm0, $-127(%rdx,%r14) + +LBB5_128: + CMPQ R8, BX + MOVQ -56(BP), R14 + JE LBB5_68 + ANDL $127, BX + JMP LBB5_66 + +// .set L5_0_set_29, LBB5_29-LJTI5_0 +// .set L5_0_set_19, LBB5_19-LJTI5_0 +// .set L5_0_set_20, LBB5_20-LJTI5_0 +// .set L5_0_set_21, LBB5_21-LJTI5_0 +// .set L5_0_set_22, LBB5_22-LJTI5_0 +// .set L5_0_set_23, LBB5_23-LJTI5_0 +// .set L5_0_set_24, LBB5_24-LJTI5_0 +// .set L5_0_set_25, LBB5_25-LJTI5_0 +// .set L5_0_set_26, LBB5_26-LJTI5_0 +// .set L5_0_set_27, LBB5_27-LJTI5_0 +LJTI5_0: + LONG $0xfffff7e7 // .long L5_0_set_29 + LONG $0xfffff6c1 // .long L5_0_set_19 + LONG $0xfffff6e2 // .long L5_0_set_20 + LONG $0xfffff703 // .long L5_0_set_21 + LONG $0xfffff727 // .long L5_0_set_22 + LONG $0xfffff747 // .long L5_0_set_23 + LONG $0xfffff768 // .long L5_0_set_24 + LONG 
$0xfffff785 // .long L5_0_set_25 + LONG $0xfffff7a2 // .long L5_0_set_26 + LONG $0xfffff7bf // .long L5_0_set_27 + +_i64toa: + TESTQ SI, SI + JS LBB6_1 + JMP _u64toa + +LBB6_1: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVB $45, 0(DI) + INCQ DI + NEGQ SI + LONG $0x000044e8; BYTE $0x00 // callq _u64toa + INCL AX + BYTE $0x5d // popq %rbp + RET + +LCPI7_0: + QUAD $0x00000000d1b71759 // .quad 3518437209 + QUAD $0x00000000d1b71759 // .quad 3518437209 + +LCPI7_3: + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + WORD $0x000a // .word 10 + +LCPI7_4: + QUAD $0x3030303030303030; QUAD $0x3030303030303030 // .space 16, '0000000000000000' + +LCPI7_1: + QUAD $0x80003334147b20c5 // .quad -9223315738079846203 + +LCPI7_2: + QUAD $0x8000200008000080 // .quad -9223336852348469120 + +_u64toa: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + CMPQ SI, $9999 + JA LBB7_8 + MOVWLZX SI, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + LEAQ 0(AX)(AX*1), DX + WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax + MOVL SI, CX + SUBL AX, CX + MOVWLZX CX, AX + ADDQ AX, AX + CMPL SI, $1000 + JB LBB7_3 + LONG $0xd00d8d48; WORD $0x002c; BYTE $0x00 // leaq $11472(%rip), %rcx /* _Digits(%rip) */ + MOVB 0(DX)(CX*1), CX + MOVB CX, 0(DI) + MOVL $1, CX + JMP LBB7_4 + +LBB7_3: + XORL CX, CX + CMPL SI, $100 + JB LBB7_5 + +LBB7_4: + MOVWLZX DX, DX + ORQ $1, DX + LONG $0xaf358d48; WORD $0x002c; BYTE $0x00 // leaq $11439(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(DX)(SI*1), DX + MOVL CX, SI + INCL CX + MOVB DX, 0(DI)(SI*1) + +LBB7_6: + LONG $0x9e158d48; WORD $0x002c; BYTE $0x00 // leaq $11422(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(AX)(DX*1), DX + MOVL CX, SI + INCL CX + MOVB DX, 0(DI)(SI*1) + +LBB7_7: + MOVWLZX AX, AX + ORQ $1, AX + LONG $0x86158d48; WORD $0x002c; BYTE 
$0x00 // leaq $11398(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(AX)(DX*1), AX + MOVL CX, DX + INCL CX + MOVB AX, 0(DI)(DX*1) + MOVL CX, AX + BYTE $0x5d // popq %rbp + RET + +LBB7_5: + XORL CX, CX + CMPL SI, $10 + JAE LBB7_6 + JMP LBB7_7 + +LBB7_8: + CMPQ SI, $99999999 + JA LBB7_16 + MOVL SI, AX + MOVL $3518437209, DX + IMULQ AX, DX + SHRQ $45, DX + LONG $0x10c26944; WORD $0x0027; BYTE $0x00 // imull $10000, %edx, %r8d + MOVL SI, CX + SUBL R8, CX + IMUL3Q $1125899907, AX, R10 + SHRQ $49, R10 + ANDL $-2, R10 + MOVWLZX DX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax + SUBL AX, DX + MOVWLZX DX, R9 + ADDQ R9, R9 + MOVWLZX CX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + LEAQ 0(AX)(AX*1), R8 + WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax + SUBL AX, CX + MOVWLZX CX, R11 + ADDQ R11, R11 + CMPL SI, $10000000 + JB LBB7_11 + LONG $0xef058d48; WORD $0x002b; BYTE $0x00 // leaq $11247(%rip), %rax /* _Digits(%rip) */ + MOVB 0(R10)(AX*1), AX + MOVB AX, 0(DI) + MOVL $1, CX + JMP LBB7_12 + +LBB7_11: + XORL CX, CX + CMPL SI, $1000000 + JB LBB7_13 + +LBB7_12: + MOVL R10, AX + ORQ $1, AX + LONG $0xca358d48; WORD $0x002b; BYTE $0x00 // leaq $11210(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(AX)(SI*1), AX + MOVL CX, SI + INCL CX + MOVB AX, 0(DI)(SI*1) + +LBB7_14: + LONG $0xb9058d48; WORD $0x002b; BYTE $0x00 // leaq $11193(%rip), %rax /* _Digits(%rip) */ + MOVB 0(R9)(AX*1), AX + MOVL CX, SI + INCL CX + MOVB AX, 0(DI)(SI*1) + +LBB7_15: + MOVWLZX R9, AX + ORQ $1, AX + LONG $0x9f358d48; WORD $0x002b; BYTE $0x00 // leaq $11167(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(AX)(SI*1), AX + MOVL CX, DX + MOVB AX, 0(DI)(DX*1) + MOVB 0(R8)(SI*1), AX + MOVB AX, 1(DI)(DX*1) + MOVWLZX R8, AX + ORQ $1, AX + MOVB 0(AX)(SI*1), AX + MOVB AX, 2(DI)(DX*1) + MOVB 0(R11)(SI*1), AX + MOVB AX, 3(DI)(DX*1) + MOVWLZX R11, AX + ORQ $1, AX + MOVB 0(AX)(SI*1), AX + ADDL 
$5, CX + MOVB AX, 4(DI)(DX*1) + MOVL CX, AX + BYTE $0x5d // popq %rbp + RET + +LBB7_13: + XORL CX, CX + CMPL SI, $100000 + JAE LBB7_14 + JMP LBB7_15 + +LBB7_16: + MOVQ $9999999999999999, AX + CMPQ SI, AX + JA LBB7_18 + MOVQ $-6067343680855748867, CX + MOVQ SI, AX + MULQ CX + SHRQ $26, DX + LONG $0xe100c269; WORD $0x05f5 // imull $100000000, %edx, %eax + SUBL AX, SI + LONG $0xc26ef9c5 // vmovd %edx, %xmm0 + QUAD $0xfffffdcb0d6ffac5 // vmovdqu $-565(%rip), %xmm1 /* LCPI7_0(%rip) */ + LONG $0xd1f4f9c5 // vpmuludq %xmm1, %xmm0, %xmm2 + LONG $0xd273e9c5; BYTE $0x2d // vpsrlq $45, %xmm2, %xmm2 + MOVL $10000, AX + LONG $0x6ef9e1c4; BYTE $0xd8 // vmovq %rax, %xmm3 + LONG $0xe3f4e9c5 // vpmuludq %xmm3, %xmm2, %xmm4 + LONG $0xc4faf9c5 // vpsubd %xmm4, %xmm0, %xmm0 + LONG $0xc061e9c5 // vpunpcklwd %xmm0, %xmm2, %xmm0 + LONG $0xf073f9c5; BYTE $0x02 // vpsllq $2, %xmm0, %xmm0 + LONG $0xc070fbc5; BYTE $0x50 // vpshuflw $80, %xmm0, %xmm0 + LONG $0xc070f9c5; BYTE $0x50 // vpshufd $80, %xmm0, %xmm0 + QUAD $0xfffffdc51512fbc5 // vmovddup $-571(%rip), %xmm2 /* LCPI7_1(%rip) */ + LONG $0xc2e4f9c5 // vpmulhuw %xmm2, %xmm0, %xmm0 + QUAD $0xfffffdc12512fbc5 // vmovddup $-575(%rip), %xmm4 /* LCPI7_2(%rip) */ + LONG $0xc4e4f9c5 // vpmulhuw %xmm4, %xmm0, %xmm0 + QUAD $0xfffffd8d2d6ffac5 // vmovdqu $-627(%rip), %xmm5 /* LCPI7_3(%rip) */ + LONG $0xf5d5f9c5 // vpmullw %xmm5, %xmm0, %xmm6 + LONG $0xf673c9c5; BYTE $0x10 // vpsllq $16, %xmm6, %xmm6 + LONG $0xc6f9f9c5 // vpsubw %xmm6, %xmm0, %xmm0 + LONG $0xf66ef9c5 // vmovd %esi, %xmm6 + LONG $0xc9f4c9c5 // vpmuludq %xmm1, %xmm6, %xmm1 + LONG $0xd173f1c5; BYTE $0x2d // vpsrlq $45, %xmm1, %xmm1 + LONG $0xdbf4f1c5 // vpmuludq %xmm3, %xmm1, %xmm3 + LONG $0xdbfac9c5 // vpsubd %xmm3, %xmm6, %xmm3 + LONG $0xcb61f1c5 // vpunpcklwd %xmm3, %xmm1, %xmm1 + LONG $0xf173f1c5; BYTE $0x02 // vpsllq $2, %xmm1, %xmm1 + LONG $0xc970fbc5; BYTE $0x50 // vpshuflw $80, %xmm1, %xmm1 + LONG $0xc970f9c5; BYTE $0x50 // vpshufd $80, %xmm1, %xmm1 + LONG $0xcae4f1c5 // 
vpmulhuw %xmm2, %xmm1, %xmm1 + LONG $0xcce4f1c5 // vpmulhuw %xmm4, %xmm1, %xmm1 + LONG $0xd5d5f1c5 // vpmullw %xmm5, %xmm1, %xmm2 + LONG $0xf273e9c5; BYTE $0x10 // vpsllq $16, %xmm2, %xmm2 + LONG $0xcaf9f1c5 // vpsubw %xmm2, %xmm1, %xmm1 + LONG $0xc167f9c5 // vpackuswb %xmm1, %xmm0, %xmm0 + QUAD $0xfffffd470dfcf9c5 // vpaddb $-697(%rip), %xmm0, %xmm1 /* LCPI7_4(%rip) */ + LONG $0xd2efe9c5 // vpxor %xmm2, %xmm2, %xmm2 + LONG $0xc274f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm0 + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + ORL $32768, AX + XORL $-32769, AX + BSFL AX, AX + MOVL $16, CX + SUBL AX, CX + SHLQ $4, AX + LONG $0x14158d48; WORD $0x002b; BYTE $0x00 // leaq $11028(%rip), %rdx /* _VecShiftShuffles(%rip) */ + LONG $0x0071e2c4; WORD $0x1004 // vpshufb (%rax,%rdx), %xmm1, %xmm0 + LONG $0x077ffac5 // vmovdqu %xmm0, (%rdi) + MOVL CX, AX + BYTE $0x5d // popq %rbp + RET + +LBB7_18: + MOVQ $4153837486827862103, CX + MOVQ SI, AX + MULQ CX + SHRQ $51, DX + MOVQ $10000000000000000, AX + IMULQ DX, AX + SUBQ AX, SI + CMPL DX, $9 + JA LBB7_20 + ADDB $48, DX + MOVB DX, 0(DI) + MOVL $1, CX + JMP LBB7_25 + +LBB7_20: + CMPL DX, $99 + JA LBB7_22 + MOVL DX, AX + LONG $0xf70d8d48; WORD $0x0029; BYTE $0x00 // leaq $10743(%rip), %rcx /* _Digits(%rip) */ + MOVB 0(CX)(AX*2), DX + MOVB 1(CX)(AX*2), AX + MOVB DX, 0(DI) + MOVB AX, 1(DI) + MOVL $2, CX + JMP LBB7_25 + +LBB7_22: + MOVL DX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + CMPL DX, $999 + JA LBB7_24 + ADDL $48, AX + MOVB AX, 0(DI) + MOVWLZX DX, AX + MOVL AX, CX + SHRL $2, CX + LONG $0x147bc969; WORD $0x0000 // imull $5243, %ecx, %ecx + SHRL $17, CX + WORD $0xc96b; BYTE $0x64 // imull $100, %ecx, %ecx + SUBL CX, AX + MOVWLZX AX, AX + LONG $0xa60d8d48; WORD $0x0029; BYTE $0x00 // leaq $10662(%rip), %rcx /* _Digits(%rip) */ + MOVB 0(CX)(AX*2), DX + MOVB 1(CX)(AX*2), AX + MOVB DX, 1(DI) + MOVB AX, 2(DI) + MOVL $3, CX + JMP LBB7_25 + +LBB7_24: + WORD $0xc86b; BYTE $0x64 // imull $100, %eax, %ecx 
+ SUBL CX, DX + MOVWLZX AX, AX + LONG $0x83058d4c; WORD $0x0029; BYTE $0x00 // leaq $10627(%rip), %r8 /* _Digits(%rip) */ + MOVB 0(R8)(AX*2), CX + MOVB 1(R8)(AX*2), AX + MOVB CX, 0(DI) + MOVB AX, 1(DI) + MOVWLZX DX, AX + MOVB 0(R8)(AX*2), CX + ADDQ AX, AX + MOVB CX, 2(DI) + ORL $1, AX + MOVWLZX AX, AX + MOVB 0(AX)(R8*1), AX + MOVB AX, 3(DI) + MOVL $4, CX + +LBB7_25: + MOVQ $-6067343680855748867, DX + MOVQ SI, AX + MULQ DX + SHRQ $26, DX + LONG $0xc26ef9c5 // vmovd %edx, %xmm0 + QUAD $0xfffffbe60d6ffac5 // vmovdqu $-1050(%rip), %xmm1 /* LCPI7_0(%rip) */ + LONG $0xd1f4f9c5 // vpmuludq %xmm1, %xmm0, %xmm2 + LONG $0xd273e9c5; BYTE $0x2d // vpsrlq $45, %xmm2, %xmm2 + MOVL $10000, AX + LONG $0x6ef9e1c4; BYTE $0xd8 // vmovq %rax, %xmm3 + LONG $0xe3f4e9c5 // vpmuludq %xmm3, %xmm2, %xmm4 + LONG $0xc4faf9c5 // vpsubd %xmm4, %xmm0, %xmm0 + LONG $0xc061e9c5 // vpunpcklwd %xmm0, %xmm2, %xmm0 + LONG $0xf073f9c5; BYTE $0x02 // vpsllq $2, %xmm0, %xmm0 + LONG $0xc070fbc5; BYTE $0x50 // vpshuflw $80, %xmm0, %xmm0 + LONG $0xc070f9c5; BYTE $0x50 // vpshufd $80, %xmm0, %xmm0 + QUAD $0xfffffbe01512fbc5 // vmovddup $-1056(%rip), %xmm2 /* LCPI7_1(%rip) */ + LONG $0xc2e4f9c5 // vpmulhuw %xmm2, %xmm0, %xmm0 + QUAD $0xfffffbdc2512fbc5 // vmovddup $-1060(%rip), %xmm4 /* LCPI7_2(%rip) */ + LONG $0xc4e4f9c5 // vpmulhuw %xmm4, %xmm0, %xmm0 + QUAD $0xfffffba82d6ffac5 // vmovdqu $-1112(%rip), %xmm5 /* LCPI7_3(%rip) */ + LONG $0xf5d5f9c5 // vpmullw %xmm5, %xmm0, %xmm6 + LONG $0xf673c9c5; BYTE $0x10 // vpsllq $16, %xmm6, %xmm6 + LONG $0xc6f9f9c5 // vpsubw %xmm6, %xmm0, %xmm0 + LONG $0xe100c269; WORD $0x05f5 // imull $100000000, %edx, %eax + SUBL AX, SI + LONG $0xf66ef9c5 // vmovd %esi, %xmm6 + LONG $0xc9f4c9c5 // vpmuludq %xmm1, %xmm6, %xmm1 + LONG $0xd173f1c5; BYTE $0x2d // vpsrlq $45, %xmm1, %xmm1 + LONG $0xdbf4f1c5 // vpmuludq %xmm3, %xmm1, %xmm3 + LONG $0xdbfac9c5 // vpsubd %xmm3, %xmm6, %xmm3 + LONG $0xcb61f1c5 // vpunpcklwd %xmm3, %xmm1, %xmm1 + LONG $0xf173f1c5; BYTE $0x02 // vpsllq $2, 
%xmm1, %xmm1 + LONG $0xc970fbc5; BYTE $0x50 // vpshuflw $80, %xmm1, %xmm1 + LONG $0xc970f9c5; BYTE $0x50 // vpshufd $80, %xmm1, %xmm1 + LONG $0xcae4f1c5 // vpmulhuw %xmm2, %xmm1, %xmm1 + LONG $0xcce4f1c5 // vpmulhuw %xmm4, %xmm1, %xmm1 + LONG $0xd5d5f1c5 // vpmullw %xmm5, %xmm1, %xmm2 + LONG $0xf273e9c5; BYTE $0x10 // vpsllq $16, %xmm2, %xmm2 + LONG $0xcaf9f1c5 // vpsubw %xmm2, %xmm1, %xmm1 + LONG $0xc167f9c5 // vpackuswb %xmm1, %xmm0, %xmm0 + QUAD $0xfffffb5a05fcf9c5 // vpaddb $-1190(%rip), %xmm0, %xmm0 /* LCPI7_4(%rip) */ + MOVL CX, AX + LONG $0x047ffac5; BYTE $0x07 // vmovdqu %xmm0, (%rdi,%rax) + ORL $16, CX + MOVL CX, AX + BYTE $0x5d // popq %rbp + RET + +LCPI8_0: + QUAD $0x5c5c5c5c5c5c5c5c; QUAD $0x5c5c5c5c5c5c5c5c // .space 16, '\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + +_unquote: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + SUBQ $16, SP + TESTQ SI, SI + JE LBB8_1 + MOVQ CX, -48(BP) + MOVQ R8, AX + MOVQ R8, -56(BP) + MOVL R8, R10 + ANDL $1, R10 + LONG $0x91058d4c; WORD $0x0029; BYTE $0x00 // leaq $10641(%rip), %r8 /* __UnquoteTab(%rip) */ + QUAD $0xffffffb5056ffac5 // vmovdqu $-75(%rip), %xmm0 /* LCPI8_0(%rip) */ + MOVQ DI, R9 + MOVQ SI, R14 + MOVQ DX, AX + +LBB8_3: + CMPB 0(R9), $92 + JNE LBB8_5 + XORL R15, R15 + JMP LBB8_15 + +LBB8_5: + MOVQ R14, R11 + MOVQ AX, R15 + MOVQ R9, R12 + CMPQ R14, $16 + JL LBB8_6 + +LBB8_11: + LONG $0x6f7ac1c4; WORD $0x240c // vmovdqu (%r12), %xmm1 + LONG $0x7f7ac1c4; BYTE $0x0f // vmovdqu %xmm1, (%r15) + LONG $0xc874f1c5 // vpcmpeqb %xmm0, %xmm1, %xmm1 + LONG $0xd9d7f9c5 // vpmovmskb %xmm1, %ebx + TESTW BX, BX + JNE LBB8_12 + ADDQ $16, R12 + ADDQ $16, R15 + LEAQ -16(R11), CX + CMPQ R11, $31 + MOVQ CX, R11 + JG LBB8_11 + +LBB8_6: + TESTQ R11, R11 + JE LBB8_90 + XORL BX, BX + +LBB8_8: + MOVBLZX 0(R12)(BX*1), CX + CMPB CX, $92 + JE LBB8_13 + MOVB CX, 0(R15)(BX*1) + 
INCQ BX + CMPQ R11, BX + JNE LBB8_8 + JMP LBB8_90 + +LBB8_13: + ADDQ BX, R12 + SUBQ R9, R12 + MOVQ R12, R15 + CMPQ R15, $-1 + JNE LBB8_15 + JMP LBB8_90 + +LBB8_12: + MOVWLZX BX, CX + SUBQ R9, R12 + ORQ $65536, CX + BSFQ CX, R15 + ADDQ R12, R15 + CMPQ R15, $-1 + JE LBB8_90 + +LBB8_15: + LEAQ 2(R15), CX + SUBQ CX, R14 + JS LBB8_16 + LEAQ 2(R9)(R15*1), R9 + TESTQ R10, R10 + JNE LBB8_18 + +LBB8_29: + ADDQ R15, AX + MOVBLZX -1(R9), CX + MOVB 0(CX)(R8*1), CX + CMPB CX, $-1 + JE LBB8_33 + TESTB CX, CX + JE LBB8_31 + MOVB CX, 0(AX) + INCQ AX + JMP LBB8_88 + +LBB8_33: + CMPQ R14, $3 + JLE LBB8_16 + MOVL 0(R9), R15 + MOVL R15, CX + NOTL CX + LEAL -808464432(R15), BX + ANDL $-2139062144, CX + TESTL BX, CX + JNE LBB8_38 + LEAL 421075225(R15), BX + ORL R15, BX + TESTL $-2139062144, BX + JNE LBB8_38 + MOVL R15, BX + ANDL $2139062143, BX + MOVL $-1061109568, R11 + SUBL BX, R11 + LEAL 1179010630(BX), R12 + ANDL CX, R11 + TESTL R12, R11 + JNE LBB8_38 + MOVL $-522133280, R11 + SUBL BX, R11 + ADDL $960051513, BX + ANDL R11, CX + TESTL BX, CX + JNE LBB8_38 + BSWAPL R15 + MOVL R15, CX + SHRL $4, CX + NOTL CX + ANDL $16843009, CX + LEAL 0(CX)(CX*8), CX + ANDL $252645135, R15 + ADDL CX, R15 + MOVL R15, R11 + SHRL $4, R11 + ORL R15, R11 + MOVL R11, CX + SHRL $8, CX + ANDL $65280, CX + MOVBLZX R11, R13 + ORL CX, R13 + LEAQ 4(R9), R15 + LEAQ -4(R14), R12 + CMPL R13, $127 + JBE LBB8_48 + CMPL R13, $2047 + JBE LBB8_51 + MOVL R11, BX + ANDL $16252928, BX + CMPL BX, $14155776 + JE LBB8_54 + SHRL $12, CX + ORB $-32, CX + MOVB CX, 0(AX) + SHRL $6, R13 + ANDB $63, R13 + ORB $-128, R13 + MOVB R13, 1(AX) + ANDB $63, R11 + ORB $-128, R11 + MOVB R11, 2(AX) + ADDQ $3, AX + +LBB8_49: + MOVQ R12, R14 + MOVQ R15, R9 + +LBB8_88: + TESTQ R14, R14 + JNE LBB8_3 + JMP LBB8_89 + +LBB8_18: + TESTL R14, R14 + JE LBB8_16 + CMPB -1(R9), $92 + JNE LBB8_20 + CMPB 0(R9), $92 + JNE LBB8_28 + CMPL R14, $1 + JLE LBB8_16 + MOVB 1(R9), CX + CMPB CX, $34 + JE LBB8_27 + CMPB CX, $92 + JNE LBB8_26 + +LBB8_27: + INCQ R9 + DECQ 
R14 + +LBB8_28: + INCQ R9 + DECQ R14 + JMP LBB8_29 + +LBB8_48: + MOVB R11, 0(AX) + INCQ AX + JMP LBB8_49 + +LBB8_51: + SHRL $6, R13 + ORB $-64, R13 + MOVB R13, 0(AX) + ANDB $63, R11 + ORB $-128, R11 + MOVB R11, 1(AX) + ADDQ $2, AX + JMP LBB8_49 + +LBB8_54: + TESTQ R10, R10 + JNE LBB8_56 + MOVQ R12, R14 + MOVQ R15, R9 + JMP LBB8_60 + +LBB8_56: + CMPQ R14, $4 + JLE LBB8_16 + CMPB 0(R15), $92 + JNE LBB8_58 + ADDQ $-5, R14 + ADDQ $5, R9 + +LBB8_60: + CMPL R13, $56319 + JA LBB8_64 + CMPQ R14, $6 + JL LBB8_64 + CMPB 0(R9), $92 + JNE LBB8_64 + CMPB 1(R9), $117 + JNE LBB8_64 + MOVL 2(R9), R11 + MOVL R11, CX + NOTL CX + LEAL -808464432(R11), BX + ANDL $-2139062144, CX + TESTL BX, CX + JNE LBB8_72 + LEAL 421075225(R11), BX + ORL R11, BX + TESTL $-2139062144, BX + JNE LBB8_72 + MOVL R11, BX + ANDL $2139062143, BX + MOVL $-1061109568, R15 + SUBL BX, R15 + LEAL 1179010630(BX), R12 + ANDL CX, R15 + TESTL R12, R15 + JNE LBB8_72 + MOVL $-522133280, R15 + SUBL BX, R15 + ADDL $960051513, BX + ANDL R15, CX + TESTL BX, CX + JNE LBB8_72 + BSWAPL R11 + MOVL R11, CX + SHRL $4, CX + NOTL CX + ANDL $16843009, CX + LEAL 0(CX)(CX*8), CX + ANDL $252645135, R11 + ADDL CX, R11 + MOVL R11, BX + SHRL $4, BX + ORL R11, BX + ADDQ $6, R9 + ADDQ $-6, R14 + MOVL BX, CX + ANDL $16515072, CX + CMPL CX, $14417920 + JNE LBB8_82 + MOVL BX, CX + SHRL $8, CX + ANDL $65280, CX + MOVBLZX BX, BX + ORL CX, BX + SHLL $10, R13 + LEAL -56613888(R13)(BX*1), BX + CMPL BX, $1114112 + JB LBB8_87 + TESTB $2, -56(BP) + JE LBB8_83 + JMP LBB8_65 + +LBB8_64: + TESTB $2, -56(BP) + JE LBB8_66 + +LBB8_65: + MOVW $-16401, 0(AX) + MOVB $-67, 2(AX) + ADDQ $3, AX + JMP LBB8_88 + +LBB8_82: + TESTB $2, -56(BP) + JE LBB8_83 + MOVL $-272777233, 0(AX) + MOVW $-16961, 4(AX) + ADDQ $6, AX + JMP LBB8_88 + +LBB8_87: + MOVL BX, CX + SHRL $18, CX + ORB $-16, CX + MOVB CX, 0(AX) + MOVL BX, CX + SHRL $12, CX + ANDB $63, CX + ORB $-128, CX + MOVB CX, 1(AX) + MOVL BX, CX + SHRL $6, CX + ANDB $63, CX + ORB $-128, CX + MOVB CX, 2(AX) + ANDB $63, 
BX + ORB $-128, BX + MOVB BX, 3(AX) + ADDQ $4, AX + JMP LBB8_88 + +LBB8_1: + XORL R14, R14 + MOVQ DX, AX + +LBB8_90: + ADDQ R14, AX + SUBQ DX, AX + +LBB8_91: + ADDQ $16, SP + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB8_38: + MOVQ R9, DX + SUBQ DI, DX + MOVQ -48(BP), DI + MOVQ DX, 0(DI) + MOVB 0(R9), CX + LEAL -48(CX), SI + MOVQ $-2, AX + CMPB SI, $10 + JB LBB8_40 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_40: + LEAQ 1(DX), CX + MOVQ CX, 0(DI) + MOVB 1(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_42 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_42: + LEAQ 2(DX), CX + MOVQ CX, 0(DI) + MOVB 2(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_44 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_44: + LEAQ 3(DX), CX + MOVQ CX, 0(DI) + MOVB 3(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_46 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_46: + ADDQ $4, DX + MOVQ DX, 0(DI) + JMP LBB8_91 + +LBB8_16: + MOVQ -48(BP), AX + MOVQ SI, 0(AX) + MOVQ $-1, AX + JMP LBB8_91 + +LBB8_31: + NOTQ DI + ADDQ DI, R9 + MOVQ -48(BP), AX + MOVQ R9, 0(AX) + MOVQ $-3, AX + JMP LBB8_91 + +LBB8_89: + XORL R14, R14 + JMP LBB8_90 + +LBB8_83: + SUBQ DI, R9 + ADDQ $-4, R9 + +LBB8_67: + MOVQ -48(BP), AX + MOVQ R9, 0(AX) + MOVQ $-4, AX + JMP LBB8_91 + +LBB8_20: + NOTQ DI + ADDQ DI, R9 + JMP LBB8_21 + +LBB8_26: + SUBQ DI, R9 + INCQ R9 + +LBB8_21: + MOVQ -48(BP), AX + MOVQ R9, 0(AX) + MOVQ $-2, AX + JMP LBB8_91 + +LBB8_58: + SUBQ DI, R15 + ADDQ $-4, R15 + MOVQ -48(BP), AX + MOVQ R15, 0(AX) + MOVQ $-4, AX + JMP LBB8_91 + +LBB8_72: + MOVQ R9, DX + SUBQ DI, DX + ADDQ $2, DX + MOVQ -48(BP), AX + MOVQ DX, 0(AX) + MOVB 2(R9), CX + LEAL -48(CX), SI + MOVQ $-2, AX + CMPB SI, $10 + JB LBB8_74 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_74: + LEAQ 1(DX), CX + MOVQ 
-48(BP), SI + MOVQ CX, 0(SI) + MOVB 3(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_76 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_76: + LEAQ 2(DX), CX + MOVQ -48(BP), SI + MOVQ CX, 0(SI) + MOVB 4(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_78 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_78: + LEAQ 3(DX), CX + MOVQ -48(BP), SI + MOVQ CX, 0(SI) + MOVB 5(R9), CX + LEAL -48(CX), SI + CMPB SI, $10 + JB LBB8_80 + ANDB $-33, CX + ADDB $-65, CX + CMPB CX, $5 + JA LBB8_91 + +LBB8_80: + ADDQ $4, DX + MOVQ -48(BP), CX + MOVQ DX, 0(CX) + JMP LBB8_91 + +LBB8_66: + LEAQ 4(R10)(DI*1), AX + SUBQ AX, R9 + JMP LBB8_67 + +_value: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + BYTE $0x53 // pushq %rbx + SUBQ $24, SP + MOVQ CX, R14 + MOVQ SI, BX + MOVQ DI, R15 + MOVQ DI, -48(BP) + MOVQ SI, -40(BP) + LONG $0xffe27ae8; BYTE $0xff // callq _lspace + MOVQ AX, -32(BP) + CMPQ AX, BX + JAE LBB9_4 + LEAQ 1(AX), CX + MOVQ CX, -32(BP) + MOVBLSX 0(R15)(AX*1), DX + CMPL DX, $123 + JA LBB9_6 + LONG $0xa7358d48; WORD $0x0001; BYTE $0x00 // leaq $423(%rip), %rsi /* LJTI9_0(%rip) */ + MOVLQSX 0(SI)(DX*4), DX + ADDQ SI, DX + JMP DX + +LBB9_3: + MOVQ AX, -32(BP) + LEAQ -48(BP), DI + LEAQ -32(BP), SI + MOVQ R14, DX + LONG $0x0008c8e8; BYTE $0x00 // callq _vnumber + JMP LBB9_5 + +LBB9_4: + MOVQ $1, 0(R14) + +LBB9_5: + MOVQ -32(BP), AX + ADDQ $24, SP + BYTE $0x5b // popq %rbx + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB9_6: + MOVQ $-2, 0(R14) + JMP LBB9_5 + +LBB9_7: + LEAQ -48(BP), DI + LEAQ -32(BP), SI + MOVQ R14, DX + LONG $0x000359e8; BYTE $0x00 // callq _vstring + JMP LBB9_5 + +LBB9_8: + LEAQ -4(BX), DX + CMPQ AX, DX + JAE LBB9_19 + MOVL 0(R15)(CX*1), DX + CMPL DX, $1702063201 + JNE LBB9_21 + ADDQ $5, AX + MOVQ AX, -32(BP) + MOVL $4, AX + MOVQ AX, 0(R14) + JMP LBB9_5 + +LBB9_11: + LEAQ -3(BX), CX + CMPQ AX, CX 
+ JAE LBB9_20 + MOVL 0(R15)(AX*1), DX + CMPL DX, $1819047278 + JNE LBB9_26 + ADDQ $4, AX + MOVQ AX, -32(BP) + MOVL $2, CX + MOVQ CX, 0(R14) + JMP LBB9_5 + +LBB9_14: + MOVQ $6, 0(R14) + JMP LBB9_5 + +LBB9_15: + MOVQ $5, 0(R14) + JMP LBB9_5 + +LBB9_16: + LEAQ -3(BX), CX + CMPQ AX, CX + JAE LBB9_20 + MOVL 0(R15)(AX*1), DX + CMPL DX, $1702195828 + JNE LBB9_31 + ADDQ $4, AX + MOVQ AX, -32(BP) + MOVL $3, CX + MOVQ CX, 0(R14) + JMP LBB9_5 + +LBB9_20: + MOVQ BX, -32(BP) + MOVQ $-1, CX + MOVQ CX, 0(R14) + JMP LBB9_5 + +LBB9_19: + MOVQ BX, -32(BP) + MOVQ $-1, AX + MOVQ AX, 0(R14) + JMP LBB9_5 + +LBB9_21: + MOVQ $-2, AX + CMPB DX, $97 + JNE LBB9_25 + MOVL $1702063201, DX + +LBB9_23: + SHRL $8, DX + MOVBLSX 1(R15)(CX*1), SI + INCQ CX + MOVBLZX DX, DI + CMPL DI, SI + JE LBB9_23 + MOVQ CX, -32(BP) + +LBB9_25: + MOVQ AX, 0(R14) + JMP LBB9_5 + +LBB9_26: + MOVQ AX, -32(BP) + MOVQ $-2, CX + CMPB DX, $110 + JNE LBB9_30 + MOVL $1819047278, DX + +LBB9_28: + SHRL $8, DX + MOVBLSX 1(R15)(AX*1), SI + INCQ AX + MOVBLZX DX, DI + CMPL DI, SI + JE LBB9_28 + JMP LBB9_29 + +LBB9_31: + MOVQ AX, -32(BP) + MOVQ $-2, CX + CMPB DX, $116 + JNE LBB9_30 + MOVL $1702195828, DX + +LBB9_33: + SHRL $8, DX + MOVBLSX 1(R15)(AX*1), SI + INCQ AX + MOVBLZX DX, DI + CMPL DI, SI + JE LBB9_33 + +LBB9_29: + MOVQ AX, -32(BP) + +LBB9_30: + MOVQ CX, 0(R14) + JMP LBB9_5 + +// .set L9_0_set_4, LBB9_4-LJTI9_0 +// .set L9_0_set_6, LBB9_6-LJTI9_0 +// .set L9_0_set_7, LBB9_7-LJTI9_0 +// .set L9_0_set_3, LBB9_3-LJTI9_0 +// .set L9_0_set_15, LBB9_15-LJTI9_0 +// .set L9_0_set_8, LBB9_8-LJTI9_0 +// .set L9_0_set_11, LBB9_11-LJTI9_0 +// .set L9_0_set_16, LBB9_16-LJTI9_0 +// .set L9_0_set_14, LBB9_14-LJTI9_0 +LJTI9_0: + LONG $0xfffffe78 // .long L9_0_set_4 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + 
LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe97 // .long L9_0_set_7 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe62 // .long L9_0_set_3 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG 
$0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xffffff12 // .long L9_0_set_15 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffea9 // .long L9_0_set_8 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG 
$0xfffffed8 // .long L9_0_set_11 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xffffff1e // .long L9_0_set_16 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xfffffe8e // .long L9_0_set_6 + LONG $0xffffff06 // .long L9_0_set_14 + +LCPI10_0: + QUAD $0x2222222222222222; QUAD $0x2222222222222222 // .space 16, '""""""""""""""""' + +_vstring: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + BYTE $0x50 // pushq %rax + MOVQ DX, R14 + MOVQ SI, R12 + MOVQ DI, BX + MOVQ 0(SI), R15 + MOVQ R15, SI + MOVL $34, DX + MOVL $92, CX + LONG $0xffe27de8; BYTE $0xff // callq _strchr2 + TESTQ AX, AX + JS LBB10_1 + MOVQ 0(BX), R11 + MOVQ $-1, DX + CMPB 0(R11)(AX*1), $34 + JNE LBB10_5 + MOVQ AX, CX + +LBB10_4: + MOVQ DX, 24(R14) + INCQ CX + MOVQ CX, 0(R12) + MOVQ R15, 16(R14) + MOVL $7, AX + JMP LBB10_60 + +LBB10_1: + MOVQ 8(BX), R10 + +LBB10_59: + MOVQ R10, 0(R12) + MOVQ $-1, AX + +LBB10_60: + MOVQ AX, 0(R14) + ADDQ $8, SP + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB10_5: + LEAQ 1(AX), R8 + MOVQ 8(BX), R10 + MOVQ R10, DX + SUBQ R8, DX + MOVQ AX, -48(BP) + LEAQ 1(R11)(AX*1), R13 + MOVQ R13, CX + MOVQ DX, SI + MOVQ R13, DI + ANDQ $15, CX + JE LBB10_9 + MOVQ R13, SI + ANDQ $-16, SI + LONG $0x066ff9c5 // vmovdqa (%rsi), %xmm0 + QUAD $0xffffff3b0574f9c5 // vpcmpeqb $-197(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7f9c5 // vpmovmskb %xmm0, %esi + SHRQ CX, SI + TESTQ SI, SI + JE 
LBB10_8 + ORQ $65536, SI + BSFQ SI, SI + CMPQ SI, DX + MOVQ $-1, CX + LONG $0xce4c0f48 // cmovlq %rsi, %rcx + JMP LBB10_29 + +LBB10_8: + MOVL $16, BX + SUBQ CX, BX + LEAQ 0(R13)(BX*1), DI + MOVQ DX, SI + SUBQ BX, SI + +LBB10_9: + CMPQ SI, $64 + JL LBB10_10 + QUAD $0xfffffef00d6ffac5 // vmovdqu $-272(%rip), %xmm1 /* LCPI10_0(%rip) */ + +LBB10_17: + LONG $0x276ff9c5 // vmovdqa (%rdi), %xmm4 + LONG $0x5f6ff9c5; BYTE $0x10 // vmovdqa $16(%rdi), %xmm3 + LONG $0x576ff9c5; BYTE $0x20 // vmovdqa $32(%rdi), %xmm2 + LONG $0x476ff9c5; BYTE $0x30 // vmovdqa $48(%rdi), %xmm0 + LONG $0xe974d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm5 + LONG $0xf174e1c5 // vpcmpeqb %xmm1, %xmm3, %xmm6 + LONG $0xedebc9c5 // vpor %xmm5, %xmm6, %xmm5 + LONG $0xf174e9c5 // vpcmpeqb %xmm1, %xmm2, %xmm6 + LONG $0xf974f9c5 // vpcmpeqb %xmm1, %xmm0, %xmm7 + LONG $0xf7ebc9c5 // vpor %xmm7, %xmm6, %xmm6 + LONG $0xeeebd1c5 // vpor %xmm6, %xmm5, %xmm5 + LONG $0xcdd7f9c5 // vpmovmskb %xmm5, %ecx + TESTW CX, CX + JNE LBB10_19 + ADDQ $64, DI + LEAQ -64(SI), BX + CMPQ SI, $127 + MOVQ BX, SI + JG LBB10_17 + JMP LBB10_11 + +LBB10_10: + MOVQ SI, BX + +LBB10_11: + TESTQ BX, BX + JS LBB10_59 + LONG $0x076ff9c5 // vmovdqa (%rdi), %xmm0 + QUAD $0xfffffe8d0574f9c5 // vpcmpeqb $-371(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB10_25 + +LBB10_13: + MOVQ DI, CX + +LBB10_14: + MOVWLZX R9, SI + ORQ $65536, SI + BSFQ SI, SI + CMPQ SI, BX + JGE LBB10_59 + SUBQ R13, CX + ADDQ SI, CX + JMP LBB10_29 + +LBB10_19: + QUAD $0xfffffe550d74d9c5 // vpcmpeqb $-427(%rip), %xmm4, %xmm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + TESTW CX, CX + JE LBB10_21 + MOVWLZX CX, CX + SUBQ R13, DI + ORQ $65536, CX + BSFQ CX, CX + ADDQ DI, CX + JMP LBB10_29 + +LBB10_25: + CMPQ BX, $15 + JLE LBB10_59 + LEAQ 16(DI), CX + LONG $0x016ff9c5 // vmovdqa (%rcx), %xmm0 + QUAD $0xfffffe190574f9c5 // vpcmpeqb $-487(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xc8d779c5 // 
vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB10_69 + ADDQ $-16, BX + JMP LBB10_14 + +LBB10_21: + QUAD $0xfffffe010d74e1c5 // vpcmpeqb $-511(%rip), %xmm3, %xmm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + TESTW CX, CX + JE LBB10_23 + MOVWLZX CX, CX + ORQ $65536, CX + BSFQ CX, CX + SUBQ R13, DI + LEAQ 16(DI)(CX*1), CX + JMP LBB10_29 + +LBB10_23: + QUAD $0xfffffdd80d74e9c5 // vpcmpeqb $-552(%rip), %xmm2, %xmm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + SUBQ R13, DI + TESTW CX, CX + JE LBB10_28 + MOVWLZX CX, CX + ORQ $65536, CX + BSFQ CX, CX + LEAQ 32(DI)(CX*1), CX + JMP LBB10_29 + +LBB10_69: + CMPQ BX, $32 + JL LBB10_59 + LEAQ 32(DI), CX + LONG $0x016ff9c5 // vmovdqa (%rcx), %xmm0 + QUAD $0xfffffd9d0574f9c5 // vpcmpeqb $-611(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB10_72 + ADDQ $-32, BX + JMP LBB10_14 + +LBB10_28: + QUAD $0xfffffd7e0574f9c5 // vpcmpeqb $-642(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx + ORQ $65536, CX + BSFQ CX, CX + LEAQ 48(DI)(CX*1), CX + +LBB10_29: + CMPQ CX, DX + JAE LBB10_59 + ADDQ R8, CX + JS LBB10_59 + LEAQ -1(R11), R8 + QUAD $0xfffffd4c056ffac5 // vmovdqu $-692(%rip), %xmm0 /* LCPI10_0(%rip) */ + QUAD $0xfffffd44056f7ac5 // vmovdqu $-700(%rip), %xmm8 /* LCPI10_0(%rip) */ + +LBB10_32: + LEAQ 0(R8)(CX*1), SI + XORL DX, DX + +LBB10_33: + CMPB 0(SI)(DX*1), $92 + LEAQ -1(DX), DX + JE LBB10_33 + NOTL DX + TESTB $1, DX + JE LBB10_58 + LEAQ 1(CX), R13 + MOVQ R10, DX + SUBQ R13, DX + LEAQ 1(R11)(CX*1), R9 + MOVQ R9, CX + MOVQ DX, BX + MOVQ R9, DI + ANDQ $15, CX + JE LBB10_39 + MOVQ R9, DI + ANDQ $-16, DI + LONG $0x1774b9c5 // vpcmpeqb (%rdi), %xmm8, %xmm2 + LONG $0xfad7f9c5 // vpmovmskb %xmm2, %edi + SHRQ CX, DI + TESTQ DI, DI + JE LBB10_38 + ORQ $65536, DI + BSFQ DI, CX + CMPQ CX, DX + MOVQ $-1, AX + LONG $0xc84d0f48 // cmovgeq %rax, %rcx + JMP LBB10_56 + +LBB10_38: + MOVL $16, SI + SUBQ CX, SI + 
LEAQ 0(R9)(SI*1), DI + MOVQ DX, BX + SUBQ SI, BX + +LBB10_39: + MOVQ BX, CX + CMPQ BX, $64 + JL LBB10_40 + +LBB10_45: + LONG $0x2f6ff9c5 // vmovdqa (%rdi), %xmm5 + LONG $0x676ff9c5; BYTE $0x10 // vmovdqa $16(%rdi), %xmm4 + LONG $0x5f6ff9c5; BYTE $0x20 // vmovdqa $32(%rdi), %xmm3 + LONG $0x576ff9c5; BYTE $0x30 // vmovdqa $48(%rdi), %xmm2 + LONG $0xf074d1c5 // vpcmpeqb %xmm0, %xmm5, %xmm6 + LONG $0xf874d9c5 // vpcmpeqb %xmm0, %xmm4, %xmm7 + LONG $0xf6ebc1c5 // vpor %xmm6, %xmm7, %xmm6 + LONG $0xf874e1c5 // vpcmpeqb %xmm0, %xmm3, %xmm7 + LONG $0xc874e9c5 // vpcmpeqb %xmm0, %xmm2, %xmm1 + LONG $0xc9ebc1c5 // vpor %xmm1, %xmm7, %xmm1 + LONG $0xc9ebc9c5 // vpor %xmm1, %xmm6, %xmm1 + LONG $0xf1d7f9c5 // vpmovmskb %xmm1, %esi + TESTW SI, SI + JNE LBB10_47 + ADDQ $64, DI + LEAQ -64(CX), BX + CMPQ CX, $127 + MOVQ BX, CX + JG LBB10_45 + +LBB10_40: + TESTQ BX, BX + JS LBB10_59 + MOVQ R12, AX + MOVQ R10, R12 + MOVQ R14, R10 + MOVQ R15, R14 + MOVQ R8, R15 + LONG $0x0f74b9c5 // vpcmpeqb (%rdi), %xmm8, %xmm1 + LONG $0xc1d779c5 // vpmovmskb %xmm1, %r8d + TESTW R8, R8 + JE LBB10_53 + +LBB10_42: + MOVQ DI, CX + JMP LBB10_43 + +LBB10_53: + CMPQ BX, $15 + JLE LBB10_54 + LEAQ 16(DI), CX + LONG $0x0974b9c5 // vpcmpeqb (%rcx), %xmm8, %xmm1 + LONG $0xc1d779c5 // vpmovmskb %xmm1, %r8d + TESTW R8, R8 + JE LBB10_63 + ADDQ $-16, BX + JMP LBB10_43 + +LBB10_47: + LONG $0xcd74b9c5 // vpcmpeqb %xmm5, %xmm8, %xmm1 + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + TESTW CX, CX + JE LBB10_49 + MOVWLZX CX, CX + SUBQ R9, DI + ORQ $65536, CX + BSFQ CX, CX + ADDQ DI, CX + JMP LBB10_56 + +LBB10_63: + CMPQ BX, $32 + JL LBB10_54 + LEAQ 32(DI), CX + LONG $0x0974b9c5 // vpcmpeqb (%rcx), %xmm8, %xmm1 + LONG $0xc1d779c5 // vpmovmskb %xmm1, %r8d + TESTW R8, R8 + JE LBB10_66 + ADDQ $-32, BX + +LBB10_43: + MOVWLZX R8, SI + ORQ $65536, SI + BSFQ SI, DI + CMPQ DI, BX + JGE LBB10_54 + SUBQ R9, CX + ADDQ DI, CX + MOVQ R15, R8 + MOVQ R14, R15 + MOVQ R10, R14 + MOVQ R12, R10 + MOVQ AX, R12 + +LBB10_56: + CMPQ CX, DX + JAE 
LBB10_59 + ADDQ R13, CX + JNS LBB10_32 + JMP LBB10_59 + +LBB10_49: + LONG $0xcc74b9c5 // vpcmpeqb %xmm4, %xmm8, %xmm1 + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + TESTW CX, CX + JE LBB10_51 + MOVWLZX CX, CX + ORQ $65536, CX + BSFQ CX, CX + SUBQ R9, DI + LEAQ 16(DI)(CX*1), CX + JMP LBB10_56 + +LBB10_66: + CMPQ BX, $48 + JL LBB10_54 + ADDQ $48, DI + LONG $0x0f74b9c5 // vpcmpeqb (%rdi), %xmm8, %xmm1 + LONG $0xc1d779c5 // vpmovmskb %xmm1, %r8d + TESTW R8, R8 + JE LBB10_54 + ADDQ $-48, BX + JMP LBB10_42 + +LBB10_51: + LONG $0xcb74b9c5 // vpcmpeqb %xmm3, %xmm8, %xmm1 + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + SUBQ R9, DI + TESTW CX, CX + JE LBB10_55 + MOVWLZX CX, CX + ORQ $65536, CX + BSFQ CX, CX + LEAQ 32(DI)(CX*1), CX + JMP LBB10_56 + +LBB10_55: + LONG $0xca74b9c5 // vpcmpeqb %xmm2, %xmm8, %xmm1 + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + ORQ $65536, CX + BSFQ CX, CX + LEAQ 48(DI)(CX*1), CX + JMP LBB10_56 + +LBB10_58: + MOVQ -48(BP), DX + TESTQ CX, CX + JNS LBB10_4 + JMP LBB10_59 + +LBB10_54: + MOVQ R10, R14 + MOVQ R12, R10 + MOVQ AX, R12 + JMP LBB10_59 + +LBB10_72: + CMPQ BX, $48 + JL LBB10_59 + ADDQ $48, DI + LONG $0x076ff9c5 // vmovdqa (%rdi), %xmm0 + QUAD $0xfffffad90574f9c5 // vpcmpeqb $-1319(%rip), %xmm0, %xmm0 /* LCPI10_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB10_59 + ADDQ $-48, BX + JMP LBB10_13 + +LCPI11_0: + QUAD $0x4024000000000000 // .quad 0x4024000000000000 + +LCPI11_1: + QUAD $0x7ff0000000000000 // .quad 0x7ff0000000000000 + +_vnumber: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + MOVQ 0(SI), AX + MOVQ 0(DI), R8 + MOVQ 8(DI), R15 + MOVQ $9, 0(DX) + LONG $0xc057f9c5 // vxorpd %xmm0, %xmm0, %xmm0 + LONG $0x4211f9c5; BYTE $0x08 // vmovupd %xmm0, $8(%rdx) + MOVQ 0(SI), CX + MOVQ CX, 24(DX) + CMPQ AX, R15 + JAE LBB11_56 + MOVB 0(R8)(AX*1), CX + MOVL 
$1, R10 + CMPB CX, $45 + JNE LBB11_4 + INCQ AX + CMPQ AX, R15 + JAE LBB11_56 + MOVB 0(R8)(AX*1), CX + MOVL $-1, R10 + +LBB11_4: + LEAL -48(CX), DI + CMPB DI, $10 + JB LBB11_6 + MOVQ AX, 0(SI) + MOVQ $-2, 0(DX) + JMP LBB11_57 + +LBB11_6: + CMPB CX, $48 + JNE LBB11_10 + LEAQ 1(AX), CX + CMPQ AX, R15 + JAE LBB11_21 + MOVB 0(R8)(CX*1), BX + ADDB $-46, BX + CMPB BX, $55 + JA LBB11_21 + MOVBLZX BX, DI + MOVQ $36028797027352577, BX + BTQ DI, BX + JAE LBB11_21 + +LBB11_10: + CMPQ AX, R15 + MOVQ R15, R9 + LONG $0xc8470f4c // cmovaq %rax, %r9 + XORL R12, R12 + +LBB11_11: + MOVQ R12, DI + CMPQ R9, AX + JE LBB11_22 + MOVQ AX, R13 + MOVBLZX 0(R8)(AX*1), BX + LEAL -48(BX), AX + CMPB AX, $9 + JA LBB11_23 + ADDL $-48, BX + IMULL R10, BX + IMUL3Q $10, DI, R12 + JO LBB11_15 + LEAQ 1(R13), AX + MOVLQSX BX, CX + ADDQ CX, R12 + JNO LBB11_11 + +LBB11_15: + LONG $0x2af3e1c4; BYTE $0xc7 // vcvtsi2sd %rdi, %xmm1, %xmm0 + QUAD $0xfffffef10559fbc5 // vmulsd $-271(%rip), %xmm0, %xmm0 /* LCPI11_0(%rip) */ + LONG $0xcb2af3c5 // vcvtsi2sd %ebx, %xmm1, %xmm1 + LONG $0xc158fbc5 // vaddsd %xmm1, %xmm0, %xmm0 + MOVQ $8, 0(DX) + LEAQ 1(R13), AX + CMPQ AX, R15 + SETCS CX + JAE LBB11_26 + MOVB 1(R8)(R13*1), DI + LEAL -48(DI), BX + MOVB $1, CX + CMPB BX, $9 + JA LBB11_26 + ADDQ $2, R13 + QUAD $0xfffffeb70d10fbc5 // vmovsd $-329(%rip), %xmm1 /* LCPI11_0(%rip) */ + +LBB11_18: + MOVQ R13, CX + MOVBLZX DI, AX + LONG $0xc159fbc5 // vmulsd %xmm1, %xmm0, %xmm0 + ADDL $-48, AX + IMULL R10, AX + LONG $0xd02ae3c5 // vcvtsi2sd %eax, %xmm3, %xmm2 + LONG $0xc258fbc5 // vaddsd %xmm2, %xmm0, %xmm0 + CMPQ R15, R13 + JE LBB11_24 + MOVBLZX 0(R8)(CX*1), DI + LEAL -48(DI), AX + LEAQ 1(CX), R13 + CMPB AX, $9 + JBE LBB11_18 + DECQ R13 + MOVQ R13, AX + JMP LBB11_25 + +LBB11_21: + MOVQ CX, 0(SI) + JMP LBB11_57 + +LBB11_22: + LONG $0x2af3e1c4; BYTE $0xc7 // vcvtsi2sd %rdi, %xmm1, %xmm0 + MOVQ R9, R15 + MOVQ DI, R12 + JMP LBB11_75 + +LBB11_23: + LONG $0x2af3e1c4; BYTE $0xc7 // vcvtsi2sd %rdi, %xmm1, %xmm0 + MOVB $1, R9 + MOVQ 
DI, R12 + CMPQ R13, R15 + JB LBB11_27 + JMP LBB11_49 + +LBB11_24: + MOVQ R15, AX + +LBB11_25: + CMPQ CX, R15 + SETCS CX + +LBB11_26: + XORL R9, R9 + MOVQ AX, R13 + TESTB CX, CX + JE LBB11_49 + +LBB11_27: + CMPB 0(R8)(R13*1), $46 + JNE LBB11_49 + LEAQ 1(R13), R11 + MOVQ $8, 0(DX) + CMPQ R11, R15 + JAE LBB11_56 + MOVB 0(R8)(R11*1), R14 + LEAL -48(R14), CX + CMPB CX, $9 + JBE LBB11_31 + MOVQ R11, 0(SI) + MOVQ $-2, 0(DX) + JMP LBB11_57 + +LBB11_31: + LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 + MOVQ R11, AX + LONG $0xd257e9c5 // vxorpd %xmm2, %xmm2, %xmm2 + CMPB CX, $9 + JA LBB11_40 + LEAL 18(R13), AX + MOVLQSX AX, DI + MOVQ R11, AX + LONG $0xd257e9c5 // vxorpd %xmm2, %xmm2, %xmm2 + CMPQ R13, DI + JGE LBB11_40 + ADDQ $2, R13 + XORL CX, CX + +LBB11_34: + MOVBLZX R14, AX + LEAQ 0(CX)(CX*4), CX + ADDL $-48, AX + IMULL R10, AX + WORD $0x9848 // cltq + LEAQ 0(AX)(CX*2), CX + CMPQ R15, R13 + JE LBB11_38 + MOVBLZX 0(R8)(R13*1), R14 + LEAL -48(R14), BX + LEAQ 1(R13), AX + CMPB BX, $9 + JA LBB11_37 + CMPQ R13, DI + MOVQ AX, R13 + JLE LBB11_34 + +LBB11_37: + DECQ AX + JMP LBB11_39 + +LBB11_38: + MOVQ R15, AX + +LBB11_39: + LONG $0x2ae3e1c4; BYTE $0xd1 // vcvtsi2sd %rcx, %xmm3, %xmm2 + +LBB11_40: + SUBQ AX, R11 + CMPL R11, $-323 + JL LBB11_44 + CMPL R11, $308 + JLE LBB11_43 + QUAD $0xfffffd820d10fbc5 // vmovsd $-638(%rip), %xmm1 /* LCPI11_1(%rip) */ + JMP LBB11_44 + +LBB11_43: + ADDL $323, R11 + LONG $0xb10d8d48; WORD $0x0018; BYTE $0x00 // leaq $6321(%rip), %rcx /* _P10_TAB(%rip) */ + LONG $0x596ba1c4; WORD $0xd90c // vmulsd (%rcx,%r11,8), %xmm2, %xmm1 + +LBB11_44: + LONG $0xc158fbc5 // vaddsd %xmm1, %xmm0, %xmm0 + CMPQ AX, R15 + JAE LBB11_48 + +LBB11_45: + MOVBLZX 0(R8)(AX*1), CX + ADDB $-48, CX + CMPB CX, $9 + JA LBB11_48 + INCQ AX + CMPQ R15, AX + JNE LBB11_45 + JMP LBB11_74 + +LBB11_48: + MOVQ AX, R13 + +LBB11_49: + CMPQ R13, R15 + JAE LBB11_73 + MOVB 0(R8)(R13*1), AX + ORB $32, AX + CMPB AX, $101 + JNE LBB11_73 + LEAQ 1(R13), CX + MOVQ $8, 0(DX) + CMPQ CX, R15 + JAE 
LBB11_56 + MOVB 0(R8)(CX*1), AX + CMPB AX, $45 + JE LBB11_54 + MOVL $1, R10 + CMPB AX, $43 + JNE LBB11_58 + +LBB11_54: + ADDQ $2, R13 + CMPQ R13, R15 + JAE LBB11_56 + XORL CX, CX + CMPB AX, $43 + SETEQ CX + LEAL -1(CX)(CX*1), R10 + MOVB 0(R8)(R13*1), AX + JMP LBB11_59 + +LBB11_56: + MOVQ R15, 0(SI) + MOVQ $-1, 0(DX) + +LBB11_57: + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB11_58: + MOVQ CX, R13 + +LBB11_59: + LEAL -48(AX), CX + CMPB CX, $9 + JBE LBB11_61 + MOVQ R13, 0(SI) + MOVQ $-2, 0(DX) + JMP LBB11_57 + +LBB11_61: + XORL R11, R11 + CMPQ R13, R15 + JAE LBB11_72 + XORL R11, R11 + CMPB CX, $9 + JA LBB11_67 + LEAQ -1(R15), CX + XORL R11, R11 + +LBB11_64: + LEAL 0(R11)(R11*4), DI + MOVBLZX AX, AX + ADDL $-48, AX + IMULL R10, AX + LEAL 0(AX)(DI*2), R11 + CMPQ CX, R13 + JE LBB11_66 + MOVBLZX 1(R8)(R13*1), AX + INCQ R13 + LEAL -48(AX), DI + CMPB DI, $10 + JB LBB11_64 + JMP LBB11_67 + +LBB11_66: + MOVQ R15, R13 + +LBB11_67: + CMPL R11, $1 + JE LBB11_73 + CMPL R11, $-323 + JGE LBB11_70 + LONG $0xc057f9c5 // vxorpd %xmm0, %xmm0, %xmm0 + JMP LBB11_73 + +LBB11_70: + CMPL R11, $308 + JLE LBB11_72 + QUAD $0xfffffc5b0510fbc5 // vmovsd $-933(%rip), %xmm0 /* LCPI11_1(%rip) */ + JMP LBB11_73 + +LBB11_72: + ADDL $323, R11 + LONG $0x8a058d48; WORD $0x0017; BYTE $0x00 // leaq $6026(%rip), %rax /* _P10_TAB(%rip) */ + LONG $0x597ba1c4; WORD $0xd804 // vmulsd (%rax,%r11,8), %xmm0, %xmm0 + +LBB11_73: + MOVQ R13, R15 + +LBB11_74: + TESTB R9, R9 + JE LBB11_76 + +LBB11_75: + MOVQ R12, 16(DX) + +LBB11_76: + MOVQ R15, 0(SI) + LONG $0x4211fbc5; BYTE $0x08 // vmovsd %xmm0, $8(%rdx) + JMP LBB11_57 + +_vsigned: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + BYTE $0x53 // pushq %rbx + MOVQ 0(SI), AX + MOVQ 0(DI), R8 + MOVQ 8(DI), R11 + MOVQ $9, 0(DX) + LONG $0xc057f8c5 // vxorps %xmm0, %xmm0, %xmm0 + LONG $0x4211f8c5; BYTE $0x08 // vmovups 
%xmm0, $8(%rdx) + MOVQ 0(SI), CX + MOVQ CX, 24(DX) + CMPQ AX, R11 + JAE LBB12_1 + MOVB 0(R8)(AX*1), CX + MOVL $1, R9 + CMPB CX, $45 + JNE LBB12_5 + INCQ AX + CMPQ AX, R11 + JAE LBB12_1 + MOVB 0(R8)(AX*1), CX + MOVQ $-1, R9 + +LBB12_5: + LEAL -48(CX), DI + CMPB DI, $10 + JB LBB12_7 + MOVQ AX, 0(SI) + MOVQ $-2, 0(DX) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB12_1: + MOVQ R11, 0(SI) + MOVQ $-1, 0(DX) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB12_7: + CMPB CX, $48 + JNE LBB12_12 + LEAQ 1(AX), DI + CMPQ AX, R11 + JAE LBB12_11 + MOVB 0(R8)(DI*1), CX + ADDB $-46, CX + CMPB CX, $55 + JA LBB12_11 + MOVBLZX CX, R10 + MOVQ $36028797027352577, CX + BTQ R10, CX + JAE LBB12_11 + +LBB12_12: + CMPQ AX, R11 + MOVQ R11, R10 + LONG $0xd0470f4c // cmovaq %rax, %r10 + XORL DI, DI + +LBB12_13: + CMPQ R10, AX + JE LBB12_23 + MOVBQSX 0(R8)(AX*1), CX + LEAL -48(CX), BX + CMPB BX, $9 + JA LBB12_18 + IMUL3Q $10, DI, DI + JO LBB12_17 + INCQ AX + ADDL $-48, CX + IMULQ R9, CX + ADDQ CX, DI + JNO LBB12_13 + +LBB12_17: + DECQ AX + MOVQ AX, 0(SI) + MOVQ $-5, 0(DX) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB12_11: + MOVQ DI, 0(SI) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB12_18: + CMPQ AX, R11 + JAE LBB12_22 + CMPB CX, $46 + JE LBB12_25 + CMPB CX, $69 + JE LBB12_25 + CMPB CX, $101 + JNE LBB12_22 + +LBB12_25: + MOVQ AX, 0(SI) + MOVQ $-6, 0(DX) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB12_22: + MOVQ AX, R10 + +LBB12_23: + MOVQ R10, 0(SI) + MOVQ DI, 16(DX) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +_vunsigned: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + BYTE $0x53 // pushq %rbx + MOVQ DX, R8 + MOVQ 0(SI), CX + MOVQ 0(DI), R9 + MOVQ 8(DI), R11 + MOVQ $9, 0(DX) + LONG $0xc057f8c5 // vxorps %xmm0, %xmm0, %xmm0 + LONG $0x4211f8c5; BYTE $0x08 // vmovups %xmm0, $8(%rdx) + MOVQ 0(SI), AX + MOVQ AX, 24(DX) + CMPQ CX, R11 + JAE LBB13_1 + MOVB 0(R9)(CX*1), 
AX + CMPB AX, $45 + JNE LBB13_4 + +LBB13_3: + MOVQ CX, 0(SI) + MOVQ $-6, 0(R8) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB13_1: + MOVQ R11, 0(SI) + MOVQ $-1, 0(R8) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB13_4: + LEAL -48(AX), DX + CMPB DX, $10 + JB LBB13_6 + MOVQ CX, 0(SI) + MOVQ $-2, 0(R8) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB13_6: + CMPB AX, $48 + JNE LBB13_7 + MOVB 1(R9)(CX*1), AX + ADDB $-46, AX + CMPB AX, $55 + JA LBB13_16 + MOVBLZX AX, AX + MOVQ $36028797027352577, DX + BTQ AX, DX + JAE LBB13_16 + +LBB13_7: + XORL AX, AX + MOVL $10, R10 + +LBB13_8: + CMPQ R11, CX + JE LBB13_22 + MOVBLSX 0(R9)(CX*1), DI + LEAL -48(DI), DX + CMPB DX, $9 + JA LBB13_17 + MULQ R10 + JO LBB13_13 + INCQ CX + ADDL $-48, DI + XORL BX, BX + ADDQ DI, AX + SETCS BX + MOVQ BX, DX + NEGQ DX + XORQ DX, BX + JNE LBB13_13 + TESTQ DX, DX + JNS LBB13_8 + +LBB13_13: + DECQ CX + MOVQ CX, 0(SI) + MOVQ $-5, 0(R8) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB13_17: + CMPQ CX, R11 + JAE LBB13_21 + CMPB DI, $46 + JE LBB13_3 + CMPB DI, $69 + JE LBB13_3 + CMPB DI, $101 + JE LBB13_3 + +LBB13_21: + MOVQ CX, R11 + +LBB13_22: + MOVQ R11, 0(SI) + MOVQ AX, 16(R8) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LBB13_16: + INCQ CX + MOVQ CX, 0(SI) + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +_skip_one: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ DX, AX + MOVQ SI, DX + MOVQ DI, SI + MOVQ $1, 0(AX) + MOVQ AX, DI + BYTE $0x5d // popq %rbp + JMP _fsm_exec + +_fsm_exec: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + SUBQ $56, SP + MOVQ SI, -56(BP) + MOVL 0(DI), R15 + TESTL R15, R15 + JE LBB15_48 + MOVQ DX, R13 + MOVQ DI, R8 + MOVQ $-1, R9 + MOVQ DX, -48(BP) + MOVQ DI, -64(BP) + JMP LBB15_7 + 
+LBB15_2: + CMPL R15, $65535 + JG LBB15_51 + INCL R15 + MOVL R15, 0(R8) + MOVL $0, 4(R8)(SI*4) + +LBB15_4: + MOVQ -48(BP), R13 + +LBB15_5: + MOVL 0(R8), DX + MOVL DX, R15 + MOVQ R9, AX + TESTL DX, DX + JE LBB15_50 + +LBB15_7: + MOVQ R9, R12 + MOVQ -56(BP), BX + MOVQ 0(BX), R14 + MOVQ 8(BX), SI + MOVQ 0(R13), DX + MOVQ R14, DI + LONG $0xffd2e4e8; BYTE $0xff // callq _lspace + MOVQ AX, CX + MOVQ AX, 0(R13) + CMPQ AX, 8(BX) + JAE LBB15_9 + MOVQ CX, DX + INCQ DX + MOVQ DX, 0(R13) + MOVBLZX 0(R14)(CX*1), AX + MOVQ DX, CX + JMP LBB15_10 + +LBB15_9: + XORL AX, AX + +LBB15_10: + MOVQ -64(BP), R8 + MOVQ R12, R9 + LEAL -1(R15), DX + MOVLQSX R15, SI + MOVL 0(R8)(SI*4), BX + LEAQ -1(CX), R12 + CMPQ R9, $-1 + LONG $0xcc440f4d // cmoveq %r12, %r9 + DECL BX + CMPL BX, $5 + JA LBB15_14 + LEAQ -1(SI), R10 + LONG $0x663d8d48; WORD $0x0003; BYTE $0x00 // leaq $870(%rip), %rdi /* LJTI15_0(%rip) */ + MOVLQSX 0(DI)(BX*4), BX + ADDQ DI, BX + JMP BX + +LBB15_12: + MOVBLSX AX, AX + CMPL AX, $44 + JE LBB15_2 + CMPL AX, $93 + JE LBB15_29 + JMP LBB15_49 + +LBB15_14: + MOVL DX, 0(R8) + MOVBLSX AX, DX + CMPL DX, $123 + MOVQ -48(BP), R13 + JBE LBB15_24 + JMP LBB15_49 + +LBB15_15: + MOVBLSX AX, AX + CMPL AX, $44 + JNE LBB15_28 + CMPL R15, $65535 + JG LBB15_51 + INCL R15 + MOVL R15, 0(R8) + MOVL $3, 4(R8)(SI*4) + JMP LBB15_4 + +LBB15_18: + CMPB AX, $34 + JNE LBB15_49 + MOVL $4, 4(R8)(R10*4) + MOVQ -56(BP), DI + MOVQ -48(BP), R13 + MOVQ R13, SI + MOVQ R9, R14 + LONG $0x000538e8; BYTE $0x00 // callq _skip_string + MOVQ R14, R9 + MOVQ -64(BP), R8 + TESTQ AX, AX + JNS LBB15_5 + JMP LBB15_50 + +LBB15_20: + CMPB AX, $58 + JNE LBB15_49 + MOVL $0, 4(R8)(R10*4) + JMP LBB15_4 + +LBB15_22: + CMPB AX, $93 + JE LBB15_29 + MOVL $1, 4(R8)(R10*4) + MOVBLSX AX, DX + CMPL DX, $123 + MOVQ -48(BP), R13 + JA LBB15_49 + +LBB15_24: + MOVQ $-1, AX + LONG $0x98358d48; WORD $0x0002; BYTE $0x00 // leaq $664(%rip), %rsi /* LJTI15_1(%rip) */ + MOVLQSX 0(SI)(DX*4), DX + ADDQ SI, DX + JMP DX + +LBB15_25: + MOVQ -56(BP), AX + 
MOVQ 8(AX), SI + MOVQ R14, DI + MOVQ R13, DX + MOVQ R12, CX + +LBB15_26: + LEAQ -96(BP), R8 + MOVQ R9, R14 + MOVQ R12, R9 + LONG $0x0009f3e8; BYTE $0x00 // callq _advance_number + MOVQ R14, R9 + MOVQ -64(BP), R8 + TESTQ AX, AX + LONG $0xe0480f4c // cmovsq %rax, %r12 + MOVQ R12, AX + TESTQ R12, R12 + JNS LBB15_5 + JMP LBB15_50 + +LBB15_27: + MOVBLSX AX, AX + CMPL AX, $34 + JE LBB15_30 + +LBB15_28: + CMPL AX, $125 + JNE LBB15_49 + +LBB15_29: + MOVL DX, 0(R8) + MOVQ -48(BP), R13 + MOVL DX, R15 + MOVQ R9, AX + TESTL DX, DX + JNE LBB15_7 + JMP LBB15_50 + +LBB15_30: + MOVQ R9, R15 + MOVL $2, 4(R8)(R10*4) + MOVQ -56(BP), DI + MOVQ -48(BP), SI + MOVQ SI, R13 + MOVQ R8, BX + LONG $0x00044ce8; BYTE $0x00 // callq _skip_string + TESTQ AX, AX + JS LBB15_50 + MOVLQSX 0(BX), AX + CMPQ AX, $65535 + JG LBB15_51 + MOVQ BX, R8 + LEAL 1(AX), CX + MOVL CX, 0(BX) + MOVL $4, 4(BX)(AX*4) + MOVQ R15, R9 + JMP LBB15_5 + +LBB15_33: + MOVQ -56(BP), DI + MOVQ R13, SI + MOVQ R9, BX + LONG $0x00040de8; BYTE $0x00 // callq _skip_string + MOVQ BX, R9 + MOVQ -64(BP), R8 + TESTQ AX, AX + JNS LBB15_5 + JMP LBB15_50 + +LBB15_34: + MOVQ -56(BP), AX + MOVQ 8(AX), SI + MOVQ R14, DI + MOVQ R13, DX + JMP LBB15_26 + +LBB15_35: + MOVLQSX 0(R8), AX + CMPQ AX, $65535 + JG LBB15_51 + LEAL 1(AX), CX + MOVL CX, 0(R8) + MOVL $5, 4(R8)(AX*4) + JMP LBB15_5 + +LBB15_37: + MOVQ -56(BP), DX + MOVQ 8(DX), DX + LEAQ -4(DX), SI + CMPQ CX, SI + JA LBB15_54 + MOVL 0(R14)(CX*1), DX + CMPL DX, $1702063201 + JNE LBB15_56 + LEAQ 4(CX), AX + JMP LBB15_45 + +LBB15_40: + MOVQ -56(BP), DX + MOVQ 8(DX), DX + LEAQ -3(DX), SI + CMPQ CX, SI + JA LBB15_54 + CMPL -1(R14)(CX*1), $1819047278 + JE LBB15_44 + JMP LBB15_63 + +LBB15_42: + MOVQ -56(BP), DX + MOVQ 8(DX), DX + LEAQ -3(DX), SI + CMPQ CX, SI + JA LBB15_54 + CMPL -1(R14)(CX*1), $1702195828 + JNE LBB15_59 + +LBB15_44: + LEAQ 3(CX), AX + +LBB15_45: + MOVQ AX, 0(R13) + MOVQ R12, AX + TESTQ CX, CX + JG LBB15_5 + JMP LBB15_50 + +LBB15_46: + MOVLQSX 0(R8), AX + CMPQ AX, $65535 + JG 
LBB15_51 + LEAL 1(AX), CX + MOVL CX, 0(R8) + MOVL $6, 4(R8)(AX*4) + JMP LBB15_5 + +LBB15_48: + MOVQ $-1, AX + JMP LBB15_50 + +LBB15_49: + MOVQ $-2, AX + JMP LBB15_50 + +LBB15_51: + MOVQ $-7, AX + +LBB15_50: + ADDQ $56, SP + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB15_54: + MOVQ DX, 0(R13) + JMP LBB15_50 + +LBB15_56: + MOVQ $-2, AX + CMPB DX, $97 + JNE LBB15_50 + INCQ CX + MOVL $1702063201, DX + +LBB15_58: + SHRL $8, DX + MOVQ CX, 0(R13) + MOVBLSX 0(R14)(CX*1), SI + MOVBLZX DX, DI + INCQ CX + CMPL DI, SI + JE LBB15_58 + JMP LBB15_50 + +LBB15_59: + MOVQ R12, 0(R13) + MOVQ $-2, AX + CMPB 0(R14)(R12*1), $116 + JNE LBB15_50 + MOVL $1702195828, DX + +LBB15_61: + SHRL $8, DX + MOVQ CX, 0(R13) + MOVBLSX 0(R14)(CX*1), SI + MOVBLZX DX, DI + INCQ CX + CMPL DI, SI + JE LBB15_61 + JMP LBB15_50 + +LBB15_63: + MOVQ R12, 0(R13) + MOVQ $-2, AX + CMPB 0(R14)(R12*1), $110 + JNE LBB15_50 + MOVL $1819047278, DX + +LBB15_65: + SHRL $8, DX + MOVQ CX, 0(R13) + MOVBLSX 0(R14)(CX*1), SI + MOVBLZX DX, DI + INCQ CX + CMPL DI, SI + JE LBB15_65 + JMP LBB15_50 + +// .set L15_0_set_12, LBB15_12-LJTI15_0 +// .set L15_0_set_15, LBB15_15-LJTI15_0 +// .set L15_0_set_18, LBB15_18-LJTI15_0 +// .set L15_0_set_20, LBB15_20-LJTI15_0 +// .set L15_0_set_22, LBB15_22-LJTI15_0 +// .set L15_0_set_27, LBB15_27-LJTI15_0 +LJTI15_0: + LONG $0xfffffca3 // .long L15_0_set_12 + LONG $0xfffffcd5 // .long L15_0_set_15 + LONG $0xfffffd02 // .long L15_0_set_18 + LONG $0xfffffd3b // .long L15_0_set_20 + LONG $0xfffffd51 // .long L15_0_set_22 + LONG $0xfffffdc8 // .long L15_0_set_27 + + // .set L15_1_set_50, LBB15_50-LJTI15_1 + // .set L15_1_set_49, LBB15_49-LJTI15_1 + // .set L15_1_set_33, LBB15_33-LJTI15_1 + // .set L15_1_set_34, LBB15_34-LJTI15_1 + // .set L15_1_set_25, LBB15_25-LJTI15_1 + // .set L15_1_set_35, LBB15_35-LJTI15_1 + // .set L15_1_set_37, LBB15_37-LJTI15_1 + // .set 
L15_1_set_40, LBB15_40-LJTI15_1 + // .set L15_1_set_42, LBB15_42-LJTI15_1 + // .set L15_1_set_46, LBB15_46-LJTI15_1 +LJTI15_1: + LONG $0xffffff42 // .long L15_1_set_50 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffe2a // .long L15_1_set_33 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffe4e // .long 
L15_1_set_34 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xfffffd71 // .long L15_1_set_25 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffe61 // .long L15_1_set_35 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 
// .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffe84 // .long L15_1_set_37 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffeaf // .long L15_1_set_40 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xfffffed4 // .long L15_1_set_42 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff32 // .long L15_1_set_49 + LONG $0xffffff0a // .long L15_1_set_46 + +_skip_array: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ DX, AX + MOVQ SI, DX + MOVQ DI, SI + MOVQ $21474836481, CX + MOVQ CX, 0(AX) + MOVQ AX, DI + BYTE $0x5d // popq %rbp + JMP _fsm_exec + +_skip_object: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + MOVQ DX, AX + MOVQ SI, DX + MOVQ DI, SI + MOVQ $25769803777, CX + MOVQ CX, 0(AX) + MOVQ AX, DI + BYTE $0x5d // popq %rbp + JMP _fsm_exec + +LCPI18_0: + QUAD $0x2222222222222222; QUAD $0x2222222222222222 // .space 16, '""""""""""""""""' + +_skip_string: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx 
+ BYTE $0x50 // pushq %rax + MOVQ SI, R14 + MOVQ DI, BX + MOVQ 0(SI), R15 + MOVQ R15, SI + MOVL $34, DX + MOVL $92, CX + LONG $0xffd09ce8; BYTE $0xff // callq _strchr2 + TESTQ AX, AX + JS LBB18_3 + MOVQ AX, CX + MOVQ 0(BX), R10 + CMPB 0(R10)(AX*1), $34 + JNE LBB18_6 + +LBB18_2: + DECQ R15 + INCQ CX + MOVQ CX, R11 + MOVQ R15, AX + JMP LBB18_5 + +LBB18_3: + MOVQ 8(BX), R11 + +LBB18_4: + MOVQ $-1, AX + +LBB18_5: + MOVQ R11, 0(R14) + ADDQ $8, SP + BYTE $0x5b // popq %rbx + WORD $0x5c41 // popq %r12 + WORD $0x5d41 // popq %r13 + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB18_6: + LEAQ 1(CX), R8 + MOVQ 8(BX), R11 + MOVQ R11, DX + SUBQ R8, DX + LEAQ 1(R10)(CX*1), BX + MOVQ BX, CX + MOVQ DX, AX + MOVQ BX, SI + ANDQ $15, CX + JE LBB18_10 + MOVQ BX, AX + ANDQ $-16, AX + LONG $0x006ff9c5 // vmovdqa (%rax), %xmm0 + QUAD $0xffffff550574f9c5 // vpcmpeqb $-171(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + SHRQ CX, AX + TESTQ AX, AX + JE LBB18_9 + ORQ $65536, AX + BSFQ AX, AX + CMPQ AX, DX + MOVQ $-1, CX + LONG $0xc84c0f48 // cmovlq %rax, %rcx + JMP LBB18_33 + +LBB18_9: + MOVL $16, DI + SUBQ CX, DI + LEAQ 0(BX)(DI*1), SI + MOVQ DX, AX + SUBQ DI, AX + +LBB18_10: + CMPQ AX, $64 + JL LBB18_14 + QUAD $0xffffff0c0d6ffac5 // vmovdqu $-244(%rip), %xmm1 /* LCPI18_0(%rip) */ + +LBB18_12: + LONG $0x266ff9c5 // vmovdqa (%rsi), %xmm4 + LONG $0x5e6ff9c5; BYTE $0x10 // vmovdqa $16(%rsi), %xmm3 + LONG $0x566ff9c5; BYTE $0x20 // vmovdqa $32(%rsi), %xmm2 + LONG $0x466ff9c5; BYTE $0x30 // vmovdqa $48(%rsi), %xmm0 + LONG $0xe974d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm5 + LONG $0xf174e1c5 // vpcmpeqb %xmm1, %xmm3, %xmm6 + LONG $0xedebc9c5 // vpor %xmm5, %xmm6, %xmm5 + LONG $0xf174e9c5 // vpcmpeqb %xmm1, %xmm2, %xmm6 + LONG $0xf974f9c5 // vpcmpeqb %xmm1, %xmm0, %xmm7 + LONG $0xf7ebc9c5 // vpor %xmm7, %xmm6, %xmm6 + LONG $0xeeebd1c5 // vpor %xmm6, %xmm5, %xmm5 + LONG $0xcdd7f9c5 // vpmovmskb %xmm5, %ecx + TESTW CX, CX + 
JNE LBB18_20 + ADDQ $64, SI + LEAQ -64(AX), DI + CMPQ AX, $127 + MOVQ DI, AX + JG LBB18_12 + JMP LBB18_15 + +LBB18_14: + MOVQ AX, DI + +LBB18_15: + MOVQ $-1, AX + TESTQ DI, DI + JS LBB18_5 + LONG $0x066ff9c5 // vmovdqa (%rsi), %xmm0 + QUAD $0xfffffea20574f9c5 // vpcmpeqb $-350(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB18_22 + +LBB18_17: + MOVQ SI, CX + +LBB18_18: + MOVWLZX R9, SI + ORQ $65536, SI + BSFQ SI, SI + CMPQ SI, DI + JGE LBB18_5 + SUBQ BX, CX + ADDQ SI, CX + JMP LBB18_33 + +LBB18_20: + QUAD $0xfffffe6a0d74d9c5 // vpcmpeqb $-406(%rip), %xmm4, %xmm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + TESTW AX, AX + JE LBB18_25 + MOVWLZX AX, AX + SUBQ BX, SI + ORQ $65536, AX + BSFQ AX, CX + ADDQ SI, CX + JMP LBB18_33 + +LBB18_22: + CMPQ DI, $15 + JLE LBB18_5 + LEAQ 16(SI), CX + LONG $0x016ff9c5 // vmovdqa (%rcx), %xmm0 + QUAD $0xfffffe2f0574f9c5 // vpcmpeqb $-465(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB18_29 + ADDQ $-16, DI + JMP LBB18_18 + +LBB18_25: + QUAD $0xfffffe170d74e1c5 // vpcmpeqb $-489(%rip), %xmm3, %xmm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + TESTW AX, AX + JE LBB18_27 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + SUBQ BX, SI + LEAQ 16(SI)(AX*1), CX + JMP LBB18_33 + +LBB18_27: + QUAD $0xfffffdef0d74e9c5 // vpcmpeqb $-529(%rip), %xmm2, %xmm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + SUBQ BX, SI + TESTW AX, AX + JE LBB18_32 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + LEAQ 32(SI)(AX*1), CX + JMP LBB18_33 + +LBB18_29: + CMPQ DI, $32 + JL LBB18_5 + LEAQ 32(SI), CX + LONG $0x016ff9c5 // vmovdqa (%rcx), %xmm0 + QUAD $0xfffffdb50574f9c5 // vpcmpeqb $-587(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB18_71 + ADDQ $-32, DI + JMP LBB18_18 + +LBB18_32: + QUAD $0xfffffd960574f9c5 // vpcmpeqb 
$-618(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax + ORQ $65536, AX + BSFQ AX, AX + LEAQ 48(SI)(AX*1), CX + +LBB18_33: + MOVQ $-1, AX + CMPQ CX, DX + JAE LBB18_5 + ADDQ R8, CX + JS LBB18_5 + LEAQ -1(R10), R9 + QUAD $0xfffffd5e056ffac5 // vmovdqu $-674(%rip), %xmm0 /* LCPI18_0(%rip) */ + QUAD $0xfffffd56056f7ac5 // vmovdqu $-682(%rip), %xmm8 /* LCPI18_0(%rip) */ + MOVQ $-1, R8 + +LBB18_36: + LEAQ 0(R9)(CX*1), DX + XORL AX, AX + +LBB18_37: + CMPB 0(DX)(AX*1), $92 + LEAQ -1(AX), AX + JE LBB18_37 + NOTL AX + TESTB $1, AX + JE LBB18_69 + LEAQ 1(CX), R12 + MOVQ R11, R13 + SUBQ R12, R13 + LEAQ 1(R10)(CX*1), DI + MOVQ DI, CX + MOVQ R13, BX + MOVQ DI, SI + ANDQ $15, CX + JE LBB18_43 + MOVQ DI, AX + ANDQ $-16, AX + LONG $0x1074b9c5 // vpcmpeqb (%rax), %xmm8, %xmm2 + LONG $0xc2d7f9c5 // vpmovmskb %xmm2, %eax + SHRQ CX, AX + TESTQ AX, AX + JE LBB18_42 + ORQ $65536, AX + BSFQ AX, CX + CMPQ CX, R13 + LONG $0xc84d0f49 // cmovgeq %r8, %rcx + JMP LBB18_59 + +LBB18_42: + MOVL $16, AX + SUBQ CX, AX + LEAQ 0(DI)(AX*1), SI + MOVQ R13, BX + SUBQ AX, BX + +LBB18_43: + MOVQ BX, AX + CMPQ BX, $64 + JL LBB18_46 + +LBB18_44: + LONG $0x2e6ff9c5 // vmovdqa (%rsi), %xmm5 + LONG $0x666ff9c5; BYTE $0x10 // vmovdqa $16(%rsi), %xmm4 + LONG $0x5e6ff9c5; BYTE $0x20 // vmovdqa $32(%rsi), %xmm3 + LONG $0x566ff9c5; BYTE $0x30 // vmovdqa $48(%rsi), %xmm2 + LONG $0xf074d1c5 // vpcmpeqb %xmm0, %xmm5, %xmm6 + LONG $0xf874d9c5 // vpcmpeqb %xmm0, %xmm4, %xmm7 + LONG $0xf6ebc1c5 // vpor %xmm6, %xmm7, %xmm6 + LONG $0xf874e1c5 // vpcmpeqb %xmm0, %xmm3, %xmm7 + LONG $0xc874e9c5 // vpcmpeqb %xmm0, %xmm2, %xmm1 + LONG $0xc9ebc1c5 // vpor %xmm1, %xmm7, %xmm1 + LONG $0xc9ebc9c5 // vpor %xmm1, %xmm6, %xmm1 + LONG $0xc9d7f9c5 // vpmovmskb %xmm1, %ecx + TESTW CX, CX + JNE LBB18_52 + ADDQ $64, SI + LEAQ -64(AX), BX + CMPQ AX, $127 + MOVQ BX, AX + JG LBB18_44 + +LBB18_46: + MOVQ $-1, AX + TESTQ BX, BX + JS LBB18_5 + LONG $0x0e74b9c5 // vpcmpeqb (%rsi), %xmm8, %xmm1 + LONG 
$0xd1d7f9c5 // vpmovmskb %xmm1, %edx + TESTW DX, DX + JE LBB18_49 + +LBB18_48: + MOVQ SI, CX + JMP LBB18_57 + +LBB18_49: + CMPQ BX, $15 + JLE LBB18_5 + LEAQ 16(SI), CX + LONG $0x0974b9c5 // vpcmpeqb (%rcx), %xmm8, %xmm1 + LONG $0xd1d7f9c5 // vpmovmskb %xmm1, %edx + TESTW DX, DX + JE LBB18_54 + ADDQ $-16, BX + JMP LBB18_57 + +LBB18_52: + LONG $0xcd74b9c5 // vpcmpeqb %xmm5, %xmm8, %xmm1 + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + TESTW AX, AX + JE LBB18_61 + MOVWLZX AX, AX + SUBQ DI, SI + ORQ $65536, AX + BSFQ AX, CX + ADDQ SI, CX + JMP LBB18_59 + +LBB18_54: + CMPQ BX, $32 + JL LBB18_5 + LEAQ 32(SI), CX + LONG $0x0974b9c5 // vpcmpeqb (%rcx), %xmm8, %xmm1 + LONG $0xd1d7f9c5 // vpmovmskb %xmm1, %edx + TESTW DX, DX + JE LBB18_63 + ADDQ $-32, BX + +LBB18_57: + MOVWLZX DX, DX + ORQ $65536, DX + BSFQ DX, DX + CMPQ DX, BX + JGE LBB18_5 + SUBQ DI, CX + ADDQ DX, CX + +LBB18_59: + MOVQ $-1, AX + CMPQ CX, R13 + JAE LBB18_5 + ADDQ R12, CX + JNS LBB18_36 + JMP LBB18_5 + +LBB18_61: + LONG $0xcc74b9c5 // vpcmpeqb %xmm4, %xmm8, %xmm1 + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + TESTW AX, AX + JE LBB18_66 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + SUBQ DI, SI + LEAQ 16(SI)(AX*1), CX + JMP LBB18_59 + +LBB18_63: + CMPQ BX, $48 + JL LBB18_5 + ADDQ $48, SI + LONG $0x0e74b9c5 // vpcmpeqb (%rsi), %xmm8, %xmm1 + LONG $0xd1d7f9c5 // vpmovmskb %xmm1, %edx + TESTW DX, DX + JE LBB18_5 + ADDQ $-48, BX + JMP LBB18_48 + +LBB18_66: + LONG $0xcb74b9c5 // vpcmpeqb %xmm3, %xmm8, %xmm1 + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + SUBQ DI, SI + TESTW AX, AX + JE LBB18_68 + MOVWLZX AX, AX + ORQ $65536, AX + BSFQ AX, AX + LEAQ 32(SI)(AX*1), CX + JMP LBB18_59 + +LBB18_68: + LONG $0xca74b9c5 // vpcmpeqb %xmm2, %xmm8, %xmm1 + LONG $0xc1d7f9c5 // vpmovmskb %xmm1, %eax + ORQ $65536, AX + BSFQ AX, AX + LEAQ 48(SI)(AX*1), CX + JMP LBB18_59 + +LBB18_69: + TESTQ CX, CX + JNS LBB18_2 + JMP LBB18_4 + +LBB18_71: + CMPQ DI, $48 + JL LBB18_5 + ADDQ $48, SI + LONG $0x066ff9c5 // vmovdqa (%rsi), %xmm0 + QUAD 
$0xfffffb100574f9c5 // vpcmpeqb $-1264(%rip), %xmm0, %xmm0 /* LCPI18_0(%rip) */ + LONG $0xc8d779c5 // vpmovmskb %xmm0, %r9d + TESTW R9, R9 + JE LBB18_5 + ADDQ $-48, DI + JMP LBB18_17 + +_skip_negative: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + BYTE $0x53 // pushq %rbx + SUBQ $40, SP + MOVQ SI, DX + MOVQ 0(SI), CX + LEAQ -1(CX), BX + MOVQ 0(DI), AX + MOVQ 8(DI), SI + LEAQ -40(BP), R8 + MOVQ AX, DI + MOVQ BX, R9 + LONG $0x00001ee8; BYTE $0x00 // callq _advance_number + TESTQ AX, AX + LONG $0xc3490f48 // cmovnsq %rbx, %rax + ADDQ $40, SP + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +LCPI20_0: + QUAD $0x4024000000000000 // .quad 0x4024000000000000 + +LCPI20_1: + QUAD $0x7ff0000000000000 // .quad 0x7ff0000000000000 + +_advance_number: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + BYTE $0x53 // pushq %rbx + MOVQ $-1, AX + CMPQ SI, CX + JBE LBB20_9 + MOVQ $0, 16(R8) + MOVQ R9, 24(R8) + MOVB 0(DI)(CX*1), R14 + CMPB R14, $48 + JNE LBB20_10 + INCQ CX + CMPQ CX, SI + JAE LBB20_12 + XORL R9, R9 + +LBB20_4: + LONG $0x2afbc1c4; BYTE $0xc1 // vcvtsi2sd %r9, %xmm0, %xmm0 + MOVQ R9, 16(R8) + LONG $0x117bc1c4; WORD $0x0840 // vmovsd %xmm0, $8(%r8) + ADDQ $8, R8 + MOVL $9, BX + MOVQ R8, R9 + CMPQ CX, SI + JAE LBB20_25 + JMP LBB20_5 + +LBB20_10: + LEAL -48(R14), BX + CMPB BX, $9 + JBE LBB20_13 + +LBB20_11: + MOVQ $-2, AX + JMP LBB20_55 + +LBB20_12: + MOVL $9, AX + JMP LBB20_55 + +LBB20_13: + INCQ CX + XORL R11, R11 + MOVQ CX, R10 + +LBB20_14: + IMUL3Q $10, R11, R9 + JO LBB20_19 + MOVBLZX R14, CX + ADDQ $-48, CX + ADDQ CX, R9 + JO LBB20_19 + CMPQ SI, R10 + JE LBB20_34 + MOVBLZX 0(DI)(R10*1), R14 + LEAL -48(R14), CX + INCQ R10 + MOVQ R9, R11 + CMPB CX, $10 + JB LBB20_14 + DECQ R10 + MOVQ R10, CX + JMP LBB20_4 + +LBB20_19: + MOVQ R9, 16(R8) + LONG $0x2afbc1c4; BYTE $0xc9 // vcvtsi2sd %r9, %xmm0, %xmm1 + LONG $0x2afbc1c4; BYTE $0xc3 // vcvtsi2sd %r11, 
%xmm0, %xmm0 + LEAQ 8(R8), R9 + CMPQ R10, SI + MOVQ R10, R11 + LONG $0xde420f4c // cmovbq %rsi, %r11 + LONG $0x117bc1c4; WORD $0x0848 // vmovsd %xmm1, $8(%r8) + MOVB -1(DI)(R10*1), BX + DECQ R10 + QUAD $0xfffffef80d10fbc5 // vmovsd $-264(%rip), %xmm1 /* LCPI20_0(%rip) */ + +LBB20_20: + LONG $0xc159fbc5 // vmulsd %xmm1, %xmm0, %xmm0 + LEAQ 1(R10), CX + MOVBLSX BX, BX + ADDL $-48, BX + LONG $0xd32ae3c5 // vcvtsi2sd %ebx, %xmm3, %xmm2 + LONG $0xc258fbc5 // vaddsd %xmm2, %xmm0, %xmm0 + CMPQ CX, SI + JAE LBB20_23 + MOVBLZX 1(DI)(R10*1), BX + LEAL -48(BX), R14 + MOVQ CX, R10 + CMPB R14, $10 + JB LBB20_20 + JMP LBB20_24 + +LBB20_23: + MOVQ R11, CX + +LBB20_24: + LONG $0x117bc1c4; WORD $0x0840 // vmovsd %xmm0, $8(%r8) + MOVQ $9223372036854775807, BX + MOVQ BX, 16(R8) + MOVL $8, BX + CMPQ CX, SI + JAE LBB20_25 + +LBB20_5: + CMPB 0(DI)(CX*1), $46 + JNE LBB20_25 + INCQ CX + CMPQ CX, SI + JAE LBB20_9 + MOVB 0(DI)(CX*1), R14 + LEAL -48(R14), BX + CMPB BX, $9 + JA LBB20_11 + MOVL $2, R11 + SUBQ SI, R11 + XORL R10, R10 + MOVQ CX, R8 + +LBB20_36: + LEAQ 0(R11)(R8*1), R15 + LEAL 0(R10)(R10*4), R10 + MOVBLZX R14, BX + LEAL -48(BX)(R10*2), R10 + CMPQ R15, $1 + JE LBB20_44 + MOVBLZX 1(DI)(R8*1), R14 + INCQ R8 + LEAL -48(R14), BX + CMPB BX, $10 + JB LBB20_36 + JMP LBB20_45 + +LBB20_25: + MOVQ CX, R8 + CMPQ R8, SI + JAE LBB20_54 + +LBB20_26: + MOVB 0(DI)(R8*1), CX + ORB $32, CX + CMPB CX, $101 + JNE LBB20_54 + LEAQ 1(R8), CX + CMPQ CX, SI + JAE LBB20_9 + MOVB 0(DI)(CX*1), R10 + CMPB R10, $45 + JE LBB20_30 + MOVL $1, R11 + CMPB R10, $43 + JNE LBB20_38 + +LBB20_30: + LEAQ -1(SI), BX + CMPQ CX, BX + JAE LBB20_55 + LEAQ 2(R8), R14 + XORL AX, AX + CMPB R10, $43 + SETEQ AX + LEAL -1(AX)(AX*1), R11 + MOVB 2(DI)(R8*1), R10 + JMP LBB20_39 + +LBB20_9: + MOVQ SI, CX + +LBB20_55: + MOVQ CX, 0(DX) + BYTE $0x5b // popq %rbx + WORD $0x5e41 // popq %r14 + WORD $0x5f41 // popq %r15 + BYTE $0x5d // popq %rbp + RET + +LBB20_34: + MOVQ SI, CX + JMP LBB20_4 + +LBB20_38: + MOVQ CX, R14 + +LBB20_39: + LEAL 
-48(R10), CX + MOVQ $-2, AX + CMPB CX, $9 + JBE LBB20_41 + MOVQ R14, CX + JMP LBB20_55 + +LBB20_41: + LEAQ 1(R14), R8 + CMPQ R8, SI + LONG $0xc6420f4c // cmovbq %rsi, %r8 + XORL AX, AX + +LBB20_42: + LEAL 0(AX)(AX*4), AX + LEAQ 1(R14), CX + MOVBLZX R10, BX + LEAL -48(BX)(AX*2), AX + CMPQ CX, SI + JAE LBB20_48 + MOVBLZX 1(DI)(R14*1), R10 + LEAL -48(R10), BX + MOVQ CX, R14 + CMPB BX, $10 + JB LBB20_42 + JMP LBB20_49 + +LBB20_44: + MOVQ SI, R8 + +LBB20_45: + SUBQ R8, CX + LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 + CMPL CX, $-323 + JL LBB20_53 + CMPL CX, $308 + JLE LBB20_52 + QUAD $0xfffffd6a0d10fbc5 // vmovsd $-662(%rip), %xmm1 /* LCPI20_1(%rip) */ + JMP LBB20_53 + +LBB20_48: + MOVQ R8, CX + +LBB20_49: + IMULL R11, AX + LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 + CMPL AX, $-323 + JL LBB20_57 + CMPL AX, $308 + JLE LBB20_56 + QUAD $0xfffffd470d10fbc5 // vmovsd $-697(%rip), %xmm1 /* LCPI20_1(%rip) */ + JMP LBB20_57 + +LBB20_52: + LONG $0x2a63c1c4; BYTE $0xca // vcvtsi2sd %r10d, %xmm3, %xmm1 + ADDL $323, CX + LONG $0x8e1d8d48; WORD $0x0006; BYTE $0x00 // leaq $1678(%rip), %rbx /* _P10_TAB(%rip) */ + LONG $0x0c59f3c5; BYTE $0xcb // vmulsd (%rbx,%rcx,8), %xmm1, %xmm1 + +LBB20_53: + LONG $0xc058f3c5 // vaddsd %xmm0, %xmm1, %xmm0 + LONG $0x117bc1c4; BYTE $0x01 // vmovsd %xmm0, (%r9) + MOVL $8, BX + CMPQ R8, SI + JB LBB20_26 + +LBB20_54: + MOVQ R8, CX + MOVQ BX, AX + JMP LBB20_55 + +LBB20_56: + ADDL $323, AX + LONG $0x5b358d48; WORD $0x0006; BYTE $0x00 // leaq $1627(%rip), %rsi /* _P10_TAB(%rip) */ + LONG $0x0c59fbc5; BYTE $0xc6 // vmulsd (%rsi,%rax,8), %xmm0, %xmm1 + +LBB20_57: + LONG $0x117bc1c4; BYTE $0x09 // vmovsd %xmm1, (%r9) + MOVL $8, AX + JMP LBB20_55 + +_skip_positive: + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + BYTE $0x53 // pushq %rbx + SUBQ $40, SP + MOVQ SI, DX + MOVQ 0(SI), BX + DECQ BX + MOVQ 0(DI), AX + MOVQ 8(DI), SI + LEAQ -40(BP), R8 + MOVQ AX, DI + MOVQ BX, CX + MOVQ BX, R9 + LONG $0xfffcc9e8; BYTE $0xff // callq 
_advance_number + TESTQ AX, AX + LONG $0xc3490f48 // cmovnsq %rbx, %rax + ADDQ $40, SP + BYTE $0x5b // popq %rbx + BYTE $0x5d // popq %rbp + RET + +_TabPowE: + WORD $0xfb3c // .word 64316 + WORD $0xfb57 // .word 64343 + WORD $0xfb72 // .word 64370 + WORD $0xfb8c // .word 64396 + WORD $0xfba7 // .word 64423 + WORD $0xfbc1 // .word 64449 + WORD $0xfbdc // .word 64476 + WORD $0xfbf6 // .word 64502 + WORD $0xfc11 // .word 64529 + WORD $0xfc2c // .word 64556 + WORD $0xfc46 // .word 64582 + WORD $0xfc61 // .word 64609 + WORD $0xfc7b // .word 64635 + WORD $0xfc96 // .word 64662 + WORD $0xfcb1 // .word 64689 + WORD $0xfccb // .word 64715 + WORD $0xfce6 // .word 64742 + WORD $0xfd00 // .word 64768 + WORD $0xfd1b // .word 64795 + WORD $0xfd35 // .word 64821 + WORD $0xfd50 // .word 64848 + WORD $0xfd6b // .word 64875 + WORD $0xfd85 // .word 64901 + WORD $0xfda0 // .word 64928 + WORD $0xfdba // .word 64954 + WORD $0xfdd5 // .word 64981 + WORD $0xfdef // .word 65007 + WORD $0xfe0a // .word 65034 + WORD $0xfe25 // .word 65061 + WORD $0xfe3f // .word 65087 + WORD $0xfe5a // .word 65114 + WORD $0xfe74 // .word 65140 + WORD $0xfe8f // .word 65167 + WORD $0xfea9 // .word 65193 + WORD $0xfec4 // .word 65220 + WORD $0xfedf // .word 65247 + WORD $0xfef9 // .word 65273 + WORD $0xff14 // .word 65300 + WORD $0xff2e // .word 65326 + WORD $0xff49 // .word 65353 + WORD $0xff63 // .word 65379 + WORD $0xff7e // .word 65406 + WORD $0xff99 // .word 65433 + WORD $0xffb3 // .word 65459 + WORD $0xffce // .word 65486 + WORD $0xffe8 // .word 65512 + WORD $0x0003 // .word 3 + WORD $0x001e // .word 30 + WORD $0x0038 // .word 56 + WORD $0x0053 // .word 83 + WORD $0x006d // .word 109 + WORD $0x0088 // .word 136 + WORD $0x00a2 // .word 162 + WORD $0x00bd // .word 189 + WORD $0x00d8 // .word 216 + WORD $0x00f2 // .word 242 + WORD $0x010d // .word 269 + WORD $0x0127 // .word 295 + WORD $0x0142 // .word 322 + WORD $0x015c // .word 348 + WORD $0x0177 // .word 375 + WORD $0x0192 // .word 402 + WORD $0x01ac // 
.word 428 + WORD $0x01c7 // .word 455 + WORD $0x01e1 // .word 481 + WORD $0x01fc // .word 508 + WORD $0x0216 // .word 534 + WORD $0x0231 // .word 561 + WORD $0x024c // .word 588 + WORD $0x0266 // .word 614 + WORD $0x0281 // .word 641 + WORD $0x029b // .word 667 + WORD $0x02b6 // .word 694 + WORD $0x02d0 // .word 720 + WORD $0x02eb // .word 747 + WORD $0x0306 // .word 774 + WORD $0x0320 // .word 800 + WORD $0x033b // .word 827 + WORD $0x0355 // .word 853 + WORD $0x0370 // .word 880 + WORD $0x038b // .word 907 + WORD $0x03a5 // .word 933 + WORD $0x03c0 // .word 960 + WORD $0x03da // .word 986 + WORD $0x03f5 // .word 1013 + WORD $0x040f // .word 1039 + WORD $0x042a // .word 1066 + +_TabPowF: + QUAD $0xfa8fd5a0081c0288 // .quad -391859759250406776 + QUAD $0xbaaee17fa23ebf76 // .quad -4994806998408183946 + QUAD $0x8b16fb203055ac76 // .quad -8424269937281487754 + QUAD $0xcf42894a5dce35ea // .quad -3512093806901185046 + QUAD $0x9a6bb0aa55653b2d // .quad -7319562523736982739 + QUAD $0xe61acf033d1a45df // .quad -1865951482774665761 + QUAD $0xab70fe17c79ac6ca // .quad -6093090917745768758 + QUAD $0xff77b1fcbebcdc4f // .quad -38366372719436721 + QUAD $0xbe5691ef416bd60c // .quad -4731433901725329908 + QUAD $0x8dd01fad907ffc3c // .quad -8228041688891786180 + QUAD $0xd3515c2831559a83 // .quad -3219690930897053053 + QUAD $0x9d71ac8fada6c9b5 // .quad -7101705404292871755 + QUAD $0xea9c227723ee8bcb // .quad -1541319077368263733 + QUAD $0xaecc49914078536d // .quad -5851220927660403859 + QUAD $0x823c12795db6ce57 // .quad -9062348037703676329 + QUAD $0xc21094364dfb5637 // .quad -4462904269766699465 + QUAD $0x9096ea6f3848984f // .quad -8027971522334779313 + QUAD $0xd77485cb25823ac7 // .quad -2921563150702462265 + QUAD $0xa086cfcd97bf97f4 // .quad -6879582898840692748 + QUAD $0xef340a98172aace5 // .quad -1210330751515841307 + QUAD $0xb23867fb2a35b28e // .quad -5604615407819967858 + QUAD $0x84c8d4dfd2c63f3b // .quad -8878612607581929669 + QUAD $0xc5dd44271ad3cdba // .quad 
-4189117143640191558 + QUAD $0x936b9fcebb25c996 // .quad -7823984217374209642 + QUAD $0xdbac6c247d62a584 // .quad -2617598379430861436 + QUAD $0xa3ab66580d5fdaf6 // .quad -6653111496142234890 + QUAD $0xf3e2f893dec3f126 // .quad -872862063775190746 + QUAD $0xb5b5ada8aaff80b8 // .quad -5353181642124984136 + QUAD $0x87625f056c7c4a8b // .quad -8691279853972075893 + QUAD $0xc9bcff6034c13053 // .quad -3909969587797413805 + QUAD $0x964e858c91ba2655 // .quad -7616003081050118571 + QUAD $0xdff9772470297ebd // .quad -2307682335666372931 + QUAD $0xa6dfbd9fb8e5b88f // .quad -6422206049907525489 + QUAD $0xf8a95fcf88747d94 // .quad -528786136287117932 + QUAD $0xb94470938fa89bcf // .quad -5096825099203863601 + QUAD $0x8a08f0f8bf0f156b // .quad -8500279345513818773 + QUAD $0xcdb02555653131b6 // .quad -3625356651333078602 + QUAD $0x993fe2c6d07b7fac // .quad -7403949918844649556 + QUAD $0xe45c10c42a2b3b06 // .quad -1991698500497491194 + QUAD $0xaa242499697392d3 // .quad -6186779746782440749 + QUAD $0xfd87b5f28300ca0e // .quad -177973607073265138 + QUAD $0xbce5086492111aeb // .quad -4835449396872013077 + QUAD $0x8cbccc096f5088cc // .quad -8305539271883716404 + QUAD $0xd1b71758e219652c // .quad -3335171328526686932 + QUAD $0x9c40000000000000 // .quad -7187745005283311616 + QUAD $0xe8d4a51000000000 // .quad -1669528073709551616 + QUAD $0xad78ebc5ac620000 // .quad -5946744073709551616 + QUAD $0x813f3978f8940984 // .quad -9133518327554766460 + QUAD $0xc097ce7bc90715b3 // .quad -4568956265895094861 + QUAD $0x8f7e32ce7bea5c70 // .quad -8106986416796705680 + QUAD $0xd5d238a4abe98068 // .quad -3039304518611664792 + QUAD $0x9f4f2726179a2245 // .quad -6967307053960650171 + QUAD $0xed63a231d4c4fb27 // .quad -1341049929119499481 + QUAD $0xb0de65388cc8ada8 // .quad -5702008784649933400 + QUAD $0x83c7088e1aab65db // .quad -8951176327949752869 + QUAD $0xc45d1df942711d9a // .quad -4297245513042813542 + QUAD $0x924d692ca61be758 // .quad -7904546130479028392 + QUAD $0xda01ee641a708dea // .quad 
-2737644984756826646 + QUAD $0xa26da3999aef774a // .quad -6742553186979055798 + QUAD $0xf209787bb47d6b85 // .quad -1006140569036166267 + QUAD $0xb454e4a179dd1877 // .quad -5452481866653427593 + QUAD $0x865b86925b9bc5c2 // .quad -8765264286586255934 + QUAD $0xc83553c5c8965d3d // .quad -4020214983419339459 + QUAD $0x952ab45cfa97a0b3 // .quad -7698142301602209613 + QUAD $0xde469fbd99a05fe3 // .quad -2430079312244744221 + QUAD $0xa59bc234db398c25 // .quad -6513398903789220827 + QUAD $0xf6c69a72a3989f5c // .quad -664674077828931748 + QUAD $0xb7dcbf5354e9bece // .quad -5198069505264599346 + QUAD $0x88fcf317f22241e2 // .quad -8575712306248138270 + QUAD $0xcc20ce9bd35c78a5 // .quad -3737760522056206171 + QUAD $0x98165af37b2153df // .quad -7487697328667536417 + QUAD $0xe2a0b5dc971f303a // .quad -2116491865831296966 + QUAD $0xa8d9d1535ce3b396 // .quad -6279758049420528746 + QUAD $0xfb9b7cd9a4a7443c // .quad -316522074587315140 + QUAD $0xbb764c4ca7a44410 // .quad -4938676049251384304 + QUAD $0x8bab8eefb6409c1a // .quad -8382449121214030822 + QUAD $0xd01fef10a657842c // .quad -3449775934753242068 + QUAD $0x9b10a4e5e9913129 // .quad -7273132090830278359 + QUAD $0xe7109bfba19c0c9d // .quad -1796764746270372707 + QUAD $0xac2820d9623bf429 // .quad -6041542782089432023 + QUAD $0x80444b5e7aa7cf85 // .quad -9204148869281624187 + QUAD $0xbf21e44003acdd2d // .quad -4674203974643163859 + QUAD $0x8e679c2f5e44ff8f // .quad -8185402070463610993 + QUAD $0xd433179d9c8cb841 // .quad -3156152948152813503 + QUAD $0x9e19db92b4e31ba9 // .quad -7054365918152680535 + QUAD $0xeb96bf6ebadf77d9 // .quad -1470777745987373095 + QUAD $0xaf87023b9bf0ee6b // .quad -5798663540173640085 + +_TabPow10: + QUAD $0x0000000000000001 // .quad 1 + QUAD $0x000000000000000a // .quad 10 + QUAD $0x0000000000000064 // .quad 100 + QUAD $0x00000000000003e8 // .quad 1000 + QUAD $0x0000000000002710 // .quad 10000 + QUAD $0x00000000000186a0 // .quad 100000 + QUAD $0x00000000000f4240 // .quad 1000000 + QUAD $0x0000000000989680 
// .quad 10000000 + QUAD $0x0000000005f5e100 // .quad 100000000 + QUAD $0x000000003b9aca00 // .quad 1000000000 + +_Digits: + QUAD $0x3330323031303030; QUAD $0x3730363035303430 // .ascii 16, '0001020304050607' + QUAD $0x3131303139303830; QUAD $0x3531343133313231 // .ascii 16, '0809101112131415' + QUAD $0x3931383137313631; QUAD $0x3332323231323032 // .ascii 16, '1617181920212223' + QUAD $0x3732363235323432; QUAD $0x3133303339323832 // .ascii 16, '2425262728293031' + QUAD $0x3533343333333233; QUAD $0x3933383337333633 // .ascii 16, '3233343536373839' + QUAD $0x3334323431343034; QUAD $0x3734363435343434 // .ascii 16, '4041424344454647' + QUAD $0x3135303539343834; QUAD $0x3535343533353235 // .ascii 16, '4849505152535455' + QUAD $0x3935383537353635; QUAD $0x3336323631363036 // .ascii 16, '5657585960616263' + QUAD $0x3736363635363436; QUAD $0x3137303739363836 // .ascii 16, '6465666768697071' + QUAD $0x3537343733373237; QUAD $0x3937383737373637 // .ascii 16, '7273747576777879' + QUAD $0x3338323831383038; QUAD $0x3738363835383438 // .ascii 16, '8081828384858687' + QUAD $0x3139303939383838; QUAD $0x3539343933393239 // .ascii 16, '8889909192939495' + QUAD $0x3939383937393639 // .ascii 8, '96979899' + +_VecShiftShuffles: + QUAD $0x0706050403020100; QUAD $0x0f0e0d0c0b0a0908 // .ascii 16, '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f' + QUAD $0x0807060504030201; QUAD $0xff0f0e0d0c0b0a09 // .ascii 16, '\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff' + QUAD $0x0908070605040302; QUAD $0xffff0f0e0d0c0b0a // .ascii 16, '\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff' + QUAD $0x0a09080706050403; QUAD $0xffffff0f0e0d0c0b // .ascii 16, '\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff' + QUAD $0x0b0a090807060504; QUAD $0xffffffff0f0e0d0c // .ascii 16, '\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff\xff' + QUAD $0x0c0b0a0908070605; QUAD $0xffffffffff0f0e0d // .ascii 16, '\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff\xff\xff' + 
QUAD $0x0d0c0b0a09080706; QUAD $0xffffffffffff0f0e // .ascii 16, '\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff\xff\xff\xff' + QUAD $0x0e0d0c0b0a090807; QUAD $0xffffffffffffff0f // .ascii 16, '\x07\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff\xff\xff\xff\xff' + QUAD $0x0f0e0d0c0b0a0908; QUAD $0xffffffffffffffff // .ascii 16, '\x08\t\n\x0b\x0c\r\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff' + +__UnquoteTab: + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .ascii 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .ascii 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000220000; QUAD $0x2f00000000000000 // .ascii 16, '\x00\x00"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .ascii 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .ascii 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000005c00000000 // .ascii 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\\\x00\x00\x00' + QUAD $0x000c000000080000; QUAD $0x000a000000000000 // .ascii 16, '\x00\x00\x08\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\n\x00' + LONG $0x000d0000; WORD $0xff09 // .ascii 6, '\x00\x00\r\x00\t\xff' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; QUAD $0x0000000000000000 // .space 16, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + QUAD $0x0000000000000000; WORD $0x0000 // .space 10, '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + +_P10_TAB: + QUAD $0x0000000000000002 // .quad 0x0000000000000002 + QUAD $0x0000000000000014 // .quad 0x0000000000000014 + QUAD $0x00000000000000ca // .quad 0x00000000000000ca + QUAD $0x00000000000007e8 // .quad 0x00000000000007e8 + QUAD $0x0000000000004f10 // .quad 0x0000000000004f10 + QUAD $0x00000000000316a2 // .quad 0x00000000000316a2 + QUAD $0x00000000001ee257 // .quad 0x00000000001ee257 + QUAD $0x000000000134d761 // .quad 0x000000000134d761 + QUAD $0x000000000c1069cd // .quad 0x000000000c1069cd + QUAD $0x0000000078a42205 // .quad 0x0000000078a42205 + QUAD $0x00000004b6695433 // .quad 0x00000004b6695433 + QUAD $0x0000002f201d49fb // .quad 0x0000002f201d49fb + QUAD $0x000001d74124e3d1 // .quad 0x000001d74124e3d1 + QUAD $0x000012688b70e62b // .quad 0x000012688b70e62b + QUAD $0x0000b8157268fdaf // .quad 0x0000b8157268fdaf + QUAD $0x000730d67819e8d2 // .quad 0x000730d67819e8d2 + QUAD $0x0031fa182c40c60d // .quad 0x0031fa182c40c60d + QUAD $0x0066789e3750f791 // .quad 0x0066789e3750f791 + QUAD $0x009c16c5c5253575 // .quad 0x009c16c5c5253575 + QUAD $0x00d18e3b9b374169 // .quad 0x00d18e3b9b374169 + QUAD $0x0105f1ca820511c3 // .quad 0x0105f1ca820511c3 + QUAD $0x013b6e3d22865634 // .quad 0x013b6e3d22865634 + QUAD $0x017124e63593f5e1 // .quad 0x017124e63593f5e1 + QUAD $0x01a56e1fc2f8f359 // .quad 0x01a56e1fc2f8f359 + QUAD $0x01dac9a7b3b7302f // .quad 0x01dac9a7b3b7302f + QUAD $0x0210be08d0527e1d // .quad 
0x0210be08d0527e1d + QUAD $0x0244ed8b04671da5 // .quad 0x0244ed8b04671da5 + QUAD $0x027a28edc580e50e // .quad 0x027a28edc580e50e + QUAD $0x02b059949b708f29 // .quad 0x02b059949b708f29 + QUAD $0x02e46ff9c24cb2f3 // .quad 0x02e46ff9c24cb2f3 + QUAD $0x03198bf832dfdfb0 // .quad 0x03198bf832dfdfb0 + QUAD $0x034feef63f97d79c // .quad 0x034feef63f97d79c + QUAD $0x0383f559e7bee6c1 // .quad 0x0383f559e7bee6c1 + QUAD $0x03b8f2b061aea072 // .quad 0x03b8f2b061aea072 + QUAD $0x03ef2f5c7a1a488e // .quad 0x03ef2f5c7a1a488e + QUAD $0x04237d99cc506d59 // .quad 0x04237d99cc506d59 + QUAD $0x04585d003f6488af // .quad 0x04585d003f6488af + QUAD $0x048e74404f3daadb // .quad 0x048e74404f3daadb + QUAD $0x04c308a831868ac9 // .quad 0x04c308a831868ac9 + QUAD $0x04f7cad23de82d7b // .quad 0x04f7cad23de82d7b + QUAD $0x052dbd86cd6238d9 // .quad 0x052dbd86cd6238d9 + QUAD $0x05629674405d6388 // .quad 0x05629674405d6388 + QUAD $0x05973c115074bc6a // .quad 0x05973c115074bc6a + QUAD $0x05cd0b15a491eb84 // .quad 0x05cd0b15a491eb84 + QUAD $0x060226ed86db3333 // .quad 0x060226ed86db3333 + QUAD $0x0636b0a8e891ffff // .quad 0x0636b0a8e891ffff + QUAD $0x066c5cd322b67fff // .quad 0x066c5cd322b67fff + QUAD $0x06a1ba03f5b21000 // .quad 0x06a1ba03f5b21000 + QUAD $0x06d62884f31e93ff // .quad 0x06d62884f31e93ff + QUAD $0x070bb2a62fe638ff // .quad 0x070bb2a62fe638ff + QUAD $0x07414fa7ddefe3a0 // .quad 0x07414fa7ddefe3a0 + QUAD $0x0775a391d56bdc87 // .quad 0x0775a391d56bdc87 + QUAD $0x07ab0c764ac6d3a9 // .quad 0x07ab0c764ac6d3a9 + QUAD $0x07e0e7c9eebc444a // .quad 0x07e0e7c9eebc444a + QUAD $0x081521bc6a6b555c // .quad 0x081521bc6a6b555c + QUAD $0x084a6a2b85062ab3 // .quad 0x084a6a2b85062ab3 + QUAD $0x0880825b3323dab0 // .quad 0x0880825b3323dab0 + QUAD $0x08b4a2f1ffecd15c // .quad 0x08b4a2f1ffecd15c + QUAD $0x08e9cbae7fe805b3 // .quad 0x08e9cbae7fe805b3 + QUAD $0x09201f4d0ff10390 // .quad 0x09201f4d0ff10390 + QUAD $0x0954272053ed4474 // .quad 0x0954272053ed4474 + QUAD $0x098930e868e89591 // .quad 0x098930e868e89591 
+ QUAD $0x09bf7d228322baf5 // .quad 0x09bf7d228322baf5 + QUAD $0x09f3ae3591f5b4d9 // .quad 0x09f3ae3591f5b4d9 + QUAD $0x0a2899c2f6732210 // .quad 0x0a2899c2f6732210 + QUAD $0x0a5ec033b40fea93 // .quad 0x0a5ec033b40fea93 + QUAD $0x0a9338205089f29c // .quad 0x0a9338205089f29c + QUAD $0x0ac8062864ac6f43 // .quad 0x0ac8062864ac6f43 + QUAD $0x0afe07b27dd78b14 // .quad 0x0afe07b27dd78b14 + QUAD $0x0b32c4cf8ea6b6ec // .quad 0x0b32c4cf8ea6b6ec + QUAD $0x0b677603725064a8 // .quad 0x0b677603725064a8 + QUAD $0x0b9d53844ee47dd1 // .quad 0x0b9d53844ee47dd1 + QUAD $0x0bd25432b14ecea3 // .quad 0x0bd25432b14ecea3 + QUAD $0x0c06e93f5da2824c // .quad 0x0c06e93f5da2824c + QUAD $0x0c3ca38f350b22df // .quad 0x0c3ca38f350b22df + QUAD $0x0c71e6398126f5cb // .quad 0x0c71e6398126f5cb + QUAD $0x0ca65fc7e170b33e // .quad 0x0ca65fc7e170b33e + QUAD $0x0cdbf7b9d9cce00d // .quad 0x0cdbf7b9d9cce00d + QUAD $0x0d117ad428200c08 // .quad 0x0d117ad428200c08 + QUAD $0x0d45d98932280f0a // .quad 0x0d45d98932280f0a + QUAD $0x0d7b4feb7eb212cd // .quad 0x0d7b4feb7eb212cd + QUAD $0x0db111f32f2f4bc0 // .quad 0x0db111f32f2f4bc0 + QUAD $0x0de5566ffafb1eb0 // .quad 0x0de5566ffafb1eb0 + QUAD $0x0e1aac0bf9b9e65c // .quad 0x0e1aac0bf9b9e65c + QUAD $0x0e50ab877c142ffa // .quad 0x0e50ab877c142ffa + QUAD $0x0e84d6695b193bf8 // .quad 0x0e84d6695b193bf8 + QUAD $0x0eba0c03b1df8af6 // .quad 0x0eba0c03b1df8af6 + QUAD $0x0ef047824f2bb6da // .quad 0x0ef047824f2bb6da + QUAD $0x0f245962e2f6a490 // .quad 0x0f245962e2f6a490 + QUAD $0x0f596fbb9bb44db4 // .quad 0x0f596fbb9bb44db4 + QUAD $0x0f8fcbaa82a16121 // .quad 0x0f8fcbaa82a16121 + QUAD $0x0fc3df4a91a4dcb5 // .quad 0x0fc3df4a91a4dcb5 + QUAD $0x0ff8d71d360e13e2 // .quad 0x0ff8d71d360e13e2 + QUAD $0x102f0ce4839198db // .quad 0x102f0ce4839198db + QUAD $0x1063680ed23aff89 // .quad 0x1063680ed23aff89 + QUAD $0x1098421286c9bf6b // .quad 0x1098421286c9bf6b + QUAD $0x10ce5297287c2f45 // .quad 0x10ce5297287c2f45 + QUAD $0x1102f39e794d9d8b // .quad 0x1102f39e794d9d8b + QUAD 
$0x1137b08617a104ee // .quad 0x1137b08617a104ee + QUAD $0x116d9ca79d89462a // .quad 0x116d9ca79d89462a + QUAD $0x11a281e8c275cbda // .quad 0x11a281e8c275cbda + QUAD $0x11d72262f3133ed1 // .quad 0x11d72262f3133ed1 + QUAD $0x120ceafbafd80e85 // .quad 0x120ceafbafd80e85 + QUAD $0x124212dd4de70913 // .quad 0x124212dd4de70913 + QUAD $0x12769794a160cb58 // .quad 0x12769794a160cb58 + QUAD $0x12ac3d79c9b8fe2e // .quad 0x12ac3d79c9b8fe2e + QUAD $0x12e1a66c1e139edd // .quad 0x12e1a66c1e139edd + QUAD $0x1316100725988694 // .quad 0x1316100725988694 + QUAD $0x134b9408eefea839 // .quad 0x134b9408eefea839 + QUAD $0x13813c85955f2923 // .quad 0x13813c85955f2923 + QUAD $0x13b58ba6fab6f36c // .quad 0x13b58ba6fab6f36c + QUAD $0x13eaee90b964b047 // .quad 0x13eaee90b964b047 + QUAD $0x1420d51a73deee2d // .quad 0x1420d51a73deee2d + QUAD $0x14550a6110d6a9b8 // .quad 0x14550a6110d6a9b8 + QUAD $0x148a4cf9550c5426 // .quad 0x148a4cf9550c5426 + QUAD $0x14c0701bd527b498 // .quad 0x14c0701bd527b498 + QUAD $0x14f48c22ca71a1bd // .quad 0x14f48c22ca71a1bd + QUAD $0x1529af2b7d0e0a2d // .quad 0x1529af2b7d0e0a2d + QUAD $0x15600d7b2e28c65c // .quad 0x15600d7b2e28c65c + QUAD $0x159410d9f9b2f7f3 // .quad 0x159410d9f9b2f7f3 + QUAD $0x15c91510781fb5f0 // .quad 0x15c91510781fb5f0 + QUAD $0x15ff5a549627a36c // .quad 0x15ff5a549627a36c + QUAD $0x16339874ddd8c623 // .quad 0x16339874ddd8c623 + QUAD $0x16687e92154ef7ac // .quad 0x16687e92154ef7ac + QUAD $0x169e9e369aa2b597 // .quad 0x169e9e369aa2b597 + QUAD $0x16d322e220a5b17e // .quad 0x16d322e220a5b17e + QUAD $0x1707eb9aa8cf1dde // .quad 0x1707eb9aa8cf1dde + QUAD $0x173de6815302e556 // .quad 0x173de6815302e556 + QUAD $0x1772b010d3e1cf56 // .quad 0x1772b010d3e1cf56 + QUAD $0x17a75c1508da432b // .quad 0x17a75c1508da432b + QUAD $0x17dd331a4b10d3f6 // .quad 0x17dd331a4b10d3f6 + QUAD $0x18123ff06eea847a // .quad 0x18123ff06eea847a + QUAD $0x1846cfec8aa52598 // .quad 0x1846cfec8aa52598 + QUAD $0x187c83e7ad4e6efe // .quad 0x187c83e7ad4e6efe + QUAD $0x18b1d270cc51055f 
// .quad 0x18b1d270cc51055f + QUAD $0x18e6470cff6546b6 // .quad 0x18e6470cff6546b6 + QUAD $0x191bd8d03f3e9864 // .quad 0x191bd8d03f3e9864 + QUAD $0x1951678227871f3e // .quad 0x1951678227871f3e + QUAD $0x1985c162b168e70e // .quad 0x1985c162b168e70e + QUAD $0x19bb31bb5dc320d2 // .quad 0x19bb31bb5dc320d2 + QUAD $0x19f0ff151a99f483 // .quad 0x19f0ff151a99f483 + QUAD $0x1a253eda614071a4 // .quad 0x1a253eda614071a4 + QUAD $0x1a5a8e90f9908e0d // .quad 0x1a5a8e90f9908e0d + QUAD $0x1a90991a9bfa58c8 // .quad 0x1a90991a9bfa58c8 + QUAD $0x1ac4bf6142f8eefa // .quad 0x1ac4bf6142f8eefa + QUAD $0x1af9ef3993b72ab8 // .quad 0x1af9ef3993b72ab8 + QUAD $0x1b303583fc527ab3 // .quad 0x1b303583fc527ab3 + QUAD $0x1b6442e4fb671960 // .quad 0x1b6442e4fb671960 + QUAD $0x1b99539e3a40dfb8 // .quad 0x1b99539e3a40dfb8 + QUAD $0x1bcfa885c8d117a6 // .quad 0x1bcfa885c8d117a6 + QUAD $0x1c03c9539d82aec8 // .quad 0x1c03c9539d82aec8 + QUAD $0x1c38bba884e35a7a // .quad 0x1c38bba884e35a7a + QUAD $0x1c6eea92a61c3118 // .quad 0x1c6eea92a61c3118 + QUAD $0x1ca3529ba7d19eaf // .quad 0x1ca3529ba7d19eaf + QUAD $0x1cd8274291c6065b // .quad 0x1cd8274291c6065b + QUAD $0x1d0e3113363787f2 // .quad 0x1d0e3113363787f2 + QUAD $0x1d42deac01e2b4f7 // .quad 0x1d42deac01e2b4f7 + QUAD $0x1d779657025b6235 // .quad 0x1d779657025b6235 + QUAD $0x1dad7becc2f23ac2 // .quad 0x1dad7becc2f23ac2 + QUAD $0x1de26d73f9d764b9 // .quad 0x1de26d73f9d764b9 + QUAD $0x1e1708d0f84d3de7 // .quad 0x1e1708d0f84d3de7 + QUAD $0x1e4ccb0536608d61 // .quad 0x1e4ccb0536608d61 + QUAD $0x1e81fee341fc585d // .quad 0x1e81fee341fc585d + QUAD $0x1eb67e9c127b6e74 // .quad 0x1eb67e9c127b6e74 + QUAD $0x1eec1e43171a4a11 // .quad 0x1eec1e43171a4a11 + QUAD $0x1f2192e9ee706e4b // .quad 0x1f2192e9ee706e4b + QUAD $0x1f55f7a46a0c89dd // .quad 0x1f55f7a46a0c89dd + QUAD $0x1f8b758d848fac55 // .quad 0x1f8b758d848fac55 + QUAD $0x1fc1297872d9cbb5 // .quad 0x1fc1297872d9cbb5 + QUAD $0x1ff573d68f903ea2 // .quad 0x1ff573d68f903ea2 + QUAD $0x202ad0cc33744e4b // .quad 
0x202ad0cc33744e4b + QUAD $0x2060c27fa028b0ef // .quad 0x2060c27fa028b0ef + QUAD $0x2094f31f8832dd2a // .quad 0x2094f31f8832dd2a + QUAD $0x20ca2fe76a3f9475 // .quad 0x20ca2fe76a3f9475 + QUAD $0x21005df0a267bcc9 // .quad 0x21005df0a267bcc9 + QUAD $0x2134756ccb01abfb // .quad 0x2134756ccb01abfb + QUAD $0x216992c7fdc216fa // .quad 0x216992c7fdc216fa + QUAD $0x219ff779fd329cb9 // .quad 0x219ff779fd329cb9 + QUAD $0x21d3faac3e3fa1f3 // .quad 0x21d3faac3e3fa1f3 + QUAD $0x2208f9574dcf8a70 // .quad 0x2208f9574dcf8a70 + QUAD $0x223f37ad21436d0c // .quad 0x223f37ad21436d0c + QUAD $0x227382cc34ca2428 // .quad 0x227382cc34ca2428 + QUAD $0x22a8637f41fcad32 // .quad 0x22a8637f41fcad32 + QUAD $0x22de7c5f127bd87e // .quad 0x22de7c5f127bd87e + QUAD $0x23130dbb6b8d674f // .quad 0x23130dbb6b8d674f + QUAD $0x2347d12a4670c123 // .quad 0x2347d12a4670c123 + QUAD $0x237dc574d80cf16b // .quad 0x237dc574d80cf16b + QUAD $0x23b29b69070816e3 // .quad 0x23b29b69070816e3 + QUAD $0x23e7424348ca1c9c // .quad 0x23e7424348ca1c9c + QUAD $0x241d12d41afca3c3 // .quad 0x241d12d41afca3c3 + QUAD $0x24522bc490dde65a // .quad 0x24522bc490dde65a + QUAD $0x2486b6b5b5155ff0 // .quad 0x2486b6b5b5155ff0 + QUAD $0x24bc6463225ab7ec // .quad 0x24bc6463225ab7ec + QUAD $0x24f1bebdf578b2f4 // .quad 0x24f1bebdf578b2f4 + QUAD $0x25262e6d72d6dfb0 // .quad 0x25262e6d72d6dfb0 + QUAD $0x255bba08cf8c979d // .quad 0x255bba08cf8c979d + QUAD $0x2591544581b7dec2 // .quad 0x2591544581b7dec2 + QUAD $0x25c5a956e225d672 // .quad 0x25c5a956e225d672 + QUAD $0x25fb13ac9aaf4c0f // .quad 0x25fb13ac9aaf4c0f + QUAD $0x2630ec4be0ad8f89 // .quad 0x2630ec4be0ad8f89 + QUAD $0x2665275ed8d8f36c // .quad 0x2665275ed8d8f36c + QUAD $0x269a71368f0f3047 // .quad 0x269a71368f0f3047 + QUAD $0x26d086c219697e2c // .quad 0x26d086c219697e2c + QUAD $0x2704a8729fc3ddb7 // .quad 0x2704a8729fc3ddb7 + QUAD $0x2739d28f47b4d525 // .quad 0x2739d28f47b4d525 + QUAD $0x277023998cd10537 // .quad 0x277023998cd10537 + QUAD $0x27a42c7ff0054685 // .quad 0x27a42c7ff0054685 
+ QUAD $0x27d9379fec069826 // .quad 0x27d9379fec069826 + QUAD $0x280f8587e7083e30 // .quad 0x280f8587e7083e30 + QUAD $0x2843b374f06526de // .quad 0x2843b374f06526de + QUAD $0x2878a0522c7e7095 // .quad 0x2878a0522c7e7095 + QUAD $0x28aec866b79e0cba // .quad 0x28aec866b79e0cba + QUAD $0x28e33d4032c2c7f5 // .quad 0x28e33d4032c2c7f5 + QUAD $0x29180c903f7379f2 // .quad 0x29180c903f7379f2 + QUAD $0x294e0fb44f50586e // .quad 0x294e0fb44f50586e + QUAD $0x2982c9d0b1923745 // .quad 0x2982c9d0b1923745 + QUAD $0x29b77c44ddf6c516 // .quad 0x29b77c44ddf6c516 + QUAD $0x29ed5b561574765b // .quad 0x29ed5b561574765b + QUAD $0x2a225915cd68c9f9 // .quad 0x2a225915cd68c9f9 + QUAD $0x2a56ef5b40c2fc77 // .quad 0x2a56ef5b40c2fc77 + QUAD $0x2a8cab3210f3bb95 // .quad 0x2a8cab3210f3bb95 + QUAD $0x2ac1eaff4a98553d // .quad 0x2ac1eaff4a98553d + QUAD $0x2af665bf1d3e6a8d // .quad 0x2af665bf1d3e6a8d + QUAD $0x2b2bff2ee48e0530 // .quad 0x2b2bff2ee48e0530 + QUAD $0x2b617f7d4ed8c33e // .quad 0x2b617f7d4ed8c33e + QUAD $0x2b95df5ca28ef40d // .quad 0x2b95df5ca28ef40d + QUAD $0x2bcb5733cb32b111 // .quad 0x2bcb5733cb32b111 + QUAD $0x2c0116805effaeaa // .quad 0x2c0116805effaeaa + QUAD $0x2c355c2076bf9a55 // .quad 0x2c355c2076bf9a55 + QUAD $0x2c6ab328946f80ea // .quad 0x2c6ab328946f80ea + QUAD $0x2ca0aff95cc5b092 // .quad 0x2ca0aff95cc5b092 + QUAD $0x2cd4dbf7b3f71cb7 // .quad 0x2cd4dbf7b3f71cb7 + QUAD $0x2d0a12f5a0f4e3e5 // .quad 0x2d0a12f5a0f4e3e5 + QUAD $0x2d404bd984990e6f // .quad 0x2d404bd984990e6f + QUAD $0x2d745ecfe5bf520b // .quad 0x2d745ecfe5bf520b + QUAD $0x2da97683df2f268d // .quad 0x2da97683df2f268d + QUAD $0x2ddfd424d6faf031 // .quad 0x2ddfd424d6faf031 + QUAD $0x2e13e497065cd61f // .quad 0x2e13e497065cd61f + QUAD $0x2e48ddbcc7f40ba6 // .quad 0x2e48ddbcc7f40ba6 + QUAD $0x2e7f152bf9f10e90 // .quad 0x2e7f152bf9f10e90 + QUAD $0x2eb36d3b7c36a91a // .quad 0x2eb36d3b7c36a91a + QUAD $0x2ee8488a5b445360 // .quad 0x2ee8488a5b445360 + QUAD $0x2f1e5aacf2156838 // .quad 0x2f1e5aacf2156838 + QUAD 
$0x2f52f8ac174d6123 // .quad 0x2f52f8ac174d6123 + QUAD $0x2f87b6d71d20b96c // .quad 0x2f87b6d71d20b96c + QUAD $0x2fbda48ce468e7c7 // .quad 0x2fbda48ce468e7c7 + QUAD $0x2ff286d80ec190dc // .quad 0x2ff286d80ec190dc + QUAD $0x3027288e1271f513 // .quad 0x3027288e1271f513 + QUAD $0x305cf2b1970e7258 // .quad 0x305cf2b1970e7258 + QUAD $0x309217aefe690777 // .quad 0x309217aefe690777 + QUAD $0x30c69d9abe034955 // .quad 0x30c69d9abe034955 + QUAD $0x30fc45016d841baa // .quad 0x30fc45016d841baa + QUAD $0x3131ab20e472914a // .quad 0x3131ab20e472914a + QUAD $0x316615e91d8f359d // .quad 0x316615e91d8f359d + QUAD $0x319b9b6364f30304 // .quad 0x319b9b6364f30304 + QUAD $0x31d1411e1f17e1e3 // .quad 0x31d1411e1f17e1e3 + QUAD $0x32059165a6ddda5b // .quad 0x32059165a6ddda5b + QUAD $0x323af5bf109550f2 // .quad 0x323af5bf109550f2 + QUAD $0x3270d9976a5d5297 // .quad 0x3270d9976a5d5297 + QUAD $0x32a50ffd44f4a73d // .quad 0x32a50ffd44f4a73d + QUAD $0x32da53fc9631d10d // .quad 0x32da53fc9631d10d + QUAD $0x3310747ddddf22a8 // .quad 0x3310747ddddf22a8 + QUAD $0x3344919d5556eb52 // .quad 0x3344919d5556eb52 + QUAD $0x3379b604aaaca626 // .quad 0x3379b604aaaca626 + QUAD $0x33b011c2eaabe7d8 // .quad 0x33b011c2eaabe7d8 + QUAD $0x33e41633a556e1ce // .quad 0x33e41633a556e1ce + QUAD $0x34191bc08eac9a41 // .quad 0x34191bc08eac9a41 + QUAD $0x344f62b0b257c0d2 // .quad 0x344f62b0b257c0d2 + QUAD $0x34839dae6f76d883 // .quad 0x34839dae6f76d883 + QUAD $0x34b8851a0b548ea4 // .quad 0x34b8851a0b548ea4 + QUAD $0x34eea6608e29b24d // .quad 0x34eea6608e29b24d + QUAD $0x352327fc58da0f70 // .quad 0x352327fc58da0f70 + QUAD $0x3557f1fb6f10934c // .quad 0x3557f1fb6f10934c + QUAD $0x358dee7a4ad4b81f // .quad 0x358dee7a4ad4b81f + QUAD $0x35c2b50c6ec4f313 // .quad 0x35c2b50c6ec4f313 + QUAD $0x35f7624f8a762fd8 // .quad 0x35f7624f8a762fd8 + QUAD $0x362d3ae36d13bbce // .quad 0x362d3ae36d13bbce + QUAD $0x366244ce242c5561 // .quad 0x366244ce242c5561 + QUAD $0x3696d601ad376ab9 // .quad 0x3696d601ad376ab9 + QUAD $0x36cc8b8218854567 
// .quad 0x36cc8b8218854567 + QUAD $0x3701d7314f534b61 // .quad 0x3701d7314f534b61 + QUAD $0x37364cfda3281e39 // .quad 0x37364cfda3281e39 + QUAD $0x376be03d0bf225c7 // .quad 0x376be03d0bf225c7 + QUAD $0x37a16c262777579c // .quad 0x37a16c262777579c + QUAD $0x37d5c72fb1552d83 // .quad 0x37d5c72fb1552d83 + QUAD $0x380b38fb9daa78e4 // .quad 0x380b38fb9daa78e4 + QUAD $0x3841039d428a8b8f // .quad 0x3841039d428a8b8f + QUAD $0x38754484932d2e72 // .quad 0x38754484932d2e72 + QUAD $0x38aa95a5b7f87a0f // .quad 0x38aa95a5b7f87a0f + QUAD $0x38e09d8792fb4c49 // .quad 0x38e09d8792fb4c49 + QUAD $0x3914c4e977ba1f5c // .quad 0x3914c4e977ba1f5c + QUAD $0x3949f623d5a8a733 // .quad 0x3949f623d5a8a733 + QUAD $0x398039d665896880 // .quad 0x398039d665896880 + QUAD $0x39b4484bfeebc2a0 // .quad 0x39b4484bfeebc2a0 + QUAD $0x39e95a5efea6b347 // .quad 0x39e95a5efea6b347 + QUAD $0x3a1fb0f6be506019 // .quad 0x3a1fb0f6be506019 + QUAD $0x3a53ce9a36f23c10 // .quad 0x3a53ce9a36f23c10 + QUAD $0x3a88c240c4aecb14 // .quad 0x3a88c240c4aecb14 + QUAD $0x3abef2d0f5da7dd9 // .quad 0x3abef2d0f5da7dd9 + QUAD $0x3af357c299a88ea7 // .quad 0x3af357c299a88ea7 + QUAD $0x3b282db34012b251 // .quad 0x3b282db34012b251 + QUAD $0x3b5e392010175ee6 // .quad 0x3b5e392010175ee6 + QUAD $0x3b92e3b40a0e9b4f // .quad 0x3b92e3b40a0e9b4f + QUAD $0x3bc79ca10c924223 // .quad 0x3bc79ca10c924223 + QUAD $0x3bfd83c94fb6d2ac // .quad 0x3bfd83c94fb6d2ac + QUAD $0x3c32725dd1d243ac // .quad 0x3c32725dd1d243ac + QUAD $0x3c670ef54646d497 // .quad 0x3c670ef54646d497 + QUAD $0x3c9cd2b297d889bc // .quad 0x3c9cd2b297d889bc + QUAD $0x3cd203af9ee75616 // .quad 0x3cd203af9ee75616 + QUAD $0x3d06849b86a12b9b // .quad 0x3d06849b86a12b9b + QUAD $0x3d3c25c268497682 // .quad 0x3d3c25c268497682 + QUAD $0x3d719799812dea11 // .quad 0x3d719799812dea11 + QUAD $0x3da5fd7fe1796495 // .quad 0x3da5fd7fe1796495 + QUAD $0x3ddb7cdfd9d7bdbb // .quad 0x3ddb7cdfd9d7bdbb + QUAD $0x3e112e0be826d695 // .quad 0x3e112e0be826d695 + QUAD $0x3e45798ee2308c3a // .quad 
0x3e45798ee2308c3a + QUAD $0x3e7ad7f29abcaf48 // .quad 0x3e7ad7f29abcaf48 + QUAD $0x3eb0c6f7a0b5ed8d // .quad 0x3eb0c6f7a0b5ed8d + QUAD $0x3ee4f8b588e368f1 // .quad 0x3ee4f8b588e368f1 + QUAD $0x3f1a36e2eb1c432d // .quad 0x3f1a36e2eb1c432d + QUAD $0x3f50624dd2f1a9fc // .quad 0x3f50624dd2f1a9fc + QUAD $0x3f847ae147ae147b // .quad 0x3f847ae147ae147b + QUAD $0x3fb999999999999a // .quad 0x3fb999999999999a + QUAD $0x3ff0000000000000 // .quad 0x3ff0000000000000 + QUAD $0x4024000000000000 // .quad 0x4024000000000000 + QUAD $0x4059000000000000 // .quad 0x4059000000000000 + QUAD $0x408f400000000000 // .quad 0x408f400000000000 + QUAD $0x40c3880000000000 // .quad 0x40c3880000000000 + QUAD $0x40f86a0000000000 // .quad 0x40f86a0000000000 + QUAD $0x412e848000000000 // .quad 0x412e848000000000 + QUAD $0x416312d000000000 // .quad 0x416312d000000000 + QUAD $0x4197d78400000000 // .quad 0x4197d78400000000 + QUAD $0x41cdcd6500000000 // .quad 0x41cdcd6500000000 + QUAD $0x4202a05f20000000 // .quad 0x4202a05f20000000 + QUAD $0x42374876e8000000 // .quad 0x42374876e8000000 + QUAD $0x426d1a94a2000000 // .quad 0x426d1a94a2000000 + QUAD $0x42a2309ce5400000 // .quad 0x42a2309ce5400000 + QUAD $0x42d6bcc41e900000 // .quad 0x42d6bcc41e900000 + QUAD $0x430c6bf526340000 // .quad 0x430c6bf526340000 + QUAD $0x4341c37937e08000 // .quad 0x4341c37937e08000 + QUAD $0x4376345785d8a000 // .quad 0x4376345785d8a000 + QUAD $0x43abc16d674ec800 // .quad 0x43abc16d674ec800 + QUAD $0x43e158e460913d00 // .quad 0x43e158e460913d00 + QUAD $0x4415af1d78b58c40 // .quad 0x4415af1d78b58c40 + QUAD $0x444b1ae4d6e2ef50 // .quad 0x444b1ae4d6e2ef50 + QUAD $0x4480f0cf064dd592 // .quad 0x4480f0cf064dd592 + QUAD $0x44b52d02c7e14af6 // .quad 0x44b52d02c7e14af6 + QUAD $0x44ea784379d99db4 // .quad 0x44ea784379d99db4 + QUAD $0x45208b2a2c280291 // .quad 0x45208b2a2c280291 + QUAD $0x4554adf4b7320335 // .quad 0x4554adf4b7320335 + QUAD $0x4589d971e4fe8402 // .quad 0x4589d971e4fe8402 + QUAD $0x45c027e72f1f1281 // .quad 0x45c027e72f1f1281 
+ QUAD $0x45f431e0fae6d721 // .quad 0x45f431e0fae6d721 + QUAD $0x46293e5939a08cea // .quad 0x46293e5939a08cea + QUAD $0x465f8def8808b024 // .quad 0x465f8def8808b024 + QUAD $0x4693b8b5b5056e17 // .quad 0x4693b8b5b5056e17 + QUAD $0x46c8a6e32246c99c // .quad 0x46c8a6e32246c99c + QUAD $0x46fed09bead87c03 // .quad 0x46fed09bead87c03 + QUAD $0x4733426172c74d82 // .quad 0x4733426172c74d82 + QUAD $0x476812f9cf7920e3 // .quad 0x476812f9cf7920e3 + QUAD $0x479e17b84357691b // .quad 0x479e17b84357691b + QUAD $0x47d2ced32a16a1b1 // .quad 0x47d2ced32a16a1b1 + QUAD $0x48078287f49c4a1d // .quad 0x48078287f49c4a1d + QUAD $0x483d6329f1c35ca5 // .quad 0x483d6329f1c35ca5 + QUAD $0x48725dfa371a19e7 // .quad 0x48725dfa371a19e7 + QUAD $0x48a6f578c4e0a061 // .quad 0x48a6f578c4e0a061 + QUAD $0x48dcb2d6f618c879 // .quad 0x48dcb2d6f618c879 + QUAD $0x4911efc659cf7d4c // .quad 0x4911efc659cf7d4c + QUAD $0x49466bb7f0435c9e // .quad 0x49466bb7f0435c9e + QUAD $0x497c06a5ec5433c6 // .quad 0x497c06a5ec5433c6 + QUAD $0x49b18427b3b4a05c // .quad 0x49b18427b3b4a05c + QUAD $0x49e5e531a0a1c873 // .quad 0x49e5e531a0a1c873 + QUAD $0x4a1b5e7e08ca3a8f // .quad 0x4a1b5e7e08ca3a8f + QUAD $0x4a511b0ec57e649a // .quad 0x4a511b0ec57e649a + QUAD $0x4a8561d276ddfdc0 // .quad 0x4a8561d276ddfdc0 + QUAD $0x4ababa4714957d30 // .quad 0x4ababa4714957d30 + QUAD $0x4af0b46c6cdd6e3e // .quad 0x4af0b46c6cdd6e3e + QUAD $0x4b24e1878814c9ce // .quad 0x4b24e1878814c9ce + QUAD $0x4b5a19e96a19fc41 // .quad 0x4b5a19e96a19fc41 + QUAD $0x4b905031e2503da9 // .quad 0x4b905031e2503da9 + QUAD $0x4bc4643e5ae44d13 // .quad 0x4bc4643e5ae44d13 + QUAD $0x4bf97d4df19d6057 // .quad 0x4bf97d4df19d6057 + QUAD $0x4c2fdca16e04b86d // .quad 0x4c2fdca16e04b86d + QUAD $0x4c63e9e4e4c2f344 // .quad 0x4c63e9e4e4c2f344 + QUAD $0x4c98e45e1df3b015 // .quad 0x4c98e45e1df3b015 + QUAD $0x4ccf1d75a5709c1b // .quad 0x4ccf1d75a5709c1b + QUAD $0x4d03726987666191 // .quad 0x4d03726987666191 + QUAD $0x4d384f03e93ff9f5 // .quad 0x4d384f03e93ff9f5 + QUAD 
$0x4d6e62c4e38ff872 // .quad 0x4d6e62c4e38ff872 + QUAD $0x4da2fdbb0e39fb47 // .quad 0x4da2fdbb0e39fb47 + QUAD $0x4dd7bd29d1c87a19 // .quad 0x4dd7bd29d1c87a19 + QUAD $0x4e0dac74463a989f // .quad 0x4e0dac74463a989f + QUAD $0x4e428bc8abe49f64 // .quad 0x4e428bc8abe49f64 + QUAD $0x4e772ebad6ddc73d // .quad 0x4e772ebad6ddc73d + QUAD $0x4eacfa698c95390c // .quad 0x4eacfa698c95390c + QUAD $0x4ee21c81f7dd43a7 // .quad 0x4ee21c81f7dd43a7 + QUAD $0x4f16a3a275d49491 // .quad 0x4f16a3a275d49491 + QUAD $0x4f4c4c8b1349b9b5 // .quad 0x4f4c4c8b1349b9b5 + QUAD $0x4f81afd6ec0e1411 // .quad 0x4f81afd6ec0e1411 + QUAD $0x4fb61bcca7119916 // .quad 0x4fb61bcca7119916 + QUAD $0x4feba2bfd0d5ff5b // .quad 0x4feba2bfd0d5ff5b + QUAD $0x502145b7e285bf99 // .quad 0x502145b7e285bf99 + QUAD $0x50559725db272f7f // .quad 0x50559725db272f7f + QUAD $0x508afcef51f0fb5f // .quad 0x508afcef51f0fb5f + QUAD $0x50c0de1593369d1b // .quad 0x50c0de1593369d1b + QUAD $0x50f5159af8044462 // .quad 0x50f5159af8044462 + QUAD $0x512a5b01b605557b // .quad 0x512a5b01b605557b + QUAD $0x516078e111c3556d // .quad 0x516078e111c3556d + QUAD $0x5194971956342ac8 // .quad 0x5194971956342ac8 + QUAD $0x51c9bcdfabc1357a // .quad 0x51c9bcdfabc1357a + QUAD $0x5200160bcb58c16c // .quad 0x5200160bcb58c16c + QUAD $0x52341b8ebe2ef1c7 // .quad 0x52341b8ebe2ef1c7 + QUAD $0x526922726dbaae39 // .quad 0x526922726dbaae39 + QUAD $0x529f6b0f092959c7 // .quad 0x529f6b0f092959c7 + QUAD $0x52d3a2e965b9d81d // .quad 0x52d3a2e965b9d81d + QUAD $0x53088ba3bf284e24 // .quad 0x53088ba3bf284e24 + QUAD $0x533eae8caef261ad // .quad 0x533eae8caef261ad + QUAD $0x53732d17ed577d0c // .quad 0x53732d17ed577d0c + QUAD $0x53a7f85de8ad5c4f // .quad 0x53a7f85de8ad5c4f + QUAD $0x53ddf67562d8b363 // .quad 0x53ddf67562d8b363 + QUAD $0x5412ba095dc7701e // .quad 0x5412ba095dc7701e + QUAD $0x5447688bb5394c25 // .quad 0x5447688bb5394c25 + QUAD $0x547d42aea2879f2e // .quad 0x547d42aea2879f2e + QUAD $0x54b249ad2594c37d // .quad 0x54b249ad2594c37d + QUAD $0x54e6dc186ef9f45c 
// .quad 0x54e6dc186ef9f45c + QUAD $0x551c931e8ab87173 // .quad 0x551c931e8ab87173 + QUAD $0x5551dbf316b346e8 // .quad 0x5551dbf316b346e8 + QUAD $0x558652efdc6018a2 // .quad 0x558652efdc6018a2 + QUAD $0x55bbe7abd3781eca // .quad 0x55bbe7abd3781eca + QUAD $0x55f170cb642b133f // .quad 0x55f170cb642b133f + QUAD $0x5625ccfe3d35d80e // .quad 0x5625ccfe3d35d80e + QUAD $0x565b403dcc834e12 // .quad 0x565b403dcc834e12 + QUAD $0x569108269fd210cb // .quad 0x569108269fd210cb + QUAD $0x56c54a3047c694fe // .quad 0x56c54a3047c694fe + QUAD $0x56fa9cbc59b83a3d // .quad 0x56fa9cbc59b83a3d + QUAD $0x5730a1f5b8132466 // .quad 0x5730a1f5b8132466 + QUAD $0x5764ca732617ed80 // .quad 0x5764ca732617ed80 + QUAD $0x5799fd0fef9de8e0 // .quad 0x5799fd0fef9de8e0 + QUAD $0x57d03e29f5c2b18c // .quad 0x57d03e29f5c2b18c + QUAD $0x58044db473335def // .quad 0x58044db473335def + QUAD $0x583961219000356b // .quad 0x583961219000356b + QUAD $0x586fb969f40042c5 // .quad 0x586fb969f40042c5 + QUAD $0x58a3d3e2388029bb // .quad 0x58a3d3e2388029bb + QUAD $0x58d8c8dac6a0342a // .quad 0x58d8c8dac6a0342a + QUAD $0x590efb1178484135 // .quad 0x590efb1178484135 + QUAD $0x59435ceaeb2d28c1 // .quad 0x59435ceaeb2d28c1 + QUAD $0x59783425a5f872f1 // .quad 0x59783425a5f872f1 + QUAD $0x59ae412f0f768fad // .quad 0x59ae412f0f768fad + QUAD $0x59e2e8bd69aa19cc // .quad 0x59e2e8bd69aa19cc + QUAD $0x5a17a2ecc414a03f // .quad 0x5a17a2ecc414a03f + QUAD $0x5a4d8ba7f519c84f // .quad 0x5a4d8ba7f519c84f + QUAD $0x5a827748f9301d32 // .quad 0x5a827748f9301d32 + QUAD $0x5ab7151b377c247e // .quad 0x5ab7151b377c247e + QUAD $0x5aecda62055b2d9e // .quad 0x5aecda62055b2d9e + QUAD $0x5b22087d4358fc82 // .quad 0x5b22087d4358fc82 + QUAD $0x5b568a9c942f3ba3 // .quad 0x5b568a9c942f3ba3 + QUAD $0x5b8c2d43b93b0a8c // .quad 0x5b8c2d43b93b0a8c + QUAD $0x5bc19c4a53c4e697 // .quad 0x5bc19c4a53c4e697 + QUAD $0x5bf6035ce8b6203d // .quad 0x5bf6035ce8b6203d + QUAD $0x5c2b843422e3a84d // .quad 0x5c2b843422e3a84d + QUAD $0x5c6132a095ce4930 // .quad 
0x5c6132a095ce4930 + QUAD $0x5c957f48bb41db7c // .quad 0x5c957f48bb41db7c + QUAD $0x5ccadf1aea12525b // .quad 0x5ccadf1aea12525b + QUAD $0x5d00cb70d24b7379 // .quad 0x5d00cb70d24b7379 + QUAD $0x5d34fe4d06de5057 // .quad 0x5d34fe4d06de5057 + QUAD $0x5d6a3de04895e46d // .quad 0x5d6a3de04895e46d + QUAD $0x5da066ac2d5daec4 // .quad 0x5da066ac2d5daec4 + QUAD $0x5dd4805738b51a75 // .quad 0x5dd4805738b51a75 + QUAD $0x5e09a06d06e26112 // .quad 0x5e09a06d06e26112 + QUAD $0x5e400444244d7cab // .quad 0x5e400444244d7cab + QUAD $0x5e7405552d60dbd6 // .quad 0x5e7405552d60dbd6 + QUAD $0x5ea906aa78b912cc // .quad 0x5ea906aa78b912cc + QUAD $0x5edf485516e7577f // .quad 0x5edf485516e7577f + QUAD $0x5f138d352e5096af // .quad 0x5f138d352e5096af + QUAD $0x5f48708279e4bc5b // .quad 0x5f48708279e4bc5b + QUAD $0x5f7e8ca3185deb72 // .quad 0x5f7e8ca3185deb72 + QUAD $0x5fb317e5ef3ab327 // .quad 0x5fb317e5ef3ab327 + QUAD $0x5fe7dddf6b095ff1 // .quad 0x5fe7dddf6b095ff1 + QUAD $0x601dd55745cbb7ed // .quad 0x601dd55745cbb7ed + QUAD $0x6052a5568b9f52f4 // .quad 0x6052a5568b9f52f4 + QUAD $0x60874eac2e8727b1 // .quad 0x60874eac2e8727b1 + QUAD $0x60bd22573a28f19d // .quad 0x60bd22573a28f19d + QUAD $0x60f2357684599702 // .quad 0x60f2357684599702 + QUAD $0x6126c2d4256ffcc3 // .quad 0x6126c2d4256ffcc3 + QUAD $0x615c73892ecbfbf4 // .quad 0x615c73892ecbfbf4 + QUAD $0x6191c835bd3f7d78 // .quad 0x6191c835bd3f7d78 + QUAD $0x61c63a432c8f5cd6 // .quad 0x61c63a432c8f5cd6 + QUAD $0x61fbc8d3f7b3340c // .quad 0x61fbc8d3f7b3340c + QUAD $0x62315d847ad00087 // .quad 0x62315d847ad00087 + QUAD $0x6265b4e5998400a9 // .quad 0x6265b4e5998400a9 + QUAD $0x629b221effe500d4 // .quad 0x629b221effe500d4 + QUAD $0x62d0f5535fef2084 // .quad 0x62d0f5535fef2084 + QUAD $0x630532a837eae8a5 // .quad 0x630532a837eae8a5 + QUAD $0x633a7f5245e5a2cf // .quad 0x633a7f5245e5a2cf + QUAD $0x63708f936baf85c1 // .quad 0x63708f936baf85c1 + QUAD $0x63a4b378469b6732 // .quad 0x63a4b378469b6732 + QUAD $0x63d9e056584240fe // .quad 0x63d9e056584240fe 
+ QUAD $0x64102c35f729689f // .quad 0x64102c35f729689f + QUAD $0x6444374374f3c2c6 // .quad 0x6444374374f3c2c6 + QUAD $0x647945145230b378 // .quad 0x647945145230b378 + QUAD $0x64af965966bce056 // .quad 0x64af965966bce056 + QUAD $0x64e3bdf7e0360c36 // .quad 0x64e3bdf7e0360c36 + QUAD $0x6518ad75d8438f43 // .quad 0x6518ad75d8438f43 + QUAD $0x654ed8d34e547314 // .quad 0x654ed8d34e547314 + QUAD $0x6583478410f4c7ec // .quad 0x6583478410f4c7ec + QUAD $0x65b819651531f9e8 // .quad 0x65b819651531f9e8 + QUAD $0x65ee1fbe5a7e7861 // .quad 0x65ee1fbe5a7e7861 + QUAD $0x6622d3d6f88f0b3d // .quad 0x6622d3d6f88f0b3d + QUAD $0x665788ccb6b2ce0c // .quad 0x665788ccb6b2ce0c + QUAD $0x668d6affe45f818f // .quad 0x668d6affe45f818f + QUAD $0x66c262dfeebbb0f9 // .quad 0x66c262dfeebbb0f9 + QUAD $0x66f6fb97ea6a9d38 // .quad 0x66f6fb97ea6a9d38 + QUAD $0x672cba7de5054486 // .quad 0x672cba7de5054486 + QUAD $0x6761f48eaf234ad4 // .quad 0x6761f48eaf234ad4 + QUAD $0x679671b25aec1d89 // .quad 0x679671b25aec1d89 + QUAD $0x67cc0e1ef1a724eb // .quad 0x67cc0e1ef1a724eb + QUAD $0x680188d357087713 // .quad 0x680188d357087713 + QUAD $0x6835eb082cca94d7 // .quad 0x6835eb082cca94d7 + QUAD $0x686b65ca37fd3a0d // .quad 0x686b65ca37fd3a0d + QUAD $0x68a11f9e62fe4448 // .quad 0x68a11f9e62fe4448 + QUAD $0x68d56785fbbdd55a // .quad 0x68d56785fbbdd55a + QUAD $0x690ac1677aad4ab1 // .quad 0x690ac1677aad4ab1 + QUAD $0x6940b8e0acac4eaf // .quad 0x6940b8e0acac4eaf + QUAD $0x6974e718d7d7625a // .quad 0x6974e718d7d7625a + QUAD $0x69aa20df0dcd3af1 // .quad 0x69aa20df0dcd3af1 + QUAD $0x69e0548b68a044d6 // .quad 0x69e0548b68a044d6 + QUAD $0x6a1469ae42c8560c // .quad 0x6a1469ae42c8560c + QUAD $0x6a498419d37a6b8f // .quad 0x6a498419d37a6b8f + QUAD $0x6a7fe52048590673 // .quad 0x6a7fe52048590673 + QUAD $0x6ab3ef342d37a408 // .quad 0x6ab3ef342d37a408 + QUAD $0x6ae8eb0138858d0a // .quad 0x6ae8eb0138858d0a + QUAD $0x6b1f25c186a6f04c // .quad 0x6b1f25c186a6f04c + QUAD $0x6b537798f4285630 // .quad 0x6b537798f4285630 + QUAD 
$0x6b88557f31326bbb // .quad 0x6b88557f31326bbb + QUAD $0x6bbe6adefd7f06aa // .quad 0x6bbe6adefd7f06aa + QUAD $0x6bf302cb5e6f642a // .quad 0x6bf302cb5e6f642a + QUAD $0x6c27c37e360b3d35 // .quad 0x6c27c37e360b3d35 + QUAD $0x6c5db45dc38e0c82 // .quad 0x6c5db45dc38e0c82 + QUAD $0x6c9290ba9a38c7d1 // .quad 0x6c9290ba9a38c7d1 + QUAD $0x6cc734e940c6f9c6 // .quad 0x6cc734e940c6f9c6 + QUAD $0x6cfd022390f8b837 // .quad 0x6cfd022390f8b837 + QUAD $0x6d3221563a9b7323 // .quad 0x6d3221563a9b7323 + QUAD $0x6d66a9abc9424feb // .quad 0x6d66a9abc9424feb + QUAD $0x6d9c5416bb92e3e6 // .quad 0x6d9c5416bb92e3e6 + QUAD $0x6dd1b48e353bce70 // .quad 0x6dd1b48e353bce70 + QUAD $0x6e0621b1c28ac20c // .quad 0x6e0621b1c28ac20c + QUAD $0x6e3baa1e332d728f // .quad 0x6e3baa1e332d728f + QUAD $0x6e714a52dffc6799 // .quad 0x6e714a52dffc6799 + QUAD $0x6ea59ce797fb817f // .quad 0x6ea59ce797fb817f + QUAD $0x6edb04217dfa61df // .quad 0x6edb04217dfa61df + QUAD $0x6f10e294eebc7d2c // .quad 0x6f10e294eebc7d2c + QUAD $0x6f451b3a2a6b9c76 // .quad 0x6f451b3a2a6b9c76 + QUAD $0x6f7a6208b5068394 // .quad 0x6f7a6208b5068394 + QUAD $0x6fb07d457124123d // .quad 0x6fb07d457124123d + QUAD $0x6fe49c96cd6d16cc // .quad 0x6fe49c96cd6d16cc + QUAD $0x7019c3bc80c85c7f // .quad 0x7019c3bc80c85c7f + QUAD $0x70501a55d07d39cf // .quad 0x70501a55d07d39cf + QUAD $0x708420eb449c8843 // .quad 0x708420eb449c8843 + QUAD $0x70b9292615c3aa54 // .quad 0x70b9292615c3aa54 + QUAD $0x70ef736f9b3494e9 // .quad 0x70ef736f9b3494e9 + QUAD $0x7123a825c100dd11 // .quad 0x7123a825c100dd11 + QUAD $0x7158922f31411456 // .quad 0x7158922f31411456 + QUAD $0x718eb6bafd91596b // .quad 0x718eb6bafd91596b + QUAD $0x71c33234de7ad7e3 // .quad 0x71c33234de7ad7e3 + QUAD $0x71f7fec216198ddc // .quad 0x71f7fec216198ddc + QUAD $0x722dfe729b9ff153 // .quad 0x722dfe729b9ff153 + QUAD $0x7262bf07a143f6d4 // .quad 0x7262bf07a143f6d4 + QUAD $0x72976ec98994f489 // .quad 0x72976ec98994f489 + QUAD $0x72cd4a7bebfa31ab // .quad 0x72cd4a7bebfa31ab + QUAD $0x73024e8d737c5f0b 
// .quad 0x73024e8d737c5f0b + QUAD $0x7336e230d05b76cd // .quad 0x7336e230d05b76cd + QUAD $0x736c9abd04725481 // .quad 0x736c9abd04725481 + QUAD $0x73a1e0b622c774d0 // .quad 0x73a1e0b622c774d0 + QUAD $0x73d658e3ab795204 // .quad 0x73d658e3ab795204 + QUAD $0x740bef1c9657a686 // .quad 0x740bef1c9657a686 + QUAD $0x74417571ddf6c814 // .quad 0x74417571ddf6c814 + QUAD $0x7475d2ce55747a18 // .quad 0x7475d2ce55747a18 + QUAD $0x74ab4781ead1989e // .quad 0x74ab4781ead1989e + QUAD $0x74e10cb132c2ff63 // .quad 0x74e10cb132c2ff63 + QUAD $0x75154fdd7f73bf3c // .quad 0x75154fdd7f73bf3c + QUAD $0x754aa3d4df50af0b // .quad 0x754aa3d4df50af0b + QUAD $0x7580a6650b926d67 // .quad 0x7580a6650b926d67 + QUAD $0x75b4cffe4e7708c0 // .quad 0x75b4cffe4e7708c0 + QUAD $0x75ea03fde214caf1 // .quad 0x75ea03fde214caf1 + QUAD $0x7620427ead4cfed6 // .quad 0x7620427ead4cfed6 + QUAD $0x7654531e58a03e8c // .quad 0x7654531e58a03e8c + QUAD $0x768967e5eec84e2f // .quad 0x768967e5eec84e2f + QUAD $0x76bfc1df6a7a61bb // .quad 0x76bfc1df6a7a61bb + QUAD $0x76f3d92ba28c7d15 // .quad 0x76f3d92ba28c7d15 + QUAD $0x7728cf768b2f9c5a // .quad 0x7728cf768b2f9c5a + QUAD $0x775f03542dfb8370 // .quad 0x775f03542dfb8370 + QUAD $0x779362149cbd3226 // .quad 0x779362149cbd3226 + QUAD $0x77c83a99c3ec7eb0 // .quad 0x77c83a99c3ec7eb0 + QUAD $0x77fe494034e79e5c // .quad 0x77fe494034e79e5c + QUAD $0x7832edc82110c2f9 // .quad 0x7832edc82110c2f9 + QUAD $0x7867a93a2954f3b8 // .quad 0x7867a93a2954f3b8 + QUAD $0x789d9388b3aa30a5 // .quad 0x789d9388b3aa30a5 + QUAD $0x78d27c35704a5e67 // .quad 0x78d27c35704a5e67 + QUAD $0x79071b42cc5cf601 // .quad 0x79071b42cc5cf601 + QUAD $0x793ce2137f743382 // .quad 0x793ce2137f743382 + QUAD $0x79720d4c2fa8a031 // .quad 0x79720d4c2fa8a031 + QUAD $0x79a6909f3b92c83d // .quad 0x79a6909f3b92c83d + QUAD $0x79dc34c70a777a4d // .quad 0x79dc34c70a777a4d + QUAD $0x7a11a0fc668aac70 // .quad 0x7a11a0fc668aac70 + QUAD $0x7a46093b802d578c // .quad 0x7a46093b802d578c + QUAD $0x7a7b8b8a6038ad6f // .quad 
0x7a7b8b8a6038ad6f + QUAD $0x7ab137367c236c65 // .quad 0x7ab137367c236c65 + QUAD $0x7ae585041b2c477f // .quad 0x7ae585041b2c477f + QUAD $0x7b1ae64521f7595e // .quad 0x7b1ae64521f7595e + QUAD $0x7b50cfeb353a97db // .quad 0x7b50cfeb353a97db + QUAD $0x7b8503e602893dd2 // .quad 0x7b8503e602893dd2 + QUAD $0x7bba44df832b8d46 // .quad 0x7bba44df832b8d46 + QUAD $0x7bf06b0bb1fb384c // .quad 0x7bf06b0bb1fb384c + QUAD $0x7c2485ce9e7a065f // .quad 0x7c2485ce9e7a065f + QUAD $0x7c59a742461887f6 // .quad 0x7c59a742461887f6 + QUAD $0x7c9008896bcf54fa // .quad 0x7c9008896bcf54fa + QUAD $0x7cc40aabc6c32a38 // .quad 0x7cc40aabc6c32a38 + QUAD $0x7cf90d56b873f4c7 // .quad 0x7cf90d56b873f4c7 + QUAD $0x7d2f50ac6690f1f8 // .quad 0x7d2f50ac6690f1f8 + QUAD $0x7d63926bc01a973b // .quad 0x7d63926bc01a973b + QUAD $0x7d987706b0213d0a // .quad 0x7d987706b0213d0a + QUAD $0x7dce94c85c298c4c // .quad 0x7dce94c85c298c4c + QUAD $0x7e031cfd3999f7b0 // .quad 0x7e031cfd3999f7b0 + QUAD $0x7e37e43c8800759c // .quad 0x7e37e43c8800759c + QUAD $0x7e6ddd4baa009303 // .quad 0x7e6ddd4baa009303 + QUAD $0x7ea2aa4f4a405be2 // .quad 0x7ea2aa4f4a405be2 + QUAD $0x7ed754e31cd072da // .quad 0x7ed754e31cd072da + QUAD $0x7f0d2a1be4048f90 // .quad 0x7f0d2a1be4048f90 + QUAD $0x7f423a516e82d9ba // .quad 0x7f423a516e82d9ba + QUAD $0x7f76c8e5ca239029 // .quad 0x7f76c8e5ca239029 + QUAD $0x7fac7b1f3cac7433 // .quad 0x7fac7b1f3cac7433 + QUAD $0x7fe1ccf385ebc8a0 // .quad 0x7fe1ccf385ebc8a0 + +TEXT ·__f64toa(SB), NOSPLIT, $0 - 24 + MOVQ out+0(FP), DI + MOVSD val+8(FP), X0 + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+2480(SB) // _f64toa + MOVQ AX, ret+16(FP) + RET + +TEXT ·__i64toa(SB), NOSPLIT, $0 - 24 + MOVQ out+0(FP), DI + MOVQ val+8(FP), SI + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+5544(SB) // _i64toa + MOVQ AX, ret+16(FP) + RET + +TEXT ·__lquote(SB), NOSPLIT, $0 - 24 + MOVQ buf+0(FP), DI + MOVQ off+8(FP), SI + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+295(SB) // _lquote + 
MOVQ AX, ret+16(FP) + RET + +TEXT ·__lspace(SB), NOSPLIT, $0 - 32 + MOVQ sp+0(FP), DI + MOVQ nb+8(FP), SI + MOVQ off+16(FP), DX + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+937(SB) // _lspace + MOVQ AX, ret+24(FP) + RET + +TEXT ·__lzero(SB), NOSPLIT, $0 - 24 + MOVQ p+0(FP), DI + MOVQ n+8(FP), SI + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+0(SB) // _lzero + MOVQ AX, ret+16(FP) + RET + +TEXT ·__skip_array(SB), NOSPLIT, $0 - 32 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ m+16(FP), DX + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+13958(SB) // _skip_array + MOVQ AX, ret+24(FP) + RET + +TEXT ·__skip_object(SB), NOSPLIT, $0 - 32 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ m+16(FP), DX + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+13993(SB) // _skip_object + MOVQ AX, ret+24(FP) + RET + +TEXT ·__skip_one(SB), NOSPLIT, $0 - 32 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ m+16(FP), DX + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12328(SB) // _skip_one + MOVQ AX, ret+24(FP) + RET + +TEXT ·__u64toa(SB), NOSPLIT, $0 - 24 + MOVQ out+0(FP), DI + MOVQ val+8(FP), SI + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+5637(SB) // _u64toa + MOVQ AX, ret+16(FP) + RET + +TEXT ·__unquote(SB), NOSPLIT, $0 - 48 + MOVQ s+0(FP), DI + MOVQ nb+8(FP), SI + MOVQ dp+16(FP), DX + MOVQ ep+24(FP), CX + MOVQ flags+32(FP), R8 + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+6825(SB) // _unquote + MOVQ AX, ret+40(FP) + RET + +TEXT ·__value(SB), NOSPLIT, $0 - 40 + MOVQ s+0(FP), DI + MOVQ n+8(FP), SI + MOVQ p+16(FP), DX + MOVQ v+24(FP), CX + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+8460(SB) // _value + MOVQ AX, ret+32(FP) + RET + +TEXT ·__vnumber(SB), NOSPLIT, $0 - 24 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ v+16(FP), DX + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+10806(SB), AX // _vnumber + JMP AX + +TEXT ·__vsigned(SB), NOSPLIT, $0 - 24 + MOVQ s+0(FP), DI + MOVQ 
p+8(FP), SI + MOVQ v+16(FP), DX + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+11778(SB), AX // _vsigned + JMP AX + +TEXT ·__vstring(SB), NOSPLIT, $0 - 24 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ v+16(FP), DX + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+9464(SB), AX // _vstring + JMP AX + +TEXT ·__vunsigned(SB), NOSPLIT, $0 - 24 + MOVQ s+0(FP), DI + MOVQ p+8(FP), SI + MOVQ v+16(FP), DX + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12055(SB), AX // _vunsigned + JMP AX diff --git a/internal/native/avx/native_amd64_test.go b/internal/native/avx/native_amd64_test.go new file mode 100644 index 0000000..8e29a70 --- /dev/null +++ b/internal/native/avx/native_amd64_test.go @@ -0,0 +1,426 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package avx + +import ( + `encoding/hex` + `fmt` + `math` + `testing` + `unsafe` + + `github.com/bytedance/sonic/internal/native/types` + `github.com/bytedance/sonic/internal/rt` + `github.com/davecgh/go-spew/spew` + `github.com/stretchr/testify/assert` + `github.com/stretchr/testify/require` +) + +func TestNative_Value(t *testing.T) { + var v types.JsonState + s := ` -12345` + p := (*rt.GoString)(unsafe.Pointer(&s)) + x := __value(p.Ptr, p.Len, 0, &v) + assert.Equal(t, 9, x) + assert.Equal(t, types.V_INTEGER, v.Vt) + assert.Equal(t, int64(-12345), v.Iv) + assert.Equal(t, 3, v.Ep) +} + +func TestNative_Unquote(t *testing.T) { + s := `hello\b\f\n\r\t\\\"\u2333world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\b\f\n\r\t\\\"\u2333world", string(d)) +} + +func TestNative_UnquoteError(t *testing.T) { + s := `asdf\` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_EOF), rv) + assert.Equal(t, 5, ep) + s = `asdf\gqwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_ESCAPE), rv) + assert.Equal(t, 5, ep) + s = `asdf\u1gggqwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_CHAR), rv) + assert.Equal(t, 7, ep) + s = `asdf\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = 
(*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 6, ep) + s = `asdf\\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 7, ep) + s = `asdf\ud800\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 12, ep) + s = `asdf\\ud800\\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 14, ep) +} + +func TestNative_DoubleUnquote(t *testing.T) { + s := `hello\\b\\f\\n\\r\\t\\\\\\\"\\u2333world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\b\f\n\r\t\\\"\u2333world", string(d)) +} + +func TestNative_UnquoteUnicodeReplacement(t *testing.T) { + s := `hello\ud800world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\ufffdworld", string(d)) + s = `hello\ud800\ud800world` + d = 
make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\ufffd\ufffdworld", string(d)) +} + +func TestNative_Vstring(t *testing.T) { + var v types.JsonState + i := 0 + s := `test"test\n2"` + __vstring(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, -1, v.Ep) + assert.Equal(t, int64(0), v.Iv) + __vstring(&s, &i, &v) + assert.Equal(t, 13, i) + assert.Equal(t, 9, v.Ep) + assert.Equal(t, int64(5), v.Iv) +} + +func TestNative_VstringHangUpOnRandomData(t *testing.T) { + v, e := hex.DecodeString( + "228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" + + "8c4500262d9c308b3f3ec2577689cf345d9f86f9b5d18d3e463bec5c22df2d2e" + + "4506010eba1dae7278", + ) + assert.Nil(t, e) + p := 1 + s := rt.Mem2Str(v) + var js types.JsonState + __vstring(&s, &p, &js) + fmt.Printf("js: %s\n", spew.Sdump(js)) +} + +func TestNative_Vnumber(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vnumber(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "1.234" + __vnumber(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 1.234, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "1.234e5" + __vnumber(&s, &i, &v) + assert.Equal(t, 7, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 1.234e5, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "0.0125" + __vnumber(&s, &i, &v) + assert.Equal(t, 6, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 0.0125, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "100000000000000000000" + __vnumber(&s, &i, &v) + assert.Equal(t, 21, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 100000000000000000000.0, v.Dv) + 
assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "999999999999999900000" + __vnumber(&s, &i, &v) + assert.Equal(t, 21, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 999999999999999900000.0, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "-1.234" + __vnumber(&s, &i, &v) + assert.Equal(t, 6, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, -1.234, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) +} + +func TestNative_Vsigned(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "-1234" + __vsigned(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(-1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "9223372036854775807" + __vsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(math.MaxInt64), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "-9223372036854775808" + __vsigned(&s, &i, &v) + assert.Equal(t, 20, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(math.MinInt64), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "9223372036854775808" + __vsigned(&s, &i, &v) + assert.Equal(t, 18, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "-9223372036854775809" + __vsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "1.234" + __vsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "0.0125" + __vsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e5" + __vsigned(&s, &i, 
&v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e-5" + __vsigned(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) +} + +func TestNative_Vunsigned(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "18446744073709551615" + __vunsigned(&s, &i, &v) + assert.Equal(t, 20, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, ^int64(0), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "18446744073709551616" + __vunsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "-1234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "1.234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "0.0125" + __vunsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "1234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1.234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), 
v.Vt) + i = 0 + s = "-1.234e-5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) +} + +func TestNative_SkipOne(t *testing.T) { + p := 0 + s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5` + q := __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 42, p) + assert.Equal(t, 1, q) + p = 0 + s = `1 2.5 -3 "asdf\nqwer" true false null {} []` + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 1, p) + assert.Equal(t, 0, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 5, p) + assert.Equal(t, 2, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 8, p) + assert.Equal(t, 6, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 21, p) + assert.Equal(t, 9, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 26, p) + assert.Equal(t, 22, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 32, p) + assert.Equal(t, 27, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 37, p) + assert.Equal(t, 33, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 40, p) + assert.Equal(t, 38, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 43, p) + assert.Equal(t, 41, q) +} + +func TestNative_SkipArray(t *testing.T) { + p := 0 + s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],` + __skip_array(&s, &p, &types.StateMachine{}) + assert.Equal(t, p, 48) +} + +func TestNative_SkipObject(t *testing.T) { + p := 0 + s := `"asdf": "wqer"},` + __skip_object(&s, &p, &types.StateMachine{}) + assert.Equal(t, p, 15) +} diff --git a/internal/native/avx/native_export_amd64.go b/internal/native/avx/native_export_amd64.go new file mode 100644 index 0000000..325e974 --- /dev/null +++ b/internal/native/avx/native_export_amd64.go @@ -0,0 +1,45 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx + +var ( + S_f64toa = _subr__f64toa + S_i64toa = _subr__i64toa + S_lquote = _subr__lquote + S_u64toa = _subr__u64toa +) + +var ( + S_lspace = _subr__lspace + S_unquote = _subr__unquote +) + +var ( + S_value = _subr__value + S_vstring = _subr__vstring + S_vnumber = _subr__vnumber + S_vsigned = _subr__vsigned + S_vunsigned = _subr__vunsigned +) + +var ( + S_skip_one = _subr__skip_one + S_skip_array = _subr__skip_array + S_skip_object = _subr__skip_object +) diff --git a/internal/native/native_subr_amd64.go b/internal/native/avx/native_subr_amd64.go similarity index 89% rename from internal/native/native_subr_amd64.go rename to internal/native/avx/native_subr_amd64.go index 0a4855a..ba6b9dc 100644 --- a/internal/native/native_subr_amd64.go +++ b/internal/native/avx/native_subr_amd64.go @@ -1,7 +1,7 @@ // +build !noasm !appengine // Code generated by asm2asm, DO NOT EDIT. 
-package native +package avx import ( `unsafe` @@ -14,21 +14,21 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___() var ( _func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___ - _subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 2953 - _subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6042 - _subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 384 - _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 1266 + _subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 2480 + _subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5544 + _subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 295 + _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 937 _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 - _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14398 - _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14433 - _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12845 - _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6135 - _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7356 - _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9076 - _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11325 - _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12295 - _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10096 - _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12572 + _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13958 + _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13993 + _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12328 + _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5637 + _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6825 + _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 8460 + _subr__vnumber = 
**(**uintptr)(unsafe.Pointer(&_func__base)) + 10806 + _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11778 + _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9464 + _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12055 ) var ( diff --git a/internal/native/avx2/fastfloat_amd64_test.go b/internal/native/avx2/fastfloat_amd64_test.go new file mode 100644 index 0000000..6bff2c6 --- /dev/null +++ b/internal/native/avx2/fastfloat_amd64_test.go @@ -0,0 +1,66 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package avx2 + +import ( + `math` + `strconv` + `testing` + + `github.com/stretchr/testify/assert` +) + +func TestFastFloat_Encode(t *testing.T) { + var buf [64]byte + assert.Equal(t, "0" , string(buf[:__f64toa(&buf[0], 0)])) + assert.Equal(t, "0" , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))])) + assert.Equal(t, "12340000000" , string(buf[:__f64toa(&buf[0], 1234e7)])) + assert.Equal(t, "12.34" , string(buf[:__f64toa(&buf[0], 1234e-2)])) + assert.Equal(t, "0.001234" , string(buf[:__f64toa(&buf[0], 1234e-6)])) + assert.Equal(t, "1e30" , string(buf[:__f64toa(&buf[0], 1e30)])) + assert.Equal(t, "1.234e33" , string(buf[:__f64toa(&buf[0], 1234e30)])) + assert.Equal(t, "1.234e308" , string(buf[:__f64toa(&buf[0], 1234e305)])) + assert.Equal(t, "1.234e-317" , string(buf[:__f64toa(&buf[0], 1234e-320)])) + assert.Equal(t, "1.7976931348623157e308" , string(buf[:__f64toa(&buf[0], 1.7976931348623157e308)])) + assert.Equal(t, "-12340000000" , string(buf[:__f64toa(&buf[0], -1234e7)])) + assert.Equal(t, "-12.34" , string(buf[:__f64toa(&buf[0], -1234e-2)])) + assert.Equal(t, "-0.001234" , string(buf[:__f64toa(&buf[0], -1234e-6)])) + assert.Equal(t, "-1e30" , string(buf[:__f64toa(&buf[0], -1e30)])) + assert.Equal(t, "-1.234e33" , string(buf[:__f64toa(&buf[0], -1234e30)])) + assert.Equal(t, "-1.234e308" , string(buf[:__f64toa(&buf[0], -1234e305)])) + assert.Equal(t, "-1.234e-317" , string(buf[:__f64toa(&buf[0], -1234e-320)])) + assert.Equal(t, "-2.2250738585072014e-308" , string(buf[:__f64toa(&buf[0], -2.2250738585072014e-308)])) +} + +func BenchmarkFastFloat_Encode(b *testing.B) { + val := -2.2250738585072014e-308 + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib", + test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:], val, 'g', -1, 64) }}, + }, { + name: "FastFloat", + test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) 
}}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} diff --git a/internal/native/avx2/fastint_amd64_test.go b/internal/native/avx2/fastint_amd64_test.go new file mode 100644 index 0000000..8903b2f --- /dev/null +++ b/internal/native/avx2/fastint_amd64_test.go @@ -0,0 +1,135 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx2 + +import ( + `strconv` + `testing` + + `github.com/stretchr/testify/assert` +) + +func TestFastInt_IntToString(t *testing.T) { + var buf [32]byte + assert.Equal(t, "0" , string(buf[:__i64toa(&buf[0], 0)])) + assert.Equal(t, "1" , string(buf[:__i64toa(&buf[0], 1)])) + assert.Equal(t, "12" , string(buf[:__i64toa(&buf[0], 12)])) + assert.Equal(t, "123" , string(buf[:__i64toa(&buf[0], 123)])) + assert.Equal(t, "1234" , string(buf[:__i64toa(&buf[0], 1234)])) + assert.Equal(t, "12345" , string(buf[:__i64toa(&buf[0], 12345)])) + assert.Equal(t, "123456" , string(buf[:__i64toa(&buf[0], 123456)])) + assert.Equal(t, "1234567" , string(buf[:__i64toa(&buf[0], 1234567)])) + assert.Equal(t, "12345678" , string(buf[:__i64toa(&buf[0], 12345678)])) + assert.Equal(t, "123456789" , string(buf[:__i64toa(&buf[0], 123456789)])) + assert.Equal(t, "1234567890" , string(buf[:__i64toa(&buf[0], 1234567890)])) + assert.Equal(t, "12345678901" , string(buf[:__i64toa(&buf[0], 12345678901)])) + assert.Equal(t, "123456789012" , 
string(buf[:__i64toa(&buf[0], 123456789012)])) + assert.Equal(t, "1234567890123" , string(buf[:__i64toa(&buf[0], 1234567890123)])) + assert.Equal(t, "12345678901234" , string(buf[:__i64toa(&buf[0], 12345678901234)])) + assert.Equal(t, "123456789012345" , string(buf[:__i64toa(&buf[0], 123456789012345)])) + assert.Equal(t, "1234567890123456" , string(buf[:__i64toa(&buf[0], 1234567890123456)])) + assert.Equal(t, "12345678901234567" , string(buf[:__i64toa(&buf[0], 12345678901234567)])) + assert.Equal(t, "123456789012345678" , string(buf[:__i64toa(&buf[0], 123456789012345678)])) + assert.Equal(t, "1234567890123456789" , string(buf[:__i64toa(&buf[0], 1234567890123456789)])) + assert.Equal(t, "9223372036854775807" , string(buf[:__i64toa(&buf[0], 9223372036854775807)])) + assert.Equal(t, "-1" , string(buf[:__i64toa(&buf[0], -1)])) + assert.Equal(t, "-12" , string(buf[:__i64toa(&buf[0], -12)])) + assert.Equal(t, "-123" , string(buf[:__i64toa(&buf[0], -123)])) + assert.Equal(t, "-1234" , string(buf[:__i64toa(&buf[0], -1234)])) + assert.Equal(t, "-12345" , string(buf[:__i64toa(&buf[0], -12345)])) + assert.Equal(t, "-123456" , string(buf[:__i64toa(&buf[0], -123456)])) + assert.Equal(t, "-1234567" , string(buf[:__i64toa(&buf[0], -1234567)])) + assert.Equal(t, "-12345678" , string(buf[:__i64toa(&buf[0], -12345678)])) + assert.Equal(t, "-123456789" , string(buf[:__i64toa(&buf[0], -123456789)])) + assert.Equal(t, "-1234567890" , string(buf[:__i64toa(&buf[0], -1234567890)])) + assert.Equal(t, "-12345678901" , string(buf[:__i64toa(&buf[0], -12345678901)])) + assert.Equal(t, "-123456789012" , string(buf[:__i64toa(&buf[0], -123456789012)])) + assert.Equal(t, "-1234567890123" , string(buf[:__i64toa(&buf[0], -1234567890123)])) + assert.Equal(t, "-12345678901234" , string(buf[:__i64toa(&buf[0], -12345678901234)])) + assert.Equal(t, "-123456789012345" , string(buf[:__i64toa(&buf[0], -123456789012345)])) + assert.Equal(t, "-1234567890123456" , string(buf[:__i64toa(&buf[0], 
-1234567890123456)])) + assert.Equal(t, "-12345678901234567" , string(buf[:__i64toa(&buf[0], -12345678901234567)])) + assert.Equal(t, "-123456789012345678" , string(buf[:__i64toa(&buf[0], -123456789012345678)])) + assert.Equal(t, "-1234567890123456789" , string(buf[:__i64toa(&buf[0], -1234567890123456789)])) + assert.Equal(t, "-9223372036854775808" , string(buf[:__i64toa(&buf[0], -9223372036854775808)])) +} + +func TestFastInt_UintToString(t *testing.T) { + var buf [32]byte + assert.Equal(t, "0" , string(buf[:__u64toa(&buf[0], 0)])) + assert.Equal(t, "1" , string(buf[:__u64toa(&buf[0], 1)])) + assert.Equal(t, "12" , string(buf[:__u64toa(&buf[0], 12)])) + assert.Equal(t, "123" , string(buf[:__u64toa(&buf[0], 123)])) + assert.Equal(t, "1234" , string(buf[:__u64toa(&buf[0], 1234)])) + assert.Equal(t, "12345" , string(buf[:__u64toa(&buf[0], 12345)])) + assert.Equal(t, "123456" , string(buf[:__u64toa(&buf[0], 123456)])) + assert.Equal(t, "1234567" , string(buf[:__u64toa(&buf[0], 1234567)])) + assert.Equal(t, "12345678" , string(buf[:__u64toa(&buf[0], 12345678)])) + assert.Equal(t, "123456789" , string(buf[:__u64toa(&buf[0], 123456789)])) + assert.Equal(t, "1234567890" , string(buf[:__u64toa(&buf[0], 1234567890)])) + assert.Equal(t, "12345678901" , string(buf[:__u64toa(&buf[0], 12345678901)])) + assert.Equal(t, "123456789012" , string(buf[:__u64toa(&buf[0], 123456789012)])) + assert.Equal(t, "1234567890123" , string(buf[:__u64toa(&buf[0], 1234567890123)])) + assert.Equal(t, "12345678901234" , string(buf[:__u64toa(&buf[0], 12345678901234)])) + assert.Equal(t, "123456789012345" , string(buf[:__u64toa(&buf[0], 123456789012345)])) + assert.Equal(t, "1234567890123456" , string(buf[:__u64toa(&buf[0], 1234567890123456)])) + assert.Equal(t, "12345678901234567" , string(buf[:__u64toa(&buf[0], 12345678901234567)])) + assert.Equal(t, "123456789012345678" , string(buf[:__u64toa(&buf[0], 123456789012345678)])) + assert.Equal(t, "1234567890123456789" , string(buf[:__u64toa(&buf[0], 
1234567890123456789)])) + assert.Equal(t, "12345678901234567890" , string(buf[:__u64toa(&buf[0], 12345678901234567890)])) + assert.Equal(t, "18446744073709551615" , string(buf[:__u64toa(&buf[0], 18446744073709551615)])) +} + +func BenchmarkFastInt_IntToString(b *testing.B) { + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib-Positive", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendInt(buf[:], int64(i), 10) }}, + }, { + name: "StdLib-Negative", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendInt(buf[:], -int64(i), 10) }}, + }, { + name: "FastInt-Positive", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __i64toa(&buf[0], int64(i)) }}, + }, { + name: "FastInt-Negative", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __i64toa(&buf[0], -int64(i)) }}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} + +func BenchmarkFastInt_UintToString(b *testing.B) { + benchmarks := []struct { + name string + test func(*testing.B) + }{{ + name: "StdLib", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { strconv.AppendUint(buf[:], uint64(i), 10) }}, + }, { + name: "FastInt", + test: func(b *testing.B) { var buf [32]byte; for i := 0; i < b.N; i++ { __u64toa(&buf[0], uint64(i)) }}, + }} + for _, bm := range benchmarks { + b.Run(bm.name, bm.test) + } +} diff --git a/internal/native/avx2/native_amd64.go b/internal/native/avx2/native_amd64.go new file mode 100644 index 0000000..098ee55 --- /dev/null +++ b/internal/native/avx2/native_amd64.go @@ -0,0 +1,100 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx2 + +import ( + `unsafe` + + `github.com/bytedance/sonic/internal/native/types` +) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __i64toa(out *byte, val int64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __u64toa(out *byte, val uint64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __f64toa(out *byte, val float64) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lzero(p unsafe.Pointer, n int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lquote(buf *string, off int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __lspace(sp unsafe.Pointer, nb int, off int) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __value(s unsafe.Pointer, n int, p int, v *types.JsonState) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vstring(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vnumber(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vsigned(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __vunsigned(s *string, p *int, v *types.JsonState) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func 
__skip_one(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __skip_array(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __skip_object(s *string, p *int, m *types.StateMachine) (ret int) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int) diff --git a/internal/native/native_amd64.s b/internal/native/avx2/native_amd64.s similarity index 73% rename from internal/native/native_amd64.s rename to internal/native/avx2/native_amd64.s index 57233b5..d8bcfaa 100644 --- a/internal/native/native_amd64.s +++ b/internal/native/avx2/native_amd64.s @@ -14,14 +14,12 @@ _lzero: LONG $0xc976f5c5 // vpcmpeqd %ymm1, %ymm1, %ymm1 LBB0_2: - LONG $0x576ffec5; BYTE $0x20 // vmovdqu $32(%rdi), %ymm2 + LONG $0x576ffdc5; BYTE $0x20 // vmovdqa $32(%rdi), %ymm2 LONG $0x17ebedc5 // vpor (%rdi), %ymm2, %ymm2 - LONG $0x5f6ffec5; BYTE $0x60 // vmovdqu $96(%rdi), %ymm3 - LONG $0x5febe5c5; BYTE $0x40 // vpor $64(%rdi), %ymm3, %ymm3 + LONG $0x57ebedc5; BYTE $0x40 // vpor $64(%rdi), %ymm2, %ymm2 + LONG $0x57ebedc5; BYTE $0x60 // vpor $96(%rdi), %ymm2, %ymm2 LONG $0xd074edc5 // vpcmpeqb %ymm0, %ymm2, %ymm2 - LONG $0xd874e5c5 // vpcmpeqb %ymm0, %ymm3, %ymm3 - LONG $0xd9efe5c5 // vpxor %ymm1, %ymm3, %ymm3 - LONG $0x177de2c4; BYTE $0xd3 // vptest %ymm3, %ymm2 + LONG $0x177de2c4; BYTE $0xd1 // vptest %ymm1, %ymm2 JAE LBB0_14 SUBQ $-128, DI ADDQ $-128, SI @@ -147,8 +145,8 @@ _lquote: DECQ DX LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 CMPQ DX, $14 - JA LBB1_29 - LONG $0x28058d48; WORD $0x0002; BYTE $0x00 // leaq $552(%rip), %rax /* LJTI1_0(%rip) */ + JA LBB1_30 + LONG $0x32058d48; WORD $0x0002; BYTE $0x00 // leaq $562(%rip), %rax /* LJTI1_0(%rip) */ MOVLQSX 0(AX)(DX*4), DX ADDQ AX, DX LONG $0x763141c4; BYTE $0xc9 // vpcmpeqd 
%xmm9, %xmm9, %xmm9 @@ -167,53 +165,53 @@ _lquote: LONG $0xf676c9c5 // vpcmpeqd %xmm6, %xmm6, %xmm6 JMP DX -LBB1_14: +LBB1_15: QUAD $0xffffff63056ffac5 // vmovdqu $-157(%rip), %xmm0 /* LCPI1_6(%rip) */ LONG $0x207963c4; WORD $0x0e49; BYTE $0x0e // vpinsrb $14, $14(%rcx), %xmm0, %xmm9 -LBB1_15: +LBB1_16: LONG $0x203163c4; WORD $0x0d51; BYTE $0x0d // vpinsrb $13, $13(%rcx), %xmm9, %xmm10 -LBB1_16: +LBB1_17: LONG $0x202963c4; WORD $0x0c59; BYTE $0x0c // vpinsrb $12, $12(%rcx), %xmm10, %xmm11 -LBB1_17: +LBB1_18: LONG $0x202163c4; WORD $0x0b61; BYTE $0x0b // vpinsrb $11, $11(%rcx), %xmm11, %xmm12 -LBB1_18: +LBB1_19: LONG $0x201963c4; WORD $0x0a69; BYTE $0x0a // vpinsrb $10, $10(%rcx), %xmm12, %xmm13 -LBB1_19: +LBB1_20: LONG $0x201163c4; WORD $0x0971; BYTE $0x09 // vpinsrb $9, $9(%rcx), %xmm13, %xmm14 -LBB1_20: +LBB1_21: LONG $0x2009e3c4; WORD $0x0879; BYTE $0x08 // vpinsrb $8, $8(%rcx), %xmm14, %xmm7 -LBB1_21: +LBB1_22: LONG $0x2041e3c4; WORD $0x0741; BYTE $0x07 // vpinsrb $7, $7(%rcx), %xmm7, %xmm0 -LBB1_22: +LBB1_23: LONG $0x2079e3c4; WORD $0x0649; BYTE $0x06 // vpinsrb $6, $6(%rcx), %xmm0, %xmm1 -LBB1_23: +LBB1_24: LONG $0x2071e3c4; WORD $0x0551; BYTE $0x05 // vpinsrb $5, $5(%rcx), %xmm1, %xmm2 -LBB1_24: +LBB1_25: LONG $0x2069e3c4; WORD $0x0459; BYTE $0x04 // vpinsrb $4, $4(%rcx), %xmm2, %xmm3 -LBB1_25: +LBB1_26: LONG $0x2061e3c4; WORD $0x0361; BYTE $0x03 // vpinsrb $3, $3(%rcx), %xmm3, %xmm4 -LBB1_26: +LBB1_27: LONG $0x2059e3c4; WORD $0x0269; BYTE $0x02 // vpinsrb $2, $2(%rcx), %xmm4, %xmm5 -LBB1_27: +LBB1_28: LONG $0x2051e3c4; WORD $0x0171; BYTE $0x01 // vpinsrb $1, $1(%rcx), %xmm5, %xmm6 -LBB1_28: +LBB1_29: LONG $0x2049e3c4; WORD $0x0001 // vpinsrb $0, (%rcx), %xmm6, %xmm0 -LBB1_29: +LBB1_30: QUAD $0xfffffec30d6ffac5 // vmovdqu $-317(%rip), %xmm1 /* LCPI1_3(%rip) */ QUAD $0xfffffecb1574f9c5 // vpcmpeqb $-309(%rip), %xmm0, %xmm2 /* LCPI1_4(%rip) */ QUAD $0xfffffed31d74f9c5 // vpcmpeqb $-301(%rip), %xmm0, %xmm3 /* LCPI1_5(%rip) */ @@ -224,9 +222,9 @@ LBB1_29: LONG 
$0xc0ebe9c5 // vpor %xmm0, %xmm2, %xmm0 LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax ORL $-65536, AX - LONG $0xc0bc0ff3 // tzcntl %eax, %eax + BSFL AX, AX -LBB1_30: +LBB1_31: ADDQ SI, AX BYTE $0x5d // popq %rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper @@ -242,32 +240,32 @@ LBB1_3: CMPQ DX, $32 JB LBB1_9 XORL AX, AX - QUAD $0xfffffe12056ffec5 // vmovdqu $-494(%rip), %ymm0 /* LCPI1_0(%rip) */ - QUAD $0xfffffe2a0d6ffec5 // vmovdqu $-470(%rip), %ymm1 /* LCPI1_1(%rip) */ - QUAD $0xfffffe42156ffec5 // vmovdqu $-446(%rip), %ymm2 /* LCPI1_2(%rip) */ + QUAD $0xfffffe13056ffec5 // vmovdqu $-493(%rip), %ymm0 /* LCPI1_0(%rip) */ + QUAD $0xfffffe2b0d6ffec5 // vmovdqu $-469(%rip), %ymm1 /* LCPI1_1(%rip) */ + QUAD $0xfffffe43156ffec5 // vmovdqu $-445(%rip), %ymm2 /* LCPI1_2(%rip) */ LONG $0xdb76e5c5 // vpcmpeqd %ymm3, %ymm3, %ymm3 MOVQ $-4294967296, R8 LBB1_5: - LONG $0x216ffec5 // vmovdqu (%rcx), %ymm4 - LONG $0xec64fdc5 // vpcmpgtb %ymm4, %ymm0, %ymm5 - LONG $0xf174ddc5 // vpcmpeqb %ymm1, %ymm4, %ymm6 - LONG $0xfa74ddc5 // vpcmpeqb %ymm2, %ymm4, %ymm7 - LONG $0xf6ebc5c5 // vpor %ymm6, %ymm7, %ymm6 - LONG $0xe364ddc5 // vpcmpgtb %ymm3, %ymm4, %ymm4 - LONG $0xe4dbd5c5 // vpand %ymm4, %ymm5, %ymm4 - LONG $0xe4ebcdc5 // vpor %ymm4, %ymm6, %ymm4 - LONG $0xfcd7fdc5 // vpmovmskb %ymm4, %edi + LONG $0x216ffec5 // vmovdqu (%rcx), %ymm4 + LONG $0xec64fdc5 // vpcmpgtb %ymm4, %ymm0, %ymm5 + LONG $0xf174ddc5 // vpcmpeqb %ymm1, %ymm4, %ymm6 + LONG $0xfa74ddc5 // vpcmpeqb %ymm2, %ymm4, %ymm7 + LONG $0xf6ebc5c5 // vpor %ymm6, %ymm7, %ymm6 + LONG $0xe364ddc5 // vpcmpgtb %ymm3, %ymm4, %ymm4 + LONG $0xe4dbd5c5 // vpand %ymm4, %ymm5, %ymm4 + LONG $0xe4ebcdc5 // vpor %ymm4, %ymm6, %ymm4 + LONG $0xfcd7fdc5 // vpmovmskb %ymm4, %edi ORQ R8, DI - LONG $0xbc0f48f3; BYTE $0xff // tzcntq %rdi, %rdi + BSFQ DI, DI ADDQ DI, AX CMPQ DI, $32 - JB LBB1_30 + JB LBB1_31 ADDQ $32, CX ADDQ $-32, DX CMPQ DX, $31 JA LBB1_5 - WORD $0xf8c5; BYTE $0x77 // vzeroupper + WORD $0xf8c5; BYTE $0x77 // vzeroupper CMPQ DX, $16 JAE 
LBB1_10 JMP LBB1_8 @@ -277,31 +275,37 @@ LBB1_9: XORL AX, AX LBB1_10: - LONG $0x016ffac5 // vmovdqu (%rcx), %xmm0 - QUAD $0xfffffdf50d6ffac5 // vmovdqu $-523(%rip), %xmm1 /* LCPI1_3(%rip) */ - LONG $0xc864f1c5 // vpcmpgtb %xmm0, %xmm1, %xmm1 - QUAD $0xfffffdf91574f9c5 // vpcmpeqb $-519(%rip), %xmm0, %xmm2 /* LCPI1_4(%rip) */ - QUAD $0xfffffe011d74f9c5 // vpcmpeqb $-511(%rip), %xmm0, %xmm3 /* LCPI1_5(%rip) */ - LONG $0xd2ebe1c5 // vpor %xmm2, %xmm3, %xmm2 + QUAD $0xfffffdfb056ffac5 // vmovdqu $-517(%rip), %xmm0 /* LCPI1_3(%rip) */ + QUAD $0xfffffe030d6ffac5 // vmovdqu $-509(%rip), %xmm1 /* LCPI1_4(%rip) */ + QUAD $0xfffffe0b156ffac5 // vmovdqu $-501(%rip), %xmm2 /* LCPI1_5(%rip) */ LONG $0xdb76e1c5 // vpcmpeqd %xmm3, %xmm3, %xmm3 - LONG $0xc364f9c5 // vpcmpgtb %xmm3, %xmm0, %xmm0 - LONG $0xc0dbf1c5 // vpand %xmm0, %xmm1, %xmm0 - LONG $0xc0ebe9c5 // vpor %xmm0, %xmm2, %xmm0 - LONG $0xf8d7f9c5 // vpmovmskb %xmm0, %edi + +LBB1_11: + LONG $0x216ffac5 // vmovdqu (%rcx), %xmm4 + LONG $0xec64f9c5 // vpcmpgtb %xmm4, %xmm0, %xmm5 + LONG $0xf174d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm6 + LONG $0xfa74d9c5 // vpcmpeqb %xmm2, %xmm4, %xmm7 + LONG $0xf6ebc1c5 // vpor %xmm6, %xmm7, %xmm6 + LONG $0xe364d9c5 // vpcmpgtb %xmm3, %xmm4, %xmm4 + LONG $0xe4dbd1c5 // vpand %xmm4, %xmm5, %xmm4 + LONG $0xe4ebc9c5 // vpor %xmm4, %xmm6, %xmm4 + LONG $0xfcd7f9c5 // vpmovmskb %xmm4, %edi ORL $-65536, DI - LONG $0xffbc0ff3 // tzcntl %edi, %edi + BSFL DI, DI ADDQ DI, AX CMPL DI, $16 - JB LBB1_30 + JB LBB1_31 ADDQ $16, CX ADDQ $-16, DX + CMPQ DX, $15 + JA LBB1_11 LBB1_8: LONG $0x446ffac5; WORD $0xf011 // vmovdqu $-16(%rcx,%rdx), %xmm0 - QUAD $0xfffffd9d0d6ffac5 // vmovdqu $-611(%rip), %xmm1 /* LCPI1_3(%rip) */ + QUAD $0xfffffd920d6ffac5 // vmovdqu $-622(%rip), %xmm1 /* LCPI1_3(%rip) */ LONG $0xc864f1c5 // vpcmpgtb %xmm0, %xmm1, %xmm1 - QUAD $0xfffffda11574f9c5 // vpcmpeqb $-607(%rip), %xmm0, %xmm2 /* LCPI1_4(%rip) */ - QUAD $0xfffffda91d74f9c5 // vpcmpeqb $-599(%rip), %xmm0, %xmm3 /* LCPI1_5(%rip) */ + 
QUAD $0xfffffd961574f9c5 // vpcmpeqb $-618(%rip), %xmm0, %xmm2 /* LCPI1_4(%rip) */ + QUAD $0xfffffd9e1d74f9c5 // vpcmpeqb $-610(%rip), %xmm0, %xmm3 /* LCPI1_5(%rip) */ LONG $0xd2ebe1c5 // vpor %xmm2, %xmm3, %xmm2 LONG $0xdb76e1c5 // vpcmpeqd %xmm3, %xmm3, %xmm3 LONG $0xc364f9c5 // vpcmpgtb %xmm3, %xmm0, %xmm0 @@ -309,13 +313,14 @@ LBB1_8: LONG $0xc0ebe9c5 // vpor %xmm0, %xmm2, %xmm0 LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx ORL $-65536, CX - LONG $0xc9bc0ff3 // tzcntl %ecx, %ecx + BSFL CX, CX ADDQ DX, AX LEAQ -16(CX)(AX*1), AX ADDQ SI, AX BYTE $0x5d // popq %rbp RET +// .set L1_0_set_29, LBB1_29-LJTI1_0 // .set L1_0_set_28, LBB1_28-LJTI1_0 // .set L1_0_set_27, LBB1_27-LJTI1_0 // .set L1_0_set_26, LBB1_26-LJTI1_0 @@ -330,23 +335,22 @@ LBB1_8: // .set L1_0_set_17, LBB1_17-LJTI1_0 // .set L1_0_set_16, LBB1_16-LJTI1_0 // .set L1_0_set_15, LBB1_15-LJTI1_0 -// .set L1_0_set_14, LBB1_14-LJTI1_0 LJTI1_0: - LONG $0xfffffe89 // .long L1_0_set_28 - LONG $0xfffffe82 // .long L1_0_set_27 - LONG $0xfffffe7b // .long L1_0_set_26 - LONG $0xfffffe74 // .long L1_0_set_25 - LONG $0xfffffe6d // .long L1_0_set_24 - LONG $0xfffffe66 // .long L1_0_set_23 - LONG $0xfffffe5f // .long L1_0_set_22 - LONG $0xfffffe58 // .long L1_0_set_21 - LONG $0xfffffe51 // .long L1_0_set_20 - LONG $0xfffffe4a // .long L1_0_set_19 - LONG $0xfffffe43 // .long L1_0_set_18 - LONG $0xfffffe3c // .long L1_0_set_17 - LONG $0xfffffe35 // .long L1_0_set_16 - LONG $0xfffffe2e // .long L1_0_set_15 - LONG $0xfffffe1f // .long L1_0_set_14 + LONG $0xfffffe7f // .long L1_0_set_29 + LONG $0xfffffe78 // .long L1_0_set_28 + LONG $0xfffffe71 // .long L1_0_set_27 + LONG $0xfffffe6a // .long L1_0_set_26 + LONG $0xfffffe63 // .long L1_0_set_25 + LONG $0xfffffe5c // .long L1_0_set_24 + LONG $0xfffffe55 // .long L1_0_set_23 + LONG $0xfffffe4e // .long L1_0_set_22 + LONG $0xfffffe47 // .long L1_0_set_21 + LONG $0xfffffe40 // .long L1_0_set_20 + LONG $0xfffffe39 // .long L1_0_set_19 + LONG $0xfffffe32 // .long L1_0_set_18 + LONG 
$0xfffffe2b // .long L1_0_set_17 + LONG $0xfffffe24 // .long L1_0_set_16 + LONG $0xfffffe15 // .long L1_0_set_15 LCPI2_0: QUAD $0x2020202020202020; QUAD $0x2020202020202020 // .space 16, ' ' @@ -405,59 +409,59 @@ _lspace: LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 DECQ SI CMPQ SI, $14 - JA LBB2_29 - LONG $0xd0058d48; WORD $0x0001; BYTE $0x00 // leaq $464(%rip), %rax /* LJTI2_0(%rip) */ + JA LBB2_30 + LONG $0xe2058d48; WORD $0x0001; BYTE $0x00 // leaq $482(%rip), %rax /* LJTI2_0(%rip) */ MOVLQSX 0(AX)(SI*4), CX ADDQ AX, CX JMP CX -LBB2_14: +LBB2_15: QUAD $0xffffffad056ffac5 // vmovdqu $-83(%rip), %xmm0 /* LCPI2_8(%rip) */ LONG $0x2079e3c4; WORD $0x0e47; BYTE $0x0e // vpinsrb $14, $14(%rdi), %xmm0, %xmm0 -LBB2_15: +LBB2_16: LONG $0x2079e3c4; WORD $0x0d47; BYTE $0x0d // vpinsrb $13, $13(%rdi), %xmm0, %xmm0 -LBB2_16: +LBB2_17: LONG $0x2079e3c4; WORD $0x0c47; BYTE $0x0c // vpinsrb $12, $12(%rdi), %xmm0, %xmm0 -LBB2_17: +LBB2_18: LONG $0x2079e3c4; WORD $0x0b47; BYTE $0x0b // vpinsrb $11, $11(%rdi), %xmm0, %xmm0 -LBB2_18: +LBB2_19: LONG $0x2079e3c4; WORD $0x0a47; BYTE $0x0a // vpinsrb $10, $10(%rdi), %xmm0, %xmm0 -LBB2_19: +LBB2_20: LONG $0x2079e3c4; WORD $0x0947; BYTE $0x09 // vpinsrb $9, $9(%rdi), %xmm0, %xmm0 -LBB2_20: +LBB2_21: LONG $0x2079e3c4; WORD $0x0847; BYTE $0x08 // vpinsrb $8, $8(%rdi), %xmm0, %xmm0 -LBB2_21: +LBB2_22: LONG $0x2079e3c4; WORD $0x0747; BYTE $0x07 // vpinsrb $7, $7(%rdi), %xmm0, %xmm0 -LBB2_22: +LBB2_23: LONG $0x2079e3c4; WORD $0x0647; BYTE $0x06 // vpinsrb $6, $6(%rdi), %xmm0, %xmm0 -LBB2_23: +LBB2_24: LONG $0x2079e3c4; WORD $0x0547; BYTE $0x05 // vpinsrb $5, $5(%rdi), %xmm0, %xmm0 -LBB2_24: +LBB2_25: LONG $0x2079e3c4; WORD $0x0447; BYTE $0x04 // vpinsrb $4, $4(%rdi), %xmm0, %xmm0 -LBB2_25: +LBB2_26: LONG $0x2079e3c4; WORD $0x0347; BYTE $0x03 // vpinsrb $3, $3(%rdi), %xmm0, %xmm0 -LBB2_26: +LBB2_27: LONG $0x2079e3c4; WORD $0x0247; BYTE $0x02 // vpinsrb $2, $2(%rdi), %xmm0, %xmm0 -LBB2_27: +LBB2_28: LONG $0x2079e3c4; WORD $0x0147; BYTE 
$0x01 // vpinsrb $1, $1(%rdi), %xmm0, %xmm0 -LBB2_28: +LBB2_29: LONG $0x2079e3c4; WORD $0x0007 // vpinsrb $0, (%rdi), %xmm0, %xmm0 -LBB2_29: +LBB2_30: QUAD $0xfffffefd0d74f9c5 // vpcmpeqb $-259(%rip), %xmm0, %xmm1 /* LCPI2_4(%rip) */ QUAD $0xffffff051574f9c5 // vpcmpeqb $-251(%rip), %xmm0, %xmm2 /* LCPI2_5(%rip) */ QUAD $0xffffff0d1d74f9c5 // vpcmpeqb $-243(%rip), %xmm0, %xmm3 /* LCPI2_6(%rip) */ @@ -467,9 +471,9 @@ LBB2_29: LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax NOTL AX - LONG $0xc0bc0ff3 // tzcntl %eax, %eax + BSFL AX, AX -LBB2_30: +LBB2_31: ADDQ DX, AX BYTE $0x5d // popq %rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper @@ -485,31 +489,31 @@ LBB2_3: CMPQ SI, $32 JB LBB2_9 XORL AX, AX - QUAD $0xfffffe30056ffec5 // vmovdqu $-464(%rip), %ymm0 /* LCPI2_0(%rip) */ - QUAD $0xfffffe480d6ffec5 // vmovdqu $-440(%rip), %ymm1 /* LCPI2_1(%rip) */ - QUAD $0xfffffe60156ffec5 // vmovdqu $-416(%rip), %ymm2 /* LCPI2_2(%rip) */ - QUAD $0xfffffe781d6ffec5 // vmovdqu $-392(%rip), %ymm3 /* LCPI2_3(%rip) */ + QUAD $0xfffffe31056ffec5 // vmovdqu $-463(%rip), %ymm0 /* LCPI2_0(%rip) */ + QUAD $0xfffffe490d6ffec5 // vmovdqu $-439(%rip), %ymm1 /* LCPI2_1(%rip) */ + QUAD $0xfffffe61156ffec5 // vmovdqu $-415(%rip), %ymm2 /* LCPI2_2(%rip) */ + QUAD $0xfffffe791d6ffec5 // vmovdqu $-391(%rip), %ymm3 /* LCPI2_3(%rip) */ LBB2_5: - LONG $0x276ffec5 // vmovdqu (%rdi), %ymm4 - LONG $0xe874ddc5 // vpcmpeqb %ymm0, %ymm4, %ymm5 - LONG $0xf174ddc5 // vpcmpeqb %ymm1, %ymm4, %ymm6 - LONG $0xeeebd5c5 // vpor %ymm6, %ymm5, %ymm5 - LONG $0xf274ddc5 // vpcmpeqb %ymm2, %ymm4, %ymm6 - LONG $0xe374ddc5 // vpcmpeqb %ymm3, %ymm4, %ymm4 - LONG $0xe6ebddc5 // vpor %ymm6, %ymm4, %ymm4 - LONG $0xe5ebddc5 // vpor %ymm5, %ymm4, %ymm4 - LONG $0xccd7fdc5 // vpmovmskb %ymm4, %ecx + LONG $0x276ffec5 // vmovdqu (%rdi), %ymm4 + LONG $0xe874ddc5 // vpcmpeqb %ymm0, %ymm4, %ymm5 + LONG $0xf174ddc5 // vpcmpeqb %ymm1, %ymm4, %ymm6 + LONG $0xeeebd5c5 // vpor %ymm6, %ymm5, %ymm5 + LONG 
$0xf274ddc5 // vpcmpeqb %ymm2, %ymm4, %ymm6 + LONG $0xe374ddc5 // vpcmpeqb %ymm3, %ymm4, %ymm4 + LONG $0xe6ebddc5 // vpor %ymm6, %ymm4, %ymm4 + LONG $0xe5ebddc5 // vpor %ymm5, %ymm4, %ymm4 + LONG $0xccd7fdc5 // vpmovmskb %ymm4, %ecx NOTQ CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx + BSFQ CX, CX ADDQ CX, AX CMPQ CX, $32 - JB LBB2_30 + JB LBB2_31 ADDQ $32, DI ADDQ $-32, SI CMPQ SI, $31 JA LBB2_5 - WORD $0xf8c5; BYTE $0x77 // vzeroupper + WORD $0xf8c5; BYTE $0x77 // vzeroupper CMPQ SI, $16 JAE LBB2_10 JMP LBB2_8 @@ -519,41 +523,50 @@ LBB2_9: XORL AX, AX LBB2_10: - LONG $0x076ffac5 // vmovdqu (%rdi), %xmm0 - QUAD $0xfffffe390d74f9c5 // vpcmpeqb $-455(%rip), %xmm0, %xmm1 /* LCPI2_4(%rip) */ - QUAD $0xfffffe411574f9c5 // vpcmpeqb $-447(%rip), %xmm0, %xmm2 /* LCPI2_5(%rip) */ - QUAD $0xfffffe491d74f9c5 // vpcmpeqb $-439(%rip), %xmm0, %xmm3 /* LCPI2_6(%rip) */ - QUAD $0xfffffe510574f9c5 // vpcmpeqb $-431(%rip), %xmm0, %xmm0 /* LCPI2_7(%rip) */ - LONG $0xcaebf1c5 // vpor %xmm2, %xmm1, %xmm1 - LONG $0xc3ebf9c5 // vpor %xmm3, %xmm0, %xmm0 - LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 - LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx + QUAD $0xfffffe3f056ffac5 // vmovdqu $-449(%rip), %xmm0 /* LCPI2_4(%rip) */ + QUAD $0xfffffe470d6ffac5 // vmovdqu $-441(%rip), %xmm1 /* LCPI2_5(%rip) */ + QUAD $0xfffffe4f156ffac5 // vmovdqu $-433(%rip), %xmm2 /* LCPI2_6(%rip) */ + QUAD $0xfffffe571d6ffac5 // vmovdqu $-425(%rip), %xmm3 /* LCPI2_7(%rip) */ + +LBB2_11: + LONG $0x276ffac5 // vmovdqu (%rdi), %xmm4 + LONG $0xe874d9c5 // vpcmpeqb %xmm0, %xmm4, %xmm5 + LONG $0xf174d9c5 // vpcmpeqb %xmm1, %xmm4, %xmm6 + LONG $0xeeebd1c5 // vpor %xmm6, %xmm5, %xmm5 + LONG $0xf274d9c5 // vpcmpeqb %xmm2, %xmm4, %xmm6 + LONG $0xe374d9c5 // vpcmpeqb %xmm3, %xmm4, %xmm4 + LONG $0xe6ebd9c5 // vpor %xmm6, %xmm4, %xmm4 + LONG $0xe5ebd9c5 // vpor %xmm5, %xmm4, %xmm4 + LONG $0xccd7f9c5 // vpmovmskb %xmm4, %ecx NOTL CX - LONG $0xc9bc0ff3 // tzcntl %ecx, %ecx + BSFL CX, CX ADDQ CX, AX CMPL CX, $16 - JB LBB2_30 
+ JB LBB2_31 ADDQ $16, DI ADDQ $-16, SI + CMPQ SI, $15 + JA LBB2_11 LBB2_8: LONG $0x446ffac5; WORD $0xf037 // vmovdqu $-16(%rdi,%rsi), %xmm0 - QUAD $0xfffffde90d74f9c5 // vpcmpeqb $-535(%rip), %xmm0, %xmm1 /* LCPI2_4(%rip) */ - QUAD $0xfffffdf11574f9c5 // vpcmpeqb $-527(%rip), %xmm0, %xmm2 /* LCPI2_5(%rip) */ - QUAD $0xfffffdf91d74f9c5 // vpcmpeqb $-519(%rip), %xmm0, %xmm3 /* LCPI2_6(%rip) */ - QUAD $0xfffffe010574f9c5 // vpcmpeqb $-511(%rip), %xmm0, %xmm0 /* LCPI2_7(%rip) */ + QUAD $0xfffffdd60d74f9c5 // vpcmpeqb $-554(%rip), %xmm0, %xmm1 /* LCPI2_4(%rip) */ + QUAD $0xfffffdde1574f9c5 // vpcmpeqb $-546(%rip), %xmm0, %xmm2 /* LCPI2_5(%rip) */ + QUAD $0xfffffde61d74f9c5 // vpcmpeqb $-538(%rip), %xmm0, %xmm3 /* LCPI2_6(%rip) */ + QUAD $0xfffffdee0574f9c5 // vpcmpeqb $-530(%rip), %xmm0, %xmm0 /* LCPI2_7(%rip) */ LONG $0xcaebf1c5 // vpor %xmm2, %xmm1, %xmm1 LONG $0xc3ebf9c5 // vpor %xmm3, %xmm0, %xmm0 LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx NOTL CX - LONG $0xc9bc0ff3 // tzcntl %ecx, %ecx + BSFL CX, CX ADDQ SI, AX LEAQ -16(CX)(AX*1), AX ADDQ DX, AX BYTE $0x5d // popq %rbp RET +// .set L2_0_set_29, LBB2_29-LJTI2_0 // .set L2_0_set_28, LBB2_28-LJTI2_0 // .set L2_0_set_27, LBB2_27-LJTI2_0 // .set L2_0_set_26, LBB2_26-LJTI2_0 @@ -568,23 +581,22 @@ LBB2_8: // .set L2_0_set_17, LBB2_17-LJTI2_0 // .set L2_0_set_16, LBB2_16-LJTI2_0 // .set L2_0_set_15, LBB2_15-LJTI2_0 -// .set L2_0_set_14, LBB2_14-LJTI2_0 LJTI2_0: - LONG $0xfffffea3 // .long L2_0_set_28 - LONG $0xfffffe9c // .long L2_0_set_27 - LONG $0xfffffe95 // .long L2_0_set_26 - LONG $0xfffffe8e // .long L2_0_set_25 - LONG $0xfffffe87 // .long L2_0_set_24 - LONG $0xfffffe80 // .long L2_0_set_23 - LONG $0xfffffe79 // .long L2_0_set_22 - LONG $0xfffffe72 // .long L2_0_set_21 - LONG $0xfffffe6b // .long L2_0_set_20 - LONG $0xfffffe64 // .long L2_0_set_19 - LONG $0xfffffe5d // .long L2_0_set_18 - LONG $0xfffffe56 // .long L2_0_set_17 - LONG $0xfffffe4f // .long L2_0_set_16 - LONG 
$0xfffffe48 // .long L2_0_set_15 - LONG $0xfffffe39 // .long L2_0_set_14 + LONG $0xfffffe91 // .long L2_0_set_29 + LONG $0xfffffe8a // .long L2_0_set_28 + LONG $0xfffffe83 // .long L2_0_set_27 + LONG $0xfffffe7c // .long L2_0_set_26 + LONG $0xfffffe75 // .long L2_0_set_25 + LONG $0xfffffe6e // .long L2_0_set_24 + LONG $0xfffffe67 // .long L2_0_set_23 + LONG $0xfffffe60 // .long L2_0_set_22 + LONG $0xfffffe59 // .long L2_0_set_21 + LONG $0xfffffe52 // .long L2_0_set_20 + LONG $0xfffffe4b // .long L2_0_set_19 + LONG $0xfffffe44 // .long L2_0_set_18 + LONG $0xfffffe3d // .long L2_0_set_17 + LONG $0xfffffe36 // .long L2_0_set_16 + LONG $0xfffffe27 // .long L2_0_set_15 _strchr1: BYTE $0x55 // pushq %rbp @@ -595,31 +607,33 @@ _strchr1: ADDQ SI, R11 LONG $0xc26ef9c5 // vmovd %edx, %xmm0 LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb %xmm0, %ymm0 - MOVQ R11, DI + MOVQ R11, CX MOVQ R9, AX MOVQ R11, DX - ANDQ $31, DI + ANDQ $31, CX JE LBB3_5 MOVQ R11, AX ANDQ $-32, AX LONG $0x0874fdc5 // vpcmpeqb (%rax), %ymm0, %ymm1 LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax - LONG $0xf742e2c4; BYTE $0xc0 // sarxl %edi, %eax, %eax - TESTL AX, AX - JE LBB3_4 WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc8 // tzcntq %rax, %rcx + SARQ CX, AX + TESTQ AX, AX + JE LBB3_4 + MOVQ $4294967296, CX + ORQ CX, AX + BSFQ AX, CX CMPQ CX, R9 MOVQ $-1, AX LONG $0xc14c0f48 // cmovlq %rcx, %rax JMP LBB3_3 LBB3_4: - MOVL $32, CX - SUBQ DI, CX - LEAQ 0(R11)(CX*1), DX + MOVL $32, DI + SUBQ CX, DI + LEAQ 0(R11)(DI*1), DX MOVQ R9, AX - SUBQ CX, AX + SUBQ DI, AX LBB3_5: CMPQ AX, $128 @@ -636,19 +650,19 @@ LBB3_12: LONG $0x177de2c4; BYTE $0xed // vptest %ymm5, %ymm5 JNE LBB3_14 SUBQ $-128, DX - LEAQ -128(AX), DI + LEAQ -128(AX), CX CMPQ AX, $255 - MOVQ DI, AX + MOVQ CX, AX JG LBB3_12 MOVQ R9, AX - TESTQ DI, DI + TESTQ CX, CX JNS LBB3_8 JMP LBB3_3 LBB3_6: - MOVQ AX, DI + MOVQ AX, CX MOVQ R9, AX - TESTQ DI, DI + TESTQ CX, CX JS LBB3_3 LBB3_8: @@ -661,74 +675,82 @@ LBB3_9: MOVQ DX, R8 LBB3_10: - MOVLQSX R10, AX - LONG 
$0xbc0f48f3; BYTE $0xd0 // tzcntq %rax, %rdx - MOVQ $-1, AX - CMPQ DX, DI - JGE LBB3_3 - SUBQ R11, R8 - ADDQ DX, R8 - MOVQ R8, AX - JMP LBB3_3 + MOVL R10, AX + MOVQ $4294967296, DX + ORQ AX, DX + BSFQ DX, DX + MOVQ $-1, AX + CMPQ DX, CX + JGE LBB3_3 + SUBQ R11, R8 + ADDQ DX, R8 + MOVQ R8, AX + JMP LBB3_3 LBB3_14: - LONG $0xc4d7fdc5 // vpmovmskb %ymm4, %eax + LONG $0xc4d7fdc5 // vpmovmskb %ymm4, %eax TESTL AX, AX JE LBB3_16 + MOVL AX, AX SUBQ R11, DX - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + MOVQ $4294967296, CX + ORQ AX, CX + BSFQ CX, AX ADDQ DX, AX JMP LBB3_3 LBB3_21: MOVQ R9, AX - CMPQ DI, $31 + CMPQ CX, $31 JLE LBB3_3 LEAQ 32(DX), R8 LONG $0x747dc1c4; BYTE $0x08 // vpcmpeqb (%r8), %ymm0, %ymm1 LONG $0xd1d77dc5 // vpmovmskb %ymm1, %r10d TESTL R10, R10 JE LBB3_24 - ADDQ $-32, DI + ADDQ $-32, CX JMP LBB3_10 LBB3_16: - LONG $0xc3d7fdc5 // vpmovmskb %ymm3, %eax + LONG $0xc3d7fdc5 // vpmovmskb %ymm3, %eax TESTL AX, AX JE LBB3_18 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + MOVL AX, AX + MOVQ $4294967296, CX + ORQ AX, CX + BSFQ CX, AX SUBQ R11, DX LEAQ 32(DX)(AX*1), AX JMP LBB3_3 LBB3_18: - LONG $0xc2d7fdc5 // vpmovmskb %ymm2, %eax + MOVQ $4294967296, AX + LONG $0xcad7fdc5 // vpmovmskb %ymm2, %ecx SUBQ R11, DX - TESTL AX, AX + TESTL CX, CX JE LBB3_20 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + MOVL CX, CX + ORQ AX, CX + BSFQ CX, AX LEAQ 64(DX)(AX*1), AX JMP LBB3_3 LBB3_24: MOVQ R9, AX - CMPQ DI, $64 + CMPQ CX, $64 JL LBB3_3 LEAQ 64(DX), R8 LONG $0x747dc1c4; BYTE $0x08 // vpcmpeqb (%r8), %ymm0, %ymm1 LONG $0xd1d77dc5 // vpmovmskb %ymm1, %r10d TESTL R10, R10 JE LBB3_27 - ADDQ $-64, DI + ADDQ $-64, CX JMP LBB3_10 LBB3_20: - LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx + ORQ AX, CX + BSFQ CX, AX LEAQ 96(DX)(AX*1), AX LBB3_3: @@ -742,7 +764,7 @@ LBB3_3: LBB3_27: 
MOVQ R9, AX - CMPQ DI, $96 + CMPQ CX, $96 JL LBB3_3 ADDQ $96, DX LONG $0x0274fdc5 // vpcmpeqb (%rdx), %ymm0, %ymm0 @@ -750,7 +772,7 @@ LBB3_27: MOVQ R9, AX TESTL R10, R10 JE LBB3_3 - ADDQ $-96, DI + ADDQ $-96, CX JMP LBB3_9 LCPI4_0: @@ -784,59 +806,59 @@ _strchr2: LONG $0xc076f9c5 // vpcmpeqd %xmm0, %xmm0, %xmm0 LEAQ -1(R8), AX CMPQ AX, $14 - JA LBB4_24 - LONG $0xb63d8d48; WORD $0x0001; BYTE $0x00 // leaq $438(%rip), %rdi /* LJTI4_0(%rip) */ + JA LBB4_25 + LONG $0xb83d8d48; WORD $0x0001; BYTE $0x00 // leaq $440(%rip), %rdi /* LJTI4_0(%rip) */ MOVLQSX 0(DI)(AX*4), AX ADDQ DI, AX JMP AX -LBB4_9: +LBB4_10: QUAD $0xffffffa5056ffac5 // vmovdqu $-91(%rip), %xmm0 /* LCPI4_0(%rip) */ LONG $0x2079c3c4; WORD $0x0e41; BYTE $0x0e // vpinsrb $14, $14(%r9), %xmm0, %xmm0 -LBB4_10: +LBB4_11: LONG $0x2079c3c4; WORD $0x0d41; BYTE $0x0d // vpinsrb $13, $13(%r9), %xmm0, %xmm0 -LBB4_11: +LBB4_12: LONG $0x2079c3c4; WORD $0x0c41; BYTE $0x0c // vpinsrb $12, $12(%r9), %xmm0, %xmm0 -LBB4_12: +LBB4_13: LONG $0x2079c3c4; WORD $0x0b41; BYTE $0x0b // vpinsrb $11, $11(%r9), %xmm0, %xmm0 -LBB4_13: +LBB4_14: LONG $0x2079c3c4; WORD $0x0a41; BYTE $0x0a // vpinsrb $10, $10(%r9), %xmm0, %xmm0 -LBB4_14: +LBB4_15: LONG $0x2079c3c4; WORD $0x0941; BYTE $0x09 // vpinsrb $9, $9(%r9), %xmm0, %xmm0 -LBB4_15: +LBB4_16: LONG $0x2079c3c4; WORD $0x0841; BYTE $0x08 // vpinsrb $8, $8(%r9), %xmm0, %xmm0 -LBB4_16: +LBB4_17: LONG $0x2079c3c4; WORD $0x0741; BYTE $0x07 // vpinsrb $7, $7(%r9), %xmm0, %xmm0 -LBB4_17: +LBB4_18: LONG $0x2079c3c4; WORD $0x0641; BYTE $0x06 // vpinsrb $6, $6(%r9), %xmm0, %xmm0 -LBB4_18: +LBB4_19: LONG $0x2079c3c4; WORD $0x0541; BYTE $0x05 // vpinsrb $5, $5(%r9), %xmm0, %xmm0 -LBB4_19: +LBB4_20: LONG $0x2079c3c4; WORD $0x0441; BYTE $0x04 // vpinsrb $4, $4(%r9), %xmm0, %xmm0 -LBB4_20: +LBB4_21: LONG $0x2079c3c4; WORD $0x0341; BYTE $0x03 // vpinsrb $3, $3(%r9), %xmm0, %xmm0 -LBB4_21: +LBB4_22: LONG $0x2079c3c4; WORD $0x0241; BYTE $0x02 // vpinsrb $2, $2(%r9), %xmm0, %xmm0 -LBB4_22: +LBB4_23: LONG 
$0x2079c3c4; WORD $0x0141; BYTE $0x01 // vpinsrb $1, $1(%r9), %xmm0, %xmm0 -LBB4_23: +LBB4_24: LONG $0x2079c3c4; WORD $0x0001 // vpinsrb $0, (%r9), %xmm0, %xmm0 -LBB4_24: +LBB4_25: LONG $0xca6ef9c5 // vmovd %edx, %xmm1 LONG $0x7879e2c4; BYTE $0xc9 // vpbroadcastb %xmm1, %xmm1 LONG $0xc874f1c5 // vpcmpeqb %xmm0, %xmm1, %xmm1 @@ -846,12 +868,12 @@ LBB4_24: LONG $0xc1ebf9c5 // vpor %xmm1, %xmm0, %xmm0 LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax ORL $-65536, AX - LONG $0xbc0f44f3; BYTE $0xd8 // tzcntl %eax, %r11d - JMP LBB4_31 + BSFL AX, R11 + JMP LBB4_32 LBB4_1: XORL R11, R11 - JMP LBB4_31 + JMP LBB4_32 LBB4_3: CMPQ R8, $31 @@ -864,21 +886,21 @@ LBB4_3: MOVQ $-4294967296, R10 MOVQ R8, DI -LBB4_26: +LBB4_27: LONG $0x6f7ec1c4; BYTE $0x11 // vmovdqu (%r9), %ymm2 LONG $0xda74fdc5 // vpcmpeqb %ymm2, %ymm0, %ymm3 LONG $0xd274f5c5 // vpcmpeqb %ymm2, %ymm1, %ymm2 LONG $0xd3ebedc5 // vpor %ymm3, %ymm2, %ymm2 LONG $0xc2d7fdc5 // vpmovmskb %ymm2, %eax ORQ R10, AX - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + BSFQ AX, AX ADDQ AX, R11 CMPQ AX, $32 - JB LBB4_31 + JB LBB4_32 ADDQ $32, R9 ADDQ $-32, DI CMPQ DI, $31 - JA LBB4_26 + JA LBB4_27 WORD $0xf8c5; BYTE $0x77 // vzeroupper CMPQ DI, $15 JA LBB4_5 @@ -886,7 +908,7 @@ LBB4_26: LONG $0x7879e2c4; BYTE $0xc0 // vpbroadcastb %xmm0, %xmm0 LONG $0xc96ef9c5 // vmovd %ecx, %xmm1 LONG $0x7879e2c4; BYTE $0xc9 // vpbroadcastb %xmm1, %xmm1 - JMP LBB4_30 + JMP LBB4_31 LBB4_4: WORD $0xf8c5; BYTE $0x77 // vzeroupper @@ -894,35 +916,39 @@ LBB4_4: MOVQ R8, DI LBB4_5: - LONG $0x6f7ac1c4; BYTE $0x11 // vmovdqu (%r9), %xmm2 LONG $0xc26ef9c5 // vmovd %edx, %xmm0 LONG $0x7879e2c4; BYTE $0xc0 // vpbroadcastb %xmm0, %xmm0 - LONG $0xda74f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm3 LONG $0xc96ef9c5 // vmovd %ecx, %xmm1 LONG $0x7879e2c4; BYTE $0xc9 // vpbroadcastb %xmm1, %xmm1 + +LBB4_6: + LONG $0x6f7ac1c4; BYTE $0x11 // vmovdqu (%r9), %xmm2 + LONG $0xda74f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm3 LONG $0xd274f1c5 // vpcmpeqb %xmm2, %xmm1, %xmm2 LONG $0xd3ebe9c5 // 
vpor %xmm3, %xmm2, %xmm2 LONG $0xc2d7f9c5 // vpmovmskb %xmm2, %eax ORL $-65536, AX - LONG $0xc0bc0ff3 // tzcntl %eax, %eax + BSFL AX, AX ADDQ AX, R11 CMPL AX, $16 - JB LBB4_31 + JB LBB4_32 ADDQ $16, R9 ADDQ $-16, DI + CMPQ DI, $15 + JA LBB4_6 -LBB4_30: +LBB4_31: LONG $0x6f7ac1c4; WORD $0x3954; BYTE $0xf0 // vmovdqu $-16(%r9,%rdi), %xmm2 LONG $0xc274f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm0 LONG $0xca74f1c5 // vpcmpeqb %xmm2, %xmm1, %xmm1 LONG $0xc0ebf1c5 // vpor %xmm0, %xmm1, %xmm0 LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax ORL $-65536, AX - LONG $0xc0bc0ff3 // tzcntl %eax, %eax + BSFL AX, AX ADDQ DI, R11 LEAQ -16(AX)(R11*1), R11 -LBB4_31: +LBB4_32: ADDQ R11, SI CMPQ R11, R8 MOVQ $-1, AX @@ -931,6 +957,7 @@ LBB4_31: WORD $0xf8c5; BYTE $0x77 // vzeroupper RET +// .set L4_0_set_24, LBB4_24-LJTI4_0 // .set L4_0_set_23, LBB4_23-LJTI4_0 // .set L4_0_set_22, LBB4_22-LJTI4_0 // .set L4_0_set_21, LBB4_21-LJTI4_0 @@ -945,23 +972,22 @@ LBB4_31: // .set L4_0_set_12, LBB4_12-LJTI4_0 // .set L4_0_set_11, LBB4_11-LJTI4_0 // .set L4_0_set_10, LBB4_10-LJTI4_0 -// .set L4_0_set_9, LBB4_9-LJTI4_0 LJTI4_0: - LONG $0xfffffebd // .long L4_0_set_23 - LONG $0xfffffeb6 // .long L4_0_set_22 - LONG $0xfffffeaf // .long L4_0_set_21 - LONG $0xfffffea8 // .long L4_0_set_20 - LONG $0xfffffea1 // .long L4_0_set_19 - LONG $0xfffffe9a // .long L4_0_set_18 - LONG $0xfffffe93 // .long L4_0_set_17 - LONG $0xfffffe8c // .long L4_0_set_16 - LONG $0xfffffe85 // .long L4_0_set_15 - LONG $0xfffffe7e // .long L4_0_set_14 - LONG $0xfffffe77 // .long L4_0_set_13 - LONG $0xfffffe70 // .long L4_0_set_12 - LONG $0xfffffe69 // .long L4_0_set_11 - LONG $0xfffffe62 // .long L4_0_set_10 - LONG $0xfffffe53 // .long L4_0_set_9 + LONG $0xfffffebb // .long L4_0_set_24 + LONG $0xfffffeb4 // .long L4_0_set_23 + LONG $0xfffffead // .long L4_0_set_22 + LONG $0xfffffea6 // .long L4_0_set_21 + LONG $0xfffffe9f // .long L4_0_set_20 + LONG $0xfffffe98 // .long L4_0_set_19 + LONG $0xfffffe91 // .long L4_0_set_18 + LONG $0xfffffe8a // 
.long L4_0_set_17 + LONG $0xfffffe83 // .long L4_0_set_16 + LONG $0xfffffe7c // .long L4_0_set_15 + LONG $0xfffffe75 // .long L4_0_set_14 + LONG $0xfffffe6e // .long L4_0_set_13 + LONG $0xfffffe67 // .long L4_0_set_12 + LONG $0xfffffe60 // .long L4_0_set_11 + LONG $0xfffffe51 // .long L4_0_set_10 LCPI5_0: QUAD $0x8000000000000000 // .quad 0x8000000000000000 @@ -985,594 +1011,612 @@ _f64toa: WORD $0x5541 // pushq %r13 WORD $0x5441 // pushq %r12 BYTE $0x53 // pushq %rbx - SUBQ $88, SP + SUBQ $96, SP + MOVQ DI, R15 LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 LONG $0xc12ef9c5 // vucomisd %xmm1, %xmm0 JNE LBB5_2 - MOVB $48, 0(DI) + JP LBB5_2 + MOVB $48, 0(R15) MOVL $1, AX - JMP LBB5_126 + JMP LBB5_89 LBB5_2: - LONG $0xc12ef9c5 // vucomisd %xmm1, %xmm0 - JAE LBB5_3 - QUAD $0xffffff8a0557f9c5 // vxorpd $-118(%rip), %xmm0, %xmm0 /* LCPI5_0(%rip) */ - MOVB $45, 0(DI) - INCQ DI - MOVQ DI, -56(BP) + LONG $0xc82ef9c5 // vucomisd %xmm0, %xmm1 + JBE LBB5_4 + QUAD $0xffffff840557f9c5 // vxorpd $-124(%rip), %xmm0, %xmm0 /* LCPI5_0(%rip) */ + MOVB $45, 0(R15) + INCQ R15 MOVL $1, -44(BP) JMP LBB5_5 -LBB5_3: - MOVQ DI, -56(BP) +LBB5_4: MOVL $0, -44(BP) LBB5_5: - LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq %xmm0, %rax - MOVQ $4503599627370496, CX - LEAQ -1(CX), SI - ANDQ AX, SI - SHRQ $52, AX - ANDL $2047, AX - LEAQ 0(SI)(CX*1), DX - LEAL -1075(AX), DI - TESTL AX, AX - LONG $0xd6440f48 // cmoveq %rsi, %rdx - MOVL $-1074, BX - WORD $0x450f; BYTE $0xdf // cmovnel %edi, %ebx - XORL DI, DI - CMPQ DX, CX - SETEQ DI - INCL DI - LEAQ 1(DX)(DX*1), AX - BSRQ AX, CX - XORQ $63, CX - LONG $0xf7f162c4; BYTE $0xc8 // shlxq %rcx, %rax, %r9 - MOVL CX, SI - NOTL SI - ADDL BX, SI - MOVL BX, CX - SUBL DI, CX - LONG $0xf7c1e2c4; BYTE $0xfa // shlxq %rdi, %rdx, %rdi + LONG $0x7ef9e1c4; BYTE $0xc1 // vmovq %xmm0, %rcx + MOVQ $4503599627370496, DX + LEAQ -1(DX), SI + ANDQ CX, SI + SHRQ $52, CX + ANDL $2047, CX + LEAQ 0(SI)(DX*1), AX + LEAL -1075(CX), DI + TESTL CX, CX + LONG $0xc6440f48 // cmoveq %rsi, %rax + 
MOVL $-1074, SI + WORD $0x450f; BYTE $0xf7 // cmovnel %edi, %esi + XORL CX, CX + CMPQ AX, DX + SETEQ CX + INCL CX + LEAQ 1(AX)(AX*1), R10 + BSRQ R10, DX + XORQ $63, DX + MOVL DX, R12 + NOTL R12 + ADDL SI, R12 + SUBL CX, SI + MOVQ AX, DI + SHLQ CX, DI DECQ DI - SUBL SI, CX - MOVL $-61, BX - SUBL SI, BX - LONG $0xc32aebc5 // vcvtsi2sd %ebx, %xmm2, %xmm0 - LONG $0xf7f162c4; BYTE $0xd7 // shlxq %rcx, %rdi, %r10 - QUAD $0xfffffefc0d10fbc5 // vmovsd $-260(%rip), %xmm1 /* LCPI5_1(%rip) */ - QUAD $0xfffefb0da9f9e2c4; BYTE $0xff // vfmadd213sd $-261(%rip), %xmm0, %xmm1 /* LCPI5_2(%rip) */ - LONG $0xf92cfbc5 // vcvttsd2si %xmm1, %edi - LONG $0x0b71e3c4; WORD $0x0bc1 // vroundsd $11, %xmm1, %xmm1, %xmm0 - LONG $0xc05cf3c5 // vsubsd %xmm0, %xmm1, %xmm0 + MOVL DX, CX + SHLQ CX, R10 + SUBL R12, SI + MOVL SI, CX + SHLQ CX, DI + MOVL $-61, CX + SUBL R12, CX + LONG $0xc12aebc5 // vcvtsi2sd %ecx, %xmm2, %xmm0 + QUAD $0xfffffef90559fbc5 // vmulsd $-263(%rip), %xmm0, %xmm0 /* LCPI5_1(%rip) */ + QUAD $0xfffffef90558fbc5 // vaddsd $-263(%rip), %xmm0, %xmm0 /* LCPI5_2(%rip) */ + LONG $0xc82cfbc5 // vcvttsd2si %xmm0, %ecx + LONG $0xc8e6f9c5 // vcvttpd2dq %xmm0, %xmm1 + LONG $0xc9e6fac5 // vcvtdq2pd %xmm1, %xmm1 + LONG $0xc15cfbc5 // vsubsd %xmm1, %xmm0, %xmm0 LONG $0xc957f1c5 // vxorpd %xmm1, %xmm1, %xmm1 - XORL BX, BX + XORL DX, DX LONG $0xc12ef9c5 // vucomisd %xmm1, %xmm0 - SETHI BX - ADDL DI, BX - SARL $3, BX - LEAL 8(BX*8), R13 - INCL BX - MOVL $348, R14 - LONG $0x3e058d4c; WORD $0x0034; BYTE $0x00 // leaq $13374(%rip), %r8 /* _TabPowE(%rip) */ - LONG $0xe53d8d48; WORD $0x0034; BYTE $0x00 // leaq $13541(%rip), %rdi /* _TabPowF(%rip) */ - MOVQ 0(DI)(BX*8), R15 - BSRQ DX, AX - XORL $63, AX - LONG $0xf7f9e2c4; BYTE $0xd2 // shlxq %rax, %rdx, %rdx - LONG $0xf6fb42c4; BYTE $0xdf // mulxq %r15, %rax, %r11 - MOVBLSX 0(R8)(BX*2), CX - MOVQ R9, DX - LONG $0xf6c342c4; BYTE $0xcf // mulxq %r15, %rdi, %r9 - SARQ $63, AX - MOVQ R10, DX - LONG $0xf6bbc2c4; BYTE $0xd7 // mulxq %r15, %r8, %rdx - SHRQ 
$63, DI - ADDL SI, CX - SHRQ $63, R8 - ADDQ DX, R8 - MOVQ R8, -88(BP) - NOTQ R8 - LEAQ -1(R9)(DI*1), R12 - MOVQ AX, -120(BP) - MOVQ AX, DX - MOVQ R11, -112(BP) - SUBQ R11, DX - MOVL $-64, BX - SUBL CX, BX - NEGL CX + SETHI DX + ADDL CX, DX + SARL $3, DX + LEAL 8(DX*8), R14 + INCL DX + MOVL $348, CX + MOVQ CX, -64(BP) + LONG $0xbc0d8d48; WORD $0x0034; BYTE $0x00 // leaq $13500(%rip), %rcx /* _TabPowE(%rip) */ + MOVBLSX 0(CX)(DX*2), SI + LONG $0x5f0d8d48; WORD $0x0035; BYTE $0x00 // leaq $13663(%rip), %rcx /* _TabPowF(%rip) */ + MOVQ 0(CX)(DX*8), R8 + BSRQ AX, CX + XORL $63, CX + SHLQ CX, AX + MULQ R8 + MOVQ DX, R9 + MOVQ AX, CX + MOVQ R10, AX + MULQ R8 + MOVQ AX, BX + MOVQ DX, R11 + SARQ $63, CX + SHRQ $63, BX + MOVQ DI, AX + MULQ R8 + ADDL R12, SI + SHRQ $63, AX + ADDQ DX, AX + MOVQ AX, -88(BP) + NOTQ AX + LEAQ -1(R11)(BX*1), R12 + MOVQ CX, -120(BP) + MOVQ CX, DX + MOVL $-64, R10 + SUBL SI, R10 + NEGL SI + MOVL $1, DI + MOVL SI, CX + SHLQ CX, DI + MOVQ R9, -112(BP) + SUBQ R9, DX + MOVQ R12, R9 + SHRQ CX, R9 + MOVQ R11, -104(BP) + MOVQ BX, -96(BP) + ADDQ BX, R11 + MOVQ DI, -136(BP) + DECQ DI MOVL $1, SI - LONG $0xf7f1e2c4; BYTE $0xf6 // shlxq %rcx, %rsi, %rsi - LONG $0xf7f342c4; BYTE $0xd4 // shrxq %rcx, %r12, %r10 - MOVQ R9, -104(BP) - MOVQ DI, -96(BP) - ADDQ R9, DI - MOVL $1, R11 - CMPQ R10, $10 + CMPQ R9, $10 JB LBB5_14 - MOVL $2, R11 - CMPQ R10, $100 + MOVL $2, SI + CMPQ R9, $100 JB LBB5_14 - MOVL $3, R11 - CMPQ R10, $1000 + MOVL $3, SI + CMPQ R9, $1000 JB LBB5_14 - MOVL $4, R11 - CMPQ R10, $10000 + MOVL $4, SI + CMPQ R9, $10000 JB LBB5_14 - MOVL $5, R11 - CMPQ R10, $100000 + MOVL $5, SI + CMPQ R9, $100000 JB LBB5_14 - MOVL $6, R11 - CMPQ R10, $1000000 + MOVL $6, SI + CMPQ R9, $1000000 JB LBB5_14 - MOVL $7, R11 - CMPQ R10, $10000000 + MOVL $7, SI + CMPQ R9, $10000000 JB LBB5_14 - MOVL $8, R11 - CMPQ R10, $100000000 + MOVL $8, SI + CMPQ R9, $100000000 JB LBB5_14 - CMPQ R10, $1000000000 - MOVL $10, R11 - SBBL $0, R11 + CMPQ R9, $1000000000 + MOVL $10, SI + SBBL 
$0, SI LBB5_14: - MOVQ R13, -64(BP) - SUBL R13, R14 + MOVQ -64(BP), CX MOVQ R14, -72(BP) - LEAQ -1(R8)(DI*1), R14 - LEAQ -1(DX)(DI*1), AX + SUBL R14, CX + MOVQ CX, -64(BP) + LEAQ -1(AX)(R11*1), R8 + LEAQ -1(DX)(R11*1), AX MOVQ AX, -80(BP) - MOVQ SI, -128(BP) - LEAQ -1(SI), R9 - LONG $0xf5e042c4; BYTE $0xec // bzhiq %rbx, %r12, %r13 - LONG $0xa6258d4c; WORD $0x0009; BYTE $0x00 // leaq $2470(%rip), %r12 /* LJTI5_0(%rip) */ - MOVQ -56(BP), R15 - JMP LBB5_15 - -LBB5_36: - ADDB $48, R10 - MOVB R10, 0(R15) - INCQ R15 - -LBB5_37: - LONG $0xf7e1e2c4; BYTE $0xf0 // shlxq %rbx, %rax, %rsi - LEAQ 0(SI)(R13*1), DX - MOVQ R14, R8 - MOVQ AX, R10 - SUBQ DX, R8 - JAE LBB5_38 - -LBB5_15: - TESTL R11, R11 - JLE LBB5_16 - DECL R11 - MOVLQSX 0(R12)(R11*4), CX - ADDQ R12, CX - XORL AX, AX - JMP CX - -LBB5_32: - MOVQ R10, DX - MOVQ $-3689348814741910323, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $3, CX - LEAQ 0(CX)(CX*1), AX - LEAQ 0(AX)(AX*4), AX - JMP LBB5_33 - -LBB5_31: - MOVQ R10, DX - SHRQ $2, DX - MOVQ $2951479051793528259, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $2, CX - IMUL3Q $100, CX, AX - JMP LBB5_33 - -LBB5_30: - MOVQ R10, DX - SHRQ $3, DX - MOVQ $2361183241434822607, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $4, CX - IMUL3Q $1000, CX, AX - JMP LBB5_33 - -LBB5_29: - MOVQ R10, DX - MOVQ $3777893186295716171, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $11, CX - IMUL3Q $10000, CX, AX - JMP LBB5_33 - -LBB5_28: - MOVQ R10, DX - SHRQ $5, DX - MOVQ $755578637259143235, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $7, CX - IMUL3Q $100000, CX, AX - JMP LBB5_33 - -LBB5_27: - MOVQ R10, DX - MOVQ $4835703278458516699, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $18, CX - IMUL3Q $1000000, CX, AX - JMP LBB5_33 - -LBB5_26: - MOVQ R10, DX - MOVQ $-2972493582642298179, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $23, CX - 
IMUL3Q $10000000, CX, AX - JMP LBB5_33 - -LBB5_25: - MOVQ R10, DX - MOVQ $-6067343680855748867, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $26, CX - IMUL3Q $100000000, CX, AX - JMP LBB5_33 - -LBB5_24: - MOVQ R10, DX - SHRQ $9, DX - MOVQ $19342813113834067, AX - LONG $0xf6f3e2c4; BYTE $0xc8 // mulxq %rax, %rcx, %rcx - SHRQ $11, CX - IMUL3Q $1000000000, CX, AX - -LBB5_33: - SUBQ AX, R10 - MOVQ R10, AX - MOVQ CX, R10 - -LBB5_34: - CMPQ R15, -56(BP) - JA LBB5_36 - TESTL R10, R10 - JE LBB5_37 - JMP LBB5_36 - -LBB5_16: - XORL R11, R11 - MOVQ -56(BP), R12 + MOVQ DI, -128(BP) + ANDQ DI, R12 + LONG $0x813d8d48; WORD $0x0009; BYTE $0x00 // leaq $2433(%rip), %rdi /* LJTI5_0(%rip) */ + MOVQ R15, -56(BP) JMP LBB5_17 +LBB5_15: + ADDB $48, R9 + MOVB R9, 0(R15) + INCQ R15 + +LBB5_16: + MOVQ R13, BX + MOVL R10, CX + SHLQ CX, BX + LEAQ 0(BX)(R12*1), AX + MOVQ R8, R14 + MOVQ R13, R9 + SUBQ AX, R14 + JAE LBB5_43 + +LBB5_17: + TESTL SI, SI + JLE LBB5_31 + DECL SI + MOVLQSX 0(DI)(SI*4), AX + ADDQ DI, AX + XORL R13, R13 + JMP AX + LBB5_19: + MOVQ R9, AX + MOVQ $-3689348814741910323, CX + MULQ CX + SHRQ $3, DX + LEAQ 0(DX)(DX*1), AX + LEAQ 0(AX)(AX*4), AX + JMP LBB5_28 + +LBB5_20: + MOVQ R9, AX + SHRQ $2, AX + MOVQ $2951479051793528259, CX + MULQ CX + SHRQ $2, DX + IMUL3Q $100, DX, AX + JMP LBB5_28 + +LBB5_21: + MOVQ R9, AX + SHRQ $3, AX + MOVQ $2361183241434822607, CX + MULQ CX + SHRQ $4, DX + IMUL3Q $1000, DX, AX + JMP LBB5_28 + +LBB5_22: + MOVQ R9, AX + MOVQ $3777893186295716171, CX + MULQ CX + SHRQ $11, DX + IMUL3Q $10000, DX, AX + JMP LBB5_28 + +LBB5_23: + MOVQ R9, AX + SHRQ $5, AX + MOVQ $755578637259143235, CX + MULQ CX + SHRQ $7, DX + IMUL3Q $100000, DX, AX + JMP LBB5_28 + +LBB5_24: + MOVQ R9, AX + MOVQ $4835703278458516699, CX + MULQ CX + SHRQ $18, DX + IMUL3Q $1000000, DX, AX + JMP LBB5_28 + +LBB5_25: + MOVQ R9, AX + MOVQ $-2972493582642298179, CX + MULQ CX + SHRQ $23, DX + IMUL3Q $10000000, DX, AX + JMP LBB5_28 + +LBB5_26: + MOVQ R9, AX + MOVQ 
$-6067343680855748867, CX + MULQ CX + SHRQ $26, DX + IMUL3Q $100000000, DX, AX + JMP LBB5_28 + +LBB5_27: + MOVQ R9, AX + SHRQ $9, AX + MOVQ $19342813113834067, CX + MULQ CX + SHRQ $11, DX + IMUL3Q $1000000000, DX, AX + +LBB5_28: + SUBQ AX, R9 + MOVQ R9, R13 + MOVQ DX, R9 + +LBB5_29: + CMPQ R15, -56(BP) + JA LBB5_15 + TESTL R9, R9 + JE LBB5_16 + JMP LBB5_15 + +LBB5_31: + XORL SI, SI + MOVQ -56(BP), R14 + MOVQ -136(BP), R9 + MOVQ -128(BP), DI + JMP LBB5_34 + +LBB5_32: ADDB $48, DX MOVB DX, 0(R15) INCQ R15 -LBB5_20: - DECL R11 - MOVQ R14, CX - SUBQ R13, CX - JA LBB5_21 +LBB5_33: + DECL SI + MOVQ R8, CX + SUBQ R12, CX + JA LBB5_36 -LBB5_17: - MOVQ R14, AX - ADDQ R14, AX - LEAQ 0(AX)(AX*4), R14 - ADDQ R13, R13 - LEAQ 0(R13)(R13*4), R13 - LONG $0xf7e3c2c4; BYTE $0xd5 // shrxq %rbx, %r13, %rdx - ANDQ R9, R13 - CMPQ R15, R12 - JA LBB5_19 +LBB5_34: + MOVQ R8, AX + ADDQ R12, R12 + LEAQ 0(R12)(R12*4), R12 + MOVQ R12, DX + MOVL R10, CX + SHRQ CX, DX + ADDQ R8, AX + LEAQ 0(AX)(AX*4), R8 + ANDQ DI, R12 + CMPQ R15, R14 + JA LBB5_32 MOVBLZX DX, CX TESTQ CX, CX - JE LBB5_20 - JMP LBB5_19 + JE LBB5_33 + JMP LBB5_32 -LBB5_21: - MOVQ -128(BP), DI - CMPQ CX, DI - JB LBB5_22 - MOVL R11, CX +LBB5_36: + CMPQ CX, R9 + JB LBB5_50 + MOVL SI, CX NEGL CX MOVLQSX CX, CX - LONG $0x84158d48; WORD $0x0034; BYTE $0x00 // leaq $13444(%rip), %rdx /* _TabPow10(%rip) */ - MOVQ -80(BP), SI - IMULQ 0(DX)(CX*8), SI - CMPQ R13, SI - JAE LBB5_22 - LEAQ 0(AX)(AX*4), AX - LEAQ 0(DI)(R13*1), DX - SUBQ DX, AX - MOVQ SI, R8 - NEGQ R8 - MOVQ SI, BX - SUBQ R13, BX - MOVQ -72(BP), R14 + LONG $0x05158d48; WORD $0x0035; BYTE $0x00 // leaq $13573(%rip), %rdx /* _TabPow10(%rip) */ + MOVQ -80(BP), DI + IMULQ 0(DX)(CX*8), DI + CMPQ R12, DI MOVQ -64(BP), R10 - -LBB5_48: - CMPQ DX, SI - JB LBB5_50 - LEAQ 0(R8)(DX*1), CX - CMPQ BX, CX - JBE LBB5_52 - -LBB5_50: - DECB -1(R15) - SUBQ DI, AX - SETCS CX - CMPQ DX, SI - JAE LBB5_52 - ADDQ DI, DX - SUBQ DI, BX - TESTB CX, CX - JE LBB5_48 - JMP LBB5_52 - -LBB5_38: - MOVQ -80(BP), 
R9 - CMPQ DX, R9 - JAE LBB5_39 - MOVL R11, AX - LONG $0x080d8d48; WORD $0x0034; BYTE $0x00 // leaq $13320(%rip), %rcx /* _TabPow10(%rip) */ - LONG $0xf7e1e2c4; WORD $0xc104 // shlxq %rbx, (%rcx,%rax,8), %rax - CMPQ R8, AX - MOVQ -56(BP), R12 - JB LBB5_22 - SUBQ -88(BP), DI - ADDQ AX, R13 - LEAQ 0(R13)(SI*1), BX - SUBQ BX, DI - ADDQ $-2, DI - MOVQ -112(BP), R8 - ADDQ R8, R13 - MOVQ -120(BP), CX - SUBQ CX, R13 - MOVQ -96(BP), R14 - SUBQ R14, R13 - MOVQ -104(BP), R10 - SUBQ R10, R13 - LEAQ 1(R13)(SI*1), SI - ADDQ R14, CX - ADDQ R10, CX - SUBQ R8, CX - NOTQ DX - ADDQ CX, DX - MOVQ -72(BP), R14 - MOVQ -64(BP), R10 - -LBB5_42: - CMPQ BX, R9 - JB LBB5_44 - CMPQ DX, SI - JBE LBB5_52 - -LBB5_44: - DECB -1(R15) - SUBQ AX, DI - SETCS CX - CMPQ BX, R9 - JAE LBB5_52 - ADDQ AX, BX - ADDQ AX, SI - SUBQ AX, DX - TESTB CX, CX - JE LBB5_42 - JMP LBB5_52 + JAE LBB5_53 + LEAQ 0(AX)(AX*4), AX + LEAQ 0(R9)(R12*1), CX + SUBQ CX, AX + MOVQ DI, R8 + NEGQ R8 + MOVQ DI, BX + SUBQ R12, BX LBB5_39: - MOVQ -56(BP), R12 + CMPQ CX, DI + JB LBB5_41 + LEAQ 0(R8)(CX*1), DX + CMPQ BX, DX + JBE LBB5_53 -LBB5_22: - MOVQ -72(BP), R14 +LBB5_41: + DECB -1(R15) + SUBQ R9, AX + SETCS DX + CMPQ CX, DI + JAE LBB5_53 + ADDQ R9, CX + SUBQ R9, BX + TESTB DX, DX + JE LBB5_39 + JMP LBB5_53 + +LBB5_43: + MOVL SI, CX + LONG $0x9b158d48; WORD $0x0034; BYTE $0x00 // leaq $13467(%rip), %rdx /* _TabPow10(%rip) */ + MOVQ 0(DX)(CX*8), DI + MOVL R10, CX + SHLQ CX, DI + MOVQ -80(BP), R8 + CMPQ AX, R8 + JAE LBB5_51 + CMPQ R14, DI MOVQ -64(BP), R10 + JB LBB5_52 + SUBQ -88(BP), R11 + ADDQ DI, R12 + LEAQ 0(R12)(BX*1), CX + SUBQ CX, R11 + ADDQ $-2, R11 + MOVQ -112(BP), R9 + ADDQ R9, R12 + MOVQ -120(BP), DX + SUBQ DX, R12 + MOVQ -96(BP), R13 + SUBQ R13, R12 + MOVQ -104(BP), R14 + SUBQ R14, R12 + LEAQ 1(R12)(BX*1), BX + ADDQ R13, DX + ADDQ R14, DX + SUBQ R9, DX + NOTQ AX + ADDQ DX, AX + MOVQ -56(BP), R14 + +LBB5_46: + CMPQ CX, R8 + JB LBB5_48 + CMPQ AX, BX + JBE LBB5_53 + +LBB5_48: + DECB -1(R15) + SUBQ DI, R11 + SETCS DX + CMPQ 
CX, R8 + JAE LBB5_53 + ADDQ DI, CX + ADDQ DI, BX + SUBQ DI, AX + TESTB DX, DX + JE LBB5_46 + JMP LBB5_53 + +LBB5_50: + MOVQ -64(BP), R10 + JMP LBB5_53 + +LBB5_51: + MOVQ -56(BP), R14 + MOVQ -64(BP), R10 + JMP LBB5_53 LBB5_52: - MOVQ R15, R13 - SUBQ R12, R13 - ADDL R11, R14 - LEAL 0(R14)(R13*1), R9 - TESTL R14, R14 - JS LBB5_69 - CMPL R9, $21 - JG LBB5_69 - TESTL R14, R14 - JE LBB5_55 - MOVQ R10, AX - MOVLQSX R13, R10 - ADDQ R12, R10 - SUBL AX, R11 - ADDL $347, R11 - CMPL R11, $127 - JB LBB5_66 - INCQ R11 - MOVQ R11, R8 + MOVQ -56(BP), R14 + +LBB5_53: + MOVQ R15, R12 + SUBQ R14, R12 + ADDL SI, R10 + LEAL 0(R10)(R12*1), R11 + TESTL R10, R10 + JS LBB5_59 + CMPL R11, $21 + JG LBB5_59 + TESTL R10, R10 + JE LBB5_86 + MOVLQSX R12, R9 + ADDQ R14, R9 + SUBL -72(BP), SI + ADDL $347, SI + CMPL SI, $127 + JB LBB5_84 + INCQ SI + MOVQ SI, R8 ANDQ $-128, R8 LEAQ -128(R8), AX - MOVQ AX, DX - SHRQ $7, DX - INCQ DX - MOVL DX, BX - ANDL $3, BX + MOVQ AX, BX + SHRQ $7, BX + INCQ BX + MOVL BX, CX + ANDL $3, CX CMPQ AX, $384 - JAE LBB5_59 - XORL AX, AX - JMP LBB5_61 + JAE LBB5_77 + XORL DX, DX + JMP LBB5_79 -LBB5_69: - LEAL -1(R9), SI - CMPL SI, $20 - JA LBB5_75 - LEAL 1(R13), AX - CMPL AX, R9 - JLE LBB5_71 +LBB5_59: + LEAL -1(R11), DI + CMPL DI, $20 + JA LBB5_63 + LEAL 1(R12), AX + CMPL AX, R11 + JLE LBB5_72 MOVLQSX AX, DX - MOVL R9, CX - MOVL -44(BP), DI + MOVL R11, CX -LBB5_73: - MOVBLZX -2(R12)(DX*1), BX - MOVB BX, -1(R12)(DX*1) +LBB5_62: + MOVBLZX -2(R14)(DX*1), BX + MOVB BX, -1(R14)(DX*1) LEAQ -1(DX), SI MOVQ SI, DX CMPQ SI, CX - JG LBB5_73 - MOVB $46, 0(R12)(CX*1) - JMP LBB5_125 - -LBB5_75: - LEAL 5(R9), AX - CMPL AX, $5 - JA LBB5_105 - MOVL $2, AX - MOVL $2, DI - SUBL R9, DI - LEAL 0(DI)(R13*1), DX - TESTL DX, DX - JLE LBB5_92 - MOVL DX, CX - MOVL DI, R8 - CMPL DX, $128 - JAE LBB5_78 - -LBB5_90: - INCQ CX - MOVQ R12, DX - SUBQ R8, DX - -LBB5_91: - MOVBLZX -2(DX)(CX*1), BX - MOVB BX, -2(R12)(CX*1) - DECQ CX - CMPQ CX, $1 - JG LBB5_91 - -LBB5_92: - SUBL R14, AX - MOVW $11824, 
0(R12) - TESTL R9, R9 - JE LBB5_124 - NEGL R9 - LEAQ 2(R12), SI - SUBL R11, R10 - SUBL R13, R10 - ADDL $-349, R10 - CMPL R10, $127 - JB LBB5_103 - INCQ R10 - MOVQ R10, CX - ANDQ $-128, CX - LEAQ -128(CX), DI - MOVQ DI, DX - SHRQ $7, DX - INCQ DX - MOVL DX, BX - ANDL $3, BX - CMPQ DI, $384 - JAE LBB5_96 - XORL DI, DI - JMP LBB5_98 - -LBB5_71: - MOVL R9, CX - MOVL -44(BP), DI - MOVB $46, 0(R12)(CX*1) - JMP LBB5_125 - -LBB5_105: - CMPL R13, $1 - JNE LBB5_113 - MOVB $101, 1(R12) - LEAQ 2(R12), DX - TESTL SI, SI - JS LBB5_110 - MOVQ DX, R12 - MOVL SI, CX - MOVL -44(BP), DI - JMP LBB5_108 - -LBB5_59: - MOVLQSX R13, AX - LEAQ 480(AX)(R12*1), SI - ANDQ $-4, DX - NEGQ DX - XORL AX, AX - QUAD $0xfffff8ec0528fdc5 // vmovapd $-1812(%rip), %ymm0 /* LCPI5_3(%rip) */ - -LBB5_60: - QUAD $0xfffe20068411fdc5; BYTE $0xff // vmovupd %ymm0, $-480(%rsi,%rax) - QUAD $0xfffe40068411fdc5; BYTE $0xff // vmovupd %ymm0, $-448(%rsi,%rax) - QUAD $0xfffe60068411fdc5; BYTE $0xff // vmovupd %ymm0, $-416(%rsi,%rax) - QUAD $0xfffe80068411fdc5; BYTE $0xff // vmovupd %ymm0, $-384(%rsi,%rax) - QUAD $0xfffea0068411fdc5; BYTE $0xff // vmovupd %ymm0, $-352(%rsi,%rax) - QUAD $0xfffec0068411fdc5; BYTE $0xff // vmovupd %ymm0, $-320(%rsi,%rax) - QUAD $0xfffee0068411fdc5; BYTE $0xff // vmovupd %ymm0, $-288(%rsi,%rax) - QUAD $0xffff00068411fdc5; BYTE $0xff // vmovupd %ymm0, $-256(%rsi,%rax) - QUAD $0xffff20068411fdc5; BYTE $0xff // vmovupd %ymm0, $-224(%rsi,%rax) - QUAD $0xffff40068411fdc5; BYTE $0xff // vmovupd %ymm0, $-192(%rsi,%rax) - QUAD $0xffff60068411fdc5; BYTE $0xff // vmovupd %ymm0, $-160(%rsi,%rax) - LONG $0x4411fdc5; WORD $0x8006 // vmovupd %ymm0, $-128(%rsi,%rax) - LONG $0x4411fdc5; WORD $0xa006 // vmovupd %ymm0, $-96(%rsi,%rax) - LONG $0x4411fdc5; WORD $0xc006 // vmovupd %ymm0, $-64(%rsi,%rax) - LONG $0x4411fdc5; WORD $0xe006 // vmovupd %ymm0, $-32(%rsi,%rax) - LONG $0x0411fdc5; BYTE $0x06 // vmovupd %ymm0, (%rsi,%rax) - ADDQ $512, AX - ADDQ $4, DX - JNE LBB5_60 - -LBB5_61: - TESTQ BX, BX - JE 
LBB5_64 - SUBL R12, R15 - MOVLQSX R15, CX - ADDQ CX, AX - LEAQ 96(R12)(AX*1), AX - NEGQ BX - QUAD $0xfffff83e0528fdc5 // vmovapd $-1986(%rip), %ymm0 /* LCPI5_3(%rip) */ + JG LBB5_62 + JMP LBB5_73 LBB5_63: + LEAL 5(R11), AX + CMPL AX, $5 + JA LBB5_74 + MOVQ R10, R15 + MOVL $2, AX + MOVL $2, DX + SUBL R11, DX + LEAL 0(DX)(R12*1), CX + TESTL CX, CX + JLE LBB5_68 + MOVL CX, BX + MOVL DX, R10 + CMPL CX, $128 + JAE LBB5_96 + +LBB5_66: + INCQ BX + MOVQ R14, CX + SUBQ R10, CX + +LBB5_67: + MOVBLZX -2(CX)(BX*1), DX + MOVB DX, -2(R14)(BX*1) + DECQ BX + CMPQ BX, $1 + JG LBB5_67 + +LBB5_68: + SUBL R15, AX + MOVW $11824, 0(R14) + TESTL R11, R11 + JE LBB5_87 + NEGL R11 + LEAQ 2(R14), DI + MOVQ -72(BP), DX + SUBL SI, DX + SUBL R12, DX + ADDL $-349, DX + CMPL DX, $127 + JB LBB5_121 + INCQ DX + MOVQ DX, R8 + ANDQ $-128, DX + LEAQ -128(DX), BX + MOVQ BX, CX + SHRQ $7, CX + INCQ CX + MOVL CX, SI + ANDL $3, SI + CMPQ BX, $384 + JAE LBB5_113 + XORL BX, BX + JMP LBB5_115 + +LBB5_72: + MOVL R11, CX + +LBB5_73: + MOVL -44(BP), BX + MOVB $46, 0(R14)(CX*1) + JMP LBB5_88 + +LBB5_74: + CMPL R12, $1 + JNE LBB5_90 + MOVB $101, 1(R14) + LEAQ 2(R14), DX + TESTL DI, DI + JS LBB5_103 + MOVQ DX, R14 + MOVL DI, CX + MOVL -44(BP), BX + JMP LBB5_105 + +LBB5_77: + MOVLQSX R12, AX + LEAQ 480(AX)(R14*1), AX + ANDQ $-4, BX + NEGQ BX + XORL DX, DX + QUAD $0xfffff8f60528fdc5 // vmovapd $-1802(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_78: + QUAD $0xfffe20108411fdc5; BYTE $0xff // vmovupd %ymm0, $-480(%rax,%rdx) + QUAD $0xfffe40108411fdc5; BYTE $0xff // vmovupd %ymm0, $-448(%rax,%rdx) + QUAD $0xfffe60108411fdc5; BYTE $0xff // vmovupd %ymm0, $-416(%rax,%rdx) + QUAD $0xfffe80108411fdc5; BYTE $0xff // vmovupd %ymm0, $-384(%rax,%rdx) + QUAD $0xfffea0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-352(%rax,%rdx) + QUAD $0xfffec0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-320(%rax,%rdx) + QUAD $0xfffee0108411fdc5; BYTE $0xff // vmovupd %ymm0, $-288(%rax,%rdx) + QUAD $0xffff00108411fdc5; BYTE $0xff // vmovupd %ymm0, 
$-256(%rax,%rdx) + QUAD $0xffff20108411fdc5; BYTE $0xff // vmovupd %ymm0, $-224(%rax,%rdx) + QUAD $0xffff40108411fdc5; BYTE $0xff // vmovupd %ymm0, $-192(%rax,%rdx) + QUAD $0xffff60108411fdc5; BYTE $0xff // vmovupd %ymm0, $-160(%rax,%rdx) + LONG $0x4411fdc5; WORD $0x8010 // vmovupd %ymm0, $-128(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xa010 // vmovupd %ymm0, $-96(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xc010 // vmovupd %ymm0, $-64(%rax,%rdx) + LONG $0x4411fdc5; WORD $0xe010 // vmovupd %ymm0, $-32(%rax,%rdx) + LONG $0x0411fdc5; BYTE $0x10 // vmovupd %ymm0, (%rax,%rdx) + ADDQ $512, DX + ADDQ $4, BX + JNE LBB5_78 + +LBB5_79: + TESTQ CX, CX + JE LBB5_82 + SUBL R14, R15 + MOVLQSX R15, AX + ADDQ AX, DX + LEAQ 96(R14)(DX*1), AX + NEGQ CX + QUAD $0xfffff8470528fdc5 // vmovapd $-1977(%rip), %ymm0 /* LCPI5_3(%rip) */ + +LBB5_81: LONG $0x4011fdc5; BYTE $0xa0 // vmovupd %ymm0, $-96(%rax) LONG $0x4011fdc5; BYTE $0xc0 // vmovupd %ymm0, $-64(%rax) LONG $0x4011fdc5; BYTE $0xe0 // vmovupd %ymm0, $-32(%rax) LONG $0x0011fdc5 // vmovupd %ymm0, (%rax) SUBQ $-128, AX - INCQ BX - JNE LBB5_63 + INCQ CX + JNE LBB5_81 -LBB5_64: - CMPQ R11, R8 - JNE LBB5_65 +LBB5_82: + CMPQ SI, R8 + JE LBB5_86 + SUBL R8, R10 + ADDQ R8, R9 -LBB5_55: - MOVL R9, AX - -LBB5_124: - MOVL -44(BP), DI - JMP LBB5_125 - -LBB5_65: - SUBL R8, R14 - ADDQ R8, R10 - -LBB5_66: - MOVL R14, AX +LBB5_84: + MOVL R10, AX XORL CX, CX - MOVL -44(BP), DI -LBB5_67: - MOVB $48, 0(R10)(CX*1) +LBB5_85: + MOVB $48, 0(R9)(CX*1) INCQ CX CMPL AX, CX - JNE LBB5_67 - MOVL R9, AX + JNE LBB5_85 -LBB5_125: - ADDL DI, AX +LBB5_86: + MOVL R11, AX -LBB5_126: - ADDQ $88, SP +LBB5_87: + MOVL -44(BP), BX + +LBB5_88: + ADDL BX, AX + +LBB5_89: + ADDQ $96, SP BYTE $0x5b // popq %rbx WORD $0x5c41 // popq %r12 WORD $0x5d41 // popq %r13 @@ -1582,300 +1626,298 @@ LBB5_126: WORD $0xf8c5; BYTE $0x77 // vzeroupper RET -LBB5_113: - LEAL 1(R13), AX +LBB5_90: + LEAL 1(R12), AX CMPL AX, $2 - JL LBB5_116 + JL LBB5_93 MOVL AX, AX INCQ AX -LBB5_115: - MOVBLZX 
-3(R12)(AX*1), CX - MOVB CX, -2(R12)(AX*1) +LBB5_92: + MOVBLZX -3(R14)(AX*1), CX + MOVB CX, -2(R14)(AX*1) DECQ AX CMPQ AX, $2 - JG LBB5_115 + JG LBB5_92 -LBB5_116: - MOVB $46, 1(R12) - MOVLQSX R13, AX - MOVB $101, 1(R12)(AX*1) - LEAQ 2(R12)(AX*1), DX - TESTL SI, SI - JS LBB5_118 +LBB5_93: + MOVB $46, 1(R14) + MOVLQSX R12, AX + MOVB $101, 1(R14)(AX*1) + LEAQ 2(R14)(AX*1), DX + TESTL DI, DI + MOVL -44(BP), BX + JS LBB5_108 MOVQ DX, CX - JMP LBB5_119 + JMP LBB5_109 -LBB5_78: - LEAQ -1(CX), SI - LEAQ -1(R12)(CX*1), DI - CMPQ SI, DI - JA LBB5_90 - MOVQ R8, DI - NOTQ DI - LEAQ 0(DI)(CX*1), BX - ADDQ R12, BX - CMPQ SI, BX - JA LBB5_90 - TESTL DX, DX - MOVL $1, DX - LONG $0xd1440f48 // cmoveq %rcx, %rdx - LEAQ -1(DX)(R12*1), BX - MOVQ CX, SI - SUBQ R8, SI - ADDQ R12, SI - CMPQ BX, SI - JAE LBB5_82 - LEAQ 0(R12)(CX*1), SI - ADDQ DI, DX - ADDQ R12, DX - CMPQ DX, SI - JB LBB5_90 +LBB5_96: + LEAQ -1(BX), DI + LEAQ -1(R14)(BX*1), DX + CMPQ DI, DX + JA LBB5_66 + MOVQ R10, R8 + NOTQ R8 + LEAQ 0(R8)(BX*1), DX + ADDQ R14, DX + CMPQ DI, DX + JA LBB5_66 + TESTL CX, CX + MOVL $1, CX + LONG $0xcb440f48 // cmoveq %rbx, %rcx + LEAQ -1(CX)(R14*1), DX + MOVQ BX, DI + SUBQ R10, DI + ADDQ R14, DI + CMPQ DX, DI + JAE LBB5_100 + LEAQ 0(R14)(BX*1), DX + ADDQ R8, CX + ADDQ R14, CX + CMPQ CX, DX + JB LBB5_66 -LBB5_82: - MOVQ R10, R15 - MOVL CX, R10 - ANDL $-128, R10 - LEAQ -128(R10), DX - MOVQ DX, SI - SHRQ $7, SI - INCQ SI - TESTQ DX, DX - JE LBB5_83 - LEAQ -32(CX)(R12*1), DI - MOVQ R8, BX - NEGQ BX - MOVQ SI, DX +LBB5_100: + MOVL BX, R8 + ANDL $-128, R8 + LEAQ -128(R8), CX + MOVQ CX, R9 + SHRQ $7, R9 + INCQ R9 + TESTQ CX, CX + JE LBB5_125 + LEAQ -32(BX)(R14*1), CX + MOVQ R10, DI + NEGQ DI + MOVQ R9, DX ANDQ $-2, DX NEGQ DX - MOVQ $-1, R12 + MOVQ $-1, R14 -LBB5_85: - LONG $0x4410fcc5; WORD $0xa01f // vmovups $-96(%rdi,%rbx), %ymm0 - LONG $0x4c10fcc5; WORD $0xc01f // vmovups $-64(%rdi,%rbx), %ymm1 - LONG $0x5410fcc5; WORD $0xe01f // vmovups $-32(%rdi,%rbx), %ymm2 - LONG $0x1c10fcc5; BYTE $0x1f // 
vmovups (%rdi,%rbx), %ymm3 - LONG $0x1f11fcc5 // vmovups %ymm3, (%rdi) - LONG $0x5711fcc5; BYTE $0xe0 // vmovups %ymm2, $-32(%rdi) - LONG $0x4f11fcc5; BYTE $0xc0 // vmovups %ymm1, $-64(%rdi) - LONG $0x4711fcc5; BYTE $0xa0 // vmovups %ymm0, $-96(%rdi) - QUAD $0xffff201f8410fdc5; BYTE $0xff // vmovupd $-224(%rdi,%rbx), %ymm0 - QUAD $0xffff401f8c10fdc5; BYTE $0xff // vmovupd $-192(%rdi,%rbx), %ymm1 - QUAD $0xffff601f9410fcc5; BYTE $0xff // vmovups $-160(%rdi,%rbx), %ymm2 - LONG $0x5c10fcc5; WORD $0x801f // vmovups $-128(%rdi,%rbx), %ymm3 - LONG $0x5f11fcc5; BYTE $0x80 // vmovups %ymm3, $-128(%rdi) - QUAD $0xffffff609711fcc5 // vmovups %ymm2, $-160(%rdi) - QUAD $0xffffff408f11fdc5 // vmovupd %ymm1, $-192(%rdi) - QUAD $0xffffff208711fdc5 // vmovupd %ymm0, $-224(%rdi) - ADDQ $-256, R12 - ADDQ $-256, DI +LBB5_102: + LONG $0x4410fcc5; WORD $0xa039 // vmovups $-96(%rcx,%rdi), %ymm0 + LONG $0x4c10fcc5; WORD $0xc039 // vmovups $-64(%rcx,%rdi), %ymm1 + LONG $0x5410fcc5; WORD $0xe039 // vmovups $-32(%rcx,%rdi), %ymm2 + LONG $0x1c10fcc5; BYTE $0x39 // vmovups (%rcx,%rdi), %ymm3 + LONG $0x1911fcc5 // vmovups %ymm3, (%rcx) + LONG $0x5111fcc5; BYTE $0xe0 // vmovups %ymm2, $-32(%rcx) + LONG $0x4911fcc5; BYTE $0xc0 // vmovups %ymm1, $-64(%rcx) + LONG $0x4111fcc5; BYTE $0xa0 // vmovups %ymm0, $-96(%rcx) + QUAD $0xffff20398410fdc5; BYTE $0xff // vmovupd $-224(%rcx,%rdi), %ymm0 + QUAD $0xffff40398c10fdc5; BYTE $0xff // vmovupd $-192(%rcx,%rdi), %ymm1 + QUAD $0xffff60399410fcc5; BYTE $0xff // vmovups $-160(%rcx,%rdi), %ymm2 + LONG $0x5c10fcc5; WORD $0x8039 // vmovups $-128(%rcx,%rdi), %ymm3 + LONG $0x5911fcc5; BYTE $0x80 // vmovups %ymm3, $-128(%rcx) + QUAD $0xffffff609111fcc5 // vmovups %ymm2, $-160(%rcx) + QUAD $0xffffff408911fdc5 // vmovupd %ymm1, $-192(%rcx) + QUAD $0xffffff208111fdc5 // vmovupd %ymm0, $-224(%rcx) + ADDQ $-256, R14 + ADDQ $-256, CX ADDQ $2, DX - JNE LBB5_85 - JMP LBB5_86 + JNE LBB5_102 + JMP LBB5_126 -LBB5_110: - MOVL SI, CX +LBB5_103: + MOVL DI, CX NEGL CX - MOVB 
$45, 2(R12) - ADDQ $3, R12 - CMPL SI, $-9 - MOVL -44(BP), DI - JL LBB5_108 + MOVB $45, 2(R14) + ADDQ $3, R14 + CMPL DI, $-9 + MOVL -44(BP), BX + JL LBB5_105 ADDB $48, CX - MOVB CX, 0(R12) + MOVB CX, 0(R14) MOVL $4, AX - JMP LBB5_125 + JMP LBB5_88 -LBB5_108: - MOVL R12, AX +LBB5_105: + MOVL R14, AX SUBL DX, AX CMPL CX, $99 - JG LBB5_112 + JG LBB5_107 ADDL $4, AX MOVL CX, CX - LONG $0x72158d48; WORD $0x002f; BYTE $0x00 // leaq $12146(%rip), %rdx /* _Digits(%rip) */ - MOVB 0(DX)(CX*2), BX + LONG $0x0a358d48; WORD $0x0030; BYTE $0x00 // leaq $12298(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(CX*2), DX ADDQ CX, CX - MOVB BX, 0(R12) + MOVB DX, 0(R14) ORL $1, CX - MOVB 0(CX)(DX*1), CX - MOVB CX, 1(R12) - JMP LBB5_125 + MOVB 0(CX)(SI*1), CX + MOVB CX, 1(R14) + JMP LBB5_88 -LBB5_112: +LBB5_107: ADDL $5, AX MOVL CX, DX IMUL3Q $1374389535, DX, DX SHRQ $37, DX LEAL 48(DX), SI - MOVB SI, 0(R12) + MOVB SI, 0(R14) WORD $0xd26b; BYTE $0x64 // imull $100, %edx, %edx SUBL DX, CX - LONG $0x35158d48; WORD $0x002f; BYTE $0x00 // leaq $12085(%rip), %rdx /* _Digits(%rip) */ - MOVB 0(DX)(CX*2), BX - MOVB 1(DX)(CX*2), CX - MOVB BX, 1(R12) - MOVB CX, 2(R12) - JMP LBB5_125 + LONG $0xd0358d48; WORD $0x002f; BYTE $0x00 // leaq $12240(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(CX*2), DX + MOVB 1(SI)(CX*2), CX + MOVB DX, 1(R14) + MOVB CX, 2(R14) + JMP LBB5_88 -LBB5_118: - NEGL SI +LBB5_108: + NEGL DI LEAQ 1(DX), CX MOVB $45, 0(DX) -LBB5_119: - CMPL SI, $9 - JG LBB5_121 +LBB5_109: + CMPL DI, $9 + JG LBB5_111 MOVL CX, AX SUBL DX, AX - LEAL 3(R13)(AX*1), AX - ADDB $48, SI - MOVB SI, 0(CX) - JMP LBB5_124 + LEAL 3(R12)(AX*1), AX + ADDB $48, DI + MOVB DI, 0(CX) + JMP LBB5_88 -LBB5_121: - LEAL 2(R13), DI +LBB5_111: + LEAL 2(R12), SI MOVL CX, AX SUBL DX, AX - ADDL DI, AX - CMPL SI, $99 - JG LBB5_123 + ADDL SI, AX + CMPL DI, $99 + JG LBB5_124 ADDL $2, AX - MOVL SI, DX - LONG $0xdd358d48; WORD $0x002e; BYTE $0x00 // leaq $11997(%rip), %rsi /* _Digits(%rip) */ - MOVB 0(SI)(DX*2), BX + MOVL DI, DX + LONG 
$0x79358d48; WORD $0x002f; BYTE $0x00 // leaq $12153(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(SI)(DX*2), DI ADDQ DX, DX - MOVB BX, 0(CX) + MOVB DI, 0(CX) ORL $1, DX MOVB 0(DX)(SI*1), DX MOVB DX, 1(CX) - JMP LBB5_124 + JMP LBB5_88 -LBB5_96: - ANDQ $-4, DX - NEGQ DX - XORL DI, DI - QUAD $0xfffff58e0528fdc5 // vmovapd $-2674(%rip), %ymm0 /* LCPI5_3(%rip) */ +LBB5_113: + ANDQ $-4, CX + NEGQ CX + XORL BX, BX + QUAD $0xfffff5a30528fdc5 // vmovapd $-2653(%rip), %ymm0 /* LCPI5_3(%rip) */ -LBB5_97: - LONG $0x117dc1c4; WORD $0x3c44; BYTE $0x02 // vmovupd %ymm0, $2(%r12,%rdi) - LONG $0x117dc1c4; WORD $0x3c44; BYTE $0x22 // vmovupd %ymm0, $34(%r12,%rdi) - LONG $0x117dc1c4; WORD $0x3c44; BYTE $0x42 // vmovupd %ymm0, $66(%r12,%rdi) - LONG $0x117dc1c4; WORD $0x3c44; BYTE $0x62 // vmovupd %ymm0, $98(%r12,%rdi) - QUAD $0x00823c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $130(%r12,%rdi) - QUAD $0x00a23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $162(%r12,%rdi) - QUAD $0x00c23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $194(%r12,%rdi) - QUAD $0x00e23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $226(%r12,%rdi) - QUAD $0x01023c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $258(%r12,%rdi) - QUAD $0x01223c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $290(%r12,%rdi) - QUAD $0x01423c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $322(%r12,%rdi) - QUAD $0x01623c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $354(%r12,%rdi) - QUAD $0x01823c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $386(%r12,%rdi) - QUAD $0x01a23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $418(%r12,%rdi) - QUAD $0x01c23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $450(%r12,%rdi) - QUAD $0x01e23c84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $482(%r12,%rdi) - ADDQ $512, DI - ADDQ $4, DX - JNE LBB5_97 +LBB5_114: + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x02 // vmovupd %ymm0, $2(%r14,%rbx) + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x22 // vmovupd %ymm0, $34(%r14,%rbx) + LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x42 // vmovupd %ymm0, $66(%r14,%rbx) + 
LONG $0x117dc1c4; WORD $0x1e44; BYTE $0x62 // vmovupd %ymm0, $98(%r14,%rbx) + QUAD $0x00821e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $130(%r14,%rbx) + QUAD $0x00a21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $162(%r14,%rbx) + QUAD $0x00c21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $194(%r14,%rbx) + QUAD $0x00e21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $226(%r14,%rbx) + QUAD $0x01021e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $258(%r14,%rbx) + QUAD $0x01221e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $290(%r14,%rbx) + QUAD $0x01421e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $322(%r14,%rbx) + QUAD $0x01621e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $354(%r14,%rbx) + QUAD $0x01821e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $386(%r14,%rbx) + QUAD $0x01a21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $418(%r14,%rbx) + QUAD $0x01c21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $450(%r14,%rbx) + QUAD $0x01e21e84117dc1c4; WORD $0x0000 // vmovupd %ymm0, $482(%r14,%rbx) + ADDQ $512, BX + ADDQ $4, CX + JNE LBB5_114 -LBB5_98: - TESTQ BX, BX - JE LBB5_101 - LEAQ 98(DI)(R12*1), DX - NEGQ BX - QUAD $0xfffff4d40528fdc5 // vmovapd $-2860(%rip), %ymm0 /* LCPI5_3(%rip) */ +LBB5_115: + TESTQ SI, SI + JE LBB5_118 + LEAQ 98(BX)(R14*1), CX + NEGQ SI + QUAD $0xfffff4e90528fdc5 // vmovapd $-2839(%rip), %ymm0 /* LCPI5_3(%rip) */ -LBB5_100: - LONG $0x4211fdc5; BYTE $0xa0 // vmovupd %ymm0, $-96(%rdx) - LONG $0x4211fdc5; BYTE $0xc0 // vmovupd %ymm0, $-64(%rdx) - LONG $0x4211fdc5; BYTE $0xe0 // vmovupd %ymm0, $-32(%rdx) - LONG $0x0211fdc5 // vmovupd %ymm0, (%rdx) - SUBQ $-128, DX - INCQ BX - JNE LBB5_100 +LBB5_117: + LONG $0x4111fdc5; BYTE $0xa0 // vmovupd %ymm0, $-96(%rcx) + LONG $0x4111fdc5; BYTE $0xc0 // vmovupd %ymm0, $-64(%rcx) + LONG $0x4111fdc5; BYTE $0xe0 // vmovupd %ymm0, $-32(%rcx) + LONG $0x0111fdc5 // vmovupd %ymm0, (%rcx) + SUBQ $-128, CX + INCQ SI + JNE LBB5_117 -LBB5_101: - CMPQ R10, CX - JE LBB5_124 - SUBL CX, R9 - ADDQ CX, SI +LBB5_118: + CMPQ R8, DX + JE LBB5_87 + SUBL DX, 
R11 + ADDQ DX, DI -LBB5_103: - MOVL R9, CX +LBB5_121: + MOVL R11, CX XORL DX, DX - MOVL -44(BP), DI -LBB5_104: - MOVB $48, 0(SI)(DX*1) +LBB5_122: + MOVB $48, 0(DI)(DX*1) INCQ DX CMPL CX, DX - JNE LBB5_104 - JMP LBB5_125 + JNE LBB5_122 + JMP LBB5_87 -LBB5_123: +LBB5_124: ADDL $3, AX - MOVL SI, DX + MOVL DI, DX IMUL3Q $1374389535, DX, DX SHRQ $37, DX - LEAL 48(DX), DI - MOVB DI, 0(CX) + LEAL 48(DX), SI + MOVB SI, 0(CX) WORD $0xd26b; BYTE $0x64 // imull $100, %edx, %edx - SUBL DX, SI - LONG $0x97158d48; WORD $0x002d; BYTE $0x00 // leaq $11671(%rip), %rdx /* _Digits(%rip) */ - MOVB 0(DX)(SI*2), BX - MOVB 1(DX)(SI*2), DX - MOVB BX, 1(CX) + SUBL DX, DI + LONG $0x34158d48; WORD $0x002e; BYTE $0x00 // leaq $11828(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(DX)(DI*2), SI + MOVB 1(DX)(DI*2), DX + MOVB SI, 1(CX) MOVB DX, 2(CX) - JMP LBB5_124 + JMP LBB5_88 -LBB5_83: - MOVQ $-1, R12 +LBB5_125: + MOVQ $-1, R14 -LBB5_86: - TESTB $1, SI - JE LBB5_88 - ADDQ CX, R12 - MOVQ R12, DX - SUBQ R8, DX - MOVQ -56(BP), SI - LONG $0x4410fdc5; WORD $0x8116 // vmovupd $-127(%rsi,%rdx), %ymm0 - LONG $0x4c10fdc5; WORD $0xa116 // vmovupd $-95(%rsi,%rdx), %ymm1 - LONG $0x5410fcc5; WORD $0xc116 // vmovups $-63(%rsi,%rdx), %ymm2 - LONG $0x5c10fcc5; WORD $0xe116 // vmovups $-31(%rsi,%rdx), %ymm3 - LONG $0x117ca1c4; WORD $0x265c; BYTE $0xe1 // vmovups %ymm3, $-31(%rsi,%r12) - LONG $0x117ca1c4; WORD $0x2654; BYTE $0xc1 // vmovups %ymm2, $-63(%rsi,%r12) - LONG $0x117da1c4; WORD $0x264c; BYTE $0xa1 // vmovupd %ymm1, $-95(%rsi,%r12) - LONG $0x117da1c4; WORD $0x2644; BYTE $0x81 // vmovupd %ymm0, $-127(%rsi,%r12) +LBB5_126: + TESTB $1, R9 + JE LBB5_128 + ADDQ BX, R14 + MOVQ R14, CX + SUBQ R10, CX + MOVQ -56(BP), DX + LONG $0x4410fdc5; WORD $0x810a // vmovupd $-127(%rdx,%rcx), %ymm0 + LONG $0x4c10fdc5; WORD $0xa10a // vmovupd $-95(%rdx,%rcx), %ymm1 + LONG $0x5410fcc5; WORD $0xc10a // vmovups $-63(%rdx,%rcx), %ymm2 + LONG $0x5c10fcc5; WORD $0xe10a // vmovups $-31(%rdx,%rcx), %ymm3 + LONG $0x117ca1c4; WORD 
$0x325c; BYTE $0xe1 // vmovups %ymm3, $-31(%rdx,%r14) + LONG $0x117ca1c4; WORD $0x3254; BYTE $0xc1 // vmovups %ymm2, $-63(%rdx,%r14) + LONG $0x117da1c4; WORD $0x324c; BYTE $0xa1 // vmovupd %ymm1, $-95(%rdx,%r14) + LONG $0x117da1c4; WORD $0x3244; BYTE $0x81 // vmovupd %ymm0, $-127(%rdx,%r14) -LBB5_88: - CMPQ R10, CX - MOVQ -56(BP), R12 - MOVQ R15, R10 - JE LBB5_92 - ANDL $127, CX - JMP LBB5_90 +LBB5_128: + CMPQ R8, BX + MOVQ -56(BP), R14 + JE LBB5_68 + ANDL $127, BX + JMP LBB5_66 -// .set L5_0_set_34, LBB5_34-LJTI5_0 -// .set L5_0_set_32, LBB5_32-LJTI5_0 -// .set L5_0_set_31, LBB5_31-LJTI5_0 -// .set L5_0_set_30, LBB5_30-LJTI5_0 // .set L5_0_set_29, LBB5_29-LJTI5_0 -// .set L5_0_set_28, LBB5_28-LJTI5_0 -// .set L5_0_set_27, LBB5_27-LJTI5_0 -// .set L5_0_set_26, LBB5_26-LJTI5_0 -// .set L5_0_set_25, LBB5_25-LJTI5_0 +// .set L5_0_set_19, LBB5_19-LJTI5_0 +// .set L5_0_set_20, LBB5_20-LJTI5_0 +// .set L5_0_set_21, LBB5_21-LJTI5_0 +// .set L5_0_set_22, LBB5_22-LJTI5_0 +// .set L5_0_set_23, LBB5_23-LJTI5_0 // .set L5_0_set_24, LBB5_24-LJTI5_0 +// .set L5_0_set_25, LBB5_25-LJTI5_0 +// .set L5_0_set_26, LBB5_26-LJTI5_0 +// .set L5_0_set_27, LBB5_27-LJTI5_0 LJTI5_0: - LONG $0xfffff7d1 // .long L5_0_set_34 - LONG $0xfffff699 // .long L5_0_set_32 - LONG $0xfffff6bc // .long L5_0_set_31 - LONG $0xfffff6df // .long L5_0_set_30 - LONG $0xfffff705 // .long L5_0_set_29 - LONG $0xfffff727 // .long L5_0_set_28 - LONG $0xfffff74a // .long L5_0_set_27 - LONG $0xfffff769 // .long L5_0_set_26 - LONG $0xfffff788 // .long L5_0_set_25 - LONG $0xfffff7a7 // .long L5_0_set_24 + LONG $0xfffff7e7 // .long L5_0_set_29 + LONG $0xfffff6c1 // .long L5_0_set_19 + LONG $0xfffff6e2 // .long L5_0_set_20 + LONG $0xfffff703 // .long L5_0_set_21 + LONG $0xfffff727 // .long L5_0_set_22 + LONG $0xfffff747 // .long L5_0_set_23 + LONG $0xfffff768 // .long L5_0_set_24 + LONG $0xfffff785 // .long L5_0_set_25 + LONG $0xfffff7a2 // .long L5_0_set_26 + LONG $0xfffff7bf // .long L5_0_set_27 _i64toa: TESTQ SI, SI @@ 
-1929,50 +1971,51 @@ _u64toa: WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax MOVL SI, CX SUBL AX, CX - MOVWLZX CX, CX - ADDQ CX, CX + MOVWLZX CX, AX + ADDQ AX, AX CMPL SI, $1000 JB LBB7_3 - LONG $0x5a058d48; WORD $0x002c; BYTE $0x00 // leaq $11354(%rip), %rax /* _Digits(%rip) */ - MOVB 0(DX)(AX*1), AX - MOVB AX, 0(DI) - MOVL $1, AX + LONG $0xf80d8d48; WORD $0x002c; BYTE $0x00 // leaq $11512(%rip), %rcx /* _Digits(%rip) */ + MOVB 0(DX)(CX*1), CX + MOVB CX, 0(DI) + MOVL $1, CX JMP LBB7_4 LBB7_3: - XORL AX, AX + XORL CX, CX CMPL SI, $100 JB LBB7_5 LBB7_4: MOVWLZX DX, DX ORQ $1, DX - LONG $0x39358d48; WORD $0x002c; BYTE $0x00 // leaq $11321(%rip), %rsi /* _Digits(%rip) */ + LONG $0xd7358d48; WORD $0x002c; BYTE $0x00 // leaq $11479(%rip), %rsi /* _Digits(%rip) */ MOVB 0(DX)(SI*1), DX - MOVL AX, SI - INCL AX + MOVL CX, SI + INCL CX MOVB DX, 0(DI)(SI*1) LBB7_6: - LONG $0x28158d48; WORD $0x002c; BYTE $0x00 // leaq $11304(%rip), %rdx /* _Digits(%rip) */ - MOVB 0(CX)(DX*1), DX - MOVL AX, SI - INCL AX + LONG $0xc6158d48; WORD $0x002c; BYTE $0x00 // leaq $11462(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(AX)(DX*1), DX + MOVL CX, SI + INCL CX MOVB DX, 0(DI)(SI*1) LBB7_7: - MOVWLZX CX, CX - ORQ $1, CX - LONG $0x10158d48; WORD $0x002c; BYTE $0x00 // leaq $11280(%rip), %rdx /* _Digits(%rip) */ - MOVB 0(CX)(DX*1), CX - MOVL AX, DX - INCL AX - MOVB CX, 0(DI)(DX*1) + MOVWLZX AX, AX + ORQ $1, AX + LONG $0xae158d48; WORD $0x002c; BYTE $0x00 // leaq $11438(%rip), %rdx /* _Digits(%rip) */ + MOVB 0(AX)(DX*1), AX + MOVL CX, DX + INCL CX + MOVB AX, 0(DI)(DX*1) + MOVL CX, AX BYTE $0x5d // popq %rbp RET LBB7_5: - XORL AX, AX + XORL CX, CX CMPL SI, $10 JAE LBB7_6 JMP LBB7_7 @@ -1980,87 +2023,88 @@ LBB7_5: LBB7_8: CMPQ SI, $99999999 JA LBB7_16 - MOVL SI, CX + MOVL SI, AX MOVL $3518437209, DX - IMULQ CX, DX + IMULQ AX, DX SHRQ $45, DX LONG $0x10c26944; WORD $0x0027; BYTE $0x00 // imull $10000, %edx, %r8d - MOVL SI, AX - SUBL R8, AX - IMUL3Q $1125899907, CX, R10 + MOVL SI, CX + SUBL R8, CX + 
IMUL3Q $1125899907, AX, R10 SHRQ $49, R10 ANDL $-2, R10 - MOVWLZX DX, CX - SHRL $2, CX - LONG $0x147bc969; WORD $0x0000 // imull $5243, %ecx, %ecx - SHRL $17, CX - WORD $0xc96b; BYTE $0x64 // imull $100, %ecx, %ecx - SUBL CX, DX + MOVWLZX DX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax + SUBL AX, DX MOVWLZX DX, R9 ADDQ R9, R9 - MOVWLZX AX, CX - SHRL $2, CX - LONG $0x147bc969; WORD $0x0000 // imull $5243, %ecx, %ecx - SHRL $17, CX - LEAQ 0(CX)(CX*1), R8 - WORD $0xc96b; BYTE $0x64 // imull $100, %ecx, %ecx - SUBL CX, AX - MOVWLZX AX, R11 + MOVWLZX CX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + LEAQ 0(AX)(AX*1), R8 + WORD $0xc06b; BYTE $0x64 // imull $100, %eax, %eax + SUBL AX, CX + MOVWLZX CX, R11 ADDQ R11, R11 CMPL SI, $10000000 JB LBB7_11 - LONG $0x7b058d48; WORD $0x002b; BYTE $0x00 // leaq $11131(%rip), %rax /* _Digits(%rip) */ + LONG $0x17058d48; WORD $0x002c; BYTE $0x00 // leaq $11287(%rip), %rax /* _Digits(%rip) */ MOVB 0(R10)(AX*1), AX MOVB AX, 0(DI) - MOVL $1, AX + MOVL $1, CX JMP LBB7_12 LBB7_11: - XORL AX, AX + XORL CX, CX CMPL SI, $1000000 JB LBB7_13 LBB7_12: - MOVL R10, CX - ORQ $1, CX - LONG $0x56358d48; WORD $0x002b; BYTE $0x00 // leaq $11094(%rip), %rsi /* _Digits(%rip) */ - MOVB 0(CX)(SI*1), CX - MOVL AX, SI - INCL AX - MOVB CX, 0(DI)(SI*1) + MOVL R10, AX + ORQ $1, AX + LONG $0xf2358d48; WORD $0x002b; BYTE $0x00 // leaq $11250(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(AX)(SI*1), AX + MOVL CX, SI + INCL CX + MOVB AX, 0(DI)(SI*1) LBB7_14: - LONG $0x450d8d48; WORD $0x002b; BYTE $0x00 // leaq $11077(%rip), %rcx /* _Digits(%rip) */ - MOVB 0(R9)(CX*1), CX - MOVL AX, SI - INCL AX - MOVB CX, 0(DI)(SI*1) + LONG $0xe1058d48; WORD $0x002b; BYTE $0x00 // leaq $11233(%rip), %rax /* _Digits(%rip) */ + MOVB 0(R9)(AX*1), AX + MOVL CX, SI + INCL CX + MOVB AX, 0(DI)(SI*1) LBB7_15: - MOVWLZX R9, CX - ORQ $1, CX - LONG 
$0x2b358d48; WORD $0x002b; BYTE $0x00 // leaq $11051(%rip), %rsi /* _Digits(%rip) */ - MOVB 0(CX)(SI*1), CX - MOVL AX, DX - MOVB CX, 0(DI)(DX*1) - MOVB 0(R8)(SI*1), CX - MOVB CX, 1(DI)(DX*1) - MOVWLZX R8, CX - ORQ $1, CX - MOVB 0(CX)(SI*1), CX - MOVB CX, 2(DI)(DX*1) - MOVB 0(R11)(SI*1), CX - MOVB CX, 3(DI)(DX*1) - MOVWLZX R11, CX - ORQ $1, CX - MOVB 0(CX)(SI*1), CX - ADDL $5, AX - MOVB CX, 4(DI)(DX*1) + MOVWLZX R9, AX + ORQ $1, AX + LONG $0xc7358d48; WORD $0x002b; BYTE $0x00 // leaq $11207(%rip), %rsi /* _Digits(%rip) */ + MOVB 0(AX)(SI*1), AX + MOVL CX, DX + MOVB AX, 0(DI)(DX*1) + MOVB 0(R8)(SI*1), AX + MOVB AX, 1(DI)(DX*1) + MOVWLZX R8, AX + ORQ $1, AX + MOVB 0(AX)(SI*1), AX + MOVB AX, 2(DI)(DX*1) + MOVB 0(R11)(SI*1), AX + MOVB AX, 3(DI)(DX*1) + MOVWLZX R11, AX + ORQ $1, AX + MOVB 0(AX)(SI*1), AX + ADDL $5, CX + MOVB AX, 4(DI)(DX*1) + MOVL CX, AX BYTE $0x5d // popq %rbp RET LBB7_13: - XORL AX, AX + XORL CX, CX CMPL SI, $100000 JAE LBB7_14 JMP LBB7_15 @@ -2069,14 +2113,14 @@ LBB7_16: MOVQ $9999999999999999, AX CMPQ SI, AX JA LBB7_18 - MOVQ $-6067343680855748867, AX - MOVQ SI, DX - LONG $0xf6fbe2c4; BYTE $0xc0 // mulxq %rax, %rax, %rax - SHRQ $26, AX - LONG $0xe100c869; WORD $0x05f5 // imull $100000000, %eax, %ecx - SUBL CX, SI - LONG $0xc06ef9c5 // vmovd %eax, %xmm0 - QUAD $0xfffffdcd0d6ffac5 // vmovdqu $-563(%rip), %xmm1 /* LCPI7_0(%rip) */ + MOVQ $-6067343680855748867, CX + MOVQ SI, AX + MULQ CX + SHRQ $26, DX + LONG $0xe100c269; WORD $0x05f5 // imull $100000000, %edx, %eax + SUBL AX, SI + LONG $0xc26ef9c5 // vmovd %edx, %xmm0 + QUAD $0xfffffdcb0d6ffac5 // vmovdqu $-565(%rip), %xmm1 /* LCPI7_0(%rip) */ LONG $0xd1f4f9c5 // vpmuludq %xmm1, %xmm0, %xmm2 LONG $0xd273e9c5; BYTE $0x2d // vpsrlq $45, %xmm2, %xmm2 MOVL $10000, AX @@ -2087,11 +2131,11 @@ LBB7_16: LONG $0xf073f9c5; BYTE $0x02 // vpsllq $2, %xmm0, %xmm0 LONG $0xc070fbc5; BYTE $0x50 // vpshuflw $80, %xmm0, %xmm0 LONG $0xc070f9c5; BYTE $0x50 // vpshufd $80, %xmm0, %xmm0 - QUAD $0xfffdc6155979e2c4; BYTE $0xff 
// vpbroadcastq $-570(%rip), %xmm2 /* LCPI7_1(%rip) */ - QUAD $0xfffdc5255979e2c4; BYTE $0xff // vpbroadcastq $-571(%rip), %xmm4 /* LCPI7_2(%rip) */ + QUAD $0xfffdc4155979e2c4; BYTE $0xff // vpbroadcastq $-572(%rip), %xmm2 /* LCPI7_1(%rip) */ + QUAD $0xfffdc3255979e2c4; BYTE $0xff // vpbroadcastq $-573(%rip), %xmm4 /* LCPI7_2(%rip) */ LONG $0xc2e4f9c5 // vpmulhuw %xmm2, %xmm0, %xmm0 LONG $0xc4e4f9c5 // vpmulhuw %xmm4, %xmm0, %xmm0 - QUAD $0xfffffd8d2d6ffac5 // vmovdqu $-627(%rip), %xmm5 /* LCPI7_3(%rip) */ + QUAD $0xfffffd8b2d6ffac5 // vmovdqu $-629(%rip), %xmm5 /* LCPI7_3(%rip) */ LONG $0xf5d5f9c5 // vpmullw %xmm5, %xmm0, %xmm6 LONG $0xf673c9c5; BYTE $0x10 // vpsllq $16, %xmm6, %xmm6 LONG $0xc6f9f9c5 // vpsubw %xmm6, %xmm0, %xmm0 @@ -2110,59 +2154,60 @@ LBB7_16: LONG $0xf273e9c5; BYTE $0x10 // vpsllq $16, %xmm2, %xmm2 LONG $0xcaf9f1c5 // vpsubw %xmm2, %xmm1, %xmm1 LONG $0xc167f9c5 // vpackuswb %xmm1, %xmm0, %xmm0 - QUAD $0xfffffd470dfcf9c5 // vpaddb $-697(%rip), %xmm0, %xmm1 /* LCPI7_4(%rip) */ + QUAD $0xfffffd450dfcf9c5 // vpaddb $-699(%rip), %xmm0, %xmm1 /* LCPI7_4(%rip) */ LONG $0xd2efe9c5 // vpxor %xmm2, %xmm2, %xmm2 LONG $0xc274f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm0 LONG $0xc0d7f9c5 // vpmovmskb %xmm0, %eax ORL $32768, AX XORL $-32769, AX - LONG $0xc8bc0ff3 // tzcntl %eax, %ecx - MOVL $16, AX - SUBL CX, AX - SHLQ $4, CX - LONG $0x9d158d48; WORD $0x002a; BYTE $0x00 // leaq $10909(%rip), %rdx /* _VecShiftShuffles(%rip) */ - LONG $0x0071e2c4; WORD $0x1104 // vpshufb (%rcx,%rdx), %xmm1, %xmm0 + BSFL AX, AX + MOVL $16, CX + SUBL AX, CX + SHLQ $4, AX + LONG $0x3a158d48; WORD $0x002b; BYTE $0x00 // leaq $11066(%rip), %rdx /* _VecShiftShuffles(%rip) */ + LONG $0x0071e2c4; WORD $0x1004 // vpshufb (%rax,%rdx), %xmm1, %xmm0 LONG $0x077ffac5 // vmovdqu %xmm0, (%rdi) + MOVL CX, AX BYTE $0x5d // popq %rbp RET LBB7_18: - MOVQ $4153837486827862103, AX - MOVQ SI, DX - LONG $0xf6fbe2c4; BYTE $0xc0 // mulxq %rax, %rax, %rax - SHRQ $51, AX - MOVQ $10000000000000000, CX - IMULQ AX, 
CX - SUBQ CX, SI - CMPL AX, $9 + MOVQ $4153837486827862103, CX + MOVQ SI, AX + MULQ CX + SHRQ $51, DX + MOVQ $10000000000000000, AX + IMULQ DX, AX + SUBQ AX, SI + CMPL DX, $9 JA LBB7_20 - ADDB $48, AX - MOVB AX, 0(DI) - MOVL $1, AX + ADDB $48, DX + MOVB DX, 0(DI) + MOVL $1, CX JMP LBB7_25 LBB7_20: - CMPL AX, $99 + CMPL DX, $99 JA LBB7_22 - MOVL AX, AX - LONG $0x810d8d48; WORD $0x0029; BYTE $0x00 // leaq $10625(%rip), %rcx /* _Digits(%rip) */ + MOVL DX, AX + LONG $0x1d0d8d48; WORD $0x002a; BYTE $0x00 // leaq $10781(%rip), %rcx /* _Digits(%rip) */ MOVB 0(CX)(AX*2), DX MOVB 1(CX)(AX*2), AX MOVB DX, 0(DI) MOVB AX, 1(DI) - MOVL $2, AX + MOVL $2, CX JMP LBB7_25 LBB7_22: - MOVL AX, CX - SHRL $2, CX - LONG $0x147bc969; WORD $0x0000 // imull $5243, %ecx, %ecx - SHRL $17, CX - CMPL AX, $999 + MOVL DX, AX + SHRL $2, AX + LONG $0x147bc069; WORD $0x0000 // imull $5243, %eax, %eax + SHRL $17, AX + CMPL DX, $999 JA LBB7_24 - ADDL $48, CX - MOVB CX, 0(DI) - MOVWLZX AX, AX + ADDL $48, AX + MOVB AX, 0(DI) + MOVWLZX DX, AX MOVL AX, CX SHRL $2, CX LONG $0x147bc969; WORD $0x0000 // imull $5243, %ecx, %ecx @@ -2170,24 +2215,24 @@ LBB7_22: WORD $0xc96b; BYTE $0x64 // imull $100, %ecx, %ecx SUBL CX, AX MOVWLZX AX, AX - LONG $0x310d8d48; WORD $0x0029; BYTE $0x00 // leaq $10545(%rip), %rcx /* _Digits(%rip) */ + LONG $0xcc0d8d48; WORD $0x0029; BYTE $0x00 // leaq $10700(%rip), %rcx /* _Digits(%rip) */ MOVB 0(CX)(AX*2), DX MOVB 1(CX)(AX*2), AX MOVB DX, 1(DI) MOVB AX, 2(DI) - MOVL $3, AX + MOVL $3, CX JMP LBB7_25 LBB7_24: - WORD $0xd16b; BYTE $0x64 // imull $100, %ecx, %edx - SUBL DX, AX - MOVWLZX CX, CX - LONG $0x0e058d4c; WORD $0x0029; BYTE $0x00 // leaq $10510(%rip), %r8 /* _Digits(%rip) */ - MOVB 0(R8)(CX*2), DX - MOVB 1(R8)(CX*2), CX - MOVB DX, 0(DI) - MOVB CX, 1(DI) + WORD $0xc86b; BYTE $0x64 // imull $100, %eax, %ecx + SUBL CX, DX MOVWLZX AX, AX + LONG $0xa9058d4c; WORD $0x0029; BYTE $0x00 // leaq $10665(%rip), %r8 /* _Digits(%rip) */ + MOVB 0(R8)(AX*2), CX + MOVB 1(R8)(AX*2), AX + MOVB 
CX, 0(DI) + MOVB AX, 1(DI) + MOVWLZX DX, AX MOVB 0(R8)(AX*2), CX ADDQ AX, AX MOVB CX, 2(DI) @@ -2195,35 +2240,35 @@ LBB7_24: MOVWLZX AX, AX MOVB 0(AX)(R8*1), AX MOVB AX, 3(DI) - MOVL $4, AX + MOVL $4, CX LBB7_25: - MOVQ $-6067343680855748867, CX - MOVQ SI, DX - LONG $0xf6f3e2c4; BYTE $0xc9 // mulxq %rcx, %rcx, %rcx - SHRQ $26, CX - LONG $0xc16ef9c5 // vmovd %ecx, %xmm0 - QUAD $0xfffffbe50d6ffac5 // vmovdqu $-1051(%rip), %xmm1 /* LCPI7_0(%rip) */ + MOVQ $-6067343680855748867, DX + MOVQ SI, AX + MULQ DX + SHRQ $26, DX + LONG $0xc26ef9c5 // vmovd %edx, %xmm0 + QUAD $0xfffffbe40d6ffac5 // vmovdqu $-1052(%rip), %xmm1 /* LCPI7_0(%rip) */ LONG $0xd1f4f9c5 // vpmuludq %xmm1, %xmm0, %xmm2 LONG $0xd273e9c5; BYTE $0x2d // vpsrlq $45, %xmm2, %xmm2 - MOVL $10000, DX - LONG $0x6ef9e1c4; BYTE $0xda // vmovq %rdx, %xmm3 + MOVL $10000, AX + LONG $0x6ef9e1c4; BYTE $0xd8 // vmovq %rax, %xmm3 LONG $0xe3f4e9c5 // vpmuludq %xmm3, %xmm2, %xmm4 LONG $0xc4faf9c5 // vpsubd %xmm4, %xmm0, %xmm0 LONG $0xc061e9c5 // vpunpcklwd %xmm0, %xmm2, %xmm0 LONG $0xf073f9c5; BYTE $0x02 // vpsllq $2, %xmm0, %xmm0 LONG $0xc070fbc5; BYTE $0x50 // vpshuflw $80, %xmm0, %xmm0 - QUAD $0xfffbe3155979e2c4; BYTE $0xff // vpbroadcastq $-1053(%rip), %xmm2 /* LCPI7_1(%rip) */ + QUAD $0xfffbe2155979e2c4; BYTE $0xff // vpbroadcastq $-1054(%rip), %xmm2 /* LCPI7_1(%rip) */ LONG $0xc070f9c5; BYTE $0x50 // vpshufd $80, %xmm0, %xmm0 LONG $0xc2e4f9c5 // vpmulhuw %xmm2, %xmm0, %xmm0 - QUAD $0xfffbd9255979e2c4; BYTE $0xff // vpbroadcastq $-1063(%rip), %xmm4 /* LCPI7_2(%rip) */ + QUAD $0xfffbd8255979e2c4; BYTE $0xff // vpbroadcastq $-1064(%rip), %xmm4 /* LCPI7_2(%rip) */ LONG $0xc4e4f9c5 // vpmulhuw %xmm4, %xmm0, %xmm0 - QUAD $0xfffffba52d6ffac5 // vmovdqu $-1115(%rip), %xmm5 /* LCPI7_3(%rip) */ + QUAD $0xfffffba42d6ffac5 // vmovdqu $-1116(%rip), %xmm5 /* LCPI7_3(%rip) */ LONG $0xf5d5f9c5 // vpmullw %xmm5, %xmm0, %xmm6 LONG $0xf673c9c5; BYTE $0x10 // vpsllq $16, %xmm6, %xmm6 LONG $0xc6f9f9c5 // vpsubw %xmm6, %xmm0, %xmm0 - LONG 
$0xe100c969; WORD $0x05f5 // imull $100000000, %ecx, %ecx - SUBL CX, SI + LONG $0xe100c269; WORD $0x05f5 // imull $100000000, %edx, %eax + SUBL AX, SI LONG $0xf66ef9c5 // vmovd %esi, %xmm6 LONG $0xc9f4c9c5 // vpmuludq %xmm1, %xmm6, %xmm1 LONG $0xd173f1c5; BYTE $0x2d // vpsrlq $45, %xmm1, %xmm1 @@ -2239,10 +2284,11 @@ LBB7_25: LONG $0xf273e9c5; BYTE $0x10 // vpsllq $16, %xmm2, %xmm2 LONG $0xcaf9f1c5 // vpsubw %xmm2, %xmm1, %xmm1 LONG $0xc167f9c5 // vpackuswb %xmm1, %xmm0, %xmm0 - QUAD $0xfffffb5705fcf9c5 // vpaddb $-1193(%rip), %xmm0, %xmm0 /* LCPI7_4(%rip) */ - MOVL AX, CX - LONG $0x047ffac5; BYTE $0x0f // vmovdqu %xmm0, (%rdi,%rcx) - ORL $16, AX + QUAD $0xfffffb5605fcf9c5 // vpaddb $-1194(%rip), %xmm0, %xmm0 /* LCPI7_4(%rip) */ + MOVL CX, AX + LONG $0x047ffac5; BYTE $0x07 // vmovdqu %xmm0, (%rdi,%rax) + ORL $16, CX + MOVL CX, AX BYTE $0x5d // popq %rbp RET @@ -2254,38 +2300,36 @@ LCPI8_1: QUAD $0x5c5c5c5c5c5c5c5c; QUAD $0x5c5c5c5c5c5c5c5c // .space 16, '\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' _unquote: - BYTE $0x55 // pushq %rbp - WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp - WORD $0x5741 // pushq %r15 - WORD $0x5641 // pushq %r14 - WORD $0x5541 // pushq %r13 - WORD $0x5441 // pushq %r12 - BYTE $0x53 // pushq %rbx - SUBQ $16, SP + BYTE $0x55 // pushq %rbp + WORD $0x8948; BYTE $0xe5 // movq %rsp, %rbp + WORD $0x5741 // pushq %r15 + WORD $0x5641 // pushq %r14 + WORD $0x5541 // pushq %r13 + WORD $0x5441 // pushq %r12 + BYTE $0x53 // pushq %rbx + BYTE $0x50 // pushq %rax TESTQ SI, SI JE LBB8_1 MOVQ CX, -48(BP) - MOVQ R8, AX - MOVQ R8, -56(BP) MOVL R8, R10 ANDL $1, R10 - LONG $0xfe058d4c; WORD $0x0028; BYTE $0x00 // leaq $10494(%rip), %r8 /* __UnquoteTab(%rip) */ - QUAD $0xffffff990d6ffec5 // vmovdqu $-103(%rip), %ymm1 /* LCPI8_0(%rip) */ + QUAD $0xffffffaa0d6ffec5 // vmovdqu $-86(%rip), %ymm1 /* LCPI8_0(%rip) */ + QUAD $0xffffffc2156ffac5 // vmovdqu $-62(%rip), %xmm2 /* LCPI8_1(%rip) */ MOVQ DI, R9 - MOVQ SI, R14 + MOVQ SI, R15 MOVQ DX, AX JMP LBB8_3 LBB8_1: - XORL R14, R14 
+ XORL R15, R15 MOVQ DX, AX LBB8_95: - ADDQ R14, AX + ADDQ R15, AX SUBQ DX, AX LBB8_96: - ADDQ $16, SP + ADDQ $8, SP BYTE $0x5b // popq %rbx WORD $0x5c41 // popq %r12 WORD $0x5d41 // popq %r13 @@ -2300,11 +2344,11 @@ LBB8_53: INCQ AX LBB8_54: - MOVQ R12, R14 - MOVQ R15, R9 + MOVQ R13, R15 + MOVQ R12, R9 LBB8_93: - TESTQ R14, R14 + TESTQ R15, R15 JE LBB8_94 LBB8_3: @@ -2314,86 +2358,97 @@ LBB8_3: JMP LBB8_20 LBB8_5: - MOVQ AX, R11 - MOVQ R9, R15 - MOVQ R14, R12 - CMPQ R14, $32 + MOVQ R15, R13 + MOVQ AX, R14 + MOVQ R9, R12 + CMPQ R15, $32 JL LBB8_10 - MOVQ R14, CX - MOVQ R9, R15 - MOVQ AX, R11 + MOVQ R9, R12 + MOVQ AX, R14 + MOVQ R15, R11 LBB8_7: - LONG $0x6f7ec1c4; BYTE $0x07 // vmovdqu (%r15), %ymm0 - LONG $0x7f7ec1c4; BYTE $0x03 // vmovdqu %ymm0, (%r11) - LONG $0xc174fdc5 // vpcmpeqb %ymm1, %ymm0, %ymm0 - LONG $0xd8d7fdc5 // vpmovmskb %ymm0, %ebx - TESTL BX, BX + LONG $0x6f7ec1c4; WORD $0x2404 // vmovdqu (%r12), %ymm0 + LONG $0x7f7ec1c4; BYTE $0x06 // vmovdqu %ymm0, (%r14) + LONG $0xc174fdc5 // vpcmpeqb %ymm1, %ymm0, %ymm0 + LONG $0xc8d7fdc5 // vpmovmskb %ymm0, %ecx + TESTL CX, CX JNE LBB8_8 - ADDQ $32, R15 - ADDQ $32, R11 - LEAQ -32(CX), R12 - CMPQ CX, $63 - MOVQ R12, CX + ADDQ $32, R12 + ADDQ $32, R14 + LEAQ -32(R11), R13 + CMPQ R11, $63 + MOVQ R13, R11 JG LBB8_7 LBB8_10: - WORD $0xf8c5; BYTE $0x77 // vzeroupper - CMPQ R12, $16 - JL LBB8_14 - LONG $0x6f7ac1c4; BYTE $0x07 // vmovdqu (%r15), %xmm0 - LONG $0x7f7ac1c4; BYTE $0x03 // vmovdqu %xmm0, (%r11) - QUAD $0xffffff100574f9c5 // vpcmpeqb $-240(%rip), %xmm0, %xmm0 /* LCPI8_1(%rip) */ - LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx - TESTW CX, CX - JE LBB8_13 - SUBQ R9, R15 - MOVWLZX CX, CX - LONG $0xbc0f48f3; BYTE $0xd9 // tzcntq %rcx, %rbx - ADDQ R15, BX - QUAD $0xfffffed10d6ffec5 // vmovdqu $-303(%rip), %ymm1 /* LCPI8_0(%rip) */ - JMP LBB8_19 - -LBB8_8: - SUBQ R9, R15 - MOVLQSX BX, CX - LONG $0xbc0f48f3; BYTE $0xd9 // tzcntq %rcx, %rbx - ADDQ R15, BX - JMP LBB8_19 - -LBB8_13: - ADDQ $16, R15 - ADDQ $16, R11 - ADDQ 
$-16, R12 - -LBB8_14: - QUAD $0xfffffeab0d6ffec5 // vmovdqu $-341(%rip), %ymm1 /* LCPI8_0(%rip) */ - TESTQ R12, R12 - JE LBB8_95 - XORL CX, CX + WORD $0xf8c5; BYTE $0x77 // vzeroupper + QUAD $0xffffff24156ffac5 // vmovdqu $-220(%rip), %xmm2 /* LCPI8_1(%rip) */ + CMPQ R13, $16 + JL LBB8_11 LBB8_16: - MOVBLZX 0(R15)(CX*1), BX + LONG $0x6f7ac1c4; WORD $0x2404 // vmovdqu (%r12), %xmm0 + LONG $0x7f7ac1c4; BYTE $0x06 // vmovdqu %xmm0, (%r14) + LONG $0xc274f9c5 // vpcmpeqb %xmm2, %xmm0, %xmm0 + LONG $0xc8d7f9c5 // vpmovmskb %xmm0, %ecx + TESTW CX, CX + JNE LBB8_17 + ADDQ $16, R12 + ADDQ $16, R14 + LEAQ -16(R13), CX + CMPQ R13, $31 + MOVQ CX, R13 + JG LBB8_16 + +LBB8_11: + TESTQ R13, R13 + JE LBB8_95 + XORL CX, CX + QUAD $0xfffffebe0d6ffec5 // vmovdqu $-322(%rip), %ymm1 /* LCPI8_0(%rip) */ + +LBB8_13: + MOVBLZX 0(R12)(CX*1), BX CMPB BX, $92 JE LBB8_18 - MOVB BX, 0(R11)(CX*1) + MOVB BX, 0(R14)(CX*1) INCQ CX - CMPQ R12, CX - JNE LBB8_16 + CMPQ R13, CX + JNE LBB8_13 JMP LBB8_95 LBB8_18: - ADDQ CX, R15 - SUBQ R9, R15 - MOVQ R15, BX - -LBB8_19: + ADDQ CX, R12 + SUBQ R9, R12 + MOVQ R12, BX CMPQ BX, $-1 - JE LBB8_95 + JNE LBB8_20 + JMP LBB8_95 + +LBB8_8: + MOVL CX, CX + MOVQ $4294967296, BX + ORQ BX, CX + SUBQ R9, R12 + BSFQ CX, BX + ADDQ R12, BX + CMPQ BX, $-1 + JNE LBB8_20 + JMP LBB8_95 + +LBB8_17: + MOVWLZX CX, CX + SUBQ R9, R12 + ORQ $65536, CX + BSFQ CX, BX + ADDQ R12, BX + QUAD $0xfffffe4f0d6ffec5 // vmovdqu $-433(%rip), %ymm1 /* LCPI8_0(%rip) */ + CMPQ BX, $-1 + JE LBB8_95 LBB8_20: LEAQ 2(BX), CX - SUBQ CX, R14 + SUBQ CX, R15 JS LBB8_21 LEAQ 2(R9)(BX*1), R9 TESTQ R10, R10 @@ -2402,7 +2457,8 @@ LBB8_20: LBB8_34: ADDQ BX, AX MOVBLZX -1(R9), CX - MOVB 0(CX)(R8*1), CX + LONG $0x131d8d48; WORD $0x0028; BYTE $0x00 // leaq $10259(%rip), %rbx /* __UnquoteTab(%rip) */ + MOVB 0(CX)(BX*1), CX CMPB CX, $-1 JE LBB8_38 TESTB CX, CX @@ -2412,66 +2468,66 @@ LBB8_34: JMP LBB8_93 LBB8_38: - CMPQ R14, $3 + CMPQ R15, $3 JLE LBB8_21 - MOVL 0(R9), R15 - MOVL R15, CX - NOTL CX - LEAL 
-808464432(R15), BX - ANDL $-2139062144, CX - TESTL BX, CX + MOVL 0(R9), R12 + MOVL R12, BX + NOTL BX + LEAL -808464432(R12), CX + ANDL $-2139062144, BX + TESTL CX, BX JNE LBB8_43 - LEAL 421075225(R15), BX - ORL R15, BX - TESTL $-2139062144, BX + LEAL 421075225(R12), CX + ORL R12, CX + TESTL $-2139062144, CX JNE LBB8_43 - MOVL R15, BX - ANDL $2139062143, BX + MOVL R12, CX + ANDL $2139062143, CX MOVL $-1061109568, R11 - SUBL BX, R11 - LEAL 1179010630(BX), R12 - ANDL CX, R11 - TESTL R12, R11 + SUBL CX, R11 + LEAL 1179010630(CX), R14 + ANDL BX, R11 + TESTL R14, R11 JNE LBB8_43 MOVL $-522133280, R11 - SUBL BX, R11 - ADDL $960051513, BX - ANDL R11, CX - TESTL BX, CX + SUBL CX, R11 + ADDL $960051513, CX + ANDL R11, BX + TESTL CX, BX JNE LBB8_43 - BSWAPL R15 - MOVL R15, CX + BSWAPL R12 + MOVL R12, CX SHRL $4, CX NOTL CX ANDL $16843009, CX LEAL 0(CX)(CX*8), CX - ANDL $252645135, R15 - ADDL CX, R15 - MOVL R15, R11 + ANDL $252645135, R12 + ADDL CX, R12 + MOVL R12, R11 SHRL $4, R11 - ORL R15, R11 - MOVL R11, CX - SHRL $8, CX - ANDL $65280, CX - MOVBLZX R11, R13 - ORL CX, R13 - LEAQ 4(R9), R15 - LEAQ -4(R14), R12 - CMPL R13, $127 + ORL R12, R11 + MOVL R11, R14 + SHRL $8, R14 + ANDL $65280, R14 + MOVBLZX R11, BX + ORL R14, BX + LEAQ 4(R9), R12 + LEAQ -4(R15), R13 + CMPL BX, $127 JBE LBB8_53 - CMPL R13, $2047 + CMPL BX, $2047 JBE LBB8_56 - MOVL R11, BX - ANDL $16252928, BX - CMPL BX, $14155776 + MOVL R11, CX + ANDL $16252928, CX + CMPL CX, $14155776 JE LBB8_59 - SHRL $12, CX - ORB $-32, CX - MOVB CX, 0(AX) - SHRL $6, R13 - ANDB $63, R13 - ORB $-128, R13 - MOVB R13, 1(AX) + SHRL $12, R14 + ORB $-32, R14 + MOVB R14, 0(AX) + SHRL $6, BX + ANDB $63, BX + ORB $-128, BX + MOVB BX, 1(AX) ANDB $63, R11 ORB $-128, R11 MOVB R11, 2(AX) @@ -2479,13 +2535,13 @@ LBB8_38: JMP LBB8_54 LBB8_23: - TESTL R14, R14 + TESTL R15, R15 JE LBB8_21 CMPB -1(R9), $92 JNE LBB8_25 CMPB 0(R9), $92 JNE LBB8_33 - CMPL R14, $1 + CMPL R15, $1 JLE LBB8_21 MOVB 1(R9), CX CMPB CX, $34 @@ -2495,17 +2551,17 @@ LBB8_23: 
LBB8_32: INCQ R9 - DECQ R14 + DECQ R15 LBB8_33: INCQ R9 - DECQ R14 + DECQ R15 JMP LBB8_34 LBB8_56: - SHRL $6, R13 - ORB $-64, R13 - MOVB R13, 0(AX) + SHRL $6, BX + ORB $-64, BX + MOVB BX, 0(AX) ANDB $63, R11 ORB $-128, R11 MOVB R11, 1(AX) @@ -2515,84 +2571,84 @@ LBB8_56: LBB8_59: TESTQ R10, R10 JNE LBB8_61 - MOVQ R12, R14 - MOVQ R15, R9 + MOVQ R13, R15 + MOVQ R12, R9 JMP LBB8_65 LBB8_61: - CMPQ R14, $4 + CMPQ R15, $4 JLE LBB8_21 - CMPB 0(R15), $92 + CMPB 0(R12), $92 JNE LBB8_63 - ADDQ $-5, R14 + ADDQ $-5, R15 ADDQ $5, R9 LBB8_65: - CMPL R13, $56319 + CMPL BX, $56319 JA LBB8_69 - CMPQ R14, $6 + CMPQ R15, $6 JL LBB8_69 CMPB 0(R9), $92 JNE LBB8_69 CMPB 1(R9), $117 JNE LBB8_69 - MOVL 2(R9), R11 - MOVL R11, CX - NOTL CX - LEAL -808464432(R11), BX - ANDL $-2139062144, CX - TESTL BX, CX + MOVL 2(R9), R14 + MOVL R14, R12 + NOTL R12 + LEAL -808464432(R14), CX + ANDL $-2139062144, R12 + TESTL CX, R12 JNE LBB8_77 - LEAL 421075225(R11), BX - ORL R11, BX - TESTL $-2139062144, BX + LEAL 421075225(R14), CX + ORL R14, CX + TESTL $-2139062144, CX JNE LBB8_77 - MOVL R11, BX - ANDL $2139062143, BX - MOVL $-1061109568, R15 - SUBL BX, R15 - LEAL 1179010630(BX), R12 - ANDL CX, R15 - TESTL R12, R15 + MOVL R14, CX + ANDL $2139062143, CX + MOVL $-1061109568, R11 + SUBL CX, R11 + LEAL 1179010630(CX), R13 + ANDL R12, R11 + TESTL R13, R11 JNE LBB8_77 - MOVL $-522133280, R15 - SUBL BX, R15 - ADDL $960051513, BX - ANDL R15, CX - TESTL BX, CX + MOVL $-522133280, R11 + SUBL CX, R11 + ADDL $960051513, CX + ANDL R11, R12 + TESTL CX, R12 JNE LBB8_77 - BSWAPL R11 - MOVL R11, CX + BSWAPL R14 + MOVL R14, CX SHRL $4, CX NOTL CX ANDL $16843009, CX LEAL 0(CX)(CX*8), CX - ANDL $252645135, R11 - ADDL CX, R11 - MOVL R11, BX - SHRL $4, BX - ORL R11, BX + ANDL $252645135, R14 + ADDL CX, R14 + MOVL R14, R11 + SHRL $4, R11 + ORL R14, R11 ADDQ $6, R9 - ADDQ $-6, R14 - MOVL BX, CX + ADDQ $-6, R15 + MOVL R11, CX ANDL $16515072, CX CMPL CX, $14417920 JNE LBB8_87 - MOVL BX, CX + MOVL R11, CX SHRL $8, CX ANDL $65280, 
CX - MOVBLZX BX, BX - ORL CX, BX - SHLL $10, R13 - LEAL -56613888(R13)(BX*1), BX + MOVBLZX R11, R11 + ORL CX, R11 + SHLL $10, BX + LEAL -56613888(BX)(R11*1), BX CMPL BX, $1114112 JB LBB8_92 - TESTB $2, -56(BP) + TESTB $2, R8 JE LBB8_88 JMP LBB8_70 LBB8_69: - TESTB $2, -56(BP) + TESTB $2, R8 JE LBB8_71 LBB8_70: @@ -2602,7 +2658,7 @@ LBB8_70: JMP LBB8_93 LBB8_87: - TESTB $2, -56(BP) + TESTB $2, R8 JE LBB8_88 MOVL $-272777233, 0(AX) MOVW $-16961, 4(AX) @@ -2701,7 +2757,7 @@ LBB8_36: JMP LBB8_96 LBB8_94: - XORL R14, R14 + XORL R15, R15 JMP LBB8_95 LBB8_88: @@ -2730,10 +2786,10 @@ LBB8_26: JMP LBB8_96 LBB8_63: - SUBQ DI, R15 - ADDQ $-4, R15 + SUBQ DI, R12 + ADDQ $-4, R12 MOVQ -48(BP), AX - MOVQ R15, 0(AX) + MOVQ R12, 0(AX) MOVQ $-4, AX JMP LBB8_96 @@ -2815,7 +2871,7 @@ _value: MOVQ DI, R15 MOVQ DI, -48(BP) MOVQ SI, -40(BP) - LONG $0xffe15be8; BYTE $0xff // callq _lspace + LONG $0xffe0e5e8; BYTE $0xff // callq _lspace MOVQ AX, -32(BP) CMPQ AX, BX JAE LBB9_4 @@ -2834,7 +2890,7 @@ LBB9_3: LEAQ -48(BP), DI LEAQ -32(BP), SI MOVQ R14, DX - LONG $0x000867e8; BYTE $0x00 // callq _vnumber + LONG $0x0008cae8; BYTE $0x00 // callq _vnumber JMP LBB9_5 LBB9_4: @@ -3126,31 +3182,32 @@ _vstring: BYTE $0x50 // pushq %rax MOVQ DX, R14 MOVQ SI, R12 - MOVQ DI, BX - MOVQ 0(SI), SI - MOVQ SI, -48(BP) + MOVQ DI, R15 + MOVQ 0(SI), BX + MOVQ BX, SI MOVL $34, DX MOVL $92, CX - LONG $0xffe180e8; BYTE $0xff // callq _strchr2 + LONG $0xffe15ce8; BYTE $0xff // callq _strchr2 TESTQ AX, AX JS LBB10_4 - MOVQ 0(BX), R11 - MOVQ $-1, CX - CMPB 0(R11)(AX*1), $34 + MOVQ BX, -48(BP) + MOVQ 0(R15), R13 + MOVQ $-1, DX + CMPB 0(R13)(AX*1), $34 JNE LBB10_7 - MOVQ AX, SI + MOVQ AX, CX LBB10_3: - MOVQ CX, 24(R14) - INCQ SI - MOVQ SI, 0(R12) + MOVQ DX, 24(R14) + INCQ CX + MOVQ CX, 0(R12) MOVQ -48(BP), AX MOVQ AX, 16(R14) MOVL $7, AX JMP LBB10_6 LBB10_4: - MOVQ 8(BX), R9 + MOVQ 8(R15), R9 LBB10_5: MOVQ R9, 0(R12) @@ -3170,47 +3227,49 @@ LBB10_6: LBB10_7: LEAQ 1(AX), R8 - MOVQ 8(BX), R9 - MOVQ R9, DI - SUBQ R8, DI - 
LEAQ 1(R11)(AX*1), R13 - MOVQ R13, DX - MOVQ DI, SI - MOVQ R13, BX - ANDQ $31, DX + MOVQ 8(R15), R9 + MOVQ R9, DX + SUBQ R8, DX + LEAQ 1(R13)(AX*1), R10 + MOVQ R10, CX + MOVQ DX, SI + MOVQ R10, DI + ANDQ $31, CX JE LBB10_11 - MOVQ R13, SI + MOVQ R10, SI ANDQ $-32, SI - LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 - QUAD $0xffffff270574fdc5 // vpcmpeqb $-217(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi - LONG $0xf76ae2c4; BYTE $0xf6 // sarxl %edx, %esi, %esi - TESTL SI, SI + LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 + QUAD $0xffffff230574fdc5 // vpcmpeqb $-221(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi + MOVLQSX SI, SI + SARQ CX, SI + TESTQ SI, SI JE LBB10_10 - MOVLQSX SI, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - CMPQ CX, DI - MOVQ $-1, SI - LONG $0xf14c0f48 // cmovlq %rcx, %rsi + MOVQ $4294967296, CX + ORQ CX, SI + BSFQ SI, SI + CMPQ SI, DX + MOVQ $-1, CX + LONG $0xce4c0f48 // cmovlq %rsi, %rcx JMP LBB10_34 LBB10_10: - MOVL $32, CX - SUBQ DX, CX - LEAQ 0(R13)(CX*1), BX - MOVQ DI, SI - SUBQ CX, SI + MOVL $32, BX + SUBQ CX, BX + LEAQ 0(R10)(BX*1), DI + MOVQ DX, SI + SUBQ BX, SI LBB10_11: CMPQ SI, $128 JL LBB10_15 - QUAD $0xfffffedb0d6ffec5 // vmovdqu $-293(%rip), %ymm1 /* LCPI10_0(%rip) */ + QUAD $0xfffffecd0d6ffec5 // vmovdqu $-307(%rip), %ymm1 /* LCPI10_0(%rip) */ LBB10_13: - LONG $0x236ffdc5 // vmovdqa (%rbx), %ymm4 - LONG $0x5b6ffdc5; BYTE $0x20 // vmovdqa $32(%rbx), %ymm3 - LONG $0x536ffdc5; BYTE $0x40 // vmovdqa $64(%rbx), %ymm2 - LONG $0x436ffdc5; BYTE $0x60 // vmovdqa $96(%rbx), %ymm0 + LONG $0x276ffdc5 // vmovdqa (%rdi), %ymm4 + LONG $0x5f6ffdc5; BYTE $0x20 // vmovdqa $32(%rdi), %ymm3 + LONG $0x576ffdc5; BYTE $0x40 // vmovdqa $64(%rdi), %ymm2 + LONG $0x476ffdc5; BYTE $0x60 // vmovdqa $96(%rdi), %ymm0 LONG $0xe974ddc5 // vpcmpeqb %ymm1, %ymm4, %ymm5 LONG $0xf174e5c5 // vpcmpeqb %ymm1, %ymm3, %ymm6 LONG $0xedebcdc5 // vpor %ymm5, %ymm6, %ymm5 @@ -3220,292 +3279,304 
@@ LBB10_13: LONG $0xeeebd5c5 // vpor %ymm6, %ymm5, %ymm5 LONG $0x177de2c4; BYTE $0xed // vptest %ymm5, %ymm5 JNE LBB10_21 - SUBQ $-128, BX - LEAQ -128(SI), DX + SUBQ $-128, DI + LEAQ -128(SI), BX CMPQ SI, $255 - MOVQ DX, SI + MOVQ BX, SI JG LBB10_13 JMP LBB10_16 LBB10_15: - MOVQ SI, DX + MOVQ SI, BX LBB10_16: - TESTQ DX, DX + TESTQ BX, BX JS LBB10_5 - LONG $0x036ffdc5 // vmovdqa (%rbx), %ymm0 - QUAD $0xfffffe770574fdc5 // vpcmpeqb $-393(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d - TESTL R10, R10 + LONG $0x076ffdc5 // vmovdqa (%rdi), %ymm0 + QUAD $0xfffffe690574fdc5 // vpcmpeqb $-407(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi + TESTL SI, SI JE LBB10_23 LBB10_18: - MOVQ BX, SI + MOVQ DI, CX LBB10_19: - MOVLQSX R10, CX - LONG $0xbc0f48f3; BYTE $0xd9 // tzcntq %rcx, %rbx - CMPQ BX, DX - JGE LBB10_5 - SUBQ R13, SI - ADDQ BX, SI - JMP LBB10_34 + MOVL SI, SI + MOVQ $4294967296, DI + ORQ DI, SI + BSFQ SI, SI + CMPQ SI, BX + JGE LBB10_5 + SUBQ R10, CX + ADDQ SI, CX + JMP LBB10_34 LBB10_21: - QUAD $0xfffffe470d74ddc5 // vpcmpeqb $-441(%rip), %ymm4, %ymm1 /* LCPI10_0(%rip) */ - LONG $0xd1d7fdc5 // vpmovmskb %ymm1, %edx - TESTL DX, DX - JE LBB10_26 - SUBQ R13, BX - MOVLQSX DX, CX - LONG $0xbc0f48f3; BYTE $0xf1 // tzcntq %rcx, %rsi - ADDQ BX, SI - JMP LBB10_34 + QUAD $0xfffffe2f0d74ddc5 // vpcmpeqb $-465(%rip), %ymm4, %ymm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx + TESTL CX, CX + JE LBB10_26 + MOVL CX, CX + SUBQ R10, DI + MOVQ $4294967296, SI + ORQ SI, CX + BSFQ CX, CX + ADDQ DI, CX + JMP LBB10_34 LBB10_23: - CMPQ DX, $31 + CMPQ BX, $31 JLE LBB10_5 - LEAQ 32(BX), SI - LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 - QUAD $0xfffffe120574fdc5 // vpcmpeqb $-494(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d - TESTL R10, R10 + LEAQ 32(DI), CX + LONG $0x016ffdc5 // vmovdqa (%rcx), %ymm0 + QUAD $0xfffffdef0574fdc5 // vpcmpeqb $-529(%rip), 
%ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi + TESTL SI, SI JE LBB10_30 - ADDQ $-32, DX + ADDQ $-32, BX JMP LBB10_19 LBB10_26: - QUAD $0xfffffdfb0d74e5c5 // vpcmpeqb $-517(%rip), %ymm3, %ymm1 /* LCPI10_0(%rip) */ - LONG $0xd1d7fdc5 // vpmovmskb %ymm1, %edx - TESTL DX, DX - JE LBB10_28 - MOVLQSX DX, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - SUBQ R13, BX - LEAQ 32(BX)(CX*1), SI - JMP LBB10_34 + QUAD $0xfffffdd90d74e5c5 // vpcmpeqb $-551(%rip), %ymm3, %ymm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx + TESTL CX, CX + JE LBB10_28 + MOVL CX, CX + MOVQ $4294967296, SI + ORQ SI, CX + BSFQ CX, CX + SUBQ R10, DI + LEAQ 32(DI)(CX*1), CX + JMP LBB10_34 LBB10_28: - QUAD $0xfffffdd90d74edc5 // vpcmpeqb $-551(%rip), %ymm2, %ymm1 /* LCPI10_0(%rip) */ - LONG $0xd1d7fdc5 // vpmovmskb %ymm1, %edx - SUBQ R13, BX - TESTL DX, DX - JE LBB10_33 - MOVLQSX DX, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - LEAQ 64(BX)(CX*1), SI - JMP LBB10_34 + QUAD $0xfffffdac0d74edc5 // vpcmpeqb $-596(%rip), %ymm2, %ymm1 /* LCPI10_0(%rip) */ + LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx + SUBQ R10, DI + TESTL CX, CX + JE LBB10_33 + MOVL CX, CX + MOVQ $4294967296, SI + ORQ SI, CX + BSFQ CX, CX + LEAQ 64(DI)(CX*1), CX + JMP LBB10_34 LBB10_30: - CMPQ DX, $64 + CMPQ BX, $64 JL LBB10_5 - LEAQ 64(BX), SI - LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 - QUAD $0xfffffda50574fdc5 // vpcmpeqb $-603(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d - TESTL R10, R10 - JE LBB10_71 - ADDQ $-64, DX + LEAQ 64(DI), CX + LONG $0x016ffdc5 // vmovdqa (%rcx), %ymm0 + QUAD $0xfffffd6d0574fdc5 // vpcmpeqb $-659(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi + TESTL SI, SI + JE LBB10_72 + ADDQ $-64, BX JMP LBB10_19 LBB10_33: - QUAD $0xfffffd870574fdc5 // vpcmpeqb $-633(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xc8d7fdc5 // vpmovmskb %ymm0, %ecx - MOVLQSX CX, CX - LONG 
$0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - LEAQ 96(BX)(CX*1), SI + QUAD $0xfffffd500574fdc5 // vpcmpeqb $-688(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xc8d7fdc5 // vpmovmskb %ymm0, %ecx + MOVQ $4294967296, SI + ORQ SI, CX + BSFQ CX, CX + LEAQ 96(DI)(CX*1), CX LBB10_34: - CMPQ SI, DI + CMPQ CX, DX JAE LBB10_5 - ADDQ R8, SI + ADDQ R8, CX JS LBB10_5 - LEAQ -1(R11), R10 - QUAD $0xfffffd58056ffec5 // vmovdqu $-680(%rip), %ymm0 /* LCPI10_0(%rip) */ + LEAQ -1(R13), R10 + QUAD $0xfffffd18056ffec5 // vmovdqu $-744(%rip), %ymm0 /* LCPI10_0(%rip) */ LBB10_37: - LEAQ 0(R10)(SI*1), DX - XORL CX, CX + LEAQ 0(R10)(CX*1), SI + XORL DX, DX LBB10_38: - CMPB 0(DX)(CX*1), $92 - LEAQ -1(CX), CX + CMPB 0(SI)(DX*1), $92 + LEAQ -1(DX), DX JE LBB10_38 - NOTL CX - TESTB $1, CX - JE LBB10_70 - LEAQ 1(SI), R13 - MOVQ R9, BX - SUBQ R13, BX - LEAQ 1(R11)(SI*1), R15 - MOVQ R15, SI - MOVQ BX, CX - MOVQ R15, DI - ANDQ $31, SI + NOTL DX + TESTB $1, DX + JE LBB10_71 + LEAQ 1(CX), R15 + MOVQ R9, R11 + SUBQ R15, R11 + LEAQ 1(R13)(CX*1), SI + MOVQ SI, CX + MOVQ R11, DI + MOVQ SI, BX + ANDQ $31, CX JE LBB10_44 - MOVQ R15, CX - ANDQ $-32, CX - LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 - LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx - LONG $0xf74ae2c4; BYTE $0xc9 // sarxl %esi, %ecx, %ecx - TESTL CX, CX + MOVQ SI, DI + ANDQ $-32, DI + LONG $0x0f74fdc5 // vpcmpeqb (%rdi), %ymm0, %ymm1 + LONG $0xf9d7fdc5 // vpmovmskb %ymm1, %edi + MOVLQSX DI, DI + SARQ CX, DI + TESTQ DI, DI JE LBB10_43 - MOVLQSX CX, CX - LONG $0xbc0f48f3; BYTE $0xf1 // tzcntq %rcx, %rsi - CMPQ SI, BX - MOVQ $-1, CX - LONG $0xf14d0f48 // cmovgeq %rcx, %rsi - JMP LBB10_60 + MOVQ $4294967296, CX + ORQ CX, DI + BSFQ DI, CX + CMPQ CX, R11 + MOVQ $-1, DX + LONG $0xca4d0f48 // cmovgeq %rdx, %rcx + JMP LBB10_61 LBB10_43: MOVL $32, DX - SUBQ SI, DX - LEAQ 0(R15)(DX*1), DI - MOVQ BX, CX - SUBQ DX, CX + SUBQ CX, DX + LEAQ 0(SI)(DX*1), BX + MOVQ R11, DI + SUBQ DX, DI LBB10_44: - MOVQ CX, SI - CMPQ CX, $128 + MOVQ DI, CX + CMPQ DI, $128 
JL LBB10_47 LBB10_45: - LONG $0x2774fdc5 // vpcmpeqb (%rdi), %ymm0, %ymm4 - LONG $0x5f74fdc5; BYTE $0x20 // vpcmpeqb $32(%rdi), %ymm0, %ymm3 - LONG $0x5774fdc5; BYTE $0x40 // vpcmpeqb $64(%rdi), %ymm0, %ymm2 - LONG $0x4f74fdc5; BYTE $0x60 // vpcmpeqb $96(%rdi), %ymm0, %ymm1 + LONG $0x2374fdc5 // vpcmpeqb (%rbx), %ymm0, %ymm4 + LONG $0x5b74fdc5; BYTE $0x20 // vpcmpeqb $32(%rbx), %ymm0, %ymm3 + LONG $0x5374fdc5; BYTE $0x40 // vpcmpeqb $64(%rbx), %ymm0, %ymm2 + LONG $0x4b74fdc5; BYTE $0x60 // vpcmpeqb $96(%rbx), %ymm0, %ymm1 LONG $0xecebe5c5 // vpor %ymm4, %ymm3, %ymm5 LONG $0xf1ebedc5 // vpor %ymm1, %ymm2, %ymm6 LONG $0xeeebd5c5 // vpor %ymm6, %ymm5, %ymm5 LONG $0x177de2c4; BYTE $0xed // vptest %ymm5, %ymm5 JNE LBB10_53 - SUBQ $-128, DI - LEAQ -128(SI), CX - CMPQ SI, $255 - MOVQ CX, SI + SUBQ $-128, BX + LEAQ -128(CX), DI + CMPQ CX, $255 + MOVQ DI, CX JG LBB10_45 LBB10_47: - TESTQ CX, CX + TESTQ DI, DI JS LBB10_5 - LONG $0x0f74fdc5 // vpcmpeqb (%rdi), %ymm0, %ymm1 + LONG $0x0b74fdc5 // vpcmpeqb (%rbx), %ymm0, %ymm1 LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d TESTL R8, R8 JE LBB10_50 LBB10_49: - MOVQ DI, SI - MOVLQSX R8, DX - LONG $0xbc0f48f3; BYTE $0xfa // tzcntq %rdx, %rdi - CMPQ DI, CX - JL LBB10_58 - JMP LBB10_5 + MOVQ BX, CX + JMP LBB10_58 LBB10_50: - CMPQ CX, $31 - JLE LBB10_5 - LEAQ 32(DI), SI - LONG $0x0e74fdc5 // vpcmpeqb (%rsi), %ymm0, %ymm1 - LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d - TESTL R8, R8 - JE LBB10_55 - ADDQ $-32, CX - MOVLQSX R8, DX - LONG $0xbc0f48f3; BYTE $0xfa // tzcntq %rdx, %rdi - CMPQ DI, CX - JL LBB10_58 - JMP LBB10_5 + CMPQ DI, $31 + JLE LBB10_5 + LEAQ 32(BX), CX + LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 + JE LBB10_55 + ADDQ $-32, DI + JMP LBB10_58 LBB10_53: - LONG $0xccd7fdc5 // vpmovmskb %ymm4, %ecx - TESTL CX, CX - JE LBB10_62 - SUBQ R15, DI - MOVLQSX CX, CX - LONG $0xbc0f48f3; BYTE $0xf1 // tzcntq %rcx, %rsi - JMP LBB10_59 + LONG $0xccd7fdc5 // vpmovmskb %ymm4, %ecx + 
TESTL CX, CX + JE LBB10_63 + MOVL CX, CX + SUBQ SI, BX + MOVQ $4294967296, DX + ORQ DX, CX + BSFQ CX, CX + JMP LBB10_60 LBB10_55: - CMPQ CX, $64 - JL LBB10_5 - LEAQ 64(DI), SI - LONG $0x0e74fdc5 // vpcmpeqb (%rsi), %ymm0, %ymm1 - LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d - TESTL R8, R8 - JE LBB10_64 - ADDQ $-64, CX - MOVLQSX R8, DX - LONG $0xbc0f48f3; BYTE $0xfa // tzcntq %rdx, %rdi - CMPQ DI, CX - JGE LBB10_5 + CMPQ DI, $64 + JL LBB10_5 + LEAQ 64(BX), CX + LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 + JE LBB10_65 + ADDQ $-64, DI LBB10_58: - SUBQ R15, SI - -LBB10_59: - ADDQ DI, SI + MOVL R8, DX + MOVQ $4294967296, BX + ORQ BX, DX + BSFQ DX, BX + CMPQ BX, DI + JGE LBB10_5 + SUBQ SI, CX LBB10_60: - CMPQ SI, BX + ADDQ BX, CX + +LBB10_61: + CMPQ CX, R11 JAE LBB10_5 - ADDQ R13, SI + ADDQ R15, CX JNS LBB10_37 JMP LBB10_5 -LBB10_62: - LONG $0xcbd7fdc5 // vpmovmskb %ymm3, %ecx - TESTL CX, CX - JE LBB10_67 - MOVLQSX CX, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - SUBQ R15, DI - LEAQ 32(DI)(CX*1), SI - JMP LBB10_60 +LBB10_63: + LONG $0xcbd7fdc5 // vpmovmskb %ymm3, %ecx + TESTL CX, CX + JE LBB10_68 + MOVL CX, CX + MOVQ $4294967296, DX + ORQ DX, CX + BSFQ CX, CX + SUBQ SI, BX + LEAQ 32(BX)(CX*1), CX + JMP LBB10_61 -LBB10_64: - CMPQ CX, $96 +LBB10_65: + CMPQ DI, $96 JL LBB10_5 - ADDQ $96, DI - LONG $0x0f74fdc5 // vpcmpeqb (%rdi), %ymm0, %ymm1 + ADDQ $96, BX + LONG $0x0b74fdc5 // vpcmpeqb (%rbx), %ymm0, %ymm1 LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d TESTL R8, R8 JE LBB10_5 - ADDQ $-96, CX + ADDQ $-96, DI JMP LBB10_49 -LBB10_67: - LONG $0xcad7fdc5 // vpmovmskb %ymm2, %ecx - SUBQ R15, DI - TESTL CX, CX - JE LBB10_69 - MOVLQSX CX, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - LEAQ 64(DI)(CX*1), SI - JMP LBB10_60 - -LBB10_69: - LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx - MOVLQSX CX, CX - LONG $0xbc0f48f3; BYTE $0xc9 // tzcntq %rcx, %rcx - LEAQ 96(DI)(CX*1), SI - JMP LBB10_60 +LBB10_68: + LONG 
$0xcad7fdc5 // vpmovmskb %ymm2, %ecx + SUBQ SI, BX + TESTL CX, CX + JE LBB10_70 + MOVL CX, CX + MOVQ $4294967296, DX + ORQ DX, CX + BSFQ CX, CX + LEAQ 64(BX)(CX*1), CX + JMP LBB10_61 LBB10_70: - MOVQ AX, CX - TESTQ SI, SI + LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx + MOVQ $4294967296, DX + ORQ DX, CX + BSFQ CX, CX + LEAQ 96(BX)(CX*1), CX + JMP LBB10_61 + +LBB10_71: + MOVQ AX, DX + TESTQ CX, CX JNS LBB10_3 JMP LBB10_5 -LBB10_71: - CMPQ DX, $96 +LBB10_72: + CMPQ BX, $96 JL LBB10_5 - ADDQ $96, BX - LONG $0x036ffdc5 // vmovdqa (%rbx), %ymm0 - QUAD $0xfffffb390574fdc5 // vpcmpeqb $-1223(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ - LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d - TESTL R10, R10 + ADDQ $96, DI + LONG $0x076ffdc5 // vmovdqa (%rdi), %ymm0 + QUAD $0xfffffad50574fdc5 // vpcmpeqb $-1323(%rip), %ymm0, %ymm0 /* LCPI10_0(%rip) */ + LONG $0xf0d7fdc5 // vpmovmskb %ymm0, %esi + TESTL SI, SI JE LBB10_5 - ADDQ $-96, DX + ADDQ $-96, BX JMP LBB10_18 LCPI11_0: @@ -3590,9 +3661,10 @@ LBB11_11: JNO LBB11_11 LBB11_15: - LONG $0x2af3e1c4; BYTE $0xcf // vcvtsi2sd %rdi, %xmm1, %xmm1 - LONG $0xc32aebc5 // vcvtsi2sd %ebx, %xmm2, %xmm0 - QUAD $0xfffeec05b9f1e2c4; BYTE $0xff // vfmadd231sd $-276(%rip), %xmm1, %xmm0 /* LCPI11_0(%rip) */ + LONG $0x2af3e1c4; BYTE $0xc7 // vcvtsi2sd %rdi, %xmm1, %xmm0 + QUAD $0xfffffef10559fbc5 // vmulsd $-271(%rip), %xmm0, %xmm0 /* LCPI11_0(%rip) */ + LONG $0xcb2af3c5 // vcvtsi2sd %ebx, %xmm1, %xmm1 + LONG $0xc158fbc5 // vaddsd %xmm1, %xmm0, %xmm0 MOVQ $8, 0(DX) LEAQ 1(R13), AX CMPQ AX, R15 @@ -3604,16 +3676,16 @@ LBB11_15: CMPB BX, $9 JA LBB11_26 ADDQ $2, R13 - QUAD $0xfffffeba0d10fbc5 // vmovsd $-326(%rip), %xmm1 /* LCPI11_0(%rip) */ + QUAD $0xfffffeb70d10fbc5 // vmovsd $-329(%rip), %xmm1 /* LCPI11_0(%rip) */ LBB11_18: - LONG $0xd028f9c5 // vmovapd %xmm0, %xmm2 MOVQ R13, CX MOVBLZX DI, AX + LONG $0xc159fbc5 // vmulsd %xmm1, %xmm0, %xmm0 ADDL $-48, AX IMULL R10, AX - LONG $0xc02ae3c5 // vcvtsi2sd %eax, %xmm3, %xmm0 - LONG $0xb9e9e2c4; BYTE $0xc1 // 
vfmadd231sd %xmm1, %xmm2, %xmm0 + LONG $0xd02ae3c5 // vcvtsi2sd %eax, %xmm3, %xmm2 + LONG $0xc258fbc5 // vaddsd %xmm2, %xmm0, %xmm0 CMPQ R15, R13 JE LBB11_24 MOVBLZX 0(R8)(CX*1), DI @@ -3720,16 +3792,16 @@ LBB11_40: JL LBB11_44 CMPL R11, $308 JLE LBB11_43 - QUAD $0xfffffd840d10fbc5 // vmovsd $-636(%rip), %xmm1 /* LCPI11_1(%rip) */ + QUAD $0xfffffd820d10fbc5 // vmovsd $-638(%rip), %xmm1 /* LCPI11_1(%rip) */ JMP LBB11_44 LBB11_43: ADDL $323, R11 - LONG $0x280d8d48; WORD $0x0018; BYTE $0x00 // leaq $6184(%rip), %rcx /* _P10_TAB(%rip) */ + LONG $0x250d8d48; WORD $0x0018; BYTE $0x00 // leaq $6181(%rip), %rcx /* _P10_TAB(%rip) */ LONG $0x596ba1c4; WORD $0xd90c // vmulsd (%rcx,%r11,8), %xmm2, %xmm1 LBB11_44: - LONG $0xc058f3c5 // vaddsd %xmm0, %xmm1, %xmm0 + LONG $0xc158fbc5 // vaddsd %xmm1, %xmm0, %xmm0 CMPQ AX, R15 JAE LBB11_48 @@ -3838,12 +3910,12 @@ LBB11_67: LBB11_70: CMPL R11, $308 JLE LBB11_72 - QUAD $0xfffffc5d0510fbc5 // vmovsd $-931(%rip), %xmm0 /* LCPI11_1(%rip) */ + QUAD $0xfffffc5b0510fbc5 // vmovsd $-933(%rip), %xmm0 /* LCPI11_1(%rip) */ JMP LBB11_73 LBB11_72: ADDL $323, R11 - LONG $0x01058d48; WORD $0x0017; BYTE $0x00 // leaq $5889(%rip), %rax /* _P10_TAB(%rip) */ + LONG $0xfe058d48; WORD $0x0016; BYTE $0x00 // leaq $5886(%rip), %rax /* _P10_TAB(%rip) */ LONG $0x597ba1c4; WORD $0xd804 // vmulsd (%rax,%r11,8), %xmm0, %xmm0 LBB11_73: @@ -4144,7 +4216,7 @@ LBB15_7: MOVQ 0(R15), DI MOVQ 8(R15), SI MOVQ 0(BX), DX - LONG $0xffd242e8; BYTE $0xff // callq _lspace + LONG $0xffd167e8; BYTE $0xff // callq _lspace MOVQ AX, CX MOVQ AX, 0(BX) CMPQ AX, 8(R15) @@ -4249,7 +4321,7 @@ LBB15_27: LBB15_28: LEAQ -80(BP), R8 MOVQ R14, R9 - LONG $0x000990e8; BYTE $0x00 // callq _advance_number + LONG $0x00098ee8; BYTE $0x00 // callq _advance_number TESTQ AX, AX LONG $0xf0480f4c // cmovsq %rax, %r14 MOVQ R14, AX @@ -4625,29 +4697,29 @@ _skip_string: MOVQ R15, SI MOVL $34, DX MOVL $92, CX - LONG $0xffd050e8; BYTE $0xff // callq _strchr2 + LONG $0xffcfc6e8; BYTE $0xff // callq 
_strchr2 TESTQ AX, AX JS LBB18_3 MOVQ AX, CX - MOVQ 0(BX), R10 - CMPB 0(R10)(AX*1), $34 + MOVQ 0(BX), R11 + CMPB 0(R11)(AX*1), $34 JNE LBB18_6 LBB18_2: DECQ R15 INCQ CX - MOVQ CX, R11 + MOVQ CX, R12 MOVQ R15, AX JMP LBB18_5 LBB18_3: - MOVQ 8(BX), R11 + MOVQ 8(BX), R12 LBB18_4: MOVQ $-1, AX LBB18_5: - MOVQ R11, 0(R14) + MOVQ R12, 0(R14) ADDQ $8, SP BYTE $0x5b // popq %rbx WORD $0x5c41 // popq %r12 @@ -4659,42 +4731,44 @@ LBB18_5: RET LBB18_6: + MOVQ $4294967296, R9 LEAQ 1(CX), R8 - MOVQ 8(BX), R11 - MOVQ R11, DX + MOVQ 8(BX), R12 + MOVQ R12, DX SUBQ R8, DX - LEAQ 1(R10)(CX*1), BX - MOVQ BX, CX + LEAQ 1(R11)(CX*1), DI + MOVQ DI, CX MOVQ DX, AX - MOVQ BX, SI + MOVQ DI, SI ANDQ $31, CX JE LBB18_10 - MOVQ BX, AX + MOVQ DI, AX ANDQ $-32, AX - LONG $0x006ffdc5 // vmovdqa (%rax), %ymm0 - QUAD $0xffffff420574fdc5 // vpcmpeqb $-190(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc0d7fdc5 // vpmovmskb %ymm0, %eax - LONG $0xf772e2c4; BYTE $0xc0 // sarxl %ecx, %eax, %eax - TESTL AX, AX + LONG $0x006ffdc5 // vmovdqa (%rax), %ymm0 + QUAD $0xffffff380574fdc5 // vpcmpeqb $-200(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xc0d7fdc5 // vpmovmskb %ymm0, %eax + WORD $0x9848 // cltq + SARQ CX, AX + TESTQ AX, AX JE LBB18_9 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + ORQ R9, AX + BSFQ AX, AX CMPQ AX, DX MOVQ $-1, CX - LONG $0xc84c0f48 // cmovlq %rax, %rcx + LONG $0xc84c0f48 // cmovlq %rax, %rcx JMP LBB18_33 LBB18_9: - MOVL $32, DI - SUBQ CX, DI - LEAQ 0(BX)(DI*1), SI + MOVL $32, BX + SUBQ CX, BX + LEAQ 0(DI)(BX*1), SI MOVQ DX, AX - SUBQ DI, AX + SUBQ BX, AX LBB18_10: CMPQ AX, $128 JL LBB18_14 - QUAD $0xfffffef90d6ffec5 // vmovdqu $-263(%rip), %ymm1 /* LCPI18_0(%rip) */ + QUAD $0xfffffeee0d6ffec5 // vmovdqu $-274(%rip), %ymm1 /* LCPI18_0(%rip) */ LBB18_12: LONG $0x266ffdc5 // vmovdqa (%rsi), %ymm4 @@ -4711,99 +4785,103 @@ LBB18_12: LONG $0x177de2c4; BYTE $0xed // vptest %ymm5, %ymm5 JNE LBB18_20 SUBQ $-128, SI - LEAQ -128(AX), DI + LEAQ -128(AX), BX 
CMPQ AX, $255 - MOVQ DI, AX + MOVQ BX, AX JG LBB18_12 JMP LBB18_15 LBB18_14: - MOVQ AX, DI + MOVQ AX, BX LBB18_15: MOVQ $-1, AX - TESTQ DI, DI + TESTQ BX, BX JS LBB18_5 LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 - QUAD $0xfffffe8f0574fdc5 // vpcmpeqb $-369(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc8d77dc5 // vpmovmskb %ymm0, %r9d - TESTL R9, R9 + QUAD $0xfffffe840574fdc5 // vpcmpeqb $-380(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d + TESTL R10, R10 JE LBB18_22 LBB18_17: MOVQ SI, CX LBB18_18: - MOVLQSX R9, SI - LONG $0xbc0f48f3; BYTE $0xf6 // tzcntq %rsi, %rsi - CMPQ SI, DI - JGE LBB18_5 - SUBQ BX, CX - ADDQ SI, CX - JMP LBB18_33 + MOVL R10, SI + ORQ R9, SI + BSFQ SI, SI + CMPQ SI, BX + JGE LBB18_5 + SUBQ DI, CX + ADDQ SI, CX + JMP LBB18_33 LBB18_20: - QUAD $0xfffffe5f0d74ddc5 // vpcmpeqb $-417(%rip), %ymm4, %ymm1 /* LCPI18_0(%rip) */ - LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax + QUAD $0xfffffe520d74ddc5 // vpcmpeqb $-430(%rip), %ymm4, %ymm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax TESTL AX, AX JE LBB18_25 - SUBQ BX, SI - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc8 // tzcntq %rax, %rcx + MOVL AX, AX + SUBQ DI, SI + ORQ R9, AX + BSFQ AX, CX ADDQ SI, CX JMP LBB18_33 LBB18_22: - CMPQ DI, $31 + CMPQ BX, $31 JLE LBB18_5 LEAQ 32(SI), CX LONG $0x016ffdc5 // vmovdqa (%rcx), %ymm0 - QUAD $0xfffffe2b0574fdc5 // vpcmpeqb $-469(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc8d77dc5 // vpmovmskb %ymm0, %r9d - TESTL R9, R9 + QUAD $0xfffffe1c0574fdc5 // vpcmpeqb $-484(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d + TESTL R10, R10 JE LBB18_29 - ADDQ $-32, DI + ADDQ $-32, BX JMP LBB18_18 LBB18_25: - QUAD $0xfffffe140d74e5c5 // vpcmpeqb $-492(%rip), %ymm3, %ymm1 /* LCPI18_0(%rip) */ - LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax + QUAD $0xfffffe050d74e5c5 // vpcmpeqb $-507(%rip), %ymm3, %ymm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax 
TESTL AX, AX JE LBB18_27 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax - SUBQ BX, SI + MOVL AX, AX + ORQ R9, AX + BSFQ AX, AX + SUBQ DI, SI LEAQ 32(SI)(AX*1), CX JMP LBB18_33 LBB18_27: - QUAD $0xfffffdf30d74edc5 // vpcmpeqb $-525(%rip), %ymm2, %ymm1 /* LCPI18_0(%rip) */ - LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax - SUBQ BX, SI + QUAD $0xfffffde20d74edc5 // vpcmpeqb $-542(%rip), %ymm2, %ymm1 /* LCPI18_0(%rip) */ + LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax + SUBQ DI, SI TESTL AX, AX JE LBB18_32 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + MOVL AX, AX + ORQ R9, AX + BSFQ AX, AX LEAQ 64(SI)(AX*1), CX JMP LBB18_33 LBB18_29: - CMPQ DI, $64 + CMPQ BX, $64 JL LBB18_5 LEAQ 64(SI), CX LONG $0x016ffdc5 // vmovdqa (%rcx), %ymm0 - QUAD $0xfffffdc00574fdc5 // vpcmpeqb $-576(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc8d77dc5 // vpmovmskb %ymm0, %r9d - TESTL R9, R9 - JE LBB18_71 - ADDQ $-64, DI + QUAD $0xfffffdad0574fdc5 // vpcmpeqb $-595(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d + TESTL R10, R10 + JE LBB18_72 + ADDQ $-64, BX JMP LBB18_18 LBB18_32: - QUAD $0xfffffda20574fdc5 // vpcmpeqb $-606(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc0d7fdc5 // vpmovmskb %ymm0, %eax - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + QUAD $0xfffffd8f0574fdc5 // vpcmpeqb $-625(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xc0d7fdc5 // vpmovmskb %ymm0, %eax + ORQ R9, AX + BSFQ AX, AX LEAQ 96(SI)(AX*1), CX LBB18_33: @@ -4812,53 +4890,54 @@ LBB18_33: JAE LBB18_5 ADDQ R8, CX JS LBB18_5 - LEAQ -1(R10), R9 - QUAD $0xfffffd6d056ffec5 // vmovdqu $-659(%rip), %ymm0 /* LCPI18_0(%rip) */ - MOVQ $-1, R8 + LEAQ -1(R11), R10 + QUAD $0xfffffd5a056ffec5 // vmovdqu $-678(%rip), %ymm0 /* LCPI18_0(%rip) */ LBB18_36: - LEAQ 0(R9)(CX*1), DX + LEAQ 0(R10)(CX*1), DX XORL AX, AX LBB18_37: - CMPB 0(DX)(AX*1), $92 - LEAQ -1(AX), AX - JE LBB18_37 - NOTL AX - TESTB $1, AX - 
JE LBB18_69 - LEAQ 1(CX), R12 - MOVQ R11, DX - SUBQ R12, DX - LEAQ 1(R10)(CX*1), R13 - MOVQ R13, AX - MOVQ DX, BX - MOVQ R13, SI - ANDQ $31, AX - JE LBB18_43 - MOVQ R13, CX - ANDQ $-32, CX - LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 - LONG $0xc9d7fdc5 // vpmovmskb %ymm1, %ecx - LONG $0xf77ae2c4; BYTE $0xc9 // sarxl %eax, %ecx, %ecx - TESTL CX, CX - JE LBB18_42 - MOVLQSX CX, AX - LONG $0xbc0f48f3; BYTE $0xc8 // tzcntq %rax, %rcx - CMPQ CX, DX - LONG $0xc84d0f49 // cmovgeq %r8, %rcx - JMP LBB18_59 + CMPB 0(DX)(AX*1), $92 + LEAQ -1(AX), AX + JE LBB18_37 + NOTL AX + TESTB $1, AX + JE LBB18_70 + LEAQ 1(CX), R13 + MOVQ R12, DX + SUBQ R13, DX + LEAQ 1(R11)(CX*1), BX + MOVQ BX, CX + MOVQ DX, DI + MOVQ BX, SI + ANDQ $31, CX + JE LBB18_43 + MOVQ BX, AX + ANDQ $-32, AX + LONG $0x0874fdc5 // vpcmpeqb (%rax), %ymm0, %ymm1 + LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax + WORD $0x9848 // cltq + SARQ CX, AX + TESTQ AX, AX + JE LBB18_42 + ORQ R9, AX + BSFQ AX, CX + CMPQ CX, DX + MOVQ $-1, AX + LONG $0xc84d0f48 // cmovgeq %rax, %rcx + JMP LBB18_60 LBB18_42: - MOVL $32, CX - SUBQ AX, CX - LEAQ 0(R13)(CX*1), SI - MOVQ DX, BX - SUBQ CX, BX + MOVL $32, AX + SUBQ CX, AX + LEAQ 0(BX)(AX*1), SI + MOVQ DX, DI + SUBQ AX, DI LBB18_43: - MOVQ BX, AX - CMPQ BX, $128 + MOVQ DI, AX + CMPQ DI, $128 JL LBB18_46 LBB18_44: @@ -4872,133 +4951,129 @@ LBB18_44: LONG $0x177de2c4; BYTE $0xed // vptest %ymm5, %ymm5 JNE LBB18_52 SUBQ $-128, SI - LEAQ -128(AX), BX + LEAQ -128(AX), DI CMPQ AX, $255 - MOVQ BX, AX + MOVQ DI, AX JG LBB18_44 LBB18_46: MOVQ $-1, AX - TESTQ BX, BX + TESTQ DI, DI JS LBB18_5 LONG $0x0e74fdc5 // vpcmpeqb (%rsi), %ymm0, %ymm1 - LONG $0xf9d7fdc5 // vpmovmskb %ymm1, %edi - TESTL DI, DI + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 JE LBB18_49 LBB18_48: - MOVQ SI, CX - MOVLQSX DI, SI - LONG $0xbc0f48f3; BYTE $0xf6 // tzcntq %rsi, %rsi - CMPQ SI, BX - JL LBB18_57 - JMP LBB18_5 + MOVQ SI, CX + JMP LBB18_57 LBB18_49: - CMPQ BX, $31 - JLE LBB18_5 - LEAQ 32(SI), CX - LONG 
$0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 - LONG $0xf9d7fdc5 // vpmovmskb %ymm1, %edi - TESTL DI, DI - JE LBB18_54 - ADDQ $-32, BX - MOVLQSX DI, SI - LONG $0xbc0f48f3; BYTE $0xf6 // tzcntq %rsi, %rsi - CMPQ SI, BX - JL LBB18_57 - JMP LBB18_5 + CMPQ DI, $31 + JLE LBB18_5 + LEAQ 32(SI), CX + LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 + JE LBB18_54 + ADDQ $-32, DI + JMP LBB18_57 LBB18_52: - LONG $0xc4d7fdc5 // vpmovmskb %ymm4, %eax + LONG $0xc4d7fdc5 // vpmovmskb %ymm4, %eax TESTL AX, AX - JE LBB18_61 - SUBQ R13, SI - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc8 // tzcntq %rax, %rcx - JMP LBB18_58 + JE LBB18_62 + MOVL AX, AX + SUBQ BX, SI + ORQ R9, AX + BSFQ AX, CX + JMP LBB18_59 LBB18_54: - CMPQ BX, $64 - JL LBB18_5 - LEAQ 64(SI), CX - LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 - LONG $0xf9d7fdc5 // vpmovmskb %ymm1, %edi - TESTL DI, DI - JE LBB18_63 - ADDQ $-64, BX - MOVLQSX DI, SI - LONG $0xbc0f48f3; BYTE $0xf6 // tzcntq %rsi, %rsi - CMPQ SI, BX - JGE LBB18_5 + CMPQ DI, $64 + JL LBB18_5 + LEAQ 64(SI), CX + LONG $0x0974fdc5 // vpcmpeqb (%rcx), %ymm0, %ymm1 + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 + JE LBB18_64 + ADDQ $-64, DI LBB18_57: - SUBQ R13, CX - -LBB18_58: - ADDQ SI, CX + MOVL R8, SI + ORQ R9, SI + BSFQ SI, SI + CMPQ SI, DI + JGE LBB18_5 + SUBQ BX, CX LBB18_59: + ADDQ SI, CX + +LBB18_60: MOVQ $-1, AX CMPQ CX, DX JAE LBB18_5 - ADDQ R12, CX + ADDQ R13, CX JNS LBB18_36 JMP LBB18_5 -LBB18_61: - LONG $0xc3d7fdc5 // vpmovmskb %ymm3, %eax +LBB18_62: + LONG $0xc3d7fdc5 // vpmovmskb %ymm3, %eax TESTL AX, AX - JE LBB18_66 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax - SUBQ R13, SI + JE LBB18_67 + MOVL AX, AX + ORQ R9, AX + BSFQ AX, AX + SUBQ BX, SI LEAQ 32(SI)(AX*1), CX - JMP LBB18_59 + JMP LBB18_60 -LBB18_63: - CMPQ BX, $96 +LBB18_64: + CMPQ DI, $96 JL LBB18_5 ADDQ $96, SI LONG $0x0e74fdc5 // vpcmpeqb (%rsi), %ymm0, %ymm1 - LONG $0xf9d7fdc5 
// vpmovmskb %ymm1, %edi - TESTL DI, DI + LONG $0xc1d77dc5 // vpmovmskb %ymm1, %r8d + TESTL R8, R8 JE LBB18_5 - ADDQ $-96, BX + ADDQ $-96, DI JMP LBB18_48 -LBB18_66: - LONG $0xc2d7fdc5 // vpmovmskb %ymm2, %eax - SUBQ R13, SI +LBB18_67: + LONG $0xc2d7fdc5 // vpmovmskb %ymm2, %eax + SUBQ BX, SI TESTL AX, AX - JE LBB18_68 - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax + JE LBB18_69 + MOVL AX, AX + ORQ R9, AX + BSFQ AX, AX LEAQ 64(SI)(AX*1), CX - JMP LBB18_59 - -LBB18_68: - LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax - WORD $0x9848 // cltq - LONG $0xbc0f48f3; BYTE $0xc0 // tzcntq %rax, %rax - LEAQ 96(SI)(AX*1), CX - JMP LBB18_59 + JMP LBB18_60 LBB18_69: + LONG $0xc1d7fdc5 // vpmovmskb %ymm1, %eax + ORQ R9, AX + BSFQ AX, AX + LEAQ 96(SI)(AX*1), CX + JMP LBB18_60 + +LBB18_70: TESTQ CX, CX JNS LBB18_2 JMP LBB18_4 -LBB18_71: - CMPQ DI, $96 +LBB18_72: + CMPQ BX, $96 JL LBB18_5 ADDQ $96, SI LONG $0x066ffdc5 // vmovdqa (%rsi), %ymm0 - QUAD $0xfffffb4c0574fdc5 // vpcmpeqb $-1204(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ - LONG $0xc8d77dc5 // vpmovmskb %ymm0, %r9d - TESTL R9, R9 + QUAD $0xfffffb4e0574fdc5 // vpcmpeqb $-1202(%rip), %ymm0, %ymm0 /* LCPI18_0(%rip) */ + LONG $0xd0d77dc5 // vpmovmskb %ymm0, %r10d + TESTL R10, R10 JE LBB18_5 - ADDQ $-96, DI + ADDQ $-96, BX JMP LBB18_17 _skip_negative: @@ -5109,12 +5184,12 @@ LBB20_19: QUAD $0xfffffef80d10fbc5 // vmovsd $-264(%rip), %xmm1 /* LCPI20_0(%rip) */ LBB20_20: - LONG $0xd028f9c5 // vmovapd %xmm0, %xmm2 + LONG $0xc159fbc5 // vmulsd %xmm1, %xmm0, %xmm0 LEAQ 1(R10), CX MOVBLSX BX, BX ADDL $-48, BX - LONG $0xc32ae3c5 // vcvtsi2sd %ebx, %xmm3, %xmm0 - LONG $0xb9e9e2c4; BYTE $0xc1 // vfmadd231sd %xmm1, %xmm2, %xmm0 + LONG $0xd32ae3c5 // vcvtsi2sd %ebx, %xmm3, %xmm2 + LONG $0xc258fbc5 // vaddsd %xmm2, %xmm0, %xmm0 CMPQ CX, SI JAE LBB20_23 MOVBLZX 1(DI)(R10*1), BX @@ -5252,7 +5327,7 @@ LBB20_45: JL LBB20_53 CMPL CX, $308 JLE LBB20_52 - QUAD $0xfffffd690d10fbc5 // vmovsd $-663(%rip), %xmm1 /* LCPI20_1(%rip) */ 
+ QUAD $0xfffffd6a0d10fbc5 // vmovsd $-662(%rip), %xmm1 /* LCPI20_1(%rip) */ JMP LBB20_53 LBB20_48: @@ -5265,7 +5340,7 @@ LBB20_49: JL LBB20_57 CMPL AX, $308 JLE LBB20_56 - QUAD $0xfffffd460d10fbc5 // vmovsd $-698(%rip), %xmm1 /* LCPI20_1(%rip) */ + QUAD $0xfffffd470d10fbc5 // vmovsd $-697(%rip), %xmm1 /* LCPI20_1(%rip) */ JMP LBB20_57 LBB20_52: @@ -5275,7 +5350,7 @@ LBB20_52: LONG $0x0c59f3c5; BYTE $0xcb // vmulsd (%rbx,%rcx,8), %xmm1, %xmm1 LBB20_53: - LONG $0xc158fbc5 // vaddsd %xmm1, %xmm0, %xmm0 + LONG $0xc058f3c5 // vaddsd %xmm0, %xmm1, %xmm0 LONG $0x117bc1c4; BYTE $0x01 // vmovsd %xmm0, (%r9) MOVL $8, BX CMPQ R8, SI @@ -5310,7 +5385,7 @@ _skip_positive: MOVQ AX, DI MOVQ BX, CX MOVQ BX, R9 - LONG $0xfffcc8e8; BYTE $0xff // callq _advance_number + LONG $0xfffcc9e8; BYTE $0xff // callq _advance_number TESTQ AX, AX LONG $0xc3490f48 // cmovnsq %rbx, %rax ADDQ $40, SP @@ -6190,21 +6265,21 @@ _P10_TAB: TEXT ·__f64toa(SB), NOSPLIT, $0 - 24 MOVQ out+0(FP), DI MOVSD val+8(FP), X0 - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+2953(SB) // _f64toa + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+3038(SB) // _f64toa MOVQ AX, ret+16(FP) RET TEXT ·__i64toa(SB), NOSPLIT, $0 - 24 MOVQ out+0(FP), DI MOVQ val+8(FP), SI - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+6042(SB) // _i64toa + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+6102(SB) // _i64toa MOVQ AX, ret+16(FP) RET TEXT ·__lquote(SB), NOSPLIT, $0 - 24 MOVQ buf+0(FP), DI MOVQ off+8(FP), SI - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+384(SB) // _lquote + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+376(SB) // _lquote MOVQ AX, ret+16(FP) RET @@ -6212,7 +6287,7 @@ TEXT ·__lspace(SB), NOSPLIT, $0 - 32 MOVQ sp+0(FP), DI MOVQ nb+8(FP), SI MOVQ off+16(FP), DX - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+1266(SB) // _lspace + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+1268(SB) // _lspace MOVQ AX, ret+24(FP) RET 
@@ -6227,7 +6302,7 @@ TEXT ·__skip_array(SB), NOSPLIT, $0 - 32 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ m+16(FP), DX - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+14398(SB) // _skip_array + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+14619(SB) // _skip_array MOVQ AX, ret+24(FP) RET @@ -6235,7 +6310,7 @@ TEXT ·__skip_object(SB), NOSPLIT, $0 - 32 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ m+16(FP), DX - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+14433(SB) // _skip_object + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+14654(SB) // _skip_object MOVQ AX, ret+24(FP) RET @@ -6243,14 +6318,14 @@ TEXT ·__skip_one(SB), NOSPLIT, $0 - 32 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ m+16(FP), DX - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12845(SB) // _skip_one + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+13066(SB) // _skip_one MOVQ AX, ret+24(FP) RET TEXT ·__u64toa(SB), NOSPLIT, $0 - 24 MOVQ out+0(FP), DI MOVQ val+8(FP), SI - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+6135(SB) // _u64toa + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+6195(SB) // _u64toa MOVQ AX, ret+16(FP) RET @@ -6260,7 +6335,7 @@ TEXT ·__unquote(SB), NOSPLIT, $0 - 48 MOVQ dp+16(FP), DX MOVQ ep+24(FP), CX MOVQ flags+32(FP), R8 - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+7356(SB) // _unquote + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+7419(SB) // _unquote MOVQ AX, ret+40(FP) RET @@ -6269,7 +6344,7 @@ TEXT ·__value(SB), NOSPLIT, $0 - 40 MOVQ n+8(FP), SI MOVQ p+16(FP), DX MOVQ v+24(FP), CX - CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+9076(SB) // _value + CALL ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+9196(SB) // _value MOVQ AX, ret+32(FP) RET @@ -6277,26 +6352,26 @@ TEXT ·__vnumber(SB), NOSPLIT, $0 - 24 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ v+16(FP), DX - LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+11325(SB), AX // _vnumber + 
LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+11544(SB), AX // _vnumber JMP AX TEXT ·__vsigned(SB), NOSPLIT, $0 - 24 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ v+16(FP), DX - LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12295(SB), AX // _vsigned + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12516(SB), AX // _vsigned JMP AX TEXT ·__vstring(SB), NOSPLIT, $0 - 24 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ v+16(FP), DX - LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+10096(SB), AX // _vstring + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+10216(SB), AX // _vstring JMP AX TEXT ·__vunsigned(SB), NOSPLIT, $0 - 24 MOVQ s+0(FP), DI MOVQ p+8(FP), SI MOVQ v+16(FP), DX - LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12572(SB), AX // _vunsigned + LEAQ ·___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___+12793(SB), AX // _vunsigned JMP AX diff --git a/internal/native/avx2/native_amd64_test.go b/internal/native/avx2/native_amd64_test.go new file mode 100644 index 0000000..a1d4e3a --- /dev/null +++ b/internal/native/avx2/native_amd64_test.go @@ -0,0 +1,426 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package avx2 + +import ( + `encoding/hex` + `fmt` + `math` + `testing` + `unsafe` + + `github.com/bytedance/sonic/internal/native/types` + `github.com/bytedance/sonic/internal/rt` + `github.com/davecgh/go-spew/spew` + `github.com/stretchr/testify/assert` + `github.com/stretchr/testify/require` +) + +func TestNative_Value(t *testing.T) { + var v types.JsonState + s := ` -12345` + p := (*rt.GoString)(unsafe.Pointer(&s)) + x := __value(p.Ptr, p.Len, 0, &v) + assert.Equal(t, 9, x) + assert.Equal(t, types.V_INTEGER, v.Vt) + assert.Equal(t, int64(-12345), v.Iv) + assert.Equal(t, 3, v.Ep) +} + +func TestNative_Unquote(t *testing.T) { + s := `hello\b\f\n\r\t\\\"\u2333world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\b\f\n\r\t\\\"\u2333world", string(d)) +} + +func TestNative_UnquoteError(t *testing.T) { + s := `asdf\` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_EOF), rv) + assert.Equal(t, 5, ep) + s = `asdf\gqwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_ESCAPE), rv) + assert.Equal(t, 5, ep) + s = `asdf\u1gggqwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_CHAR), rv) + assert.Equal(t, 7, ep) + s = `asdf\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = 
(*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 6, ep) + s = `asdf\\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 7, ep) + s = `asdf\ud800\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 12, ep) + s = `asdf\\ud800\\ud800qwer` + d = make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) + assert.Equal(t, 14, ep) +} + +func TestNative_DoubleUnquote(t *testing.T) { + s := `hello\\b\\f\\n\\r\\t\\\\\\\"\\u2333world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\b\f\n\r\t\\\"\u2333world", string(d)) +} + +func TestNative_UnquoteUnicodeReplacement(t *testing.T) { + s := `hello\ud800world` + d := make([]byte, 0, len(s)) + ep := -1 + dp := (*rt.GoSlice)(unsafe.Pointer(&d)) + sp := (*rt.GoString)(unsafe.Pointer(&s)) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\ufffdworld", string(d)) + s = `hello\ud800\ud800world` + d = 
make([]byte, 0, len(s)) + ep = -1 + dp = (*rt.GoSlice)(unsafe.Pointer(&d)) + sp = (*rt.GoString)(unsafe.Pointer(&s)) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) + if rv < 0 { + require.NoError(t, types.ParsingError(-rv)) + } + dp.Len = rv + assert.Equal(t, -1, ep) + assert.Equal(t, "hello\ufffd\ufffdworld", string(d)) +} + +func TestNative_Vstring(t *testing.T) { + var v types.JsonState + i := 0 + s := `test"test\n2"` + __vstring(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, -1, v.Ep) + assert.Equal(t, int64(0), v.Iv) + __vstring(&s, &i, &v) + assert.Equal(t, 13, i) + assert.Equal(t, 9, v.Ep) + assert.Equal(t, int64(5), v.Iv) +} + +func TestNative_VstringHangUpOnRandomData(t *testing.T) { + v, e := hex.DecodeString( + "228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" + + "8c4500262d9c308b3f3ec2577689cf345d9f86f9b5d18d3e463bec5c22df2d2e" + + "4506010eba1dae7278", + ) + assert.Nil(t, e) + p := 1 + s := rt.Mem2Str(v) + var js types.JsonState + __vstring(&s, &p, &js) + fmt.Printf("js: %s\n", spew.Sdump(js)) +} + +func TestNative_Vnumber(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vnumber(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "1.234" + __vnumber(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 1.234, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "1.234e5" + __vnumber(&s, &i, &v) + assert.Equal(t, 7, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 1.234e5, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "0.0125" + __vnumber(&s, &i, &v) + assert.Equal(t, 6, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 0.0125, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "100000000000000000000" + __vnumber(&s, &i, &v) + assert.Equal(t, 21, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 100000000000000000000.0, v.Dv) + 
assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "999999999999999900000" + __vnumber(&s, &i, &v) + assert.Equal(t, 21, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, 999999999999999900000.0, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) + i = 0 + s = "-1.234" + __vnumber(&s, &i, &v) + assert.Equal(t, 6, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, -1.234, v.Dv) + assert.Equal(t, types.V_DOUBLE, v.Vt) +} + +func TestNative_Vsigned(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "-1234" + __vsigned(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(-1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "9223372036854775807" + __vsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(math.MaxInt64), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "-9223372036854775808" + __vsigned(&s, &i, &v) + assert.Equal(t, 20, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(math.MinInt64), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "9223372036854775808" + __vsigned(&s, &i, &v) + assert.Equal(t, 18, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "-9223372036854775809" + __vsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "1.234" + __vsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "0.0125" + __vsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e5" + __vsigned(&s, &i, 
&v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e-5" + __vsigned(&s, &i, &v) + assert.Equal(t, 5, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) +} + +func TestNative_Vunsigned(t *testing.T) { + var v types.JsonState + i := 0 + s := "1234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, int64(1234), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "18446744073709551615" + __vunsigned(&s, &i, &v) + assert.Equal(t, 20, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, ^int64(0), v.Iv) + assert.Equal(t, types.V_INTEGER, v.Vt) + i = 0 + s = "18446744073709551616" + __vunsigned(&s, &i, &v) + assert.Equal(t, 19, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) + i = 0 + s = "-1234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "1.234" + __vunsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "0.0125" + __vunsigned(&s, &i, &v) + assert.Equal(t, 1, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "1234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 4, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) + i = 0 + s = "-1.234e5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), 
v.Vt) + i = 0 + s = "-1.234e-5" + __vunsigned(&s, &i, &v) + assert.Equal(t, 0, i) + assert.Equal(t, 0, v.Ep) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) +} + +func TestNative_SkipOne(t *testing.T) { + p := 0 + s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5` + q := __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 42, p) + assert.Equal(t, 1, q) + p = 0 + s = `1 2.5 -3 "asdf\nqwer" true false null {} []` + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 1, p) + assert.Equal(t, 0, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 5, p) + assert.Equal(t, 2, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 8, p) + assert.Equal(t, 6, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 21, p) + assert.Equal(t, 9, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 26, p) + assert.Equal(t, 22, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 32, p) + assert.Equal(t, 27, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 37, p) + assert.Equal(t, 33, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 40, p) + assert.Equal(t, 38, q) + q = __skip_one(&s, &p, &types.StateMachine{}) + assert.Equal(t, 43, p) + assert.Equal(t, 41, q) +} + +func TestNative_SkipArray(t *testing.T) { + p := 0 + s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],` + __skip_array(&s, &p, &types.StateMachine{}) + assert.Equal(t, p, 48) +} + +func TestNative_SkipObject(t *testing.T) { + p := 0 + s := `"asdf": "wqer"},` + __skip_object(&s, &p, &types.StateMachine{}) + assert.Equal(t, p, 15) +} diff --git a/internal/native/avx2/native_export_amd64.go b/internal/native/avx2/native_export_amd64.go new file mode 100644 index 0000000..93d200c --- /dev/null +++ b/internal/native/avx2/native_export_amd64.go @@ -0,0 +1,45 @@ +// Code generated by Makefile, DO NOT EDIT. + +/* + * Copyright 2021 ByteDance Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package avx2 + +var ( + S_f64toa = _subr__f64toa + S_i64toa = _subr__i64toa + S_lquote = _subr__lquote + S_u64toa = _subr__u64toa +) + +var ( + S_lspace = _subr__lspace + S_unquote = _subr__unquote +) + +var ( + S_value = _subr__value + S_vstring = _subr__vstring + S_vnumber = _subr__vnumber + S_vsigned = _subr__vsigned + S_vunsigned = _subr__vunsigned +) + +var ( + S_skip_one = _subr__skip_one + S_skip_array = _subr__skip_array + S_skip_object = _subr__skip_object +) diff --git a/internal/native/avx2/native_subr_amd64.go b/internal/native/avx2/native_subr_amd64.go new file mode 100644 index 0000000..963566a --- /dev/null +++ b/internal/native/avx2/native_subr_amd64.go @@ -0,0 +1,50 @@ +// +build !noasm !appengine +// Code generated by asm2asm, DO NOT EDIT. 
+ +package avx2 + +import ( + `unsafe` +) + +//go:nosplit +//go:noescape +//goland:noinspection ALL +func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___() + +var ( + _func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___ + _subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3038 + _subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6102 + _subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 376 + _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 1268 + _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 + _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14619 + _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14654 + _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13066 + _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6195 + _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7419 + _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9196 + _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11544 + _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12516 + _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10216 + _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12793 +) + +var ( + _ = _subr__f64toa + _ = _subr__i64toa + _ = _subr__lquote + _ = _subr__lspace + _ = _subr__lzero + _ = _subr__skip_array + _ = _subr__skip_object + _ = _subr__skip_one + _ = _subr__u64toa + _ = _subr__unquote + _ = _subr__value + _ = _subr__vnumber + _ = _subr__vsigned + _ = _subr__vstring + _ = _subr__vunsigned +) diff --git a/internal/native/dispatch_amd64.go b/internal/native/dispatch_amd64.go new file mode 100644 index 0000000..e968d56 --- /dev/null +++ b/internal/native/dispatch_amd64.go @@ -0,0 +1,123 @@ +/* + * Copyright 2021 ByteDance Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package native + +import ( + `unsafe` + + `github.com/bytedance/sonic/internal/cpu` + `github.com/bytedance/sonic/internal/native/avx` + `github.com/bytedance/sonic/internal/native/avx2` + `github.com/bytedance/sonic/internal/native/types` +) + +var ( + S_f64toa uintptr + S_i64toa uintptr + S_u64toa uintptr + S_lquote uintptr + S_lspace uintptr + S_unquote uintptr +) + +var ( + S_value uintptr + S_vstring uintptr + S_vnumber uintptr + S_vsigned uintptr + S_vunsigned uintptr +) + +var ( + S_skip_one uintptr + S_skip_array uintptr + S_skip_object uintptr +) + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func Lzero(p unsafe.Pointer, n int) int + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func Lquote(buf *string, off int) int + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func Lspace(sp unsafe.Pointer, nb int, off int) int + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func Value(s unsafe.Pointer, n int, p int, v *types.JsonState) int + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func SkipOne(s *string, p *int, m *types.StateMachine) int + +//go:nosplit +//go:noescape +//goland:noinspection GoUnusedParameter +func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int + +func useAVX() { + S_f64toa = avx.S_f64toa + S_i64toa = avx.S_i64toa + S_u64toa = 
avx.S_u64toa + S_lquote = avx.S_lquote + S_lspace = avx.S_lspace + S_unquote = avx.S_unquote + S_value = avx.S_value + S_vstring = avx.S_vstring + S_vnumber = avx.S_vnumber + S_vsigned = avx.S_vsigned + S_vunsigned = avx.S_vunsigned + S_skip_one = avx.S_skip_one + S_skip_array = avx.S_skip_array + S_skip_object = avx.S_skip_object +} + +func useAVX2() { + S_f64toa = avx2.S_f64toa + S_i64toa = avx2.S_i64toa + S_u64toa = avx2.S_u64toa + S_lquote = avx2.S_lquote + S_lspace = avx2.S_lspace + S_unquote = avx2.S_unquote + S_value = avx2.S_value + S_vstring = avx2.S_vstring + S_vnumber = avx2.S_vnumber + S_vsigned = avx2.S_vsigned + S_vunsigned = avx2.S_vunsigned + S_skip_one = avx2.S_skip_one + S_skip_array = avx2.S_skip_array + S_skip_object = avx2.S_skip_object +} + +func init() { + if cpu.HasAVX2 { + useAVX2() + } else if cpu.HasAVX { + useAVX() + } else { + panic("Unsupported CPU, maybe it's too old to run Sonic.") + } +} diff --git a/internal/native/dispatch_amd64.s b/internal/native/dispatch_amd64.s new file mode 100644 index 0000000..85041fd --- /dev/null +++ b/internal/native/dispatch_amd64.s @@ -0,0 +1,55 @@ +// +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "go_asm.h" +#include "funcdata.h" +#include "textflag.h" + +TEXT ·Lzero(SB), NOSPLIT, $0 - 24 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lzero(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lzero(SB) + +TEXT ·Lquote(SB), NOSPLIT, $0 - 24 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lquote(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lquote(SB) + +TEXT ·Lspace(SB), NOSPLIT, $0 - 32 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lspace(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lspace(SB) + +TEXT ·Value(SB), NOSPLIT, $0 - 40 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__value(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__value(SB) + +TEXT ·SkipOne(SB), NOSPLIT, $0 - 32 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__skip_one(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__skip_one(SB) + +TEXT ·Unquote(SB), NOSPLIT, $0 - 48 + CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0 + JE 2(PC) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__unquote(SB) + JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__unquote(SB) diff --git a/internal/native/fastfloat_test.go b/internal/native/fastfloat_amd64_test.tmpl similarity index 99% rename from internal/native/fastfloat_test.go rename to internal/native/fastfloat_amd64_test.tmpl index 9abec7e..98849dd 100644 --- a/internal/native/fastfloat_test.go +++ b/internal/native/fastfloat_amd64_test.tmpl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package native +package {{PACKAGE}} import ( `math` diff --git a/internal/native/fastint_test.go b/internal/native/fastint_amd64_test.tmpl similarity index 99% rename from internal/native/fastint_test.go rename to internal/native/fastint_amd64_test.tmpl index f7025c8..9858832 100644 --- a/internal/native/fastint_test.go +++ b/internal/native/fastint_amd64_test.tmpl @@ -14,7 +14,7 @@ * limitations under the License. */ -package native +package {{PACKAGE}} import ( `strconv` diff --git a/internal/native/native_amd64.go b/internal/native/native_amd64.tmpl similarity index 77% rename from internal/native/native_amd64.go rename to internal/native/native_amd64.tmpl index 65b83f7..f503015 100644 --- a/internal/native/native_amd64.go +++ b/internal/native/native_amd64.tmpl @@ -14,10 +14,12 @@ * limitations under the License. */ -package native +package {{PACKAGE}} import ( `unsafe` + + `github.com/bytedance/sonic/internal/native/types` ) //go:nosplit @@ -53,42 +55,42 @@ func __lspace(sp unsafe.Pointer, nb int, off int) (ret int) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __value(s unsafe.Pointer, n int, p int, v *JsonState) (ret int) +func __value(s unsafe.Pointer, n int, p int, v *types.JsonState) (ret int) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __vstring(s *string, p *int, v *JsonState) +func __vstring(s *string, p *int, v *types.JsonState) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __vnumber(s *string, p *int, v *JsonState) +func __vnumber(s *string, p *int, v *types.JsonState) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __vsigned(s *string, p *int, v *JsonState) +func __vsigned(s *string, p *int, v *types.JsonState) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __vunsigned(s *string, p *int, v *JsonState) +func __vunsigned(s *string, p *int, v *types.JsonState) //go:nosplit //go:noescape //goland:noinspection 
GoUnusedParameter -func __skip_one(s *string, p *int, m *StateMachine) (ret int) +func __skip_one(s *string, p *int, m *types.StateMachine) (ret int) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __skip_array(s *string, p *int, m *StateMachine) (ret int) +func __skip_array(s *string, p *int, m *types.StateMachine) (ret int) //go:nosplit //go:noescape //goland:noinspection GoUnusedParameter -func __skip_object(s *string, p *int, m *StateMachine) (ret int) +func __skip_object(s *string, p *int, m *types.StateMachine) (ret int) //go:nosplit //go:noescape diff --git a/internal/native/native_test.go b/internal/native/native_amd64_test.tmpl similarity index 73% rename from internal/native/native_test.go rename to internal/native/native_amd64_test.tmpl index 0573e90..fdcd27b 100644 --- a/internal/native/native_test.go +++ b/internal/native/native_amd64_test.tmpl @@ -14,7 +14,7 @@ * limitations under the License. */ -package native +package {{PACKAGE}} import ( `encoding/hex` @@ -23,6 +23,7 @@ import ( `testing` `unsafe` + `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/davecgh/go-spew/spew` `github.com/stretchr/testify/assert` @@ -30,12 +31,12 @@ import ( ) func TestNative_Value(t *testing.T) { - var v JsonState + var v types.JsonState s := ` -12345` p := (*rt.GoString)(unsafe.Pointer(&s)) x := __value(p.Ptr, p.Len, 0, &v) assert.Equal(t, 9, x) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) assert.Equal(t, int64(-12345), v.Iv) assert.Equal(t, 3, v.Ep) } @@ -48,7 +49,7 @@ func TestNative_Unquote(t *testing.T) { sp := (*rt.GoString)(unsafe.Pointer(&s)) rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) if rv < 0 { - require.NoError(t, ParsingError(-rv)) + require.NoError(t, types.ParsingError(-rv)) } dp.Len = rv assert.Equal(t, -1, ep) @@ -62,7 +63,7 @@ func TestNative_UnquoteError(t *testing.T) { dp := (*rt.GoSlice)(unsafe.Pointer(&d)) sp := 
(*rt.GoString)(unsafe.Pointer(&s)) rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) - assert.Equal(t, -int(ERR_EOF), rv) + assert.Equal(t, -int(types.ERR_EOF), rv) assert.Equal(t, 5, ep) s = `asdf\gqwer` d = make([]byte, 0, len(s)) @@ -70,7 +71,7 @@ func TestNative_UnquoteError(t *testing.T) { dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) - assert.Equal(t, -int(ERR_INVALID_ESCAPE), rv) + assert.Equal(t, -int(types.ERR_INVALID_ESCAPE), rv) assert.Equal(t, 5, ep) s = `asdf\u1gggqwer` d = make([]byte, 0, len(s)) @@ -78,7 +79,7 @@ func TestNative_UnquoteError(t *testing.T) { dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) - assert.Equal(t, -int(ERR_INVALID_CHAR), rv) + assert.Equal(t, -int(types.ERR_INVALID_CHAR), rv) assert.Equal(t, 7, ep) s = `asdf\ud800qwer` d = make([]byte, 0, len(s)) @@ -86,15 +87,15 @@ func TestNative_UnquoteError(t *testing.T) { dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) - assert.Equal(t, -int(ERR_INVALID_UNICODE), rv) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) assert.Equal(t, 6, ep) s = `asdf\\ud800qwer` d = make([]byte, 0, len(s)) ep = -1 dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) - rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, F_DOUBLE_UNQUOTE) - assert.Equal(t, -int(ERR_INVALID_UNICODE), rv) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) assert.Equal(t, 7, ep) s = `asdf\ud800\ud800qwer` d = make([]byte, 0, len(s)) @@ -102,15 +103,15 @@ func TestNative_UnquoteError(t *testing.T) { dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, 0) - assert.Equal(t, -int(ERR_INVALID_UNICODE), rv) + assert.Equal(t, 
-int(types.ERR_INVALID_UNICODE), rv) assert.Equal(t, 12, ep) s = `asdf\\ud800\\ud800qwer` d = make([]byte, 0, len(s)) ep = -1 dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) - rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, F_DOUBLE_UNQUOTE) - assert.Equal(t, -int(ERR_INVALID_UNICODE), rv) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) + assert.Equal(t, -int(types.ERR_INVALID_UNICODE), rv) assert.Equal(t, 14, ep) } @@ -120,9 +121,9 @@ func TestNative_DoubleUnquote(t *testing.T) { ep := -1 dp := (*rt.GoSlice)(unsafe.Pointer(&d)) sp := (*rt.GoString)(unsafe.Pointer(&s)) - rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, F_DOUBLE_UNQUOTE) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_DOUBLE_UNQUOTE) if rv < 0 { - require.NoError(t, ParsingError(-rv)) + require.NoError(t, types.ParsingError(-rv)) } dp.Len = rv assert.Equal(t, -1, ep) @@ -135,9 +136,9 @@ func TestNative_UnquoteUnicodeReplacement(t *testing.T) { ep := -1 dp := (*rt.GoSlice)(unsafe.Pointer(&d)) sp := (*rt.GoString)(unsafe.Pointer(&s)) - rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, F_UNICODE_REPLACE) + rv := __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) if rv < 0 { - require.NoError(t, ParsingError(-rv)) + require.NoError(t, types.ParsingError(-rv)) } dp.Len = rv assert.Equal(t, -1, ep) @@ -147,9 +148,9 @@ func TestNative_UnquoteUnicodeReplacement(t *testing.T) { ep = -1 dp = (*rt.GoSlice)(unsafe.Pointer(&d)) sp = (*rt.GoString)(unsafe.Pointer(&s)) - rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, F_UNICODE_REPLACE) + rv = __unquote(sp.Ptr, sp.Len, dp.Ptr, &ep, types.F_UNICODE_REPLACE) if rv < 0 { - require.NoError(t, ParsingError(-rv)) + require.NoError(t, types.ParsingError(-rv)) } dp.Len = rv assert.Equal(t, -1, ep) @@ -157,7 +158,7 @@ func TestNative_UnquoteUnicodeReplacement(t *testing.T) { } func TestNative_Vstring(t *testing.T) { - var v JsonState + var v types.JsonState i := 0 s := `test"test\n2"` __vstring(&s, &i, &v) @@ -179,231 
+180,231 @@ func TestNative_VstringHangUpOnRandomData(t *testing.T) { assert.Nil(t, e) p := 1 s := rt.Mem2Str(v) - var js JsonState + var js types.JsonState __vstring(&s, &p, &js) fmt.Printf("js: %s\n", spew.Sdump(js)) } func TestNative_Vnumber(t *testing.T) { - var v JsonState + var v types.JsonState i := 0 s := "1234" __vnumber(&s, &i, &v) assert.Equal(t, 4, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(1234), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "1.234" __vnumber(&s, &i, &v) assert.Equal(t, 5, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, 1.234, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) i = 0 s = "1.234e5" __vnumber(&s, &i, &v) assert.Equal(t, 7, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, 1.234e5, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) i = 0 s = "0.0125" __vnumber(&s, &i, &v) assert.Equal(t, 6, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, 0.0125, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) i = 0 s = "100000000000000000000" __vnumber(&s, &i, &v) assert.Equal(t, 21, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, 100000000000000000000.0, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) i = 0 s = "999999999999999900000" __vnumber(&s, &i, &v) assert.Equal(t, 21, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, 999999999999999900000.0, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) i = 0 s = "-1.234" __vnumber(&s, &i, &v) assert.Equal(t, 6, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, -1.234, v.Dv) - assert.Equal(t, V_DOUBLE, v.Vt) + assert.Equal(t, types.V_DOUBLE, v.Vt) } func TestNative_Vsigned(t *testing.T) { - var v JsonState + var v types.JsonState i := 0 s := "1234" __vsigned(&s, &i, &v) assert.Equal(t, 4, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(1234), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, 
v.Vt) i = 0 s = "-1234" __vsigned(&s, &i, &v) assert.Equal(t, 5, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(-1234), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "9223372036854775807" __vsigned(&s, &i, &v) assert.Equal(t, 19, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(math.MaxInt64), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "-9223372036854775808" __vsigned(&s, &i, &v) assert.Equal(t, 20, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(math.MinInt64), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "9223372036854775808" __vsigned(&s, &i, &v) assert.Equal(t, 18, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INTEGER_OVERFLOW)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) i = 0 s = "-9223372036854775809" __vsigned(&s, &i, &v) assert.Equal(t, 19, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INTEGER_OVERFLOW)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) i = 0 s = "1.234" __vsigned(&s, &i, &v) assert.Equal(t, 1, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "0.0125" __vsigned(&s, &i, &v) assert.Equal(t, 1, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "-1234e5" __vsigned(&s, &i, &v) assert.Equal(t, 5, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "-1234e-5" __vsigned(&s, &i, &v) assert.Equal(t, 5, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, 
types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) } func TestNative_Vunsigned(t *testing.T) { - var v JsonState + var v types.JsonState i := 0 s := "1234" __vunsigned(&s, &i, &v) assert.Equal(t, 4, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, int64(1234), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "18446744073709551615" __vunsigned(&s, &i, &v) assert.Equal(t, 20, i) assert.Equal(t, 0, v.Ep) assert.Equal(t, ^int64(0), v.Iv) - assert.Equal(t, V_INTEGER, v.Vt) + assert.Equal(t, types.V_INTEGER, v.Vt) i = 0 s = "18446744073709551616" __vunsigned(&s, &i, &v) assert.Equal(t, 19, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INTEGER_OVERFLOW)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INTEGER_OVERFLOW)), v.Vt) i = 0 s = "-1234" __vunsigned(&s, &i, &v) assert.Equal(t, 0, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "1.234" __vunsigned(&s, &i, &v) assert.Equal(t, 1, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "0.0125" __vunsigned(&s, &i, &v) assert.Equal(t, 1, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "1234e5" __vunsigned(&s, &i, &v) assert.Equal(t, 4, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "-1234e5" __vunsigned(&s, &i, &v) assert.Equal(t, 0, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "-1.234e5" __vunsigned(&s, 
&i, &v) assert.Equal(t, 0, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) i = 0 s = "-1.234e-5" __vunsigned(&s, &i, &v) assert.Equal(t, 0, i) assert.Equal(t, 0, v.Ep) - assert.Equal(t, ValueType(-int(ERR_INVALID_NUMBER_FMT)), v.Vt) + assert.Equal(t, types.ValueType(-int(types.ERR_INVALID_NUMBER_FMT)), v.Vt) } func TestNative_SkipOne(t *testing.T) { p := 0 s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5` - q := __skip_one(&s, &p, &StateMachine{}) + q := __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 42, p) assert.Equal(t, 1, q) p = 0 s = `1 2.5 -3 "asdf\nqwer" true false null {} []` - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 1, p) assert.Equal(t, 0, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 5, p) assert.Equal(t, 2, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 8, p) assert.Equal(t, 6, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 21, p) assert.Equal(t, 9, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 26, p) assert.Equal(t, 22, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 32, p) assert.Equal(t, 27, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 37, p) assert.Equal(t, 33, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 40, p) assert.Equal(t, 38, q) - q = __skip_one(&s, &p, &StateMachine{}) + q = __skip_one(&s, &p, &types.StateMachine{}) assert.Equal(t, 43, p) assert.Equal(t, 41, q) } @@ -411,13 +412,13 @@ func TestNative_SkipOne(t 
*testing.T) { func TestNative_SkipArray(t *testing.T) { p := 0 s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],` - __skip_array(&s, &p, &StateMachine{}) + __skip_array(&s, &p, &types.StateMachine{}) assert.Equal(t, p, 48) } func TestNative_SkipObject(t *testing.T) { p := 0 s := `"asdf": "wqer"},` - __skip_object(&s, &p, &StateMachine{}) + __skip_object(&s, &p, &types.StateMachine{}) assert.Equal(t, p, 15) } diff --git a/internal/native/native_export_amd64.tmpl b/internal/native/native_export_amd64.tmpl new file mode 100644 index 0000000..c662202 --- /dev/null +++ b/internal/native/native_export_amd64.tmpl @@ -0,0 +1,43 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package {{PACKAGE}} + +var ( + S_f64toa = _subr__f64toa + S_i64toa = _subr__i64toa + S_lquote = _subr__lquote + S_u64toa = _subr__u64toa +) + +var ( + S_lspace = _subr__lspace + S_unquote = _subr__unquote +) + +var ( + S_value = _subr__value + S_vstring = _subr__vstring + S_vnumber = _subr__vnumber + S_vsigned = _subr__vsigned + S_vunsigned = _subr__vunsigned +) + +var ( + S_skip_one = _subr__skip_one + S_skip_array = _subr__skip_array + S_skip_object = _subr__skip_object +) diff --git a/internal/native/native.go b/internal/native/types/types.go similarity index 68% rename from internal/native/native.go rename to internal/native/types/types.go index 4ea8561..d88c814 100644 --- a/internal/native/native.go +++ b/internal/native/types/types.go @@ -14,11 +14,10 @@ * limitations under the License. */ -package native +package types import ( `fmt` - `unsafe` ) type ValueType int @@ -96,53 +95,3 @@ type StateMachine struct { Sp int Vt [MAX_RECURSE]int } - -var ( - S_f64toa = _subr__f64toa - S_i64toa = _subr__i64toa - S_lquote = _subr__lquote - S_u64toa = _subr__u64toa -) - -var ( - S_lspace = _subr__lspace - S_unquote = _subr__unquote -) - -var ( - S_value = _subr__value - S_vstring = _subr__vstring - S_vnumber = _subr__vnumber - S_vsigned = _subr__vsigned - S_vunsigned = _subr__vunsigned -) - -var ( - S_skip_one = _subr__skip_one - S_skip_array = _subr__skip_array - S_skip_object = _subr__skip_object -) - -func Lzero(p unsafe.Pointer, n int) int { - return __lzero(p, n) -} - -func Lquote(buf *string, off int) int { - return __lquote(buf, off) -} - -func Lspace(sp unsafe.Pointer, nb int, off int) int { - return __lspace(sp, nb, off) -} - -func Value(s unsafe.Pointer, n int, p int, v *JsonState) int { - return __value(s, n, p, v) -} - -func SkipOne(s *string, p *int, m *StateMachine) int { - return __skip_one(s, p, m) -} - -func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int { - return __unquote(s, nb, dp, ep, flags) -} diff --git 
a/native/fastbytes.c b/native/fastbytes.c index 6a3d62a..9eede0f 100644 --- a/native/fastbytes.c +++ b/native/fastbytes.c @@ -16,43 +16,39 @@ #include "native.h" +#if USE_SSE #define loop_decl() \ size_t v; \ size_t n = 0; \ const char * p = s; \ -#define loop_m128(func, ...) { \ - if (nb >= 16) { \ - if ((v = func(_mm_loadu_si128(as_m128c(p)), ## __VA_ARGS__)) < 16) { \ +#define loop_simd(size, load, func, ...) { \ + while (nb >= size) { \ + if ((v = func(load((const void *)(p)), ## __VA_ARGS__)) < size) { \ return n + v; \ } else { \ n += v; \ - p += 16; \ - nb -= 16; \ + p += size; \ + nb -= size; \ } \ } \ } -#define loop_m256(func, ...) { \ - while (nb >= 32) { \ - if ((v = func(_mm256_loadu_si256(as_m256c(p)), ## __VA_ARGS__)) < 32) { \ - return n + v; \ - } else { \ - n += v; \ - p += 32; \ - nb -= 32; \ - } \ - } \ -} +#if !USE_AVX2 +#define loop_zero() +#define loop_m256(func, ...) +#else +#define loop_zero() _mm256_zeroupper(); +#define loop_m256(func, ...) loop_simd(32, _mm256_loadu_si256, func, ## __VA_ARGS__) +#endif -#define loop_last(func, ...) { \ - return func(_mm_loadu_si128(as_m128c(p + nb - 16)), ## __VA_ARGS__) + n + nb - 16; \ -} +#define loop_m128(func, ...) loop_simd(16, _mm_loadu_si128, func, ## __VA_ARGS__) +#define loop_last(func, ...) return func(_mm_loadu_si128(as_m128c(p + nb - 16)), ## __VA_ARGS__) + n + nb - 16; -#define loop_simd(func, ...) { \ +#define loop_bulk(func, ...) 
{ \ loop_decl() \ loop_m256(func ## _avx2, ## __VA_ARGS__) \ - _mm256_zeroupper(); \ + loop_zero(); \ loop_m128(func ## _sse2, ## __VA_ARGS__) \ loop_last(func ## _sse2, ## __VA_ARGS__) \ } @@ -95,6 +91,7 @@ static inline size_t lspace_sse2(__m128i v0) { return v9; } +#if USE_AVX2 static inline size_t lspace_avx2(__m256i v0) { __m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8(' ')); __m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\t')); @@ -107,6 +104,7 @@ static inline size_t lspace_avx2(__m256i v0) { uint64_t v9 = __builtin_ctzll (~(uint64_t)(v8)); return v9; } +#endif static inline size_t lquote_sse2(__m128i v0) { __m128i v1 = _mm_cmpgt_epi8 (v0, _mm_set1_epi8(-1)); @@ -121,6 +119,7 @@ static inline size_t lquote_sse2(__m128i v0) { return v9; } +#if USE_AVX2 static inline size_t lquote_avx2(__m256i v0) { __m256i v1 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(-1)); __m256i v2 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(31)); @@ -133,6 +132,7 @@ static inline size_t lquote_avx2(__m256i v0) { uint64_t v9 = __builtin_ctzll ((uint64_t)v8 | 0xffffffff00000000); return v9; } +#endif static inline size_t strchr2_sse2(__m128i v0, uint64_t c0, uint64_t c1) { __m128i v1 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8((char)c0)); @@ -143,6 +143,7 @@ static inline size_t strchr2_sse2(__m128i v0, uint64_t c0, uint64_t c1) { return v5; } +#if USE_AVX2 static inline size_t strchr2_avx2(__m256i v0, uint64_t c0, uint64_t c1) { __m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c0)); __m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c1)); @@ -151,127 +152,59 @@ static inline size_t strchr2_avx2(__m256i v0, uint64_t c0, uint64_t c1) { uint64_t v5 = __builtin_ctzll ((uint64_t)v4 | 0xffffffff00000000); return v5; } +#endif + +#define do_simd(func, ...) 
{ \ + if (nb == 0) { \ + return 0; \ + } else if (nb < 16) { \ + loop_duff(func, ## __VA_ARGS__) \ + } else { \ + loop_bulk(func, ## __VA_ARGS__) \ + } \ +} +#endif + +#define is_quote(c) ((c) == '"' || (c) == '\\' || ((c) >= 0 && (c) <= 31)) +#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r') static inline size_t lspace_p(const char *s, size_t nb) { - if (nb == 0) { - return 0; - } else if (nb < 16) { - loop_duff(lspace) - } else { - loop_simd(lspace) - } +#if USE_SSE + do_simd(lspace) +#else + size_t i = 0; + while (i < nb && is_space(s[i])) i++; /* count leading whitespace, same contract as the SIMD path (ctz of the inverted match mask) */ + return i; +#endif } static inline size_t lquote_p(const char *s, size_t nb) { - if (nb == 0) { - return 0; - } else if (nb < 16) { - loop_duff(lquote) - } else { - loop_simd(lquote) - } +#if USE_SSE + do_simd(lquote) +#else + size_t i = 0; + while (i < nb && !is_quote(s[i])) i++; + return i; +#endif } static inline size_t strchr1_p(const char *p, size_t nb, uint64_t ch) { - __m256i a; - __m256i b; - __m256i c; - __m256i d; - __m256i u; - __m256i v; - __m256i w; - int32_t r; +#if USE_SSE + int64_t r; uint32_t t; /* prepare the vector */ - __m256i x = _mm256_set1_epi8(ch); ssize_t n = nb; uintptr_t m = (uintptr_t)p; const char * q = p; - /* check for pointer alignment */ - if (m & 31) { - v = _mm256_load_si256 ((const void *)(m & -32)); - v = _mm256_cmpeq_epi8 (v, x); - r = _mm256_movemask_epi8 (v); - - /* check for match in the first characters */ - if ((r = r >> (t = m & 31)) != 0) { - if ((r = _mm_tzcnt_64(r)) < n) { - return r; - } else { - return -1; - } - } - - /* make the pointer aligned */ - p += 32 - t; - n -= 32 - t; - } - - /* attempt to compare 128-bytes at a time */ - while (n >= 128) { - a = _mm256_cmpeq_epi8 (_mm256_load_si256((const void *)(p + 0)), x); - b = _mm256_cmpeq_epi8 (_mm256_load_si256((const void *)(p + 32)), x); - c = _mm256_cmpeq_epi8 (_mm256_load_si256((const void *)(p + 64)), x); - d = _mm256_cmpeq_epi8 (_mm256_load_si256((const void *)(p + 96)), x); - u = 
_mm256_or_si256 (a, b); - v = _mm256_or_si256 (c, d); - w = _mm256_or_si256 (u, v); - - /* check if anything matches */ - if (_mm256_testz_si256(w, w)) { - p += 128; - n -= 128; - continue; - } - - /* match something in the 128-byte region */ - if ((r = _mm256_movemask_epi8(a)) != 0) { - return p - q + _mm_tzcnt_64(r); - } else if ((r = _mm256_movemask_epi8(b)) != 0) { - return p - q + _mm_tzcnt_64(r) + 32; - } else if ((r = _mm256_movemask_epi8(c)) != 0) { - return p - q + _mm_tzcnt_64(r) + 64; - } else { - return p - q + _mm_tzcnt_64(_mm256_movemask_epi8(d)) + 96; - } - } - - /* check every 32 bytes, at most 4 times */ - for (int i = 0; i < 4 && n >= 0; i++) { - v = _mm256_cmpeq_epi8 (_mm256_load_si256((const void *)p), x); - r = _mm256_movemask_epi8 (v); - - /* found something */ - if (r != 0) { - if ((r = _mm_tzcnt_64(r)) >= n) { - return -1; - } else { - return p - q + r; - } - } - - /* otherwise advance to next block */ - p += 32; - n -= 32; - } - - /* not found */ - return nb; -} - -static inline size_t strchr2_p(const char *s, size_t nb, uint64_t c0, uint64_t c1) { - if (nb == 0) { - return 0; - } else if (nb < 16) { - loop_duff(strchr2, c0, c1) - } else { - loop_simd(strchr2, c0, c1) - } -} - -size_t lzero(const char *p, size_t n) { +#if USE_AVX2 +#define ALIGN_VAL 31 +#define _mm_or _mm256_or_si256 +#define _mm_load _mm256_load_si256 +#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b) +#define _mm_testz(v) _mm256_testz_si256(v, v) +#define _mm_movemask(v) _mm256_movemask_epi8(v) __m256i a; __m256i b; __m256i c; @@ -279,47 +212,219 @@ size_t lzero(const char *p, size_t n) { __m256i u; __m256i v; __m256i w; + __m256i x = _mm256_set1_epi8(ch); +#else +#define ALIGN_VAL 15 +#define _mm_or _mm_or_si128 +#define _mm_load _mm_load_si128 +#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b) +#define _mm_testz(v) (_mm_movemask_epi8(v) == 0) +#define _mm_movemask(v) _mm_movemask_epi8(v) + __m128i a; + __m128i b; + __m128i c; + __m128i d; + __m128i u; + __m128i v; + __m128i w; 
+ __m128i x = _mm_set1_epi8(ch); +#endif - /* zero vector */ - size_t r = 0; +#define BLOCK_SIZE (ALIGN_VAL + 1) +#define BLOCK_MASK (1ull << BLOCK_SIZE) +#define BLOCK_LARGE (BLOCK_SIZE * 4) + + /* check for pointer alignment */ + if (m & ALIGN_VAL) { + v = _mm_load ((const void *)(m & -BLOCK_SIZE)); + v = _mm_cmpeq (v, x); + r = _mm_movemask (v); + + /* check for match in the first characters */ + if ((r >>= (t = m & ALIGN_VAL)) != 0) { + if ((r = __builtin_ctzll(r | BLOCK_MASK)) < n) { + return r; + } else { + return -1; + } + } + + /* make the pointer aligned */ + p += BLOCK_SIZE - t; + n -= BLOCK_SIZE - t; + } + + /* attempt to compare 4 blocks at a time */ + while (n >= BLOCK_LARGE) { + a = _mm_load ((const void *)(p + BLOCK_SIZE * 0)); + b = _mm_load ((const void *)(p + BLOCK_SIZE * 1)); + c = _mm_load ((const void *)(p + BLOCK_SIZE * 2)); + d = _mm_load ((const void *)(p + BLOCK_SIZE * 3)); + a = _mm_cmpeq (a, x); + b = _mm_cmpeq (b, x); + c = _mm_cmpeq (c, x); + d = _mm_cmpeq (d, x); + u = _mm_or (a, b); + v = _mm_or (c, d); + w = _mm_or (u, v); + + /* check if anything matches */ + if (_mm_testz(w)) { + p += BLOCK_LARGE; + n -= BLOCK_LARGE; + continue; + } + + /* match something in the 4-blocks region */ + if ((r = _mm_movemask(a)) != 0) { + return p - q + __builtin_ctzll(r | BLOCK_MASK); + } else if ((r = _mm_movemask(b)) != 0) { + return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE; + } else if ((r = _mm_movemask(c)) != 0) { + return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE * 2; + } else { + return p - q + __builtin_ctzll(_mm_movemask(d) | BLOCK_MASK) + BLOCK_SIZE * 3; + } + } + + /* check every block, at most 4 times */ + for (int i = 0; i < 4 && n >= 0; i++) { + v = _mm_load ((const void *)p); + v = _mm_cmpeq (v, x); + r = _mm_movemask (v); + + /* found something */ + if (r != 0) { + if ((r = __builtin_ctzll(r | BLOCK_MASK)) >= n) { + return -1; + } else { + return p - q + r; + } + } + + /* otherwise advance to next block */ + p += 
BLOCK_SIZE; + n -= BLOCK_SIZE; + } + +#undef _mm_load +#undef _mm_or +#undef _mm_cmpeq +#undef _mm_testz +#undef _mm_movemask +#undef ALIGN_VAL +#undef BLOCK_SIZE +#undef BLOCK_LARGE +#else + for (size_t i = 0; i < nb; i++) { + if (p[i] == ch) { + return i; + } + } +#endif + + /* not found */ + return nb; +} + +static inline size_t strchr2_p(const char *s, size_t nb, uint64_t c0, uint64_t c1) { +#if USE_SSE + do_simd(strchr2, c0, c1) +#else + size_t i = 0; + while (i < nb && s[i] != c0 && s[i] != c1) i++; + return i; +#endif +} + +size_t lzero(const char *p, size_t n) { +#if USE_SSE +#if USE_AVX + __m256i a; + __m256i b; + __m256i c; + __m256i d; + __m256i u; + __m256i v; + __m256i w; __m256i y = _mm256_set1_epi8(0xff); __m256i z = _mm256_setzero_si256(); + #define BLOCK_SIZE 32 +#else + __m128i a; + __m128i b; + __m128i c; + __m128i d; + __m128i u; + __m128i v; + __m128i w; + __m128i z = _mm_setzero_si128(); + #define BLOCK_SIZE 16 +#endif - /* 128 bytes loop */ - while (n >= 128) { - a = _mm256_cmpeq_epi8 (_mm256_loadu_si256(as_m256c(p + 0)), z); - b = _mm256_cmpeq_epi8 (_mm256_loadu_si256(as_m256c(p + 32)), z); - c = _mm256_cmpeq_epi8 (_mm256_loadu_si256(as_m256c(p + 64)), z); - d = _mm256_cmpeq_epi8 (_mm256_loadu_si256(as_m256c(p + 96)), z); - u = _mm256_and_si256 (a, b); - v = _mm256_and_si256 (c, d); - w = _mm256_xor_si256 (v, y); +#if USE_AVX2 +#define _mm_load _mm256_loadu_si256 +#define _mm_and(a, b) _mm256_and_si256(a, b) +#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b) +#define _mm_testinz(v) (!_mm256_testc_si256(v, y)) +#elif USE_AVX +#define _mm_load _mm256_loadu_si256 +#define _mm_and(a, b) _mm256_and_ps((__m256)a, (__m256)b) +#define _mm_cmpeq(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_OQ) +#define _mm_testinz(v) (!_mm256_testc_si256(v, y)) +#else +#define _mm_load _mm_loadu_si128 +#define _mm_and(a, b) _mm_and_si128(a, b) +#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b) +#define _mm_testinz(v) (_mm_movemask_epi8(v) != 0xffff) +#endif + + /* multi-block loop (unaligned loads: p is never aligned in this function) */ 
+ while (n >= BLOCK_SIZE * 4) { + a = _mm_load ((const void *)(p + BLOCK_SIZE * 0)); + b = _mm_load ((const void *)(p + BLOCK_SIZE * 1)); + c = _mm_load ((const void *)(p + BLOCK_SIZE * 2)); + d = _mm_load ((const void *)(p + BLOCK_SIZE * 3)); + a = _mm_cmpeq (a, z); + b = _mm_cmpeq (b, z); + c = _mm_cmpeq (c, z); + d = _mm_cmpeq (d, z); + u = _mm_and (a, b); + v = _mm_and (c, d); + w = _mm_and (u, v); /* test for zeros */ - if (!_mm256_testc_si256(u, w)) { + if (_mm_testinz(w)) { return 1; } /* move to next block */ - p += 128; - n -= 128; + p += BLOCK_SIZE * 4; + n -= BLOCK_SIZE * 4; } - /* 32 bytes loop */ - while (n >= 32) { - a = _mm256_loadu_si256 (as_m256c(p)); - b = _mm256_cmpeq_epi8 (a, z); + /* single block loop */ + while (n >= BLOCK_SIZE) { + a = _mm_load ((const void *)(p)); + b = _mm_cmpeq (a, z); /* test for zeros */ - if (!_mm256_testc_si256(b, y)) { + if (_mm_testinz(b)) { return 1; } /* move to next block */ - p += 32; - n -= 32; + p += BLOCK_SIZE; + n -= BLOCK_SIZE; } +#undef _mm_load +#undef _mm_cmpeq +#undef _mm_and +#undef _mm_testinz +#undef BLOCK_SIZE +#endif + /* 8 bytes loop */ while (n >= 8) { if (*(uint64_t *)p) { diff --git a/native/fastint.c b/native/fastint.c index dbe1ea3..7b34509 100644 --- a/native/fastint.c +++ b/native/fastint.c @@ -16,6 +16,8 @@ #include "native.h" +#if USE_SSE + static const char Digits[200] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', @@ -221,6 +223,8 @@ static inline int u64toa_xlarge_sse2(char *out, uint64_t val) { return n + 16; } +#endif + int i64toa(char *out, int64_t val) { if (likely(val >= 0)) { return u64toa(out, (uint64_t)val); @@ -230,6 +234,8 @@ int i64toa(char *out, int64_t val) { } } +#if USE_SSE + int u64toa(char *out, uint64_t val) { if (likely(val < 10000)) { return u32toa_small(out, (uint32_t)val); @@ -241,3 +247,29 @@ int 
u64toa(char *out, uint64_t val) { return u64toa_xlarge_sse2(out, val); } } + +#else + +int u64toa(char *out, uint64_t val) { + char c; + long n = 0; + uint64_t v = val; + + /* convert each digit, least significant first (do-while so that 0 still yields "0") */ + do { + out[n++] = v % 10 + '0'; + v /= 10; + } while (v); + + /* reverse the output */ + for (long i = 0; i < n / 2; i++) { + c = out[i]; + out[i] = out[n - i - 1]; + out[n - i - 1] = c; + } + + /* all done */ + return n; +} + +#endif diff --git a/native/parsing.c b/native/parsing.c index eee87bd..55a828c 100644 --- a/native/parsing.c +++ b/native/parsing.c @@ -28,55 +28,60 @@ static const char _UnquoteTab[256] = { ['\\'] = '\\', }; -#define memcchr_p32_avx2() \ - while (n >= 32) { \ - u = _mm256_loadu_si256 ((const void *)s); \ - v = _mm256_cmpeq_epi8 (u, b); \ - _mm256_storeu_si256 ((void *)p, u); \ - \ - /* check for matches */ \ - if ((r = _mm256_movemask_epi8(v)) != 0) { \ - return s - q + _mm_tzcnt_64(r); \ - } \ - \ - /* move to the next 32 bytes */ \ - s += 32; \ - p += 32; \ - n -= 32; \ - } \ - -#define memcchr_p32_sse2() \ - if (n >= 16) { \ - x = _mm_loadu_si128 ((const void *)s); \ - y = _mm_cmpeq_epi8 (x, a); \ - _mm_storeu_si128 ((void *)p, x); \ - \ - /* check for matches */ \ - if ((r = _mm_movemask_epi8(y)) != 0) { \ - return s - q + _mm_tzcnt_64(r); \ - } \ - \ - /* move to the next 16 bytes */ \ - s += 16; \ - p += 16; \ - n -= 16; \ - } - static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) { - int32_t r; - __m128i x; - __m128i y; - __m256i u; - __m256i v; - __m128i a = _mm_set1_epi8('\\'); - __m256i b = _mm256_set1_epi8('\\'); + int64_t r; ssize_t n = nb; const char * q = s; - /* scan & copy with SIMD */ - memcchr_p32_avx2(); +#if USE_AVX2 + __m256i u; + __m256i v; + __m256i b = _mm256_set1_epi8('\\'); + + /* process every 32 bytes */ + while (n >= 32) { + u = _mm256_loadu_si256 ((const void *)s); + v = _mm256_cmpeq_epi8 (u, b); + _mm256_storeu_si256 ((void *)p, u); + + /* check for matches */ + if ((r = _mm256_movemask_epi8(v)) != 
0) { + return s - q + __builtin_ctzll(r | (1ull << 32)); + } + + /* move to the next 32 bytes */ + s += 32; + p += 32; + n -= 32; + } +#endif + +#if USE_AVX2 _mm256_zeroupper(); - memcchr_p32_sse2(); +#endif + +#if USE_SSE + __m128i x; + __m128i y; + __m128i a = _mm_set1_epi8('\\'); + + /* process every 16 bytes */ + while (n >= 16) { + x = _mm_loadu_si128 ((const void *)s); + y = _mm_cmpeq_epi8 (x, a); + _mm_storeu_si128 ((void *)p, x); + + /* check for matches */ + if ((r = _mm_movemask_epi8(y)) != 0) { + return s - q + __builtin_ctzll(r | (1 << 16)); + } + + /* move to the next 16 bytes */ + s += 16; + p += 16; + n -= 16; + } +#endif /* remaining bytes, do with scalar code */ while (n--) { @@ -91,9 +96,6 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) { return -1; } -#undef memcchr_p32_avx2 -#undef memcchr_p32_sse2 - #define ALL_01h (~0ul / 255) #define ALL_7fh (ALL_01h * 127) #define ALL_80h (ALL_01h * 128) diff --git a/tools/asm2asm b/tools/asm2asm index 75fa4e6..38a8136 160000 --- a/tools/asm2asm +++ b/tools/asm2asm @@ -1 +1 @@ -Subproject commit 75fa4e6f2ee3ee4049724fe2a3806fbf119d0667 +Subproject commit 38a813682862252de3d1a016c90755467bff0ee9