mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
fix: use sse instead of sse4 (#305)
* fix: use sse instead of sse4
* fix: use dispatch
* fix: remove lzero

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
This commit is contained in:
parent
5e54c02172
commit
56e81a633e
20 changed files with 8711 additions and 11320 deletions
16
Makefile
16
Makefile
|
|
@ -14,22 +14,22 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
ARCH := avx avx2 sse4
|
||||
ARCH := avx avx2 sse
|
||||
TMP_DIR := output
|
||||
OUT_DIR := internal/native
|
||||
SRC_FILE := native/native.c
|
||||
|
||||
CPU_avx := amd64
|
||||
CPU_avx2 := amd64
|
||||
CPU_sse4 := amd64
|
||||
CPU_sse := amd64
|
||||
|
||||
TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
TMPL_sse4 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
TMPL_sse := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
|
||||
CFLAGS_avx := -msse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0
|
||||
CFLAGS_avx2 := -msse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1
|
||||
CFLAGS_sse4 := -msse4 -mno-avx -mno-avx2
|
||||
CFLAGS_avx := -msse -mno-sse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0
|
||||
CFLAGS_avx2 := -msse -mno-sse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1
|
||||
CFLAGS_sse := -msse -mno-sse4 -mno-avx -mno-avx2
|
||||
|
||||
CC_amd64 := clang
|
||||
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py
|
||||
|
|
@ -100,8 +100,8 @@ endef
|
|||
all: ${ARCH}
|
||||
|
||||
clean:
|
||||
rm -vfr ${TMP_DIR}/{sse4,avx,avx2}
|
||||
rm -vfr ${OUT_DIR}/{sse4,avx,avx2}
|
||||
rm -vfr ${TMP_DIR}/{sse,avx,avx2}
|
||||
rm -vfr ${OUT_DIR}/{sse,avx,avx2}
|
||||
|
||||
$(foreach \
|
||||
arch, \
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ import (
|
|||
var (
|
||||
HasAVX = cpuid.CPU.Has(cpuid.AVX)
|
||||
HasAVX2 = cpuid.CPU.Has(cpuid.AVX2)
|
||||
HasSSE4 = cpuid.CPU.Has(cpuid.SSE4)
|
||||
HasSSE = cpuid.CPU.Has(cpuid.SSE)
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
|
|
|||
|
|
@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
|
|||
//goland:noinspection GoUnusedParameter
|
||||
func __f32toa(out *byte, val float32) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lzero(p unsafe.Pointer, n int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,32 @@ package avx
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 24880
|
||||
_subr__f64toa = __native_entry__() + 720
|
||||
_subr__html_escape = __native_entry__() + 10768
|
||||
_subr__i64toa = __native_entry__() + 4464
|
||||
_subr__lspace = __native_entry__() + 320
|
||||
_subr__lzero = __native_entry__() + 16
|
||||
_subr__quote = __native_entry__() + 5840
|
||||
_subr__skip_array = __native_entry__() + 23152
|
||||
_subr__skip_number = __native_entry__() + 24624
|
||||
_subr__skip_object = __native_entry__() + 23200
|
||||
_subr__skip_one = __native_entry__() + 21280
|
||||
_subr__u64toa = __native_entry__() + 4576
|
||||
_subr__unquote = __native_entry__() + 7584
|
||||
_subr__validate_one = __native_entry__() + 24768
|
||||
_subr__value = __native_entry__() + 14016
|
||||
_subr__vnumber = __native_entry__() + 19024
|
||||
_subr__vsigned = __native_entry__() + 20576
|
||||
_subr__vstring = __native_entry__() + 16096
|
||||
_subr__vunsigned = __native_entry__() + 20928
|
||||
_subr__f32toa = __native_entry__() + 24592
|
||||
_subr__f64toa = __native_entry__() + 496
|
||||
_subr__html_escape = __native_entry__() + 10480
|
||||
_subr__i64toa = __native_entry__() + 4176
|
||||
_subr__lspace = __native_entry__() + 80
|
||||
_subr__quote = __native_entry__() + 5552
|
||||
_subr__skip_array = __native_entry__() + 22864
|
||||
_subr__skip_number = __native_entry__() + 24336
|
||||
_subr__skip_object = __native_entry__() + 22912
|
||||
_subr__skip_one = __native_entry__() + 20992
|
||||
_subr__u64toa = __native_entry__() + 4288
|
||||
_subr__unquote = __native_entry__() + 7296
|
||||
_subr__validate_one = __native_entry__() + 24480
|
||||
_subr__value = __native_entry__() + 13728
|
||||
_subr__vnumber = __native_entry__() + 18736
|
||||
_subr__vsigned = __native_entry__() + 20288
|
||||
_subr__vstring = __native_entry__() + 15808
|
||||
_subr__vunsigned = __native_entry__() + 20640
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f32toa = 728
|
||||
_stack__f64toa = 712
|
||||
_stack__f32toa = 64
|
||||
_stack__f64toa = 80
|
||||
_stack__html_escape = 64
|
||||
_stack__i64toa = 16
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 80
|
||||
_stack__skip_array = 128
|
||||
_stack__skip_number = 72
|
||||
|
|
@ -58,7 +56,6 @@ var (
|
|||
_ = _subr__html_escape
|
||||
_ = _subr__i64toa
|
||||
_ = _subr__lspace
|
||||
_ = _subr__lzero
|
||||
_ = _subr__quote
|
||||
_ = _subr__skip_array
|
||||
_ = _subr__skip_number
|
||||
|
|
@ -80,7 +77,6 @@ const (
|
|||
_ = _stack__html_escape
|
||||
_ = _stack__i64toa
|
||||
_ = _stack__lspace
|
||||
_ = _stack__lzero
|
||||
_ = _stack__quote
|
||||
_ = _stack__skip_array
|
||||
_ = _stack__skip_number
|
||||
|
|
|
|||
|
|
@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
|
|||
//goland:noinspection GoUnusedParameter
|
||||
func __f32toa(out *byte, val float32) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lzero(p unsafe.Pointer, n int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,32 @@ package avx2
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 28752
|
||||
_subr__f64toa = __native_entry__() + 976
|
||||
_subr__html_escape = __native_entry__() + 12608
|
||||
_subr__i64toa = __native_entry__() + 4720
|
||||
_subr__lspace = __native_entry__() + 448
|
||||
_subr__lzero = __native_entry__() + 16
|
||||
_subr__quote = __native_entry__() + 6192
|
||||
_subr__skip_array = __native_entry__() + 26400
|
||||
_subr__skip_number = __native_entry__() + 28496
|
||||
_subr__skip_object = __native_entry__() + 26448
|
||||
_subr__skip_one = __native_entry__() + 24496
|
||||
_subr__u64toa = __native_entry__() + 4832
|
||||
_subr__unquote = __native_entry__() + 9136
|
||||
_subr__validate_one = __native_entry__() + 28640
|
||||
_subr__value = __native_entry__() + 17184
|
||||
_subr__vnumber = __native_entry__() + 22240
|
||||
_subr__vsigned = __native_entry__() + 23792
|
||||
_subr__vstring = __native_entry__() + 19568
|
||||
_subr__vunsigned = __native_entry__() + 24144
|
||||
_subr__f32toa = __native_entry__() + 28464
|
||||
_subr__f64toa = __native_entry__() + 752
|
||||
_subr__html_escape = __native_entry__() + 12320
|
||||
_subr__i64toa = __native_entry__() + 4432
|
||||
_subr__lspace = __native_entry__() + 224
|
||||
_subr__quote = __native_entry__() + 5904
|
||||
_subr__skip_array = __native_entry__() + 26112
|
||||
_subr__skip_number = __native_entry__() + 28208
|
||||
_subr__skip_object = __native_entry__() + 26160
|
||||
_subr__skip_one = __native_entry__() + 24208
|
||||
_subr__u64toa = __native_entry__() + 4544
|
||||
_subr__unquote = __native_entry__() + 8848
|
||||
_subr__validate_one = __native_entry__() + 28352
|
||||
_subr__value = __native_entry__() + 16896
|
||||
_subr__vnumber = __native_entry__() + 21952
|
||||
_subr__vsigned = __native_entry__() + 23504
|
||||
_subr__vstring = __native_entry__() + 19280
|
||||
_subr__vunsigned = __native_entry__() + 23856
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f32toa = 728
|
||||
_stack__f64toa = 712
|
||||
_stack__f32toa = 64
|
||||
_stack__f64toa = 80
|
||||
_stack__html_escape = 72
|
||||
_stack__i64toa = 16
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 72
|
||||
_stack__skip_array = 136
|
||||
_stack__skip_number = 80
|
||||
|
|
@ -58,7 +56,6 @@ var (
|
|||
_ = _subr__html_escape
|
||||
_ = _subr__i64toa
|
||||
_ = _subr__lspace
|
||||
_ = _subr__lzero
|
||||
_ = _subr__quote
|
||||
_ = _subr__skip_array
|
||||
_ = _subr__skip_number
|
||||
|
|
@ -80,7 +77,6 @@ const (
|
|||
_ = _stack__html_escape
|
||||
_ = _stack__i64toa
|
||||
_ = _stack__lspace
|
||||
_ = _stack__lzero
|
||||
_ = _stack__quote
|
||||
_ = _stack__skip_array
|
||||
_ = _stack__skip_number
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import (
|
|||
`github.com/bytedance/sonic/internal/cpu`
|
||||
`github.com/bytedance/sonic/internal/native/avx`
|
||||
`github.com/bytedance/sonic/internal/native/avx2`
|
||||
`github.com/bytedance/sonic/internal/native/sse4`
|
||||
`github.com/bytedance/sonic/internal/native/sse`
|
||||
`github.com/bytedance/sonic/internal/native/types`
|
||||
)
|
||||
|
||||
|
|
@ -134,23 +134,23 @@ func useAVX2() {
|
|||
S_skip_number = avx2.S_skip_number
|
||||
}
|
||||
|
||||
func useSSE4() {
|
||||
S_f64toa = sse4.S_f64toa
|
||||
S_f32toa = sse4.S_f32toa
|
||||
S_i64toa = sse4.S_i64toa
|
||||
S_u64toa = sse4.S_u64toa
|
||||
S_lspace = sse4.S_lspace
|
||||
S_quote = sse4.S_quote
|
||||
S_unquote = sse4.S_unquote
|
||||
S_value = sse4.S_value
|
||||
S_vstring = sse4.S_vstring
|
||||
S_vnumber = sse4.S_vnumber
|
||||
S_vsigned = sse4.S_vsigned
|
||||
S_vunsigned = sse4.S_vunsigned
|
||||
S_skip_one = sse4.S_skip_one
|
||||
S_skip_array = sse4.S_skip_array
|
||||
S_skip_object = sse4.S_skip_object
|
||||
S_skip_number = sse4.S_skip_number
|
||||
func useSSE() {
|
||||
S_f64toa = sse.S_f64toa
|
||||
S_f32toa = sse.S_f32toa
|
||||
S_i64toa = sse.S_i64toa
|
||||
S_u64toa = sse.S_u64toa
|
||||
S_lspace = sse.S_lspace
|
||||
S_quote = sse.S_quote
|
||||
S_unquote = sse.S_unquote
|
||||
S_value = sse.S_value
|
||||
S_vstring = sse.S_vstring
|
||||
S_vnumber = sse.S_vnumber
|
||||
S_vsigned = sse.S_vsigned
|
||||
S_vunsigned = sse.S_vunsigned
|
||||
S_skip_one = sse.S_skip_one
|
||||
S_skip_array = sse.S_skip_array
|
||||
S_skip_object = sse.S_skip_object
|
||||
S_skip_number = sse.S_skip_number
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
|
@ -158,8 +158,8 @@ func init() {
|
|||
useAVX2()
|
||||
} else if cpu.HasAVX {
|
||||
useAVX()
|
||||
} else if cpu.HasSSE4 {
|
||||
useSSE4()
|
||||
} else if cpu.HasSSE {
|
||||
useSSE()
|
||||
} else {
|
||||
panic("Unsupported CPU, maybe it's too old to run Sonic.")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ TEXT ·Quote(SB), NOSPLIT, $0 - 48
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__quote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__quote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__quote(SB)
|
||||
|
||||
TEXT ·Unquote(SB), NOSPLIT, $0 - 48
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -34,7 +34,7 @@ TEXT ·Unquote(SB), NOSPLIT, $0 - 48
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__unquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__unquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__unquote(SB)
|
||||
|
||||
TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -43,7 +43,7 @@ TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__html_escape(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__html_escape(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__html_escape(SB)
|
||||
|
||||
|
||||
TEXT ·Value(SB), NOSPLIT, $0 - 48
|
||||
|
|
@ -53,7 +53,7 @@ TEXT ·Value(SB), NOSPLIT, $0 - 48
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__value(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__value(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__value(SB)
|
||||
|
||||
TEXT ·SkipOne(SB), NOSPLIT, $0 - 40
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -62,7 +62,7 @@ TEXT ·SkipOne(SB), NOSPLIT, $0 - 40
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__skip_one(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__skip_one(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__skip_one(SB)
|
||||
|
||||
TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -71,7 +71,7 @@ TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__validate_one(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__validate_one(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__validate_one(SB)
|
||||
|
||||
TEXT ·I64toa(SB), NOSPLIT, $0 - 32
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -80,7 +80,7 @@ TEXT ·I64toa(SB), NOSPLIT, $0 - 32
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__i64toa(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__i64toa(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__i64toa(SB)
|
||||
|
||||
TEXT ·U64toa(SB), NOSPLIT, $0 - 32
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -89,5 +89,5 @@ TEXT ·U64toa(SB), NOSPLIT, $0 - 32
|
|||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__u64toa(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse4·__u64toa(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__u64toa(SB)
|
||||
|
||||
|
|
|
|||
|
|
@ -42,11 +42,6 @@ func __f64toa(out *byte, val float64) (ret int)
|
|||
//goland:noinspection GoUnusedParameter
|
||||
func __f32toa(out *byte, val float32) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lzero(p unsafe.Pointer, n int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
import (
|
||||
`math`
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
import (
|
||||
`strconv`
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
import (
|
||||
`unsafe`
|
||||
|
|
@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
|
|||
//goland:noinspection GoUnusedParameter
|
||||
func __f32toa(out *byte, val float32) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lzero(p unsafe.Pointer, n int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -16,7 +16,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
import (
|
||||
`encoding/hex`
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
var (
|
||||
S_f64toa = _subr__f64toa
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
// +build !noasm !appengine
|
||||
// Code generated by asm2asm, DO NOT EDIT.
|
||||
|
||||
package sse4
|
||||
package sse
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -9,34 +9,32 @@ package sse4
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 24896
|
||||
_subr__f64toa = __native_entry__() + 640
|
||||
_subr__html_escape = __native_entry__() + 10672
|
||||
_subr__i64toa = __native_entry__() + 4304
|
||||
_subr__lspace = __native_entry__() + 256
|
||||
_subr__lzero = __native_entry__() + 16
|
||||
_subr__quote = __native_entry__() + 5712
|
||||
_subr__skip_array = __native_entry__() + 23184
|
||||
_subr__skip_number = __native_entry__() + 24688
|
||||
_subr__skip_object = __native_entry__() + 23232
|
||||
_subr__skip_one = __native_entry__() + 21312
|
||||
_subr__u64toa = __native_entry__() + 4432
|
||||
_subr__unquote = __native_entry__() + 7488
|
||||
_subr__validate_one = __native_entry__() + 24832
|
||||
_subr__value = __native_entry__() + 13936
|
||||
_subr__vnumber = __native_entry__() + 19056
|
||||
_subr__vsigned = __native_entry__() + 20608
|
||||
_subr__vstring = __native_entry__() + 16016
|
||||
_subr__vunsigned = __native_entry__() + 20960
|
||||
_subr__f32toa = __native_entry__() + 24640
|
||||
_subr__f64toa = __native_entry__() + 464
|
||||
_subr__html_escape = __native_entry__() + 10416
|
||||
_subr__i64toa = __native_entry__() + 4048
|
||||
_subr__lspace = __native_entry__() + 80
|
||||
_subr__quote = __native_entry__() + 5456
|
||||
_subr__skip_array = __native_entry__() + 22928
|
||||
_subr__skip_number = __native_entry__() + 24432
|
||||
_subr__skip_object = __native_entry__() + 22976
|
||||
_subr__skip_one = __native_entry__() + 21056
|
||||
_subr__u64toa = __native_entry__() + 4176
|
||||
_subr__unquote = __native_entry__() + 7232
|
||||
_subr__validate_one = __native_entry__() + 24576
|
||||
_subr__value = __native_entry__() + 13680
|
||||
_subr__vnumber = __native_entry__() + 18800
|
||||
_subr__vsigned = __native_entry__() + 20352
|
||||
_subr__vstring = __native_entry__() + 15760
|
||||
_subr__vunsigned = __native_entry__() + 20704
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f32toa = 632
|
||||
_stack__f64toa = 616
|
||||
_stack__f32toa = 64
|
||||
_stack__f64toa = 80
|
||||
_stack__html_escape = 64
|
||||
_stack__i64toa = 16
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 80
|
||||
_stack__skip_array = 128
|
||||
_stack__skip_number = 72
|
||||
|
|
@ -58,7 +56,6 @@ var (
|
|||
_ = _subr__html_escape
|
||||
_ = _subr__i64toa
|
||||
_ = _subr__lspace
|
||||
_ = _subr__lzero
|
||||
_ = _subr__quote
|
||||
_ = _subr__skip_array
|
||||
_ = _subr__skip_number
|
||||
|
|
@ -80,7 +77,6 @@ const (
|
|||
_ = _stack__html_escape
|
||||
_ = _stack__i64toa
|
||||
_ = _stack__lspace
|
||||
_ = _stack__lzero
|
||||
_ = _stack__quote
|
||||
_ = _stack__skip_array
|
||||
_ = _stack__skip_number
|
||||
|
|
@ -16,84 +16,6 @@
|
|||
|
||||
#include "native.h"
|
||||
|
||||
static inline int is_zero_sse(__m128i v) {
|
||||
return _mm_testz_si128(v, v);
|
||||
}
|
||||
|
||||
#if USE_AVX
|
||||
static inline int is_zero_avx(__m256i v) {
|
||||
return _mm256_testz_si256(v, v);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t lzero(const char *sp, size_t nb) {
|
||||
size_t n = nb;
|
||||
const char * p = sp;
|
||||
|
||||
#if USE_AVX
|
||||
/* 32-byte loop */
|
||||
while (n >= 32) {
|
||||
if (!is_zero_avx(_mm256_loadu_si256((const void *)p))) {
|
||||
_mm256_zeroupper();
|
||||
return 1;
|
||||
} else {
|
||||
p += 32;
|
||||
n -= 32;
|
||||
}
|
||||
}
|
||||
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 16-byte loop */
|
||||
while (n >= 16) {
|
||||
if (!is_zero_sse(_mm_loadu_si128((const void *)p))) {
|
||||
return 1;
|
||||
} else {
|
||||
p += 16;
|
||||
n -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* 8-byte test */
|
||||
if (n >= 8) {
|
||||
if (*(uint64_t *)p) {
|
||||
return 1;
|
||||
} else {
|
||||
p += 8;
|
||||
n -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* 4-byte test */
|
||||
if (n >= 4) {
|
||||
if (*(uint32_t *)p) {
|
||||
return 1;
|
||||
} else {
|
||||
p += 4;
|
||||
n -= 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* 2-byte test */
|
||||
if (n >= 2) {
|
||||
if (*(uint16_t *)p) {
|
||||
return 1;
|
||||
} else {
|
||||
p += 2;
|
||||
n -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* the final byte */
|
||||
if (n == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return *p != 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
static const uintptr_t ALIGN_MASK = 31;
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -101,7 +101,6 @@ int f64toa(char *out, double val);
|
|||
int i64toa(char *out, int64_t val);
|
||||
int u64toa(char *out, uint64_t val);
|
||||
|
||||
size_t lzero(const char *sp, size_t nb);
|
||||
size_t lspace(const char *sp, size_t nb, size_t p);
|
||||
|
||||
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags);
|
||||
|
|
|
|||
Loading…
Reference in a new issue