2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

fix: use sse instead of sse4 (#305)

* fix: use sse instead of sse4

* fix: use dispatch

* fix: remove lzero

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
This commit is contained in:
liu 2022-09-26 12:45:01 +08:00 committed by GitHub
parent 5e54c02172
commit 56e81a633e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 8711 additions and 11320 deletions

View file

@ -14,22 +14,22 @@
# limitations under the License. # limitations under the License.
# #
ARCH := avx avx2 sse4 ARCH := avx avx2 sse
TMP_DIR := output TMP_DIR := output
OUT_DIR := internal/native OUT_DIR := internal/native
SRC_FILE := native/native.c SRC_FILE := native/native.c
CPU_avx := amd64 CPU_avx := amd64
CPU_avx2 := amd64 CPU_avx2 := amd64
CPU_sse4 := amd64 CPU_sse := amd64
TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64 TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64 TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
TMPL_sse4 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64 TMPL_sse := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
CFLAGS_avx := -msse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0 CFLAGS_avx := -msse -mno-sse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0
CFLAGS_avx2 := -msse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1 CFLAGS_avx2 := -msse -mno-sse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1
CFLAGS_sse4 := -msse4 -mno-avx -mno-avx2 CFLAGS_sse := -msse -mno-sse4 -mno-avx -mno-avx2
CC_amd64 := clang CC_amd64 := clang
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py ASM2ASM_amd64 := tools/asm2asm/asm2asm.py
@ -100,8 +100,8 @@ endef
all: ${ARCH} all: ${ARCH}
clean: clean:
rm -vfr ${TMP_DIR}/{sse4,avx,avx2} rm -vfr ${TMP_DIR}/{sse,avx,avx2}
rm -vfr ${OUT_DIR}/{sse4,avx,avx2} rm -vfr ${OUT_DIR}/{sse,avx,avx2}
$(foreach \ $(foreach \
arch, \ arch, \

View file

@ -26,7 +26,7 @@ import (
var ( var (
HasAVX = cpuid.CPU.Has(cpuid.AVX) HasAVX = cpuid.CPU.Has(cpuid.AVX)
HasAVX2 = cpuid.CPU.Has(cpuid.AVX2) HasAVX2 = cpuid.CPU.Has(cpuid.AVX2)
HasSSE4 = cpuid.CPU.Has(cpuid.SSE4) HasSSE = cpuid.CPU.Has(cpuid.SSE)
) )
func init() { func init() {

View file

@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func __f32toa(out *byte, val float32) (ret int) func __f32toa(out *byte, val float32) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit //go:nosplit
//go:noescape //go:noescape
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter

File diff suppressed because it is too large Load diff

View file

@ -9,34 +9,32 @@ package avx
func __native_entry__() uintptr func __native_entry__() uintptr
var ( var (
_subr__f32toa = __native_entry__() + 24880 _subr__f32toa = __native_entry__() + 24592
_subr__f64toa = __native_entry__() + 720 _subr__f64toa = __native_entry__() + 496
_subr__html_escape = __native_entry__() + 10768 _subr__html_escape = __native_entry__() + 10480
_subr__i64toa = __native_entry__() + 4464 _subr__i64toa = __native_entry__() + 4176
_subr__lspace = __native_entry__() + 320 _subr__lspace = __native_entry__() + 80
_subr__lzero = __native_entry__() + 16 _subr__quote = __native_entry__() + 5552
_subr__quote = __native_entry__() + 5840 _subr__skip_array = __native_entry__() + 22864
_subr__skip_array = __native_entry__() + 23152 _subr__skip_number = __native_entry__() + 24336
_subr__skip_number = __native_entry__() + 24624 _subr__skip_object = __native_entry__() + 22912
_subr__skip_object = __native_entry__() + 23200 _subr__skip_one = __native_entry__() + 20992
_subr__skip_one = __native_entry__() + 21280 _subr__u64toa = __native_entry__() + 4288
_subr__u64toa = __native_entry__() + 4576 _subr__unquote = __native_entry__() + 7296
_subr__unquote = __native_entry__() + 7584 _subr__validate_one = __native_entry__() + 24480
_subr__validate_one = __native_entry__() + 24768 _subr__value = __native_entry__() + 13728
_subr__value = __native_entry__() + 14016 _subr__vnumber = __native_entry__() + 18736
_subr__vnumber = __native_entry__() + 19024 _subr__vsigned = __native_entry__() + 20288
_subr__vsigned = __native_entry__() + 20576 _subr__vstring = __native_entry__() + 15808
_subr__vstring = __native_entry__() + 16096 _subr__vunsigned = __native_entry__() + 20640
_subr__vunsigned = __native_entry__() + 20928
) )
const ( const (
_stack__f32toa = 728 _stack__f32toa = 64
_stack__f64toa = 712 _stack__f64toa = 80
_stack__html_escape = 64 _stack__html_escape = 64
_stack__i64toa = 16 _stack__i64toa = 16
_stack__lspace = 8 _stack__lspace = 8
_stack__lzero = 8
_stack__quote = 80 _stack__quote = 80
_stack__skip_array = 128 _stack__skip_array = 128
_stack__skip_number = 72 _stack__skip_number = 72
@ -58,7 +56,6 @@ var (
_ = _subr__html_escape _ = _subr__html_escape
_ = _subr__i64toa _ = _subr__i64toa
_ = _subr__lspace _ = _subr__lspace
_ = _subr__lzero
_ = _subr__quote _ = _subr__quote
_ = _subr__skip_array _ = _subr__skip_array
_ = _subr__skip_number _ = _subr__skip_number
@ -80,7 +77,6 @@ const (
_ = _stack__html_escape _ = _stack__html_escape
_ = _stack__i64toa _ = _stack__i64toa
_ = _stack__lspace _ = _stack__lspace
_ = _stack__lzero
_ = _stack__quote _ = _stack__quote
_ = _stack__skip_array _ = _stack__skip_array
_ = _stack__skip_number _ = _stack__skip_number

View file

@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func __f32toa(out *byte, val float32) (ret int) func __f32toa(out *byte, val float32) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit //go:nosplit
//go:noescape //go:noescape
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter

File diff suppressed because it is too large Load diff

View file

@ -9,34 +9,32 @@ package avx2
func __native_entry__() uintptr func __native_entry__() uintptr
var ( var (
_subr__f32toa = __native_entry__() + 28752 _subr__f32toa = __native_entry__() + 28464
_subr__f64toa = __native_entry__() + 976 _subr__f64toa = __native_entry__() + 752
_subr__html_escape = __native_entry__() + 12608 _subr__html_escape = __native_entry__() + 12320
_subr__i64toa = __native_entry__() + 4720 _subr__i64toa = __native_entry__() + 4432
_subr__lspace = __native_entry__() + 448 _subr__lspace = __native_entry__() + 224
_subr__lzero = __native_entry__() + 16 _subr__quote = __native_entry__() + 5904
_subr__quote = __native_entry__() + 6192 _subr__skip_array = __native_entry__() + 26112
_subr__skip_array = __native_entry__() + 26400 _subr__skip_number = __native_entry__() + 28208
_subr__skip_number = __native_entry__() + 28496 _subr__skip_object = __native_entry__() + 26160
_subr__skip_object = __native_entry__() + 26448 _subr__skip_one = __native_entry__() + 24208
_subr__skip_one = __native_entry__() + 24496 _subr__u64toa = __native_entry__() + 4544
_subr__u64toa = __native_entry__() + 4832 _subr__unquote = __native_entry__() + 8848
_subr__unquote = __native_entry__() + 9136 _subr__validate_one = __native_entry__() + 28352
_subr__validate_one = __native_entry__() + 28640 _subr__value = __native_entry__() + 16896
_subr__value = __native_entry__() + 17184 _subr__vnumber = __native_entry__() + 21952
_subr__vnumber = __native_entry__() + 22240 _subr__vsigned = __native_entry__() + 23504
_subr__vsigned = __native_entry__() + 23792 _subr__vstring = __native_entry__() + 19280
_subr__vstring = __native_entry__() + 19568 _subr__vunsigned = __native_entry__() + 23856
_subr__vunsigned = __native_entry__() + 24144
) )
const ( const (
_stack__f32toa = 728 _stack__f32toa = 64
_stack__f64toa = 712 _stack__f64toa = 80
_stack__html_escape = 72 _stack__html_escape = 72
_stack__i64toa = 16 _stack__i64toa = 16
_stack__lspace = 8 _stack__lspace = 8
_stack__lzero = 8
_stack__quote = 72 _stack__quote = 72
_stack__skip_array = 136 _stack__skip_array = 136
_stack__skip_number = 80 _stack__skip_number = 80
@ -58,7 +56,6 @@ var (
_ = _subr__html_escape _ = _subr__html_escape
_ = _subr__i64toa _ = _subr__i64toa
_ = _subr__lspace _ = _subr__lspace
_ = _subr__lzero
_ = _subr__quote _ = _subr__quote
_ = _subr__skip_array _ = _subr__skip_array
_ = _subr__skip_number _ = _subr__skip_number
@ -80,7 +77,6 @@ const (
_ = _stack__html_escape _ = _stack__html_escape
_ = _stack__i64toa _ = _stack__i64toa
_ = _stack__lspace _ = _stack__lspace
_ = _stack__lzero
_ = _stack__quote _ = _stack__quote
_ = _stack__skip_array _ = _stack__skip_array
_ = _stack__skip_number _ = _stack__skip_number

View file

@ -22,7 +22,7 @@ import (
`github.com/bytedance/sonic/internal/cpu` `github.com/bytedance/sonic/internal/cpu`
`github.com/bytedance/sonic/internal/native/avx` `github.com/bytedance/sonic/internal/native/avx`
`github.com/bytedance/sonic/internal/native/avx2` `github.com/bytedance/sonic/internal/native/avx2`
`github.com/bytedance/sonic/internal/native/sse4` `github.com/bytedance/sonic/internal/native/sse`
`github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/native/types`
) )
@ -134,23 +134,23 @@ func useAVX2() {
S_skip_number = avx2.S_skip_number S_skip_number = avx2.S_skip_number
} }
func useSSE4() { func useSSE() {
S_f64toa = sse4.S_f64toa S_f64toa = sse.S_f64toa
S_f32toa = sse4.S_f32toa S_f32toa = sse.S_f32toa
S_i64toa = sse4.S_i64toa S_i64toa = sse.S_i64toa
S_u64toa = sse4.S_u64toa S_u64toa = sse.S_u64toa
S_lspace = sse4.S_lspace S_lspace = sse.S_lspace
S_quote = sse4.S_quote S_quote = sse.S_quote
S_unquote = sse4.S_unquote S_unquote = sse.S_unquote
S_value = sse4.S_value S_value = sse.S_value
S_vstring = sse4.S_vstring S_vstring = sse.S_vstring
S_vnumber = sse4.S_vnumber S_vnumber = sse.S_vnumber
S_vsigned = sse4.S_vsigned S_vsigned = sse.S_vsigned
S_vunsigned = sse4.S_vunsigned S_vunsigned = sse.S_vunsigned
S_skip_one = sse4.S_skip_one S_skip_one = sse.S_skip_one
S_skip_array = sse4.S_skip_array S_skip_array = sse.S_skip_array
S_skip_object = sse4.S_skip_object S_skip_object = sse.S_skip_object
S_skip_number = sse4.S_skip_number S_skip_number = sse.S_skip_number
} }
func init() { func init() {
@ -158,8 +158,8 @@ func init() {
useAVX2() useAVX2()
} else if cpu.HasAVX { } else if cpu.HasAVX {
useAVX() useAVX()
} else if cpu.HasSSE4 { } else if cpu.HasSSE {
useSSE4() useSSE()
} else { } else {
panic("Unsupported CPU, maybe it's too old to run Sonic.") panic("Unsupported CPU, maybe it's too old to run Sonic.")
} }

View file

@ -25,7 +25,7 @@ TEXT ·Quote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__quote(SB) JMP github·combytedancesonicinternalnativeavx·__quote(SB)
JMP github·combytedancesonicinternalnativesse4·__quote(SB) JMP github·combytedancesonicinternalnativesse·__quote(SB)
TEXT ·Unquote(SB), NOSPLIT, $0 - 48 TEXT ·Unquote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -34,7 +34,7 @@ TEXT ·Unquote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__unquote(SB) JMP github·combytedancesonicinternalnativeavx·__unquote(SB)
JMP github·combytedancesonicinternalnativesse4·__unquote(SB) JMP github·combytedancesonicinternalnativesse·__unquote(SB)
TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40 TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -43,7 +43,7 @@ TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__html_escape(SB) JMP github·combytedancesonicinternalnativeavx·__html_escape(SB)
JMP github·combytedancesonicinternalnativesse4·__html_escape(SB) JMP github·combytedancesonicinternalnativesse·__html_escape(SB)
TEXT ·Value(SB), NOSPLIT, $0 - 48 TEXT ·Value(SB), NOSPLIT, $0 - 48
@ -53,7 +53,7 @@ TEXT ·Value(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__value(SB) JMP github·combytedancesonicinternalnativeavx·__value(SB)
JMP github·combytedancesonicinternalnativesse4·__value(SB) JMP github·combytedancesonicinternalnativesse·__value(SB)
TEXT ·SkipOne(SB), NOSPLIT, $0 - 40 TEXT ·SkipOne(SB), NOSPLIT, $0 - 40
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -62,7 +62,7 @@ TEXT ·SkipOne(SB), NOSPLIT, $0 - 40
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__skip_one(SB) JMP github·combytedancesonicinternalnativeavx·__skip_one(SB)
JMP github·combytedancesonicinternalnativesse4·__skip_one(SB) JMP github·combytedancesonicinternalnativesse·__skip_one(SB)
TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32 TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -71,7 +71,7 @@ TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__validate_one(SB) JMP github·combytedancesonicinternalnativeavx·__validate_one(SB)
JMP github·combytedancesonicinternalnativesse4·__validate_one(SB) JMP github·combytedancesonicinternalnativesse·__validate_one(SB)
TEXT ·I64toa(SB), NOSPLIT, $0 - 32 TEXT ·I64toa(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -80,7 +80,7 @@ TEXT ·I64toa(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__i64toa(SB) JMP github·combytedancesonicinternalnativeavx·__i64toa(SB)
JMP github·combytedancesonicinternalnativesse4·__i64toa(SB) JMP github·combytedancesonicinternalnativesse·__i64toa(SB)
TEXT ·U64toa(SB), NOSPLIT, $0 - 32 TEXT ·U64toa(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -89,5 +89,5 @@ TEXT ·U64toa(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0 CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC) JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__u64toa(SB) JMP github·combytedancesonicinternalnativeavx·__u64toa(SB)
JMP github·combytedancesonicinternalnativesse4·__u64toa(SB) JMP github·combytedancesonicinternalnativesse·__u64toa(SB)

View file

@ -42,11 +42,6 @@ func __f64toa(out *byte, val float64) (ret int)
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func __f32toa(out *byte, val float32) (ret int) func __f32toa(out *byte, val float32) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit //go:nosplit
//go:noescape //go:noescape
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter

View file

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package sse4 package sse
import ( import (
`math` `math`

View file

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package sse4 package sse
import ( import (
`strconv` `strconv`

View file

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package sse4 package sse
import ( import (
`unsafe` `unsafe`
@ -44,11 +44,6 @@ func __f64toa(out *byte, val float64) (ret int)
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func __f32toa(out *byte, val float32) (ret int) func __f32toa(out *byte, val float32) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit //go:nosplit
//go:noescape //go:noescape
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter

View file

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package sse4 package sse
import ( import (
`encoding/hex` `encoding/hex`

View file

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package sse4 package sse
var ( var (
S_f64toa = _subr__f64toa S_f64toa = _subr__f64toa

View file

@ -1,7 +1,7 @@
// +build !noasm !appengine // +build !noasm !appengine
// Code generated by asm2asm, DO NOT EDIT. // Code generated by asm2asm, DO NOT EDIT.
package sse4 package sse
//go:nosplit //go:nosplit
//go:noescape //go:noescape
@ -9,34 +9,32 @@ package sse4
func __native_entry__() uintptr func __native_entry__() uintptr
var ( var (
_subr__f32toa = __native_entry__() + 24896 _subr__f32toa = __native_entry__() + 24640
_subr__f64toa = __native_entry__() + 640 _subr__f64toa = __native_entry__() + 464
_subr__html_escape = __native_entry__() + 10672 _subr__html_escape = __native_entry__() + 10416
_subr__i64toa = __native_entry__() + 4304 _subr__i64toa = __native_entry__() + 4048
_subr__lspace = __native_entry__() + 256 _subr__lspace = __native_entry__() + 80
_subr__lzero = __native_entry__() + 16 _subr__quote = __native_entry__() + 5456
_subr__quote = __native_entry__() + 5712 _subr__skip_array = __native_entry__() + 22928
_subr__skip_array = __native_entry__() + 23184 _subr__skip_number = __native_entry__() + 24432
_subr__skip_number = __native_entry__() + 24688 _subr__skip_object = __native_entry__() + 22976
_subr__skip_object = __native_entry__() + 23232 _subr__skip_one = __native_entry__() + 21056
_subr__skip_one = __native_entry__() + 21312 _subr__u64toa = __native_entry__() + 4176
_subr__u64toa = __native_entry__() + 4432 _subr__unquote = __native_entry__() + 7232
_subr__unquote = __native_entry__() + 7488 _subr__validate_one = __native_entry__() + 24576
_subr__validate_one = __native_entry__() + 24832 _subr__value = __native_entry__() + 13680
_subr__value = __native_entry__() + 13936 _subr__vnumber = __native_entry__() + 18800
_subr__vnumber = __native_entry__() + 19056 _subr__vsigned = __native_entry__() + 20352
_subr__vsigned = __native_entry__() + 20608 _subr__vstring = __native_entry__() + 15760
_subr__vstring = __native_entry__() + 16016 _subr__vunsigned = __native_entry__() + 20704
_subr__vunsigned = __native_entry__() + 20960
) )
const ( const (
_stack__f32toa = 632 _stack__f32toa = 64
_stack__f64toa = 616 _stack__f64toa = 80
_stack__html_escape = 64 _stack__html_escape = 64
_stack__i64toa = 16 _stack__i64toa = 16
_stack__lspace = 8 _stack__lspace = 8
_stack__lzero = 8
_stack__quote = 80 _stack__quote = 80
_stack__skip_array = 128 _stack__skip_array = 128
_stack__skip_number = 72 _stack__skip_number = 72
@ -58,7 +56,6 @@ var (
_ = _subr__html_escape _ = _subr__html_escape
_ = _subr__i64toa _ = _subr__i64toa
_ = _subr__lspace _ = _subr__lspace
_ = _subr__lzero
_ = _subr__quote _ = _subr__quote
_ = _subr__skip_array _ = _subr__skip_array
_ = _subr__skip_number _ = _subr__skip_number
@ -80,7 +77,6 @@ const (
_ = _stack__html_escape _ = _stack__html_escape
_ = _stack__i64toa _ = _stack__i64toa
_ = _stack__lspace _ = _stack__lspace
_ = _stack__lzero
_ = _stack__quote _ = _stack__quote
_ = _stack__skip_array _ = _stack__skip_array
_ = _stack__skip_number _ = _stack__skip_number

View file

@ -16,84 +16,6 @@
#include "native.h" #include "native.h"
/*
 * is_zero_sse reports whether every bit of the 128-bit vector v is zero.
 *
 * Returns 1 when v is all zero and 0 otherwise.
 *
 * Only SSE2 instructions are used, so this still builds when SSE4.1 is
 * disabled (-mno-sse4); the PTEST-based _mm_testz_si128 it replaces does not.
 */
static inline int is_zero_sse(__m128i v) {
    /* Byte lanes equal to zero compare to 0xFF; movemask collects one bit per
     * lane, so all sixteen bits are set only when the whole vector is zero. */
    return _mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_setzero_si128())) == 0xFFFF;
}
#if USE_AVX
static inline int is_zero_avx(__m256i v) {
return _mm256_testz_si256(v, v);
}
#endif
/*
 * lzero scans the nb-byte buffer starting at sp for a non-zero byte.
 *
 * Returns 0 when every byte is zero (including the nb == 0 case) and 1 as
 * soon as any non-zero byte is found. The buffer is consumed in progressively
 * narrower steps: 32 bytes (AVX builds only), 16, 8, 4, 2 and finally 1.
 */
size_t lzero(const char *sp, size_t nb) {
    const char *cur  = sp;
    size_t      left = nb;

#if USE_AVX
    /* 32-byte strides */
    for (; left >= 32; cur += 32, left -= 32) {
        if (!is_zero_avx(_mm256_loadu_si256((const void *)cur))) {
            _mm256_zeroupper();
            return 1;
        }
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* 16-byte strides */
    for (; left >= 16; cur += 16, left -= 16) {
        if (!is_zero_sse(_mm_loadu_si128((const void *)cur))) {
            return 1;
        }
    }

    /* fewer than 16 bytes remain: at most one 8-byte probe */
    if (left >= 8) {
        if (*(uint64_t *)cur) {
            return 1;
        }
        cur  += 8;
        left -= 8;
    }

    /* at most one 4-byte probe */
    if (left >= 4) {
        if (*(uint32_t *)cur) {
            return 1;
        }
        cur  += 4;
        left -= 4;
    }

    /* at most one 2-byte probe */
    if (left >= 2) {
        if (*(uint16_t *)cur) {
            return 1;
        }
        cur  += 2;
        left -= 2;
    }

    /* zero or one trailing byte; it is only read when it exists */
    return left != 0 && *cur != 0;
}
#if USE_AVX2 #if USE_AVX2
static const uintptr_t ALIGN_MASK = 31; static const uintptr_t ALIGN_MASK = 31;
#else #else

View file

@ -101,7 +101,6 @@ int f64toa(char *out, double val);
int i64toa(char *out, int64_t val); int i64toa(char *out, int64_t val);
int u64toa(char *out, uint64_t val); int u64toa(char *out, uint64_t val);
size_t lzero(const char *sp, size_t nb);
size_t lspace(const char *sp, size_t nb, size_t p); size_t lspace(const char *sp, size_t nb, size_t p);
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags); ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags);