mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
feat: performance optimizations
This commit is contained in:
parent
8fe152d152
commit
5cb6f17944
29 changed files with 8937 additions and 7635 deletions
4
Makefile
4
Makefile
|
|
@ -25,8 +25,8 @@ CPU_avx2 := amd64
|
|||
TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
|
||||
|
||||
CFLAGS_avx := -msse2 -mavx -mno-avx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=0
|
||||
CFLAGS_avx2 := -msse2 -mavx -mavx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=1
|
||||
CFLAGS_avx := -msse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0
|
||||
CFLAGS_avx2 := -msse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1
|
||||
|
||||
CC_amd64 := clang
|
||||
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py
|
||||
|
|
|
|||
|
|
@ -80,6 +80,10 @@ const (
|
|||
_IM_alse = 0x65736c61 // 'alse' ('false' without the 'f')
|
||||
)
|
||||
|
||||
const (
|
||||
_BM_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
|
||||
)
|
||||
|
||||
const (
|
||||
_LB_error = "_error"
|
||||
_LB_im_error = "_im_error"
|
||||
|
|
@ -1318,15 +1322,34 @@ func (self *_Assembler) _asm_OP_unmarshal_text_p(p *_Instr) {
|
|||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_lspace(_ *_Instr) {
|
||||
self.Emit("MOVQ" , _IP, _DI) // MOVQ IP, DI
|
||||
self.Emit("MOVQ" , _IL, _SI) // MOVQ IL, SI
|
||||
self.Emit("MOVQ" , _IC, _DX) // MOVQ IC, DX
|
||||
self.call(_F_lspace) // CALL lspace
|
||||
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
|
||||
self.Sjmp("JS" , _LB_parsing_error_v) // JS _parsing_error_v
|
||||
self.Emit("CMPQ" , _AX, _IL) // CMPQ AX, IL
|
||||
self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error
|
||||
self.Emit("MOVQ" , _AX, _IC) // MOVQ AX, IC
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error
|
||||
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX
|
||||
self.Sjmp("JNC" , "_nospace_{n}") // JNC _nospace_{n}
|
||||
|
||||
/* test up to 4 characters */
|
||||
for i := 0; i < 3; i++ {
|
||||
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX
|
||||
self.Sjmp("JNC" , "_nospace_{n}") // JNC _nospace_{n}
|
||||
}
|
||||
|
||||
/* handle over to the native function */
|
||||
self.Emit("MOVQ" , _IP, _DI) // MOVQ IP, DI
|
||||
self.Emit("MOVQ" , _IL, _SI) // MOVQ IL, SI
|
||||
self.Emit("MOVQ" , _IC, _DX) // MOVQ IC, DX
|
||||
self.call(_F_lspace) // CALL lspace
|
||||
self.Emit("TESTQ" , _AX, _AX) // TESTQ AX, AX
|
||||
self.Sjmp("JS" , _LB_parsing_error_v) // JS _parsing_error_v
|
||||
self.Emit("CMPQ" , _AX, _IL) // CMPQ AX, IL
|
||||
self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error
|
||||
self.Emit("MOVQ" , _AX, _IC) // MOVQ AX, IC
|
||||
self.Link("_nospace_{n}") // _nospace_{n}:
|
||||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_match_char(p *_Instr) {
|
||||
|
|
|
|||
|
|
@ -119,10 +119,6 @@ const (
|
|||
_S_vmask = (1 << _S_val) | (1 << _S_arr_0)
|
||||
)
|
||||
|
||||
const (
|
||||
_X_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
|
||||
)
|
||||
|
||||
const (
|
||||
_A_init_len = 1
|
||||
_A_init_cap = 16
|
||||
|
|
@ -206,11 +202,11 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
|
||||
self.Sjmp("JS" , "_return") // JS _return
|
||||
|
||||
/* fast path: no-space or 1-space cases */
|
||||
/* fast path: test up to 4 characters manually */
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("MOVQ" , jit.Imm(_X_space), _DX) // MOVQ _X_space, DX
|
||||
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
|
||||
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
|
||||
|
||||
|
|
@ -219,6 +215,33 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
|
||||
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
|
||||
|
||||
/* 2-space case */
|
||||
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
|
||||
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
|
||||
|
||||
/* 3-space case */
|
||||
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
|
||||
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
|
||||
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
|
||||
|
||||
/* 4-space case */
|
||||
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
|
||||
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
|
||||
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
|
||||
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
|
||||
|
||||
/* fast path: use lookup table to select decoder */
|
||||
self.Link("_decode_fast") // _decode_fast:
|
||||
|
|
@ -232,14 +255,14 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Rjmp("JMP" , _AX) // JMP AX
|
||||
|
||||
/* decode with native decoder */
|
||||
self.Link("_decode_native") // _decode_native:
|
||||
self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI
|
||||
self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI
|
||||
self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX
|
||||
self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX
|
||||
self.Emit("MOVL", jit.Imm(1), _R8) // MOVL $1, R8
|
||||
self.call(_F_value) // CALL value
|
||||
self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC
|
||||
self.Link("_decode_native") // _decode_native:
|
||||
self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI
|
||||
self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI
|
||||
self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX
|
||||
self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX
|
||||
self.Emit("MOVL", jit.Imm(1), _R8) // MOVL $1, R8
|
||||
self.call(_F_value) // CALL value
|
||||
self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC
|
||||
|
||||
/* check for errors */
|
||||
self.Emit("MOVQ" , _VAR_ss_Vt, _AX) // MOVQ ss.Vt, AX
|
||||
|
|
@ -565,6 +588,7 @@ func (self *_ValueDecoder) compile() {
|
|||
/* return from decoder */
|
||||
self.Link("_return") // _return:
|
||||
self.Emit("XORL", _EP, _EP) // XORL EP, EP
|
||||
self.Emit("MOVQ", _EP, jit.Ptr(_ST, _ST_Vp)) // MOVQ EP, ST.Vp[0]
|
||||
self.Link("_epilogue") // _epilogue:
|
||||
self.Emit("SUBQ", jit.Imm(_FsmOffset), _ST) // SUBQ _FsmOffset, _ST
|
||||
self.Emit("MOVQ", jit.Ptr(_SP, _VD_offs), _BP) // MOVQ _VD_offs(SP), BP
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@ import (
|
|||
`sync`
|
||||
`unsafe`
|
||||
|
||||
`github.com/bytedance/sonic/internal/cpu`
|
||||
`github.com/bytedance/sonic/internal/jit`
|
||||
`github.com/bytedance/sonic/internal/native/types`
|
||||
`github.com/twitchyliquid64/golang-asm/obj`
|
||||
`github.com/twitchyliquid64/golang-asm/obj/x86`
|
||||
|
||||
|
|
@ -68,13 +68,14 @@ const (
|
|||
)
|
||||
|
||||
const (
|
||||
_FP_args = 40 // 40 bytes for passing arguments to this function
|
||||
_FP_fargs = 64 // 64 bytes for passing arguments to other Go functions
|
||||
_FP_saves = 64 // 64 bytes for saving the registers before CALL instructions
|
||||
_FP_args = 40 // 40 bytes for passing arguments to this function
|
||||
_FP_fargs = 64 // 64 bytes for passing arguments to other Go functions
|
||||
_FP_saves = 64 // 64 bytes for saving the registers before CALL instructions
|
||||
_FP_locals = 16 // 16 bytes for local variables
|
||||
)
|
||||
|
||||
const (
|
||||
_FP_offs = _FP_fargs + _FP_saves
|
||||
_FP_offs = _FP_fargs + _FP_saves + _FP_locals
|
||||
_FP_size = _FP_offs + 8 // 8 bytes for the parent frame pointer
|
||||
_FP_base = _FP_size + 8 // 8 bytes for the return address
|
||||
)
|
||||
|
|
@ -118,11 +119,6 @@ var (
|
|||
var (
|
||||
_X0 = jit.Reg("X0")
|
||||
_Y0 = jit.Reg("Y0")
|
||||
_Y1 = jit.Reg("Y1")
|
||||
_Y2 = jit.Reg("Y2")
|
||||
_Y3 = jit.Reg("Y3")
|
||||
_Y4 = jit.Reg("Y4")
|
||||
_Y5 = jit.Reg("Y5")
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -156,6 +152,11 @@ var (
|
|||
_RET_ep = jit.Ptr(_SP, _FP_base + 32)
|
||||
)
|
||||
|
||||
var (
|
||||
_VAR_sp = jit.Ptr(_SP, _FP_fargs + _FP_saves)
|
||||
_VAR_dn = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8)
|
||||
)
|
||||
|
||||
var (
|
||||
_REG_ffi = []obj.Addr{_RP, _RL, _RC}
|
||||
_REG_enc = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q}
|
||||
|
|
@ -380,13 +381,17 @@ func (self *_Assembler) check_size_rl(v obj.Addr) {
|
|||
|
||||
/* check for buffer capacity */
|
||||
self.x++
|
||||
self.Emit("LEAQ", v, _AX) // LEAQ $v, AX
|
||||
self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC
|
||||
self.Sjmp("JBE" , key) // JBE _more_space_return_{n}
|
||||
self.Emit("LEAQ", v, _AX) // LEAQ $v, AX
|
||||
self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC
|
||||
self.Sjmp("JBE" , key) // JBE _more_space_return_{n}
|
||||
self.slice_grow_ax(key) // GROW $key
|
||||
self.Link(key) // _more_space_return_{n}:
|
||||
}
|
||||
|
||||
func (self *_Assembler) slice_grow_ax(ret string) {
|
||||
self.Byte(0x4c, 0x8d, 0x0d) // LEAQ ?(PC), R9
|
||||
self.Sref(key, 4) // .... &key
|
||||
self.Sref(ret, 4) // .... &ret
|
||||
self.Sjmp("JMP" , _LB_more_space) // JMP _more_space
|
||||
self.Link(key) // _more_space_return_{n}:
|
||||
}
|
||||
|
||||
/** State Stack Helpers **/
|
||||
|
|
@ -433,6 +438,11 @@ func (self *_Assembler) add_long(ch uint32, n int64) {
|
|||
self.Emit("ADDQ", jit.Imm(n), _RL) // ADDQ $n, RL
|
||||
}
|
||||
|
||||
func (self *_Assembler) add_text(ss string) {
|
||||
self.store_str(ss) // TEXT $ss
|
||||
self.Emit("ADDQ", jit.Imm(int64(len(ss))), _RL) // ADDQ ${len(ss)}, RL
|
||||
}
|
||||
|
||||
func (self *_Assembler) prep_buffer() {
|
||||
self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX
|
||||
self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX)
|
||||
|
|
@ -602,7 +612,16 @@ func (self *_Assembler) error_nan_or_infinite() {
|
|||
|
||||
/** String Encoding Routine **/
|
||||
|
||||
func (self *_Assembler) open_quote(doubleQuote bool) {
|
||||
var (
|
||||
_F_quote = jit.Imm(int64(native.S_quote))
|
||||
)
|
||||
|
||||
func (self *_Assembler) encode_string(doubleQuote bool) {
|
||||
self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX
|
||||
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
|
||||
self.Sjmp("JZ" , "_str_empty_{n}") // JZ _str_empty_{n}
|
||||
|
||||
/* openning quote, check for double quote */
|
||||
if !doubleQuote {
|
||||
self.check_size_r(_AX, 2) // SIZE $2
|
||||
self.add_char('"') // CHAR $'"'
|
||||
|
|
@ -610,46 +629,69 @@ func (self *_Assembler) open_quote(doubleQuote bool) {
|
|||
self.check_size_r(_AX, 6) // SIZE $6
|
||||
self.add_long(_IM_open, 3) // TEXT $`"\"`
|
||||
}
|
||||
}
|
||||
|
||||
func (self *_Assembler) close_quote(doubleQuote bool) {
|
||||
/* quoting loop */
|
||||
self.Emit("XORL", _AX, _AX) // XORL AX, AX
|
||||
self.Emit("MOVQ", _AX, _VAR_sp) // MOVQ AX, sp
|
||||
self.Link("_str_loop_{n}") // _str_loop_{n}:
|
||||
self.save_c() // SAVE $REG_ffi
|
||||
|
||||
/* load the output buffer first, and then input buffer,
|
||||
* because the parameter registers collide with RP / RL / RC */
|
||||
self.Emit("MOVQ", _RC, _CX) // MOVQ RC, CX
|
||||
self.Emit("SUBQ", _RL, _CX) // SUBQ RL, CX
|
||||
self.Emit("MOVQ", _CX, _VAR_dn) // MOVQ CX, dn
|
||||
self.Emit("LEAQ", jit.Sib(_RP, _RL, 1, 0), _DX) // LEAQ (RP)(RL), DX
|
||||
self.Emit("LEAQ", _VAR_dn, _CX) // LEAQ dn, CX
|
||||
self.Emit("MOVQ", _VAR_sp, _AX) // MOVQ sp, AX
|
||||
self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _DI) // MOVQ (SP.p), DI
|
||||
self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _SI) // MOVQ 8(SP.p), SI
|
||||
self.Emit("ADDQ", _AX, _DI) // ADDQ AX, DI
|
||||
self.Emit("SUBQ", _AX, _SI) // SUBQ AX, SI
|
||||
|
||||
/* set the flags based on `doubleQuote` */
|
||||
if !doubleQuote {
|
||||
self.check_size(1) // SIZE $1
|
||||
self.Link("_str_end_{n}") // _str_end_{n}:
|
||||
self.add_char('"') // CHAR $'"'
|
||||
self.Emit("XORL", _R8, _R8) // XORL R8, R8
|
||||
} else {
|
||||
self.Emit("MOVL", jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8
|
||||
}
|
||||
|
||||
/* call the native quoter */
|
||||
self.call_c(_F_quote) // CALL quote
|
||||
self.Emit("ADDQ" , _VAR_dn, _RL) // ADDQ dn, RL
|
||||
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
|
||||
self.Sjmp("JS" , "_str_space_{n}") // JS _str_space_{n}
|
||||
|
||||
/* close the string, check for double quote */
|
||||
if !doubleQuote {
|
||||
self.check_size(1) // SIZE $1
|
||||
self.add_char('"') // CHAR $'"'
|
||||
self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n}
|
||||
} else {
|
||||
self.check_size(3) // SIZE $3
|
||||
self.Link("_str_end_{n}") // _str_end_{n}:
|
||||
self.store_str(`\""`) // TEXT $`\""`
|
||||
self.Emit("ADDQ", jit.Imm(3), _RL) // ADDQ $3, RL
|
||||
self.add_text("\\\"\"") // TEXT $'\""'
|
||||
self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n}
|
||||
}
|
||||
}
|
||||
|
||||
func (self *_Assembler) encode_string(fn obj.Addr, doubleQuote bool) {
|
||||
self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX
|
||||
self.open_quote(doubleQuote) // QOPEN $doubleQuote
|
||||
self.Emit("CMPQ" , jit.Ptr(_SP_p, 8), jit.Imm(0)) // CMPQ 8(SP.p), $0
|
||||
self.Sjmp("JE" , "_str_end_{n}") // JE _str_end_{n}
|
||||
self.save_c() // SAVE $REG_ffi
|
||||
self.Emit("MOVQ" , _SP_p, _DI) // MOVQ SP.p, DI
|
||||
self.Emit("XORL" , _SI, _SI) // XORL SI, SI
|
||||
self.call_c(_F_lquote) // CALL lquote
|
||||
self.Emit("CMPQ" , _AX, jit.Ptr(_SP_p, 8)) // CMPQ AX, 8(SP.p)
|
||||
self.Sjmp("JNE" , "_str_quote_{n}") // JNE _str_quote_{n}
|
||||
self.Emit("LEAQ" , jit.Sib(_RP, _RL, 1, 0), _AX) // LEAQ (RP)(RL), AX
|
||||
self.Emit("ADDQ" , jit.Ptr(_SP_p, 8), _RL) // ADDQ 8(SP.p), RL
|
||||
self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, 0(SP)
|
||||
self.Emit("MOVOU", jit.Ptr(_SP_p, 0), _X0) // MOVOU (SP.p), X0
|
||||
self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP)
|
||||
self.call_go(_F_memmove) // CALL_GO memmove
|
||||
self.Sjmp("JMP" , "_str_end_{n}") // JMP _str_end_{n}
|
||||
self.Link("_str_quote_{n}") // _str_quote_{n}:
|
||||
self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP)
|
||||
self.prep_buffer() // MOVE {buf}, (SP)
|
||||
self.Emit("MOVOU", jit.Ptr(_SP_p, 0), _X0) // MOVOU (SP.p), X0
|
||||
self.Emit("MOVOU", _X0, jit.Ptr(_SP, 16)) // MOVOU X0, 16(SP)
|
||||
self.call_encoder(fn) // CALL $fn
|
||||
self.close_quote(doubleQuote) // QCLOSE $doubleQuote
|
||||
/* not enough space to contain the quoted string */
|
||||
self.Link("_str_space_{n}") // _str_space_{n}:
|
||||
self.Emit("NOTQ", _AX) // NOTQ AX
|
||||
self.Emit("ADDQ", _AX, _VAR_sp) // ADDQ AX, sp
|
||||
self.Emit("LEAQ", jit.Sib(_RC, _RC, 1, 0), _AX) // LEAQ (RC)(RC), AX
|
||||
self.slice_grow_ax("_str_loop_{n}") // GROW _str_loop_{n}
|
||||
|
||||
/* empty string, check for double quote */
|
||||
if !doubleQuote {
|
||||
self.Link("_str_empty_{n}") // _str_empty_{n}:
|
||||
self.check_size(2) // SIZE $2
|
||||
self.add_text("\"\"") // TEXT $'""'
|
||||
self.Link("_str_end_{n}") // _str_end_{n}:
|
||||
} else {
|
||||
self.Link("_str_empty_{n}") // _str_empty_{n}:
|
||||
self.check_size(6) // SIZE $6
|
||||
self.add_text("\"\\\"\\\"\"") // TEXT $'"\"\""'
|
||||
self.Link("_str_end_{n}") // _str_end_{n}:
|
||||
}
|
||||
}
|
||||
|
||||
/** Zero Value Check Routine **/
|
||||
|
|
@ -663,46 +705,11 @@ func (self *_Assembler) check_zero(nb int, dest int) {
|
|||
return
|
||||
}
|
||||
|
||||
/* default instructions for AVX2 */
|
||||
vclear := func(v obj.Addr) { self.Emit("VPXOR" , v, v, v) }
|
||||
vset1a := func(a, b obj.Addr) { self.Emit("VPCMPEQB", a, a, b) }
|
||||
vandpb := func(b, a, r obj.Addr) { self.Emit("VPAND" , b, a, r) }
|
||||
vcmpeq := func(b, a, r obj.Addr) { self.Emit("VPCMPEQB", b, a, r) }
|
||||
|
||||
/* fall-back instructions for AVX */
|
||||
if !cpu.HasAVX2 {
|
||||
vclear = func(v obj.Addr) { self.Emit("VXORPS", v, v, v) }
|
||||
vset1a = func(a, b obj.Addr) { self.Emit("VCMPPS", a, a, b, jit.Imm(0x0f)) }
|
||||
vandpb = func(b, a, r obj.Addr) { self.Emit("VANDPS", b, a, r) }
|
||||
vcmpeq = func(b, a, r obj.Addr) { self.Emit("VCMPPS", b, a, r, jit.Imm(0x00)) }
|
||||
}
|
||||
|
||||
/* if n is less than 32 byte, only scalar code will be used;
|
||||
* otherwise AVX is used, so clear Y0, and set Y1 to all 1s */
|
||||
if e >= 32 {
|
||||
vclear(_Y0) // CLEAR Y0
|
||||
vset1a(_Y0, _Y1) // SET1A Y0, Y1
|
||||
}
|
||||
|
||||
/* 128-byte tests */
|
||||
for i <= e - 128 {
|
||||
vcmpeq(jit.Ptr(_SP_p, i + 0), _Y0, _Y2) // CMPEQ i+0(SP.p), Y0, Y2
|
||||
vcmpeq(jit.Ptr(_SP_p, i + 32), _Y0, _Y3) // CMPEQ i+32(SP.p), Y0, Y3
|
||||
vcmpeq(jit.Ptr(_SP_p, i + 64), _Y0, _Y4) // CMPEQ i+64(SP.p), Y0, Y4
|
||||
vcmpeq(jit.Ptr(_SP_p, i + 96), _Y0, _Y5) // CMPEQ i+96(SP.p), Y0, Y5
|
||||
vandpb(_Y3, _Y2, _Y2) // ANDPB Y3, Y2, Y2
|
||||
vandpb(_Y5, _Y4, _Y3) // ANDPB Y5, Y4, Y3
|
||||
vandpb(_Y2, _Y3, _Y3) // ANDPB Y2, Y3, Y3
|
||||
self.Emit("VPTEST", _Y1, _Y3) // VPTEST Y1, Y3
|
||||
self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n}
|
||||
i += 128
|
||||
}
|
||||
|
||||
/* 32-byte tests */
|
||||
/* 32-byte test */
|
||||
for i <= e - 32 {
|
||||
vcmpeq(jit.Ptr(_SP_p, i), _Y0, _Y2) // CMPEQ i(SP.p), Y0, Y2
|
||||
self.Emit("VPTEST", _Y1, _Y2) // VPTEST Y1, Y2
|
||||
self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n}
|
||||
self.Emit("VMOVDQU", jit.Ptr(_SP_p, i), _Y0) // VMOVDQU (SP.p), Y0
|
||||
self.Emit("VPTEST" , _Y0, _Y0) // VPTEST Y0, Y0
|
||||
self.Sjmp("JNZ" , "_not_zero_z_{n}") // JNZ _not_zero_z_{n}
|
||||
i += 32
|
||||
}
|
||||
|
||||
|
|
@ -711,8 +718,16 @@ func (self *_Assembler) check_zero(nb int, dest int) {
|
|||
self.Emit("VZEROUPPER")
|
||||
}
|
||||
|
||||
/* 8-byte tests */
|
||||
for i <= e - 8 {
|
||||
/* 16-byte test */
|
||||
if i <= e - 16 {
|
||||
self.Emit("MOVOU", jit.Ptr(_SP_p, i), _X0) // MOVOU (SP.p), X0
|
||||
self.Emit("PTEST", _X0, _X0) // PTEST X0, X0
|
||||
self.Sjmp("JNZ" , "_not_zero_{n}") // JNZ _not_zero_{n}
|
||||
i += 16
|
||||
}
|
||||
|
||||
/* 8-byte test */
|
||||
if i <= e - 8 {
|
||||
self.Emit("CMPQ", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPQ i(SP.p), $0
|
||||
self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n}
|
||||
i += 8
|
||||
|
|
@ -771,7 +786,6 @@ var (
|
|||
_F_f64toa = jit.Imm(int64(native.S_f64toa))
|
||||
_F_i64toa = jit.Imm(int64(native.S_i64toa))
|
||||
_F_u64toa = jit.Imm(int64(native.S_u64toa))
|
||||
_F_lquote = jit.Imm(int64(native.S_lquote))
|
||||
_F_b64encode = jit.Imm(int64(_subr__b64encode))
|
||||
)
|
||||
|
||||
|
|
@ -792,16 +806,12 @@ var (
|
|||
)
|
||||
|
||||
var (
|
||||
_F_encodeQuote obj.Addr
|
||||
_F_encodeDoubleQuote obj.Addr
|
||||
_F_encodeTypedPointer obj.Addr
|
||||
_F_encodeJsonMarshaler obj.Addr
|
||||
_F_encodeTextMarshaler obj.Addr
|
||||
)
|
||||
|
||||
func init() {
|
||||
_F_encodeQuote = jit.Func(encodeQuote)
|
||||
_F_encodeDoubleQuote = jit.Func(encodeDoubleQuote)
|
||||
_F_encodeTypedPointer = jit.Func(encodeTypedPointer)
|
||||
_F_encodeJsonMarshaler = jit.Func(encodeJsonMarshaler)
|
||||
_F_encodeTextMarshaler = jit.Func(encodeTextMarshaler)
|
||||
|
|
@ -889,7 +899,7 @@ func (self *_Assembler) _asm_OP_f64(_ *_Instr) {
|
|||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_str(_ *_Instr) {
|
||||
self.encode_string(_F_encodeQuote, false)
|
||||
self.encode_string(false)
|
||||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_bin(_ *_Instr) {
|
||||
|
|
@ -913,7 +923,7 @@ func (self *_Assembler) _asm_OP_bin(_ *_Instr) {
|
|||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_quote(_ *_Instr) {
|
||||
self.encode_string(_F_encodeDoubleQuote, true)
|
||||
self.encode_string(true)
|
||||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_number(_ *_Instr) {
|
||||
|
|
@ -977,9 +987,8 @@ func (self *_Assembler) _asm_OP_byte(p *_Instr) {
|
|||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_text(p *_Instr) {
|
||||
self.check_size(len(p.vs()))
|
||||
self.store_str(p.vs())
|
||||
self.Emit("ADDQ", jit.Imm(int64(len(p.vs()))), _RL) // ADDQ $len(p.vs()), RL
|
||||
self.check_size(len(p.vs())) // SIZE ${len(p.vs())}
|
||||
self.add_text(p.vs()) // TEXT ${p.vs()}
|
||||
}
|
||||
|
||||
func (self *_Assembler) _asm_OP_deref(_ *_Instr) {
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import (
|
|||
`unsafe`
|
||||
|
||||
`github.com/bytedance/sonic/internal/rt`
|
||||
`github.com/davecgh/go-spew/spew`
|
||||
`github.com/stretchr/testify/assert`
|
||||
)
|
||||
|
||||
|
|
@ -339,6 +340,18 @@ func TestAssembler_OpCode(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestAssembler_StringMoreSpace(t *testing.T) {
|
||||
p := &_Program{ins: []_Instr{newInsOp(_OP_str)}}
|
||||
m := make([]byte, 0, 8)
|
||||
s := new(_Stack)
|
||||
a := newAssembler(p)
|
||||
f := a.Load()
|
||||
v := "\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010"
|
||||
e := f(&m, unsafe.Pointer(&v), s)
|
||||
assert.Nil(t, e)
|
||||
spew.Dump(m)
|
||||
}
|
||||
|
||||
func TestAssembler_TwitterJSON_Generic(t *testing.T) {
|
||||
p := &_Program{ins: mustCompile(&_GenericValue).ins}
|
||||
m := []byte(nil)
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ func Quote(s string) string {
|
|||
p = make([]byte, 0, n)
|
||||
|
||||
/* call the encoder */
|
||||
_ = encodeStr(&p, s)
|
||||
_ = encodeString(&p, s)
|
||||
return rt.Mem2Str(p)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,126 +28,44 @@ import (
|
|||
|
||||
/** Encoder Primitives **/
|
||||
|
||||
var _QuoteTab = [256]string {
|
||||
'\x00' : `\u0000`,
|
||||
'\x01' : `\u0001`,
|
||||
'\x02' : `\u0002`,
|
||||
'\x03' : `\u0003`,
|
||||
'\x04' : `\u0004`,
|
||||
'\x05' : `\u0005`,
|
||||
'\x06' : `\u0006`,
|
||||
'\x07' : `\u0007`,
|
||||
'\b' : `\b`,
|
||||
'\t' : `\t`,
|
||||
'\n' : `\n`,
|
||||
'\x0b' : `\u000b`,
|
||||
'\f' : `\f`,
|
||||
'\r' : `\r`,
|
||||
'\x0e' : `\u000e`,
|
||||
'\x0f' : `\u000f`,
|
||||
'\x10' : `\u0010`,
|
||||
'\x11' : `\u0011`,
|
||||
'\x12' : `\u0012`,
|
||||
'\x13' : `\u0013`,
|
||||
'\x14' : `\u0014`,
|
||||
'\x15' : `\u0015`,
|
||||
'\x16' : `\u0016`,
|
||||
'\x17' : `\u0017`,
|
||||
'\x18' : `\u0018`,
|
||||
'\x19' : `\u0019`,
|
||||
'\x1a' : `\u001a`,
|
||||
'\x1b' : `\u001b`,
|
||||
'\x1c' : `\u001c`,
|
||||
'\x1d' : `\u001d`,
|
||||
'\x1e' : `\u001e`,
|
||||
'\x1f' : `\u001f`,
|
||||
'"' : `\"`,
|
||||
'\\' : `\\`,
|
||||
}
|
||||
|
||||
var _DoubleQuoteTab = [256]string {
|
||||
'\x00' : `\\u0000`,
|
||||
'\x01' : `\\u0001`,
|
||||
'\x02' : `\\u0002`,
|
||||
'\x03' : `\\u0003`,
|
||||
'\x04' : `\\u0004`,
|
||||
'\x05' : `\\u0005`,
|
||||
'\x06' : `\\u0006`,
|
||||
'\x07' : `\\u0007`,
|
||||
'\b' : `\\b`,
|
||||
'\t' : `\\t`,
|
||||
'\n' : `\\n`,
|
||||
'\x0b' : `\\u000b`,
|
||||
'\f' : `\\f`,
|
||||
'\r' : `\\r`,
|
||||
'\x0e' : `\\u000e`,
|
||||
'\x0f' : `\\u000f`,
|
||||
'\x10' : `\\u0010`,
|
||||
'\x11' : `\\u0011`,
|
||||
'\x12' : `\\u0012`,
|
||||
'\x13' : `\\u0013`,
|
||||
'\x14' : `\\u0014`,
|
||||
'\x15' : `\\u0015`,
|
||||
'\x16' : `\\u0016`,
|
||||
'\x17' : `\\u0017`,
|
||||
'\x18' : `\\u0018`,
|
||||
'\x19' : `\\u0019`,
|
||||
'\x1a' : `\\u001a`,
|
||||
'\x1b' : `\\u001b`,
|
||||
'\x1c' : `\\u001c`,
|
||||
'\x1d' : `\\u001d`,
|
||||
'\x1e' : `\\u001e`,
|
||||
'\x1f' : `\\u001f`,
|
||||
'"' : `\\\"`,
|
||||
'\\' : `\\\\`,
|
||||
}
|
||||
|
||||
func encodeNil(rb *[]byte) error {
|
||||
*rb = append(*rb, 'n', 'u', 'l', 'l')
|
||||
return nil
|
||||
}
|
||||
|
||||
func encodeStr(buf *[]byte, val string) error {
|
||||
func encodeString(buf *[]byte, val string) error {
|
||||
var sidx int
|
||||
var pbuf *rt.GoSlice
|
||||
var pstr *rt.GoString
|
||||
|
||||
/* opening quote */
|
||||
*buf = append(*buf, '"')
|
||||
encodeQuote(buf, native.Lquote(&val, 0), val)
|
||||
pbuf = (*rt.GoSlice)(unsafe.Pointer(buf))
|
||||
pstr = (*rt.GoString)(unsafe.Pointer(&val))
|
||||
|
||||
/* encode with native library */
|
||||
for sidx < pstr.Len {
|
||||
sn := pstr.Len - sidx
|
||||
dn := pbuf.Cap - pbuf.Len
|
||||
sp := padd(pstr.Ptr, sidx)
|
||||
dp := padd(pbuf.Ptr, pbuf.Len)
|
||||
nb := native.Quote(sp, sn, dp, &dn, 0)
|
||||
|
||||
/* check for errors */
|
||||
if pbuf.Len += dn; nb >= 0 {
|
||||
break
|
||||
}
|
||||
|
||||
/* not enough space, grow the slice and try again */
|
||||
sidx += ^nb
|
||||
*pbuf = growslice(rt.UnpackType(byteType), *pbuf, pbuf.Cap * 2)
|
||||
}
|
||||
|
||||
/* closing quote */
|
||||
*buf = append(*buf, '"')
|
||||
return nil
|
||||
}
|
||||
|
||||
func encodeQuote(buf *[]byte, i int, val string) {
|
||||
p := 0
|
||||
n := len(val)
|
||||
|
||||
/* quote all the characters, if any */
|
||||
for i < n {
|
||||
*buf = append(*buf, rt.Str2Mem(val[p:i])...)
|
||||
*buf = append(*buf, rt.Str2Mem(_QuoteTab[val[i]])...)
|
||||
p, i = i + 1, native.Lquote(&val, i + 1)
|
||||
}
|
||||
|
||||
/* add the remaining characters */
|
||||
if p < n {
|
||||
*buf = append(*buf, rt.Str2Mem(val[p:])...)
|
||||
}
|
||||
}
|
||||
|
||||
func encodeDoubleQuote(buf *[]byte, i int, val string) {
|
||||
p := 0
|
||||
n := len(val)
|
||||
|
||||
/* quote all the characters, if any */
|
||||
for i < n {
|
||||
*buf = append(*buf, rt.Str2Mem(val[p:i])...)
|
||||
*buf = append(*buf, rt.Str2Mem(_DoubleQuoteTab[val[i]])...)
|
||||
p, i = i + 1, native.Lquote(&val, i + 1)
|
||||
}
|
||||
|
||||
/* add the remaining characters */
|
||||
if p < n {
|
||||
*buf = append(*buf, rt.Str2Mem(val[p:])...)
|
||||
}
|
||||
}
|
||||
|
||||
func encodeTypedPointer(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *_Stack) error {
|
||||
if vt == nil {
|
||||
return encodeNil(buf)
|
||||
|
|
@ -172,7 +90,7 @@ func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler) error {
|
|||
if ret, err := val.MarshalText(); err != nil {
|
||||
return err
|
||||
} else {
|
||||
return encodeStr(buf, rt.Mem2Str(ret))
|
||||
return encodeString(buf, rt.Mem2Str(ret))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,11 @@ import (
|
|||
`github.com/bytedance/sonic/internal/loader`
|
||||
)
|
||||
|
||||
//go:nosplit
|
||||
func padd(p unsafe.Pointer, v int) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(p) + uintptr(v))
|
||||
}
|
||||
|
||||
//go:nosplit
|
||||
func ptoenc(p loader.Function) _Encoder {
|
||||
return *(*_Encoder)(unsafe.Pointer(&p))
|
||||
|
|
|
|||
|
|
@ -47,12 +47,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lquote(buf *string, off int) (ret int)
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -93,8 +98,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
|||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -43,6 +43,45 @@ func TestNative_Value(t *testing.T) {
|
|||
assert.Equal(t, 3, v.Ep)
|
||||
}
|
||||
|
||||
func TestNative_Quote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 27, len(d))
|
||||
assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_QuoteNoMem(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 10)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
assert.Equal(t, -8, rv)
|
||||
assert.Equal(t, 9, len(d))
|
||||
assert.Equal(t, `hello\b\f`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_DoubleQuote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, types.F_DOUBLE_UNQUOTE)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 36, len(d))
|
||||
assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_Unquote(t *testing.T) {
|
||||
s := `hello\b\f\n\r\t\\\"\u2333world`
|
||||
d := make([]byte, 0, len(s))
|
||||
|
|
@ -173,6 +212,16 @@ func TestNative_Vstring(t *testing.T) {
|
|||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||
v, e := hex.DecodeString(
|
||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||
|
|
|
|||
|
|
@ -21,12 +21,12 @@ package avx
|
|||
var (
|
||||
S_f64toa = _subr__f64toa
|
||||
S_i64toa = _subr__i64toa
|
||||
S_lquote = _subr__lquote
|
||||
S_u64toa = _subr__u64toa
|
||||
S_lspace = _subr__lspace
|
||||
)
|
||||
|
||||
var (
|
||||
S_lspace = _subr__lspace
|
||||
S_quote = _subr__quote
|
||||
S_unquote = _subr__unquote
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,29 +14,29 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
|
|||
|
||||
var (
|
||||
_func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
|
||||
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 2480
|
||||
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5544
|
||||
_subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 295
|
||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 937
|
||||
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 574
|
||||
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3638
|
||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
|
||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14087
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14122
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12457
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5637
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6825
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 8460
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10935
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11907
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9593
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12184
|
||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13969
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14004
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11647
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7664
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10125
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11097
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9240
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11374
|
||||
)
|
||||
|
||||
var (
|
||||
_ = _subr__f64toa
|
||||
_ = _subr__i64toa
|
||||
_ = _subr__lquote
|
||||
_ = _subr__lspace
|
||||
_ = _subr__lzero
|
||||
_ = _subr__quote
|
||||
_ = _subr__skip_array
|
||||
_ = _subr__skip_object
|
||||
_ = _subr__skip_one
|
||||
|
|
|
|||
|
|
@ -47,12 +47,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lquote(buf *string, off int) (ret int)
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -93,8 +98,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
|||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -43,6 +43,45 @@ func TestNative_Value(t *testing.T) {
|
|||
assert.Equal(t, 3, v.Ep)
|
||||
}
|
||||
|
||||
func TestNative_Quote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 27, len(d))
|
||||
assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_QuoteNoMem(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 10)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
assert.Equal(t, -8, rv)
|
||||
assert.Equal(t, 9, len(d))
|
||||
assert.Equal(t, `hello\b\f`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_DoubleQuote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, types.F_DOUBLE_UNQUOTE)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 36, len(d))
|
||||
assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_Unquote(t *testing.T) {
|
||||
s := `hello\b\f\n\r\t\\\"\u2333world`
|
||||
d := make([]byte, 0, len(s))
|
||||
|
|
@ -173,6 +212,16 @@ func TestNative_Vstring(t *testing.T) {
|
|||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||
v, e := hex.DecodeString(
|
||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||
|
|
|
|||
|
|
@ -21,12 +21,12 @@ package avx2
|
|||
var (
|
||||
S_f64toa = _subr__f64toa
|
||||
S_i64toa = _subr__i64toa
|
||||
S_lquote = _subr__lquote
|
||||
S_u64toa = _subr__u64toa
|
||||
S_lspace = _subr__lspace
|
||||
)
|
||||
|
||||
var (
|
||||
S_lspace = _subr__lspace
|
||||
S_quote = _subr__quote
|
||||
S_unquote = _subr__unquote
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,29 +14,29 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
|
|||
|
||||
var (
|
||||
_func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
|
||||
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3038
|
||||
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6102
|
||||
_subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 376
|
||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 1268
|
||||
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 822
|
||||
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3886
|
||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
|
||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14748
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14783
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13195
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6195
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7419
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9196
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11673
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12645
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10345
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12922
|
||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15851
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15886
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13051
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9082
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11529
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12501
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10760
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12778
|
||||
)
|
||||
|
||||
var (
|
||||
_ = _subr__f64toa
|
||||
_ = _subr__i64toa
|
||||
_ = _subr__lquote
|
||||
_ = _subr__lspace
|
||||
_ = _subr__lzero
|
||||
_ = _subr__quote
|
||||
_ = _subr__skip_array
|
||||
_ = _subr__skip_object
|
||||
_ = _subr__skip_one
|
||||
|
|
|
|||
|
|
@ -26,11 +26,14 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
S_f64toa uintptr
|
||||
S_i64toa uintptr
|
||||
S_u64toa uintptr
|
||||
S_lquote uintptr
|
||||
S_lspace uintptr
|
||||
S_f64toa uintptr
|
||||
S_i64toa uintptr
|
||||
S_u64toa uintptr
|
||||
S_lspace uintptr
|
||||
)
|
||||
|
||||
var (
|
||||
S_quote uintptr
|
||||
S_unquote uintptr
|
||||
)
|
||||
|
||||
|
|
@ -56,12 +59,12 @@ func Lzero(p unsafe.Pointer, n int) int
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func Lquote(buf *string, off int) int
|
||||
func Quote(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) int
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func Lspace(sp unsafe.Pointer, nb int, off int) int
|
||||
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -73,17 +76,12 @@ func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int
|
|||
//goland:noinspection GoUnusedParameter
|
||||
func SkipOne(s *string, p *int, m *types.StateMachine) int
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int
|
||||
|
||||
func useAVX() {
|
||||
S_f64toa = avx.S_f64toa
|
||||
S_i64toa = avx.S_i64toa
|
||||
S_u64toa = avx.S_u64toa
|
||||
S_lquote = avx.S_lquote
|
||||
S_lspace = avx.S_lspace
|
||||
S_quote = avx.S_quote
|
||||
S_unquote = avx.S_unquote
|
||||
S_value = avx.S_value
|
||||
S_vstring = avx.S_vstring
|
||||
|
|
@ -99,8 +97,8 @@ func useAVX2() {
|
|||
S_f64toa = avx2.S_f64toa
|
||||
S_i64toa = avx2.S_i64toa
|
||||
S_u64toa = avx2.S_u64toa
|
||||
S_lquote = avx2.S_lquote
|
||||
S_lspace = avx2.S_lspace
|
||||
S_quote = avx2.S_quote
|
||||
S_unquote = avx2.S_unquote
|
||||
S_value = avx2.S_value
|
||||
S_vstring = avx2.S_vstring
|
||||
|
|
|
|||
|
|
@ -24,17 +24,17 @@ TEXT ·Lzero(SB), NOSPLIT, $0 - 24
|
|||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lzero(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lzero(SB)
|
||||
|
||||
TEXT ·Lquote(SB), NOSPLIT, $0 - 24
|
||||
TEXT ·Quote(SB), NOSPLIT, $0 - 48
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__quote(SB)
|
||||
|
||||
TEXT ·Lspace(SB), NOSPLIT, $0 - 32
|
||||
TEXT ·Unquote(SB), NOSPLIT, $0 - 48
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__lspace(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__lspace(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__unquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__unquote(SB)
|
||||
|
||||
TEXT ·Value(SB), NOSPLIT, $0 - 48
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
|
|
@ -47,9 +47,3 @@ TEXT ·SkipOne(SB), NOSPLIT, $0 - 32
|
|||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__skip_one(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__skip_one(SB)
|
||||
|
||||
TEXT ·Unquote(SB), NOSPLIT, $0 - 48
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__unquote(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__unquote(SB)
|
||||
|
|
|
|||
|
|
@ -45,12 +45,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lquote(buf *string, off int) (ret int)
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
|
||||
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -91,8 +96,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
|||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
|
||||
|
|
|
|||
|
|
@ -41,6 +41,45 @@ func TestNative_Value(t *testing.T) {
|
|||
assert.Equal(t, 3, v.Ep)
|
||||
}
|
||||
|
||||
func TestNative_Quote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 27, len(d))
|
||||
assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_QuoteNoMem(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 10)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)
|
||||
assert.Equal(t, -8, rv)
|
||||
assert.Equal(t, 9, len(d))
|
||||
assert.Equal(t, `hello\b\f`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_DoubleQuote(t *testing.T) {
|
||||
s := "hello\b\f\n\r\t\\\"\u666fworld"
|
||||
d := make([]byte, 256)
|
||||
dp := (*rt.GoSlice)(unsafe.Pointer(&d))
|
||||
sp := (*rt.GoString)(unsafe.Pointer(&s))
|
||||
rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, types.F_DOUBLE_UNQUOTE)
|
||||
if rv < 0 {
|
||||
require.NoError(t, types.ParsingError(-rv))
|
||||
}
|
||||
assert.Equal(t, len(s), rv)
|
||||
assert.Equal(t, 36, len(d))
|
||||
assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(d))
|
||||
}
|
||||
|
||||
func TestNative_Unquote(t *testing.T) {
|
||||
s := `hello\b\f\n\r\t\\\"\u2333world`
|
||||
d := make([]byte, 0, len(s))
|
||||
|
|
@ -171,6 +210,16 @@ func TestNative_Vstring(t *testing.T) {
|
|||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||
v, e := hex.DecodeString(
|
||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||
|
|
|
|||
|
|
@ -19,12 +19,12 @@ package {{PACKAGE}}
|
|||
var (
|
||||
S_f64toa = _subr__f64toa
|
||||
S_i64toa = _subr__i64toa
|
||||
S_lquote = _subr__lquote
|
||||
S_u64toa = _subr__u64toa
|
||||
S_lspace = _subr__lspace
|
||||
)
|
||||
|
||||
var (
|
||||
S_lspace = _subr__lspace
|
||||
S_quote = _subr__quote
|
||||
S_unquote = _subr__unquote
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -16,417 +16,48 @@
|
|||
|
||||
#include "native.h"
|
||||
|
||||
#if USE_SSE
|
||||
#define loop_decl() \
|
||||
size_t v; \
|
||||
size_t n = 0; \
|
||||
const char * p = s; \
|
||||
|
||||
#define loop_simd(size, load, func, ...) { \
|
||||
while (nb >= size) { \
|
||||
if ((v = func(load((const void *)(p)), ## __VA_ARGS__)) < size) { \
|
||||
return n + v; \
|
||||
} else { \
|
||||
n += v; \
|
||||
p += size; \
|
||||
nb -= size; \
|
||||
} \
|
||||
} \
|
||||
static inline int is_zero_sse(__m128i v) {
|
||||
return _mm_testz_si128(v, v);
|
||||
}
|
||||
|
||||
#if !USE_AVX2
|
||||
#define loop_zero()
|
||||
#define loop_m256(func, ...)
|
||||
#else
|
||||
#define loop_zero() _mm256_zeroupper();
|
||||
#define loop_m256(func, ...) loop_simd(32, _mm256_loadu_si256, func, ## __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define loop_m128(func, ...) loop_simd(16, _mm_loadu_si128, func, ## __VA_ARGS__)
|
||||
#define loop_last(func, ...) return func(_mm_loadu_si128(as_m128c(p + nb - 16)), ## __VA_ARGS__) + n + nb - 16;
|
||||
|
||||
#define loop_bulk(func, ...) { \
|
||||
loop_decl() \
|
||||
loop_m256(func ## _avx2, ## __VA_ARGS__) \
|
||||
loop_zero(); \
|
||||
loop_m128(func ## _sse2, ## __VA_ARGS__) \
|
||||
loop_last(func ## _sse2, ## __VA_ARGS__) \
|
||||
}
|
||||
|
||||
#define loop_duff(func, ...) { \
|
||||
size_t r = nb; \
|
||||
__m128i m = _mm_set1_epi8(0xff); \
|
||||
\
|
||||
/* remaining bytes */ \
|
||||
switch (r) { \
|
||||
case 15 : m = _mm_insert_epi8(m, s[14], 14); \
|
||||
case 14 : m = _mm_insert_epi8(m, s[13], 13); \
|
||||
case 13 : m = _mm_insert_epi8(m, s[12], 12); \
|
||||
case 12 : m = _mm_insert_epi8(m, s[11], 11); \
|
||||
case 11 : m = _mm_insert_epi8(m, s[10], 10); \
|
||||
case 10 : m = _mm_insert_epi8(m, s[ 9], 9); \
|
||||
case 9 : m = _mm_insert_epi8(m, s[ 8], 8); \
|
||||
case 8 : m = _mm_insert_epi8(m, s[ 7], 7); \
|
||||
case 7 : m = _mm_insert_epi8(m, s[ 6], 6); \
|
||||
case 6 : m = _mm_insert_epi8(m, s[ 5], 5); \
|
||||
case 5 : m = _mm_insert_epi8(m, s[ 4], 4); \
|
||||
case 4 : m = _mm_insert_epi8(m, s[ 3], 3); \
|
||||
case 3 : m = _mm_insert_epi8(m, s[ 2], 2); \
|
||||
case 2 : m = _mm_insert_epi8(m, s[ 1], 1); \
|
||||
case 1 : m = _mm_insert_epi8(m, s[ 0], 0); \
|
||||
default : return func ## _sse2(m, ## __VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
|
||||
static inline size_t lspace_sse2(__m128i v0) {
|
||||
__m128i v1 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8(' '));
|
||||
__m128i v2 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8('\t'));
|
||||
__m128i v3 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8('\n'));
|
||||
__m128i v4 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8('\r'));
|
||||
__m128i v5 = _mm_or_si128 (v1, v2);
|
||||
__m128i v6 = _mm_or_si128 (v3, v4);
|
||||
__m128i v7 = _mm_or_si128 (v5, v6);
|
||||
uint32_t v8 = _mm_movemask_epi8 (v7);
|
||||
uint32_t v9 = __builtin_ctz (~v8);
|
||||
return v9;
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
static inline size_t lspace_avx2(__m256i v0) {
|
||||
__m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8(' '));
|
||||
__m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\t'));
|
||||
__m256i v3 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\n'));
|
||||
__m256i v4 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\r'));
|
||||
__m256i v5 = _mm256_or_si256 (v1, v2);
|
||||
__m256i v6 = _mm256_or_si256 (v3, v4);
|
||||
__m256i v7 = _mm256_or_si256 (v5, v6);
|
||||
uint32_t v8 = _mm256_movemask_epi8 (v7);
|
||||
uint64_t v9 = __builtin_ctzll (~(uint64_t)(v8));
|
||||
return v9;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline size_t lquote_sse2(__m128i v0) {
|
||||
__m128i v1 = _mm_cmpgt_epi8 (v0, _mm_set1_epi8(-1));
|
||||
__m128i v2 = _mm_cmplt_epi8 (v0, _mm_set1_epi8(' '));
|
||||
__m128i v3 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8('"'));
|
||||
__m128i v4 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8('\\'));
|
||||
__m128i v5 = _mm_and_si128 (v1, v2);
|
||||
__m128i v6 = _mm_or_si128 (v3, v4);
|
||||
__m128i v7 = _mm_or_si128 (v5, v6);
|
||||
uint32_t v8 = _mm_movemask_epi8 (v7);
|
||||
uint32_t v9 = __builtin_ctz (v8 | 0xffff0000);
|
||||
return v9;
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
static inline size_t lquote_avx2(__m256i v0) {
|
||||
__m256i v1 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(-1));
|
||||
__m256i v2 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(31));
|
||||
__m256i v3 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('"'));
|
||||
__m256i v4 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\\'));
|
||||
__m256i v5 = _mm256_andnot_si256 (v2, v1);
|
||||
__m256i v6 = _mm256_or_si256 (v3, v4);
|
||||
__m256i v7 = _mm256_or_si256 (v5, v6);
|
||||
uint32_t v8 = _mm256_movemask_epi8 (v7);
|
||||
uint64_t v9 = __builtin_ctzll ((uint64_t)v8 | 0xffffffff00000000);
|
||||
return v9;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline size_t strchr2_sse2(__m128i v0, uint64_t c0, uint64_t c1) {
|
||||
__m128i v1 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8((char)c0));
|
||||
__m128i v2 = _mm_cmpeq_epi8 (v0, _mm_set1_epi8((char)c1));
|
||||
__m128i v3 = _mm_or_si128 (v1, v2);
|
||||
uint32_t v4 = _mm_movemask_epi8 (v3);
|
||||
uint32_t v5 = __builtin_ctz (v4 | 0xffff0000);
|
||||
return v5;
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
static inline size_t strchr2_avx2(__m256i v0, uint64_t c0, uint64_t c1) {
|
||||
__m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c0));
|
||||
__m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c1));
|
||||
__m256i v3 = _mm256_or_si256 (v1, v2);
|
||||
uint32_t v4 = _mm256_movemask_epi8 (v3);
|
||||
uint64_t v5 = __builtin_ctzll ((uint64_t)v4 | 0xffffffff00000000);
|
||||
return v5;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define do_simd(func, ...) { \
|
||||
if (nb == 0) { \
|
||||
return 0; \
|
||||
} if (nb < 16) { \
|
||||
loop_duff(func, ## __VA_ARGS__) \
|
||||
} else { \
|
||||
loop_bulk(func, ## __VA_ARGS__) \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define is_quote(c) ((c) == '"' || (c) == '\\' || ((c) >= 0 && (c) <= 31))
|
||||
#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
|
||||
|
||||
static inline size_t lspace_p(const char *s, size_t nb) {
|
||||
#if USE_SSE
|
||||
do_simd(lspace)
|
||||
#else
|
||||
size_t i = 0;
|
||||
while (i < nb && !is_space(s[i])) i++;
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline size_t lquote_p(const char *s, size_t nb) {
|
||||
#if USE_SSE
|
||||
do_simd(lquote)
|
||||
#else
|
||||
size_t i = 0;
|
||||
while (i < nb && !is_quote(s[i])) i++;
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline size_t strchr1_p(const char *p, size_t nb, uint64_t ch) {
|
||||
#if USE_SSE
|
||||
int64_t r;
|
||||
uint32_t t;
|
||||
|
||||
/* prepare the vector */
|
||||
ssize_t n = nb;
|
||||
uintptr_t m = (uintptr_t)p;
|
||||
const char * q = p;
|
||||
|
||||
#if USE_AVX2
|
||||
#define ALIGN_VAL 31
|
||||
#define _mm_or _mm256_or_si256
|
||||
#define _mm_load _mm256_load_si256
|
||||
#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b)
|
||||
#define _mm_testz(v) _mm256_testz_si256(v, v)
|
||||
#define _mm_movemask(v) _mm256_movemask_epi8(v)
|
||||
__m256i a;
|
||||
__m256i b;
|
||||
__m256i c;
|
||||
__m256i d;
|
||||
__m256i u;
|
||||
__m256i v;
|
||||
__m256i w;
|
||||
__m256i x = _mm256_set1_epi8(ch);
|
||||
#else
|
||||
#define ALIGN_VAL 15
|
||||
#define _mm_or _mm_or_si128
|
||||
#define _mm_load _mm_load_si128
|
||||
#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b)
|
||||
#define _mm_testz(v) (_mm_movemask_epi8(v) == 0)
|
||||
#define _mm_movemask(v) _mm_movemask_epi8(v)
|
||||
__m128i a;
|
||||
__m128i b;
|
||||
__m128i c;
|
||||
__m128i d;
|
||||
__m128i u;
|
||||
__m128i v;
|
||||
__m128i w;
|
||||
__m128i x = _mm_set1_epi8(ch);
|
||||
#endif
|
||||
|
||||
#define BLOCK_SIZE (ALIGN_VAL + 1)
|
||||
#define BLOCK_MASK (1ull << BLOCK_SIZE)
|
||||
#define BLOCK_LARGE (BLOCK_SIZE * 4)
|
||||
|
||||
/* check for pointer alignment */
|
||||
if (m & ALIGN_VAL) {
|
||||
v = _mm_load ((const void *)(m & -BLOCK_SIZE));
|
||||
v = _mm_cmpeq (v, x);
|
||||
r = _mm_movemask (v);
|
||||
|
||||
/* check for match in the first characters */
|
||||
if ((r >>= (t = m & ALIGN_VAL)) != 0) {
|
||||
if ((r = __builtin_ctzll(r | BLOCK_MASK)) < n) {
|
||||
return r;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* make the pointer aligned */
|
||||
p += BLOCK_SIZE - t;
|
||||
n -= BLOCK_SIZE - t;
|
||||
}
|
||||
|
||||
/* attempt to compare 4 blocks at a time */
|
||||
while (n >= BLOCK_LARGE) {
|
||||
a = _mm_load ((const void *)(p + BLOCK_SIZE * 0));
|
||||
b = _mm_load ((const void *)(p + BLOCK_SIZE * 1));
|
||||
c = _mm_load ((const void *)(p + BLOCK_SIZE * 2));
|
||||
d = _mm_load ((const void *)(p + BLOCK_SIZE * 3));
|
||||
a = _mm_cmpeq (a, x);
|
||||
b = _mm_cmpeq (b, x);
|
||||
c = _mm_cmpeq (c, x);
|
||||
d = _mm_cmpeq (d, x);
|
||||
u = _mm_or (a, b);
|
||||
v = _mm_or (c, d);
|
||||
w = _mm_or (u, v);
|
||||
|
||||
/* check if anything matches */
|
||||
if (_mm_testz(w)) {
|
||||
p += BLOCK_LARGE;
|
||||
n -= BLOCK_LARGE;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* match something in the 4-blocks region */
|
||||
if ((r = _mm_movemask(a)) != 0) {
|
||||
return p - q + __builtin_ctzll(r | BLOCK_MASK);
|
||||
} else if ((r = _mm_movemask(b)) != 0) {
|
||||
return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE;
|
||||
} else if ((r = _mm_movemask(c)) != 0) {
|
||||
return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE * 2;
|
||||
} else {
|
||||
return p - q + __builtin_ctzll(_mm_movemask(d) | BLOCK_MASK) + BLOCK_SIZE * 3;
|
||||
}
|
||||
}
|
||||
|
||||
/* check every block, at most 4 times */
|
||||
for (int i = 0; i < 4 && n >= 0; i++) {
|
||||
v = _mm_load ((const void *)p);
|
||||
v = _mm_cmpeq (v, x);
|
||||
r = _mm_movemask (v);
|
||||
|
||||
/* found something */
|
||||
if (r != 0) {
|
||||
if ((r = __builtin_ctzll(r | BLOCK_MASK)) >= n) {
|
||||
return -1;
|
||||
} else {
|
||||
return p - q + r;
|
||||
}
|
||||
}
|
||||
|
||||
/* otherwise advance to next block */
|
||||
p += BLOCK_SIZE;
|
||||
n -= BLOCK_SIZE;
|
||||
}
|
||||
|
||||
#undef _mm_load
|
||||
#undef _mm_bitor
|
||||
#undef _mm_cmpeq
|
||||
#undef _mm_testz
|
||||
#undef _mm_movemask
|
||||
#undef ALIGN_VAL
|
||||
#undef BLOCK_SIZE
|
||||
#undef BLOCK_LARGE
|
||||
#else
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
if (p[i] == ch) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* not found */
|
||||
return nb;
|
||||
}
|
||||
|
||||
static inline size_t strchr2_p(const char *s, size_t nb, uint64_t c0, uint64_t c1) {
|
||||
#if USE_SSE
|
||||
do_simd(strchr2, c0, c1)
|
||||
#else
|
||||
size_t i = 0;
|
||||
while (i < nb && s[i] != c0 && s[i] != c1) i++;
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t lzero(const char *p, size_t n) {
|
||||
#if USE_SSE
|
||||
#if USE_AVX
|
||||
__m256i a;
|
||||
__m256i b;
|
||||
__m256i c;
|
||||
__m256i d;
|
||||
__m256i u;
|
||||
__m256i v;
|
||||
__m256i w;
|
||||
__m256i y = _mm256_set1_epi8(0xff);
|
||||
__m256i z = _mm256_setzero_si256();
|
||||
#define BLOCK_SIZE 32
|
||||
#else
|
||||
__m128i a;
|
||||
__m128i b;
|
||||
__m128i c;
|
||||
__m128i d;
|
||||
__m128i u;
|
||||
__m128i v;
|
||||
__m128i w;
|
||||
__m128i z = _mm_setzero_si128();
|
||||
#define BLOCK_SIZE 16
|
||||
static inline int is_zero_avx(__m256i v) {
|
||||
return _mm256_testz_si256(v, v);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_AVX2
|
||||
#define _mm_load _mm256_load_si256
|
||||
#define _mm_and(a, b) _mm256_and_si256(a, b)
|
||||
#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b)
|
||||
#define _mm_testinz(v) (!_mm256_testc_si256(v, y))
|
||||
#elif USE_AVX
|
||||
#define _mm_load _mm256_load_si256
|
||||
#define _mm_and(a, b) _mm256_and_ps((__m256)a, (__m256)b)
|
||||
#define _mm_cmpeq(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_OQ)
|
||||
#define _mm_testinz(v) (!_mm256_testc_si256(v, y))
|
||||
#else
|
||||
#define _mm_load _mm_load_si128
|
||||
#define _mm_and(a, b) _mm_and_si128(a, b)
|
||||
#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b)
|
||||
#define _mm_testinz(v) (_mm_movemask_epi8(v) != 0xffff)
|
||||
#endif
|
||||
size_t lzero(const char *sp, size_t nb) {
|
||||
size_t n = nb;
|
||||
const char * p = sp;
|
||||
|
||||
/* multi-block loop */
|
||||
while (n >= BLOCK_SIZE * 4) {
|
||||
a = _mm_load ((const void *)(p + BLOCK_SIZE * 0));
|
||||
b = _mm_load ((const void *)(p + BLOCK_SIZE * 1));
|
||||
c = _mm_load ((const void *)(p + BLOCK_SIZE * 2));
|
||||
d = _mm_load ((const void *)(p + BLOCK_SIZE * 3));
|
||||
a = _mm_cmpeq (a, z);
|
||||
b = _mm_cmpeq (b, z);
|
||||
c = _mm_cmpeq (c, z);
|
||||
d = _mm_cmpeq (d, z);
|
||||
u = _mm_and (a, b);
|
||||
v = _mm_and (c, d);
|
||||
w = _mm_and (u, v);
|
||||
|
||||
/* test for zeros */
|
||||
if (_mm_testinz(w)) {
|
||||
#if USE_AVX
|
||||
/* 32-byte loop */
|
||||
while (n >= 32) {
|
||||
if (!is_zero_avx(_mm256_loadu_si256((const void *)p))) {
|
||||
_mm256_zeroupper();
|
||||
return 1;
|
||||
} else {
|
||||
p += 32;
|
||||
n -= 32;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
p += BLOCK_SIZE * 4;
|
||||
n -= BLOCK_SIZE * 4;
|
||||
}
|
||||
|
||||
/* single block loop */
|
||||
while (n >= BLOCK_SIZE) {
|
||||
a = _mm_load ((const void *)(p));
|
||||
b = _mm_cmpeq (a, z);
|
||||
|
||||
/* test for zeros */
|
||||
if (_mm_testinz(b)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
p += BLOCK_SIZE;
|
||||
n -= BLOCK_SIZE;
|
||||
}
|
||||
|
||||
#undef _mm_load
|
||||
#undef _mm_cmpeq
|
||||
#undef _mm_bitand
|
||||
#undef _mm_testinz
|
||||
#undef BLOCK_SIZE
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 8 bytes loop */
|
||||
while (n >= 8) {
|
||||
/* 16-byte loop */
|
||||
while (n >= 16) {
|
||||
if (!is_zero_sse(_mm_loadu_si128((const void *)p))) {
|
||||
return 1;
|
||||
} else {
|
||||
p += 16;
|
||||
n -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* 8-byte test */
|
||||
if (n >= 8) {
|
||||
if (*(uint64_t *)p) {
|
||||
return 1;
|
||||
} else {
|
||||
|
|
@ -435,7 +66,7 @@ size_t lzero(const char *p, size_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
/* 4 bytes test */
|
||||
/* 4-byte test */
|
||||
if (n >= 4) {
|
||||
if (*(uint32_t *)p) {
|
||||
return 1;
|
||||
|
|
@ -445,7 +76,7 @@ size_t lzero(const char *p, size_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
/* 2 bytes test */
|
||||
/* 2-byte test */
|
||||
if (n >= 2) {
|
||||
if (*(uint16_t *)p) {
|
||||
return 1;
|
||||
|
|
@ -463,22 +94,90 @@ size_t lzero(const char *p, size_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
size_t lquote(const GoString *s, size_t p) {
|
||||
return lquote_p(s->buf + p, s->len - p) + p;
|
||||
}
|
||||
#if USE_AVX2
|
||||
static const uintptr_t ALIGN_MASK = 31;
|
||||
#else
|
||||
static const uintptr_t ALIGN_MASK = 15;
|
||||
#endif
|
||||
|
||||
size_t lspace(const char *sp, size_t nb, size_t p) {
|
||||
return lspace_p(sp + p, nb - p) + p;
|
||||
}
|
||||
int32_t ms;
|
||||
const char * ss = sp;
|
||||
|
||||
ssize_t strchr1(const GoString *s, size_t p, char ch) {
|
||||
size_t n = s->len - p;
|
||||
size_t v = strchr1_p(s->buf + p, n, ch);
|
||||
return v >= n ? -1 : v + p;
|
||||
}
|
||||
/* seek to `p` */
|
||||
sp += p;
|
||||
nb -= p;
|
||||
|
||||
ssize_t strchr2(const GoString *s, size_t p, char c0, char c1) {
|
||||
size_t n = s->len - p;
|
||||
size_t v = strchr2_p(s->buf + p, n, c0, c1);
|
||||
return v >= n ? -1 : v + p;
|
||||
}
|
||||
/* likely to run into non-spaces within a few characters, try scalar code first */
|
||||
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
|
||||
switch ((nb--, *sp++)) {
|
||||
case ' ' : break;
|
||||
case '\r' : break;
|
||||
case '\n' : break;
|
||||
case '\t' : break;
|
||||
default : return sp - ss - 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
/* 32-byte loop */
|
||||
while (likely(nb >= 32)) {
|
||||
__m256i x = _mm256_load_si256 ((const void *)sp);
|
||||
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
|
||||
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
|
||||
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
|
||||
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
|
||||
__m256i u = _mm256_or_si256 (a, b);
|
||||
__m256i v = _mm256_or_si256 (c, d);
|
||||
__m256i w = _mm256_or_si256 (u, v);
|
||||
|
||||
/* check for matches */
|
||||
if ((ms = _mm256_movemask_epi8(w)) != -1) {
|
||||
_mm256_zeroupper();
|
||||
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 32;
|
||||
nb -= 32;
|
||||
}
|
||||
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 16-byte loop */
|
||||
while (likely(nb >= 16)) {
|
||||
__m128i x = _mm_load_si128 ((const void *)sp);
|
||||
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
|
||||
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
|
||||
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
|
||||
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
|
||||
__m128i u = _mm_or_si128 (a, b);
|
||||
__m128i v = _mm_or_si128 (c, d);
|
||||
__m128i w = _mm_or_si128 (u, v);
|
||||
|
||||
/* check for matches */
|
||||
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
|
||||
return sp - ss + __builtin_ctz(~ms);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 16;
|
||||
nb -= 16;
|
||||
}
|
||||
|
||||
/* remaining bytes, do with scalar code */
|
||||
while (nb-- > 0) {
|
||||
switch (*sp++) {
|
||||
case ' ' : break;
|
||||
case '\r' : break;
|
||||
case '\n' : break;
|
||||
case '\t' : break;
|
||||
default : return sp - ss - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* all the characters are spaces */
|
||||
return sp - ss;
|
||||
}
|
||||
|
|
@ -16,8 +16,6 @@
|
|||
|
||||
#include "native.h"
|
||||
|
||||
#if USE_SSE
|
||||
|
||||
static const char Digits[200] = {
|
||||
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
|
||||
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
|
||||
|
|
@ -223,8 +221,6 @@ static inline int u64toa_xlarge_sse2(char *out, uint64_t val) {
|
|||
return n + 16;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int i64toa(char *out, int64_t val) {
|
||||
if (likely(val >= 0)) {
|
||||
return u64toa(out, (uint64_t)val);
|
||||
|
|
@ -234,8 +230,6 @@ int i64toa(char *out, int64_t val) {
|
|||
}
|
||||
}
|
||||
|
||||
#if USE_SSE
|
||||
|
||||
int u64toa(char *out, uint64_t val) {
|
||||
if (likely(val < 10000)) {
|
||||
return u32toa_small(out, (uint32_t)val);
|
||||
|
|
@ -247,29 +241,3 @@ int u64toa(char *out, uint64_t val) {
|
|||
return u64toa_xlarge_sse2(out, val);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/**
 * Scalar conversion of an unsigned 64-bit integer to its decimal text form.
 *
 * @param out  destination buffer; must have room for up to 20 characters.
 *             No NUL terminator is written.
 * @param val  value to convert.
 * @return     number of characters written to `out` (at least 1).
 */
int u64toa(char *out, uint64_t val) {
    int      n = 0;
    uint64_t v = val;

    /* Emit digits least-significant first. The loop must test the working
     * copy `v` (not `val`, which never changes), and do/while guarantees
     * that zero is rendered as a single '0'. */
    do {
        out[n++] = (char)('0' + (v % 10));
        v /= 10;
    } while (v != 0);

    /* digits were produced in reverse order, swap them into place */
    for (int lo = 0, hi = n - 1; lo < hi; lo++, hi--) {
        char c  = out[lo];
        out[lo] = out[hi];
        out[hi] = c;
    }

    /* all done */
    return n;
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -93,13 +93,11 @@ int f64toa(char *out, double val);
|
|||
int i64toa(char *out, int64_t val);
|
||||
int u64toa(char *out, uint64_t val);
|
||||
|
||||
size_t lzero(const char *p, size_t n);
|
||||
size_t lquote(const GoString *s, size_t p);
|
||||
size_t lspace(const char *sp, size_t nb, size_t p);
|
||||
ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flags);
|
||||
size_t lzero(const char *sp, size_t nb);
|
||||
size_t lspace(const char *sp, size_t nb, size_t p);
|
||||
|
||||
ssize_t strchr1(const GoString *s, size_t p, char ch);
|
||||
ssize_t strchr2(const GoString *s, size_t p, char c0, char c1);
|
||||
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags);
|
||||
ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flags);
|
||||
|
||||
long value(const char *s, size_t n, long p, JsonState *ret, int allow_control);
|
||||
void vstring(const GoString *src, long *p, JsonState *ret);
|
||||
|
|
|
|||
296
native/parsing.c
296
native/parsing.c
|
|
@ -16,6 +16,292 @@
|
|||
|
||||
#include "native.h"
|
||||
|
||||
/** String Quoting **/
|
||||
|
||||
/* One escape-table entry: `n` is the length of the replacement text held in
 * `s`; n == 0 means the byte is copied through unchanged. */
typedef struct {
    const long n;
    const char s[8];
} quoted_t;

/* Escapes for a single level of quoting: control characters (< 0x20),
 * the double quote and the backslash. All other bytes have n == 0. */
static const quoted_t _SingleQuoteTab[256] = {
    ['\x00'] = { 6, "\\u0000" },
    ['\x01'] = { 6, "\\u0001" },
    ['\x02'] = { 6, "\\u0002" },
    ['\x03'] = { 6, "\\u0003" },
    ['\x04'] = { 6, "\\u0004" },
    ['\x05'] = { 6, "\\u0005" },
    ['\x06'] = { 6, "\\u0006" },
    ['\x07'] = { 6, "\\u0007" },
    ['\b'  ] = { 2, "\\b"     },
    ['\t'  ] = { 2, "\\t"     },
    ['\n'  ] = { 2, "\\n"     },
    ['\x0b'] = { 6, "\\u000b" },
    ['\f'  ] = { 2, "\\f"     },
    ['\r'  ] = { 2, "\\r"     },
    ['\x0e'] = { 6, "\\u000e" },
    ['\x0f'] = { 6, "\\u000f" },
    ['\x10'] = { 6, "\\u0010" },
    ['\x11'] = { 6, "\\u0011" },
    ['\x12'] = { 6, "\\u0012" },
    ['\x13'] = { 6, "\\u0013" },
    ['\x14'] = { 6, "\\u0014" },
    ['\x15'] = { 6, "\\u0015" },
    ['\x16'] = { 6, "\\u0016" },
    ['\x17'] = { 6, "\\u0017" },
    ['\x18'] = { 6, "\\u0018" },
    ['\x19'] = { 6, "\\u0019" },
    ['\x1a'] = { 6, "\\u001a" },
    ['\x1b'] = { 6, "\\u001b" },
    ['\x1c'] = { 6, "\\u001c" },
    ['\x1d'] = { 6, "\\u001d" },
    ['\x1e'] = { 6, "\\u001e" },
    ['\x1f'] = { 6, "\\u001f" },
    ['"'   ] = { 2, "\\\""    },
    ['\\'  ] = { 2, "\\\\"    },
};

/* Same byte set as _SingleQuoteTab, but every replacement carries one more
 * level of backslash escaping (selected by quote() when F_DBLUNQ is set). */
static const quoted_t _DoubleQuoteTab[256] = {
    ['\x00'] = { 7, "\\\\u0000" },
    ['\x01'] = { 7, "\\\\u0001" },
    ['\x02'] = { 7, "\\\\u0002" },
    ['\x03'] = { 7, "\\\\u0003" },
    ['\x04'] = { 7, "\\\\u0004" },
    ['\x05'] = { 7, "\\\\u0005" },
    ['\x06'] = { 7, "\\\\u0006" },
    ['\x07'] = { 7, "\\\\u0007" },
    ['\b'  ] = { 3, "\\\\b"     },
    ['\t'  ] = { 3, "\\\\t"     },
    ['\n'  ] = { 3, "\\\\n"     },
    ['\x0b'] = { 7, "\\\\u000b" },
    ['\f'  ] = { 3, "\\\\f"     },
    ['\r'  ] = { 3, "\\\\r"     },
    ['\x0e'] = { 7, "\\\\u000e" },
    ['\x0f'] = { 7, "\\\\u000f" },
    ['\x10'] = { 7, "\\\\u0010" },
    ['\x11'] = { 7, "\\\\u0011" },
    ['\x12'] = { 7, "\\\\u0012" },
    ['\x13'] = { 7, "\\\\u0013" },
    ['\x14'] = { 7, "\\\\u0014" },
    ['\x15'] = { 7, "\\\\u0015" },
    ['\x16'] = { 7, "\\\\u0016" },
    ['\x17'] = { 7, "\\\\u0017" },
    ['\x18'] = { 7, "\\\\u0018" },
    ['\x19'] = { 7, "\\\\u0019" },
    ['\x1a'] = { 7, "\\\\u001a" },
    ['\x1b'] = { 7, "\\\\u001b" },
    ['\x1c'] = { 7, "\\\\u001c" },
    ['\x1d'] = { 7, "\\\\u001d" },
    ['\x1e'] = { 7, "\\\\u001e" },
    ['\x1f'] = { 7, "\\\\u001f" },
    ['"'   ] = { 4, "\\\\\\\""  },
    ['\\'  ] = { 4, "\\\\\\\\"  },
};
|
||||
|
||||
/* Copies `nb` bytes from `sp` to `dp` using at most one 4-byte, one 2-byte
 * and one 1-byte move, i.e. no more than 7 bytes in total. */
static inline void memcpy_p8(char *dp, const char *sp, size_t nb) {
    size_t off = 0;

    if (nb >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off += 4;
    }
    if (nb - off >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off += 2;
    }
    if (nb - off >= 1) {
        dp[off] = sp[off];
    }
}
|
||||
|
||||
/* Copies `nb` bytes from `sp` to `dp` using at most one 8-, 4-, 2- and
 * 1-byte move each, i.e. no more than 15 bytes in total. */
static inline void memcpy_p16(char *dp, const char *sp, size_t nb) {
    size_t off = 0;

    if (nb >= 8) {
        *(uint64_t *)(dp + off) = *(const uint64_t *)(sp + off);
        off += 8;
    }
    if (nb - off >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off += 4;
    }
    if (nb - off >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off += 2;
    }
    if (nb - off >= 1) {
        dp[off] = sp[off];
    }
}
|
||||
|
||||
/* Copies `nb` bytes from `sp` to `dp` using at most one 16-, 8-, 4-, 2- and
 * 1-byte move each, i.e. no more than 31 bytes in total. */
static inline void memcpy_p32(char *dp, const char *sp, size_t nb) {
    size_t off = 0;

    if (nb >= 16) {
        _mm_storeu_si128((void *)(dp + off), _mm_loadu_si128((const void *)(sp + off)));
        off += 16;
    }
    if (nb - off >= 8) {
        *(uint64_t *)(dp + off) = *(const uint64_t *)(sp + off);
        off += 8;
    }
    if (nb - off >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off += 4;
    }
    if (nb - off >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off += 2;
    }
    if (nb - off >= 1) {
        dp[off] = sp[off];
    }
}
|
||||
|
||||
/* Returns a byte mask (0xff per matching lane) marking every byte of `vv`
 * that is in the range 0..31, or equals '"' or '\\'. */
static inline __m128i _mm_find_quote(__m128i vv) {
    const __m128i non_neg  = _mm_cmpgt_epi8(vv, _mm_set1_epi8(-1));
    const __m128i above_1f = _mm_cmpgt_epi8(vv, _mm_set1_epi8(31));
    const __m128i is_dq    = _mm_cmpeq_epi8(vv, _mm_set1_epi8('"'));
    const __m128i is_bs    = _mm_cmpeq_epi8(vv, _mm_set1_epi8('\\'));

    /* signed compare: 0 <= byte <= 31 */
    const __m128i is_ctrl  = _mm_andnot_si128(above_1f, non_neg);

    return _mm_or_si128(is_ctrl, _mm_or_si128(is_dq, is_bs));
}
|
||||
|
||||
#if USE_AVX2
/* 32-byte variant of _mm_find_quote: marks every byte of `vv` that is in
 * the range 0..31, or equals '"' or '\\'. */
static inline __m256i _mm256_find_quote(__m256i vv) {
    const __m256i non_neg  = _mm256_cmpgt_epi8(vv, _mm256_set1_epi8(-1));
    const __m256i above_1f = _mm256_cmpgt_epi8(vv, _mm256_set1_epi8(31));
    const __m256i is_dq    = _mm256_cmpeq_epi8(vv, _mm256_set1_epi8('"'));
    const __m256i is_bs    = _mm256_cmpeq_epi8(vv, _mm256_set1_epi8('\\'));

    /* signed compare: 0 <= byte <= 31 */
    const __m256i is_ctrl  = _mm256_andnot_si256(above_1f, non_neg);

    return _mm256_or_si256(is_ctrl, _mm256_or_si256(is_dq, is_bs));
}
#endif
|
||||
|
||||
static inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
|
||||
uint32_t mm;
|
||||
const char * ss = sp;
|
||||
|
||||
#if USE_AVX2
|
||||
/* 32-byte loop, full store */
|
||||
while (nb >= 32 && dn >= 32) {
|
||||
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
|
||||
__m256i rv = _mm256_find_quote (vv);
|
||||
_mm256_storeu_si256 ((void *)dp, vv);
|
||||
|
||||
/* check for matches */
|
||||
if ((mm = _mm256_movemask_epi8(rv)) != 0) {
|
||||
return sp - ss + __builtin_ctz(mm);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 32;
|
||||
dp += 32;
|
||||
nb -= 32;
|
||||
dn -= 32;
|
||||
}
|
||||
|
||||
/* 32-byte test, partial store */
|
||||
if (nb >= 32) {
|
||||
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
|
||||
__m256i rv = _mm256_find_quote (vv);
|
||||
uint32_t mv = _mm256_movemask_epi8 (rv);
|
||||
uint32_t fv = __builtin_ctzll ((uint64_t)mv | 0x0100000000);
|
||||
|
||||
/* copy at most `dn` characters */
|
||||
if (fv <= dn) {
|
||||
memcpy_p32(dp, sp, fv);
|
||||
return sp - ss + fv;
|
||||
} else {
|
||||
memcpy_p32(dp, sp, dn);
|
||||
return -(sp - ss + dn) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 16-byte loop, full store */
|
||||
while (nb >= 16 && dn >= 16) {
|
||||
__m128i vv = _mm_loadu_si128 ((const void *)sp);
|
||||
__m128i rv = _mm_find_quote (vv);
|
||||
_mm_storeu_si128 ((void *)dp, vv);
|
||||
|
||||
/* check for matches */
|
||||
if ((mm = _mm_movemask_epi8(rv)) != 0) {
|
||||
return sp - ss + __builtin_ctz(mm);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 16;
|
||||
dp += 16;
|
||||
nb -= 16;
|
||||
dn -= 16;
|
||||
}
|
||||
|
||||
/* 16-byte test, partial store */
|
||||
if (nb >= 16) {
|
||||
__m128i vv = _mm_loadu_si128 ((const void *)sp);
|
||||
__m128i rv = _mm_find_quote (vv);
|
||||
uint32_t mv = _mm_movemask_epi8 (rv);
|
||||
uint32_t fv = __builtin_ctz (mv | 0x010000);
|
||||
|
||||
/* copy at most `dn` characters */
|
||||
if (fv <= dn) {
|
||||
memcpy_p16(dp, sp, fv);
|
||||
return sp - ss + fv;
|
||||
} else {
|
||||
memcpy_p16(dp, sp, dn);
|
||||
return -(sp - ss + dn) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle the remaining bytes with scalar code */
|
||||
while (nb > 0 && dn > 0) {
|
||||
if (_SingleQuoteTab[*(uint8_t *)sp].n) {
|
||||
return sp - ss;
|
||||
} else {
|
||||
dn--, nb--;
|
||||
*dp++ = *sp++;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for dest buffer */
|
||||
if (nb == 0) {
|
||||
return sp - ss;
|
||||
} else {
|
||||
return -(sp - ss) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags) {
|
||||
ssize_t nd = *dn;
|
||||
const char * ds = dp;
|
||||
const char * ss = sp;
|
||||
const quoted_t * tab;
|
||||
|
||||
/* select quoting table */
|
||||
if (!(flags & F_DBLUNQ)) {
|
||||
tab = _SingleQuoteTab;
|
||||
} else {
|
||||
tab = _DoubleQuoteTab;
|
||||
}
|
||||
|
||||
/* find the special characters, copy on the fly */
|
||||
while (nb != 0) {
|
||||
int nc;
|
||||
uint8_t ch;
|
||||
ssize_t rb = memcchr_quote(sp, nb, dp, nd);
|
||||
|
||||
/* not enough buffer space */
|
||||
if (rb < 0) {
|
||||
*dn = dp - ds - rb - 1;
|
||||
return -(sp - ss - rb - 1) - 1;
|
||||
}
|
||||
|
||||
/* skip already copied bytes */
|
||||
sp += rb;
|
||||
dp += rb;
|
||||
nb -= rb;
|
||||
nd -= rb;
|
||||
|
||||
/* get the escape entry, handle consecutive quotes */
|
||||
while (nb != 0) {
|
||||
ch = *(uint8_t *)sp;
|
||||
nc = tab[ch].n;
|
||||
|
||||
/* check for escape character */
|
||||
if (nc == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* check for buffer space */
|
||||
if (nc > nd) {
|
||||
*dn = dp - ds;
|
||||
return -(sp - ss) - 1;
|
||||
}
|
||||
|
||||
/* copy the quoted value */
|
||||
memcpy_p8(dp, tab[ch].s, nc);
|
||||
sp++;
|
||||
nb--;
|
||||
dp += nc;
|
||||
nd -= nc;
|
||||
}
|
||||
}
|
||||
|
||||
/* all done */
|
||||
*dn = dp - ds;
|
||||
return sp - ss;
|
||||
}
|
||||
|
||||
/** String Unquoting **/
|
||||
|
||||
static const char _UnquoteTab[256] = {
|
||||
['/' ] = '/',
|
||||
['"' ] = '"',
|
||||
|
|
@ -46,7 +332,7 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
|
|||
|
||||
/* check for matches */
|
||||
if ((r = _mm256_movemask_epi8(v)) != 0) {
|
||||
return s - q + __builtin_ctzll(r | (1ull << 32));
|
||||
return s - q + __builtin_ctzll(r);
|
||||
}
|
||||
|
||||
/* move to the next 32 bytes */
|
||||
|
|
@ -54,13 +340,12 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
|
|||
p += 32;
|
||||
n -= 32;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_AVX2
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
#if USE_SSE
|
||||
/* initialize with '\\' */
|
||||
__m128i x;
|
||||
__m128i y;
|
||||
__m128i a = _mm_set1_epi8('\\');
|
||||
|
|
@ -73,7 +358,7 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
|
|||
|
||||
/* check for matches */
|
||||
if ((r = _mm_movemask_epi8(y)) != 0) {
|
||||
return s - q + __builtin_ctzll(r | (1 << 16));
|
||||
return s - q + __builtin_ctzll(r);
|
||||
}
|
||||
|
||||
/* move to the next 16 bytes */
|
||||
|
|
@ -81,7 +366,6 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
|
|||
p += 16;
|
||||
n -= 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* remaining bytes, do with scalar code */
|
||||
while (n--) {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,9 @@
|
|||
static const char *CS_ARRAY = "[]{},\"[]{},\"[]{}";
|
||||
static const char *CS_OBJECT = "[]{},:\"[]{}:,\"[]";
|
||||
|
||||
static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
|
||||
static const uint64_t EVEN_MASK = 0x5555555555555555;
|
||||
|
||||
static const double P10_TAB[632] = {
|
||||
/* <================= -Inf ================= */ 1e-323, 1e-322, 1e-321, 1e-320,
|
||||
1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310,
|
||||
|
|
@ -96,22 +99,54 @@ static inline double pow10(double v, int p) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Adds the low 32 bits of `v1` and `v2`. Returns the 32-bit sum
 * (zero-extended) and stores the carry-out (0 or 1) in `*vo`. */
static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) {
    const uint32_t lhs = (uint32_t)v1;
    const uint32_t rhs = (uint32_t)v2;
    uint32_t       sum;

    /* the builtin reports whether the addition wrapped */
    *vo = __builtin_uadd_overflow(lhs, rhs, &sum);
    return sum;
}
|
||||
|
||||
/* Adds `v1` and `v2` as 64-bit unsigned integers. Returns the wrapped sum
 * and stores the carry-out (0 or 1) in `*vo`. */
static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
    /* __builtin_uaddll_overflow writes through an `unsigned long long *`;
     * uint64_t may be `unsigned long`, which is a distinct (incompatible)
     * type, so the result goes through a local of the exact type. */
    unsigned long long sum;
    uint64_t           carry = __builtin_uaddll_overflow(v1, v2, &sum);

    /* set the carry */
    *vo = carry;
    return sum;
}
|
||||
|
||||
/* Returns 1 if `ch` is one of the four whitespace characters tested here
 * (space, CR, LF, TAB), 0 otherwise. */
static inline char isspace(char ch) {
    /* all four tests are joined with logical OR (the last one used to be a
     * bitwise `|`, which only worked because both operands were 0 or 1) */
    return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t';
}
|
||||
|
||||
/* Moves the cursor `*p` back by one position and then runs vnumber()
 * from there. */
static inline void vdigits(const GoString *src, long *p, JsonState *ret) {
    *p -= 1;
    vnumber(src, p, ret);
}
|
||||
|
||||
static inline char advance(const GoString *src, long *p) {
|
||||
if (*p >= src->len) {
|
||||
return 0;
|
||||
} else {
|
||||
return src->buf[(*p)++];
|
||||
}
|
||||
}
|
||||
|
||||
static inline char advance_ns(const GoString *src, long *p) {
|
||||
*p = lspace(src->buf, src->len, *p);
|
||||
return advance(src, p);
|
||||
size_t vi = *p;
|
||||
size_t nb = src->len;
|
||||
const char * sp = src->buf;
|
||||
|
||||
/* it's likely to run into non-spaces within a few
|
||||
* characters, so test up to 4 characters manually */
|
||||
for (int i = 0; i < 4 && vi < nb; i++, vi++) {
|
||||
if (!isspace(sp[vi])) {
|
||||
goto nospace;
|
||||
}
|
||||
}
|
||||
|
||||
/* too many spaces, use SIMD to search for characters */
|
||||
if ((vi = lspace(sp, nb, vi)) >= nb) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nospace:
|
||||
*p = vi + 1;
|
||||
return src->buf[vi];
|
||||
}
|
||||
|
||||
static inline int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) {
|
||||
|
|
@ -128,192 +163,219 @@ static inline int64_t advance_dword(const GoString *src, long *p, long dec, int6
|
|||
}
|
||||
}
|
||||
|
||||
static inline ssize_t advance_string(const GoString *src, long *p, int64_t *ep) {
|
||||
ssize_t e;
|
||||
ssize_t i;
|
||||
static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
||||
char ch;
|
||||
uint64_t es;
|
||||
uint64_t fe;
|
||||
uint64_t os;
|
||||
uint64_t m0;
|
||||
uint64_t m1;
|
||||
uint64_t mx;
|
||||
uint64_t cr = 0;
|
||||
|
||||
/* check for end of string */
|
||||
if ((e = strchr2(src, *p, '"', '\\')) < 0) {
|
||||
*p = src->len;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
/* buffer pointers */
|
||||
size_t nb = src->len;
|
||||
const char * sp = src->buf;
|
||||
const char * ss = src->buf;
|
||||
|
||||
/* encounters a '"' at the first scan, it's an unquoted string */
|
||||
if (src->buf[e] == '"') {
|
||||
*ep = -1;
|
||||
return e;
|
||||
}
|
||||
#define ep_init() *ep = -1;
|
||||
#define ep_setc() ep_setx(sp - ss - 1)
|
||||
#define ep_setx(x) if (*ep == -1) { *ep = (x); }
|
||||
|
||||
/* search for the next double quote */
|
||||
i = e;
|
||||
e = strchr1(src, e + 1, '"');
|
||||
/* seek to `p` */
|
||||
nb -= p;
|
||||
sp += p;
|
||||
ep_init()
|
||||
|
||||
/* seek to the end of string */
|
||||
while (e >= 0) {
|
||||
int n = 0;
|
||||
const char * q = src->buf + e;
|
||||
#if USE_AVX2
|
||||
/* initialize vectors */
|
||||
__m256i v0;
|
||||
__m256i v1;
|
||||
__m256i q0;
|
||||
__m256i q1;
|
||||
__m256i x0;
|
||||
__m256i x1;
|
||||
__m256i cq = _mm256_set1_epi8('"');
|
||||
__m256i cx = _mm256_set1_epi8('\\');
|
||||
|
||||
/* counting backslashes */
|
||||
while (*--q == '\\') {
|
||||
n++;
|
||||
/* partial masks */
|
||||
uint32_t s0;
|
||||
uint32_t s1;
|
||||
uint32_t t0;
|
||||
uint32_t t1;
|
||||
#else
|
||||
/* initialize vectors */
|
||||
__m128i v0;
|
||||
__m128i v1;
|
||||
__m128i v2;
|
||||
__m128i v3;
|
||||
__m128i q0;
|
||||
__m128i q1;
|
||||
__m128i q2;
|
||||
__m128i q3;
|
||||
__m128i x0;
|
||||
__m128i x1;
|
||||
__m128i x2;
|
||||
__m128i x3;
|
||||
__m128i cq = _mm_set1_epi8('"');
|
||||
__m128i cx = _mm_set1_epi8('\\');
|
||||
|
||||
/* partial masks */
|
||||
uint32_t s0;
|
||||
uint32_t s1;
|
||||
uint32_t s2;
|
||||
uint32_t s3;
|
||||
uint32_t t0;
|
||||
uint32_t t1;
|
||||
uint32_t t2;
|
||||
uint32_t t3;
|
||||
#endif
|
||||
|
||||
#define m0_mask(add) \
|
||||
m1 &= ~cr; \
|
||||
fe = (m1 << 1) | cr; \
|
||||
os = (m1 & ~fe) & ODD_MASK; \
|
||||
es = add(os, m1, &cr) << 1; \
|
||||
m0 &= ~(fe & (es ^ EVEN_MASK));
|
||||
|
||||
/* 64-byte SIMD loop */
|
||||
while (likely(nb >= 64)) {
|
||||
#if USE_AVX2
|
||||
v0 = _mm256_loadu_si256 ((const void *)(sp + 0));
|
||||
v1 = _mm256_loadu_si256 ((const void *)(sp + 32));
|
||||
q0 = _mm256_cmpeq_epi8 (v0, cq);
|
||||
q1 = _mm256_cmpeq_epi8 (v1, cq);
|
||||
x0 = _mm256_cmpeq_epi8 (v0, cx);
|
||||
x1 = _mm256_cmpeq_epi8 (v1, cx);
|
||||
s0 = _mm256_movemask_epi8 (q0);
|
||||
s1 = _mm256_movemask_epi8 (q1);
|
||||
t0 = _mm256_movemask_epi8 (x0);
|
||||
t1 = _mm256_movemask_epi8 (x1);
|
||||
m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
|
||||
#else
|
||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||
v2 = _mm_loadu_si128 ((const void *)(sp + 32));
|
||||
v3 = _mm_loadu_si128 ((const void *)(sp + 48));
|
||||
q0 = _mm_cmpeq_epi8 (v0, cq);
|
||||
q1 = _mm_cmpeq_epi8 (v1, cq);
|
||||
q2 = _mm_cmpeq_epi8 (v2, cq);
|
||||
q3 = _mm_cmpeq_epi8 (v3, cq);
|
||||
x0 = _mm_cmpeq_epi8 (v0, cx);
|
||||
x1 = _mm_cmpeq_epi8 (v1, cx);
|
||||
x2 = _mm_cmpeq_epi8 (v2, cx);
|
||||
x3 = _mm_cmpeq_epi8 (v3, cx);
|
||||
s0 = _mm_movemask_epi8 (q0);
|
||||
s1 = _mm_movemask_epi8 (q1);
|
||||
s2 = _mm_movemask_epi8 (q2);
|
||||
s3 = _mm_movemask_epi8 (q3);
|
||||
t0 = _mm_movemask_epi8 (x0);
|
||||
t1 = _mm_movemask_epi8 (x1);
|
||||
t2 = _mm_movemask_epi8 (x2);
|
||||
t3 = _mm_movemask_epi8 (x3);
|
||||
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
#endif
|
||||
|
||||
/** update first quote position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
}
|
||||
|
||||
/* pairs of backslashes cancel each other out */
|
||||
if ((n & 1) == 0) {
|
||||
break;
|
||||
/** mask all the escaped quotes */
|
||||
if (unlikely(m1 != 0 || cr != 0)) {
|
||||
m0_mask(add64)
|
||||
}
|
||||
|
||||
/* find the next double quote */
|
||||
e = e + 1;
|
||||
e = strchr1(src, e, '"');
|
||||
}
|
||||
|
||||
/* check for end of string */
|
||||
if (e < 0) {
|
||||
*p = src->len;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
|
||||
/* update the result */
|
||||
*ep = i;
|
||||
return e;
|
||||
}
|
||||
|
||||
static inline int64_t advance_number(const GoString *src, long *p, long i, JsonState *ret, size_t sp) {
|
||||
size_t n = src->len;
|
||||
const char * s = src->buf;
|
||||
|
||||
/* check for EOF */
|
||||
if (i >= n) {
|
||||
*p = n;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
|
||||
/* base factors */
|
||||
int esm = 1;
|
||||
int exp = 0;
|
||||
int ovf = 0;
|
||||
int rem = 0;
|
||||
double val = 0;
|
||||
int64_t idx = 0;
|
||||
int64_t i64 = 0;
|
||||
int64_t rvt = V_INTEGER;
|
||||
|
||||
/* initial state */
|
||||
ret->iv = 0;
|
||||
ret->ep = sp;
|
||||
|
||||
/* check for the special case of '0' */
|
||||
if (s[i] == '0') {
|
||||
if (++i >= n) {
|
||||
*p = i;
|
||||
return V_INTEGER;
|
||||
/* check for end quote */
|
||||
if (m0 != 0) {
|
||||
return sp - ss + __builtin_ctzll(m0) + 1;
|
||||
}
|
||||
} else {
|
||||
if (s[i] < '0' || s[i] > '9') {
|
||||
*p = i;
|
||||
return -ERR_INVAL;
|
||||
|
||||
/* move to the next block */
|
||||
sp += 64;
|
||||
nb -= 64;
|
||||
}
|
||||
|
||||
/* 32-byte SIMD round */
|
||||
if (likely(nb >= 32)) {
|
||||
#if USE_AVX2
|
||||
v0 = _mm256_loadu_si256 ((const void *)sp);
|
||||
q0 = _mm256_cmpeq_epi8 (v0, cq);
|
||||
x0 = _mm256_cmpeq_epi8 (v0, cx);
|
||||
s0 = _mm256_movemask_epi8 (q0);
|
||||
t0 = _mm256_movemask_epi8 (x0);
|
||||
m0 = (uint64_t)s0;
|
||||
m1 = (uint64_t)t0;
|
||||
#else
|
||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||
q0 = _mm_cmpeq_epi8 (v0, cq);
|
||||
q1 = _mm_cmpeq_epi8 (v1, cq);
|
||||
x0 = _mm_cmpeq_epi8 (v0, cx);
|
||||
x1 = _mm_cmpeq_epi8 (v1, cx);
|
||||
s0 = _mm_movemask_epi8 (q0);
|
||||
s1 = _mm_movemask_epi8 (q1);
|
||||
t0 = _mm_movemask_epi8 (x0);
|
||||
t1 = _mm_movemask_epi8 (x1);
|
||||
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
#endif
|
||||
|
||||
/** update first quote position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
}
|
||||
|
||||
/** mask all the escaped quotes */
|
||||
if (unlikely(m1 != 0 || cr != 0)) {
|
||||
m0_mask(add32)
|
||||
}
|
||||
|
||||
/* check for end quote */
|
||||
if (m0 != 0) {
|
||||
return sp - ss + __builtin_ctzll(m0) + 1;
|
||||
}
|
||||
|
||||
/* move to the next block */
|
||||
sp += 32;
|
||||
nb -= 32;
|
||||
}
|
||||
|
||||
/* check for carry */
|
||||
if (unlikely(cr != 0)) {
|
||||
if (nb == 0) {
|
||||
return -ERR_EOF;
|
||||
} else {
|
||||
while (!(ovf = __builtin_smulll_overflow((idx = i64), 10, &i64)) &&
|
||||
!(ovf = __builtin_saddll_overflow(i64, s[i] - '0', &i64)) &&
|
||||
!(++i >= n || !(s[i] >= '0' && s[i] <= '9')));
|
||||
ep_setc()
|
||||
sp++, nb--;
|
||||
}
|
||||
}
|
||||
|
||||
/* set the integer part */
|
||||
ret->iv = i64;
|
||||
ret->dv = i64;
|
||||
|
||||
/* check for integer overflow, in such case
|
||||
* the number must be represented by double */
|
||||
if (ovf) {
|
||||
val = idx;
|
||||
rvt = V_DOUBLE;
|
||||
|
||||
/* convert the remaining digits */
|
||||
do {
|
||||
val *= 10;
|
||||
val += s[i++] - '0';
|
||||
} while (i < n && s[i] >= '0' && s[i] <= '9');
|
||||
|
||||
/* set the integer part to INT64_MAX to indicate an overflow */
|
||||
ret->dv = val;
|
||||
ret->iv = INT64_MAX;
|
||||
}
|
||||
|
||||
/* check for decimal points */
|
||||
if (i < n && s[i] == '.') {
|
||||
idx = ++i;
|
||||
rvt = V_DOUBLE;
|
||||
|
||||
/* check for EOF */
|
||||
if (i >= n) {
|
||||
*p = n;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
|
||||
/* should be a digit */
|
||||
if (s[i] < '0' || s[i] > '9') {
|
||||
*p = i;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* convert the fractional part */
|
||||
do {
|
||||
rem *= 10;
|
||||
rem += s[i++] - '0';
|
||||
} while (i < n && s[i] >= '0' && s[i] <= '9');
|
||||
|
||||
/* combine with the integer part */
|
||||
idx -= i;
|
||||
ret->dv += pow10(rem, idx);
|
||||
}
|
||||
|
||||
/* check for exponent */
|
||||
if (i < n && (s[i] == 'e' || s[i] == 'E')) {
|
||||
i++;
|
||||
rvt = V_DOUBLE;
|
||||
|
||||
/* check for EOF */
|
||||
if (i >= n) {
|
||||
*p = n;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
|
||||
/* check for the '+' or '-' sign */
|
||||
if (s[i] == '+' || s[i] == '-') {
|
||||
if (i >= n - 1) {
|
||||
*p = i;
|
||||
/* handle the remaining bytes with scalar code */
|
||||
while (nb-- > 0 && (ch = *sp++) != '"') {
|
||||
if (unlikely(ch == '\\')) {
|
||||
if (nb == 0) {
|
||||
return -ERR_EOF;
|
||||
} else {
|
||||
if (s[i++] == '+') {
|
||||
esm = 1;
|
||||
} else {
|
||||
esm = -1;
|
||||
}
|
||||
ep_setc()
|
||||
sp++, nb--;
|
||||
}
|
||||
}
|
||||
|
||||
/* should be a digit */
|
||||
if (s[i] < '0' || s[i] > '9') {
|
||||
*p = i;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* convert the power */
|
||||
do {
|
||||
exp *= 10;
|
||||
exp += s[i++] - '0';
|
||||
} while (i < n && s[i] >= '0' && s[i] <= '9');
|
||||
|
||||
/* apply the power */
|
||||
exp *= esm;
|
||||
ret->dv = pow10(ret->dv, exp);
|
||||
}
|
||||
|
||||
/* calculate the offset */
|
||||
*p = i;
|
||||
return rvt;
|
||||
#undef ep_init
|
||||
#undef ep_setc
|
||||
#undef ep_setx
|
||||
#undef m0_mask
|
||||
|
||||
/* check for quotes */
|
||||
if (ch == '"') {
|
||||
return sp - ss;
|
||||
} else {
|
||||
return -ERR_EOF;
|
||||
}
|
||||
}
|
||||
|
||||
/** Value Scanning Routines **/
|
||||
|
|
@ -352,16 +414,17 @@ long value(const char *s, size_t n, long p, JsonState *ret, int allow_control) {
|
|||
|
||||
void vstring(const GoString *src, long *p, JsonState *ret) {
|
||||
int64_t i = *p;
|
||||
ssize_t e = advance_string(src, p, &ret->ep);
|
||||
ssize_t e = advance_string(src, i, &ret->ep);
|
||||
|
||||
/* check for errors */
|
||||
if (e < 0) {
|
||||
*p = src->len;
|
||||
ret->vt = e;
|
||||
return;
|
||||
}
|
||||
|
||||
/* update the result */
|
||||
*p = e + 1;
|
||||
*p = e;
|
||||
ret->iv = i;
|
||||
ret->vt = V_STRING;
|
||||
}
|
||||
|
|
@ -743,6 +806,218 @@ static inline long fsm_exec(StateMachine *self, const GoString *src, long *p) {
|
|||
#undef FSM_CHAR
|
||||
#undef FSM_XERR
|
||||
|
||||
/*
 * Helper macros for the number skipper. They expand inside a function that
 * has `sp` (current block pointer), `ss` (start pointer) and, for
 * check_bits, a scratch integer `v` in scope, and they `return` a negative
 * value from that function on failure. They are invoked without a trailing
 * semicolon. Every macro argument is parenthesized so that expression
 * arguments (e.g. `a | b`) are evaluated as a unit.
 */

/* Fails if more than one bit is set in `mv`; `mv & (mv - 1)` clears the
 * lowest set bit, so a non-zero remainder means a second occurrence. */
#define check_bits(mv)                                          \
    if (unlikely((v = (mv) & ((mv) - 1)) != 0)) {               \
        return -(sp - ss + __builtin_ctz(v) + 1);               \
    }

/* Records the position just before `sp` in `iv` if it is still unset (-1);
 * fails if `iv` was already set. */
#define check_sidx(iv)                                          \
    if (likely((iv) == -1)) {                                   \
        (iv) = sp - ss - 1;                                     \
    } else {                                                    \
        return -(sp - ss);                                      \
    }

/* If `mv` has a bit set, records the position of its lowest set bit in `iv`
 * when `iv` is still unset (-1); fails if `iv` was already set. */
#define check_vidx(iv, mv)                                      \
    if ((mv) != 0) {                                            \
        if (likely((iv) == -1)) {                               \
            (iv) = sp - ss + __builtin_ctz((mv));               \
        } else {                                                \
            return -(sp - ss + __builtin_ctz((mv)) + 1);        \
        }                                                       \
    }
|
||||
|
||||
static inline long skip_number(const char *sp, size_t nb) {
|
||||
long di = -1;
|
||||
long ei = -1;
|
||||
long si = -1;
|
||||
const char * ss = sp;
|
||||
|
||||
/* check for EOF */
|
||||
if (nb == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* special case of '0' */
|
||||
if (*sp == '0' && (nb == 1 || sp[1] != '.')) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
/* can do with AVX-2 */
|
||||
if (likely(nb >= 32)) {
|
||||
__m256i d9 = _mm256_set1_epi8('9');
|
||||
__m256i ds = _mm256_set1_epi8('/');
|
||||
__m256i dp = _mm256_set1_epi8('.');
|
||||
__m256i el = _mm256_set1_epi8('e');
|
||||
__m256i eu = _mm256_set1_epi8('E');
|
||||
__m256i xp = _mm256_set1_epi8('+');
|
||||
__m256i xm = _mm256_set1_epi8('-');
|
||||
|
||||
/* 32-byte loop */
|
||||
do {
|
||||
__m256i sb = _mm256_loadu_si256 ((const void *)sp);
|
||||
__m256i i0 = _mm256_cmpgt_epi8 (sb, ds);
|
||||
__m256i i9 = _mm256_cmpgt_epi8 (sb, d9);
|
||||
__m256i id = _mm256_cmpeq_epi8 (sb, dp);
|
||||
__m256i il = _mm256_cmpeq_epi8 (sb, el);
|
||||
__m256i iu = _mm256_cmpeq_epi8 (sb, eu);
|
||||
__m256i ip = _mm256_cmpeq_epi8 (sb, xp);
|
||||
__m256i im = _mm256_cmpeq_epi8 (sb, xm);
|
||||
__m256i iv = _mm256_andnot_si256 (i9, i0);
|
||||
__m256i ie = _mm256_or_si256 (il, iu);
|
||||
__m256i is = _mm256_or_si256 (ip, im);
|
||||
__m256i rt = _mm256_or_si256 (iv, id);
|
||||
__m256i ru = _mm256_or_si256 (ie, is);
|
||||
__m256i rv = _mm256_or_si256 (rt, ru);
|
||||
|
||||
/* exponent and sign position */
|
||||
uint32_t md = _mm256_movemask_epi8(id);
|
||||
uint32_t me = _mm256_movemask_epi8(ie);
|
||||
uint32_t ms = _mm256_movemask_epi8(is);
|
||||
uint32_t mr = _mm256_movemask_epi8(rv);
|
||||
|
||||
/* mismatch position */
|
||||
uint32_t v;
|
||||
uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);
|
||||
|
||||
/* mask out excess characters */
|
||||
if (i != 32) {
|
||||
md &= (1 << i) - 1;
|
||||
me &= (1 << i) - 1;
|
||||
ms &= (1 << i) - 1;
|
||||
}
|
||||
|
||||
/* check & update decimal point, exponent and sign index */
|
||||
check_bits(md)
|
||||
check_bits(me)
|
||||
check_bits(ms)
|
||||
check_vidx(di, md)
|
||||
check_vidx(ei, me)
|
||||
check_vidx(si, ms)
|
||||
|
||||
/* check for valid number */
|
||||
if (i != 32) {
|
||||
sp += i;
|
||||
_mm256_zeroupper();
|
||||
goto check_index;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 32;
|
||||
nb -= 32;
|
||||
} while (nb >= 32);
|
||||
|
||||
/* clear the upper half to prevent AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* can do with SSE */
|
||||
if (likely(nb >= 16)) {
|
||||
__m128i dc = _mm_set1_epi8(':');
|
||||
__m128i ds = _mm_set1_epi8('/');
|
||||
__m128i dp = _mm_set1_epi8('.');
|
||||
__m128i el = _mm_set1_epi8('e');
|
||||
__m128i eu = _mm_set1_epi8('E');
|
||||
__m128i xp = _mm_set1_epi8('+');
|
||||
__m128i xm = _mm_set1_epi8('-');
|
||||
__m128i v1 = _mm_set1_epi8(0xff);
|
||||
|
||||
/* 16-byte loop */
|
||||
do {
|
||||
__m128i sb = _mm_loadu_si128 ((const void *)sp);
|
||||
__m128i i0 = _mm_cmpgt_epi8 (sb, ds);
|
||||
__m128i i9 = _mm_cmplt_epi8 (sb, dc);
|
||||
__m128i id = _mm_cmpeq_epi8 (sb, dp);
|
||||
__m128i il = _mm_cmpeq_epi8 (sb, el);
|
||||
__m128i iu = _mm_cmpeq_epi8 (sb, eu);
|
||||
__m128i ip = _mm_cmpeq_epi8 (sb, xp);
|
||||
__m128i im = _mm_cmpeq_epi8 (sb, xm);
|
||||
__m128i iv = _mm_and_si128 (i9, i0);
|
||||
__m128i ie = _mm_or_si128 (il, iu);
|
||||
__m128i is = _mm_or_si128 (ip, im);
|
||||
__m128i rt = _mm_or_si128 (iv, id);
|
||||
__m128i ru = _mm_or_si128 (ie, is);
|
||||
__m128i rv = _mm_or_si128 (rt, ru);
|
||||
|
||||
/* exponent and sign position */
|
||||
uint32_t md = _mm_movemask_epi8(id);
|
||||
uint32_t me = _mm_movemask_epi8(ie);
|
||||
uint32_t ms = _mm_movemask_epi8(is);
|
||||
uint32_t mr = _mm_movemask_epi8(rv);
|
||||
|
||||
/* mismatch position */
|
||||
uint32_t v;
|
||||
uint32_t i = __builtin_ctzll(~mr | 0x00010000);
|
||||
|
||||
/* mask out excess characters */
|
||||
if (i != 16) {
|
||||
md &= (1 << i) - 1;
|
||||
me &= (1 << i) - 1;
|
||||
ms &= (1 << i) - 1;
|
||||
}
|
||||
|
||||
/* check & update exponent and sign index */
|
||||
check_bits(md)
|
||||
check_bits(me)
|
||||
check_bits(ms)
|
||||
check_vidx(di, md)
|
||||
check_vidx(ei, me)
|
||||
check_vidx(si, ms)
|
||||
|
||||
/* check for valid number */
|
||||
if (i != 16) {
|
||||
sp += i;
|
||||
goto check_index;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 16;
|
||||
nb -= 16;
|
||||
} while (nb >= 16);
|
||||
}
|
||||
|
||||
/* remaining bytes, do with scalar code */
|
||||
while (likely(--nb >= 0)) {
|
||||
switch (*sp++) {
|
||||
case '0' : /* fallthrough */
|
||||
case '1' : /* fallthrough */
|
||||
case '2' : /* fallthrough */
|
||||
case '3' : /* fallthrough */
|
||||
case '4' : /* fallthrough */
|
||||
case '5' : /* fallthrough */
|
||||
case '6' : /* fallthrough */
|
||||
case '7' : /* fallthrough */
|
||||
case '8' : /* fallthrough */
|
||||
case '9' : break;
|
||||
case '.' : check_sidx(di); break;
|
||||
case 'e' : /* fallthrough */
|
||||
case 'E' : check_sidx(ei); break;
|
||||
case '+' : /* fallthrough */
|
||||
case '-' : check_sidx(si); break;
|
||||
default : sp--; goto check_index;
|
||||
}
|
||||
}
|
||||
|
||||
check_index:
|
||||
if (di == 0 || si == 0) {
|
||||
return -1;
|
||||
} else if (si > 0 && ei != si - 1) {
|
||||
return -si - 1;
|
||||
} else if (di >= 0 && ei >= 0 && di > ei - 1) {
|
||||
return -di - 1;
|
||||
} else if (di >= 0 && ei >= 0 && di == ei - 1) {
|
||||
return -ei - 1;
|
||||
} else {
|
||||
return sp - ss;
|
||||
}
|
||||
}
|
||||
|
||||
#undef check_bits
|
||||
#undef check_sidx
|
||||
#undef check_vidx
|
||||
|
||||
long skip_one(const GoString *src, long *p, StateMachine *m) {
|
||||
fsm_init(m, FSM_VAL);
|
||||
return fsm_exec(m, src, p);
|
||||
|
|
@ -761,40 +1036,44 @@ long skip_object(const GoString *src, long *p, StateMachine *m) {
|
|||
long skip_string(const GoString *src, long *p) {
|
||||
int64_t v;
|
||||
ssize_t q = *p - 1;
|
||||
ssize_t e = advance_string(src, p, &v);
|
||||
ssize_t e = advance_string(src, *p, &v);
|
||||
|
||||
/* check for errors */
|
||||
if (e < 0) {
|
||||
/* check for errors, and update the position */
|
||||
if (e >= 0) {
|
||||
*p = e;
|
||||
return q;
|
||||
} else {
|
||||
*p = src->len;
|
||||
return e;
|
||||
}
|
||||
|
||||
/* update the position */
|
||||
*p = e + 1;
|
||||
return q;
|
||||
}
|
||||
|
||||
long skip_negative(const GoString *src, long *p) {
|
||||
long q = *p - 1;
|
||||
int64_t r;
|
||||
JsonState v;
|
||||
long i = *p;
|
||||
long r = skip_number(src->buf + i, src->len - i);
|
||||
|
||||
/* skip the number */
|
||||
if ((r = advance_number(src, p, *p, &v, q)) < 0) {
|
||||
return r;
|
||||
} else {
|
||||
return q;
|
||||
/* check for errors */
|
||||
if (r < 0) {
|
||||
*p -= r + 1;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* update value pointer */
|
||||
*p += r;
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
long skip_positive(const GoString *src, long *p) {
|
||||
long q = *p - 1;
|
||||
int64_t r;
|
||||
JsonState v;
|
||||
long i = *p - 1;
|
||||
long r = skip_number(src->buf + i, src->len - i);
|
||||
|
||||
/* skip the number */
|
||||
if ((r = advance_number(src, p, q, &v, q)) < 0) {
|
||||
return r;
|
||||
} else {
|
||||
return q;
|
||||
/* check for errors */
|
||||
if (r < 0) {
|
||||
*p -= r + 2;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* update value pointer */
|
||||
*p += r - 1;
|
||||
return i;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
Subproject commit 38a813682862252de3d1a016c90755467bff0ee9
|
||||
Subproject commit daab6520b48bc30586f7468676c990b5c1f781bd
|
||||
Loading…
Reference in a new issue