2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

feat: performance optimizations

This commit is contained in:
chenzhuoyu 2021-07-01 21:29:31 +08:00 committed by Oxygen
parent 8fe152d152
commit 5cb6f17944
29 changed files with 8937 additions and 7635 deletions

View file

@ -25,8 +25,8 @@ CPU_avx2 := amd64
TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test native_export_amd64
CFLAGS_avx := -msse2 -mavx -mno-avx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=0
CFLAGS_avx2 := -msse2 -mavx -mavx2 -DUSE_SSE=1 -DUSE_AVX=1 -DUSE_AVX2=1
CFLAGS_avx := -msse4 -mavx -mno-avx2 -DUSE_AVX=1 -DUSE_AVX2=0
CFLAGS_avx2 := -msse4 -mavx -mavx2 -DUSE_AVX=1 -DUSE_AVX2=1
CC_amd64 := clang
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py

View file

@ -80,6 +80,10 @@ const (
_IM_alse = 0x65736c61 // 'alse' ('false' without the 'f')
)
/* _BM_space is a bitmask with one bit set per JSON whitespace byte
 * (' ', '\t', '\r', '\n'); the bit index equals the byte value, so a byte
 * can be classified with a single bit test against this constant.
 * NOTE: a BTQ with a register bit offset wraps the offset modulo 64, so the
 * mask is only meaningful for byte values that are not greater than ' '. */
const (
_BM_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
)
const (
_LB_error = "_error"
_LB_im_error = "_im_error"
@ -1318,15 +1322,34 @@ func (self *_Assembler) _asm_OP_unmarshal_text_p(p *_Instr) {
}
/* _asm_OP_lspace emits code that advances IC past any JSON whitespace
 * (' ', '\t', '\r', '\n') found at (IP)(IC).
 *
 * Up to 4 bytes are tested inline against the _BM_space bitmask; longer runs
 * are handed over to the native lspace routine. Running out of input jumps to
 * _eof_error, a negative result from the native routine jumps to
 * _parsing_error_v. On the fall-through path IC is the index of the first
 * non-whitespace byte. */
func (self *_Assembler) _asm_OP_lspace(_ *_Instr) {
	self.Emit("CMPQ", _IC, _IL)                        // CMPQ    IC, IL
	self.Sjmp("JAE", _LB_eof_error)                    // JAE     _eof_error
	self.Emit("MOVQ", jit.Imm(_BM_space), _DX)         // MOVQ    _BM_space, DX
	self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX

	/* BTQ with a register offset only looks at the offset modulo 64, so bytes
	 * like 'I' (64 + '\t') or '`' (64 + ' ') would alias onto whitespace bits;
	 * anything above ' ' is rejected before the bit test */
	self.Emit("CMPQ", _AX, jit.Imm(' ')) // CMPQ    AX, $' '
	self.Sjmp("JA", "_nospace_{n}")      // JA      _nospace_{n}
	self.Emit("BTQ", _AX, _DX)           // BTQ     AX, DX
	self.Sjmp("JNC", "_nospace_{n}")     // JNC     _nospace_{n}

	/* test up to 4 characters */
	for i := 0; i < 3; i++ {
		self.Emit("ADDQ", jit.Imm(1), _IC)                 // ADDQ    $1, IC
		self.Emit("CMPQ", _IC, _IL)                        // CMPQ    IC, IL
		self.Sjmp("JAE", _LB_eof_error)                    // JAE     _eof_error
		self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
		self.Emit("CMPQ", _AX, jit.Imm(' '))               // CMPQ    AX, $' '
		self.Sjmp("JA", "_nospace_{n}")                    // JA      _nospace_{n}
		self.Emit("BTQ", _AX, _DX)                         // BTQ     AX, DX
		self.Sjmp("JNC", "_nospace_{n}")                   // JNC     _nospace_{n}
	}

	/* hand over to the native function */
	self.Emit("MOVQ", _IP, _DI)              // MOVQ    IP, DI
	self.Emit("MOVQ", _IL, _SI)              // MOVQ    IL, SI
	self.Emit("MOVQ", _IC, _DX)              // MOVQ    IC, DX
	self.call(_F_lspace)                     // CALL    lspace
	self.Emit("TESTQ", _AX, _AX)             // TESTQ   AX, AX
	self.Sjmp("JS", _LB_parsing_error_v)     // JS      _parsing_error_v
	self.Emit("CMPQ", _AX, _IL)              // CMPQ    AX, IL
	self.Sjmp("JAE", _LB_eof_error)          // JAE     _eof_error
	self.Emit("MOVQ", _AX, _IC)              // MOVQ    AX, IC
	self.Link("_nospace_{n}")                // _nospace_{n}:
}
func (self *_Assembler) _asm_OP_match_char(p *_Instr) {

View file

@ -119,10 +119,6 @@ const (
_S_vmask = (1 << _S_val) | (1 << _S_arr_0)
)
const (
_X_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
)
const (
_A_init_len = 1
_A_init_cap = 16
@ -206,11 +202,11 @@ func (self *_ValueDecoder) compile() {
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
self.Sjmp("JS" , "_return") // JS _return
/* fast path: no-space or 1-space cases */
/* fast path: test up to 4 characters manually */
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
self.Emit("MOVQ" , jit.Imm(_X_space), _DX) // MOVQ _X_space, DX
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
@ -219,6 +215,33 @@ func (self *_ValueDecoder) compile() {
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
/* 2-space case */
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
/* 3-space case */
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX
self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX
self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast
/* 4-space case */
self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC
self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL
self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF
self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX
/* fast path: use lookup table to select decoder */
self.Link("_decode_fast") // _decode_fast:
@ -232,14 +255,14 @@ func (self *_ValueDecoder) compile() {
self.Rjmp("JMP" , _AX) // JMP AX
/* decode with native decoder */
self.Link("_decode_native") // _decode_native:
self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI
self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI
self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX
self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX
self.Emit("MOVL", jit.Imm(1), _R8) // MOVL $1, R8
self.call(_F_value) // CALL value
self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC
self.Link("_decode_native") // _decode_native:
self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI
self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI
self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX
self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX
self.Emit("MOVL", jit.Imm(1), _R8) // MOVL $1, R8
self.call(_F_value) // CALL value
self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC
/* check for errors */
self.Emit("MOVQ" , _VAR_ss_Vt, _AX) // MOVQ ss.Vt, AX
@ -565,6 +588,7 @@ func (self *_ValueDecoder) compile() {
/* return from decoder */
self.Link("_return") // _return:
self.Emit("XORL", _EP, _EP) // XORL EP, EP
self.Emit("MOVQ", _EP, jit.Ptr(_ST, _ST_Vp)) // MOVQ EP, ST.Vp[0]
self.Link("_epilogue") // _epilogue:
self.Emit("SUBQ", jit.Imm(_FsmOffset), _ST) // SUBQ _FsmOffset, _ST
self.Emit("MOVQ", jit.Ptr(_SP, _VD_offs), _BP) // MOVQ _VD_offs(SP), BP

View file

@ -23,8 +23,8 @@ import (
`sync`
`unsafe`
`github.com/bytedance/sonic/internal/cpu`
`github.com/bytedance/sonic/internal/jit`
`github.com/bytedance/sonic/internal/native/types`
`github.com/twitchyliquid64/golang-asm/obj`
`github.com/twitchyliquid64/golang-asm/obj/x86`
@ -68,13 +68,14 @@ const (
)
const (
_FP_args = 40 // 40 bytes for passing arguments to this function
_FP_fargs = 64 // 64 bytes for passing arguments to other Go functions
_FP_saves = 64 // 64 bytes for saving the registers before CALL instructions
_FP_args = 40 // 40 bytes for passing arguments to this function
_FP_fargs = 64 // 64 bytes for passing arguments to other Go functions
_FP_saves = 64 // 64 bytes for saving the registers before CALL instructions
_FP_locals = 16 // 16 bytes for local variables
)
const (
_FP_offs = _FP_fargs + _FP_saves
_FP_offs = _FP_fargs + _FP_saves + _FP_locals
_FP_size = _FP_offs + 8 // 8 bytes for the parent frame pointer
_FP_base = _FP_size + 8 // 8 bytes for the return address
)
@ -118,11 +119,6 @@ var (
var (
_X0 = jit.Reg("X0")
_Y0 = jit.Reg("Y0")
_Y1 = jit.Reg("Y1")
_Y2 = jit.Reg("Y2")
_Y3 = jit.Reg("Y3")
_Y4 = jit.Reg("Y4")
_Y5 = jit.Reg("Y5")
)
var (
@ -156,6 +152,11 @@ var (
_RET_ep = jit.Ptr(_SP, _FP_base + 32)
)
/* scratch slots placed right above the argument-passing and register-save
 * areas of the frame (the 16 bytes reserved by _FP_locals); as used by
 * encode_string they carry state across calls to the native quoter */
var (
/* number of source bytes the native quoter has already consumed */
_VAR_sp = jit.Ptr(_SP, _FP_fargs + _FP_saves)
/* in: free bytes left in the output buffer; out: value added to RL after the call */
_VAR_dn = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8)
)
var (
_REG_ffi = []obj.Addr{_RP, _RL, _RC}
_REG_enc = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q}
@ -380,13 +381,17 @@ func (self *_Assembler) check_size_rl(v obj.Addr) {
/* check for buffer capacity */
self.x++
self.Emit("LEAQ", v, _AX) // LEAQ $v, AX
self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC
self.Sjmp("JBE" , key) // JBE _more_space_return_{n}
self.Emit("LEAQ", v, _AX) // LEAQ $v, AX
self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC
self.Sjmp("JBE" , key) // JBE _more_space_return_{n}
self.slice_grow_ax(key) // GROW $key
self.Link(key) // _more_space_return_{n}:
}
func (self *_Assembler) slice_grow_ax(ret string) {
self.Byte(0x4c, 0x8d, 0x0d) // LEAQ ?(PC), R9
self.Sref(key, 4) // .... &key
self.Sref(ret, 4) // .... &ret
self.Sjmp("JMP" , _LB_more_space) // JMP _more_space
self.Link(key) // _more_space_return_{n}:
}
/** State Stack Helpers **/
@ -433,6 +438,11 @@ func (self *_Assembler) add_long(ch uint32, n int64) {
self.Emit("ADDQ", jit.Imm(n), _RL) // ADDQ $n, RL
}
/* add_text emits code that stores the literal `ss` through store_str and then
 * bumps the result length register RL by len(ss). It emits no capacity check
 * of its own. */
func (self *_Assembler) add_text(ss string) {
	size := int64(len(ss))
	self.store_str(ss)                    // TEXT $ss
	self.Emit("ADDQ", jit.Imm(size), _RL) // ADDQ ${len(ss)}, RL
}
func (self *_Assembler) prep_buffer() {
self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX
self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX)
@ -602,7 +612,16 @@ func (self *_Assembler) error_nan_or_infinite() {
/** String Encoding Routine **/
func (self *_Assembler) open_quote(doubleQuote bool) {
var (
_F_quote = jit.Imm(int64(native.S_quote))
)
func (self *_Assembler) encode_string(doubleQuote bool) {
self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
self.Sjmp("JZ" , "_str_empty_{n}") // JZ _str_empty_{n}
/* opening quote, check for double quote */
if !doubleQuote {
self.check_size_r(_AX, 2) // SIZE $2
self.add_char('"') // CHAR $'"'
@ -610,46 +629,69 @@ func (self *_Assembler) open_quote(doubleQuote bool) {
self.check_size_r(_AX, 6) // SIZE $6
self.add_long(_IM_open, 3) // TEXT $`"\"`
}
}
func (self *_Assembler) close_quote(doubleQuote bool) {
/* quoting loop */
self.Emit("XORL", _AX, _AX) // XORL AX, AX
self.Emit("MOVQ", _AX, _VAR_sp) // MOVQ AX, sp
self.Link("_str_loop_{n}") // _str_loop_{n}:
self.save_c() // SAVE $REG_ffi
/* load the output buffer first, and then input buffer,
* because the parameter registers collide with RP / RL / RC */
self.Emit("MOVQ", _RC, _CX) // MOVQ RC, CX
self.Emit("SUBQ", _RL, _CX) // SUBQ RL, CX
self.Emit("MOVQ", _CX, _VAR_dn) // MOVQ CX, dn
self.Emit("LEAQ", jit.Sib(_RP, _RL, 1, 0), _DX) // LEAQ (RP)(RL), DX
self.Emit("LEAQ", _VAR_dn, _CX) // LEAQ dn, CX
self.Emit("MOVQ", _VAR_sp, _AX) // MOVQ sp, AX
self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _DI) // MOVQ (SP.p), DI
self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _SI) // MOVQ 8(SP.p), SI
self.Emit("ADDQ", _AX, _DI) // ADDQ AX, DI
self.Emit("SUBQ", _AX, _SI) // SUBQ AX, SI
/* set the flags based on `doubleQuote` */
if !doubleQuote {
self.check_size(1) // SIZE $1
self.Link("_str_end_{n}") // _str_end_{n}:
self.add_char('"') // CHAR $'"'
self.Emit("XORL", _R8, _R8) // XORL R8, R8
} else {
self.Emit("MOVL", jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8
}
/* call the native quoter */
self.call_c(_F_quote) // CALL quote
self.Emit("ADDQ" , _VAR_dn, _RL) // ADDQ dn, RL
self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX
self.Sjmp("JS" , "_str_space_{n}") // JS _str_space_{n}
/* close the string, check for double quote */
if !doubleQuote {
self.check_size(1) // SIZE $1
self.add_char('"') // CHAR $'"'
self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n}
} else {
self.check_size(3) // SIZE $3
self.Link("_str_end_{n}") // _str_end_{n}:
self.store_str(`\""`) // TEXT $`\""`
self.Emit("ADDQ", jit.Imm(3), _RL) // ADDQ $3, RL
self.add_text("\\\"\"") // TEXT $'\""'
self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n}
}
}
func (self *_Assembler) encode_string(fn obj.Addr, doubleQuote bool) {
self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX
self.open_quote(doubleQuote) // QOPEN $doubleQuote
self.Emit("CMPQ" , jit.Ptr(_SP_p, 8), jit.Imm(0)) // CMPQ 8(SP.p), $0
self.Sjmp("JE" , "_str_end_{n}") // JE _str_end_{n}
self.save_c() // SAVE $REG_ffi
self.Emit("MOVQ" , _SP_p, _DI) // MOVQ SP.p, DI
self.Emit("XORL" , _SI, _SI) // XORL SI, SI
self.call_c(_F_lquote) // CALL lquote
self.Emit("CMPQ" , _AX, jit.Ptr(_SP_p, 8)) // CMPQ AX, 8(SP.p)
self.Sjmp("JNE" , "_str_quote_{n}") // JNE _str_quote_{n}
self.Emit("LEAQ" , jit.Sib(_RP, _RL, 1, 0), _AX) // LEAQ (RP)(RL), AX
self.Emit("ADDQ" , jit.Ptr(_SP_p, 8), _RL) // ADDQ 8(SP.p), RL
self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, 0(SP)
self.Emit("MOVOU", jit.Ptr(_SP_p, 0), _X0) // MOVOU (SP.p), X0
self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP)
self.call_go(_F_memmove) // CALL_GO memmove
self.Sjmp("JMP" , "_str_end_{n}") // JMP _str_end_{n}
self.Link("_str_quote_{n}") // _str_quote_{n}:
self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP)
self.prep_buffer() // MOVE {buf}, (SP)
self.Emit("MOVOU", jit.Ptr(_SP_p, 0), _X0) // MOVOU (SP.p), X0
self.Emit("MOVOU", _X0, jit.Ptr(_SP, 16)) // MOVOU X0, 16(SP)
self.call_encoder(fn) // CALL $fn
self.close_quote(doubleQuote) // QCLOSE $doubleQuote
/* not enough space to contain the quoted string */
self.Link("_str_space_{n}") // _str_space_{n}:
self.Emit("NOTQ", _AX) // NOTQ AX
self.Emit("ADDQ", _AX, _VAR_sp) // ADDQ AX, sp
self.Emit("LEAQ", jit.Sib(_RC, _RC, 1, 0), _AX) // LEAQ (RC)(RC), AX
self.slice_grow_ax("_str_loop_{n}") // GROW _str_loop_{n}
/* empty string, check for double quote */
if !doubleQuote {
self.Link("_str_empty_{n}") // _str_empty_{n}:
self.check_size(2) // SIZE $2
self.add_text("\"\"") // TEXT $'""'
self.Link("_str_end_{n}") // _str_end_{n}:
} else {
self.Link("_str_empty_{n}") // _str_empty_{n}:
self.check_size(6) // SIZE $6
self.add_text("\"\\\"\\\"\"") // TEXT $'"\"\""'
self.Link("_str_end_{n}") // _str_end_{n}:
}
}
/** Zero Value Check Routine **/
@ -663,46 +705,11 @@ func (self *_Assembler) check_zero(nb int, dest int) {
return
}
/* default instructions for AVX2 */
vclear := func(v obj.Addr) { self.Emit("VPXOR" , v, v, v) }
vset1a := func(a, b obj.Addr) { self.Emit("VPCMPEQB", a, a, b) }
vandpb := func(b, a, r obj.Addr) { self.Emit("VPAND" , b, a, r) }
vcmpeq := func(b, a, r obj.Addr) { self.Emit("VPCMPEQB", b, a, r) }
/* fall-back instructions for AVX */
if !cpu.HasAVX2 {
vclear = func(v obj.Addr) { self.Emit("VXORPS", v, v, v) }
vset1a = func(a, b obj.Addr) { self.Emit("VCMPPS", a, a, b, jit.Imm(0x0f)) }
vandpb = func(b, a, r obj.Addr) { self.Emit("VANDPS", b, a, r) }
vcmpeq = func(b, a, r obj.Addr) { self.Emit("VCMPPS", b, a, r, jit.Imm(0x00)) }
}
/* if n is less than 32 byte, only scalar code will be used;
* otherwise AVX is used, so clear Y0, and set Y1 to all 1s */
if e >= 32 {
vclear(_Y0) // CLEAR Y0
vset1a(_Y0, _Y1) // SET1A Y0, Y1
}
/* 128-byte tests */
for i <= e - 128 {
vcmpeq(jit.Ptr(_SP_p, i + 0), _Y0, _Y2) // CMPEQ i+0(SP.p), Y0, Y2
vcmpeq(jit.Ptr(_SP_p, i + 32), _Y0, _Y3) // CMPEQ i+32(SP.p), Y0, Y3
vcmpeq(jit.Ptr(_SP_p, i + 64), _Y0, _Y4) // CMPEQ i+64(SP.p), Y0, Y4
vcmpeq(jit.Ptr(_SP_p, i + 96), _Y0, _Y5) // CMPEQ i+96(SP.p), Y0, Y5
vandpb(_Y3, _Y2, _Y2) // ANDPB Y3, Y2, Y2
vandpb(_Y5, _Y4, _Y3) // ANDPB Y5, Y4, Y3
vandpb(_Y2, _Y3, _Y3) // ANDPB Y2, Y3, Y3
self.Emit("VPTEST", _Y1, _Y3) // VPTEST Y1, Y3
self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n}
i += 128
}
/* 32-byte tests */
/* 32-byte test */
for i <= e - 32 {
vcmpeq(jit.Ptr(_SP_p, i), _Y0, _Y2) // CMPEQ i(SP.p), Y0, Y2
self.Emit("VPTEST", _Y1, _Y2) // VPTEST Y1, Y2
self.Sjmp("JNC" , "_not_zero_z_{n}") // JNC _not_zero_z_{n}
self.Emit("VMOVDQU", jit.Ptr(_SP_p, i), _Y0) // VMOVDQU (SP.p), Y0
self.Emit("VPTEST" , _Y0, _Y0) // VPTEST Y0, Y0
self.Sjmp("JNZ" , "_not_zero_z_{n}") // JNZ _not_zero_z_{n}
i += 32
}
@ -711,8 +718,16 @@ func (self *_Assembler) check_zero(nb int, dest int) {
self.Emit("VZEROUPPER")
}
/* 8-byte tests */
for i <= e - 8 {
/* 16-byte test */
if i <= e - 16 {
self.Emit("MOVOU", jit.Ptr(_SP_p, i), _X0) // MOVOU (SP.p), X0
self.Emit("PTEST", _X0, _X0) // PTEST X0, X0
self.Sjmp("JNZ" , "_not_zero_{n}") // JNZ _not_zero_{n}
i += 16
}
/* 8-byte test */
if i <= e - 8 {
self.Emit("CMPQ", jit.Ptr(_SP_p, i), jit.Imm(0)) // CMPQ i(SP.p), $0
self.Sjmp("JNE" , "_not_zero_{n}") // JNE _not_zero_{n}
i += 8
@ -771,7 +786,6 @@ var (
_F_f64toa = jit.Imm(int64(native.S_f64toa))
_F_i64toa = jit.Imm(int64(native.S_i64toa))
_F_u64toa = jit.Imm(int64(native.S_u64toa))
_F_lquote = jit.Imm(int64(native.S_lquote))
_F_b64encode = jit.Imm(int64(_subr__b64encode))
)
@ -792,16 +806,12 @@ var (
)
var (
_F_encodeQuote obj.Addr
_F_encodeDoubleQuote obj.Addr
_F_encodeTypedPointer obj.Addr
_F_encodeJsonMarshaler obj.Addr
_F_encodeTextMarshaler obj.Addr
)
func init() {
_F_encodeQuote = jit.Func(encodeQuote)
_F_encodeDoubleQuote = jit.Func(encodeDoubleQuote)
_F_encodeTypedPointer = jit.Func(encodeTypedPointer)
_F_encodeJsonMarshaler = jit.Func(encodeJsonMarshaler)
_F_encodeTextMarshaler = jit.Func(encodeTextMarshaler)
@ -889,7 +899,7 @@ func (self *_Assembler) _asm_OP_f64(_ *_Instr) {
}
func (self *_Assembler) _asm_OP_str(_ *_Instr) {
self.encode_string(_F_encodeQuote, false)
self.encode_string(false)
}
func (self *_Assembler) _asm_OP_bin(_ *_Instr) {
@ -913,7 +923,7 @@ func (self *_Assembler) _asm_OP_bin(_ *_Instr) {
}
func (self *_Assembler) _asm_OP_quote(_ *_Instr) {
self.encode_string(_F_encodeDoubleQuote, true)
self.encode_string(true)
}
func (self *_Assembler) _asm_OP_number(_ *_Instr) {
@ -977,9 +987,8 @@ func (self *_Assembler) _asm_OP_byte(p *_Instr) {
}
func (self *_Assembler) _asm_OP_text(p *_Instr) {
self.check_size(len(p.vs()))
self.store_str(p.vs())
self.Emit("ADDQ", jit.Imm(int64(len(p.vs()))), _RL) // ADDQ $len(p.vs()), RL
self.check_size(len(p.vs())) // SIZE ${len(p.vs())}
self.add_text(p.vs()) // TEXT ${p.vs()}
}
func (self *_Assembler) _asm_OP_deref(_ *_Instr) {

View file

@ -26,6 +26,7 @@ import (
`unsafe`
`github.com/bytedance/sonic/internal/rt`
`github.com/davecgh/go-spew/spew`
`github.com/stretchr/testify/assert`
)
@ -339,6 +340,18 @@ func TestAssembler_OpCode(t *testing.T) {
}
}
/* TestAssembler_StringMoreSpace encodes a 16-byte string of control characters
 * into a buffer with only 8 bytes of capacity, so the quoted output cannot fit
 * without growing the buffer. Only the absence of an error is asserted; the
 * resulting bytes are dumped for manual inspection. */
func TestAssembler_StringMoreSpace(t *testing.T) {
	src := "\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010"
	buf := make([]byte, 0, 8)

	/* a program consisting of a single string opcode */
	prog := &_Program{ins: []_Instr{newInsOp(_OP_str)}}
	stack := new(_Stack)
	encode := newAssembler(prog).Load()

	err := encode(&buf, unsafe.Pointer(&src), stack)
	assert.Nil(t, err)
	spew.Dump(buf)
}
func TestAssembler_TwitterJSON_Generic(t *testing.T) {
p := &_Program{ins: mustCompile(&_GenericValue).ins}
m := []byte(nil)

View file

@ -38,7 +38,7 @@ func Quote(s string) string {
p = make([]byte, 0, n)
/* call the encoder */
_ = encodeStr(&p, s)
_ = encodeString(&p, s)
return rt.Mem2Str(p)
}

View file

@ -28,126 +28,44 @@ import (
/** Encoder Primitives **/
var _QuoteTab = [256]string {
'\x00' : `\u0000`,
'\x01' : `\u0001`,
'\x02' : `\u0002`,
'\x03' : `\u0003`,
'\x04' : `\u0004`,
'\x05' : `\u0005`,
'\x06' : `\u0006`,
'\x07' : `\u0007`,
'\b' : `\b`,
'\t' : `\t`,
'\n' : `\n`,
'\x0b' : `\u000b`,
'\f' : `\f`,
'\r' : `\r`,
'\x0e' : `\u000e`,
'\x0f' : `\u000f`,
'\x10' : `\u0010`,
'\x11' : `\u0011`,
'\x12' : `\u0012`,
'\x13' : `\u0013`,
'\x14' : `\u0014`,
'\x15' : `\u0015`,
'\x16' : `\u0016`,
'\x17' : `\u0017`,
'\x18' : `\u0018`,
'\x19' : `\u0019`,
'\x1a' : `\u001a`,
'\x1b' : `\u001b`,
'\x1c' : `\u001c`,
'\x1d' : `\u001d`,
'\x1e' : `\u001e`,
'\x1f' : `\u001f`,
'"' : `\"`,
'\\' : `\\`,
}
var _DoubleQuoteTab = [256]string {
'\x00' : `\\u0000`,
'\x01' : `\\u0001`,
'\x02' : `\\u0002`,
'\x03' : `\\u0003`,
'\x04' : `\\u0004`,
'\x05' : `\\u0005`,
'\x06' : `\\u0006`,
'\x07' : `\\u0007`,
'\b' : `\\b`,
'\t' : `\\t`,
'\n' : `\\n`,
'\x0b' : `\\u000b`,
'\f' : `\\f`,
'\r' : `\\r`,
'\x0e' : `\\u000e`,
'\x0f' : `\\u000f`,
'\x10' : `\\u0010`,
'\x11' : `\\u0011`,
'\x12' : `\\u0012`,
'\x13' : `\\u0013`,
'\x14' : `\\u0014`,
'\x15' : `\\u0015`,
'\x16' : `\\u0016`,
'\x17' : `\\u0017`,
'\x18' : `\\u0018`,
'\x19' : `\\u0019`,
'\x1a' : `\\u001a`,
'\x1b' : `\\u001b`,
'\x1c' : `\\u001c`,
'\x1d' : `\\u001d`,
'\x1e' : `\\u001e`,
'\x1f' : `\\u001f`,
'"' : `\\\"`,
'\\' : `\\\\`,
}
func encodeNil(rb *[]byte) error {
*rb = append(*rb, 'n', 'u', 'l', 'l')
return nil
}
func encodeStr(buf *[]byte, val string) error {
func encodeString(buf *[]byte, val string) error {
var sidx int
var pbuf *rt.GoSlice
var pstr *rt.GoString
/* opening quote */
*buf = append(*buf, '"')
encodeQuote(buf, native.Lquote(&val, 0), val)
pbuf = (*rt.GoSlice)(unsafe.Pointer(buf))
pstr = (*rt.GoString)(unsafe.Pointer(&val))
/* encode with native library */
for sidx < pstr.Len {
sn := pstr.Len - sidx
dn := pbuf.Cap - pbuf.Len
sp := padd(pstr.Ptr, sidx)
dp := padd(pbuf.Ptr, pbuf.Len)
nb := native.Quote(sp, sn, dp, &dn, 0)
/* check for errors */
if pbuf.Len += dn; nb >= 0 {
break
}
/* not enough space, grow the slice and try again */
sidx += ^nb
*pbuf = growslice(rt.UnpackType(byteType), *pbuf, pbuf.Cap * 2)
}
/* closing quote */
*buf = append(*buf, '"')
return nil
}
func encodeQuote(buf *[]byte, i int, val string) {
p := 0
n := len(val)
/* quote all the characters, if any */
for i < n {
*buf = append(*buf, rt.Str2Mem(val[p:i])...)
*buf = append(*buf, rt.Str2Mem(_QuoteTab[val[i]])...)
p, i = i + 1, native.Lquote(&val, i + 1)
}
/* add the remaining characters */
if p < n {
*buf = append(*buf, rt.Str2Mem(val[p:])...)
}
}
func encodeDoubleQuote(buf *[]byte, i int, val string) {
p := 0
n := len(val)
/* quote all the characters, if any */
for i < n {
*buf = append(*buf, rt.Str2Mem(val[p:i])...)
*buf = append(*buf, rt.Str2Mem(_DoubleQuoteTab[val[i]])...)
p, i = i + 1, native.Lquote(&val, i + 1)
}
/* add the remaining characters */
if p < n {
*buf = append(*buf, rt.Str2Mem(val[p:])...)
}
}
func encodeTypedPointer(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *_Stack) error {
if vt == nil {
return encodeNil(buf)
@ -172,7 +90,7 @@ func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler) error {
if ret, err := val.MarshalText(); err != nil {
return err
} else {
return encodeStr(buf, rt.Mem2Str(ret))
return encodeString(buf, rt.Mem2Str(ret))
}
}

View file

@ -23,6 +23,11 @@ import (
`github.com/bytedance/sonic/internal/loader`
)
/* padd returns the pointer `p` advanced by `v` bytes.
 * The uintptr round-trip is deliberately kept inside a single expression, as
 * the unsafe.Pointer conversion rules require; no bounds check is performed. */
//go:nosplit
func padd(p unsafe.Pointer, v int) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + uintptr(v))
}
//go:nosplit
func ptoenc(p loader.Function) _Encoder {
return *(*_Encoder)(unsafe.Pointer(&p))

View file

@ -47,12 +47,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lquote(buf *string, off int) (ret int)
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
@ -93,8 +98,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
//go:noescape
//goland:noinspection GoUnusedParameter
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)

File diff suppressed because it is too large Load diff

View file

@ -43,6 +43,45 @@ func TestNative_Value(t *testing.T) {
assert.Equal(t, 3, v.Ep)
}
/* TestNative_Quote checks that __quote escapes control characters, the
 * backslash and the double quote, and copies multi-byte UTF-8 through as-is.
 * The length field of the destination slice header is passed as `dn`, so after
 * the call len(d) is the number of bytes written. */
func TestNative_Quote(t *testing.T) {
	s := "hello\b\f\n\r\t\\\"\u666fworld"
	d := make([]byte, 256)
	dp := (*rt.GoSlice)(unsafe.Pointer(&d))
	sp := (*rt.GoString)(unsafe.Pointer(&s))
	rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)

	/* a negative result is the bitwise complement of the number of input
	 * bytes consumed before the output ran out of space, not an error code */
	if rv < 0 {
		require.Failf(t, "output buffer too small", "only %d of %d input bytes were quoted", ^rv, len(s))
	}

	assert.Equal(t, len(s), rv)
	assert.Equal(t, 27, len(d))
	assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(d))
}
/* TestNative_QuoteNoMem checks the out-of-space path of __quote: with only 10
 * bytes of output, "hello\b\f" (9 bytes) fits, but the next two-byte escape
 * does not. The expected result -8 is ^7, i.e. 7 input bytes were consumed. */
func TestNative_QuoteNoMem(t *testing.T) {
	src := "hello\b\f\n\r\t\\\"\u666fworld"
	out := make([]byte, 10)

	/* raw headers: the output length field doubles as the in/out `dn` argument */
	srcHdr := (*rt.GoString)(unsafe.Pointer(&src))
	outHdr := (*rt.GoSlice)(unsafe.Pointer(&out))

	ret := __quote(srcHdr.Ptr, srcHdr.Len, outHdr.Ptr, &outHdr.Len, 0)
	assert.Equal(t, -8, ret)
	assert.Equal(t, 9, len(out))
	assert.Equal(t, `hello\b\f`, string(out))
}
/* TestNative_DoubleQuote checks __quote with the F_DOUBLE_UNQUOTE flag, where
 * every escape sequence is itself escaped once more (e.g. `\b` becomes `\\b`).
 * The length field of the destination slice header is passed as `dn`, so after
 * the call len(d) is the number of bytes written. */
func TestNative_DoubleQuote(t *testing.T) {
	s := "hello\b\f\n\r\t\\\"\u666fworld"
	d := make([]byte, 256)
	dp := (*rt.GoSlice)(unsafe.Pointer(&d))
	sp := (*rt.GoString)(unsafe.Pointer(&s))
	rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, types.F_DOUBLE_UNQUOTE)

	/* a negative result is the bitwise complement of the number of input
	 * bytes consumed before the output ran out of space, not an error code */
	if rv < 0 {
		require.Failf(t, "output buffer too small", "only %d of %d input bytes were quoted", ^rv, len(s))
	}

	assert.Equal(t, len(s), rv)
	assert.Equal(t, 36, len(d))
	assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(d))
}
func TestNative_Unquote(t *testing.T) {
s := `hello\b\f\n\r\t\\\"\u2333world`
d := make([]byte, 0, len(s))
@ -173,6 +212,16 @@ func TestNative_Vstring(t *testing.T) {
assert.Equal(t, int64(5), v.Iv)
}
/* TestNative_VstringEscapeEOF runs __vstring over a string body whose only
 * escape sequence starts at byte 63 and expects the cursor to stop right after
 * the closing quote (95) with the escape position reported as 63.
 * NOTE(review): offset 63 presumably targets a vector-block boundary - confirm. */
func TestNative_VstringEscapeEOF(t *testing.T) {
	input := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
	pos := 0
	var state types.JsonState

	__vstring(&input, &pos, &state)

	assert.Equal(t, 95, pos)
	assert.Equal(t, 63, state.Ep)
	assert.Equal(t, int64(0), state.Iv)
}
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
v, e := hex.DecodeString(
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +

View file

@ -21,12 +21,12 @@ package avx
var (
S_f64toa = _subr__f64toa
S_i64toa = _subr__i64toa
S_lquote = _subr__lquote
S_u64toa = _subr__u64toa
S_lspace = _subr__lspace
)
var (
S_lspace = _subr__lspace
S_quote = _subr__quote
S_unquote = _subr__unquote
)

View file

@ -14,29 +14,29 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
var (
_func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 2480
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5544
_subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 295
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 937
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 574
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3638
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14087
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14122
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12457
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5637
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6825
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 8460
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10935
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11907
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9593
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12184
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13969
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14004
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11647
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7664
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10125
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11097
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9240
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11374
)
var (
_ = _subr__f64toa
_ = _subr__i64toa
_ = _subr__lquote
_ = _subr__lspace
_ = _subr__lzero
_ = _subr__quote
_ = _subr__skip_array
_ = _subr__skip_object
_ = _subr__skip_one

View file

@ -47,12 +47,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lquote(buf *string, off int) (ret int)
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
@ -93,8 +98,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
//go:noescape
//goland:noinspection GoUnusedParameter
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)

File diff suppressed because it is too large Load diff

View file

@ -43,6 +43,45 @@ func TestNative_Value(t *testing.T) {
assert.Equal(t, 3, v.Ep)
}
/* TestNative_Quote checks that __quote escapes control characters, the
 * backslash and the double quote, and copies multi-byte UTF-8 through as-is.
 * The length field of the destination slice header is passed as `dn`, so after
 * the call len(d) is the number of bytes written. */
func TestNative_Quote(t *testing.T) {
	s := "hello\b\f\n\r\t\\\"\u666fworld"
	d := make([]byte, 256)
	dp := (*rt.GoSlice)(unsafe.Pointer(&d))
	sp := (*rt.GoString)(unsafe.Pointer(&s))
	rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, 0)

	/* a negative result is the bitwise complement of the number of input
	 * bytes consumed before the output ran out of space, not an error code */
	if rv < 0 {
		require.Failf(t, "output buffer too small", "only %d of %d input bytes were quoted", ^rv, len(s))
	}

	assert.Equal(t, len(s), rv)
	assert.Equal(t, 27, len(d))
	assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(d))
}
/* TestNative_QuoteNoMem checks the out-of-space path of __quote: with only 10
 * bytes of output, "hello\b\f" (9 bytes) fits, but the next two-byte escape
 * does not. The expected result -8 is ^7, i.e. 7 input bytes were consumed. */
func TestNative_QuoteNoMem(t *testing.T) {
	src := "hello\b\f\n\r\t\\\"\u666fworld"
	out := make([]byte, 10)

	/* raw headers: the output length field doubles as the in/out `dn` argument */
	srcHdr := (*rt.GoString)(unsafe.Pointer(&src))
	outHdr := (*rt.GoSlice)(unsafe.Pointer(&out))

	ret := __quote(srcHdr.Ptr, srcHdr.Len, outHdr.Ptr, &outHdr.Len, 0)
	assert.Equal(t, -8, ret)
	assert.Equal(t, 9, len(out))
	assert.Equal(t, `hello\b\f`, string(out))
}
/* TestNative_DoubleQuote checks __quote with the F_DOUBLE_UNQUOTE flag, where
 * every escape sequence is itself escaped once more (e.g. `\b` becomes `\\b`).
 * The length field of the destination slice header is passed as `dn`, so after
 * the call len(d) is the number of bytes written. */
func TestNative_DoubleQuote(t *testing.T) {
	s := "hello\b\f\n\r\t\\\"\u666fworld"
	d := make([]byte, 256)
	dp := (*rt.GoSlice)(unsafe.Pointer(&d))
	sp := (*rt.GoString)(unsafe.Pointer(&s))
	rv := __quote(sp.Ptr, sp.Len, dp.Ptr, &dp.Len, types.F_DOUBLE_UNQUOTE)

	/* a negative result is the bitwise complement of the number of input
	 * bytes consumed before the output ran out of space, not an error code */
	if rv < 0 {
		require.Failf(t, "output buffer too small", "only %d of %d input bytes were quoted", ^rv, len(s))
	}

	assert.Equal(t, len(s), rv)
	assert.Equal(t, 36, len(d))
	assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(d))
}
func TestNative_Unquote(t *testing.T) {
s := `hello\b\f\n\r\t\\\"\u2333world`
d := make([]byte, 0, len(s))
@ -173,6 +212,16 @@ func TestNative_Vstring(t *testing.T) {
assert.Equal(t, int64(5), v.Iv)
}
/* TestNative_VstringEscapeEOF runs __vstring over a string body whose only
 * escape sequence starts at byte 63 and expects the cursor to stop right after
 * the closing quote (95) with the escape position reported as 63.
 * NOTE(review): offset 63 presumably targets a vector-block boundary - confirm. */
func TestNative_VstringEscapeEOF(t *testing.T) {
	input := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
	pos := 0
	var state types.JsonState

	__vstring(&input, &pos, &state)

	assert.Equal(t, 95, pos)
	assert.Equal(t, 63, state.Ep)
	assert.Equal(t, int64(0), state.Iv)
}
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
v, e := hex.DecodeString(
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +

View file

@ -21,12 +21,12 @@ package avx2
var (
S_f64toa = _subr__f64toa
S_i64toa = _subr__i64toa
S_lquote = _subr__lquote
S_u64toa = _subr__u64toa
S_lspace = _subr__lspace
)
var (
S_lspace = _subr__lspace
S_quote = _subr__quote
S_unquote = _subr__unquote
)

View file

@ -14,29 +14,29 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
var (
_func__base = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3038
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6102
_subr__lquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 376
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 1268
_subr__f64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 822
_subr__i64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3886
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14748
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14783
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13195
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 6195
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7419
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9196
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11673
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12645
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10345
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12922
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15851
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15886
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13051
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9082
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11529
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12501
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10760
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12778
)
var (
_ = _subr__f64toa
_ = _subr__i64toa
_ = _subr__lquote
_ = _subr__lspace
_ = _subr__lzero
_ = _subr__quote
_ = _subr__skip_array
_ = _subr__skip_object
_ = _subr__skip_one

View file

@ -26,11 +26,14 @@ import (
)
var (
S_f64toa uintptr
S_i64toa uintptr
S_u64toa uintptr
S_lquote uintptr
S_lspace uintptr
S_f64toa uintptr
S_i64toa uintptr
S_u64toa uintptr
S_lspace uintptr
)
var (
S_quote uintptr
S_unquote uintptr
)
@ -56,12 +59,12 @@ func Lzero(p unsafe.Pointer, n int) int
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func Lquote(buf *string, off int) int
func Quote(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) int
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func Lspace(sp unsafe.Pointer, nb int, off int) int
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int
//go:nosplit
//go:noescape
@ -73,17 +76,12 @@ func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int
//goland:noinspection GoUnusedParameter
func SkipOne(s *string, p *int, m *types.StateMachine) int
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int
func useAVX() {
S_f64toa = avx.S_f64toa
S_i64toa = avx.S_i64toa
S_u64toa = avx.S_u64toa
S_lquote = avx.S_lquote
S_lspace = avx.S_lspace
S_quote = avx.S_quote
S_unquote = avx.S_unquote
S_value = avx.S_value
S_vstring = avx.S_vstring
@ -99,8 +97,8 @@ func useAVX2() {
S_f64toa = avx2.S_f64toa
S_i64toa = avx2.S_i64toa
S_u64toa = avx2.S_u64toa
S_lquote = avx2.S_lquote
S_lspace = avx2.S_lspace
S_quote = avx2.S_quote
S_unquote = avx2.S_unquote
S_value = avx2.S_value
S_vstring = avx2.S_vstring

View file

@ -24,17 +24,17 @@ TEXT ·Lzero(SB), NOSPLIT, $0 - 24
JMP github·combytedancesonicinternalnativeavx2·__lzero(SB)
JMP github·combytedancesonicinternalnativeavx·__lzero(SB)
TEXT ·Lquote(SB), NOSPLIT, $0 - 24
TEXT ·Quote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx2·__lquote(SB)
JMP github·combytedancesonicinternalnativeavx·__lquote(SB)
JMP github·combytedancesonicinternalnativeavx2·__quote(SB)
JMP github·combytedancesonicinternalnativeavx·__quote(SB)
TEXT ·Lspace(SB), NOSPLIT, $0 - 32
TEXT ·Unquote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx2·__lspace(SB)
JMP github·combytedancesonicinternalnativeavx·__lspace(SB)
JMP github·combytedancesonicinternalnativeavx2·__unquote(SB)
JMP github·combytedancesonicinternalnativeavx·__unquote(SB)
TEXT ·Value(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
@ -47,9 +47,3 @@ TEXT ·SkipOne(SB), NOSPLIT, $0 - 32
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx2·__skip_one(SB)
JMP github·combytedancesonicinternalnativeavx·__skip_one(SB)
TEXT ·Unquote(SB), NOSPLIT, $0 - 48
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx2·__unquote(SB)
JMP github·combytedancesonicinternalnativeavx·__unquote(SB)

View file

@ -45,12 +45,17 @@ func __lzero(p unsafe.Pointer, n int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lquote(buf *string, off int) (ret int)
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __lspace(sp unsafe.Pointer, nb int, off int) (ret int)
func __quote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)
//go:nosplit
//go:noescape
@ -91,8 +96,3 @@ func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
//go:noescape
//goland:noinspection GoUnusedParameter
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func __unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) (ret int)

View file

@ -41,6 +41,45 @@ func TestNative_Value(t *testing.T) {
assert.Equal(t, 3, v.Ep)
}
// TestNative_Quote quotes a string containing control characters, a
// backslash, a double quote and a multi-byte rune into a roomy buffer. It
// checks that the whole input is consumed and that the 27-byte output uses
// the short escapes while leaving the non-ASCII rune untouched.
func TestNative_Quote(t *testing.T) {
	src := "hello\b\f\n\r\t\\\"\u666fworld"
	dst := make([]byte, 256)

	// Runtime header views: the native routine writes the produced length
	// straight into the slice header.
	srcHdr := (*rt.GoString)(unsafe.Pointer(&src))
	dstHdr := (*rt.GoSlice)(unsafe.Pointer(&dst))

	ret := __quote(srcHdr.Ptr, srcHdr.Len, dstHdr.Ptr, &dstHdr.Len, 0)
	if ret < 0 {
		// A negative result is reported as the corresponding parsing error.
		require.NoError(t, types.ParsingError(-ret))
	}

	assert.Equal(t, len(src), ret)
	assert.Equal(t, 27, len(dst))
	assert.Equal(t, `hello\b\f\n\r\t\\\"景world`, string(dst))
}
// TestNative_QuoteNoMem quotes a string into a destination that is only 10
// bytes long and checks the "out of space" contract: __quote must report -8
// and leave exactly the 9 bytes `hello\b\f` in the destination.
func TestNative_QuoteNoMem(t *testing.T) {
	src := "hello\b\f\n\r\t\\\"\u666fworld"
	dst := make([]byte, 10)

	// View the string and slice through their runtime headers so the native
	// routine can read the source pointer and update the destination length.
	srcHdr := (*rt.GoString)(unsafe.Pointer(&src))
	dstHdr := (*rt.GoSlice)(unsafe.Pointer(&dst))

	ret := __quote(srcHdr.Ptr, srcHdr.Len, dstHdr.Ptr, &dstHdr.Len, 0)

	assert.Equal(t, -8, ret)
	assert.Equal(t, 9, len(dst))
	assert.Equal(t, `hello\b\f`, string(dst))
}
// TestNative_DoubleQuote quotes a string with types.F_DOUBLE_UNQUOTE set and
// checks that every escape is itself escaped once more (36 output bytes),
// while the whole input is reported as consumed.
func TestNative_DoubleQuote(t *testing.T) {
	src := "hello\b\f\n\r\t\\\"\u666fworld"
	dst := make([]byte, 256)

	// Runtime header views: the native routine writes the produced length
	// straight into the slice header.
	srcHdr := (*rt.GoString)(unsafe.Pointer(&src))
	dstHdr := (*rt.GoSlice)(unsafe.Pointer(&dst))

	ret := __quote(srcHdr.Ptr, srcHdr.Len, dstHdr.Ptr, &dstHdr.Len, types.F_DOUBLE_UNQUOTE)
	if ret < 0 {
		// A negative result is reported as the corresponding parsing error.
		require.NoError(t, types.ParsingError(-ret))
	}

	assert.Equal(t, len(src), ret)
	assert.Equal(t, 36, len(dst))
	assert.Equal(t, `hello\\b\\f\\n\\r\\t\\\\\\\"景world`, string(dst))
}
func TestNative_Unquote(t *testing.T) {
s := `hello\b\f\n\r\t\\\"\u2333world`
d := make([]byte, 0, len(s))
@ -171,6 +210,16 @@ func TestNative_Vstring(t *testing.T) {
assert.Equal(t, int64(5), v.Iv)
}
// TestNative_VstringEscapeEOF runs __vstring over a long string body whose
// only escape sequence (`\"`) starts at offset 63, and checks the resulting
// cursor (95), escape position (63) and Iv (0).
func TestNative_VstringEscapeEOF(t *testing.T) {
	var state types.JsonState
	pos := 0
	input := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`

	__vstring(&input, &pos, &state)

	assert.Equal(t, 95, pos)
	assert.Equal(t, 63, state.Ep)
	assert.Equal(t, int64(0), state.Iv)
}
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
v, e := hex.DecodeString(
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +

View file

@ -19,12 +19,12 @@ package {{PACKAGE}}
var (
S_f64toa = _subr__f64toa
S_i64toa = _subr__i64toa
S_lquote = _subr__lquote
S_u64toa = _subr__u64toa
S_lspace = _subr__lspace
)
var (
S_lspace = _subr__lspace
S_quote = _subr__quote
S_unquote = _subr__unquote
)

View file

@ -16,417 +16,48 @@
#include "native.h"
#if USE_SSE
#define loop_decl() \
size_t v; \
size_t n = 0; \
const char * p = s; \
#define loop_simd(size, load, func, ...) { \
while (nb >= size) { \
if ((v = func(load((const void *)(p)), ## __VA_ARGS__)) < size) { \
return n + v; \
} else { \
n += v; \
p += size; \
nb -= size; \
} \
} \
/* Returns non-zero when every bit of the 128-bit vector `v` is zero, using the
 * SSE4.1 test instruction (v AND v == 0) instead of a compare + movemask. */
static inline int is_zero_sse(__m128i v) {
return _mm_testz_si128(v, v);
}
#if !USE_AVX2
#define loop_zero()
#define loop_m256(func, ...)
#else
#define loop_zero() _mm256_zeroupper();
#define loop_m256(func, ...) loop_simd(32, _mm256_loadu_si256, func, ## __VA_ARGS__)
#endif
#define loop_m128(func, ...) loop_simd(16, _mm_loadu_si128, func, ## __VA_ARGS__)
#define loop_last(func, ...) return func(_mm_loadu_si128(as_m128c(p + nb - 16)), ## __VA_ARGS__) + n + nb - 16;
#define loop_bulk(func, ...) { \
loop_decl() \
loop_m256(func ## _avx2, ## __VA_ARGS__) \
loop_zero(); \
loop_m128(func ## _sse2, ## __VA_ARGS__) \
loop_last(func ## _sse2, ## __VA_ARGS__) \
}
#define loop_duff(func, ...) { \
size_t r = nb; \
__m128i m = _mm_set1_epi8(0xff); \
\
/* remaining bytes */ \
switch (r) { \
case 15 : m = _mm_insert_epi8(m, s[14], 14); \
case 14 : m = _mm_insert_epi8(m, s[13], 13); \
case 13 : m = _mm_insert_epi8(m, s[12], 12); \
case 12 : m = _mm_insert_epi8(m, s[11], 11); \
case 11 : m = _mm_insert_epi8(m, s[10], 10); \
case 10 : m = _mm_insert_epi8(m, s[ 9], 9); \
case 9 : m = _mm_insert_epi8(m, s[ 8], 8); \
case 8 : m = _mm_insert_epi8(m, s[ 7], 7); \
case 7 : m = _mm_insert_epi8(m, s[ 6], 6); \
case 6 : m = _mm_insert_epi8(m, s[ 5], 5); \
case 5 : m = _mm_insert_epi8(m, s[ 4], 4); \
case 4 : m = _mm_insert_epi8(m, s[ 3], 3); \
case 3 : m = _mm_insert_epi8(m, s[ 2], 2); \
case 2 : m = _mm_insert_epi8(m, s[ 1], 1); \
case 1 : m = _mm_insert_epi8(m, s[ 0], 0); \
default : return func ## _sse2(m, ## __VA_ARGS__); \
} \
}
/*
 * Returns the index (0..15) of the first byte in `v0` that is NOT one of the
 * four JSON whitespace characters, or 16 when all sixteen bytes are whitespace.
 */
static inline size_t lspace_sse2(__m128i v0) {
    const __m128i is_blank = _mm_cmpeq_epi8(v0, _mm_set1_epi8(' '));
    const __m128i is_tab   = _mm_cmpeq_epi8(v0, _mm_set1_epi8('\t'));
    const __m128i is_lf    = _mm_cmpeq_epi8(v0, _mm_set1_epi8('\n'));
    const __m128i is_cr    = _mm_cmpeq_epi8(v0, _mm_set1_epi8('\r'));

    /* one bit per byte, set where the byte is whitespace */
    const __m128i spaces = _mm_or_si128(_mm_or_si128(is_blank, is_tab), _mm_or_si128(is_lf, is_cr));
    const uint32_t mask  = _mm_movemask_epi8(spaces);

    /* bits 16..31 of ~mask are always set, so the count never exceeds 16 */
    const uint32_t first_non_space = __builtin_ctz(~mask);
    return first_non_space;
}
#if USE_AVX2
static inline size_t lspace_avx2(__m256i v0) {
__m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8(' '));
__m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\t'));
__m256i v3 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\n'));
__m256i v4 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\r'));
__m256i v5 = _mm256_or_si256 (v1, v2);
__m256i v6 = _mm256_or_si256 (v3, v4);
__m256i v7 = _mm256_or_si256 (v5, v6);
uint32_t v8 = _mm256_movemask_epi8 (v7);
uint64_t v9 = __builtin_ctzll (~(uint64_t)(v8));
return v9;
}
#endif
/*
 * Returns the index (0..15) of the first byte in `v0` that must be escaped in
 * a JSON string -- a control character (0..31), '"' or '\\' -- or 16 when no
 * such byte is present. Bytes with the high bit set are never matched.
 */
static inline size_t lquote_sse2(__m128i v0) {
    /* signed compares: "0 <= byte" and "byte < 32" together select 0..31 */
    const __m128i non_negative = _mm_cmpgt_epi8(v0, _mm_set1_epi8(-1));
    const __m128i below_space  = _mm_cmplt_epi8(v0, _mm_set1_epi8(' '));
    const __m128i is_control   = _mm_and_si128(non_negative, below_space);

    const __m128i is_dquote = _mm_cmpeq_epi8(v0, _mm_set1_epi8('"'));
    const __m128i is_bslash = _mm_cmpeq_epi8(v0, _mm_set1_epi8('\\'));
    const __m128i special   = _mm_or_si128(is_control, _mm_or_si128(is_dquote, is_bslash));

    /* the forced high bits make the count stop at 16 when nothing matched */
    const uint32_t mask  = _mm_movemask_epi8(special);
    const uint32_t first = __builtin_ctz(mask | 0xffff0000);
    return first;
}
#if USE_AVX2
static inline size_t lquote_avx2(__m256i v0) {
__m256i v1 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(-1));
__m256i v2 = _mm256_cmpgt_epi8 (v0, _mm256_set1_epi8(31));
__m256i v3 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('"'));
__m256i v4 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8('\\'));
__m256i v5 = _mm256_andnot_si256 (v2, v1);
__m256i v6 = _mm256_or_si256 (v3, v4);
__m256i v7 = _mm256_or_si256 (v5, v6);
uint32_t v8 = _mm256_movemask_epi8 (v7);
uint64_t v9 = __builtin_ctzll ((uint64_t)v8 | 0xffffffff00000000);
return v9;
}
#endif
/*
 * Returns the index (0..15) of the first byte in `v0` equal to the low byte of
 * either `c0` or `c1`, or 16 when neither occurs.
 */
static inline size_t strchr2_sse2(__m128i v0, uint64_t c0, uint64_t c1) {
    const __m128i hit0 = _mm_cmpeq_epi8(v0, _mm_set1_epi8((char)c0));
    const __m128i hit1 = _mm_cmpeq_epi8(v0, _mm_set1_epi8((char)c1));

    /* the forced high bits make the count stop at 16 when nothing matched */
    const uint32_t mask  = _mm_movemask_epi8(_mm_or_si128(hit0, hit1));
    const uint32_t first = __builtin_ctz(mask | 0xffff0000);
    return first;
}
#if USE_AVX2
static inline size_t strchr2_avx2(__m256i v0, uint64_t c0, uint64_t c1) {
__m256i v1 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c0));
__m256i v2 = _mm256_cmpeq_epi8 (v0, _mm256_set1_epi8((char)c1));
__m256i v3 = _mm256_or_si256 (v1, v2);
uint32_t v4 = _mm256_movemask_epi8 (v3);
uint64_t v5 = __builtin_ctzll ((uint64_t)v4 | 0xffffffff00000000);
return v5;
}
#endif
#define do_simd(func, ...) { \
if (nb == 0) { \
return 0; \
} if (nb < 16) { \
loop_duff(func, ## __VA_ARGS__) \
} else { \
loop_bulk(func, ## __VA_ARGS__) \
} \
}
#endif
#define is_quote(c) ((c) == '"' || (c) == '\\' || ((c) >= 0 && (c) <= 31))
#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
/*
 * Returns the number of leading whitespace bytes (' ', '\t', '\n', '\r') in
 * the `nb` bytes at `s`, i.e. the offset of the first non-space byte, or `nb`
 * when the whole buffer is whitespace.
 *
 * The scalar fallback must skip bytes WHILE they are spaces so that it agrees
 * with the SIMD helpers, which count trailing zeros of the inverted space
 * mask. The previous `!is_space` condition located the first space instead.
 */
static inline size_t lspace_p(const char *s, size_t nb) {
#if USE_SSE
    do_simd(lspace)
#else
    size_t i = 0;
    while (i < nb && is_space(s[i])) i++;
    return i;
#endif
}
/*
 * Returns the offset of the first byte in the `nb` bytes at `s` that needs
 * escaping according to `is_quote`, or `nb` when there is none.
 */
static inline size_t lquote_p(const char *s, size_t nb) {
#if USE_SSE
    do_simd(lquote)
#else
    size_t pos;

    /* plain byte-by-byte scan, stopping at the first special character */
    for (pos = 0; pos < nb; pos++) {
        if (is_quote(s[pos])) {
            break;
        }
    }
    return pos;
#endif
}
/*
 * Scans the `nb` bytes at `p` for the byte `ch`.
 *
 * Returns the offset of the first match. When nothing matches inside the
 * buffer the result is some value >= nb (either `nb` itself or (size_t)-1),
 * so callers must test `result >= nb` rather than compare against one value.
 *
 * The SIMD path only issues aligned block loads: the leading partial block is
 * loaded from the aligned address at or below `p` and the bits belonging to
 * bytes before `p` are shifted out; matches found past the end of the buffer
 * are rejected by comparing against the remaining length.
 */
static inline size_t strchr1_p(const char *p, size_t nb, uint64_t ch) {
#if USE_SSE
    int64_t  r;
    uint32_t t;

    /* prepare the vector */
    ssize_t      n = nb;
    uintptr_t    m = (uintptr_t)p;
    const char * q = p;

#if USE_AVX2
#define ALIGN_VAL 31
#define _mm_or _mm256_or_si256
#define _mm_load _mm256_load_si256
#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b)
#define _mm_testz(v) _mm256_testz_si256(v, v)
#define _mm_movemask(v) _mm256_movemask_epi8(v)
    __m256i a;
    __m256i b;
    __m256i c;
    __m256i d;
    __m256i u;
    __m256i v;
    __m256i w;
    __m256i x = _mm256_set1_epi8(ch);
#else
#define ALIGN_VAL 15
#define _mm_or _mm_or_si128
#define _mm_load _mm_load_si128
#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b)
#define _mm_testz(v) (_mm_movemask_epi8(v) == 0)
#define _mm_movemask(v) _mm_movemask_epi8(v)
    __m128i a;
    __m128i b;
    __m128i c;
    __m128i d;
    __m128i u;
    __m128i v;
    __m128i w;
    __m128i x = _mm_set1_epi8(ch);
#endif

/* BLOCK_MASK is a sentinel bit just above the block so ctz never sees zero */
#define BLOCK_SIZE (ALIGN_VAL + 1)
#define BLOCK_MASK (1ull << BLOCK_SIZE)
#define BLOCK_LARGE (BLOCK_SIZE * 4)

    /* check for pointer alignment */
    if (m & ALIGN_VAL) {
        v = _mm_load     ((const void *)(m & -BLOCK_SIZE));
        v = _mm_cmpeq    (v, x);
        r = _mm_movemask (v);

        /* check for match in the first characters, dropping the bits that
         * belong to bytes located before `p` */
        if ((r >>= (t = m & ALIGN_VAL)) != 0) {
            if ((r = __builtin_ctzll(r | BLOCK_MASK)) < n) {
                return r;
            } else {
                return -1;
            }
        }

        /* make the pointer aligned */
        p += BLOCK_SIZE - t;
        n -= BLOCK_SIZE - t;
    }

    /* attempt to compare 4 blocks at a time */
    while (n >= BLOCK_LARGE) {
        a = _mm_load  ((const void *)(p + BLOCK_SIZE * 0));
        b = _mm_load  ((const void *)(p + BLOCK_SIZE * 1));
        c = _mm_load  ((const void *)(p + BLOCK_SIZE * 2));
        d = _mm_load  ((const void *)(p + BLOCK_SIZE * 3));
        a = _mm_cmpeq (a, x);
        b = _mm_cmpeq (b, x);
        c = _mm_cmpeq (c, x);
        d = _mm_cmpeq (d, x);
        u = _mm_or    (a, b);
        v = _mm_or    (c, d);
        w = _mm_or    (u, v);

        /* check if anything matches */
        if (_mm_testz(w)) {
            p += BLOCK_LARGE;
            n -= BLOCK_LARGE;
            continue;
        }

        /* match something in the 4-blocks region */
        if ((r = _mm_movemask(a)) != 0) {
            return p - q + __builtin_ctzll(r | BLOCK_MASK);
        } else if ((r = _mm_movemask(b)) != 0) {
            return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE;
        } else if ((r = _mm_movemask(c)) != 0) {
            return p - q + __builtin_ctzll(r | BLOCK_MASK) + BLOCK_SIZE * 2;
        } else {
            return p - q + __builtin_ctzll(_mm_movemask(d) | BLOCK_MASK) + BLOCK_SIZE * 3;
        }
    }

    /* check every block, at most 4 times; stop as soon as no bytes remain so
     * that no block lying entirely past the end of the buffer is loaded */
    for (int i = 0; i < 4 && n > 0; i++) {
        v = _mm_load     ((const void *)p);
        v = _mm_cmpeq    (v, x);
        r = _mm_movemask (v);

        /* found something */
        if (r != 0) {
            if ((r = __builtin_ctzll(r | BLOCK_MASK)) >= n) {
                return -1;
            } else {
                return p - q + r;
            }
        }

        /* otherwise advance to next block */
        p += BLOCK_SIZE;
        n -= BLOCK_SIZE;
    }

/* drop every helper macro defined above so none of them leaks into later code */
#undef _mm_or
#undef _mm_load
#undef _mm_cmpeq
#undef _mm_testz
#undef _mm_movemask
#undef ALIGN_VAL
#undef BLOCK_SIZE
#undef BLOCK_MASK
#undef BLOCK_LARGE
#else
    for (size_t i = 0; i < nb; i++) {
        if (p[i] == ch) {
            return i;
        }
    }
#endif

    /* not found */
    return nb;
}
/*
 * Returns the offset of the first byte in the `nb` bytes at `s` that equals
 * either `c0` or `c1`, or `nb` when neither occurs.
 */
static inline size_t strchr2_p(const char *s, size_t nb, uint64_t c0, uint64_t c1) {
#if USE_SSE
    do_simd(strchr2, c0, c1)
#else
    /* plain byte-by-byte scan */
    for (size_t pos = 0; pos < nb; pos++) {
        if (s[pos] == c0 || s[pos] == c1) {
            return pos;
        }
    }
    return nb;
#endif
}
size_t lzero(const char *p, size_t n) {
#if USE_SSE
#if USE_AVX
__m256i a;
__m256i b;
__m256i c;
__m256i d;
__m256i u;
__m256i v;
__m256i w;
__m256i y = _mm256_set1_epi8(0xff);
__m256i z = _mm256_setzero_si256();
#define BLOCK_SIZE 32
#else
__m128i a;
__m128i b;
__m128i c;
__m128i d;
__m128i u;
__m128i v;
__m128i w;
__m128i z = _mm_setzero_si128();
#define BLOCK_SIZE 16
/* Returns non-zero when every bit of the 256-bit vector `v` is zero, using the
 * AVX test instruction (v AND v == 0). */
static inline int is_zero_avx(__m256i v) {
return _mm256_testz_si256(v, v);
}
#endif
#if USE_AVX2
#define _mm_load _mm256_load_si256
#define _mm_and(a, b) _mm256_and_si256(a, b)
#define _mm_cmpeq(a, b) _mm256_cmpeq_epi8(a, b)
#define _mm_testinz(v) (!_mm256_testc_si256(v, y))
#elif USE_AVX
#define _mm_load _mm256_load_si256
#define _mm_and(a, b) _mm256_and_ps((__m256)a, (__m256)b)
#define _mm_cmpeq(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_OQ)
#define _mm_testinz(v) (!_mm256_testc_si256(v, y))
#else
#define _mm_load _mm_load_si128
#define _mm_and(a, b) _mm_and_si128(a, b)
#define _mm_cmpeq(a, b) _mm_cmpeq_epi8(a, b)
#define _mm_testinz(v) (_mm_movemask_epi8(v) != 0xffff)
#endif
size_t lzero(const char *sp, size_t nb) {
size_t n = nb;
const char * p = sp;
/* multi-block loop */
while (n >= BLOCK_SIZE * 4) {
a = _mm_load ((const void *)(p + BLOCK_SIZE * 0));
b = _mm_load ((const void *)(p + BLOCK_SIZE * 1));
c = _mm_load ((const void *)(p + BLOCK_SIZE * 2));
d = _mm_load ((const void *)(p + BLOCK_SIZE * 3));
a = _mm_cmpeq (a, z);
b = _mm_cmpeq (b, z);
c = _mm_cmpeq (c, z);
d = _mm_cmpeq (d, z);
u = _mm_and (a, b);
v = _mm_and (c, d);
w = _mm_and (u, v);
/* test for zeros */
if (_mm_testinz(w)) {
#if USE_AVX
/* 32-byte loop */
while (n >= 32) {
if (!is_zero_avx(_mm256_loadu_si256((const void *)p))) {
_mm256_zeroupper();
return 1;
} else {
p += 32;
n -= 32;
}
/* move to next block */
p += BLOCK_SIZE * 4;
n -= BLOCK_SIZE * 4;
}
/* single block loop */
while (n >= BLOCK_SIZE) {
a = _mm_load ((const void *)(p));
b = _mm_cmpeq (a, z);
/* test for zeros */
if (_mm_testinz(b)) {
return 1;
}
/* move to next block */
p += BLOCK_SIZE;
n -= BLOCK_SIZE;
}
#undef _mm_load
#undef _mm_cmpeq
#undef _mm_bitand
#undef _mm_testinz
#undef BLOCK_SIZE
/* clear upper half to avoid AVX-SSE transition penalty */
_mm256_zeroupper();
#endif
/* 8 bytes loop */
while (n >= 8) {
/* 16-byte loop */
while (n >= 16) {
if (!is_zero_sse(_mm_loadu_si128((const void *)p))) {
return 1;
} else {
p += 16;
n -= 16;
}
}
/* 8-byte test */
if (n >= 8) {
if (*(uint64_t *)p) {
return 1;
} else {
@ -435,7 +66,7 @@ size_t lzero(const char *p, size_t n) {
}
}
/* 4 bytes test */
/* 4-byte test */
if (n >= 4) {
if (*(uint32_t *)p) {
return 1;
@ -445,7 +76,7 @@ size_t lzero(const char *p, size_t n) {
}
}
/* 2 bytes test */
/* 2-byte test */
if (n >= 2) {
if (*(uint16_t *)p) {
return 1;
@ -463,22 +94,90 @@ size_t lzero(const char *p, size_t n) {
}
}
size_t lquote(const GoString *s, size_t p) {
return lquote_p(s->buf + p, s->len - p) + p;
}
#if USE_AVX2
static const uintptr_t ALIGN_MASK = 31;
#else
static const uintptr_t ALIGN_MASK = 15;
#endif
size_t lspace(const char *sp, size_t nb, size_t p) {
return lspace_p(sp + p, nb - p) + p;
}
int32_t ms;
const char * ss = sp;
ssize_t strchr1(const GoString *s, size_t p, char ch) {
size_t n = s->len - p;
size_t v = strchr1_p(s->buf + p, n, ch);
return v >= n ? -1 : v + p;
}
/* seek to `p` */
sp += p;
nb -= p;
ssize_t strchr2(const GoString *s, size_t p, char c0, char c1) {
size_t n = s->len - p;
size_t v = strchr2_p(s->buf + p, n, c0, c1);
return v >= n ? -1 : v + p;
}
/* likely to run into non-spaces within a few characters, try scalar code first */
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
switch ((nb--, *sp++)) {
case ' ' : break;
case '\r' : break;
case '\n' : break;
case '\t' : break;
default : return sp - ss - 1;
}
}
#if USE_AVX2
/* 32-byte loop */
while (likely(nb >= 32)) {
__m256i x = _mm256_load_si256 ((const void *)sp);
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
__m256i u = _mm256_or_si256 (a, b);
__m256i v = _mm256_or_si256 (c, d);
__m256i w = _mm256_or_si256 (u, v);
/* check for matches */
if ((ms = _mm256_movemask_epi8(w)) != -1) {
_mm256_zeroupper();
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
}
/* move to next block */
sp += 32;
nb -= 32;
}
/* clear upper half to avoid AVX-SSE transition penalty */
_mm256_zeroupper();
#endif
/* 16-byte loop */
while (likely(nb >= 16)) {
__m128i x = _mm_load_si128 ((const void *)sp);
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
__m128i u = _mm_or_si128 (a, b);
__m128i v = _mm_or_si128 (c, d);
__m128i w = _mm_or_si128 (u, v);
/* check for matches */
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
return sp - ss + __builtin_ctz(~ms);
}
/* move to next block */
sp += 16;
nb -= 16;
}
/* remaining bytes, do with scalar code */
while (nb-- > 0) {
switch (*sp++) {
case ' ' : break;
case '\r' : break;
case '\n' : break;
case '\t' : break;
default : return sp - ss - 1;
}
}
/* all the characters are spaces */
return sp - ss;
}

View file

@ -16,8 +16,6 @@
#include "native.h"
#if USE_SSE
static const char Digits[200] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
@ -223,8 +221,6 @@ static inline int u64toa_xlarge_sse2(char *out, uint64_t val) {
return n + 16;
}
#endif
int i64toa(char *out, int64_t val) {
if (likely(val >= 0)) {
return u64toa(out, (uint64_t)val);
@ -234,8 +230,6 @@ int i64toa(char *out, int64_t val) {
}
}
#if USE_SSE
int u64toa(char *out, uint64_t val) {
if (likely(val < 10000)) {
return u32toa_small(out, (uint32_t)val);
@ -247,29 +241,3 @@ int u64toa(char *out, uint64_t val) {
return u64toa_xlarge_sse2(out, val);
}
}
#else
/*
 * Scalar fallback: writes the decimal representation of `val` into `out`
 * (no NUL terminator) and returns the number of characters written.
 * `out` must have room for up to 20 characters.
 */
int u64toa(char *out, uint64_t val) {
    char     c;
    long     n = 0;
    uint64_t v = val;

    /* convert each digit, least significant first. The loop is driven by the
     * working copy `v` (testing `val` never terminated), and runs at least
     * once so that zero is rendered as "0" instead of an empty string */
    do {
        out[n++] = v % 10 + '0';
        v /= 10;
    } while (v);

    /* reverse the output */
    for (long i = 0; i < n / 2; i++) {
        c = out[i];
        out[i] = out[n - i - 1];
        out[n - i - 1] = c;
    }

    /* all done */
    return n;
}
#endif

View file

@ -93,13 +93,11 @@ int f64toa(char *out, double val);
int i64toa(char *out, int64_t val);
int u64toa(char *out, uint64_t val);
size_t lzero(const char *p, size_t n);
size_t lquote(const GoString *s, size_t p);
size_t lspace(const char *sp, size_t nb, size_t p);
ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flags);
size_t lzero(const char *sp, size_t nb);
size_t lspace(const char *sp, size_t nb, size_t p);
ssize_t strchr1(const GoString *s, size_t p, char ch);
ssize_t strchr2(const GoString *s, size_t p, char c0, char c1);
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags);
ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flags);
long value(const char *s, size_t n, long p, JsonState *ret, int allow_control);
void vstring(const GoString *src, long *p, JsonState *ret);

View file

@ -16,6 +16,292 @@
#include "native.h"
/** String Quoting **/
/* One escape-table entry: the replacement text emitted for a single input byte. */
typedef struct {
const long n; /* length of `s` in bytes; 0 for bytes that are copied through unescaped */
const char s[8]; /* the escape sequence itself (at most 7 characters are used by the tables) */
} quoted_t;
static const quoted_t _SingleQuoteTab[256] = {
['\x00'] = { .n = 6, .s = "\\u0000" },
['\x01'] = { .n = 6, .s = "\\u0001" },
['\x02'] = { .n = 6, .s = "\\u0002" },
['\x03'] = { .n = 6, .s = "\\u0003" },
['\x04'] = { .n = 6, .s = "\\u0004" },
['\x05'] = { .n = 6, .s = "\\u0005" },
['\x06'] = { .n = 6, .s = "\\u0006" },
['\x07'] = { .n = 6, .s = "\\u0007" },
['\b' ] = { .n = 2, .s = "\\b" },
['\t' ] = { .n = 2, .s = "\\t" },
['\n' ] = { .n = 2, .s = "\\n" },
['\x0b'] = { .n = 6, .s = "\\u000b" },
['\f' ] = { .n = 2, .s = "\\f" },
['\r' ] = { .n = 2, .s = "\\r" },
['\x0e'] = { .n = 6, .s = "\\u000e" },
['\x0f'] = { .n = 6, .s = "\\u000f" },
['\x10'] = { .n = 6, .s = "\\u0010" },
['\x11'] = { .n = 6, .s = "\\u0011" },
['\x12'] = { .n = 6, .s = "\\u0012" },
['\x13'] = { .n = 6, .s = "\\u0013" },
['\x14'] = { .n = 6, .s = "\\u0014" },
['\x15'] = { .n = 6, .s = "\\u0015" },
['\x16'] = { .n = 6, .s = "\\u0016" },
['\x17'] = { .n = 6, .s = "\\u0017" },
['\x18'] = { .n = 6, .s = "\\u0018" },
['\x19'] = { .n = 6, .s = "\\u0019" },
['\x1a'] = { .n = 6, .s = "\\u001a" },
['\x1b'] = { .n = 6, .s = "\\u001b" },
['\x1c'] = { .n = 6, .s = "\\u001c" },
['\x1d'] = { .n = 6, .s = "\\u001d" },
['\x1e'] = { .n = 6, .s = "\\u001e" },
['\x1f'] = { .n = 6, .s = "\\u001f" },
['"' ] = { .n = 2, .s = "\\\"" },
['\\' ] = { .n = 2, .s = "\\\\" },
};
static const quoted_t _DoubleQuoteTab[256] = {
['\x00'] = { .n = 7, .s = "\\\\u0000" },
['\x01'] = { .n = 7, .s = "\\\\u0001" },
['\x02'] = { .n = 7, .s = "\\\\u0002" },
['\x03'] = { .n = 7, .s = "\\\\u0003" },
['\x04'] = { .n = 7, .s = "\\\\u0004" },
['\x05'] = { .n = 7, .s = "\\\\u0005" },
['\x06'] = { .n = 7, .s = "\\\\u0006" },
['\x07'] = { .n = 7, .s = "\\\\u0007" },
['\b' ] = { .n = 3, .s = "\\\\b" },
['\t' ] = { .n = 3, .s = "\\\\t" },
['\n' ] = { .n = 3, .s = "\\\\n" },
['\x0b'] = { .n = 7, .s = "\\\\u000b" },
['\f' ] = { .n = 3, .s = "\\\\f" },
['\r' ] = { .n = 3, .s = "\\\\r" },
['\x0e'] = { .n = 7, .s = "\\\\u000e" },
['\x0f'] = { .n = 7, .s = "\\\\u000f" },
['\x10'] = { .n = 7, .s = "\\\\u0010" },
['\x11'] = { .n = 7, .s = "\\\\u0011" },
['\x12'] = { .n = 7, .s = "\\\\u0012" },
['\x13'] = { .n = 7, .s = "\\\\u0013" },
['\x14'] = { .n = 7, .s = "\\\\u0014" },
['\x15'] = { .n = 7, .s = "\\\\u0015" },
['\x16'] = { .n = 7, .s = "\\\\u0016" },
['\x17'] = { .n = 7, .s = "\\\\u0017" },
['\x18'] = { .n = 7, .s = "\\\\u0018" },
['\x19'] = { .n = 7, .s = "\\\\u0019" },
['\x1a'] = { .n = 7, .s = "\\\\u001a" },
['\x1b'] = { .n = 7, .s = "\\\\u001b" },
['\x1c'] = { .n = 7, .s = "\\\\u001c" },
['\x1d'] = { .n = 7, .s = "\\\\u001d" },
['\x1e'] = { .n = 7, .s = "\\\\u001e" },
['\x1f'] = { .n = 7, .s = "\\\\u001f" },
['"' ] = { .n = 4, .s = "\\\\\\\"" },
['\\' ] = { .n = 4, .s = "\\\\\\\\" },
};
/*
 * Copies a short run of bytes from `sp` to `dp` using at most one 4-byte, one
 * 2-byte and one 1-byte move, i.e. min(nb, 7) bytes in total. Intended for
 * lengths below 8.
 */
static inline void memcpy_p8(char *dp, const char *sp, size_t nb) {
    size_t off  = 0;
    size_t left = nb;

    if (left >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off  += 4;
        left -= 4;
    }
    if (left >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off  += 2;
        left -= 2;
    }
    if (left >= 1) {
        dp[off] = sp[off];
    }
}
/*
 * Copies a short run of bytes from `sp` to `dp` using at most one 8-, 4-, 2-
 * and 1-byte move each, i.e. min(nb, 15) bytes in total. Intended for lengths
 * below 16.
 */
static inline void memcpy_p16(char *dp, const char *sp, size_t nb) {
    size_t off  = 0;
    size_t left = nb;

    if (left >= 8) {
        *(uint64_t *)(dp + off) = *(const uint64_t *)(sp + off);
        off  += 8;
        left -= 8;
    }
    if (left >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off  += 4;
        left -= 4;
    }
    if (left >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off  += 2;
        left -= 2;
    }
    if (left >= 1) {
        dp[off] = sp[off];
    }
}
/*
 * Copies a short run of bytes from `sp` to `dp` using at most one 16-, 8-, 4-,
 * 2- and 1-byte move each, i.e. min(nb, 31) bytes in total. Intended for
 * lengths below 32. The 16-byte move uses unaligned SSE load/store.
 */
static inline void memcpy_p32(char *dp, const char *sp, size_t nb) {
    size_t off  = 0;
    size_t left = nb;

    if (left >= 16) {
        _mm_storeu_si128((void *)(dp + off), _mm_loadu_si128((const void *)(sp + off)));
        off  += 16;
        left -= 16;
    }
    if (left >= 8) {
        *(uint64_t *)(dp + off) = *(const uint64_t *)(sp + off);
        off  += 8;
        left -= 8;
    }
    if (left >= 4) {
        *(uint32_t *)(dp + off) = *(const uint32_t *)(sp + off);
        off  += 4;
        left -= 4;
    }
    if (left >= 2) {
        *(uint16_t *)(dp + off) = *(const uint16_t *)(sp + off);
        off  += 2;
        left -= 2;
    }
    if (left >= 1) {
        dp[off] = sp[off];
    }
}
/*
 * Builds a byte mask over the 16 bytes of `vv`: a lane is all-ones when the
 * byte must be escaped in a JSON string (control character 0..31, '"' or
 * '\\') and all-zeros otherwise. Bytes with the high bit set are not matched.
 */
static inline __m128i _mm_find_quote(__m128i vv) {
    /* signed compares: "byte > -1" and NOT "byte > 31" together select 0..31 */
    const __m128i non_negative = _mm_cmpgt_epi8(vv, _mm_set1_epi8(-1));
    const __m128i above_ctrl   = _mm_cmpgt_epi8(vv, _mm_set1_epi8(31));
    const __m128i is_control   = _mm_andnot_si128(above_ctrl, non_negative);

    const __m128i is_dquote = _mm_cmpeq_epi8(vv, _mm_set1_epi8('"'));
    const __m128i is_bslash = _mm_cmpeq_epi8(vv, _mm_set1_epi8('\\'));

    return _mm_or_si128(is_control, _mm_or_si128(is_dquote, is_bslash));
}
#if USE_AVX2
static inline __m256i _mm256_find_quote(__m256i vv) {
__m256i e1 = _mm256_cmpgt_epi8 (vv, _mm256_set1_epi8(-1));
__m256i e2 = _mm256_cmpgt_epi8 (vv, _mm256_set1_epi8(31));
__m256i e3 = _mm256_cmpeq_epi8 (vv, _mm256_set1_epi8('"'));
__m256i e4 = _mm256_cmpeq_epi8 (vv, _mm256_set1_epi8('\\'));
__m256i r1 = _mm256_andnot_si256 (e2, e1);
__m256i r2 = _mm256_or_si256 (e3, e4);
__m256i rv = _mm256_or_si256 (r1, r2);
return rv;
}
#endif
/*
 * Copies bytes from `sp` to `dp` while searching for the first byte that needs
 * escaping (control character 0..31, '"' or '\\').
 *
 *   sp, nb : source buffer and its remaining length
 *   dp, dn : destination buffer and its remaining capacity
 *
 * Returns a non-negative offset `k` when the scan stopped normally: either
 * sp[k] is the first special byte, or k == nb and the source is exhausted; the
 * `k` bytes before it have been copied to `dp`.
 * Returns a negative value -(k) - 1 when the destination ran out first, where
 * `k` is the number of source bytes that were copied before running out.
 *
 * In the "full store" loops a whole vector is written to `dp` before the mask
 * is inspected, so bytes past the returned offset may already have been
 * written; that is only done while at least a full vector of room remains.
 */
static inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
uint32_t mm;
const char * ss = sp;
#if USE_AVX2
/* 32-byte loop, full store */
while (nb >= 32 && dn >= 32) {
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
__m256i rv = _mm256_find_quote (vv);
_mm256_storeu_si256 ((void *)dp, vv);
/* check for matches */
if ((mm = _mm256_movemask_epi8(rv)) != 0) {
return sp - ss + __builtin_ctz(mm);
}
/* move to next block */
sp += 32;
dp += 32;
nb -= 32;
dn -= 32;
}
/* 32-byte test, partial store: reached with nb >= 32 only when dn < 32 */
if (nb >= 32) {
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
__m256i rv = _mm256_find_quote (vv);
uint32_t mv = _mm256_movemask_epi8 (rv);
/* bit 32 is a sentinel so that `fv` is 32 when nothing matched */
uint32_t fv = __builtin_ctzll ((uint64_t)mv | 0x0100000000);
/* copy at most `dn` characters */
if (fv <= dn) {
memcpy_p32(dp, sp, fv);
return sp - ss + fv;
} else {
memcpy_p32(dp, sp, dn);
return -(sp - ss + dn) - 1;
}
}
/* clear upper half to avoid AVX-SSE transition penalty */
_mm256_zeroupper();
#endif
/* 16-byte loop, full store */
while (nb >= 16 && dn >= 16) {
__m128i vv = _mm_loadu_si128 ((const void *)sp);
__m128i rv = _mm_find_quote (vv);
_mm_storeu_si128 ((void *)dp, vv);
/* check for matches */
if ((mm = _mm_movemask_epi8(rv)) != 0) {
return sp - ss + __builtin_ctz(mm);
}
/* move to next block */
sp += 16;
dp += 16;
nb -= 16;
dn -= 16;
}
/* 16-byte test, partial store: reached with nb >= 16 only when dn < 16 */
if (nb >= 16) {
__m128i vv = _mm_loadu_si128 ((const void *)sp);
__m128i rv = _mm_find_quote (vv);
uint32_t mv = _mm_movemask_epi8 (rv);
/* bit 16 is a sentinel so that `fv` is 16 when nothing matched */
uint32_t fv = __builtin_ctz (mv | 0x010000);
/* copy at most `dn` characters */
if (fv <= dn) {
memcpy_p16(dp, sp, fv);
return sp - ss + fv;
} else {
memcpy_p16(dp, sp, dn);
return -(sp - ss + dn) - 1;
}
}
/* handle the remaining bytes with scalar code; a non-zero table length
 * marks a byte that needs escaping */
while (nb > 0 && dn > 0) {
if (_SingleQuoteTab[*(uint8_t *)sp].n) {
return sp - ss;
} else {
dn--, nb--;
*dp++ = *sp++;
}
}
/* check for dest buffer: source exhausted means success, otherwise the
 * destination filled up first */
if (nb == 0) {
return sp - ss;
} else {
return -(sp - ss) - 1;
}
}
/*
 * Escapes the `nb` bytes at `sp` as the body of a JSON string and writes the
 * result to `dp`.
 *
 *   sp, nb : source bytes
 *   dp     : destination buffer
 *   dn     : in: capacity of `dp`; out: number of bytes written
 *   flags  : with F_DBLUNQ set the double-escaping table is used
 *
 * Returns the number of source bytes consumed (== nb) on success. When the
 * destination is too small, returns -(k) - 1 where `k` is the number of source
 * bytes consumed so far, and `*dn` holds the bytes written up to that point.
 */
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags) {
    const char * const src_begin = sp;
    const char * const dst_begin = dp;
    ssize_t            room      = *dn;

    /* select quoting table */
    const quoted_t *escapes = (flags & F_DBLUNQ) ? _DoubleQuoteTab : _SingleQuoteTab;

    while (nb != 0) {
        /* bulk-copy plain bytes up to the next special character */
        ssize_t plain = memcchr_quote(sp, nb, dp, room);

        /* not enough buffer space: a negative result encodes -(copied) - 1 */
        if (plain < 0) {
            ssize_t copied = -plain - 1;
            *dn = (dp - dst_begin) + copied;
            return -((sp - src_begin) + copied) - 1;
        }

        /* skip already copied bytes */
        sp   += plain;
        dp   += plain;
        nb   -= plain;
        room -= plain;

        /* emit escapes for a run of consecutive special characters */
        for (; nb != 0; sp++, nb--) {
            const quoted_t *esc = &escapes[*(const uint8_t *)sp];
            int             len = esc->n;

            /* ordinary character: go back to the bulk copy */
            if (len == 0) {
                break;
            }

            /* the escape sequence does not fit */
            if (len > room) {
                *dn = dp - dst_begin;
                return -(sp - src_begin) - 1;
            }

            /* copy the quoted value */
            memcpy_p8(dp, esc->s, len);
            dp   += len;
            room -= len;
        }
    }

    /* all done */
    *dn = dp - dst_begin;
    return sp - src_begin;
}
/** String Unquoting **/
static const char _UnquoteTab[256] = {
['/' ] = '/',
['"' ] = '"',
@ -46,7 +332,7 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
/* check for matches */
if ((r = _mm256_movemask_epi8(v)) != 0) {
return s - q + __builtin_ctzll(r | (1ull << 32));
return s - q + __builtin_ctzll(r);
}
/* move to the next 32 bytes */
@ -54,13 +340,12 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
p += 32;
n -= 32;
}
#endif
#if USE_AVX2
/* clear upper half to avoid AVX-SSE transition penalty */
_mm256_zeroupper();
#endif
#if USE_SSE
/* initialize with '\\' */
__m128i x;
__m128i y;
__m128i a = _mm_set1_epi8('\\');
@ -73,7 +358,7 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
/* check for matches */
if ((r = _mm_movemask_epi8(y)) != 0) {
return s - q + __builtin_ctzll(r | (1 << 16));
return s - q + __builtin_ctzll(r);
}
/* move to the next 16 bytes */
@ -81,7 +366,6 @@ static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
p += 16;
n -= 16;
}
#endif
/* remaining bytes, do with scalar code */
while (n--) {

View file

@ -19,6 +19,9 @@
static const char *CS_ARRAY = "[]{},\"[]{},\"[]{}";
static const char *CS_OBJECT = "[]{},:\"[]{}:,\"[]";
static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
static const uint64_t EVEN_MASK = 0x5555555555555555;
static const double P10_TAB[632] = {
/* <================= -Inf ================= */ 1e-323, 1e-322, 1e-321, 1e-320,
1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310,
@ -96,22 +99,54 @@ static inline double pow10(double v, int p) {
}
}
/** Adds the low 32 bits of `v1` and `v2`.
 *  Returns the 32-bit sum (zero-extended) and stores the carry-out (0 or 1) in `*vo`. */
static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) {
    /* widen the truncated operands so that the carry lands in bit 32 */
    uint64_t sum = (uint64_t)(uint32_t)v1 + (uint64_t)(uint32_t)v2;

    *vo = sum >> 32;
    return (uint32_t)sum;
}
/** Adds `v1` and `v2` as 64-bit unsigned integers.
 *  Returns the wrapped sum and stores the carry-out (0 or 1) in `*vo`.
 *
 *  Plain unsigned arithmetic is used instead of `__builtin_uaddll_overflow`,
 *  because that builtin takes an `unsigned long long *` result pointer, which
 *  is not the same type as `uint64_t *` on LP64 targets where `uint64_t` is
 *  `unsigned long`. */
static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
    uint64_t sum = v1 + v2;

    /* unsigned addition wraps, the sum is smaller than an operand iff it carried */
    *vo = sum < v1;
    return sum;
}
/** Returns non-zero iff `ch` is one of the four JSON whitespace
 *  characters: space, carriage return, line feed or horizontal tab. */
static inline char isspace(char ch) {
    /* all four tests are joined with logical OR so they short-circuit uniformly */
    return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t';
}
/** Steps `*p` back by one character and hands the input over to `vnumber`. */
static inline void vdigits(const GoString *src, long *p, JsonState *ret) {
    *p -= 1;
    vnumber(src, p, ret);
}
/** Reads the character at `*p` and moves `*p` one step forward.
 *  Returns 0 without touching `*p` when `*p` is at or past the end of `src`. */
static inline char advance(const GoString *src, long *p) {
    long at = *p;

    /* still inside the input, consume one character */
    if (at < src->len) {
        *p = at + 1;
        return src->buf[at];
    }

    /* nothing left to read */
    return 0;
}
/* Skips leading whitespace starting at `*p`, then returns the next character
 * and leaves `*p` just past it; returns 0 when the input is exhausted.
 *
 * NOTE(review): this span appears to carry two alternative bodies back to
 * back (it looks like pre-change and post-change lines of a diff). As written,
 * the first `return` makes everything after it unreachable -- confirm which
 * body is the intended one. */
static inline char advance_ns(const GoString *src, long *p) {
/* variant 1: let lspace() locate the position (presumably of the first
 * non-space character -- confirm), then read through advance() */
*p = lspace(src->buf, src->len, *p);
return advance(src, p);
/* variant 2: scalar fast path, falling back to lspace() */
size_t vi = *p;
size_t nb = src->len;
const char * sp = src->buf;
/* it's likely to run into non-spaces within a few
 * characters, so test up to 4 characters manually */
for (int i = 0; i < 4 && vi < nb; i++, vi++) {
if (!isspace(sp[vi])) {
goto nospace;
}
}
/* too many spaces, use SIMD to search for characters */
/* a result at or past `nb` is treated as end of input, `*p` is left untouched */
if ((vi = lspace(sp, nb, vi)) >= nb) {
return 0;
}
nospace:
/* consume the character at `vi` */
*p = vi + 1;
return src->buf[vi];
}
static inline int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) {
@ -128,192 +163,219 @@ static inline int64_t advance_dword(const GoString *src, long *p, long dec, int6
}
}
static inline ssize_t advance_string(const GoString *src, long *p, int64_t *ep) {
ssize_t e;
ssize_t i;
static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
char ch;
uint64_t es;
uint64_t fe;
uint64_t os;
uint64_t m0;
uint64_t m1;
uint64_t mx;
uint64_t cr = 0;
/* check for end of string */
if ((e = strchr2(src, *p, '"', '\\')) < 0) {
*p = src->len;
return -ERR_EOF;
}
/* buffer pointers */
size_t nb = src->len;
const char * sp = src->buf;
const char * ss = src->buf;
/* encounters a '"' at the first scan, it's an unquoted string */
if (src->buf[e] == '"') {
*ep = -1;
return e;
}
#define ep_init() *ep = -1;
#define ep_setc() ep_setx(sp - ss - 1)
#define ep_setx(x) if (*ep == -1) { *ep = (x); }
/* search for the next double quote */
i = e;
e = strchr1(src, e + 1, '"');
/* seek to `p` */
nb -= p;
sp += p;
ep_init()
/* seek to the end of string */
while (e >= 0) {
int n = 0;
const char * q = src->buf + e;
#if USE_AVX2
/* initialize vectors */
__m256i v0;
__m256i v1;
__m256i q0;
__m256i q1;
__m256i x0;
__m256i x1;
__m256i cq = _mm256_set1_epi8('"');
__m256i cx = _mm256_set1_epi8('\\');
/* counting backslashes */
while (*--q == '\\') {
n++;
/* partial masks */
uint32_t s0;
uint32_t s1;
uint32_t t0;
uint32_t t1;
#else
/* initialize vectors */
__m128i v0;
__m128i v1;
__m128i v2;
__m128i v3;
__m128i q0;
__m128i q1;
__m128i q2;
__m128i q3;
__m128i x0;
__m128i x1;
__m128i x2;
__m128i x3;
__m128i cq = _mm_set1_epi8('"');
__m128i cx = _mm_set1_epi8('\\');
/* partial masks */
uint32_t s0;
uint32_t s1;
uint32_t s2;
uint32_t s3;
uint32_t t0;
uint32_t t1;
uint32_t t2;
uint32_t t3;
#endif
#define m0_mask(add) \
m1 &= ~cr; \
fe = (m1 << 1) | cr; \
os = (m1 & ~fe) & ODD_MASK; \
es = add(os, m1, &cr) << 1; \
m0 &= ~(fe & (es ^ EVEN_MASK));
/* 64-byte SIMD loop */
while (likely(nb >= 64)) {
#if USE_AVX2
v0 = _mm256_loadu_si256 ((const void *)(sp + 0));
v1 = _mm256_loadu_si256 ((const void *)(sp + 32));
q0 = _mm256_cmpeq_epi8 (v0, cq);
q1 = _mm256_cmpeq_epi8 (v1, cq);
x0 = _mm256_cmpeq_epi8 (v0, cx);
x1 = _mm256_cmpeq_epi8 (v1, cx);
s0 = _mm256_movemask_epi8 (q0);
s1 = _mm256_movemask_epi8 (q1);
t0 = _mm256_movemask_epi8 (x0);
t1 = _mm256_movemask_epi8 (x1);
m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
#else
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
v2 = _mm_loadu_si128 ((const void *)(sp + 32));
v3 = _mm_loadu_si128 ((const void *)(sp + 48));
q0 = _mm_cmpeq_epi8 (v0, cq);
q1 = _mm_cmpeq_epi8 (v1, cq);
q2 = _mm_cmpeq_epi8 (v2, cq);
q3 = _mm_cmpeq_epi8 (v3, cq);
x0 = _mm_cmpeq_epi8 (v0, cx);
x1 = _mm_cmpeq_epi8 (v1, cx);
x2 = _mm_cmpeq_epi8 (v2, cx);
x3 = _mm_cmpeq_epi8 (v3, cx);
s0 = _mm_movemask_epi8 (q0);
s1 = _mm_movemask_epi8 (q1);
s2 = _mm_movemask_epi8 (q2);
s3 = _mm_movemask_epi8 (q3);
t0 = _mm_movemask_epi8 (x0);
t1 = _mm_movemask_epi8 (x1);
t2 = _mm_movemask_epi8 (x2);
t3 = _mm_movemask_epi8 (x3);
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
#endif
/** update first quote position */
if (unlikely(m1 != 0)) {
ep_setx(sp - ss + __builtin_ctzll(m1))
}
/* pairs of backslashes cancel each other out */
if ((n & 1) == 0) {
break;
/** mask all the escaped quotes */
if (unlikely(m1 != 0 || cr != 0)) {
m0_mask(add64)
}
/* find the next double quote */
e = e + 1;
e = strchr1(src, e, '"');
}
/* check for end of string */
if (e < 0) {
*p = src->len;
return -ERR_EOF;
}
/* update the result */
*ep = i;
return e;
}
static inline int64_t advance_number(const GoString *src, long *p, long i, JsonState *ret, size_t sp) {
size_t n = src->len;
const char * s = src->buf;
/* check for EOF */
if (i >= n) {
*p = n;
return -ERR_EOF;
}
/* base factors */
int esm = 1;
int exp = 0;
int ovf = 0;
int rem = 0;
double val = 0;
int64_t idx = 0;
int64_t i64 = 0;
int64_t rvt = V_INTEGER;
/* initial state */
ret->iv = 0;
ret->ep = sp;
/* check for the special case of '0' */
if (s[i] == '0') {
if (++i >= n) {
*p = i;
return V_INTEGER;
/* check for end quote */
if (m0 != 0) {
return sp - ss + __builtin_ctzll(m0) + 1;
}
} else {
if (s[i] < '0' || s[i] > '9') {
*p = i;
return -ERR_INVAL;
/* move to the next block */
sp += 64;
nb -= 64;
}
/* 32-byte SIMD round */
if (likely(nb >= 32)) {
#if USE_AVX2
v0 = _mm256_loadu_si256 ((const void *)sp);
q0 = _mm256_cmpeq_epi8 (v0, cq);
x0 = _mm256_cmpeq_epi8 (v0, cx);
s0 = _mm256_movemask_epi8 (q0);
t0 = _mm256_movemask_epi8 (x0);
m0 = (uint64_t)s0;
m1 = (uint64_t)t0;
#else
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
q0 = _mm_cmpeq_epi8 (v0, cq);
q1 = _mm_cmpeq_epi8 (v1, cq);
x0 = _mm_cmpeq_epi8 (v0, cx);
x1 = _mm_cmpeq_epi8 (v1, cx);
s0 = _mm_movemask_epi8 (q0);
s1 = _mm_movemask_epi8 (q1);
t0 = _mm_movemask_epi8 (x0);
t1 = _mm_movemask_epi8 (x1);
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
#endif
/** update first quote position */
if (unlikely(m1 != 0)) {
ep_setx(sp - ss + __builtin_ctzll(m1))
}
/** mask all the escaped quotes */
if (unlikely(m1 != 0 || cr != 0)) {
m0_mask(add32)
}
/* check for end quote */
if (m0 != 0) {
return sp - ss + __builtin_ctzll(m0) + 1;
}
/* move to the next block */
sp += 32;
nb -= 32;
}
/* check for carry */
if (unlikely(cr != 0)) {
if (nb == 0) {
return -ERR_EOF;
} else {
while (!(ovf = __builtin_smulll_overflow((idx = i64), 10, &i64)) &&
!(ovf = __builtin_saddll_overflow(i64, s[i] - '0', &i64)) &&
!(++i >= n || !(s[i] >= '0' && s[i] <= '9')));
ep_setc()
sp++, nb--;
}
}
/* set the integer part */
ret->iv = i64;
ret->dv = i64;
/* check for integer overflow, in such case
* the number must be represented by double */
if (ovf) {
val = idx;
rvt = V_DOUBLE;
/* convert the remaining digits */
do {
val *= 10;
val += s[i++] - '0';
} while (i < n && s[i] >= '0' && s[i] <= '9');
/* set the integer part to INT64_MAX to indicate an overflow */
ret->dv = val;
ret->iv = INT64_MAX;
}
/* check for decimal points */
if (i < n && s[i] == '.') {
idx = ++i;
rvt = V_DOUBLE;
/* check for EOF */
if (i >= n) {
*p = n;
return -ERR_EOF;
}
/* should be a digit */
if (s[i] < '0' || s[i] > '9') {
*p = i;
return -ERR_INVAL;
}
/* convert the fractional part */
do {
rem *= 10;
rem += s[i++] - '0';
} while (i < n && s[i] >= '0' && s[i] <= '9');
/* combine with the integer part */
idx -= i;
ret->dv += pow10(rem, idx);
}
/* check for exponent */
if (i < n && (s[i] == 'e' || s[i] == 'E')) {
i++;
rvt = V_DOUBLE;
/* check for EOF */
if (i >= n) {
*p = n;
return -ERR_EOF;
}
/* check for the '+' or '-' sign */
if (s[i] == '+' || s[i] == '-') {
if (i >= n - 1) {
*p = i;
/* handle the remaining bytes with scalar code */
while (nb-- > 0 && (ch = *sp++) != '"') {
if (unlikely(ch == '\\')) {
if (nb == 0) {
return -ERR_EOF;
} else {
if (s[i++] == '+') {
esm = 1;
} else {
esm = -1;
}
ep_setc()
sp++, nb--;
}
}
/* should be a digit */
if (s[i] < '0' || s[i] > '9') {
*p = i;
return -ERR_INVAL;
}
/* convert the power */
do {
exp *= 10;
exp += s[i++] - '0';
} while (i < n && s[i] >= '0' && s[i] <= '9');
/* apply the power */
exp *= esm;
ret->dv = pow10(ret->dv, exp);
}
/* calculate the offset */
*p = i;
return rvt;
#undef ep_init
#undef ep_setc
#undef ep_setx
#undef m0_mask
/* check for quotes */
if (ch == '"') {
return sp - ss;
} else {
return -ERR_EOF;
}
}
/** Value Scanning Routines **/
@ -352,16 +414,17 @@ long value(const char *s, size_t n, long p, JsonState *ret, int allow_control) {
void vstring(const GoString *src, long *p, JsonState *ret) {
int64_t i = *p;
ssize_t e = advance_string(src, p, &ret->ep);
ssize_t e = advance_string(src, i, &ret->ep);
/* check for errors */
if (e < 0) {
*p = src->len;
ret->vt = e;
return;
}
/* update the result */
*p = e + 1;
*p = e;
ret->iv = i;
ret->vt = V_STRING;
}
@ -743,6 +806,218 @@ static inline long fsm_exec(StateMachine *self, const GoString *src, long *p) {
#undef FSM_CHAR
#undef FSM_XERR
#define check_bits(mv) \
if (unlikely((v = mv & (mv - 1)) != 0)) { \
return -(sp - ss + __builtin_ctz(v) + 1); \
}
#define check_sidx(iv) \
if (likely(iv == -1)) { \
iv = sp - ss - 1; \
} else { \
return -(sp - ss); \
}
#define check_vidx(iv, mv) \
if (mv != 0) { \
if (likely(iv == -1)) { \
iv = sp - ss + __builtin_ctz(mv); \
} else { \
return -(sp - ss + __builtin_ctz(mv) + 1); \
} \
}
/** Scans the number-like run at the start of `sp` without converting it.
 *
 *  The run consists of digits, '.', 'e' / 'E', '+' and '-'. While scanning,
 *  the offsets of the decimal point (`di`), the exponent marker (`ei`) and
 *  the sign (`si`) are recorded; each of them may occur at most once.
 *
 *  @param sp  start of the input
 *  @param nb  number of readable bytes at `sp`
 *  @return    the length of the run on success, or a negative value of the
 *             form `-(i + 1)`, where `i` is the offset of the offending
 *             character (`-1` is also returned for empty input)
 *
 *  Relies on the `check_bits`, `check_sidx` and `check_vidx` macros that are
 *  defined right before this function; they may return from it directly. */
static inline long skip_number(const char *sp, size_t nb) {
    long         di = -1;
    long         ei = -1;
    long         si = -1;
    const char * ss = sp;

    /* check for EOF */
    if (nb == 0) {
        return -1;
    }

    /* special case of '0': a leading zero that is not followed by '.' is a complete run */
    if (*sp == '0' && (nb == 1 || sp[1] != '.')) {
        return 1;
    }

#if USE_AVX2
    /* can do with AVX-2 */
    if (likely(nb >= 32)) {
        __m256i d9 = _mm256_set1_epi8('9');
        __m256i ds = _mm256_set1_epi8('/');
        __m256i dp = _mm256_set1_epi8('.');
        __m256i el = _mm256_set1_epi8('e');
        __m256i eu = _mm256_set1_epi8('E');
        __m256i xp = _mm256_set1_epi8('+');
        __m256i xm = _mm256_set1_epi8('-');

        /* 32-byte loop */
        do {
            __m256i sb = _mm256_loadu_si256  ((const void *)sp);
            __m256i i0 = _mm256_cmpgt_epi8   (sb, ds);
            __m256i i9 = _mm256_cmpgt_epi8   (sb, d9);
            __m256i id = _mm256_cmpeq_epi8   (sb, dp);
            __m256i il = _mm256_cmpeq_epi8   (sb, el);
            __m256i iu = _mm256_cmpeq_epi8   (sb, eu);
            __m256i ip = _mm256_cmpeq_epi8   (sb, xp);
            __m256i im = _mm256_cmpeq_epi8   (sb, xm);
            __m256i iv = _mm256_andnot_si256 (i9, i0);
            __m256i ie = _mm256_or_si256     (il, iu);
            __m256i is = _mm256_or_si256     (ip, im);
            __m256i rt = _mm256_or_si256     (iv, id);
            __m256i ru = _mm256_or_si256     (ie, is);
            __m256i rv = _mm256_or_si256     (rt, ru);

            /* exponent and sign position */
            uint32_t md = _mm256_movemask_epi8(id);
            uint32_t me = _mm256_movemask_epi8(ie);
            uint32_t ms = _mm256_movemask_epi8(is);
            uint32_t mr = _mm256_movemask_epi8(rv);

            /* mismatch position, the sentinel bit caps the result at 32 */
            uint32_t v;
            uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);

            /* mask out excess characters; the shift is unsigned
             * because `i` can be 31, which overflows a signed `1 << i` */
            if (i != 32) {
                md &= (1u << i) - 1;
                me &= (1u << i) - 1;
                ms &= (1u << i) - 1;
            }

            /* check & update decimal point, exponent and sign index */
            check_bits(md)
            check_bits(me)
            check_bits(ms)
            check_vidx(di, md)
            check_vidx(ei, me)
            check_vidx(si, ms)

            /* check for valid number */
            if (i != 32) {
                sp += i;
                _mm256_zeroupper();
                goto check_index;
            }

            /* move to next block */
            sp += 32;
            nb -= 32;
        } while (nb >= 32);

        /* clear the upper half to prevent AVX-SSE transition penalty */
        _mm256_zeroupper();
    }
#endif

    /* can do with SSE */
    if (likely(nb >= 16)) {
        __m128i dc = _mm_set1_epi8(':');
        __m128i ds = _mm_set1_epi8('/');
        __m128i dp = _mm_set1_epi8('.');
        __m128i el = _mm_set1_epi8('e');
        __m128i eu = _mm_set1_epi8('E');
        __m128i xp = _mm_set1_epi8('+');
        __m128i xm = _mm_set1_epi8('-');

        /* 16-byte loop */
        do {
            __m128i sb = _mm_loadu_si128 ((const void *)sp);
            __m128i i0 = _mm_cmpgt_epi8  (sb, ds);
            __m128i i9 = _mm_cmplt_epi8  (sb, dc);
            __m128i id = _mm_cmpeq_epi8  (sb, dp);
            __m128i il = _mm_cmpeq_epi8  (sb, el);
            __m128i iu = _mm_cmpeq_epi8  (sb, eu);
            __m128i ip = _mm_cmpeq_epi8  (sb, xp);
            __m128i im = _mm_cmpeq_epi8  (sb, xm);
            __m128i iv = _mm_and_si128   (i9, i0);
            __m128i ie = _mm_or_si128    (il, iu);
            __m128i is = _mm_or_si128    (ip, im);
            __m128i rt = _mm_or_si128    (iv, id);
            __m128i ru = _mm_or_si128    (ie, is);
            __m128i rv = _mm_or_si128    (rt, ru);

            /* exponent and sign position */
            uint32_t md = _mm_movemask_epi8(id);
            uint32_t me = _mm_movemask_epi8(ie);
            uint32_t ms = _mm_movemask_epi8(is);
            uint32_t mr = _mm_movemask_epi8(rv);

            /* mismatch position, the sentinel bit caps the result at 16 */
            uint32_t v;
            uint32_t i = __builtin_ctzll(~mr | 0x00010000);

            /* mask out excess characters */
            if (i != 16) {
                md &= (1u << i) - 1;
                me &= (1u << i) - 1;
                ms &= (1u << i) - 1;
            }

            /* check & update exponent and sign index */
            check_bits(md)
            check_bits(me)
            check_bits(ms)
            check_vidx(di, md)
            check_vidx(ei, me)
            check_vidx(si, ms)

            /* check for valid number */
            if (i != 16) {
                sp += i;
                goto check_index;
            }

            /* move to next block */
            sp += 16;
            nb -= 16;
        } while (nb >= 16);
    }

    /* remaining bytes, do with scalar code; `nb` is unsigned, so it must be
     * tested before the decrement -- `--nb >= 0` can never be false and
     * would keep reading past the end of the input */
    while (likely(nb-- > 0)) {
        switch (*sp++) {
            case '0' : /* fallthrough */
            case '1' : /* fallthrough */
            case '2' : /* fallthrough */
            case '3' : /* fallthrough */
            case '4' : /* fallthrough */
            case '5' : /* fallthrough */
            case '6' : /* fallthrough */
            case '7' : /* fallthrough */
            case '8' : /* fallthrough */
            case '9' : break;
            case '.' : check_sidx(di); break;
            case 'e' : /* fallthrough */
            case 'E' : check_sidx(ei); break;
            case '+' : /* fallthrough */
            case '-' : check_sidx(si); break;
            default  : sp--; goto check_index;
        }
    }

check_index:
    if (di == 0 || si == 0) {
        /* the run starts with a decimal point or a sign */
        return -1;
    } else if (si > 0 && ei != si - 1) {
        /* the sign does not directly follow an exponent marker */
        return -si - 1;
    } else if (di >= 0 && ei >= 0 && di > ei - 1) {
        /* the decimal point comes after the exponent marker */
        return -di - 1;
    } else if (di >= 0 && ei >= 0 && di == ei - 1) {
        /* the exponent marker directly follows the decimal point */
        return -ei - 1;
    } else {
        return sp - ss;
    }
}
#undef check_bits
#undef check_sidx
#undef check_vidx
/* Resets the state machine `m` with FSM_VAL as its initial state, then runs
 * it over `src` starting at `*p`. The result of fsm_exec() is returned as-is. */
long skip_one(const GoString *src, long *p, StateMachine *m) {
fsm_init(m, FSM_VAL);
return fsm_exec(m, src, p);
@ -761,40 +1036,44 @@ long skip_object(const GoString *src, long *p, StateMachine *m) {
long skip_string(const GoString *src, long *p) {
int64_t v;
ssize_t q = *p - 1;
ssize_t e = advance_string(src, p, &v);
ssize_t e = advance_string(src, *p, &v);
/* check for errors */
if (e < 0) {
/* check for errors, and update the position */
if (e >= 0) {
*p = e;
return q;
} else {
*p = src->len;
return e;
}
/* update the position */
*p = e + 1;
return q;
}
long skip_negative(const GoString *src, long *p) {
long q = *p - 1;
int64_t r;
JsonState v;
long i = *p;
long r = skip_number(src->buf + i, src->len - i);
/* skip the number */
if ((r = advance_number(src, p, *p, &v, q)) < 0) {
return r;
} else {
return q;
/* check for errors */
if (r < 0) {
*p -= r + 1;
return -ERR_INVAL;
}
/* update value pointer */
*p += r;
return i - 1;
}
long skip_positive(const GoString *src, long *p) {
long q = *p - 1;
int64_t r;
JsonState v;
long i = *p - 1;
long r = skip_number(src->buf + i, src->len - i);
/* skip the number */
if ((r = advance_number(src, p, q, &v, q)) < 0) {
return r;
} else {
return q;
/* check for errors */
if (r < 0) {
*p -= r + 2;
return -ERR_INVAL;
}
/* update value pointer */
*p += r - 1;
return i;
}

@ -1 +1 @@
Subproject commit 38a813682862252de3d1a016c90755467bff0ee9
Subproject commit daab6520b48bc30586f7468676c990b5c1f781bd