diff --git a/decoder/assembler_amd64.go b/decoder/assembler_amd64.go index 6547361..af4b89a 100644 --- a/decoder/assembler_amd64.go +++ b/decoder/assembler_amd64.go @@ -125,6 +125,7 @@ var ( ) var ( + _DF = jit.Reg("R10") // reuse R10 in generic decoder for flags _ET = jit.Reg("R10") _EP = jit.Reg("R11") ) @@ -871,12 +872,15 @@ var ( var ( _F_b64decode = jit.Imm(int64(_subr__b64decode)) + _F_decodeValue = jit.Imm(int64(_subr_decode_value)) +) + +var ( _F_skip_array = jit.Imm(int64(native.S_skip_array)) _F_skip_object = jit.Imm(int64(native.S_skip_object)) ) var ( - _F_decodeGeneric obj.Addr _F_decodeTypedPointer obj.Addr _F_FieldMap_GetCaseInsensitive obj.Addr ) @@ -888,24 +892,15 @@ const ( ) func init() { - _F_decodeGeneric = jit.Func(decodeGeneric) _F_decodeTypedPointer = jit.Func(decodeTypedPointer) _F_FieldMap_GetCaseInsensitive = jit.Func((*caching.FieldMap).GetCaseInsensitive) } func (self *_Assembler) _asm_OP_any(_ *_Instr) { - self.Emit("MOVQ" , _ARG_fv, _AX) // MOVQ fv, AX - self.Emit("MOVQ" , _IP, jit.Ptr(_SP, 0)) // MOVQ IP, (SP) - self.Emit("MOVQ" , _IL, jit.Ptr(_SP, 8)) // MOVQ IL, 8(SP) - self.Emit("MOVQ" , _IC, jit.Ptr(_SP, 16)) // MOVQ IC, 16(SP) - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 24)) // MOVQ AX, 24(SP) - self.call_go(_F_decodeGeneric) // CALL_GO decodeGeneric - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _IC) // MOVQ 32(SP), IC - self.Emit("MOVOU", jit.Ptr(_SP, 40), _X0) // MOVOU 40(SP), X0 - self.Emit("MOVQ" , jit.Ptr(_SP, 56), _EP) // MOVQ 56(SP), EP - self.Emit("TESTQ", _EP, _EP) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_parsing_error) // JNZ _parsing_error - self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) + self.Emit("MOVQ" , _ARG_fv, _DF) // MOVQ fv, DF + self.call(_F_decodeValue) // CALL decodeValue + self.Emit("TESTQ", _EP, _EP) // TESTQ EP, EP + self.Sjmp("JNZ" , _LB_parsing_error) // JNZ _parsing_error } func (self *_Assembler) _asm_OP_str(_ *_Instr) { diff --git a/decoder/generic.go b/decoder/generic.go deleted file mode 100644 index a4f2299..0000000 --- a/decoder/generic.go +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package decoder - -import ( - `encoding/json` - `reflect` - `sync` - `unsafe` - - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/bytedance/sonic/unquote` -) - -const ( - _S_val = iota - _S_arr - _S_arr_0 - _S_obj - _S_obj_x - _S_obj_delim -) - -var ( - mapVal = map[string]interface{}(nil) - mapType = rt.UnpackType(reflect.TypeOf(mapVal)) -) - -type _GenericDecoder struct { - types.StateMachine - Vp [types.MAX_RECURSE]*interface{} -} - -func (self *_GenericDecoder) val(v interface{}) types.ParsingError { - sp := self.Sp - vt := self.Vt[sp] - - /* must be a value or the first element of an array */ - if vt != _S_val && vt != _S_arr_0 { - return types.ERR_INVALID_CHAR - } - - /* set the value */ - self.Sp-- - *self.Vp[sp] = v - return 0 -} - -func (self *_GenericDecoder) exec(s string, i int, f uint64) (int, interface{}, types.ParsingError) { - var rv interface{} - var ss types.JsonState - var ex types.ParsingError - - /* initialize the state machine */ - self.Sp = 0 - self.Vp[0] = &rv - self.Vt[0] = _S_val - - /* string length and pointer */ - slen := len(s) - sbuf := (*rt.GoString)(unsafe.Pointer(&s)).Ptr - - /* run until the state goes empty */ - for self.Sp >= 0 { - switch i = native.Value(sbuf, slen, i, &ss, 1); ss.Vt { - default: { - return i, nil, types.ParsingError(-ss.Vt) - } - - /* EOF */ - case types.V_EOF: { - return i, nil, types.ERR_EOF - } - - /* null */ - case types.V_NULL: { - if ex = self.val(nil); ex != 0 { - return i - 4, nil, ex - } - } - - /* boolean true */ - case types.V_TRUE: { - if ex = self.val(true); ex != 0 { - return i - 4, nil, ex - } - } - - /* boolean false */ - case types.V_FALSE: { - if ex = self.val(false); ex != 0 { - return i - 5, nil, ex - } - } - - /* strings */ - case types.V_STRING: { - p := i - 1 - v := s[ss.Iv:p] - - /* check for escape sequence */ - if ss.Ep != -1 { - if v, ex = unquote.String(v); ex != 0 { - return int(ss.Iv) - 1, nil, ex - } - } - - /* check for object key */ - if vt := self.Vt[self.Sp]; vt != _S_obj && vt != _S_obj_x { - if ex = self.val(v); ex != 0 { - return int(ss.Iv) - 1, nil, ex - } else { - continue - } - } - - /* get the map */ - ef := rt.UnpackEface(*self.Vp[self.Sp]) - mp := ef.Value - - /* add the delimiter */ - self.Sp++ - self.Vt[self.Sp] = _S_obj_delim - self.Vp[self.Sp] = (*interface{})(mapassign_faststr(mapType, mp, v)) - } - - /* nested arrays */ - case types.V_ARRAY: { - vt := self.Vt[self.Sp] - vv := make([]interface{}, 1, 16) - - /* must be a value */ - if vt != _S_val && vt != _S_arr_0 { - return i - 1, nil, types.ERR_INVALID_CHAR - } - - /* set the value */ - self.Vt[self.Sp] = _S_arr - *self.Vp[self.Sp] = vv - - /* add the first element */ - self.Sp++ - self.Vt[self.Sp] = _S_arr_0 - self.Vp[self.Sp] = &vv[0] - } - - /* nested objects */ - case types.V_OBJECT: { - vt := self.Vt[self.Sp] - vv := map[string]interface{}{} - - /* must be a value */ - if vt != _S_val && vt != _S_arr_0 { - return i - 1, nil, types.ERR_INVALID_CHAR - } - - /* set the value */ - self.Vt[self.Sp] = _S_obj - *self.Vp[self.Sp] = vv - } - - /* floating point numbers */ - case types.V_DOUBLE: { - if (f & (1 << _F_use_number)) == 0 { - if ex = self.val(ss.Dv); ex != 0 { - return ss.Ep, nil, ex - } - } else { - if ex = self.val(json.Number(s[ss.Ep:i])); ex != 0 { - return ss.Ep, nil, ex - } - } - } - - /* integers */ - case types.V_INTEGER: { - if (f & (1 << _F_use_number)) != 0 { - if ex = self.val(json.Number(s[ss.Ep:i])); ex != 0 { - return ss.Ep, nil, ex - } - } else if (f & (1 << _F_use_int64)) == 0 { - if ex = self.val(float64(ss.Iv)); ex != 0 { - return ss.Ep, nil, ex - } - } else { - if ex = self.val(ss.Iv); ex != 0 { - return ss.Ep, nil, ex - } - } - } - - /* key separator ':' */ - case types.V_KEY_SEP: { - if self.Vt[self.Sp] == _S_obj_delim { - self.object_elem() - } else { - return i - 1, nil, types.ERR_INVALID_CHAR - } - } - - /* element separator ',' */ - case types.V_ELEM_SEP: { - switch self.Vt[self.Sp] { - case _S_obj : self.Vt[self.Sp] = _S_obj_x - case _S_arr : self.array_push(self.Vp[self.Sp]) - default : return i - 1, nil, types.ERR_INVALID_CHAR - } - } - - /* array end ']' */ - case types.V_ARRAY_END: { - switch self.Vt[self.Sp] { - case _S_arr : self.Sp-- - case _S_arr_0 : self.array_end() - default : return i - 1, nil, types.ERR_INVALID_CHAR - } - } - - /* object end '}' */ - case types.V_OBJECT_END: { - if self.Vt[self.Sp] == _S_obj { - self.Sp-- - } else { - return i - 1, nil, types.ERR_INVALID_CHAR - } - } - } - } - - /* all done */ - return i, rv, 0 -} - -func (self *_GenericDecoder) array_end() { - (*rt.GoSlice)((*rt.GoEface)(unsafe.Pointer(self.Vp[self.Sp - 1])).Value).Len = 0 - self.Sp -= 2 -} - -func (self *_GenericDecoder) array_add(v *interface{}) *interface{} { - vv := (*[]interface{})((*rt.GoEface)(unsafe.Pointer(v)).Value) - nb := len(*vv) - *vv = append(*vv, nil) - return &(*vv)[nb] -} - -func (self *_GenericDecoder) array_push(v *interface{}) { - self.Sp++ - self.Vt[self.Sp] = _S_val - self.Vp[self.Sp] = self.array_add(v) -} - -func (self *_GenericDecoder) object_elem() { - self.Vt[self.Sp] = _S_val - self.Vt[self.Sp - 1] = _S_obj -} - -var decoderPool = sync.Pool { - New: func() interface{} { - return new(_GenericDecoder) - }, -} - -func decodeGeneric(s string, i int, f uint64) (p int, v interface{}, e types.ParsingError) { - dec := decoderPool.Get().(*_GenericDecoder) - p, v, e = dec.exec(s, i, f) - decoderPool.Put(dec) - return -} diff --git a/decoder/generic_amd64.go b/decoder/generic_amd64.go new file mode 100644 index 0000000..70fd0c5 --- /dev/null +++ b/decoder/generic_amd64.go @@ -0,0 +1,674 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package decoder + +import ( + `encoding/json` + `fmt` + `reflect` + + `github.com/bytedance/sonic/internal/jit` + `github.com/bytedance/sonic/internal/native` + `github.com/bytedance/sonic/internal/native/types` + `github.com/twitchyliquid64/golang-asm/obj` +) + +/** Crucial Registers: + * + * ST(BX) : ro, decoder stack + * DF(R10) : ro, decoder flags + * EP(R11) : wo, error pointer + * IP(R12) : ro, input pointer + * IL(R13) : ro, input length + * IC(R14) : rw, input cursor + * VP(R15) : ro, value pointer (to an interface{}) + */ + +const ( + _VD_args = 64 // 64 bytes for passing arguments to other Go functions + _VD_saves = 40 // 40 bytes for saving the registers before CALL instructions + _VD_locals = 40 // 40 bytes for local variables +) + +const ( + _VD_offs = _VD_args + _VD_saves + _VD_locals + _VD_size = _VD_offs + 8 // 8 bytes for the parent frame pointer +) + +var ( + _VAR_ss = _VAR_ss_Vt + _VAR_df = jit.Ptr(_SP, _VD_args + _VD_saves) +) + +var ( + _VAR_ss_Vt = jit.Ptr(_SP, _VD_args + _VD_saves + 8) + _VAR_ss_Dv = jit.Ptr(_SP, _VD_args + _VD_saves + 16) + _VAR_ss_Iv = jit.Ptr(_SP, _VD_args + _VD_saves + 24) + _VAR_ss_Ep = jit.Ptr(_SP, _VD_args + _VD_saves + 32) +) + +type _ValueDecoder struct { + jit.BaseAssembler +} + +func (self *_ValueDecoder) build() uintptr { + self.Init(self.compile) + return *(*uintptr)(self.Load("decode_value", _VD_size, 0)) +} + +/** Function Calling Helpers **/ + +func (self *_ValueDecoder) save(r ...obj.Addr) { + for i, v := range r { + if i > _VD_saves / 8 - 1 { + panic("too many registers to save") + } else { + self.Emit("MOVQ", v, jit.Ptr(_SP, _VD_args + int64(i) * 8)) + } + } +} + +func (self *_ValueDecoder) load(r ...obj.Addr) { + for i, v := range r { + if i > _VD_saves / 8 - 1 { + panic("too many registers to load") + } else { + self.Emit("MOVQ", jit.Ptr(_SP, _VD_args + int64(i) * 8), v) + } + } +} + +func (self *_ValueDecoder) call(fn obj.Addr) { + self.Emit("MOVQ", fn, _AX) // MOVQ ${fn}, AX + self.Rjmp("CALL", _AX) // CALL AX +} + +func (self *_ValueDecoder) call_go(fn obj.Addr) { + self.save(_REG_go...) // SAVE $REG_go + self.call(fn) // CALL ${fn} + self.load(_REG_go...) // LOAD $REG_go +} + +/** Decoder Assembler **/ + +const ( + _S_val = iota + 1 + _S_arr + _S_arr_0 + _S_obj + _S_obj_x + _S_obj_delim +) + +const ( + _S_omask = (1 << _S_obj) | (1 << _S_obj_x) + _S_vmask = (1 << _S_val) | (1 << _S_arr_0) +) + +const ( + _X_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n') +) + +const ( + _A_init_len = 1 + _A_init_cap = 16 +) + +const ( + _ST_Sp = 0 + _ST_Vt = _PtrBytes + _ST_Vp = _PtrBytes * (types.MAX_RECURSE + 1) +) + +var ( + _V_true = jit.Imm(int64(pbool(true))) + _V_false = jit.Imm(int64(pbool(false))) + _F_value = jit.Imm(int64(native.S_value)) +) + +var ( + _V_max = jit.Imm(int64(types.V_MAX)) + _E_eof = jit.Imm(int64(types.ERR_EOF)) + _E_invalid = jit.Imm(int64(types.ERR_INVALID_CHAR)) +) + +var ( + _F_convTslice = jit.Func(convTslice) + _F_convTstring = jit.Func(convTstring) + _F_stack_exceed = jit.Func(stack_exceed) + _F_invalid_vtype = jit.Func(invalid_vtype) +) + +var ( + _T_map = jit.Type(reflect.TypeOf((map[string]interface{})(nil))) + _T_bool = jit.Type(reflect.TypeOf(false)) + _T_int64 = jit.Type(reflect.TypeOf(int64(0))) + _T_eface = jit.Type(reflect.TypeOf((*interface{})(nil)).Elem()) + _T_slice = jit.Type(reflect.TypeOf(([]interface{})(nil))) + _T_string = jit.Type(reflect.TypeOf("")) + _T_number = jit.Type(reflect.TypeOf(json.Number(""))) + _T_float64 = jit.Type(reflect.TypeOf(float64(0))) +) + +var _R_tab = map[int]string { + '[': "_decode_V_ARRAY", + '{': "_decode_V_OBJECT", + ':': "_decode_V_KEY_SEP", + ',': "_decode_V_ELEM_SEP", + ']': "_decode_V_ARRAY_END", + '}': "_decode_V_OBJECT_END", +} + +func (self *_ValueDecoder) compile() { + self.Emit("SUBQ", jit.Imm(_VD_size), _SP) // SUBQ $_VD_size, SP + self.Emit("MOVQ", _BP, jit.Ptr(_SP, _VD_offs)) // MOVQ BP, _VD_offs(SP) + self.Emit("LEAQ", jit.Ptr(_SP, _VD_offs), _BP) // LEAQ _VD_offs(SP), BP + + /* initialize the state machine */ + self.Emit("XORL", _CX, _CX) // XORL CX, CX + self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df + self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST + self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp + self.Emit("MOVQ", _VP, jit.Ptr(_ST, _ST_Vp)) // MOVQ VP, ST.Vp[0] + self.Emit("MOVQ", jit.Imm(_S_val), jit.Ptr(_ST, _ST_Vt)) // MOVQ _S_val, ST.Vt[0] + self.Sjmp("JMP" , "_next") // JMP _next + + /* set the value from previous round */ + self.Link("_set_value") // _set_value: + self.Emit("MOVL" , jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX + self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ" , jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX + self.Sjmp("JNC" , "_vtype_error") // JNC _vtype_error + self.Emit("XORL" , _AX, _AX) // XORL AX, AX + self.Emit("SUBQ" , jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp + self.Emit("XCHGQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _AX) // XCHGQ ST.Vp[CX], AX + self.Emit("MOVQ" , _R8, jit.Ptr(_AX, 0)) // MOVQ R8, (AX) + self.Emit("MOVQ" , _R9, jit.Ptr(_AX, 8)) // MOVQ R9, 8(AX) + + /* check for value stack */ + self.Link("_next") // _next: + self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _AX) // MOVQ ST.Sp, AX + self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX + self.Sjmp("JS" , "_return") // JS _return + + /* fast path: no-space or 1-space cases */ + self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL + self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF + self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX + self.Emit("MOVQ" , jit.Imm(_X_space), _DX) // MOVQ _X_space, DX + self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX + self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast + + /* 1-space case */ + self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC + self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL + self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF + self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX + + /* fast path: use lookup table to select decoder */ + self.Link("_decode_fast") // _decode_fast: + self.Byte(0x48, 0x8d, 0x3d) // LEAQ ?(PC), DI + self.Sref("_decode_tab", 4) // .... &_decode_tab + self.Emit("MOVLQSX", jit.Sib(_DI, _AX, 4, 0), _AX) // MOVLQSX (DI)(AX*4), AX + self.Emit("TESTQ" , _AX, _AX) // TESTQ AX, AX + self.Sjmp("JZ" , "_decode_native") // JZ _decode_native + self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC + self.Emit("ADDQ" , _DI, _AX) // ADDQ DI, AX + self.Rjmp("JMP" , _AX) // JMP AX + + /* decode with native decoder */ + self.Link("_decode_native") // _decode_native: + self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI + self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI + self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX + self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX + self.Emit("MOVL", jit.Imm(1), _R8) // MOVL $1, R8 + self.call(_F_value) // CALL value + self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC + + /* check for errors */ + self.Emit("MOVQ" , _VAR_ss_Vt, _AX) // MOVQ ss.Vt, AX + self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX + self.Sjmp("JS" , "_parsing_error") // JS _parsing_error + self.Sjmp("JZ" , "_invalid_vtype") // JZ _invalid_vtype + self.Emit("CMPQ" , _AX, _V_max) // CMPQ AX, _V_max + self.Sjmp("JA" , "_invalid_vtype") // JA _invalid_vtype + + /* jump table selector */ + self.Byte(0x48, 0x8d, 0x3d) // LEAQ ?(PC), DI + self.Sref("_switch_table", 4) // .... &_switch_table + self.Emit("MOVLQSX", jit.Sib(_DI, _AX, 4, -4), _AX) // MOVLQSX -4(DI)(AX*4), AX + self.Emit("ADDQ" , _DI, _AX) // ADDQ DI, AX + self.Rjmp("JMP" , _AX) // JMP AX + + /** V_EOF **/ + self.Link("_decode_V_EOF") // _decode_V_EOF: + self.Emit("MOVL", _E_eof, _EP) // MOVL _E_eof, EP + self.Sjmp("JMP" , "_error") // JMP _error + + /** V_NULL **/ + self.Link("_decode_V_NULL") // _decode_V_NULL: + self.Emit("XORL", _R8, _R8) // XORL R8, R8 + self.Emit("XORL", _R9, _R9) // XORL R9, R9 + self.Emit("LEAQ", jit.Ptr(_IC, -4), _DI) // LEAQ -4(IC), DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /** V_TRUE **/ + self.Link("_decode_V_TRUE") // _decode_V_TRUE: + self.Emit("MOVQ", _T_bool, _R8) // MOVQ _T_bool, R8 + self.Emit("MOVQ", _V_true, _R9) // MOVQ _V_true, R9 + self.Emit("LEAQ", jit.Ptr(_IC, -4), _DI) // LEAQ -4(IC), DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /** V_FALSE **/ + self.Link("_decode_V_FALSE") // _decode_V_FALSE: + self.Emit("MOVQ", _T_bool, _R8) // MOVQ _T_bool, R8 + self.Emit("MOVQ", _V_false, _R9) // MOVQ _V_false, R9 + self.Emit("LEAQ", jit.Ptr(_IC, -5), _DI) // LEAQ -5(IC), DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /** V_ARRAY **/ + self.Link("_decode_V_ARRAY") // _decode_V_ARRAY: + self.Emit("MOVL", jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX + self.Sjmp("JNC" , "_invalid_char") // JNC _invalid_char + + /* create a new array */ + self.Emit("MOVQ", _T_eface, _AX) // MOVQ _T_eface, AX + self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) + self.Emit("MOVQ", jit.Imm(_A_init_len), jit.Ptr(_SP, 8)) // MOVQ _A_init_len, 8(SP) + self.Emit("MOVQ", jit.Imm(_A_init_cap), jit.Ptr(_SP, 16)) // MOVQ _A_init_cap, 16(SP) + self.call_go(_F_makeslice) // CALL_GO runtime.makeslice + self.Emit("MOVQ", jit.Ptr(_SP, 24), _DX) // MOVQ 24(SP), DX + + /* pack into an interface */ + self.Emit("MOVQ", _DX, jit.Ptr(_SP, 0)) // MOVQ DX, (SP) + self.Emit("MOVQ", jit.Imm(_A_init_len), jit.Ptr(_SP, 8)) // MOVQ _A_init_len, 8(SP) + self.Emit("MOVQ", jit.Imm(_A_init_cap), jit.Ptr(_SP, 16)) // MOVQ _A_init_cap, 16(SP) + self.call_go(_F_convTslice) // CALL_GO runtime.convTslice + self.Emit("MOVQ", jit.Ptr(_SP, 24), _R8) // MOVQ 24(SP), R8 + + /* replace current state with an array */ + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI + self.Emit("MOVQ", jit.Imm(_S_arr), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr, ST.Vt[CX] + self.Emit("MOVQ", _T_slice, _AX) // MOVQ _T_slice, AX + self.Emit("MOVQ", _AX, jit.Ptr(_SI, 0)) // MOVQ AX, (SI) + self.Emit("MOVQ", _R8, jit.Ptr(_SI, 8)) // MOVQ R8, 8(SI) + + /* add a new slot for the first element */ + self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX + self.Emit("CMPQ", _CX, jit.Imm(types.MAX_RECURSE)) // CMPQ CX, ${types.MAX_RECURSE} + self.Sjmp("JA" , "_stack_overflow") // JA _stack_overflow + self.Emit("MOVQ", jit.Ptr(_R8, 0), _AX) // MOVQ (R8), AX + self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp + self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] + self.Emit("MOVQ", jit.Imm(_S_arr_0), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr_0, ST.Vt[CX] + self.Sjmp("JMP" , "_next") // JMP _next + + /** V_OBJECT **/ + self.Link("_decode_V_OBJECT") // _decode_V_OBJECT: + self.Emit("MOVL", jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX + self.Sjmp("JNC" , "_invalid_char") // JNC _invalid_char + self.call_go(_F_makemap_small) // CALL_GO runtime.makemap_small + self.Emit("MOVQ", jit.Ptr(_SP, 0), _AX) // MOVQ (SP), AX + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI + self.Emit("MOVQ", _T_map, _DX) // MOVQ _T_map, DX + self.Emit("MOVQ", _DX, jit.Ptr(_SI, 0)) // MOVQ DX, (SI) + self.Emit("MOVQ", _AX, jit.Ptr(_SI, 8)) // MOVQ AX, 8(SI) + self.Emit("MOVQ", jit.Imm(_S_obj), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj, ST.Vt[CX] + self.Sjmp("JMP" , "_next") // JMP _next + + /** V_STRING **/ + self.Link("_decode_V_STRING") // _decode_V_STRING: + self.Emit("XORL", _DX, _DX) // XORL DX, DX + self.Emit("MOVQ", _VAR_ss_Iv, _CX) // MOVQ ss.Iv, CX + self.Emit("MOVQ", _IC, _AX) // MOVQ IC, AX + self.Emit("SUBQ", _CX, _AX) // SUBQ CX, AX + + /* check for escapes */ + self.Emit("CMPQ", _VAR_ss_Ep, jit.Imm(-1)) // CMPQ ss.Ep, $-1 + self.Sjmp("JNE" , "_unquote") // JNE _unquote + self.Emit("SUBQ", jit.Imm(1), _AX) // SUBQ $1, AX + self.Emit("LEAQ", jit.Sib(_IP, _CX, 1, 0), _R8) // LEAQ (IP)(CX), R8 + + /* strings with no escape sequences */ + self.Link("_noescape") // _noescape: + self.Emit("MOVL", jit.Imm(_S_omask), _DI) // MOVL _S_omask, DI + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _SI) // MOVQ ST.Vt[CX], SI + self.Emit("BTQ" , _SI, _DI) // BTQ SI, DI + self.Sjmp("JC" , "_object_key") // JC _object_key + + /* check for pre-packed strings, avoid 1 allocation */ + self.Emit("TESTQ", _DX, _DX) // TESTQ DX, DX + self.Sjmp("JNZ" , "_packed_str") // JNZ _packed_str + self.Emit("MOVQ" , _R8, jit.Ptr(_SP, 0)) // MOVQ R8, (SP) + self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) + self.call_go(_F_convTstring) // CALL_GO runtime.convTstring + self.Emit("MOVQ" , jit.Ptr(_SP, 16), _R9) // MOVQ 16(SP), R9 + + /* packed string already in R9 */ + self.Link("_packed_str") // _packed_str: + self.Emit("MOVQ", _T_string, _R8) // MOVQ _T_string, R8 + self.Emit("MOVQ", _VAR_ss_Iv, _DI) // MOVQ ss.Iv, DI + self.Emit("SUBQ", jit.Imm(1), _DI) // SUBQ $1, DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /* the string is an object key, get the map */ + self.Link("_object_key") + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI + self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI + + /* add a new delimiter */ + self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX + self.Emit("CMPQ", _CX, jit.Imm(types.MAX_RECURSE)) // CMPQ CX, ${types.MAX_RECURSE} + self.Sjmp("JA" , "_stack_overflow") // JA _stack_overflow + self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp + self.Emit("MOVQ", jit.Imm(_S_obj_delim), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj_delim, ST.Vt[CX] + + /* add a new slot int the map */ + self.Emit("MOVQ", _T_map, _DX) // MOVQ _T_map, DX + self.Emit("MOVQ", _DX, jit.Ptr(_SP, 0)) // MOVQ DX, (SP) + self.Emit("MOVQ", _SI, jit.Ptr(_SP, 8)) // MOVQ SI, 8(SP) + self.Emit("MOVQ", _R8, jit.Ptr(_SP, 16)) // MOVQ R9, 16(SP) + self.Emit("MOVQ", _AX, jit.Ptr(_SP, 24)) // MOVQ AX, 24(SP) + self.call_go(_F_mapassign_faststr) // CALL_GO runtime.mapassign_faststr + self.Emit("MOVQ", jit.Ptr(_SP, 32), _AX) // MOVQ 32(SP), AX + + /* add to the pointer stack */ + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] + self.Sjmp("JMP" , "_next") // JMP _next + + /* allocate memory to store the string header and unquoted result */ + self.Link("_unquote") // _unquote: + self.Emit("ADDQ", jit.Imm(15), _AX) // ADDQ $15, AX + self.Emit("MOVQ", _T_byte, _CX) // MOVQ _T_byte, CX + self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) + self.Emit("MOVB", jit.Imm(0), jit.Ptr(_SP, 16)) // MOVB $0, 16(SP) + self.call_go(_F_mallocgc) // CALL_GO runtime.mallocgc + self.Emit("MOVQ", jit.Ptr(_SP, 24), _R9) // MOVQ 24(SP), R9 + + /* prepare the unquoting parameters */ + self.Emit("MOVQ" , _VAR_ss_Iv, _CX) // MOVQ ss.Iv, CX + self.Emit("LEAQ" , jit.Sib(_IP, _CX, 1, 0), _DI) // LEAQ (IP)(CX), DI + self.Emit("NEGQ" , _CX) // NEGQ CX + self.Emit("LEAQ" , jit.Sib(_IC, _CX, 1, -1), _SI) // LEAQ -1(IC)(CX), SI + self.Emit("LEAQ" , jit.Ptr(_R9, 16), _DX) // LEAQ 16(R8), DX + self.Emit("LEAQ" , _VAR_ss_Ep, _CX) // LEAQ ss.Ep, CX + self.Emit("XORL" , _R8, _R8) // XORL R8, R8 + self.Emit("BTQ" , jit.Imm(_F_disable_urc), _VAR_df) // BTQ ${_F_disable_urc}, fv + self.Emit("SETCC", _R8) // SETCC R8 + self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _R8) // SHLQ ${types.B_UNICODE_REPLACE}, R8 + + /* unquote the string, with R9 been preserved */ + self.save(_R9) // SAVE R9 + self.call(_F_unquote) // CALL unquote + self.load(_R9) // LOAD R9 + + /* check for errors */ + self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX + self.Sjmp("JS" , "_unquote_error") // JS _unquote_error + self.Emit("MOVL" , jit.Imm(1), _DX) // MOVL $1, DX + self.Emit("LEAQ" , jit.Ptr(_R9, 16), _R8) // ADDQ $16, R8 + self.Emit("MOVQ" , _R8, jit.Ptr(_R9, 0)) // MOVQ R8, (R9) + self.Emit("MOVQ" , _AX, jit.Ptr(_R9, 8)) // MOVQ AX, 8(R9) + self.Sjmp("JMP" , "_noescape") // JMP _noescape + + /** V_DOUBLE **/ + self.Link("_decode_V_DOUBLE") // _decode_V_DOUBLE: + self.Emit("BTQ" , jit.Imm(_F_use_number), _VAR_df) // BTQ _F_use_number, df + self.Sjmp("JC" , "_use_number") // JC _use_number + self.Emit("MOVSD", _VAR_ss_Dv, _X0) // MOVSD ss.Dv, X0 + self.Sjmp("JMP" , "_use_float64") // JMP _use_float64 + + /** V_INTEGER **/ + self.Link("_decode_V_INTEGER") // _decode_V_INTEGER: + self.Emit("BTQ" , jit.Imm(_F_use_number), _VAR_df) // BTQ _F_use_number, df + self.Sjmp("JC" , "_use_number") // JC _use_number + self.Emit("BTQ" , jit.Imm(_F_use_int64), _VAR_df) // BTQ _F_use_int64, df + self.Sjmp("JC" , "_use_int64") // JC _use_int64 + self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX + self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0 + + /* represent number as `float64` */ + self.Link("_use_float64") // _use_float64: + self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP) + self.call_go(_F_convT64) // CALL_GO runtime.convT64 + self.Emit("MOVQ" , _T_float64, _R8) // MOVQ _T_float64, R8 + self.Emit("MOVQ" , jit.Ptr(_SP, 8), _R9) // MOVQ 8(SP), R9 + self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /* represent number as `json.Number` */ + self.Link("_use_number") // _use_number + self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX + self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI + self.Emit("MOVQ", _IC, _CX) // MOVQ IC, CX + self.Emit("SUBQ", _AX, _CX) // SUBQ AX, CX + self.Emit("MOVQ", _SI, jit.Ptr(_SP, 0)) // MOVQ SI, (SP) + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) + self.call_go(_F_convTstring) // CALL_GO runtime.convTstring + self.Emit("MOVQ", _T_number, _R8) // MOVQ _T_number, R8 + self.Emit("MOVQ", jit.Ptr(_SP, 16), _R9) // MOVQ 16(SP), R9 + self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /* represent number as `int64` */ + self.Link("_use_int64") // _use_int64: + self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX + self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) + self.call_go(_F_convT64) // CALL_GO runtime.convT64 + self.Emit("MOVQ", _T_int64, _R8) // MOVQ _T_int64, R8 + self.Emit("MOVQ", jit.Ptr(_SP, 8), _R9) // MOVQ 8(SP), R9 + self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI + self.Sjmp("JMP" , "_set_value") // JMP _set_value + + /** V_KEY_SEP **/ + self.Link("_decode_V_KEY_SEP") // _decode_V_KEY_SEP: + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("CMPQ", _AX, jit.Imm(_S_obj_delim)) // CMPQ AX, _S_obj_delim + self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char + self.Emit("MOVQ", jit.Imm(_S_val), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_val, ST.Vt[CX] + self.Emit("MOVQ", jit.Imm(_S_obj), jit.Sib(_ST, _CX, 8, _ST_Vt - 8)) // MOVQ _S_obj, ST.Vt[CX - 1] + self.Sjmp("JMP" , "_next") // JMP _next + + /** V_ELEM_SEP **/ + self.Link("_decode_V_ELEM_SEP") // _decode_V_ELEM_SEP: + self.Emit("MOVQ" , jit.Imm(_S_obj), _AX) // MOVQ _S_obj, AX + self.Emit("MOVQ" , jit.Imm(_S_obj_x), _DX) // MOVQ _S_obj_x, DX + self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("CMPXCHGQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vt)) // CMPXCHGQ DX, ST.Vt[CX] + self.Sjmp("JZ" , "_next") // JZ _next + self.Emit("CMPQ" , _AX, jit.Imm(_S_arr)) // CMPQ _AX, _S_arr + self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char + + /* arrays */ + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI + self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI + self.Emit("MOVQ", jit.Ptr(_SI, 8), _DX) // MOVQ 8(SI), DX + self.Emit("CMPQ", _DX, jit.Ptr(_SI, 16)) // CMPQ DX, 16(SI) + self.Sjmp("JAE" , "_array_more") // JAE _array_more + + /* add a slot for the new element */ + self.Link("_array_append") // _array_append: + self.Emit("ADDQ", jit.Imm(1), jit.Ptr(_SI, 8)) // ADDQ $1, 8(SI) + self.Emit("MOVQ", jit.Ptr(_SI, 0), _SI) // MOVQ (SI), SI + self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX + self.Emit("SHLQ", jit.Imm(1), _DX) // SHLQ $1, DX + self.Emit("LEAQ", jit.Sib(_SI, _DX, 8, 0), _SI) // LEAQ (SI)(DX*8), SI + self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp + self.Emit("MOVQ", _SI, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ SI, ST.Vp[CX] + self.Emit("MOVQ", jit.Imm(_S_val), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_val, ST.Vt[CX} + self.Sjmp("JMP" , "_next") // JMP _next + + /** V_ARRAY_END **/ + self.Link("_decode_V_ARRAY_END") // _decode_V_ARRAY_END: + self.Emit("XORL", _DX, _DX) // XORL DX, DX + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("CMPQ", _AX, jit.Imm(_S_arr_0)) // CMPQ AX, _S_arr_0 + self.Sjmp("JE" , "_first_item") // JE _first_item + self.Emit("CMPQ", _AX, jit.Imm(_S_arr)) // CMPQ AX, _S_arr + self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char + self.Emit("SUBQ", jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp + self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ DX, ST.Vp[CX] + self.Sjmp("JMP" , "_next") // JMP _next + + /* first element of an array */ + self.Link("_first_item") // _first_item: + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("SUBQ", jit.Imm(2), jit.Ptr(_ST, _ST_Sp)) // SUBQ $2, ST.Sp + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp - 8), _SI) // MOVQ ST.Vp[CX - 1], SI + self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI + self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp - 8)) // MOVQ DX, ST.Vp[CX - 1] + self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ DX, ST.Vp[CX] + self.Emit("MOVQ", _DX, jit.Ptr(_SI, 8)) // MOVQ DX, 8(SI) + self.Sjmp("JMP" , "_next") // JMP _next + + /** V_OBJECT_END **/ + self.Link("_decode_V_OBJECT_END") // _decode_V_OBJECT_END: + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX + self.Emit("CMPQ", _AX, jit.Imm(_S_obj)) // CMPQ AX, _S_obj + self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char + self.Emit("XORL", _AX, _AX) // XORL AX, AX + self.Emit("SUBQ", jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp + self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] + self.Sjmp("JMP" , "_next") // JMP _next + + /* return from decoder */ + self.Link("_return") // _return: + self.Emit("XORL", _EP, _EP) // XORL EP, EP + self.Link("_epilogue") // _epilogue: + self.Emit("SUBQ", jit.Imm(_FsmOffset), _ST) // SUBQ _FsmOffset, _ST + self.Emit("MOVQ", jit.Ptr(_SP, _VD_offs), _BP) // MOVQ _VD_offs(SP), BP + self.Emit("ADDQ", jit.Imm(_VD_size), _SP) // ADDQ $_VD_size, SP + self.Emit("RET") // RET + + /* array expand */ + self.Link("_array_more") // _array_more: + self.Emit("MOVQ" , _T_eface, _AX) // MOVQ _T_eface, AX + self.Emit("MOVOU", jit.Ptr(_SI, 0), _X0) // MOVOU (SI), X0 + self.Emit("MOVQ" , jit.Ptr(_SI, 16), _DX) // MOVQ 16(SI), DX + self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) + self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) + self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 24)) // MOVQ DX, 24(SP) + self.Emit("SHLQ" , jit.Imm(1), _DX) // SHLQ $1, DX + self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 32)) // MOVQ DX, 32(SP) + self.call_go(_F_growslice) // CALL_GO runtime.growslice + self.Emit("MOVQ" , jit.Ptr(_SP, 40), _DI) // MOVOU 40(SP), DI + self.Emit("MOVQ" , jit.Ptr(_SP, 48), _DX) // MOVOU 48(SP), DX + self.Emit("MOVQ" , jit.Ptr(_SP, 56), _AX) // MOVQ 56(SP), AX + + /* update the slice */ + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI + self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI + self.Emit("MOVQ", _DI, jit.Ptr(_SI, 0)) // MOVQ DI, (SI) + self.Emit("MOVQ", _DX, jit.Ptr(_SI, 8)) // MOVQ DX, 8(SI) + self.Emit("MOVQ", _AX, jit.Ptr(_SI, 16)) // MOVQ AX, 16(AX) + self.Sjmp("JMP" , "_array_append") // JMP _array_append + + /* error handlers */ + self.Link("_vtype_error") // _vtype_error: + self.Emit("MOVQ" , _DI, _IC) // MOVQ DI, IC + self.Emit("MOVL" , _E_invalid, _EP) // MOVL _E_invalid, EP + self.Sjmp("JMP" , "_error") // JMP _error + self.Link("_invalid_char") // _invalid_char: + self.Emit("SUBQ" , jit.Imm(1), _IC) // SUBQ $1, IC + self.Emit("MOVL" , _E_invalid, _EP) // MOVL _E_invalid, EP + self.Sjmp("JMP" , "_error") // JMP _error + self.Link("_unquote_error") // _unquote_error: + self.Emit("MOVQ" , _VAR_ss_Iv, _IC) // MOVQ ss.Iv, IC + self.Emit("SUBQ" , jit.Imm(1), _IC) // SUBQ $1, IC + self.Link("_parsing_error") // _parsing_error: + self.Emit("NEGQ" , _AX) // NEGQ AX + self.Emit("MOVQ" , _AX, _EP) // MOVQ AX, EP + self.Link("_error") // _error: + self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 + self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) + self.Sjmp("JMP" , "_epilogue") // JMP _epilogue + + /* invalid value type, never returns */ + self.Link("_invalid_vtype") + self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) + self.call(_F_invalid_vtype) // CALL invalid_type + self.Emit("UD2") // UD2 + + /* stack overflow, never returns */ + self.Link("_stack_overflow") + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 0)) // MOVQ CX, (SP) + self.call(_F_stack_exceed) // CALL stack_exceed + self.Emit("UD2") // UD2 + + /* switch jump table */ + self.Link("_switch_table") // _switch_table: + self.Sref("_decode_V_EOF", 0) // SREF &_decode_V_EOF, $0 + self.Sref("_decode_V_NULL", -4) // SREF &_decode_V_NULL, $-4 + self.Sref("_decode_V_TRUE", -8) // SREF &_decode_V_TRUE, $-8 + self.Sref("_decode_V_FALSE", -12) // SREF &_decode_V_FALSE, $-12 + self.Sref("_decode_V_ARRAY", -16) // SREF &_decode_V_ARRAY, $-16 + self.Sref("_decode_V_OBJECT", -20) // SREF &_decode_V_OBJECT, $-20 + self.Sref("_decode_V_STRING", -24) // SREF &_decode_V_STRING, $-24 + self.Sref("_decode_V_DOUBLE", -28) // SREF &_decode_V_DOUBLE, $-28 + self.Sref("_decode_V_INTEGER", -32) // SREF &_decode_V_INTEGER, $-32 + self.Sref("_decode_V_KEY_SEP", -36) // SREF &_decode_V_KEY_SEP, $-36 + self.Sref("_decode_V_ELEM_SEP", -40) // SREF &_decode_V_ELEM_SEP, $-40 + self.Sref("_decode_V_ARRAY_END", -44) // SREF &_decode_V_ARRAY_END, $-44 + self.Sref("_decode_V_OBJECT_END", -48) // SREF &_decode_V_OBJECT_END, $-48 + + /* fast character lookup table */ + self.Link("_decode_tab") // _decode_tab: + self.Sref("_decode_V_EOF", 0) // SREF &_decode_V_EOF, $0 + + /* generate rest of the tabs */ + for i := 1; i < 256; i++ { + if to, ok := _R_tab[i]; ok { + self.Sref(to, -int64(i) * 4) + } else { + self.Byte(0x00, 0x00, 0x00, 0x00) + } + } +} + +/** Generic Decoder **/ + +var ( + _subr_decode_value = new(_ValueDecoder).build() +) + +//go:nosplit +func stack_exceed(n int) { + panic(fmt.Sprintf("stack size exceeds limit %d: %d", types.MAX_RECURSE, n)) +} + +//go:nosplit +func invalid_vtype(vt types.ValueType) { + throw(fmt.Sprintf("invalid value type: %d", vt)) +} diff --git a/decoder/generic_amd64_test.s b/decoder/generic_amd64_test.s new file mode 100644 index 0000000..29eed01 --- /dev/null +++ b/decoder/generic_amd64_test.s @@ -0,0 +1,35 @@ +// +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "go_asm.h" +#include "funcdata.h" +#include "textflag.h" + +TEXT ·decodeValueStub(SB), NOSPLIT, $0 - 72 + NO_LOCAL_POINTERS + PXOR X0, X0 + MOVOU X0, rv+48(FP) + MOVQ st+0(FP), BX + MOVQ sp+8(FP), R12 + MOVQ sn+16(FP), R13 + MOVQ ic+24(FP), R14 + MOVQ vp+32(FP), R15 + MOVQ df+40(FP), R10 + MOVQ ·_subr_decode_value(SB), AX + CALL AX + MOVQ R14, rp+48(FP) + MOVQ R11, ex+56(FP) + RET diff --git a/decoder/generic_test.go b/decoder/generic_test.go index ee021fc..19dbab0 100644 --- a/decoder/generic_test.go +++ b/decoder/generic_test.go @@ -22,17 +22,34 @@ import ( `testing` `github.com/bytedance/sonic/ast` + `github.com/bytedance/sonic/internal/native/types` `github.com/davecgh/go-spew/spew` + `github.com/stretchr/testify/assert` `github.com/stretchr/testify/require` ) +//go:nosplit +func decodeValueStub(_ *_Stack, _ string, _ int, _ *interface{}, _ uint64) (int, types.ParsingError) + +func decodeValue(k *_Stack, s string, i int, f uint64) (p int, v interface{}, e types.ParsingError) { + p, e = decodeValueStub(k, s, i, &v, f) + return +} + +func decodeGeneric(s string, i int, f uint64) (p int, v interface{}, e types.ParsingError) { + t := newStack() + p, e = decodeValueStub(t, s, i, &v, f) + freeStack(t) + return +} + func TestGeneric_DecodeInterface(t *testing.T) { s := `[null, true, false, 1234, -1.25e-8, "hello\nworld", [], {"asdf": [1, 2.5, "qwer", null, true, false, [], {"zxcv": "fghj"}], "qwer": 7777}]` i, v, err := decodeGeneric(s, 0, 0) + assert.Equal(t, len(s), i) if err != 0 { require.NoError(t, err) } - require.Equal(t, len(s), i) fmt.Print("v: ") spew.Dump(v) fmt.Printf("type: %s\n", reflect.TypeOf(v)) @@ -48,12 +65,14 @@ func BenchmarkGeneric_DecodeAST(b *testing.B) { } func BenchmarkGeneric_DecodeGeneric(b *testing.B) { - _, _, _ = decodeGeneric(TwitterJson, 0, 0) + t := newStack() + _, _, _ = decodeValue(t, TwitterJson, 0, 0) b.SetBytes(int64(len(TwitterJson))) b.ResetTimer() for i := 0; i < b.N; i++ { - _, _, _ = decodeGeneric(TwitterJson, 0, 0) + _, _, _ = decodeValue(t, TwitterJson, 0, 0) } + freeStack(t) } func BenchmarkGeneric_Parallel_DecodeAST(b *testing.B) { diff --git a/decoder/pools.go b/decoder/pools.go index 850c777..237c609 100644 --- a/decoder/pools.go +++ b/decoder/pools.go @@ -26,13 +26,18 @@ import ( ) const ( - _MinSlice = 16 - _MaxStack = 262144 // 256k slots - _FsmOffset = int64(unsafe.Offsetof(_Stack{}.mm)) + _MinSlice = 16 + _MaxStack = 262144 // 256k slots +) + +const ( + _PtrBytes = _PTR_SIZE / 8 + _FsmOffset = (_MaxStack + 1) * _PtrBytes ) var ( stackPool = sync.Pool{} + valueCache = []unsafe.Pointer(nil) fieldCache = []*caching.FieldMap(nil) programCache = caching.CreateProgramCache() ) @@ -41,6 +46,7 @@ type _Stack struct { sp uintptr sb [_MaxStack]unsafe.Pointer mm types.StateMachine + vp [types.MAX_RECURSE]*interface{} } type _Decoder func( @@ -63,6 +69,11 @@ func freeStack(p *_Stack) { stackPool.Put(p) } +func freezeValue(v unsafe.Pointer) uintptr { + valueCache = append(valueCache, v) + return uintptr(v) +} + func freezeFields(v *caching.FieldMap) int64 { fieldCache = append(fieldCache, v) return referenceFields(v) diff --git a/decoder/stubs.go b/decoder/stubs.go index 5358c0e..7634935 100644 --- a/decoder/stubs.go +++ b/decoder/stubs.go @@ -27,12 +27,30 @@ import ( //go:linkname _subr__b64decode github.com/chenzhuoyu/base64x._subr__b64decode var _subr__b64decode uintptr +//go:nosplit +//go:noescape +//go:linkname throw runtime.throw +//goland:noinspection GoUnusedParameter +func throw(s string) + //go:nosplit //go:noescape //go:linkname convT64 runtime.convT64 //goland:noinspection GoUnusedParameter func convT64(v uint64) unsafe.Pointer +//go:nosplit +//go:noescape +//go:linkname convTslice runtime.convTslice +//goland:noinspection GoUnusedParameter +func convTslice(v []byte) unsafe.Pointer + +//go:nosplit +//go:noescape +//go:linkname convTstring runtime.convTstring +//goland:noinspection GoUnusedParameter +func convTstring(v uint64) unsafe.Pointer + //go:nosplit //go:noescape //go:linkname memequal runtime.memequal diff --git a/decoder/utils.go b/decoder/utils.go index 2f6ef23..69587cb 100644 --- a/decoder/utils.go +++ b/decoder/utils.go @@ -22,6 +22,11 @@ import ( `github.com/bytedance/sonic/internal/loader` ) +//go:nosplit +func pbool(v bool) uintptr { + return freezeValue(unsafe.Pointer(&v)) +} + //go:nosplit func ptodec(p loader.Function) _Decoder { return *(*_Decoder)(unsafe.Pointer(&p)) diff --git a/internal/native/avx/native_export_amd64.go b/internal/native/avx/native_export_amd64.go index 8399261..325e974 100644 --- a/internal/native/avx/native_export_amd64.go +++ b/internal/native/avx/native_export_amd64.go @@ -31,6 +31,7 @@ var ( ) var ( + S_value = _subr__value S_vstring = _subr__vstring S_vnumber = _subr__vnumber S_vsigned = _subr__vsigned diff --git a/internal/native/avx2/native_export_amd64.go b/internal/native/avx2/native_export_amd64.go index 82043e0..93d200c 100644 --- a/internal/native/avx2/native_export_amd64.go +++ b/internal/native/avx2/native_export_amd64.go @@ -31,6 +31,7 @@ var ( ) var ( + S_value = _subr__value S_vstring = _subr__vstring S_vnumber = _subr__vnumber S_vsigned = _subr__vsigned diff --git a/internal/native/dispatch_amd64.go b/internal/native/dispatch_amd64.go index 238dff7..9c1724b 100644 --- a/internal/native/dispatch_amd64.go +++ b/internal/native/dispatch_amd64.go @@ -35,6 +35,7 @@ var ( ) var ( + S_value uintptr S_vstring uintptr S_vnumber uintptr S_vsigned uintptr @@ -84,6 +85,7 @@ func useAVX() { S_lquote = avx.S_lquote S_lspace = avx.S_lspace S_unquote = avx.S_unquote + S_value = avx.S_value S_vstring = avx.S_vstring S_vnumber = avx.S_vnumber S_vsigned = avx.S_vsigned @@ -100,6 +102,7 @@ func useAVX2() { S_lquote = avx2.S_lquote S_lspace = avx2.S_lspace S_unquote = avx2.S_unquote + S_value = avx2.S_value S_vstring = avx2.S_vstring S_vnumber = avx2.S_vnumber S_vsigned = avx2.S_vsigned diff --git a/internal/native/native_export_amd64.tmpl b/internal/native/native_export_amd64.tmpl index b0cd27c..c662202 100644 --- a/internal/native/native_export_amd64.tmpl +++ b/internal/native/native_export_amd64.tmpl @@ -29,6 +29,7 @@ var ( ) var ( + S_value = _subr__value S_vstring = _subr__vstring S_vnumber = _subr__vnumber S_vsigned = _subr__vsigned diff --git a/internal/native/types/types.go b/internal/native/types/types.go index f9e990c..1be28cb 100644 --- a/internal/native/types/types.go +++ b/internal/native/types/types.go @@ -38,6 +38,7 @@ const ( V_ELEM_SEP ValueType = 11 V_ARRAY_END ValueType = 12 V_OBJECT_END ValueType = 13 + V_MAX ) const (