diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b8d738c..7f63c66 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,10 @@ # How to Contribute ## Your First Pull Request -We use github for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). +We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). ## Without Semantic Versioning -We keep the stable code in branch `main` like `golang.org/x`. Development base on branch `develop`. And we promise the **Forward Compatibility** by adding new package directory with suffix `v2/v3` when code has break changes. +We keep the stable code in branch `main` like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with the suffix `v2/v3` when the code has breaking changes. ## Branch Organization We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, as known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development) @@ -15,7 +15,7 @@ We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as o We are using [Github Issues](https://github.com/bytedance/sonic/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new task, try to make sure your problem doesn’t already exist. ### 2. Reporting New Issues -Providing a reduced test code is a recommended way for reporting issues. Then can placed in: +Providing a reduced test code is a recommended way for reporting issues. It can then be placed in: - Just in issues - [Golang Playground](https://play.golang.org/) @@ -37,7 +37,7 @@ Before you submit your Pull Request (PR) consider the following guidelines: 5. 
Create your patch, including appropriate test cases. 6. Follow our [Style Guides](#code-style-guides). 7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit). - Adherence to these conventions is necessary because release notes are automatically generated from these messages. + Adherence to these conventions is necessary because release notes will be automatically generated from these messages. 8. Push your branch to GitHub: ``` git push origin bugfix/security_bug @@ -50,7 +50,7 @@ Your pr title and commit message should follow https://www.conventionalcommits.o ## Contribution Prerequisites - Our development environment keeps up with [Go Official](https://golang.org/project/). -- You need fully checking with lint tools before submit your pull request. [gofmt](https://golang.org/pkg/cmd/gofmt/) and [golangci-lint](https://github.com/golangci/golangci-lint) +- You need to fully check your code with lint tools before submitting your pull request. [gofmt](https://golang.org/pkg/cmd/gofmt/) & [golangci-lint](https://github.com/golangci/golangci-lint) - You are familiar with [Github](https://github.com) - Maybe you need familiar with [Actions](https://github.com/features/actions)(our default workflow tool). 
diff --git a/decode_test.go b/decode_test.go index 374d91e..3d545ce 100644 --- a/decode_test.go +++ b/decode_test.go @@ -2124,7 +2124,7 @@ func TestPrefilled(t *testing.T) { { in: `[3]`, ptr: &[...]int{1, 2}, - out: &[...]int{3, 2}, + out: &[...]int{3, 0}, }, } diff --git a/decoder/assembler_amd64.go b/decoder/assembler_amd64.go index 105b737..9f5b21e 100644 --- a/decoder/assembler_amd64.go +++ b/decoder/assembler_amd64.go @@ -241,6 +241,8 @@ var _OpFuncTab = [256]func(*_Assembler, *_Instr) { _OP_map_key_utext : (*_Assembler)._asm_OP_map_key_utext, _OP_map_key_utext_p : (*_Assembler)._asm_OP_map_key_utext_p, _OP_array_skip : (*_Assembler)._asm_OP_array_skip, + _OP_array_clear : (*_Assembler)._asm_OP_array_clear, + _OP_array_clear_p : (*_Assembler)._asm_OP_array_clear_p, _OP_slice_init : (*_Assembler)._asm_OP_slice_init, _OP_slice_append : (*_Assembler)._asm_OP_slice_append, _OP_object_skip : (*_Assembler)._asm_OP_object_skip, @@ -266,7 +268,7 @@ func (self *_Assembler) instr(v *_Instr) { if fn := _OpFuncTab[v.op()]; fn != nil { fn(self, v) } else { - panic(fmt.Sprintf("invalid opcode: 0x%02x", v.op())) + panic(fmt.Sprintf("invalid opcode: %d", v.op())) } } @@ -674,6 +676,32 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) { self.Link("_noescape_{n}") // _noescape_{n}: } +/** Memory Clearing Routines **/ + +var ( + _F_memclrHasPointers = jit.Func(memclrHasPointers) + _F_memclrNoHeapPointers = jit.Func(memclrNoHeapPointers) +) + +func (self *_Assembler) mem_clear_fn(ptrfree bool) { + if !ptrfree { + self.call_go(_F_memclrHasPointers) + } else { + self.call_go(_F_memclrNoHeapPointers) + } +} + +func (self *_Assembler) mem_clear_rem(size int64, ptrfree bool) { + self.Emit("MOVQ" , jit.Imm(size), _CX) // MOVQ ${size}, CX + self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX + self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 0), _AX) // MOVQ (ST)(AX), AX + self.Emit("SUBQ" , _VP, _AX) // SUBQ VP, AX + self.Emit("ADDQ" , _AX, _CX) // ADDQ AX, CX + 
self.Emit("MOVQ" , _VP, jit.Ptr(_SP, 0)) // MOVQ VP, (SP) + self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) + self.mem_clear_fn(ptrfree) // CALL_GO memclr{Has,NoHeap}Pointers +} + /** Map Assigning Routines **/ var ( @@ -1140,6 +1168,14 @@ func (self *_Assembler) _asm_OP_array_skip(_ *_Instr) { self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v } +func (self *_Assembler) _asm_OP_array_clear(p *_Instr) { + self.mem_clear_rem(p.i64(), true) +} + +func (self *_Assembler) _asm_OP_array_clear_p(p *_Instr) { + self.mem_clear_rem(p.i64(), false) +} + func (self *_Assembler) _asm_OP_slice_init(p *_Instr) { self.Emit("XORL" , _AX, _AX) // XORL AX, AX self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) diff --git a/decoder/assembler_test.go b/decoder/assembler_test.go index 6b1378d..634bc2a 100644 --- a/decoder/assembler_test.go +++ b/decoder/assembler_test.go @@ -54,6 +54,24 @@ func (UjsonValue) UnmarshalJSON(json []byte) error { return nil } +type UtextStruct struct { + V string +} + +func (self *UtextStruct) UnmarshalText(text []byte) error { + self.V = string(text) + return nil +} + +type UjsonStruct struct { + V string +} + +func (self *UjsonStruct) UnmarshalJSON(v []byte) error { + self.V = string(v) + return nil +} + const ( _OP_dbg_get_sr _Op = 253 _OP_dbg_set_sr _Op = 254 @@ -643,6 +661,13 @@ func TestAssembler_OpCode(t *testing.T) { } } +type JsonStruct struct { + A int + B string + C map[string]int + D []int +} + func TestAssembler_DecodeStruct(t *testing.T) { var v JsonStruct s := `{"A": 123, "B": "asdf", "C": {"qwer": 4567}, "D": [1, 2, 3, 4, 5]}` diff --git a/decoder/compiler.go b/decoder/compiler.go index cf8befb..a6db614 100644 --- a/decoder/compiler.go +++ b/decoder/compiler.go @@ -69,6 +69,8 @@ const ( _OP_map_key_utext _OP_map_key_utext_p _OP_array_skip + _OP_array_clear + _OP_array_clear_p _OP_slice_init _OP_slice_append _OP_object_skip @@ -331,7 +333,9 @@ func (self _Instr) disassemble() string { case _OP_recurse : return 
fmt.Sprintf("%-18s%s", self.op(), self.vt()) case _OP_goto : fallthrough case _OP_is_null : return fmt.Sprintf("%-18sL_%d", self.op(), self.vi()) - case _OP_index : return fmt.Sprintf("%-18s%d", self.op(), self.vi()) + case _OP_index : fallthrough + case _OP_array_clear : fallthrough + case _OP_array_clear_p : return fmt.Sprintf("%-18s%d", self.op(), self.vi()) case _OP_switch : return fmt.Sprintf("%-18s%s", self.op(), self.formatSwitchLabels()) case _OP_struct_field : return fmt.Sprintf("%-18s%s", self.op(), self.formatStructFields()) case _OP_match_char : return fmt.Sprintf("%-18s%s", self.op(), strconv.QuoteRune(rune(self.vb()))) @@ -677,7 +681,7 @@ func (self *_Compiler) compilePtr(p *_Program, sp int, et reflect.Type) { } func (self *_Compiler) compileArray(p *_Program, sp int, vt reflect.Type) { - i := p.pc() + x := p.pc() p.add(_OP_is_null) p.tag(sp) p.chr(_OP_match_char, '[') @@ -687,10 +691,10 @@ func (self *_Compiler) compileArray(p *_Program, sp int, vt reflect.Type) { p.chr(_OP_check_char, ']') /* decode every item */ - for n := 0; n < vt.Len(); n++ { - p.int(_OP_index, n * int(vt.Elem().Size())) + for i := 1; i <= vt.Len(); i++ { self.compileOne(p, sp + 1, vt.Elem(), self.pv) p.add(_OP_load) + p.int(_OP_index, i * int(vt.Elem().Size())) p.add(_OP_lspace) v = append(v, p.pc()) p.chr(_OP_check_char, ']') @@ -699,9 +703,21 @@ func (self *_Compiler) compileArray(p *_Program, sp int, vt reflect.Type) { /* drop rest of the array */ p.add(_OP_array_skip) + w := p.pc() + p.add(_OP_goto) p.rel(v) + + /* check for pointer data */ + if rt.UnpackType(vt.Elem()).NoPtr() { + p.int(_OP_array_clear, int(vt.Size())) + } else { + p.int(_OP_array_clear_p, int(vt.Size())) + } + + /* restore the stack */ + p.pin(w) p.add(_OP_drop) - p.pin(i) + p.pin(x) } func (self *_Compiler) compileSlice(p *_Program, sp int, et reflect.Type) { diff --git a/decoder/decoder_test.go b/decoder/decoder_test.go index 3d8cd11..01c82c0 100644 --- a/decoder/decoder_test.go +++ 
b/decoder/decoder_test.go @@ -26,6 +26,12 @@ import ( `github.com/stretchr/testify/require` ) +var _BindingValue TwitterStruct + +func init() { + _ = json.Unmarshal([]byte(TwitterJson), &_BindingValue) +} + func decode(s string, v interface{}) (int, error) { d := NewDecoder(s) err := d.Decode(v) diff --git a/decoder/interpreter_test.go b/decoder/interpreter_test.go deleted file mode 100644 index 5a5d42f..0000000 --- a/decoder/interpreter_test.go +++ /dev/null @@ -1,913 +0,0 @@ -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package decoder - -import ( - `encoding/json` - `math` - `reflect` - `sync` - `testing` - `unsafe` - - `github.com/bytedance/sonic/ast` - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/rt` - `github.com/chenzhuoyu/base64x` - `github.com/json-iterator/go` - `github.com/stretchr/testify/assert` -) - -var ( - ref_stk_pool = sync.Pool{New: new_stk} - ref_prg_cache = map[reflect.Type]*_Program{} -) - -func new_stk() interface{} { - return make([]unsafe.Pointer, 0, _MaxStack) -} - -func ref_eval(prg []_Instr, s string, p unsafe.Pointer) error { - k := ref_stk_pool.Get().([]unsafe.Pointer) - _, e := ref_eval_impl(prg, s, 0, p, k) - ref_stk_pool.Put(k[:0]) - return e -} - -func ref_unquote(s string, m *[]byte, du bool) int { - var flags uint64 - if du { - flags |= native.F_DOUBLE_UNQUOTE - } - - /* unquote the string */ - pos := -1 - slv := (*rt.GoSlice)(unsafe.Pointer(m)) - str := (*rt.GoString)(unsafe.Pointer(&s)) - ret := native.Unquote(str.Ptr, str.Len, slv.Ptr, &pos, flags) - - /* check for errors */ - if ret < 0 { - return -ret - } - - /* update the length */ - slv.Len = ret - return 0 -} - -func ref_eval_impl(prg []_Instr, s string, i int, p unsafe.Pointer, st []unsafe.Pointer) (int, error) { - pc := 0 - lr := 0 - vv := native.JsonState{} - mm := native.StateMachine{} - - for pc < len(prg) { - ins := prg[pc] - pc++ - - switch ins.op() { - case _OP_any: - j, v, e := ast.Loads(s[i:]) - if e != 0 { - return 0, e - } - i += j - *(*interface{})(p) = v - - case _OP_str: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v := s[vv.Iv:i - 1] - if vv.Ep == -1 { - *(*string)(p) = v - continue - } - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, false) - if e != 0 { - return 0, native.ParsingError(e) - } - *(*string)(p) = rt.Mem2Str(m) - - case _OP_bin: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v, e := 
base64x.StdEncoding.DecodeString(s[vv.Iv:i - 1]) - if e != nil { - return 0, e - } - *(*[]byte)(p) = v - - case _OP_bool: - if i + 4 <= len(s) && s[i:i + 4] == "true" { - i += 4 - *(*bool)(p) = true - } else if i + 4 <= len(s) && s[i:i + 4] == "null" { - i += 4 - *(*bool)(p) = false - } else if i + 5 <= len(s) && s[i:i + 5] == "false" { - i += 5 - *(*bool)(p) = false - } else { - return 0, native.ERR_INVALID_CHAR - } - - case _OP_num, - _OP_f32, - _OP_f64: - native.Vnumber(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - switch ins.op() { - case _OP_num: - *(*json.Number)(p) = json.Number(s[vv.Ep:i]) - - case _OP_f32: - if vv.Dv < -math.MaxFloat32 || vv.Dv > math.MaxFloat32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(float32(0.0))) - } - *(*float32)(p) = float32(vv.Dv) - - case _OP_f64: - *(*float64)(p) = vv.Dv - } - - case _OP_i8, - _OP_i16, - _OP_i32, - _OP_i64: - native.Vsigned(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - switch ins.op() { - case _OP_i8: - if vv.Iv < math.MinInt8 || vv.Iv > math.MaxInt8 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int8(0))) - } - *(*int8)(p) = int8(vv.Iv) - - case _OP_i16: - if vv.Iv < math.MinInt16 || vv.Iv > math.MaxInt16 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int16(0))) - } - *(*int16)(p) = int16(vv.Iv) - - case _OP_i32: - if vv.Iv < math.MinInt32 || vv.Iv > math.MaxInt32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int32(0))) - } - *(*int32)(p) = int32(vv.Iv) - - case _OP_i64: - *(*int64)(p) = vv.Iv - } - - case _OP_u8, - _OP_u16, - _OP_u32, - _OP_u64: - native.Vunsigned(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - switch ins.op() { - case _OP_u8: - if vv.Iv < 0 || vv.Iv > math.MaxUint8 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(uint8(0))) - } - *(*uint8)(p) = uint8(vv.Iv) - - case _OP_u16: - if vv.Iv < 0 || vv.Iv > math.MaxUint16 { - return 0, error_value(s[vv.Ep:i], 
reflect.TypeOf(uint16(0))) - } - *(*uint16)(p) = uint16(vv.Iv) - - case _OP_u32: - if vv.Iv < 0 || vv.Iv > math.MaxUint32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(uint32(0))) - } - *(*uint32)(p) = uint32(vv.Iv) - - case _OP_u64: - *(*uint64)(p) = uint64(vv.Iv) - } - - case _OP_unquote: - if i + 2 > len(s) { - return 0, native.ERR_EOF - } - if s[i] != '\\' || s[i + 1] != '"' { - return 0, native.ERR_INVALID_CHAR - } - i += 2 - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - if vv.Ep == -1 { - return 0, native.ERR_EOF - } - v := s[vv.Iv:i - 3] - if vv.Ep == i - 3 { - *(*string)(p) = v - continue - } - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, true) - if e != 0 { - return 0, native.ParsingError(e) - } - *(*string)(p) = rt.Mem2Str(m) - - case _OP_nil_1: - *(*[1]uintptr)(p) = [1]uintptr{} - - case _OP_nil_2: - *(*[2]uintptr)(p) = [2]uintptr{} - - case _OP_nil_3: - *(*[3]uintptr)(p) = [3]uintptr{} - - case _OP_deref: - v := (*unsafe.Pointer)(p) - if *v == nil { - t := rt.UnpackType(ins.vt()) - *v = mallocgc(uintptr(t.Size()), t, true) - } - p = *v - - case _OP_index: - p = unsafe.Pointer(uintptr(p) + uintptr(ins.vi())) - - case _OP_is_null: - if i + 4 <= len(s) && s[i:i + 4] == "null" { - i += 4 - pc = ins.vi() - } - - case _OP_map_init: - v := (*unsafe.Pointer)(p) - if *v == nil { - *v = makemap_small() - } - p = *v - - case _OP_map_key_i8, - _OP_map_key_i16, - _OP_map_key_i32, - _OP_map_key_i64: - native.Vsigned(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - mt := rt.UnpackType(ins.vt()) - switch ins.op() { - case _OP_map_key_i8: - if vv.Iv < math.MinInt8 || vv.Iv > math.MaxInt8 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int8(0))) - } - p = mapassign(mt, p, unsafe.Pointer(&vv.Iv)) - - case _OP_map_key_i16: - if vv.Iv < math.MinInt16 || vv.Iv > math.MaxInt16 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int16(0))) - } - p = mapassign(mt, p, 
unsafe.Pointer(&vv.Iv)) - - case _OP_map_key_i32: - if vv.Iv < math.MinInt32 || vv.Iv > math.MaxInt32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(int32(0))) - } - p = mapassign_fast32(mt, p, uint32(vv.Iv)) - - case _OP_map_key_i64: - p = mapassign_fast64(mt, p, uint64(vv.Iv)) - } - - case _OP_map_key_u8, - _OP_map_key_u16, - _OP_map_key_u32, - _OP_map_key_u64: - native.Vunsigned(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - mt := rt.UnpackType(ins.vt()) - switch ins.op() { - case _OP_map_key_u8: - if vv.Iv < 0 || vv.Iv > math.MaxUint8 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(uint8(0))) - } - p = mapassign(mt, p, unsafe.Pointer(&vv.Iv)) - - case _OP_map_key_u16: - if vv.Iv < 0 || vv.Iv > math.MaxUint16 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(uint16(0))) - } - p = mapassign(mt, p, unsafe.Pointer(&vv.Iv)) - - case _OP_map_key_u32: - if vv.Iv < 0 || vv.Iv > math.MaxUint32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(uint32(0))) - } - p = mapassign_fast32(mt, p, uint32(vv.Iv)) - - case _OP_map_key_u64: - p = mapassign_fast64(mt, p, uint64(vv.Iv)) - } - - case _OP_map_key_f32, - _OP_map_key_f64: - native.Vnumber(&s, &i, &vv) - if vv.Vt < 0 { - return 0, native.ParsingError(-vv.Vt) - } - mt := rt.UnpackType(ins.vt()) - switch ins.op() { - case _OP_map_key_f32: - if vv.Dv < -math.MaxFloat32 || vv.Dv > math.MaxFloat32 { - return 0, error_value(s[vv.Ep:i], reflect.TypeOf(float32(0))) - } - x := float32(vv.Dv) - p = mapassign(mt, p, unsafe.Pointer(&x)) - - case _OP_map_key_f64: - p = mapassign(mt, p, unsafe.Pointer(&vv.Dv)) - } - - case _OP_map_key_str: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v := s[vv.Iv:i - 1] - if vv.Ep != -1 { - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, false) - if e != 0 { - return 0, native.ParsingError(e) - } - v = rt.Mem2Str(m) - } - p = mapassign_faststr(rt.UnpackType(ins.vt()), p, v) - - case 
_OP_map_key_utext, - _OP_map_key_utext_p: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v := s[vv.Iv:i - 1] - if vv.Ep != -1 { - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, false) - if e != 0 { - return 0, native.ParsingError(e) - } - v = rt.Mem2Str(m) - } - kk := ins.vt().Key() - pk := rt.UnpackType(kk) - kt := pk - fn := mapassign - if ins.op() == _OP_map_key_utext_p { - pk = rt.UnpackType(reflect.PtrTo(kk)) - } - if kk.Kind() == reflect.Ptr { - kt = rt.UnpackType(kk.Elem()) - fn = mapassign_fast64ptr - } - kp := mallocgc(uintptr(kt.Size()), kt, true) - if err := decodeTextUnmarshaler(rt.GoEface{Type: pk, Value: kp}.Pack(), v); err != nil { - return 0, err - } - p = fn(rt.UnpackType(ins.vt()), p, kp) - - case _OP_array_skip: - native.SkipArray(&s, &i, &mm) - if i < 0 { - return 0, native.ParsingError(-i) - } - - case _OP_slice_init: - v := (*rt.GoSlice)(p) - v.Len = 0 - if v.Ptr == nil { - v.Cap = 16 - v.Ptr = makeslice(rt.UnpackType(ins.vt()), 0, v.Cap) - } - - case _OP_slice_append: - sl := (*rt.GoSlice)(p) - if sl.Len >= sl.Cap { - *sl = growslice(rt.UnpackType(ins.vt()), *sl, sl.Cap * 2) - } - p = unsafe.Pointer(uintptr(sl.Ptr) + uintptr(sl.Len) * ins.vt().Size()) - sl.Len++ - - case _OP_object_skip: - native.SkipObject(&s, &i, &mm) - if i < 0 { - return 0, native.ParsingError(-i) - } - - case _OP_object_next: - q := native.SkipOne(&s, &i, &mm) - if q < 0 { - return 0, native.ParsingError(-q) - } - - case _OP_struct_field: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v := s[vv.Iv:i - 1] - if vv.Ep != -1 { - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, false) - if e != 0 { - return 0, native.ParsingError(e) - } - v = rt.Mem2Str(m) - } - lr = ins.vf().Get(v) - if lr == -1 { - lr = ins.vf().GetCaseInsensitive(v) - } - - case _OP_unmarshal, - _OP_unmarshal_p: - q := native.SkipOne(&s, &i, &mm) - if q < 0 { - return 0, 
native.ParsingError(-q) - } - v := s[q:i] - kk := ins.vt() - vp := p - if kk.Kind() == reflect.Ptr { - kp := (*unsafe.Pointer)(p) - if *kp == nil { - kt := rt.UnpackType(kk.Elem()) - *kp = mallocgc(uintptr(kt.Size()), kt, true) - } - vp = *kp - } - if ins.op() == _OP_unmarshal_p { - kk = reflect.PtrTo(kk) - } - if err := decodeJsonUnmarshaler(rt.GoEface{Type: rt.UnpackType(kk), Value: vp}.Pack(), v); err != nil { - return 0, err - } - - case _OP_unmarshal_text, - _OP_unmarshal_text_p: - native.Vstring(&s, &i, &vv) - if vv.Vt != native.V_STRING { - return 0, native.ParsingError(-vv.Vt) - } - v := s[vv.Iv:i - 1] - if vv.Ep != -1 { - m := make([]byte, 0, len(v)) - e := ref_unquote(v, &m, false) - if e != 0 { - return 0, native.ParsingError(e) - } - v = rt.Mem2Str(m) - } - kk := ins.vt() - vp := p - if kk.Kind() == reflect.Ptr { - kp := (*unsafe.Pointer)(p) - if *kp == nil { - kt := rt.UnpackType(kk.Elem()) - *kp = mallocgc(uintptr(kt.Size()), kt, true) - } - vp = *kp - } - if ins.op() == _OP_unmarshal_text_p { - kk = reflect.PtrTo(kk) - } - if err := decodeTextUnmarshaler(rt.GoEface{Type: rt.UnpackType(kk), Value: vp}.Pack(), v); err != nil { - return 0, err - } - - case _OP_lspace: - sv := (*rt.GoString)(unsafe.Pointer(&s)) - if i = native.Lspace(sv.Ptr, sv.Len, i); i >= len(s) { - return 0, native.ERR_EOF - } - if i < 0 { - return 0, native.ParsingError(-i) - } - - case _OP_match_char: - if i == len(s) { - return 0, native.ERR_EOF - } - if s[i] != ins.vb() { - return 0, native.ERR_INVALID_CHAR - } - i++ - - case _OP_check_char: - if i == len(s) { - return 0, native.ERR_EOF - } - if s[i] == ins.vb() { - i++ - pc = ins.vi() - } - - case _OP_load: - p = st[len(st) - 1] - - case _OP_save: - st = append(st, p) - - case _OP_drop: - p = st[len(st) - 1] - st = st[:len(st) - 1] - - case _OP_drop_2: - p = st[len(st) - 2] - st = st[:len(st) - 2] - - case _OP_recurse: - var err error - np, ok := ref_prg_cache[ins.vt()] - if !ok { - np, err = newCompiler().compile(ins.vt()) - if 
err != nil { - return 0, err - } - ref_prg_cache[ins.vt()] = np - } - if i, err = ref_eval_impl(np.ins, s, i, p, st); err != nil { - return 0, err - } - - case _OP_goto: - pc = ins.vi() - - case _OP_switch: - if lr >= 0 && lr < len(ins.vs()) { - pc = ins.vs()[lr] - } - - default: - panic("invalid opcode: " + ins.op().String()) - } - } - - return i, nil -} - -func TestInterpreter_OpCodes_any(t *testing.T) { - var v interface{} - e := ref_eval([]_Instr{newInsOp(_OP_any)}, `{"a": [1.0, 2, -3]}`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } - assert.Equal(t, map[string]interface{}{"a": []interface{}{1.0, int64(2), int64(-3)}}, v) -} - -func TestInterpreter_OpCodes_str(t *testing.T) { - s := "" - e := ref_eval([]_Instr{newInsOp(_OP_str)}, `hello, world"`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, "hello, world", s) - s = "" - e = ref_eval([]_Instr{newInsOp(_OP_str)}, `hello, world \\ \/ \b \f \n \r \t \u666f 测试中文 \ud83d\ude00"`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, "hello, world \\ / \b \f \n \r \t 景 测试中文 😀", s) -} - -func TestInterpreter_OpCodes_bin(t *testing.T) { - s := []byte(nil) - e := ref_eval([]_Instr{newInsOp(_OP_bin)}, `aGVsbG8sIHdvcmxk"`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, []byte("hello, world"), s) -} - -func TestInterpreter_OpCodes_bool_s(t *testing.T) { - v := false - e := ref_eval([]_Instr{newInsOp(_OP_bool)}, `true`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } - assert.True(t, v) -} - -func TestInterpreter_OpCodes_num_s(t *testing.T) {{ - v := json.Number("") - e := ref_eval([]_Instr{newInsOp(_OP_num)}, `12345`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } - assert.Equal(t, json.Number("12345"), v) -}; { - v := int8(0) - e := ref_eval([]_Instr{newInsOp(_OP_i8)}, `123`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } - assert.Equal(t, int8(123), v) - v = 0 - e = ref_eval([]_Instr{newInsOp(_OP_i8)}, `-123`, unsafe.Pointer(&v)) - if e != nil { 
- panic(e) - } -}; { - v := uint64(0) - e := ref_eval([]_Instr{newInsOp(_OP_u64)}, `1234567890123`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } -}} - -func TestInterpreter_OpCodes_unquote(t *testing.T) { - v := "" - e := ref_eval([]_Instr{newInsOp(_OP_unquote)}, `\"hello\\b\\f\\n\\r\\tworld\""`, unsafe.Pointer(&v)) - if e != nil { - panic(e) - } - assert.Equal(t, "hello\b\f\n\r\tworld", v) -} - -func TestInterpreter_OpCodes_map(t *testing.T) { - s := (map[string]string)(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"asdf":"qwer","zxcv":"fdgh"}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, map[string]string{"asdf": "qwer", "zxcv": "fdgh"}, s) -} - -func TestInterpreter_OpCodes_map_i64(t *testing.T) { - s := (map[int64]string)(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"1234":"qwer","-2345":"fdgh"}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, map[int64]string{1234: "qwer", -2345: "fdgh"}, s) -} - -type UtextStruct struct { - V string -} - -func (self *UtextStruct) UnmarshalText(text []byte) error { - self.V = string(text) - return nil -} - -func TestInterpreter_OpCodes_map_utext(t *testing.T) { - s := (map[*UtextStruct]string)(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"asdf":"qwer","zxcv":"fdgh"}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, 2, len(s)) - m := map[string]string{} - for k, v := range s { - m[k.V] = v - } - assert.Equal(t, map[string]string{"asdf": "qwer", "zxcv": "fdgh"}, m) -} - -func TestInterpreter_OpCodes_map_utext_p(t *testing.T) { - s := map[UtextStruct]string{} - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"asdf":"qwer","zxcv":"fdgh"}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, 2, 
len(s)) - m := map[string]string{} - for k, v := range s { - m[k.V] = v - } - assert.Equal(t, map[string]string{"asdf": "qwer", "zxcv": "fdgh"}, m) -} - -func TestInterpreter_OpCodes_array(t *testing.T) { - s := [3]uint64{} - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `[1, 2, 3, 4, 5]`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, [3]uint64{1, 2, 3}, s) -} - -func TestInterpreter_OpCodes_slice(t *testing.T) { - s := []uint64(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `[1, 2, 3, 4, 5]`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, []uint64{1, 2, 3, 4, 5}, s) -} - -type JsonStruct struct { - A int - B string - C map[string]int - D []int -} - -func TestInterpreter_OpCodes_struct(t *testing.T) { - s := JsonStruct{} - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"A": 123, "B": "asdf", "C": {"qwer": 4567}, "D": [1, 2, 3, 4, 5]}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, JsonStruct{ - A: 123, - B: "asdf", - C: map[string]int{"qwer": 4567}, - D: []int{1, 2, 3, 4, 5}, - }, s) -} - -type UjsonStruct struct { - V string -} - -func (self *UjsonStruct) UnmarshalJSON(v []byte) error { - self.V = string(v) - return nil -} - -func TestInterpreter_OpCodes_ujson(t *testing.T) { - s := (*UjsonStruct)(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `{"test": "foo"}`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, "{\"test\": \"foo\"}", s.V) -} - -func TestInterpreter_OpCodes_utext(t *testing.T) { - s := (*UtextStruct)(nil) - p, e := newCompiler().compile(reflect.TypeOf(s)) - if e != nil { - panic(e) - } - e = ref_eval(p.ins, `"hello, world"`, unsafe.Pointer(&s)) - if e != nil { - panic(e) - } - assert.Equal(t, "hello, world", s.V) -} - -var 
_BindingValue TwitterStruct - -type StringTag struct { - BoolStr bool `json:",string"` - IntStr int64 `json:",string"` - UintptrStr uintptr `json:",string"` - StrStr string `json:",string"` - NumberStr json.Number `json:",string"` -} - -func init() { - _ = json.Unmarshal([]byte(TwitterJson), &_BindingValue) -} - -func TestInterpreter_ParseJson(t *testing.T) { - var v TwitterStruct - prg, err := newCompiler().compile(reflect.TypeOf(v)) - if err != nil { - panic(err) - } - err = ref_eval(prg.ins, TwitterJson, unsafe.Pointer(&v)) - if err != nil { - panic(err) - } - assert.Equal(t, _BindingValue, v) -} - -func TestInterpreter_ParseStringize(t *testing.T) { - var v StringTag - prg, err := newCompiler().compile(reflect.TypeOf(v)) - if err != nil { - panic(err) - } - s := `{ - "BoolStr": "true", - "IntStr": "42", - "NumberStr": "46", - "StrStr": "\"xzbit\"", - "UintptrStr": "44" - }` - err = ref_eval(prg.ins, s, unsafe.Pointer(&v)) - if err != nil { - panic(err) - } - assert.Equal(t, StringTag{ - BoolStr: true, - IntStr: 42, - UintptrStr: 44, - StrStr: "xzbit", - NumberStr: "46", - }, v) -} - -func BenchmarkInterpreter_ParseJson_Sonic(b *testing.B) { - var v TwitterStruct - prg, err := newCompiler().compile(reflect.TypeOf(v)) - if err != nil { - panic(err) - } - b.SetBytes(int64(len(TwitterJson))) - b.ResetTimer() - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - _ = ref_eval(prg.ins, TwitterJson, unsafe.Pointer(&v)) - } - }) -} - -func BenchmarkInterpreter_ParseJson_JsonIter(b *testing.B) { - var v TwitterStruct - s := []byte(TwitterJson) - b.SetBytes(int64(len(TwitterJson))) - b.ResetTimer() - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - _ = jsoniter.Unmarshal(s, &v) - } - }) -} - -func BenchmarkInterpreter_ParseJson_StdLib(b *testing.B) { - var v TwitterStruct - s := []byte(TwitterJson) - b.SetBytes(int64(len(TwitterJson))) - b.ResetTimer() - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - _ = json.Unmarshal(s, &v) - } - }) -} diff --git 
a/decoder/stubs.go b/decoder/stubs.go index 7bff3a1..4160f24 100644 --- a/decoder/stubs.go +++ b/decoder/stubs.go @@ -109,3 +109,15 @@ func mapassign_fast64ptr(t *rt.GoType, h unsafe.Pointer, k unsafe.Pointer) unsaf //go:linkname mapassign_faststr runtime.mapassign_faststr //goland:noinspection GoUnusedParameter func mapassign_faststr(t *rt.GoType, h unsafe.Pointer, s string) unsafe.Pointer + +//go:nosplit +//go:noescape +//go:linkname memclrHasPointers runtime.memclrHasPointers +//goland:noinspection GoUnusedParameter +func memclrHasPointers(ptr unsafe.Pointer, n uintptr) + +//go:nosplit +//go:noescape +//go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers +//goland:noinspection GoUnusedParameter +func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) \ No newline at end of file diff --git a/encoder/assembler_amd64.go b/encoder/assembler_amd64.go index 9f69aa3..3f188da 100644 --- a/encoder/assembler_amd64.go +++ b/encoder/assembler_amd64.go @@ -251,7 +251,7 @@ func (self *_Assembler) instr(v *_Instr) { if fn := _OpFuncTab[v.op()]; fn != nil { fn(self, v) } else { - panic(fmt.Sprintf("invalid opcode: 0x%02x", v.op())) + panic(fmt.Sprintf("invalid opcode: %d", v.op())) } } diff --git a/internal/native/native.go b/internal/native/native.go index 9db57d3..4ea8561 100644 --- a/internal/native/native.go +++ b/internal/native/native.go @@ -139,34 +139,10 @@ func Value(s unsafe.Pointer, n int, p int, v *JsonState) int { return __value(s, n, p, v) } -func Vstring(s *string, p *int, v *JsonState) { - __vstring(s, p, v) -} - -func Vnumber(s *string, p *int, v *JsonState) { - __vnumber(s, p, v) -} - -func Vsigned(s *string, p *int, v *JsonState) { - __vsigned(s, p, v) -} - -func Vunsigned(s *string, p *int, v *JsonState) { - __vunsigned(s, p, v) -} - func SkipOne(s *string, p *int, m *StateMachine) int { return __skip_one(s, p, m) } -func SkipArray(s *string, p *int, m *StateMachine) int { - return __skip_array(s, p, m) -} - -func SkipObject(s *string, p *int, m 
*StateMachine) int { - return __skip_object(s, p, m) -} - func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int { return __unquote(s, nb, dp, ep, flags) } diff --git a/internal/rt/fastvalue.go b/internal/rt/fastvalue.go index d88de44..10f41c9 100644 --- a/internal/rt/fastvalue.go +++ b/internal/rt/fastvalue.go @@ -60,6 +60,10 @@ func (self *GoType) Pack() (t reflect.Type) { return } +func (self *GoType) NoPtr() bool { + return self.ptrd == 0 +} + func (self *GoType) Indir() bool { return (self.kflags & _DirectIface) == 0 } diff --git a/issue7_test.go b/issue7_test.go new file mode 100644 index 0000000..a92cb21 --- /dev/null +++ b/issue7_test.go @@ -0,0 +1,30 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package sonic + +import ( + `testing` + + `github.com/stretchr/testify/require` +) + +func TestIssue7(t *testing.T) { + v := &[...]int{1, 2, 3, 4, 5, 6, 7} + err := Unmarshal([]byte(`[3]`), v) + require.Nil(t, err) + require.Equal(t, &[...]int{3, 0, 0, 0, 0, 0, 0}, v) +} diff --git a/search_test.go b/search_test.go index 250946f..c9f9eb8 100644 --- a/search_test.go +++ b/search_test.go @@ -124,8 +124,7 @@ func TestRandomData(t *testing.T) { return } lstr = string(b[:n]) - //fmt.Printf("i: %d, lstr: \n%v \n", i, hex.Dump(b[:n])) - ast.NewParser(lstr).Parse() + _, _ = ast.NewParser(lstr).Parse() } } @@ -142,10 +141,9 @@ func TestRandomValidStrings(t *testing.T) { t.Fatal("marshal data failed:",err) } var su string - if err := json.Unmarshal([]byte(sm), &su); err != nil { + if err := json.Unmarshal(sm, &su); err != nil { t.Fatal("unmarshal data failed:",err) } - //fmt.Printf("i: %d, su: %v, lstr: \n%v \n", i, su, hex.Dump(b[:n])) token, err := GetFromString(`{"str":`+string(sm)+`}`, "str") if err != nil { t.Fatal("search data failed:",err)