From 6aec2f3722a4a16a5f3325e341274947e3a81f4b Mon Sep 17 00:00:00 2001 From: Yi Duan Date: Fri, 27 Aug 2021 18:28:56 +0800 Subject: [PATCH] feat(ast): Node supports MarshalJson() (#84) Co-authored-by: duanyi.aster --- README.md | 60 +++++++++------ ast/encode.go | 187 +++++++++++++++++++++++++++++++++++++++++++++ ast/encode_test.go | 141 ++++++++++++++++++++++++++++++++++ ast/node.go | 14 ++-- ast/stubs.go | 9 +++ bench.sh | 1 + 6 files changed, 383 insertions(+), 29 deletions(-) create mode 100644 ast/encode.go create mode 100644 ast/encode_test.go diff --git a/README.md b/README.md index dfb9aef..6d15cb0 100644 --- a/README.md +++ b/README.md @@ -80,28 +80,6 @@ output, err := sonic.Marshal(&data) err := sonic.Unmarshal(input, &data) ``` -### Get - -Search partial json by given pathes, which must be non-negative integer or string or nil -```go -import "github.com/bytedance/sonic" - -input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`) - -// no path, returns entire json -root, err := sonic.Get(input) -raw := root.Raw() // == string(input) - -// multiple pathes -root, err := sonic.Get(input, "key1", 1, "key2") -sub := root.Get("key3").Index(2).Int64() // == 3 -``` -Returned ast.Node supports: -- secondary search: `Get()`, `Index()`, `GetByPath()` -- type assignment: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map()`, `Array()` -- children traversal: `Values()`, `Properties()` -- supplement: `Set()`, `SetByIndex()`, `Add()`, `Cap()`, `Len()` - ### Use Number/Use Int64 ```go import "github.com/bytedance/sonic/decoder" @@ -155,6 +133,44 @@ if err := dc.Decode(&data); err != nil { } ``` +### Ast.Node + +#### Get +Search partial JSON by the given paths, each of which must be a non-negative integer, a string, or nil +```go +import "github.com/bytedance/sonic" + +input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`) + +// no path, returns entire json +root, err := sonic.Get(input) +raw := root.Raw() // == string(input) + +// multiple paths +root, err := 
sonic.Get(input, "key1", 1, "key2") +sub := root.Get("key3").Index(2).Int64() // == 3 +``` + +#### Serialize +To encode `ast.Node` as JSON, use `MarshalJSON()` or `json.Marshal()` (MUST pass the node's pointer) +```go +import ( + "encoding/json" + "github.com/bytedance/sonic" +) + +buf, err := root.MarshalJSON() +println(string(buf)) //{"key1":[{},{"key2":{"key3":[1,2,3]}}]} +exp, err := json.Marshal(&root) //WARN: use pointer +println(string(buf) == string(exp)) // true +``` + +#### Other features +- secondary search: `Get()`, `Index()`, `GetByPath()` +- type assignment: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map()`, `Array()` +- children traversal: `Values()`, `Properties()` +- modification: `Set()`, `SetByIndex()`, `Add()`, `Cap()`, `Len()` + ## Tips ### Pretouch diff --git a/ast/encode.go b/ast/encode.go new file mode 100644 index 0000000..21c548f --- /dev/null +++ b/ast/encode.go @@ -0,0 +1,187 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package ast
+
+import (
+    `errors`
+    `sync`
+)
+
+const (
+    _MaxBuffer = 4 * 1024    // 4KB buffer size
+)
+
+var (
+    bytesNull   = []byte("null")
+    bytesTrue   = []byte("true")
+    bytesFalse  = []byte("false")
+    bytesObject = []byte("{}")
+    bytesArray  = []byte("[]")
+
+    lenNull   = len("null")
+    lenTrue   = len("true")
+    lenFalse  = len("false")
+    lenObject = len("{}")
+    lenArray  = len("[]")
+)
+
+var bytesPool = sync.Pool{}
+
+func (self *Node) MarshalJSON() ([]byte, error) { // implements encoding/json.Marshaler
+    buf := newBuffer()
+    defer freeBuffer(buf) // runs after the result below has been copied out of the pooled buffer
+    if err := self.encode(buf); err != nil {
+        return nil, err // never hand back a half-written document together with an error
+    }
+    return append(make([]byte, 0, len(*buf)), *buf...), nil
+}
+
+func newBuffer() *[]byte {
+    if ret := bytesPool.Get(); ret != nil {
+        return ret.(*[]byte)
+    } else {
+        buf := make([]byte, 0, _MaxBuffer)
+        return &buf
+    }
+}
+
+func freeBuffer(buf *[]byte) {
+    *buf = (*buf)[:0] // keep the capacity, drop the content
+    bytesPool.Put(buf)
+}
+
+func (self *Node) encode(buf *[]byte) error {
+    if self.IsRaw() {
+        return self.encodeRaw(buf)
+    }
+    switch self.Type() {
+        case V_NONE  : return errors.New("value not exist")
+        case V_NULL  : return self.encodeNull(buf)
+        case V_TRUE  : return self.encodeTrue(buf)
+        case V_FALSE : return self.encodeFalse(buf)
+        case V_ARRAY : return self.encodeArray(buf)
+        case V_OBJECT: return self.encodeObject(buf)
+        case V_STRING: return self.encodeString(buf)
+        case V_NUMBER: return self.encodeNumber(buf)
+        default      : // any other type falls through to the error below
+    }
+    return errors.New("unsupported type") // previously built but never returned, so callers saw nil
+}
+
+func (self *Node) encodeRaw(buf *[]byte) error {
+    raw := self.Raw()
+    *buf = append(*buf, raw...)
+    return nil
+}
+
+func (self *Node) encodeNull(buf *[]byte) error {
+    *buf = append(*buf, bytesNull...)
+    return nil
+}
+
+func (self *Node) encodeTrue(buf *[]byte) error {
+    *buf = append(*buf, bytesTrue...)
+    return nil
+}
+
+func (self *Node) encodeFalse(buf *[]byte) error {
+    *buf = append(*buf, bytesFalse...)
+    return nil
+}
+
+func (self *Node) encodeNumber(buf *[]byte) error {
+    str := addr2str(self.p, self.v)
+    *buf = append(*buf, str...)
+    return nil
+}
+
+func (self *Node) encodeString(buf *[]byte) error {
+    str := addr2str(self.p, self.v)
+    *buf = append(*buf, '"')
+    *buf = append(*buf, str...) // NOTE(review): written verbatim; assumes str is already JSON-escaped - confirm
+    *buf = append(*buf, '"')
+    return nil
+}
+
+func (self *Node) encodeArray(buf *[]byte) error {
+    if self.isLazy() {
+        self.skipAllIndex()
+    }
+
+    nb := self.len()
+    if nb == 0 {
+        *buf = append(*buf, bytesArray...)
+        return nil
+    }
+
+    *buf = append(*buf, '[')
+
+    var p = (*Node)(self.p)
+    err := p.encode(buf)
+    if err != nil {
+        return err
+    }
+    for i := 1; i < nb; i++ {
+        *buf = append(*buf, ',')
+        p = p.unsafe_next()
+        err := p.encode(buf)
+        if err != nil {
+            return err
+        }
+    }
+
+    *buf = append(*buf, ']')
+    return nil
+}
+
+func (self *Pair) encode(buf *[]byte) error {
+    *buf = append(*buf, '"')
+    *buf = append(*buf, self.Key...) // NOTE(review): key is written verbatim; assumes it needs no escaping - confirm
+    *buf = append(*buf, '"', ':')
+    return self.Value.encode(buf)
+}
+
+func (self *Node) encodeObject(buf *[]byte) error {
+    if self.isLazy() {
+        self.skipAllKey()
+    }
+
+    nb := self.len()
+    if nb == 0 {
+        *buf = append(*buf, bytesObject...)
+        return nil
+    }
+
+    *buf = append(*buf, '{')
+
+    var p = (*Pair)(self.p)
+    err := p.encode(buf)
+    if err != nil {
+        return err
+    }
+    for i := 1; i < nb; i++ {
+        *buf = append(*buf, ',')
+        p = p.unsafe_next()
+        err := p.encode(buf)
+        if err != nil {
+            return err
+        }
+    }
+
+    *buf = append(*buf, '}')
+    return nil
+}
\ No newline at end of file
diff --git a/ast/encode_test.go b/ast/encode_test.go
new file mode 100644
index 0000000..533d2de
--- /dev/null
+++ b/ast/encode_test.go
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2021 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ast + +import ( + `encoding/json` + `testing` + + `github.com/bytedance/sonic/internal/native/types` +) + +func TestEncodeValue(t *testing.T) { + type Case struct { + node Node + exp string + err bool + } + input := []Case{ + {NewNull(), "null", false}, + {NewBool(true), "true", false}, + {NewBool(false), "false", false}, + {NewNumber("0.0"), "0.0", false}, + {NewString(""), `""`, false}, + {NewArray([]Node{}), "[]", false}, + {NewArray([]Node{NewBool(true), NewString("true")}), `[true,"true"]`, false}, + {NewObject([]Pair{Pair{"a", NewNull()}, Pair{"b", NewNumber("0")}}), `{"a":null,"b":0}`, false}, + {NewObject([]Pair{}), `{}`, false}, + {newRawNode(`[{ }]`, types.V_ARRAY), "[{}]", false}, + {Node{}, "", true}, + {Node{t: types.V_EOF}, "", true}, + } + for i, c := range input { + buf, err := json.Marshal(&c.node) + if c.err { + if err == nil { + t.Fatal(i) + } + continue + } + if err != nil { + t.Fatal(i, err) + } + if string(buf) != c.exp { + t.Fatal(i, string(buf)) + } + } +} + +func TestEncodeNode(t *testing.T) { + data := `{"a":[{},[],-0.1,true,false,null,""],"b":0,"c":true,"d":false,"e":null,"g":""}` + root, e := NewSearcher(data).GetByPath() + if e != nil { + t.Fatal(root) + } + ret, err := root.MarshalJSON() + if err != nil { + t.Fatal(err) + } + if string(ret) != data { + t.Fatal(string(ret)) + } + root.skipAllKey() + ret, err = root.MarshalJSON() + if err != nil { + t.Fatal(err) + } + if string(ret) != data { + t.Fatal(string(ret)) + } + root.loadAllKey() + ret, err = root.MarshalJSON() + if err != nil { + t.Fatal(err) + 
} + if string(ret) != data { + t.Fatal(string(ret)) + } +} + +func BenchmarkEncodeRaw(b *testing.B) { + data := _TwitterJson + root, e := NewSearcher(data).GetByPath() + if e != nil { + b.Fatal(root) + } + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i:=0; i> _CAP_BITS) } diff --git a/ast/stubs.go b/ast/stubs.go index 891829a..5e933c5 100644 --- a/ast/stubs.go +++ b/ast/stubs.go @@ -18,10 +18,15 @@ package ast import ( `unsafe` + `reflect` `github.com/bytedance/sonic/internal/rt` ) +var ( + byteType = rt.UnpackType(reflect.TypeOf(byte(0))) +) + //go:noescape //go:linkname memmove runtime.memmove //goland:noinspection GoUnusedParameter @@ -30,3 +35,7 @@ func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) //go:linkname unsafe_NewArray reflect.unsafe_NewArray //goland:noinspection GoUnusedParameter func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer + +//go:linkname growslice runtime.growslice +//goland:noinspection GoUnusedParameter +func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice \ No newline at end of file diff --git a/bench.sh b/bench.sh index 7957f6d..00eb2e9 100644 --- a/bench.sh +++ b/bench.sh @@ -11,5 +11,6 @@ go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_Generic_ cd $pwd/ast go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkSearchOne_Gjson|BenchmarkSearchOne_Jsoniter|BenchmarkSearchOne_Sonic|BenchmarkSearchOne_Parallel_Gjson|BenchmarkSearchOne_Parallel_Jsoniter|BenchmarkSearchOne_Parallel_Sonic)$" go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_StdLib|BenchmarkParser_JsonIter|BenchmarkParser_Sonic|BenchmarkParser_Parallel_StdLib|BenchmarkParser_Parallel_JsonIter|BenchmarkParser_Parallel_Sonic|BenchmarkGetOne_Gjson|BenchmarkGetOne_Jsoniter|BenchmarkGetOne_Sonic|BenchmarkGetSeven_Gjson|BenchmarkGetSeven_Jsoniter|BenchmarkGetSeven_SonicParser)$" +go test -benchmem -run=^$ -benchtime=100000x -bench 
'^(BenchmarkEncodeRaw|BenchmarkEncodeSkip|BenchmarkEncodeLoad)$' cd $pwd \ No newline at end of file