2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

feat(ast): Node supports MarshalJson() (#84)

Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
Yi Duan 2021-08-27 18:28:56 +08:00 committed by GitHub
parent 8c119dd72d
commit 6aec2f3722
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 383 additions and 29 deletions

View file

@ -80,28 +80,6 @@ output, err := sonic.Marshal(&data)
err := sonic.Unmarshal(input, &data)
```
### Get
Search partial json by given pathes, which must be non-negative integer or string or nil
```go
import "github.com/bytedance/sonic"
input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
// no path, returns entire json
root, err := sonic.Get(input)
raw := root.Raw() // == string(input)
// multiple pathes
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
Returned ast.Node supports
- secondary search: `Get()`, `Index()`, `GetByPath()`
- type assignment: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map()`, `Array()`
- children traversal: `Values()`, `Properties()`
- supplement: `Set()`, `SetByIndex()`, `Add()`, `Cap()`, `Len()`
### Use Number/Use Int64
```go
import "github.com/bytedance/sonic/decoder"
@ -155,6 +133,44 @@ if err := dc.Decode(&data); err != nil {
}
```
### Ast.Node
#### Get
Search partial JSON by the given paths; each path element must be a non-negative integer, a string, or nil
```go
import "github.com/bytedance/sonic"
input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
// no path, returns entire json
root, err := sonic.Get(input)
raw := root.Raw() // == string(input)
// multiple paths
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
#### Serialize
To encode `ast.Node` as JSON, use `MarshalJSON()` or `json.Marshal()` (you MUST pass the node's pointer)
```go
import (
"encoding/json"
"github.com/bytedance/sonic"
)
buf, err := root.MarshalJSON()
println(string(buf)) //{"key1":[{},{"key2":{"key3":[1,2,3]}}]}
exp, err := json.Marshal(&root) //WARN: use pointer
println(string(buf) == string(exp)) // true
```
#### Other features
- secondary search: `Get()`, `Index()`, `GetByPath()`
- type assignment: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map()`, `Array()`
- children traversal: `Values()`, `Properties()`
- modification: `Set()`, `SetByIndex()`, `Add()`, `Cap()`, `Len()`
## Tips
### Pretouch

187
ast/encode.go Normal file
View file

@ -0,0 +1,187 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`errors`
`sync`
)
const (
// _MaxBuffer is the capacity newBuffer gives a freshly allocated scratch
// buffer. It is only a starting capacity: the encoders extend the buffer
// with append, so it may grow beyond this size.
_MaxBuffer = 4 * 1024 // 4KB buffer size
)
// Pre-built byte forms of the JSON literals written by the encoders.
// bytesObject and bytesArray are emitted for empty objects and empty arrays.
var (
bytesNull = []byte("null")
bytesTrue = []byte("true")
bytesFalse = []byte("false")
bytesObject = []byte("{}")
bytesArray = []byte("[]")
// Byte lengths of the literals above.
// NOTE(review): no use of these is visible in this file — confirm they are needed.
lenNull = len("null")
lenTrue = len("true")
lenFalse = len("false")
lenObject = len("{}")
lenArray = len("[]")
)
// bytesPool recycles the *[]byte scratch buffers handed out by newBuffer and
// returned by freeBuffer. It has no New function, so Get yields nil when the
// pool is empty; newBuffer allocates in that case.
var bytesPool = sync.Pool{}
// MarshalJSON encodes the node as JSON text, which lets encoding/json
// serialize a *Node through json.Marshal.
//
// The encoding is produced in a pooled scratch buffer and copied into a fresh
// slice, so the returned bytes are owned by the caller. On failure it returns
// a nil slice together with the error; partially written output is never
// exposed.
func (self *Node) MarshalJSON() ([]byte, error) {
	buf := newBuffer()
	// Hand the scratch buffer back on every path, including the error path.
	defer freeBuffer(buf)

	if err := self.encode(buf); err != nil {
		return nil, err
	}

	// Copy out before the deferred freeBuffer recycles the scratch storage.
	ret := make([]byte, len(*buf))
	copy(ret, *buf)
	return ret, nil
}
// newBuffer returns an empty scratch buffer for encoding. A buffer recycled
// through bytesPool is preferred; when the pool has none, a new one is
// allocated with _MaxBuffer bytes of capacity.
func newBuffer() *[]byte {
	pooled := bytesPool.Get()
	if pooled == nil {
		fresh := make([]byte, 0, _MaxBuffer)
		return &fresh
	}
	return pooled.(*[]byte)
}
// freeBuffer truncates buf to zero length, keeping its capacity, and puts it
// back into bytesPool for reuse. The caller must not touch buf afterwards.
func freeBuffer(buf *[]byte) {
	emptied := (*buf)[:0]
	*buf = emptied
	bytesPool.Put(buf)
}
// encode appends the JSON encoding of the node to buf.
//
// Raw nodes are copied through verbatim; every other node is dispatched on
// its value type. It returns an error for a node that does not exist
// (V_NONE) and for any type the encoder does not recognise.
func (self *Node) encode(buf *[]byte) error {
	if self.IsRaw() {
		return self.encodeRaw(buf)
	}
	switch self.Type() {
	case V_NONE:
		return errors.New("value not exist")
	case V_NULL:
		return self.encodeNull(buf)
	case V_TRUE:
		return self.encodeTrue(buf)
	case V_FALSE:
		return self.encodeFalse(buf)
	case V_ARRAY:
		return self.encodeArray(buf)
	case V_OBJECT:
		return self.encodeObject(buf)
	case V_STRING:
		return self.encodeString(buf)
	case V_NUMBER:
		return self.encodeNumber(buf)
	default:
		// The error must be returned: building it without returning it made
		// unknown types report success while writing nothing.
		return errors.New("unsupported type")
	}
}
// encodeRaw appends the node's raw JSON text, as returned by Raw, to buf
// without modification. It always returns nil.
func (self *Node) encodeRaw(buf *[]byte) error {
	*buf = append(*buf, self.Raw()...)
	return nil
}
// encodeNull appends the JSON literal null to buf. It always returns nil.
func (self *Node) encodeNull(buf *[]byte) error {
	out := append(*buf, bytesNull...)
	*buf = out
	return nil
}
// encodeTrue appends the JSON literal true to buf. It always returns nil.
func (self *Node) encodeTrue(buf *[]byte) error {
	out := append(*buf, bytesTrue...)
	*buf = out
	return nil
}
// encodeFalse appends the JSON literal false to buf. It always returns nil.
func (self *Node) encodeFalse(buf *[]byte) error {
	out := append(*buf, bytesFalse...)
	*buf = out
	return nil
}
// encodeNumber appends the node's number text to buf exactly as stored, with
// no reformatting. It always returns nil.
func (self *Node) encodeNumber(buf *[]byte) error {
	*buf = append(*buf, addr2str(self.p, self.v)...)
	return nil
}
// appendJSONQuoted appends s to dst as a double-quoted JSON string literal
// and returns the extended slice.
//
// The quotation mark, the backslash and the control characters U+0000 through
// U+001F are escaped, as JSON requires; all other bytes, including multi-byte
// UTF-8 sequences, are copied through unchanged.
func appendJSONQuoted(dst []byte, s string) []byte {
	const hex = "0123456789abcdef"

	dst = append(dst, '"')
	start := 0 // start of the pending run of bytes that need no escaping
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= 0x20 && c != '"' && c != '\\' {
			continue
		}
		// Flush the clean run that precedes the byte needing an escape.
		dst = append(dst, s[start:i]...)
		switch c {
		case '"', '\\':
			dst = append(dst, '\\', c)
		case '\b':
			dst = append(dst, '\\', 'b')
		case '\f':
			dst = append(dst, '\\', 'f')
		case '\n':
			dst = append(dst, '\\', 'n')
		case '\r':
			dst = append(dst, '\\', 'r')
		case '\t':
			dst = append(dst, '\\', 't')
		default:
			// Remaining control characters have no short form.
			dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0x0f])
		}
		start = i + 1
	}
	dst = append(dst, s[start:]...)
	return append(dst, '"')
}

// encodeString appends the node's string value to buf as a JSON string
// literal, escaping any characters that may not appear verbatim between the
// quotes. It always returns nil.
func (self *Node) encodeString(buf *[]byte) error {
	*buf = appendJSONQuoted(*buf, addr2str(self.p, self.v))
	return nil
}

// encodeArray appends the JSON encoding of an array node to buf: "[]" for an
// empty array, otherwise the comma-separated encodings of its elements inside
// brackets. It stops at, and returns, the first error reported by an element.
func (self *Node) encodeArray(buf *[]byte) error {
	if self.isLazy() {
		// NOTE(review): presumably this finishes scanning the remaining
		// elements so len() covers the whole array — confirm in skipAllIndex.
		self.skipAllIndex()
	}

	count := self.len()
	if count == 0 {
		*buf = append(*buf, bytesArray...)
		return nil
	}

	*buf = append(*buf, '[')
	elem := (*Node)(self.p)
	for i := 0; i < count; i++ {
		if i != 0 {
			*buf = append(*buf, ',')
			elem = elem.unsafe_next()
		}
		if err := elem.encode(buf); err != nil {
			return err
		}
	}
	*buf = append(*buf, ']')
	return nil
}

// encode appends the pair to buf as `"key":value`. The key is written as an
// escaped JSON string literal; the result of encoding the value is returned.
func (self *Pair) encode(buf *[]byte) error {
	*buf = appendJSONQuoted(*buf, self.Key)
	*buf = append(*buf, ':')
	return self.Value.encode(buf)
}
// encodeObject appends the JSON encoding of an object node to buf: "{}" for
// an empty object, otherwise the comma-separated encodings of its pairs
// inside braces. It stops at, and returns, the first error reported by a pair.
func (self *Node) encodeObject(buf *[]byte) error {
	if self.isLazy() {
		// NOTE(review): presumably this finishes scanning the remaining
		// pairs so len() covers the whole object — confirm in skipAllKey.
		self.skipAllKey()
	}

	count := self.len()
	if count == 0 {
		*buf = append(*buf, bytesObject...)
		return nil
	}

	*buf = append(*buf, '{')
	pair := (*Pair)(self.p)
	for i := 0; i < count; i++ {
		if i != 0 {
			*buf = append(*buf, ',')
			pair = pair.unsafe_next()
		}
		if err := pair.encode(buf); err != nil {
			return err
		}
	}
	*buf = append(*buf, '}')
	return nil
}

141
ast/encode_test.go Normal file
View file

@ -0,0 +1,141 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`encoding/json`
`testing`
`github.com/bytedance/sonic/internal/native/types`
)
// TestEncodeValue marshals individually constructed nodes through
// json.Marshal and compares the output with the expected JSON text, or
// expects an error for nodes that cannot be encoded.
func TestEncodeValue(t *testing.T) {
	type testCase struct {
		node Node
		exp  string
		err  bool
	}
	cases := []testCase{
		{NewNull(), "null", false},
		{NewBool(true), "true", false},
		{NewBool(false), "false", false},
		{NewNumber("0.0"), "0.0", false},
		{NewString(""), `""`, false},
		{NewArray([]Node{}), "[]", false},
		{NewArray([]Node{NewBool(true), NewString("true")}), `[true,"true"]`, false},
		{NewObject([]Pair{{"a", NewNull()}, {"b", NewNumber("0")}}), `{"a":null,"b":0}`, false},
		{NewObject([]Pair{}), `{}`, false},
		{newRawNode(`[{ }]`, types.V_ARRAY), "[{}]", false},
		{Node{}, "", true},
		{Node{t: types.V_EOF}, "", true},
	}
	for i, tc := range cases {
		// The pointer is required: MarshalJSON has a pointer receiver.
		out, err := json.Marshal(&tc.node)
		switch {
		case tc.err && err == nil:
			t.Fatal(i)
		case tc.err:
			// The expected failure occurred; nothing more to check.
		case err != nil:
			t.Fatal(i, err)
		case string(out) != tc.exp:
			t.Fatal(i, string(out))
		}
	}
}
// TestEncodeNode checks that a document obtained through the searcher
// marshals back to its original text at three points: straight after the
// lookup, after skipAllKey, and after loadAllKey.
func TestEncodeNode(t *testing.T) {
	data := `{"a":[{},[],-0.1,true,false,null,""],"b":0,"c":true,"d":false,"e":null,"g":""}`
	root, e := NewSearcher(data).GetByPath()
	if e != nil {
		// Report the lookup error itself rather than dumping the node.
		t.Fatal(e)
	}

	// roundTrip marshals root in its current state and requires the output
	// to equal the input; stage labels the failure.
	roundTrip := func(stage string) {
		t.Helper()
		ret, err := root.MarshalJSON()
		if err != nil {
			t.Fatal(stage, err)
		}
		if string(ret) != data {
			t.Fatal(stage, string(ret))
		}
	}

	roundTrip("after GetByPath:")
	root.skipAllKey()
	roundTrip("after skipAllKey:")
	root.loadAllKey()
	roundTrip("after loadAllKey:")
}
// BenchmarkEncodeRaw measures MarshalJSON on the node that
// NewSearcher(_TwitterJson).GetByPath() returns when called with no path.
func BenchmarkEncodeRaw(b *testing.B) {
	data := _TwitterJson
	root, e := NewSearcher(data).GetByPath()
	if e != nil {
		// Report the lookup error itself rather than dumping the node.
		b.Fatal(e)
	}
	b.SetBytes(int64(len(data)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := root.MarshalJSON(); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkEncodeSkip measures MarshalJSON on the node returned by
// NewParser(_TwitterJson).Parse(), with no further preparation of the node.
func BenchmarkEncodeSkip(b *testing.B) {
	data := _TwitterJson
	root, e := NewParser(data).Parse()
	if e != 0 {
		// Report the parse error itself rather than dumping the node.
		b.Fatal(e)
	}
	b.SetBytes(int64(len(data)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := root.MarshalJSON(); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkEncodeLoad measures MarshalJSON on the node returned by
// NewParser(_TwitterJson).Parse() after loadAllKey has been called on it
// outside the timed region.
func BenchmarkEncodeLoad(b *testing.B) {
	data := _TwitterJson
	root, e := NewParser(data).Parse()
	if e != 0 {
		// Report the parse error itself rather than dumping the node.
		b.Fatal(e)
	}
	root.loadAllKey()
	b.SetBytes(int64(len(data)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := root.MarshalJSON(); err != nil {
			b.Fatal(err)
		}
	}
}

View file

@ -74,11 +74,11 @@ type Node struct {
// V_OBJECT = 6
// V_STRING = 7
// V_NUMBER = 33
func (self *Node) Type() int {
func (self Node) Type() int {
return int(self.t & _MASK_LAZY & _MASK_RAW)
}
func (self *Node) itype() types.ValueType {
func (self Node) itype() types.ValueType {
return self.t & _MASK_LAZY & _MASK_RAW
}
@ -88,11 +88,11 @@ func (self *Node) Exists() bool {
}
// IsRaw returns true if node's underlying value is raw json
func (self *Node) IsRaw() bool {
func (self Node) IsRaw() bool {
return self.t&_V_RAW != 0
}
func (self *Node) isLazy() bool {
func (self Node) isLazy() bool {
return self.t&_V_LAZY != 0
}
@ -100,7 +100,7 @@ func (self *Node) isLazy() bool {
// Raw returns underlying json string of an raw node,
// which usually created by Search() api
func (self *Node) Raw() string {
func (self Node) Raw() string {
if !self.IsRaw() {
panic("value cannot be represented as raw json")
}
@ -186,7 +186,7 @@ func (self *Node) Len() int {
}
}
func (self *Node) len() int {
func (self Node) len() int {
return int(self.v & _LEN_MASK)
}
@ -200,7 +200,7 @@ func (self *Node) Cap() int {
}
}
func (self *Node) cap() int {
func (self Node) cap() int {
return int(self.v >> _CAP_BITS)
}

View file

@ -18,10 +18,15 @@ package ast
import (
`unsafe`
`reflect`
`github.com/bytedance/sonic/internal/rt`
)
var (
// byteType is the result of passing reflect.TypeOf(byte(0)) through
// rt.UnpackType.
// NOTE(review): presumably the *rt.GoType descriptor for byte, intended as
// the element type argument of growslice — confirm against its callers.
byteType = rt.UnpackType(reflect.TypeOf(byte(0)))
)
//go:noescape
//go:linkname memmove runtime.memmove
//goland:noinspection GoUnusedParameter
@ -30,3 +35,7 @@ func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr)
// unsafe_NewArray is a bodyless declaration bound to reflect.unsafe_NewArray
// by the go:linkname directive below.
// NOTE(review): presumably allocates storage for n elements of type typ —
// confirm against the reflect/runtime source of the targeted Go version.
//go:linkname unsafe_NewArray reflect.unsafe_NewArray
//goland:noinspection GoUnusedParameter
func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer
// growslice is a bodyless declaration bound to runtime.growslice by the
// go:linkname directive below.
// NOTE(review): presumably returns old regrown to hold at least cap elements
// of type et; the runtime function is internal and its signature can change
// between Go releases — confirm against every supported Go version.
//go:linkname growslice runtime.growslice
//goland:noinspection GoUnusedParameter
func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice

View file

@ -11,5 +11,6 @@ go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_Generic_
# Benchmarks for the ast package: search, parser/get, and node encoding.
# $pwd is quoted so a checkout path containing spaces does not word-split.
cd "$pwd/ast"
go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkSearchOne_Gjson|BenchmarkSearchOne_Jsoniter|BenchmarkSearchOne_Sonic|BenchmarkSearchOne_Parallel_Gjson|BenchmarkSearchOne_Parallel_Jsoniter|BenchmarkSearchOne_Parallel_Sonic)$"
go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_StdLib|BenchmarkParser_JsonIter|BenchmarkParser_Sonic|BenchmarkParser_Parallel_StdLib|BenchmarkParser_Parallel_JsonIter|BenchmarkParser_Parallel_Sonic|BenchmarkGetOne_Gjson|BenchmarkGetOne_Jsoniter|BenchmarkGetOne_Sonic|BenchmarkGetSeven_Gjson|BenchmarkGetSeven_Jsoniter|BenchmarkGetSeven_SonicParser)$"
go test -benchmem -run=^$ -benchtime=100000x -bench '^(BenchmarkEncodeRaw|BenchmarkEncodeSkip|BenchmarkEncodeLoad)$'
# Return to the directory the script started from.
cd "$pwd"