From fa5f5026d9a4b8b49a18d731fecc113add41dde4 Mon Sep 17 00:00:00 2001 From: Yi Duan Date: Fri, 3 Sep 2021 20:05:52 +0800 Subject: [PATCH] opt(ast): speed up api (#85) - inline findkey() - use std append() instead of runtime.memmove() Co-authored-by: duanyi.aster --- ast/node.go | 130 +++++++++++--------- ast/node_test.go | 301 +++++++++++++++++++++++++++++++++++++++++------ bench.sh | 2 + 3 files changed, 341 insertions(+), 92 deletions(-) diff --git a/ast/node.go b/ast/node.go index 02ff3fb..67440d2 100644 --- a/ast/node.go +++ b/ast/node.go @@ -267,8 +267,11 @@ func (self Node) cap() int { // Set sets the node of given key under object parent // If the key doesn't exist, it will be append to the last func (self *Node) Set(key string, node Node) (bool, error) { - p := self.Get(key) + if err := node.Check(); err != nil { + return false, err + } + p := self.Get(key) if !p.Exists() { l := self.len() c := self.cap() @@ -311,6 +314,10 @@ func (self *Node) Unset(key string) (bool, error) { // // The index must within parent array's children func (self *Node) SetByIndex(index int, node Node) (bool, error) { + if err := node.Check(); err != nil { + return false, err + } + p := self.Index(index) if !p.Exists() { return false, ErrNotExist @@ -359,19 +366,16 @@ func (self *Node) Add(node Node) error { return err } - l := self.len() - c := self.cap() - if l == c { - // TODO: maybe change append_extra_size in future - c += _DEFAULT_NODE_CAP - mem := unsafe_NewArray(_NODE_TYPE, c) - memmove(mem, self.p, _NODE_SIZE * uintptr(l)) - self.p = mem - } + var p rt.GoSlice + p.Cap = self.cap() + p.Len = self.len() + p.Ptr = self.p - v := self.nodeAt(l) - *v = node - self.setCapAndLen(c, l+1) + s := *(*[]Node)(unsafe.Pointer(&p)) + s = append(s, node) + + self.p = unsafe.Pointer(&s[0]) + self.setCapAndLen(cap(s), len(s)) return nil } @@ -410,8 +414,8 @@ func (self *Node) Get(key string) *Node { return n } -// Index loads given index of an node on demands, -// node type can be either V_OBJECT or V_ARRAY +// Index indexies node at given idx, +// node type CAN be either V_OBJECT or V_ARRAY func (self *Node) Index(idx int) *Node { if err := self.checkRaw(); err != nil { return unwrapError(err) @@ -433,6 +437,30 @@ func (self *Node) Index(idx int) *Node { } } +// IndexPair indexies pair at given idx, +// node type MUST be either V_OBJECT +func (self *Node) IndexPair(idx int) *Pair { + if err := self.should(types.V_OBJECT, "an object"); err != nil { + return nil + } + return self.skipIndexPair(idx) +} + +// IndexOrGet firstly use idx to index a value and check if its key matches +// If not, then use the key to search value +func (self *Node) IndexOrGet(idx int, key string) *Node { + if err := self.should(types.V_OBJECT, "an object"); err != nil { + return unwrapError(err) + } + + pr := self.skipIndexPair(idx) + if pr != nil && pr.Key == key { + return &pr.Value + } + n, _ := self.skipKey(key) + return n +} + // Values returns iterator for array's children traversal func (self *Node) Values() (ListIterator, error) { if err := self.should(types.V_ARRAY, "an array"); err != nil { @@ -630,7 +658,7 @@ func (self *Node) InterfaceUseNode() (interface{}, error) { } return self.toGenericArrayUseNode() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.skipAllKey(); err != nil { return nil, err } return self.toGenericObjectUseNode() @@ -701,34 +729,6 @@ func (self *Node) pairAt(i int) *Pair { return (*Pair)(unsafe.Pointer(uintptr(p) + uintptr(i)*_PAIR_SIZE)) } -func (self *Node) findKey(key string) (*Node, int) { - nb := self.len() - if nb <= 0 { - return nil, -1 - } - - var p *Pair - if !self.isLazy() { - p = (*Pair)(self.p) - } else { - s := (*parseObjectStack)(self.p) - p = &s.v[0] - } - - if p.Key == key { - return &p.Value, 0 - } - for i := 1; i < nb; i++ { - p = p.unsafe_next() - if p.Key == key { - return &p.Value, i - } - } - - /* not found */ - return nil, -1 -} - func (self *Node) getParserAndArrayStack() (*Parser, *parseArrayStack) { stack := (*parseArrayStack)(self.p) ret := (*rt.GoSlice)(unsafe.Pointer(&stack.v)) @@ -917,26 +917,46 @@ func (self *Node) skipNextPair() (*Pair) { } func (self *Node) skipKey(key string) (*Node, int) { - node, pos := self.findKey(key) - if node != nil { - return node, pos + nb := self.len() + lazy := self.isLazy() + + if nb > 0 { + /* linear search */ + var p *Pair + if lazy { + s := (*parseObjectStack)(self.p) + p = &s.v[0] + } else { + p = (*Pair)(self.p) + } + + if p.Key == key { + return &p.Value, 0 + } + for i := 1; i < nb; i++ { + p = p.unsafe_next() + if p.Key == key { + return &p.Value, i + } + } } - if !self.isLazy() { - return &Node{}, -1 + + /* not found */ + if !lazy { + return nil, -1 } // lazy load - var i = self.len() - for last := self.skipNextPair(); last != nil; last = self.skipNextPair() { + for last, i := self.skipNextPair(), nb; last != nil; last, i = self.skipNextPair(), i+1 { if last.Value.Check() != nil { return &last.Value, -1 } if last.Key == key { return &last.Value, i } - i++ } - return &Node{}, -1 + + return nil, -1 } func (self *Node) skipIndex(index int) *Node { @@ -946,7 +966,7 @@ func (self *Node) skipIndex(index int) *Node { return v } if !self.isLazy() { - return &Node{} + return nil } // lazy load @@ -959,7 +979,7 @@ func (self *Node) skipIndex(index int) *Node { } } - return &Node{} + return nil } func (self *Node) skipIndexPair(index int) *Pair { diff --git a/ast/node_test.go b/ast/node_test.go index 574f593..0eac58f 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -30,6 +30,38 @@ import ( var parallelism = 4 +func TestIndexPair(t *testing.T) { + root, _ := NewParser(`{"a":1,"b":2}`).Parse() + a := root.IndexPair(0) + if a == nil || a.Key != "a" { + t.Fatal(a) + } + b := root.IndexPair(1) + if b == nil || b.Key != "b" { + t.Fatal(b) + } + c := root.IndexPair(2) + if c != nil { + t.Fatal(c) + } +} + +func TestIndexOrGet(t *testing.T) { + root, _ := NewParser(`{"a":1,"b":2}`).Parse() + a := root.IndexOrGet(0, "a") + if v, err := a.Int64(); err != nil || v != int64(1) { + t.Fatal(a) + } + a = root.IndexOrGet(0, "b") + if v, err := a.Int64(); err != nil || v != int64(2) { + t.Fatal(a) + } + a = root.IndexOrGet(0, "c") + if a.Valid() { + t.Fatal(a) + } +} + func TestTypeCast(t *testing.T) { type tcase struct { method string @@ -634,21 +666,6 @@ func TestNodeAdd(t *testing.T) { } } -func BenchmarkNodeRaw(b *testing.B) { - root, derr := NewSearcher(_TwitterJson).GetByPath("search_metadata") - if derr != nil { - b.Fatalf("decode failed: %v", derr.Error()) - } - b.SetParallelism(parallelism) - b.ResetTimer() - - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - root.Raw() - } - }) -} - func BenchmarkNodeGetByPath(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { @@ -682,7 +699,7 @@ func BenchmarkStructGetByPath_Jsoniter(b *testing.B) { }) } -func BenchmarkNodeGet(b *testing.B) { +func BenchmarkNodeIndex(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 { b.Fatalf("decode failed: %v", derr.Error()) @@ -695,7 +712,73 @@ func BenchmarkNodeGet(b *testing.B) { node.Set("test5", NewNumber("5")) b.ResetTimer() for i := 0; i < b.N; i++ { - node.Get("text") + node.Index(2) + } +} + +func BenchmarkStructIndex(b *testing.B) { + type T struct { + A Node + B Node + C Node + D Node + E Node + } + var obj = new(T) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = obj.C + } +} + +func BenchmarkSliceIndex(b *testing.B) { + var obj = []Node{Node{},Node{},Node{},Node{},Node{}} + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = obj[2] + } +} + +func BenchmarkMapIndex(b *testing.B) { + var obj = map[string]interface{}{"test1":Node{}, "test2":Node{}, "test3":Node{}, "test4":Node{}, "test5":Node{}} + b.ResetTimer() + for i := 0; i < b.N; i++ { + for k := range obj { + if k == "test3" { + break + } + } + } +} + +func BenchmarkNodeGet(b *testing.B) { + var N = 5 + var half = "test" + strconv.Itoa(N/2+1) + root, derr := NewParser(_TwitterJson).Parse() + if derr != 0 { + b.Fatalf("decode failed: %v", derr.Error()) + } + node := root.Get("statuses").Index(3).Get("entities").Get("hashtags").Index(0) + for i:=0; i