diff --git a/.github/workflows/push-check-go115.yml b/.github/workflows/push-check-go115.yml index fcd8d90..d4502b8 100644 --- a/.github/workflows/push-check-go115.yml +++ b/.github/workflows/push-check-go115.yml @@ -21,4 +21,4 @@ jobs: ${{ runner.os }}-go- - name: Unit Test - run: GOMAXPROCS=4 go test -v -covermode=atomic -coverprofile=coverage.out ./... + run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -covermode=atomic -coverprofile=coverage.out ./... diff --git a/.github/workflows/push-check-go116.yml b/.github/workflows/push-check-go116.yml index fdfa57f..a962f08 100644 --- a/.github/workflows/push-check-go116.yml +++ b/.github/workflows/push-check-go116.yml @@ -21,4 +21,4 @@ jobs: ${{ runner.os }}-go- - name: Unit Test - run: GOMAXPROCS=4 go test -v -race -covermode=atomic -coverprofile=coverage.out ./... + run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./... diff --git a/.github/workflows/push-check-go117.yml b/.github/workflows/push-check-go117.yml index 54a73f3..0ff0782 100644 --- a/.github/workflows/push-check-go117.yml +++ b/.github/workflows/push-check-go117.yml @@ -21,4 +21,4 @@ jobs: ${{ runner.os }}-go- - name: Unit Test - run: GOMAXPROCS=4 go test -v -race -covermode=atomic -coverprofile=coverage.out ./... + run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./... diff --git a/ast/iterator.go b/ast/iterator.go index 3c29fb8..d43ef4e 100644 --- a/ast/iterator.go +++ b/ast/iterator.go @@ -17,6 +17,8 @@ package ast import ( + `fmt` + `github.com/bytedance/sonic/internal/native/types` ) @@ -97,3 +99,62 @@ func (self *ObjectIterator) Next(p *Pair) bool { return true } } + +// Sequence represents scanning path of single-layer nodes. +// Index indicates the value's order in both V_ARRAY and V_OBJECT json. +// Key is the value's key (for V_OBJECT json only, otherwise it will be nil). +type Sequence struct { + Index int + Key *string + // Level int +} + +// String is string representation of one Sequence +func (s Sequence) String() string { + k := "" + if s.Key != nil { + k = *s.Key + } + return fmt.Sprintf("Sequence(%d, %q)", s.Index, k) +} + +type Scanner func(path Sequence, node *Node) bool + +// ForEach scans one V_OBJECT node's children from JSON head to tail, +// and pass the Sequence and Node of corresponding JSON value. +// +// Especailly, if the node is not V_ARRAY or V_OBJECT, +// the node itself will be returned and Sequence.Index == -1. +func (self *Node) ForEach(sc Scanner) error { + switch self.itype() { + case types.V_ARRAY: + ns, err := self.UnsafeArray() + if err != nil { + return err + } + for i := range ns { + if !sc(Sequence{i, nil}, &ns[i]) { + return err + } + } + case types.V_OBJECT: + ns, err := self.UnsafeMap() + if err != nil { + return err + } + for i := range ns { + if !sc(Sequence{i, &ns[i].Key}, &ns[i].Value) { + return err + } + } + default: + sc(Sequence{-1, nil}, self) + } + return self.Check() +} + +type PairSlice []Pair + +func (self PairSlice) Sort() { + radixQsort(self, 0, maxDepth(len(self))) +} \ No newline at end of file diff --git a/ast/iterator_test.go b/ast/iterator_test.go index 9ed1056..c65095c 100644 --- a/ast/iterator_test.go +++ b/ast/iterator_test.go @@ -20,13 +20,14 @@ import ( `fmt` `strconv` `testing` + + `github.com/stretchr/testify/assert` ) -func getTestIteratorSample() (string, int) { +func getTestIteratorSample(loop int) (string, int) { var data []int var v1 = "" var v2 = "" - loop := _DEFAULT_NODE_CAP+1 for i:=0;i= ns { - return newSyntaxError(parser.syntaxError(types.ERR_EOF)) - } - - /* check for empty array */ - if parser.s[parser.p] == ']' { - parser.p++ - self.setArray(ret) - return nil - } - - var val Node - /* skip the value */ - if start, err := parser.skip(); err != 0 { - return newSyntaxError(parser.syntaxError(err)) - } else { - t := switchRawType(parser.s[start]) - if t == _V_NONE { - return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) - } - val = newRawNode(parser.s[start:parser.p], t) - } - - /* add the value to result */ - ret = append(ret, val) - parser.p = parser.lspace(parser.p) - - /* check for EOF */ - if parser.p >= ns { - return newSyntaxError(parser.syntaxError(types.ERR_EOF)) - } - - /* check for the next character */ - switch parser.s[parser.p] { - case ',': - parser.p++ - self.setLazyArray(parser, ret) - return &ret[len(ret)-1] - case ']': - parser.p++ - self.setArray(ret) - return &ret[len(ret)-1] - default: - return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) - } -} - -func (self *Node) skipNextPair() (*Pair) { - if !self.isLazy() { - return nil - } - - parser, stack := self.getParserAndObjectStack() - ret := stack.v - sp := parser.p - ns := len(parser.s) - - /* check for EOF */ - if parser.p = parser.lspace(sp); parser.p >= ns { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} - } - - /* check for empty object */ - if parser.s[parser.p] == '}' { - parser.p++ - self.setObject(ret) - return nil - } - - /* decode one pair */ - var val Node - var njs types.JsonState - var err types.ParsingError - - /* decode the key */ - if njs = parser.decodeValue(); njs.Vt != types.V_STRING { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} - } - - /* extract the key */ - idx := parser.p - 1 - key := parser.s[njs.Iv:idx] - - /* check for escape sequence */ - if njs.Ep != -1 { - if key, err = unquote.String(key); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} - } - } - - /* expect a ':' delimiter */ - if err = parser.delim(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} - } - - /* skip the value */ - if start, err := parser.skip(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} - } else { - t := switchRawType(parser.s[start]) - if t == _V_NONE { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} - } - val = newRawNode(parser.s[start:parser.p], t) - } - - /* add the value to result */ - ret = append(ret, Pair{Key: key, Value: val}) - parser.p = parser.lspace(parser.p) - - /* check for EOF */ - if parser.p >= ns { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} - } - - /* check for the next character */ - switch parser.s[parser.p] { - case ',': - parser.p++ - self.setLazyObject(parser, ret) - return &ret[len(ret)-1] - case '}': - parser.p++ - self.setObject(ret) - return &ret[len(ret)-1] - default: - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} - } -} - func (self *Node) skipKey(key string) (*Node, int) { nb := self.len() lazy := self.isLazy() diff --git a/ast/node_test.go b/ast/node_test.go index 2ddcede..1e8ad32 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -25,11 +25,66 @@ import ( `strconv` `testing` + `github.com/bytedance/sonic/encoder` `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/stretchr/testify/assert` ) + +func TestNodeSortKeys(t *testing.T) { + root, err := NewSearcher(_TwitterJson).GetByPath() + if err != nil { + t.Fatal(err) + } + obj, err := root.MapUseNumber() + if err != nil { + t.Fatal(err) + } + exp, err := encoder.Encode(obj, encoder.SortMapKeys) + if err != nil { + t.Fatal(err) + } + if err := root.SortKeys(true); err != nil { + t.Fatal(err) + } + act, err := root.MarshalJSON() + if err != nil { + t.Fatal(err) + } + assert.Equal(t, len(exp), len(act)) + assert.Equal(t, string(exp), string(act)) +} + +func BenchmarkNodeSortKeys(b *testing.B) { + root, err := NewSearcher(_TwitterJson).GetByPath() + if err != nil { + b.Fatal(err) + } + if err := root.LoadAll(); err != nil { + b.Fatal(err) + } + + b.Run("single", func(b *testing.B) { + r := root.Get("statuses") + if r.Check() != nil { + b.Fatal(r.Error()) + } + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + for i:=0; i= ns { + return types.ERR_EOF + } + + /* check for empty object */ + if self.s[self.p] == '}' { + self.p++ + return _ERR_NOT_FOUND + } + + var njs types.JsonState + var err types.ParsingError + /* decode each pair */ + for { + + /* decode the key */ + if njs = self.decodeValue(); njs.Vt != types.V_STRING { + return types.ERR_INVALID_CHAR + } + + /* extract the key */ + idx := self.p - 1 + key := self.s[njs.Iv:idx] + + /* check for escape sequence */ + if njs.Ep != -1 { + if key, err = unquote.String(key); err != 0 { + return err + } + } + + /* expect a ':' delimiter */ + if err = self.delim(); err != 0 { + return err + } + + /* skip value */ + if key != match { + if _, err = self.skip(); err != 0 { + return err + } + } else { + return 0 + } + + /* check for EOF */ + self.p = self.lspace(self.p) + if self.p >= ns { + return types.ERR_EOF + } + + /* check for the next character */ + switch self.s[self.p] { + case ',': + self.p++ + case '}': + self.p++ + return _ERR_NOT_FOUND + default: + return types.ERR_INVALID_CHAR + } + } +} + +func (self *Parser) searchIndex(idx int) types.ParsingError { + ns := len(self.s) + if err := self.array(); err != 0 { + return err + } + + /* check for EOF */ + if self.p = self.lspace(self.p); self.p >= ns { + return types.ERR_EOF + } + + /* check for empty array */ + if self.s[self.p] == ']' { + self.p++ + return _ERR_NOT_FOUND + } + + var err types.ParsingError + /* allocate array space and parse every element */ + for i := 0; i < idx; i++ { + + /* decode the value */ + if _, err = self.skip(); err != 0 { + return err + } + + /* check for EOF */ + self.p = self.lspace(self.p) + if self.p >= ns { + return types.ERR_EOF + } + + /* check for the next character */ + switch self.s[self.p] { + case ',': + self.p++ + case ']': + self.p++ + return _ERR_NOT_FOUND + default: + return types.ERR_INVALID_CHAR + } + } + + return 0 +} + +func (self *Node) skipNextNode() *Node { + if !self.isLazy() { + return nil + } + + parser, stack := self.getParserAndArrayStack() + ret := stack.v + sp := parser.p + ns := len(parser.s) + + /* check for EOF */ + if parser.p = parser.lspace(sp); parser.p >= ns { + return newSyntaxError(parser.syntaxError(types.ERR_EOF)) + } + + /* check for empty array */ + if parser.s[parser.p] == ']' { + parser.p++ + self.setArray(ret) + return nil + } + + var val Node + /* skip the value */ + if start, err := parser.skip(); err != 0 { + return newSyntaxError(parser.syntaxError(err)) + } else { + t := switchRawType(parser.s[start]) + if t == _V_NONE { + return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) + } + val = newRawNode(parser.s[start:parser.p], t) + } + + /* add the value to result */ + ret = append(ret, val) + parser.p = parser.lspace(parser.p) + + /* check for EOF */ + if parser.p >= ns { + return newSyntaxError(parser.syntaxError(types.ERR_EOF)) + } + + /* check for the next character */ + switch parser.s[parser.p] { + case ',': + parser.p++ + self.setLazyArray(parser, ret) + return &ret[len(ret)-1] + case ']': + parser.p++ + self.setArray(ret) + return &ret[len(ret)-1] + default: + return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) + } +} + +func (self *Node) skipNextPair() (*Pair) { + if !self.isLazy() { + return nil + } + + parser, stack := self.getParserAndObjectStack() + ret := stack.v + sp := parser.p + ns := len(parser.s) + + /* check for EOF */ + if parser.p = parser.lspace(sp); parser.p >= ns { + return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + } + + /* check for empty object */ + if parser.s[parser.p] == '}' { + parser.p++ + self.setObject(ret) + return nil + } + + /* decode one pair */ + var val Node + var njs types.JsonState + var err types.ParsingError + + /* decode the key */ + if njs = parser.decodeValue(); njs.Vt != types.V_STRING { + return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + } + + /* extract the key */ + idx := parser.p - 1 + key := parser.s[njs.Iv:idx] + + /* check for escape sequence */ + if njs.Ep != -1 { + if key, err = unquote.String(key); err != 0 { + return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + } + } + + /* expect a ':' delimiter */ + if err = parser.delim(); err != 0 { + return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + } + + /* skip the value */ + if start, err := parser.skip(); err != 0 { + return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + } else { + t := switchRawType(parser.s[start]) + if t == _V_NONE { + return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + } + val = newRawNode(parser.s[start:parser.p], t) + } + + /* add the value to result */ + ret = append(ret, Pair{Key: key, Value: val}) + parser.p = parser.lspace(parser.p) + + /* check for EOF */ + if parser.p >= ns { + return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + } + + /* check for the next character */ + switch parser.s[parser.p] { + case ',': + parser.p++ + self.setLazyObject(parser, ret) + return &ret[len(ret)-1] + case '}': + parser.p++ + self.setObject(ret) + return &ret[len(ret)-1] + default: + return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + } +} + + /** Parser Factory **/ // Loads parse all json into interface{} diff --git a/ast/search.go b/ast/search.go index f7d372f..9e809bb 100644 --- a/ast/search.go +++ b/ast/search.go @@ -20,7 +20,6 @@ import ( `fmt` `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/unquote` ) type Searcher struct { @@ -70,122 +69,4 @@ func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { } return newRawNode(self.parser.s[start:self.parser.p], t), nil -} - -func (self *Parser) searchKey(match string) types.ParsingError { - ns := len(self.s) - if err := self.object(); err != 0 { - return err - } - - /* check for EOF */ - if self.p = self.lspace(self.p); self.p >= ns { - return types.ERR_EOF - } - - /* check for empty object */ - if self.s[self.p] == '}' { - self.p++ - return _ERR_NOT_FOUND - } - - var njs types.JsonState - var err types.ParsingError - /* decode each pair */ - for { - - /* decode the key */ - if njs = self.decodeValue(); njs.Vt != types.V_STRING { - return types.ERR_INVALID_CHAR - } - - /* extract the key */ - idx := self.p - 1 - key := self.s[njs.Iv:idx] - - /* check for escape sequence */ - if njs.Ep != -1 { - if key, err = unquote.String(key); err != 0 { - return err - } - } - - /* expect a ':' delimiter */ - if err = self.delim(); err != 0 { - return err - } - - /* skip value */ - if key != match { - if _, err = self.skip(); err != 0 { - return err - } - } else { - return 0 - } - - /* check for EOF */ - self.p = self.lspace(self.p) - if self.p >= ns { - return types.ERR_EOF - } - - /* check for the next character */ - switch self.s[self.p] { - case ',': - self.p++ - case '}': - self.p++ - return _ERR_NOT_FOUND - default: - return types.ERR_INVALID_CHAR - } - } -} - -func (self *Parser) searchIndex(idx int) types.ParsingError { - ns := len(self.s) - if err := self.array(); err != 0 { - return err - } - - /* check for EOF */ - if self.p = self.lspace(self.p); self.p >= ns { - return types.ERR_EOF - } - - /* check for empty array */ - if self.s[self.p] == ']' { - self.p++ - return _ERR_NOT_FOUND - } - - var err types.ParsingError - /* allocate array space and parse every element */ - for i := 0; i < idx; i++ { - - /* decode the value */ - if _, err = self.skip(); err != 0 { - return err - } - - /* check for EOF */ - self.p = self.lspace(self.p) - if self.p >= ns { - return types.ERR_EOF - } - - /* check for the next character */ - switch self.s[self.p] { - case ',': - self.p++ - case ']': - self.p++ - return _ERR_NOT_FOUND - default: - return types.ERR_INVALID_CHAR - } - } - - return 0 -} +} \ No newline at end of file diff --git a/ast/sort.go b/ast/sort.go new file mode 100644 index 0000000..0a9f145 --- /dev/null +++ b/ast/sort.go @@ -0,0 +1,206 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ast + +// Algorithm 3-way Radix Quicksort, d means the radix. +// Reference: https://algs4.cs.princeton.edu/51radix/Quick3string.java.html +func radixQsort(kvs PairSlice, d, maxDepth int) { + for len(kvs) > 11 { + // To avoid the worst case of quickSort (time: O(n^2)), use introsort here. + // Reference: https://en.wikipedia.org/wiki/Introsort and + // https://github.com/golang/go/issues/467 + if maxDepth == 0 { + heapSort(kvs, 0, len(kvs)) + return + } + maxDepth-- + + p := pivot(kvs, d) + lt, i, gt := 0, 0, len(kvs) + for i < gt { + c := byteAt(kvs[i].Key, d) + if c < p { + swap(kvs, lt, i) + i++ + lt++ + } else if c > p { + gt-- + swap(kvs, i, gt) + } else { + i++ + } + } + + // kvs[0:lt] < v = kvs[lt:gt] < kvs[gt:len(kvs)] + // Native implemention: + // radixQsort(kvs[:lt], d, maxDepth) + // if p > -1 { + // radixQsort(kvs[lt:gt], d+1, maxDepth) + // } + // radixQsort(kvs[gt:], d, maxDepth) + // Optimize as follows: make recursive calls only for the smaller parts. + // Reference: https://www.geeksforgeeks.org/quicksort-tail-call-optimization-reducing-worst-case-space-log-n/ + if p == -1 { + if lt > len(kvs) - gt { + radixQsort(kvs[gt:], d, maxDepth) + kvs = kvs[:lt] + } else { + radixQsort(kvs[:lt], d, maxDepth) + kvs = kvs[gt:] + } + } else { + ml := maxThree(lt, gt-lt, len(kvs)-gt) + if ml == lt { + radixQsort(kvs[lt:gt], d+1, maxDepth) + radixQsort(kvs[gt:], d, maxDepth) + kvs = kvs[:lt] + } else if ml == gt-lt { + radixQsort(kvs[:lt], d, maxDepth) + radixQsort(kvs[gt:], d, maxDepth) + kvs = kvs[lt:gt] + d += 1 + } else { + radixQsort(kvs[:lt], d, maxDepth) + radixQsort(kvs[lt:gt], d+1, maxDepth) + kvs = kvs[gt:] + } + } + } + insertRadixSort(kvs, d) +} + +func insertRadixSort(kvs PairSlice, d int) { + for i := 1; i < len(kvs); i++ { + for j := i; j > 0 && lessFrom(kvs[j].Key, kvs[j-1].Key, d); j-- { + swap(kvs, j, j-1) + } + } +} + +func pivot(kvs PairSlice, d int) int { + m := len(kvs) >> 1 + if len(kvs) > 40 { + // Tukey's ``Ninther,'' median of three mediankvs of three. + t := len(kvs) / 8 + return medianThree( + medianThree(byteAt(kvs[0].Key, d), byteAt(kvs[t].Key, d), byteAt(kvs[2*t].Key, d)), + medianThree(byteAt(kvs[m].Key, d), byteAt(kvs[m-t].Key, d), byteAt(kvs[m+t].Key, d)), + medianThree(byteAt(kvs[len(kvs)-1].Key, d), + byteAt(kvs[len(kvs)-1-t].Key, d), + byteAt(kvs[len(kvs)-1-2*t].Key, d))) + } + return medianThree(byteAt(kvs[0].Key, d), byteAt(kvs[m].Key, d), byteAt(kvs[len(kvs)-1].Key, d)) +} + +func medianThree(i, j, k int) int { + if i > j { + i, j = j, i + } // i < j + if k < i { + return i + } + if k > j { + return j + } + return k +} + +func maxThree(i, j, k int) int { + max := i + if max < j { + max = j + } + if max < k { + max = k + } + return max +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returnkvs 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// siftDown implements the heap property on kvs[lo:hi]. +// first is an offset into the array where the root of the heap lies. +func siftDown(kvs PairSlice, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && kvs[first+child].Key < kvs[first+child+1].Key { + child++ + } + if kvs[first+root].Key >= kvs[first+child].Key { + return + } + swap(kvs, first+root, first+child) + root = child + } +} + +func heapSort(kvs PairSlice, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with the greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(kvs, i, hi, first) + } + + // Pop elements, the largest first, into end of kvs. + for i := hi - 1; i >= 0; i-- { + swap(kvs, first, first+i) + siftDown(kvs, lo, i, first) + } +} + +// Note that Pair.Key is NOT pointed to Pair.m when map key is integer after swap +func swap(kvs PairSlice, a, b int) { + kvs[a].Key, kvs[b].Key = kvs[b].Key, kvs[a].Key + kvs[a].Value, kvs[b].Value = kvs[b].Value, kvs[a].Value +} + +// Compare two strings from the pos d. +func lessFrom(a, b string, d int) bool { + l := len(a) + if l > len(b) { + l = len(b) + } + for i := d; i < l; i++ { + if a[i] == b[i] { + continue + } + return a[i] < b[i] + } + return len(a) < len(b) +} + +func byteAt(b string, p int) int { + if p < len(b) { + return int(b[p]) + } + return -1 +}