2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

feat:(ast) support sorting node keys (#164)

* feat: ast.Node support ForEach() iteration (DFS)

Change-Id: Ia53f1db2814036e12b760dfbb7a21094a6abd541

* feat: support Node's key sorting

Change-Id: I0b93d9b4feada853fa2ca9f48277da71948a95be

* fmt

Change-Id: I1a53170959c08f1a32f02b0f163207254a87362c

* test: forbid `checkptr`

Change-Id: I6b34f74ee3bad883f515728300bf735a9e10b0d6

* fmt: add comments

Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
Yi Duan 2021-12-28 20:29:09 +08:00 committed by GitHub
parent 8dfaa13d3e
commit c3cb5de704
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 679 additions and 276 deletions

View file

@ -21,4 +21,4 @@ jobs:
${{ runner.os }}-go-
- name: Unit Test
run: GOMAXPROCS=4 go test -v -covermode=atomic -coverprofile=coverage.out ./...
run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -covermode=atomic -coverprofile=coverage.out ./...

View file

@ -21,4 +21,4 @@ jobs:
${{ runner.os }}-go-
- name: Unit Test
run: GOMAXPROCS=4 go test -v -race -covermode=atomic -coverprofile=coverage.out ./...
run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./...

View file

@ -21,4 +21,4 @@ jobs:
${{ runner.os }}-go-
- name: Unit Test
run: GOMAXPROCS=4 go test -v -race -covermode=atomic -coverprofile=coverage.out ./...
run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./...

View file

@ -17,6 +17,8 @@
package ast
import (
`fmt`
`github.com/bytedance/sonic/internal/native/types`
)
@ -97,3 +99,62 @@ func (self *ObjectIterator) Next(p *Pair) bool {
return true
}
}
// Sequence describes where a value sits inside its direct parent.
// Index is the value's position among its siblings (used for both V_ARRAY and V_OBJECT parents).
// Key points at the value's key when the parent is a V_OBJECT, and is nil otherwise.
type Sequence struct {
	Index int
	Key   *string
	// Level int
}

// String renders the Sequence as `Sequence(<index>, "<key>")`; a nil Key is printed as an empty string.
func (s Sequence) String() string {
	if s.Key == nil {
		return fmt.Sprintf("Sequence(%d, %q)", s.Index, "")
	}
	return fmt.Sprintf("Sequence(%d, %q)", s.Index, *s.Key)
}
// Scanner is the callback handed to Node.ForEach. It receives the Sequence and
// the Node of each visited value; returning false stops the scan of an array or object.
type Scanner func(path Sequence, node *Node) bool
// ForEach scans the direct children of a V_ARRAY or V_OBJECT node from JSON head to tail,
// passing the Sequence and Node of every child to sc.
// The scan ends early, without error, as soon as sc returns false.
//
// If the node is neither V_ARRAY nor V_OBJECT, sc is called once with
// the node itself and Sequence.Index == -1 (its return value is ignored).
func (self *Node) ForEach(sc Scanner) error {
	switch self.itype() {
	case types.V_ARRAY:
		elems, err := self.UnsafeArray()
		if err != nil {
			return err
		}
		for idx := 0; idx < len(elems); idx++ {
			if keepGoing := sc(Sequence{idx, nil}, &elems[idx]); !keepGoing {
				/* stopped by the scanner: not an error */
				return nil
			}
		}
	case types.V_OBJECT:
		pairs, err := self.UnsafeMap()
		if err != nil {
			return err
		}
		for idx := 0; idx < len(pairs); idx++ {
			entry := &pairs[idx]
			if keepGoing := sc(Sequence{idx, &entry.Key}, &entry.Value); !keepGoing {
				/* stopped by the scanner: not an error */
				return nil
			}
		}
	default:
		sc(Sequence{-1, nil}, self)
	}
	/* a completed scan reports the node's own state */
	return self.Check()
}
// PairSlice is a slice of object pairs that can be sorted by key.
type PairSlice []Pair

// Sort orders the pairs in place by Key, running radixQsort from byte
// position 0 with a recursion budget of maxDepth(len(self)).
func (self PairSlice) Sort() {
	budget := maxDepth(len(self))
	radixQsort(self, 0, budget)
}

View file

@ -20,13 +20,14 @@ import (
`fmt`
`strconv`
`testing`
`github.com/stretchr/testify/assert`
)
func getTestIteratorSample() (string, int) {
func getTestIteratorSample(loop int) (string, int) {
var data []int
var v1 = ""
var v2 = ""
loop := _DEFAULT_NODE_CAP+1
for i:=0;i<loop;i++{
data = append(data, i*i)
v1 += strconv.Itoa(i)
@ -39,8 +40,57 @@ func getTestIteratorSample() (string, int) {
return `{"array":[`+v1+`], "object":{`+v2+`}}`, loop
}
// TestForEach scans the root object of a small sample and, for the child keyed
// "array", scans one level deeper; it then compares the recorded paths and nodes
// with the expected visiting order.
func TestForEach(t *testing.T) {
	visitedPaths := []Sequence{}
	visitedNodes := []*Node{}

	/* record remembers every visited value and never stops the scan */
	record := func(path Sequence, node *Node) bool {
		visitedPaths = append(visitedPaths, path)
		visitedNodes = append(visitedNodes, node)
		return true
	}
	/* walk records the value and descends one level into the "array" child */
	walk := func(path Sequence, node *Node) bool {
		record(path, node)
		if path.Key == nil || *path.Key != "array" {
			return true
		}
		node.ForEach(record)
		return true
	}

	str, _ := getTestIteratorSample(3)
	fmt.Println(str)
	root, err := NewSearcher(str).GetByPath()
	if err != nil {
		t.Fatal(err)
	}
	if err = root.ForEach(walk); err != nil {
		t.Fatal(err)
	}

	arrKey, objKey := "array", "object"
	wantPaths := []Sequence{
		{0, &arrKey},
		{0, nil},
		{1, nil},
		{2, nil},
		{1, &objKey},
	}
	wantNodes := []*Node{
		root.Get("array"),
		root.GetByPath("array", 0),
		root.GetByPath("array", 1),
		root.GetByPath("array", 2),
		root.Get("object"),
	}
	assert.Equal(t, wantPaths, visitedPaths)
	assert.Equal(t, wantNodes, visitedNodes)
}
func TestRawIterator(t *testing.T) {
str, loop := getTestIteratorSample()
str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
fmt.Println(str)
root, err := NewSearcher(str).GetByPath("array")
@ -94,7 +144,7 @@ func TestRawIterator(t *testing.T) {
}
func TestIterator(t *testing.T) {
str, loop := getTestIteratorSample()
str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
fmt.Println(str)
root, err := NewParser(str).Parse()

View file

@ -24,7 +24,6 @@ import (
`github.com/bytedance/sonic/decoder`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/unquote`
`github.com/chenzhuoyu/base64x`
)
@ -621,6 +620,34 @@ func (self *Node) UnsafeMap() ([]Pair, error) {
return *(*[]Pair)(s), nil
}
// SortKeys sorts children of a V_OBJECT node in ascending key-order.
// If recurse is true, it recursively sorts children's children as long as a V_OBJECT node is found,
// descending through V_ARRAY nodes on the way.
//
// It returns the error of loading the node's pairs, or, when recurse is true,
// the first error met while scanning or sorting any descendant.
func (self *Node) SortKeys(recurse bool) (err error) {
	ps, err := self.UnsafeMap()
	if err != nil {
		return err
	}
	PairSlice(ps).Sort()
	if !recurse {
		return nil
	}

	var sc Scanner
	sc = func(path Sequence, node *Node) bool {
		switch node.itype() {
		case types.V_OBJECT:
			/* assign to the outer err (no shadowing) so the failure reaches the caller */
			err = node.SortKeys(recurse)
		case types.V_ARRAY:
			/* a failure inside the array was already stored in err by the nested sc call;
			 * ForEach's own error is recorded only when nothing failed before it */
			if e := node.ForEach(sc); err == nil {
				err = e
			}
		}
		/* stop scanning as soon as something failed */
		return err == nil
	}
	if e := self.ForEach(sc); err == nil {
		err = e
	}
	return err
}
// Array loads all indexes of an array node
func (self *Node) Array() ([]interface{}, error) {
if self.isAny() {
@ -673,7 +700,7 @@ func (self *Node) ArrayUseNode() ([]Node, error) {
if err := self.should(types.V_ARRAY, "an array"); err != nil {
return nil, err
}
if err := self.loadAllIndex(); err != nil {
if err := self.skipAllIndex(); err != nil {
return nil, err
}
return self.toGenericArrayUseNode()
@ -685,7 +712,7 @@ func (self *Node) UnsafeArray() ([]Node, error) {
if err := self.should(types.V_ARRAY, "an array"); err != nil {
return nil, err
}
if err := self.loadAllIndex(); err != nil {
if err := self.skipAllIndex(); err != nil {
return nil, err
}
s := ptr2slice(self.p, self.len(), self.cap())
@ -806,7 +833,9 @@ func (self *Node) LoadAll() error {
}
for i := 0; i < e; i++ {
n := self.nodeAt(i)
n.parseRaw(true)
if n.IsRaw() {
n.parseRaw(true)
}
if err := n.Check(); err != nil {
return err
}
@ -819,7 +848,9 @@ func (self *Node) LoadAll() error {
}
for i := 0; i < e; i++ {
n := self.pairAt(i)
n.Value.parseRaw(true)
if n.Value.IsRaw() {
n.Value.parseRaw(true)
}
if err := n.Value.Check(); err != nil {
return err
}
@ -957,147 +988,6 @@ func (self *Node) skipAllKey() error {
return nil
}
// skipNextNode advances a lazy array node by one element without fully parsing it.
// The element's text is skipped by the parser and stored as a raw node appended to
// the node's element stack.
//
// It returns a pointer to the appended element, nil when the node is not lazy or
// the array turns out to be empty, or a node built by newSyntaxError on a syntax problem.
func (self *Node) skipNextNode() *Node {
/* only lazy nodes are advanced */
if !self.isLazy() {
return nil
}
parser, stack := self.getParserAndArrayStack()
ret := stack.v
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
}
/* check for empty array */
if parser.s[parser.p] == ']' {
parser.p++
self.setArray(ret)
return nil
}
var val Node
/* skip the value, keeping its text as a raw node typed by its first byte */
if start, err := parser.skip(); err != 0 {
return newSyntaxError(parser.syntaxError(err))
} else {
t := switchRawType(parser.s[start])
if t == _V_NONE {
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
val = newRawNode(parser.s[start:parser.p], t)
}
/* add the value to result */
ret = append(ret, val)
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
/* more elements follow: store the result through setLazyArray */
parser.p++
self.setLazyArray(parser, ret)
return &ret[len(ret)-1]
case ']':
/* end of array: store the result through setArray */
parser.p++
self.setArray(ret)
return &ret[len(ret)-1]
default:
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
}
// skipNextPair advances a lazy object node by one key/value pair without fully
// parsing the value. The key is decoded (and unquoted when it contains escapes),
// the value's text is skipped and stored as a raw node, and the pair is appended
// to the node's pair stack.
//
// It returns a pointer to the appended pair, nil when the node is not lazy or the
// object turns out to be empty, or a Pair whose Value is built by newSyntaxError
// on a syntax problem.
func (self *Node) skipNextPair() (*Pair) {
/* only lazy nodes are advanced */
if !self.isLazy() {
return nil
}
parser, stack := self.getParserAndObjectStack()
ret := stack.v
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
}
/* check for empty object */
if parser.s[parser.p] == '}' {
parser.p++
self.setObject(ret)
return nil
}
/* decode one pair */
var val Node
var njs types.JsonState
var err types.ParsingError
/* decode the key, which must be a string */
if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
/* extract the key */
idx := parser.p - 1
key := parser.s[njs.Iv:idx]
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
}
}
/* expect a ':' delimiter */
if err = parser.delim(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
}
/* skip the value, keeping its text as a raw node typed by its first byte */
if start, err := parser.skip(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
} else {
t := switchRawType(parser.s[start])
if t == _V_NONE {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
val = newRawNode(parser.s[start:parser.p], t)
}
/* add the value to result */
ret = append(ret, Pair{Key: key, Value: val})
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
/* more pairs follow: store the result through setLazyObject */
parser.p++
self.setLazyObject(parser, ret)
return &ret[len(ret)-1]
case '}':
/* end of object: store the result through setObject */
parser.p++
self.setObject(ret)
return &ret[len(ret)-1]
default:
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
}
func (self *Node) skipKey(key string) (*Node, int) {
nb := self.len()
lazy := self.isLazy()

View file

@ -25,11 +25,66 @@ import (
`strconv`
`testing`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/stretchr/testify/assert`
)
// TestNodeSortKeys sorts the sample document recursively and expects its
// marshaled form to equal the encoder's output with encoder.SortMapKeys.
func TestNodeSortKeys(t *testing.T) {
	failOn := func(err error) {
		if err != nil {
			t.Fatal(err)
		}
	}

	root, err := NewSearcher(_TwitterJson).GetByPath()
	failOn(err)

	/* expected output: generic map encoded with sorted keys */
	obj, err := root.MapUseNumber()
	failOn(err)
	exp, err := encoder.Encode(obj, encoder.SortMapKeys)
	failOn(err)

	/* actual output: node sorted in place, then marshaled */
	failOn(root.SortKeys(true))
	act, err := root.MarshalJSON()
	failOn(err)

	assert.Equal(t, len(exp), len(act))
	assert.Equal(t, string(exp), string(act))
}
// BenchmarkNodeSortKeys measures Node.SortKeys on the fully loaded sample
// document, once without recursion ("single") and once with it ("recurse").
// The same root node is reused for every iteration and both sub-benchmarks.
func BenchmarkNodeSortKeys(b *testing.B) {
root, err := NewSearcher(_TwitterJson).GetByPath()
if err != nil {
b.Fatal(err)
}
// Load everything up front so that parsing is not part of the measurement.
if err := root.LoadAll(); err != nil {
b.Fatal(err)
}
b.Run("single", func(b *testing.B) {
// NOTE(review): r is only checked here; the timed loop below sorts root, not r — confirm this is intended.
r := root.Get("statuses")
if r.Check() != nil {
b.Fatal(r.Error())
}
b.SetBytes(int64(len(_TwitterJson)))
b.ResetTimer()
for i:=0; i<b.N; i++ {
_ = root.SortKeys(false)
}
})
b.Run("recurse", func(b *testing.B) {
b.SetBytes(int64(len(_TwitterJson)))
b.ResetTimer()
for i:=0; i<b.N; i++ {
_ = root.SortKeys(true)
}
})
}
//go:noinline
func stackObj() interface{} {
var a int = 1
@ -451,7 +506,7 @@ func TestUnset(t *testing.T) {
}
func TestUnsafeNode(t *testing.T) {
str, loop := getTestIteratorSample()
str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
root, err := NewSearcher(str).GetByPath("array")
if err != nil {
@ -490,7 +545,7 @@ func TestUnsafeNode(t *testing.T) {
}
func TestUseNode(t *testing.T) {
str, loop := getTestIteratorSample()
str, loop := getTestIteratorSample(_DEFAULT_NODE_CAP)
root, e := NewParser(str).Parse()
if e != 0 {
t.Fatal(e)
@ -576,7 +631,7 @@ func TestUseNode(t *testing.T) {
}
func TestUseNumber(t *testing.T) {
str, _ := getTestIteratorSample()
str, _ := getTestIteratorSample(_DEFAULT_NODE_CAP)
root, e := NewParser(str).Parse()
if e != 0 {
t.Fatal(e)

View file

@ -335,6 +335,266 @@ func (self *Parser) skip() (int, types.ParsingError) {
return start, 0
}
// searchKey scans the object at the parser's current position for a member
// whose key equals match.
//
// It returns 0 as soon as the matching key and its ':' delimiter have been
// consumed, leaving the member's value unconsumed. It returns _ERR_NOT_FOUND
// when the object is empty or closes without a match, and a parsing error on
// malformed input.
func (self *Parser) searchKey(match string) types.ParsingError {
ns := len(self.s)
/* consume the start of the object */
if err := self.object(); err != 0 {
return err
}
/* check for EOF */
if self.p = self.lspace(self.p); self.p >= ns {
return types.ERR_EOF
}
/* check for empty object */
if self.s[self.p] == '}' {
self.p++
return _ERR_NOT_FOUND
}
var njs types.JsonState
var err types.ParsingError
/* decode each pair */
for {
/* decode the key, which must be a string */
if njs = self.decodeValue(); njs.Vt != types.V_STRING {
return types.ERR_INVALID_CHAR
}
/* extract the key */
idx := self.p - 1
key := self.s[njs.Iv:idx]
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 {
return err
}
}
/* expect a ':' delimiter */
if err = self.delim(); err != 0 {
return err
}
/* skip the value of a non-matching key, stop right before the value of a matching one */
if key != match {
if _, err = self.skip(); err != 0 {
return err
}
} else {
return 0
}
/* check for EOF */
self.p = self.lspace(self.p)
if self.p >= ns {
return types.ERR_EOF
}
/* check for the next character */
switch self.s[self.p] {
case ',':
self.p++
case '}':
self.p++
return _ERR_NOT_FOUND
default:
return types.ERR_INVALID_CHAR
}
}
}
// searchIndex skips the first idx elements of the array at the parser's
// current position.
//
// It returns 0 once idx elements (and the ',' after each of them) have been
// consumed, leaving the next element unconsumed. It returns _ERR_NOT_FOUND when
// the array is empty or closes before that, and a parsing error on malformed input.
func (self *Parser) searchIndex(idx int) types.ParsingError {
ns := len(self.s)
/* consume the start of the array */
if err := self.array(); err != 0 {
return err
}
/* check for EOF */
if self.p = self.lspace(self.p); self.p >= ns {
return types.ERR_EOF
}
/* check for empty array */
if self.s[self.p] == ']' {
self.p++
return _ERR_NOT_FOUND
}
var err types.ParsingError
/* skip the elements that precede the wanted index */
for i := 0; i < idx; i++ {
/* skip the value */
if _, err = self.skip(); err != 0 {
return err
}
/* check for EOF */
self.p = self.lspace(self.p)
if self.p >= ns {
return types.ERR_EOF
}
/* check for the next character */
switch self.s[self.p] {
case ',':
self.p++
case ']':
self.p++
return _ERR_NOT_FOUND
default:
return types.ERR_INVALID_CHAR
}
}
return 0
}
// skipNextNode advances a lazy array node by one element without fully parsing it.
// The element's text is skipped by the parser and stored as a raw node appended to
// the node's element stack.
//
// It returns a pointer to the appended element, nil when the node is not lazy or
// the array turns out to be empty, or a node built by newSyntaxError on a syntax problem.
func (self *Node) skipNextNode() *Node {
/* only lazy nodes are advanced */
if !self.isLazy() {
return nil
}
parser, stack := self.getParserAndArrayStack()
ret := stack.v
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
}
/* check for empty array */
if parser.s[parser.p] == ']' {
parser.p++
self.setArray(ret)
return nil
}
var val Node
/* skip the value, keeping its text as a raw node typed by its first byte */
if start, err := parser.skip(); err != 0 {
return newSyntaxError(parser.syntaxError(err))
} else {
t := switchRawType(parser.s[start])
if t == _V_NONE {
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
val = newRawNode(parser.s[start:parser.p], t)
}
/* add the value to result */
ret = append(ret, val)
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
/* more elements follow: store the result through setLazyArray */
parser.p++
self.setLazyArray(parser, ret)
return &ret[len(ret)-1]
case ']':
/* end of array: store the result through setArray */
parser.p++
self.setArray(ret)
return &ret[len(ret)-1]
default:
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
}
// skipNextPair advances a lazy object node by one key/value pair without fully
// parsing the value. The key is decoded (and unquoted when it contains escapes),
// the value's text is skipped and stored as a raw node, and the pair is appended
// to the node's pair stack.
//
// It returns a pointer to the appended pair, nil when the node is not lazy or the
// object turns out to be empty, or a Pair whose Value is built by newSyntaxError
// on a syntax problem.
func (self *Node) skipNextPair() (*Pair) {
/* only lazy nodes are advanced */
if !self.isLazy() {
return nil
}
parser, stack := self.getParserAndObjectStack()
ret := stack.v
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
}
/* check for empty object */
if parser.s[parser.p] == '}' {
parser.p++
self.setObject(ret)
return nil
}
/* decode one pair */
var val Node
var njs types.JsonState
var err types.ParsingError
/* decode the key, which must be a string */
if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
/* extract the key */
idx := parser.p - 1
key := parser.s[njs.Iv:idx]
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
}
}
/* expect a ':' delimiter */
if err = parser.delim(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
}
/* skip the value, keeping its text as a raw node typed by its first byte */
if start, err := parser.skip(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
} else {
t := switchRawType(parser.s[start])
if t == _V_NONE {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
val = newRawNode(parser.s[start:parser.p], t)
}
/* add the value to result */
ret = append(ret, Pair{Key: key, Value: val})
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
/* more pairs follow: store the result through setLazyObject */
parser.p++
self.setLazyObject(parser, ret)
return &ret[len(ret)-1]
case '}':
/* end of object: store the result through setObject */
parser.p++
self.setObject(ret)
return &ret[len(ret)-1]
default:
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
}
/** Parser Factory **/
// Loads parse all json into interface{}

View file

@ -20,7 +20,6 @@ import (
`fmt`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/unquote`
)
type Searcher struct {
@ -70,122 +69,4 @@ func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
}
return newRawNode(self.parser.s[start:self.parser.p], t), nil
}
// searchKey scans the object at the parser's current position for a member
// whose key equals match.
//
// It returns 0 as soon as the matching key and its ':' delimiter have been
// consumed, leaving the member's value unconsumed. It returns _ERR_NOT_FOUND
// when the object is empty or closes without a match, and a parsing error on
// malformed input.
func (self *Parser) searchKey(match string) types.ParsingError {
ns := len(self.s)
/* consume the start of the object */
if err := self.object(); err != 0 {
return err
}
/* check for EOF */
if self.p = self.lspace(self.p); self.p >= ns {
return types.ERR_EOF
}
/* check for empty object */
if self.s[self.p] == '}' {
self.p++
return _ERR_NOT_FOUND
}
var njs types.JsonState
var err types.ParsingError
/* decode each pair */
for {
/* decode the key, which must be a string */
if njs = self.decodeValue(); njs.Vt != types.V_STRING {
return types.ERR_INVALID_CHAR
}
/* extract the key */
idx := self.p - 1
key := self.s[njs.Iv:idx]
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 {
return err
}
}
/* expect a ':' delimiter */
if err = self.delim(); err != 0 {
return err
}
/* skip the value of a non-matching key, stop right before the value of a matching one */
if key != match {
if _, err = self.skip(); err != 0 {
return err
}
} else {
return 0
}
/* check for EOF */
self.p = self.lspace(self.p)
if self.p >= ns {
return types.ERR_EOF
}
/* check for the next character */
switch self.s[self.p] {
case ',':
self.p++
case '}':
self.p++
return _ERR_NOT_FOUND
default:
return types.ERR_INVALID_CHAR
}
}
}
// searchIndex skips the first idx elements of the array at the parser's
// current position.
//
// It returns 0 once idx elements (and the ',' after each of them) have been
// consumed, leaving the next element unconsumed. It returns _ERR_NOT_FOUND when
// the array is empty or closes before that, and a parsing error on malformed input.
func (self *Parser) searchIndex(idx int) types.ParsingError {
ns := len(self.s)
/* consume the start of the array */
if err := self.array(); err != 0 {
return err
}
/* check for EOF */
if self.p = self.lspace(self.p); self.p >= ns {
return types.ERR_EOF
}
/* check for empty array */
if self.s[self.p] == ']' {
self.p++
return _ERR_NOT_FOUND
}
var err types.ParsingError
/* skip the elements that precede the wanted index */
for i := 0; i < idx; i++ {
/* skip the value */
if _, err = self.skip(); err != 0 {
return err
}
/* check for EOF */
self.p = self.lspace(self.p)
if self.p >= ns {
return types.ERR_EOF
}
/* check for the next character */
switch self.s[self.p] {
case ',':
self.p++
case ']':
self.p++
return _ERR_NOT_FOUND
default:
return types.ERR_INVALID_CHAR
}
}
return 0
}
}

206
ast/sort.go Normal file
View file

@ -0,0 +1,206 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
// radixQsort sorts kvs in place by Key using 3-way radix quicksort.
// d is the radix, i.e. the byte position of the keys that is compared at this level;
// maxDepth is the remaining partitioning budget before falling back to heapSort.
// Slices of at most 11 pairs are finished with insertRadixSort.
// Reference: https://algs4.cs.princeton.edu/51radix/Quick3string.java.html
func radixQsort(kvs PairSlice, d, maxDepth int) {
for len(kvs) > 11 {
// To avoid the worst case of quickSort (time: O(n^2)), use introsort here.
// Reference: https://en.wikipedia.org/wiki/Introsort and
// https://github.com/golang/go/issues/467
if maxDepth == 0 {
heapSort(kvs, 0, len(kvs))
return
}
maxDepth--
p := pivot(kvs, d)
// 3-way partition on the byte at position d:
// [0,lt) holds bytes < p, [lt,i) bytes == p, [gt,len) bytes > p.
lt, i, gt := 0, 0, len(kvs)
for i < gt {
c := byteAt(kvs[i].Key, d)
if c < p {
swap(kvs, lt, i)
i++
lt++
} else if c > p {
gt--
swap(kvs, i, gt)
} else {
i++
}
}
// Now, at byte position d: kvs[0:lt] < p == kvs[lt:gt] < kvs[gt:len(kvs)]
// Naive implementation:
// radixQsort(kvs[:lt], d, maxDepth)
// if p > -1 {
// radixQsort(kvs[lt:gt], d+1, maxDepth)
// }
// radixQsort(kvs[gt:], d, maxDepth)
// Optimize as follows: make recursive calls only for the smaller parts
// and keep looping on the largest one.
// Reference: https://www.geeksforgeeks.org/quicksort-tail-call-optimization-reducing-worst-case-space-log-n/
if p == -1 {
// p == -1 is what byteAt yields for keys that end at position d,
// so the middle part kvs[lt:gt] is not sorted any further.
if lt > len(kvs) - gt {
radixQsort(kvs[gt:], d, maxDepth)
kvs = kvs[:lt]
} else {
radixQsort(kvs[:lt], d, maxDepth)
kvs = kvs[gt:]
}
} else {
ml := maxThree(lt, gt-lt, len(kvs)-gt)
if ml == lt {
radixQsort(kvs[lt:gt], d+1, maxDepth)
radixQsort(kvs[gt:], d, maxDepth)
kvs = kvs[:lt]
} else if ml == gt-lt {
radixQsort(kvs[:lt], d, maxDepth)
radixQsort(kvs[gt:], d, maxDepth)
kvs = kvs[lt:gt]
// the middle part shares the byte at d, continue with the next byte
d += 1
} else {
radixQsort(kvs[:lt], d, maxDepth)
radixQsort(kvs[lt:gt], d+1, maxDepth)
kvs = kvs[gt:]
}
}
}
insertRadixSort(kvs, d)
}
// insertRadixSort is an insertion sort over kvs that orders pairs with
// lessFrom, i.e. by comparing keys from byte position d onward.
func insertRadixSort(kvs PairSlice, d int) {
	for end := 1; end < len(kvs); end++ {
		// Sink kvs[end] towards the front until its predecessor is not greater.
		cur := end
		for cur > 0 {
			prev := cur - 1
			if !lessFrom(kvs[cur].Key, kvs[prev].Key, d) {
				break
			}
			swap(kvs, cur, prev)
			cur = prev
		}
	}
}
// pivot picks the partitioning byte for radixQsort among the bytes found at
// position d of the keys: the median of the first, middle and last keys, or,
// for more than 40 pairs, Tukey's "ninther" (the median of three medians of three).
func pivot(kvs PairSlice, d int) int {
	n := len(kvs)
	mid, last := n>>1, n-1
	if n <= 40 {
		return medianThree(byteAt(kvs[0].Key, d), byteAt(kvs[mid].Key, d), byteAt(kvs[last].Key, d))
	}

	step := n / 8
	head := medianThree(
		byteAt(kvs[0].Key, d),
		byteAt(kvs[step].Key, d),
		byteAt(kvs[2*step].Key, d))
	center := medianThree(
		byteAt(kvs[mid].Key, d),
		byteAt(kvs[mid-step].Key, d),
		byteAt(kvs[mid+step].Key, d))
	tail := medianThree(
		byteAt(kvs[last].Key, d),
		byteAt(kvs[last-step].Key, d),
		byteAt(kvs[last-2*step].Key, d))
	return medianThree(head, center, tail)
}
// medianThree returns the median of its three arguments.
func medianThree(i, j, k int) int {
	lo, hi := i, j
	if lo > hi {
		lo, hi = hi, lo
	}
	// lo <= hi from here on; clamp k into [lo, hi].
	switch {
	case k < lo:
		return lo
	case k > hi:
		return hi
	default:
		return k
	}
}
// maxThree returns the largest of its three arguments.
func maxThree(i, j, k int) int {
	largest := k
	if j > largest {
		largest = j
	}
	if i > largest {
		largest = i
	}
	return largest
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)), i.e. twice the bit length of n.
func maxDepth(n int) int {
	depth := 0
	for n > 0 {
		n >>= 1
		depth += 2
	}
	return depth
}
// siftDown restores the max-heap property (ordered by Key) for the subtree
// rooted at heap index lo, within a heap of hi elements.
// first is an offset into kvs where the root of the heap lies.
func siftDown(kvs PairSlice, lo, hi, first int) {
	parent := lo
	for kid := 2*parent + 1; kid < hi; kid = 2*parent + 1 {
		// Pick the child holding the greater key.
		if right := kid + 1; right < hi && kvs[first+kid].Key < kvs[first+right].Key {
			kid = right
		}
		if kvs[first+parent].Key >= kvs[first+kid].Key {
			return
		}
		swap(kvs, first+parent, first+kid)
		parent = kid
	}
}
// heapSort sorts kvs[a:b] in ascending Key order with an in-place heap sort.
func heapSort(kvs PairSlice, a, b int) {
	size := b - a

	// Build heap with the greatest element at top.
	for node := (size - 1) / 2; node >= 0; node-- {
		siftDown(kvs, node, size, a)
	}

	// Pop elements, the largest first, into end of kvs[a:b].
	for end := size - 1; end >= 0; end-- {
		swap(kvs, a, a+end)
		siftDown(kvs, 0, end, a)
	}
}
// swap exchanges the Key and the Value of kvs[a] and kvs[b].
// Original note, kept for reviewers: Pair.Key is NOT pointed to Pair.m when
// map key is integer after swap.
func swap(kvs PairSlice, a, b int) {
	left, right := &kvs[a], &kvs[b]
	left.Key, right.Key = right.Key, left.Key
	left.Value, right.Value = right.Value, left.Value
}
// lessFrom reports whether a sorts before b when both are compared byte-wise
// starting at position d. If no differing byte is found within the common
// length, the shorter string sorts first.
func lessFrom(a, b string, d int) bool {
	limit := len(b)
	if len(a) <= limit {
		limit = len(a)
	}
	for pos := d; pos < limit; pos++ {
		if x, y := a[pos], b[pos]; x != y {
			return x < y
		}
	}
	return len(a) < len(b)
}
// byteAt returns the byte of b at position p as an int, or -1 when p is at or
// past the end of b, so that a string that has ended sorts before any real byte.
func byteAt(b string, p int) int {
	if p >= len(b) {
		return -1
	}
	return int(b[p])
}