mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
opt: remove bound and loop unrolling in quote (begin to use clang13) (#237)
* opt: remove most bound check in quote * opt: remove branch in copied escaped chars * opt: use cache-friendly bool array * opt: use loop unrolling in quote * feat: add `-t` for bench.py Change-Id: I3f41218187597126ef960eab09c6fa6fe252f347 * test:(bench) adjust repeat_time * test: add Diff on CI * test: update bench.py * doc: update README.md Co-authored-by: liuqiang <liuqiang.06@bytedance.com> Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
parent
3717c62ec2
commit
410625ca9e
10 changed files with 11244 additions and 10867 deletions
5
.github/workflows/benchmark-linux-amd64.yml
vendored
5
.github/workflows/benchmark-linux-amd64.yml
vendored
|
|
@ -4,7 +4,7 @@ on: pull_request
|
|||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: self-hosted
|
||||
runs-on: [self-hosted, X64]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
|
|
@ -25,3 +25,6 @@ jobs:
|
|||
|
||||
- name: Benchmark
|
||||
run: sh bench.sh
|
||||
|
||||
# - name: Diff
|
||||
# run: ./bench.py -b '"^Benchmark.*Sonic"' -c
|
||||
|
|
|
|||
104
README.md
104
README.md
|
|
@ -20,60 +20,60 @@ goversion: 1.17.1
|
|||
goos: darwin
|
||||
goarch: amd64
|
||||
cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
|
||||
BenchmarkEncoder_Generic_Sonic-16 42688 ns/op 305.36 MB/s 15608 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Generic_Sonic_Fast-16 30043 ns/op 433.87 MB/s 14638 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Generic_JsonIter-16 46461 ns/op 280.56 MB/s 13433 B/op 77 allocs/op
|
||||
BenchmarkEncoder_Generic_GoJson-16 73608 ns/op 177.09 MB/s 23219 B/op 16 allocs/op
|
||||
BenchmarkEncoder_Generic_StdLib-16 122622 ns/op 106.30 MB/s 49137 B/op 827 allocs/op
|
||||
BenchmarkEncoder_Binding_Sonic-16 8190 ns/op 1591.61 MB/s 16175 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Binding_Sonic_Fast-16 7365 ns/op 1769.85 MB/s 14367 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Binding_JsonIter-16 23326 ns/op 558.81 MB/s 9487 B/op 2 allocs/op
|
||||
BenchmarkEncoder_Binding_GoJson-16 9412 ns/op 1384.93 MB/s 9480 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Binding_StdLib-16 18510 ns/op 704.22 MB/s 9479 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_Sonic-16 7716 ns/op 1689.37 MB/s 12812 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4791 ns/op 2720.47 MB/s 10884 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_JsonIter-16 10505 ns/op 1240.85 MB/s 13455 B/op 77 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_GoJson-16 24086 ns/op 541.19 MB/s 23379 B/op 17 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_StdLib-16 65697 ns/op 198.41 MB/s 49164 B/op 827 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_Sonic-16 2085 ns/op 6251.53 MB/s 12933 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1612 ns/op 8087.31 MB/s 11177 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6169 ns/op 2112.84 MB/s 9494 B/op 2 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_GoJson-16 3492 ns/op 3733.14 MB/s 9492 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_StdLib-16 5170 ns/op 2521.50 MB/s 9482 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op
|
||||
BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op
|
||||
BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op
|
||||
BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op
|
||||
BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 allocs/op
|
||||
BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op
|
||||
BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op
|
||||
|
||||
BenchmarkDecoder_Generic_Sonic-16 71589 ns/op 182.08 MB/s 57531 B/op 723 allocs/op
|
||||
BenchmarkDecoder_Generic_Sonic_Fast-16 57653 ns/op 226.10 MB/s 49743 B/op 313 allocs/op
|
||||
BenchmarkDecoder_Generic_StdLib-16 143584 ns/op 90.78 MB/s 50870 B/op 772 allocs/op
|
||||
BenchmarkDecoder_Generic_JsonIter-16 94775 ns/op 137.54 MB/s 55783 B/op 1068 allocs/op
|
||||
BenchmarkDecoder_Generic_GoJson-16 88647 ns/op 147.04 MB/s 66371 B/op 973 allocs/op
|
||||
BenchmarkDecoder_Binding_Sonic-16 32399 ns/op 402.33 MB/s 27814 B/op 137 allocs/op
|
||||
BenchmarkDecoder_Binding_Sonic_Fast-16 28655 ns/op 454.89 MB/s 25127 B/op 34 allocs/op
|
||||
BenchmarkDecoder_Binding_StdLib-16 116617 ns/op 111.78 MB/s 7344 B/op 103 allocs/op
|
||||
BenchmarkDecoder_Binding_JsonIter-16 36206 ns/op 360.02 MB/s 14673 B/op 385 allocs/op
|
||||
BenchmarkDecoder_Binding_GoJson-16 29396 ns/op 443.43 MB/s 22042 B/op 49 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_Sonic-16 12243 ns/op 1064.68 MB/s 57135 B/op 723 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10101 ns/op 1290.48 MB/s 49440 B/op 313 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_StdLib-16 57352 ns/op 227.28 MB/s 50877 B/op 772 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_JsonIter-16 58693 ns/op 222.09 MB/s 55814 B/op 1068 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_GoJson-16 45245 ns/op 288.10 MB/s 66430 B/op 974 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_Sonic-16 7035 ns/op 1852.89 MB/s 27731 B/op 137 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 6510 ns/op 2002.33 MB/s 24841 B/op 34 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_StdLib-16 33086 ns/op 393.97 MB/s 7344 B/op 103 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_JsonIter-16 17827 ns/op 731.18 MB/s 14680 B/op 385 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_GoJson-16 16813 ns/op 775.29 MB/s 22268 B/op 49 allocs/op
|
||||
BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op
|
||||
BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op
|
||||
BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op
|
||||
BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op
|
||||
BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op
|
||||
BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op
|
||||
BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op
|
||||
BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op
|
||||
BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op
|
||||
BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op
|
||||
BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op
|
||||
BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op
|
||||
|
||||
BenchmarkGetOne_Sonic-16 11328 ns/op 1149.64 MB/s 29 B/op 1 allocs/op
|
||||
BenchmarkGetOne_Gjson-16 12970 ns/op 1004.07 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGetOne_Jsoniter-16 59928 ns/op 217.31 MB/s 27936 B/op 647 allocs/op
|
||||
BenchmarkGetOne_Parallel_Sonic-16 1447 ns/op 9002.23 MB/s 114 B/op 1 allocs/op
|
||||
BenchmarkGetOne_Parallel_Gjson-16 1171 ns/op 11125.73 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGetOne_Parallel_Jsoniter-16 15545 ns/op 837.75 MB/s 27940 B/op 647 allocs/op
|
||||
BenchmarkSetOne_Sonic-16 16922 ns/op 769.57 MB/s 1936 B/op 17 allocs/op
|
||||
BenchmarkSetOne_Sjson-16 42683 ns/op 305.11 MB/s 52181 B/op 9 allocs/op
|
||||
BenchmarkSetOne_Jsoniter-16 91104 ns/op 142.95 MB/s 45861 B/op 964 allocs/op
|
||||
BenchmarkSetOne_Parallel_Sonic-16 2065 ns/op 6305.03 MB/s 2383 B/op 17 allocs/op
|
||||
BenchmarkSetOne_Parallel_Sjson-16 11526 ns/op 1129.87 MB/s 52175 B/op 9 allocs/op
|
||||
BenchmarkSetOne_Parallel_Jsoniter-16 35044 ns/op 371.61 MB/s 45887 B/op 964 allocs/op
|
||||
BenchmarkGetOne_Sonic-16 11650 ns/op 1117.81 MB/s 29 B/op 1 allocs/op
|
||||
BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 allocs/op
|
||||
BenchmarkGetOne_Parallel_Sonic-16 1955 ns/op 6659.94 MB/s 125 B/op 1 allocs/op
|
||||
BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op
|
||||
BenchmarkSetOne_Sonic-16 16124 ns/op 807.70 MB/s 1787 B/op 17 allocs/op
|
||||
BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op
|
||||
BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op
|
||||
BenchmarkSetOne_Parallel_Sonic-16 2383 ns/op 5465.02 MB/s 2186 B/op 17 allocs/op
|
||||
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
|
||||
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
|
||||
```
|
||||
- [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers)
|
||||

|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ func TestEncodeNode(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkEncodeRaw(b *testing.B) {
|
||||
func BenchmarkEncodeRaw_Sonic(b *testing.B) {
|
||||
data := _TwitterJson
|
||||
root, e := NewSearcher(data).GetByPath()
|
||||
if e != nil {
|
||||
|
|
@ -164,7 +164,7 @@ func BenchmarkEncodeRaw(b *testing.B) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkEncodeSkip(b *testing.B) {
|
||||
func BenchmarkEncodeSkip_Sonic(b *testing.B) {
|
||||
data := _TwitterJson
|
||||
root, e := NewParser(data).Parse()
|
||||
if e != 0 {
|
||||
|
|
@ -185,7 +185,7 @@ func BenchmarkEncodeSkip(b *testing.B) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkEncodeLoad(b *testing.B) {
|
||||
func BenchmarkEncodeLoad_Sonic(b *testing.B) {
|
||||
data := _TwitterJson
|
||||
root, e := NewParser(data).Parse()
|
||||
if e != 0 {
|
||||
|
|
|
|||
29
bench.py
29
bench.py
|
|
@ -28,6 +28,16 @@ def run(cmd):
|
|||
print ("Failed to run cmd: %s"%(cmd))
|
||||
exit(1)
|
||||
|
||||
def run_s(cmd):
    """Run `cmd` through the shell and return its standard output as text.

    The command line is echoed first, in the same way as the other run_*
    helpers.  Surrounding whitespace (including the trailing newline every
    shell command emits) is stripped, so the result can be compared directly
    against plain strings such as a branch name.

    If the command exits with a non-zero status, its captured output is
    printed and the script terminates with exit status 1.
    """
    print (cmd)
    try:
        # check_output raises CalledProcessError on a non-zero exit status;
        # os.popen never did, which left the error branch unreachable.
        # universal_newlines=True yields str on both Python 2 and Python 3.
        out = subprocess.check_output(cmd, shell=True, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        print ("Failed to run cmd: %s"%(cmd))
        if e.output:
            print (e.output)
        exit(1)
    return out.strip()
|
||||
|
||||
def run_r(cmd):
|
||||
print (cmd)
|
||||
try:
|
||||
|
|
@ -41,12 +51,12 @@ def run_r(cmd):
|
|||
|
||||
def compare(args):
|
||||
# detect current branch.
|
||||
result = run_r("git branch")
|
||||
current_branch = None
|
||||
for br in result.split('\n'):
|
||||
if br.startswith("* "):
|
||||
current_branch = br.lstrip('* ')
|
||||
break
|
||||
# result = run_r("git branch")
|
||||
current_branch = run_s("git status | head -n1 | sed 's/On branch //'")
|
||||
# for br in result.split('\n'):
|
||||
# if br.startswith("* "):
|
||||
# current_branch = br.lstrip('* ')
|
||||
# break
|
||||
|
||||
if not current_branch:
|
||||
print ("Failed to detech current branch")
|
||||
|
|
@ -70,7 +80,7 @@ def compare(args):
|
|||
run("git checkout -- .")
|
||||
if current_branch != "main":
|
||||
run("git checkout main")
|
||||
run("git pull origin main")
|
||||
run("git pull --allow-unrelated-histories origin main")
|
||||
|
||||
# benchmark main branch
|
||||
(fd, main) = tempfile.mkstemp(".main.txt")
|
||||
|
|
@ -93,12 +103,17 @@ def main():
|
|||
help='Specify the filter for golang benchmark')
|
||||
argparser.add_argument('-c', '--compare', dest='compare', action='store_true', required=False,
|
||||
help='Compare with the main benchmarking')
|
||||
argparser.add_argument('-t', '--times', dest='times', required=False,
|
||||
help='benchmark the times')
|
||||
args = argparser.parse_args()
|
||||
|
||||
if args.filter:
|
||||
gbench_args = "-bench=%s"%(args.filter)
|
||||
else:
|
||||
gbench_args = "-bench=."
|
||||
|
||||
if args.times:
|
||||
gbench_args += " -benchtime=%s"%(args.times)
|
||||
|
||||
if args.compare:
|
||||
target = compare(gbench_args)
|
||||
|
|
|
|||
4
bench.sh
4
bench.sh
|
|
@ -10,9 +10,9 @@ cd $pwd/decoder
|
|||
go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_.*)$"
|
||||
|
||||
cd $pwd/ast
|
||||
go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkGet.*|BenchmarkEncode.*)$"
|
||||
go test -benchmem -run=^$ -benchtime=1000000x -bench "^(BenchmarkGet.*|BenchmarkSet.*)$"
|
||||
|
||||
go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_.*)$"
|
||||
go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_.*|BenchmarkEncode.*)$"
|
||||
|
||||
go test -benchmem -run=^$ -benchtime=10000000x -bench "^(BenchmarkNodeGetByPath|BenchmarkStructGetByPath|BenchmarkNodeIndex|BenchmarkStructIndex|BenchmarkSliceIndex|BenchmarkMapIndex|BenchmarkNodeGet|BenchmarkSliceGet|BenchmarkMapGet|BenchmarkNodeSet|BenchmarkMapSet|BenchmarkNodeSetByIndex|BenchmarkSliceSetByIndex|BenchmarkStructSetByIndex|BenchmarkNodeUnset|BenchmarkMapUnset|BenchmarkNodUnsetByIndex|BenchmarkSliceUnsetByIndex|BenchmarkNodeAdd|BenchmarkSliceAdd|BenchmarkMapAdd)$"
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,45 +9,45 @@ package avx
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f64toa = __native_entry__() + 630
|
||||
_subr__html_escape = __native_entry__() + 8160
|
||||
_subr__i64toa = __native_entry__() + 3642
|
||||
_subr__lspace = __native_entry__() + 301
|
||||
_subr__f64toa = __native_entry__() + 570
|
||||
_subr__html_escape = __native_entry__() + 9062
|
||||
_subr__i64toa = __native_entry__() + 3205
|
||||
_subr__lspace = __native_entry__() + 251
|
||||
_subr__lzero = __native_entry__() + 13
|
||||
_subr__quote = __native_entry__() + 4955
|
||||
_subr__skip_array = __native_entry__() + 17551
|
||||
_subr__skip_number = __native_entry__() + 20669
|
||||
_subr__skip_object = __native_entry__() + 17588
|
||||
_subr__skip_one = __native_entry__() + 15699
|
||||
_subr__u64toa = __native_entry__() + 3735
|
||||
_subr__unquote = __native_entry__() + 6005
|
||||
_subr__validate_one = __native_entry__() + 20786
|
||||
_subr__value = __native_entry__() + 10880
|
||||
_subr__vnumber = __native_entry__() + 13857
|
||||
_subr__vsigned = __native_entry__() + 15171
|
||||
_subr__vstring = __native_entry__() + 12822
|
||||
_subr__vunsigned = __native_entry__() + 15430
|
||||
_subr__quote = __native_entry__() + 4498
|
||||
_subr__skip_array = __native_entry__() + 18295
|
||||
_subr__skip_number = __native_entry__() + 21246
|
||||
_subr__skip_object = __native_entry__() + 18332
|
||||
_subr__skip_one = __native_entry__() + 16448
|
||||
_subr__u64toa = __native_entry__() + 3300
|
||||
_subr__unquote = __native_entry__() + 6037
|
||||
_subr__validate_one = __native_entry__() + 21363
|
||||
_subr__value = __native_entry__() + 11651
|
||||
_subr__vnumber = __native_entry__() + 14561
|
||||
_subr__vsigned = __native_entry__() + 15866
|
||||
_subr__vstring = __native_entry__() + 13543
|
||||
_subr__vunsigned = __native_entry__() + 16146
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f64toa = 120
|
||||
_stack__html_escape = 72
|
||||
_stack__f64toa = 136
|
||||
_stack__html_escape = 64
|
||||
_stack__i64toa = 24
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 80
|
||||
_stack__skip_array = 160
|
||||
_stack__skip_number = 96
|
||||
_stack__skip_object = 160
|
||||
_stack__skip_one = 160
|
||||
_stack__skip_array = 144
|
||||
_stack__skip_number = 80
|
||||
_stack__skip_object = 144
|
||||
_stack__skip_one = 144
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 88
|
||||
_stack__validate_one = 160
|
||||
_stack__unquote = 72
|
||||
_stack__validate_one = 144
|
||||
_stack__value = 416
|
||||
_stack__vnumber = 312
|
||||
_stack__vsigned = 16
|
||||
_stack__vstring = 128
|
||||
_stack__vunsigned = 8
|
||||
_stack__vstring = 120
|
||||
_stack__vunsigned = 24
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,45 +9,45 @@ package avx2
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f64toa = __native_entry__() + 903
|
||||
_subr__html_escape = __native_entry__() + 9535
|
||||
_subr__i64toa = __native_entry__() + 3915
|
||||
_subr__lspace = __native_entry__() + 429
|
||||
_subr__f64toa = __native_entry__() + 814
|
||||
_subr__html_escape = __native_entry__() + 10717
|
||||
_subr__i64toa = __native_entry__() + 3449
|
||||
_subr__lspace = __native_entry__() + 379
|
||||
_subr__lzero = __native_entry__() + 13
|
||||
_subr__quote = __native_entry__() + 5328
|
||||
_subr__skip_array = __native_entry__() + 21558
|
||||
_subr__skip_number = __native_entry__() + 25206
|
||||
_subr__skip_object = __native_entry__() + 21595
|
||||
_subr__skip_one = __native_entry__() + 18458
|
||||
_subr__u64toa = __native_entry__() + 4008
|
||||
_subr__unquote = __native_entry__() + 7080
|
||||
_subr__validate_one = __native_entry__() + 25323
|
||||
_subr__value = __native_entry__() + 13781
|
||||
_subr__vnumber = __native_entry__() + 16616
|
||||
_subr__vsigned = __native_entry__() + 17930
|
||||
_subr__vstring = __native_entry__() + 15739
|
||||
_subr__vunsigned = __native_entry__() + 18189
|
||||
_subr__quote = __native_entry__() + 4842
|
||||
_subr__skip_array = __native_entry__() + 21271
|
||||
_subr__skip_number = __native_entry__() + 24742
|
||||
_subr__skip_object = __native_entry__() + 21308
|
||||
_subr__skip_one = __native_entry__() + 19499
|
||||
_subr__u64toa = __native_entry__() + 3544
|
||||
_subr__unquote = __native_entry__() + 7467
|
||||
_subr__validate_one = __native_entry__() + 24859
|
||||
_subr__value = __native_entry__() + 14548
|
||||
_subr__vnumber = __native_entry__() + 17612
|
||||
_subr__vsigned = __native_entry__() + 18917
|
||||
_subr__vstring = __native_entry__() + 16708
|
||||
_subr__vunsigned = __native_entry__() + 19197
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f64toa = 120
|
||||
_stack__f64toa = 136
|
||||
_stack__html_escape = 72
|
||||
_stack__i64toa = 24
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 64
|
||||
_stack__skip_array = 136
|
||||
_stack__skip_number = 96
|
||||
_stack__skip_object = 136
|
||||
_stack__skip_one = 136
|
||||
_stack__quote = 72
|
||||
_stack__skip_array = 152
|
||||
_stack__skip_number = 88
|
||||
_stack__skip_object = 152
|
||||
_stack__skip_one = 152
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 72
|
||||
_stack__validate_one = 136
|
||||
_stack__value = 408
|
||||
_stack__validate_one = 152
|
||||
_stack__value = 416
|
||||
_stack__vnumber = 312
|
||||
_stack__vsigned = 16
|
||||
_stack__vstring = 112
|
||||
_stack__vunsigned = 8
|
||||
_stack__vunsigned = 24
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
161
native/parsing.c
161
native/parsing.c
|
|
@ -18,10 +18,10 @@
|
|||
#include <stdint.h>
|
||||
|
||||
/** String Quoting **/
|
||||
|
||||
#define MAX_ESCAPED_BYTES 8
|
||||
typedef struct {
|
||||
const long n;
|
||||
const char s[8];
|
||||
const char s[MAX_ESCAPED_BYTES];
|
||||
} quoted_t;
|
||||
|
||||
static const quoted_t _SingleQuoteTab[256] = {
|
||||
|
|
@ -250,6 +250,154 @@ static inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Lookup table indexed by byte value: non-zero when the byte must be escaped
 * while quoting a string, zero when it can be copied through unchanged.
 *
 * Escaped bytes are the control characters 0x00-0x1F, the double quote (0x22)
 * and the backslash (0x5C). Every entry not listed is zero-initialized.
 */
static const bool _EscTab[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
    ['"']  = 1,                                     // 0x22
    ['\\'] = 1,                                     // 0x5C
    // all remaining entries are zeroes
};
|
||||
|
||||
static inline uint8_t escape_mask4(const char *sp) {
|
||||
return _EscTab[*(uint8_t *)(sp)] | (_EscTab[*(uint8_t *)(sp + 1)] << 1) | (_EscTab[*(uint8_t *)(sp + 2)] << 2) | (_EscTab[*(uint8_t *)(sp + 3)] << 3);
|
||||
}
|
||||
|
||||
static inline ssize_t memcchr_quote_unsafe(const char *sp, ssize_t nb, char *dp, const quoted_t * tab) {
|
||||
uint32_t mm;
|
||||
const char * ss = sp;
|
||||
const char * ds = dp;
|
||||
size_t cn = 0;
|
||||
uint8_t ch;
|
||||
|
||||
simd_copy:
|
||||
|
||||
if (nb < 16) goto scalar_copy;
|
||||
|
||||
#if USE_AVX2
|
||||
/* 32-byte loop, full store */
|
||||
while (nb >= 32) {
|
||||
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
|
||||
__m256i rv = _mm256_find_quote (vv);
|
||||
_mm256_storeu_si256 ((void *)dp, vv);
|
||||
|
||||
/* check for matches */
|
||||
if ((mm = _mm256_movemask_epi8(rv)) != 0) {
|
||||
cn = __builtin_ctz(mm);
|
||||
sp += cn;
|
||||
nb -= cn;
|
||||
dp += cn;
|
||||
goto escape;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 32;
|
||||
dp += 32;
|
||||
nb -= 32;
|
||||
}
|
||||
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 16-byte loop, full store */
|
||||
while (nb >= 16) {
|
||||
__m128i vv = _mm_loadu_si128 ((const void *)sp);
|
||||
__m128i rv = _mm_find_quote (vv);
|
||||
_mm_storeu_si128 ((void *)dp, vv);
|
||||
|
||||
/* check for matches */
|
||||
if ((mm = _mm_movemask_epi8(rv)) != 0) {
|
||||
cn = __builtin_ctz(mm);
|
||||
sp += cn;
|
||||
nb -= cn;
|
||||
dp += cn;
|
||||
goto escape;
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 16;
|
||||
dp += 16;
|
||||
nb -= 16;
|
||||
}
|
||||
|
||||
/* handle the remaining bytes with scalar code */
|
||||
// while (nb > 0) {
|
||||
// if (_EscTab[*(uint8_t *)sp]) {
|
||||
// goto escape;
|
||||
// } else {
|
||||
// nb--;
|
||||
// *dp++ = *sp++;
|
||||
// }
|
||||
// }
|
||||
// optimize: loop unrolling here
|
||||
|
||||
scalar_copy:
|
||||
if (nb >= 8) {
|
||||
uint8_t mask1 = escape_mask4(sp);
|
||||
*(uint64_t *)dp = *(const uint64_t *)sp;
|
||||
if (unlikely(mask1)) {
|
||||
cn = __builtin_ctz(mask1);
|
||||
sp += cn;
|
||||
nb -= cn;
|
||||
dp += cn;
|
||||
goto escape;
|
||||
}
|
||||
uint8_t mask2 = escape_mask4(sp + 4);
|
||||
if (unlikely(mask2)) {
|
||||
cn = __builtin_ctz(mask2);
|
||||
sp += cn + 4;
|
||||
nb -= cn + 4;
|
||||
dp += cn + 4;
|
||||
goto escape;
|
||||
}
|
||||
dp += 8, sp += 8, nb -= 8;
|
||||
}
|
||||
|
||||
if (nb >= 4) {
|
||||
uint8_t mask2 = escape_mask4(sp);
|
||||
*(uint32_t *)dp = *(const uint32_t *)sp;
|
||||
if (unlikely(mask2)) {
|
||||
cn = __builtin_ctz(mask2);
|
||||
sp += cn;
|
||||
nb -= cn;
|
||||
dp += cn;
|
||||
goto escape;
|
||||
}
|
||||
dp += 4, sp += 4, nb -= 4;
|
||||
}
|
||||
|
||||
while (nb > 0) {
|
||||
if (unlikely(_EscTab[*(uint8_t *)(sp)])) goto escape;
|
||||
*dp++ = *sp++, nb--;
|
||||
}
|
||||
/* all quote done */
|
||||
return dp - ds;
|
||||
escape:
|
||||
/* get the escape entry, handle consecutive quotes */
|
||||
do {
|
||||
uint8_t ch = *(uint8_t *)sp;
|
||||
int nc = tab[ch].n;
|
||||
/* copy the quoted value.
|
||||
* Note: dp always has at least 8 bytes (MAX_ESCAPED_BYTES) here.
|
||||
* so, we not use memcpy_p8(dp, tab[ch].s, nc);
|
||||
*/
|
||||
*(uint64_t *)dp = *(const uint64_t *)tab[ch].s;
|
||||
sp++;
|
||||
nb--;
|
||||
dp += nc;
|
||||
if (nb <= 0) break;
|
||||
/* copy and find escape chars */
|
||||
if (_EscTab[*(uint8_t *)(sp)] == 0) {
|
||||
goto simd_copy;
|
||||
}
|
||||
} while (true);
|
||||
return dp - ds;
|
||||
}
|
||||
|
||||
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags) {
|
||||
ssize_t nd = *dn;
|
||||
const char * ds = dp;
|
||||
|
|
@ -263,6 +411,11 @@ ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags)
|
|||
tab = _DoubleQuoteTab;
|
||||
}
|
||||
|
||||
if (*dn >= nb * MAX_ESCAPED_BYTES) {
|
||||
*dn = memcchr_quote_unsafe(sp, nb, dp, tab);
|
||||
return nb;
|
||||
}
|
||||
|
||||
/* find the special characters, copy on the fly */
|
||||
while (nb != 0) {
|
||||
int nc;
|
||||
|
|
@ -827,4 +980,6 @@ ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn) {
|
|||
/* all done */
|
||||
*dn = dp - ds;
|
||||
return sp - ss;
|
||||
}
|
||||
}
|
||||
|
||||
#undef MAX_ESCAPED_BYTES
|
||||
Loading…
Reference in a new issue