2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

opt: remove bound checks and use loop unrolling in quote (begin to use clang13) (#237)

* opt: remove most bound check in quote

* opt: remove branch in copied escaped chars

* opt: use cache-friendly bool array

* opt: use loop unrolling in quote

* feat: add `-t` for bench.py

Change-Id: I3f41218187597126ef960eab09c6fa6fe252f347

* test:(bench) adjust repeat_time

* test: add Diff on CI

* test: update bench.py

* doc: update README.md

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
liu 2022-06-08 21:50:10 +08:00 committed by GitHub
parent 3717c62ec2
commit 410625ca9e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 11244 additions and 10867 deletions

View file

@ -4,7 +4,7 @@ on: pull_request
jobs:
build:
runs-on: self-hosted
runs-on: [self-hosted, X64]
steps:
- uses: actions/checkout@v2
@ -25,3 +25,6 @@ jobs:
- name: Benchmark
run: sh bench.sh
# - name: Diff
# run: ./bench.py -b '"^Benchmark.*Sonic"' -c

104
README.md
View file

@ -20,60 +20,60 @@ goversion: 1.17.1
goos: darwin
goarch: amd64
cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
BenchmarkEncoder_Generic_Sonic-16 42688 ns/op 305.36 MB/s 15608 B/op 4 allocs/op
BenchmarkEncoder_Generic_Sonic_Fast-16 30043 ns/op 433.87 MB/s 14638 B/op 4 allocs/op
BenchmarkEncoder_Generic_JsonIter-16 46461 ns/op 280.56 MB/s 13433 B/op 77 allocs/op
BenchmarkEncoder_Generic_GoJson-16 73608 ns/op 177.09 MB/s 23219 B/op 16 allocs/op
BenchmarkEncoder_Generic_StdLib-16 122622 ns/op 106.30 MB/s 49137 B/op 827 allocs/op
BenchmarkEncoder_Binding_Sonic-16 8190 ns/op 1591.61 MB/s 16175 B/op 4 allocs/op
BenchmarkEncoder_Binding_Sonic_Fast-16 7365 ns/op 1769.85 MB/s 14367 B/op 4 allocs/op
BenchmarkEncoder_Binding_JsonIter-16 23326 ns/op 558.81 MB/s 9487 B/op 2 allocs/op
BenchmarkEncoder_Binding_GoJson-16 9412 ns/op 1384.93 MB/s 9480 B/op 1 allocs/op
BenchmarkEncoder_Binding_StdLib-16 18510 ns/op 704.22 MB/s 9479 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic-16 7716 ns/op 1689.37 MB/s 12812 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4791 ns/op 2720.47 MB/s 10884 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_JsonIter-16 10505 ns/op 1240.85 MB/s 13455 B/op 77 allocs/op
BenchmarkEncoder_Parallel_Generic_GoJson-16 24086 ns/op 541.19 MB/s 23379 B/op 17 allocs/op
BenchmarkEncoder_Parallel_Generic_StdLib-16 65697 ns/op 198.41 MB/s 49164 B/op 827 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic-16 2085 ns/op 6251.53 MB/s 12933 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1612 ns/op 8087.31 MB/s 11177 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6169 ns/op 2112.84 MB/s 9494 B/op 2 allocs/op
BenchmarkEncoder_Parallel_Binding_GoJson-16 3492 ns/op 3733.14 MB/s 9492 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Binding_StdLib-16 5170 ns/op 2521.50 MB/s 9482 B/op 1 allocs/op
BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op
BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op
BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op
BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op
BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op
BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op
BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op
BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op
BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op
BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op
BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 allocs/op
BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op
BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op
BenchmarkDecoder_Generic_Sonic-16 71589 ns/op 182.08 MB/s 57531 B/op 723 allocs/op
BenchmarkDecoder_Generic_Sonic_Fast-16 57653 ns/op 226.10 MB/s 49743 B/op 313 allocs/op
BenchmarkDecoder_Generic_StdLib-16 143584 ns/op 90.78 MB/s 50870 B/op 772 allocs/op
BenchmarkDecoder_Generic_JsonIter-16 94775 ns/op 137.54 MB/s 55783 B/op 1068 allocs/op
BenchmarkDecoder_Generic_GoJson-16 88647 ns/op 147.04 MB/s 66371 B/op 973 allocs/op
BenchmarkDecoder_Binding_Sonic-16 32399 ns/op 402.33 MB/s 27814 B/op 137 allocs/op
BenchmarkDecoder_Binding_Sonic_Fast-16 28655 ns/op 454.89 MB/s 25127 B/op 34 allocs/op
BenchmarkDecoder_Binding_StdLib-16 116617 ns/op 111.78 MB/s 7344 B/op 103 allocs/op
BenchmarkDecoder_Binding_JsonIter-16 36206 ns/op 360.02 MB/s 14673 B/op 385 allocs/op
BenchmarkDecoder_Binding_GoJson-16 29396 ns/op 443.43 MB/s 22042 B/op 49 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic-16 12243 ns/op 1064.68 MB/s 57135 B/op 723 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10101 ns/op 1290.48 MB/s 49440 B/op 313 allocs/op
BenchmarkDecoder_Parallel_Generic_StdLib-16 57352 ns/op 227.28 MB/s 50877 B/op 772 allocs/op
BenchmarkDecoder_Parallel_Generic_JsonIter-16 58693 ns/op 222.09 MB/s 55814 B/op 1068 allocs/op
BenchmarkDecoder_Parallel_Generic_GoJson-16 45245 ns/op 288.10 MB/s 66430 B/op 974 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic-16 7035 ns/op 1852.89 MB/s 27731 B/op 137 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 6510 ns/op 2002.33 MB/s 24841 B/op 34 allocs/op
BenchmarkDecoder_Parallel_Binding_StdLib-16 33086 ns/op 393.97 MB/s 7344 B/op 103 allocs/op
BenchmarkDecoder_Parallel_Binding_JsonIter-16 17827 ns/op 731.18 MB/s 14680 B/op 385 allocs/op
BenchmarkDecoder_Parallel_Binding_GoJson-16 16813 ns/op 775.29 MB/s 22268 B/op 49 allocs/op
BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op
BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op
BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op
BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op
BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op
BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op
BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op
BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op
BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op
BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op
BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op
BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op
BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op
BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op
BenchmarkGetOne_Sonic-16 11328 ns/op 1149.64 MB/s 29 B/op 1 allocs/op
BenchmarkGetOne_Gjson-16 12970 ns/op 1004.07 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Jsoniter-16 59928 ns/op 217.31 MB/s 27936 B/op 647 allocs/op
BenchmarkGetOne_Parallel_Sonic-16 1447 ns/op 9002.23 MB/s 114 B/op 1 allocs/op
BenchmarkGetOne_Parallel_Gjson-16 1171 ns/op 11125.73 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Parallel_Jsoniter-16 15545 ns/op 837.75 MB/s 27940 B/op 647 allocs/op
BenchmarkSetOne_Sonic-16 16922 ns/op 769.57 MB/s 1936 B/op 17 allocs/op
BenchmarkSetOne_Sjson-16 42683 ns/op 305.11 MB/s 52181 B/op 9 allocs/op
BenchmarkSetOne_Jsoniter-16 91104 ns/op 142.95 MB/s 45861 B/op 964 allocs/op
BenchmarkSetOne_Parallel_Sonic-16 2065 ns/op 6305.03 MB/s 2383 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 11526 ns/op 1129.87 MB/s 52175 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 35044 ns/op 371.61 MB/s 45887 B/op 964 allocs/op
BenchmarkGetOne_Sonic-16 11650 ns/op 1117.81 MB/s 29 B/op 1 allocs/op
BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 allocs/op
BenchmarkGetOne_Parallel_Sonic-16 1955 ns/op 6659.94 MB/s 125 B/op 1 allocs/op
BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op
BenchmarkSetOne_Sonic-16 16124 ns/op 807.70 MB/s 1787 B/op 17 allocs/op
BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op
BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op
BenchmarkSetOne_Parallel_Sonic-16 2383 ns/op 5465.02 MB/s 2186 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
```
- [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers)
![small benchmarks](bench-small.jpg)

View file

@ -144,7 +144,7 @@ func TestEncodeNode(t *testing.T) {
}
}
func BenchmarkEncodeRaw(b *testing.B) {
func BenchmarkEncodeRaw_Sonic(b *testing.B) {
data := _TwitterJson
root, e := NewSearcher(data).GetByPath()
if e != nil {
@ -164,7 +164,7 @@ func BenchmarkEncodeRaw(b *testing.B) {
}
}
func BenchmarkEncodeSkip(b *testing.B) {
func BenchmarkEncodeSkip_Sonic(b *testing.B) {
data := _TwitterJson
root, e := NewParser(data).Parse()
if e != 0 {
@ -185,7 +185,7 @@ func BenchmarkEncodeSkip(b *testing.B) {
}
}
func BenchmarkEncodeLoad(b *testing.B) {
func BenchmarkEncodeLoad_Sonic(b *testing.B) {
data := _TwitterJson
root, e := NewParser(data).Parse()
if e != 0 {

View file

@ -28,6 +28,16 @@ def run(cmd):
print ("Failed to run cmd: %s"%(cmd))
exit(1)
def run_s(cmd):
    """Run `cmd` through the shell and return its standard output as text.

    The command line is echoed before it runs. If the command exits with a
    non-zero status, whatever it wrote to stdout is printed and the script
    terminates with exit code 1.

    The previous implementation used os.popen(), which never raises
    subprocess.CalledProcessError, so its error branch could not trigger and
    a failing command went unnoticed.
    """
    # Imported locally so this function does not depend on the file header.
    import subprocess

    print (cmd)
    try:
        # universal_newlines=True makes the result text, as os.popen().read()
        # returned before.
        return subprocess.check_output(cmd, shell=True, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        print (e.output)
        exit(1)
def run_r(cmd):
print (cmd)
try:
@ -41,12 +51,12 @@ def run_r(cmd):
def compare(args):
# detect current branch.
result = run_r("git branch")
current_branch = None
for br in result.split('\n'):
if br.startswith("* "):
current_branch = br.lstrip('* ')
break
# result = run_r("git branch")
current_branch = run_s("git status | head -n1 | sed 's/On branch //'")
# for br in result.split('\n'):
# if br.startswith("* "):
# current_branch = br.lstrip('* ')
# break
if not current_branch:
print ("Failed to detech current branch")
@ -70,7 +80,7 @@ def compare(args):
run("git checkout -- .")
if current_branch != "main":
run("git checkout main")
run("git pull origin main")
run("git pull --allow-unrelated-histories origin main")
# benchmark main branch
(fd, main) = tempfile.mkstemp(".main.txt")
@ -93,12 +103,17 @@ def main():
help='Specify the filter for golang benchmark')
argparser.add_argument('-c', '--compare', dest='compare', action='store_true', required=False,
help='Compare with the main benchmarking')
argparser.add_argument('-t', '--times', dest='times', required=False,
help='benchmark the times')
args = argparser.parse_args()
if args.filter:
gbench_args = "-bench=%s"%(args.filter)
else:
gbench_args = "-bench=."
if args.times:
gbench_args += " -benchtime=%s"%(args.times)
if args.compare:
target = compare(gbench_args)

View file

@ -10,9 +10,9 @@ cd $pwd/decoder
go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_.*)$"
cd $pwd/ast
go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkGet.*|BenchmarkEncode.*)$"
go test -benchmem -run=^$ -benchtime=1000000x -bench "^(BenchmarkGet.*|BenchmarkSet.*)$"
go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_.*)$"
go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_.*|BenchmarkEncode.*)$"
go test -benchmem -run=^$ -benchtime=10000000x -bench "^(BenchmarkNodeGetByPath|BenchmarkStructGetByPath|BenchmarkNodeIndex|BenchmarkStructIndex|BenchmarkSliceIndex|BenchmarkMapIndex|BenchmarkNodeGet|BenchmarkSliceGet|BenchmarkMapGet|BenchmarkNodeSet|BenchmarkMapSet|BenchmarkNodeSetByIndex|BenchmarkSliceSetByIndex|BenchmarkStructSetByIndex|BenchmarkNodeUnset|BenchmarkMapUnset|BenchmarkNodUnsetByIndex|BenchmarkSliceUnsetByIndex|BenchmarkNodeAdd|BenchmarkSliceAdd|BenchmarkMapAdd)$"

File diff suppressed because it is too large Load diff

View file

@ -9,45 +9,45 @@ package avx
func __native_entry__() uintptr
var (
_subr__f64toa = __native_entry__() + 630
_subr__html_escape = __native_entry__() + 8160
_subr__i64toa = __native_entry__() + 3642
_subr__lspace = __native_entry__() + 301
_subr__f64toa = __native_entry__() + 570
_subr__html_escape = __native_entry__() + 9062
_subr__i64toa = __native_entry__() + 3205
_subr__lspace = __native_entry__() + 251
_subr__lzero = __native_entry__() + 13
_subr__quote = __native_entry__() + 4955
_subr__skip_array = __native_entry__() + 17551
_subr__skip_number = __native_entry__() + 20669
_subr__skip_object = __native_entry__() + 17588
_subr__skip_one = __native_entry__() + 15699
_subr__u64toa = __native_entry__() + 3735
_subr__unquote = __native_entry__() + 6005
_subr__validate_one = __native_entry__() + 20786
_subr__value = __native_entry__() + 10880
_subr__vnumber = __native_entry__() + 13857
_subr__vsigned = __native_entry__() + 15171
_subr__vstring = __native_entry__() + 12822
_subr__vunsigned = __native_entry__() + 15430
_subr__quote = __native_entry__() + 4498
_subr__skip_array = __native_entry__() + 18295
_subr__skip_number = __native_entry__() + 21246
_subr__skip_object = __native_entry__() + 18332
_subr__skip_one = __native_entry__() + 16448
_subr__u64toa = __native_entry__() + 3300
_subr__unquote = __native_entry__() + 6037
_subr__validate_one = __native_entry__() + 21363
_subr__value = __native_entry__() + 11651
_subr__vnumber = __native_entry__() + 14561
_subr__vsigned = __native_entry__() + 15866
_subr__vstring = __native_entry__() + 13543
_subr__vunsigned = __native_entry__() + 16146
)
const (
_stack__f64toa = 120
_stack__html_escape = 72
_stack__f64toa = 136
_stack__html_escape = 64
_stack__i64toa = 24
_stack__lspace = 8
_stack__lzero = 8
_stack__quote = 80
_stack__skip_array = 160
_stack__skip_number = 96
_stack__skip_object = 160
_stack__skip_one = 160
_stack__skip_array = 144
_stack__skip_number = 80
_stack__skip_object = 144
_stack__skip_one = 144
_stack__u64toa = 8
_stack__unquote = 88
_stack__validate_one = 160
_stack__unquote = 72
_stack__validate_one = 144
_stack__value = 416
_stack__vnumber = 312
_stack__vsigned = 16
_stack__vstring = 128
_stack__vunsigned = 8
_stack__vstring = 120
_stack__vunsigned = 24
)
var (

File diff suppressed because it is too large Load diff

View file

@ -9,45 +9,45 @@ package avx2
func __native_entry__() uintptr
var (
_subr__f64toa = __native_entry__() + 903
_subr__html_escape = __native_entry__() + 9535
_subr__i64toa = __native_entry__() + 3915
_subr__lspace = __native_entry__() + 429
_subr__f64toa = __native_entry__() + 814
_subr__html_escape = __native_entry__() + 10717
_subr__i64toa = __native_entry__() + 3449
_subr__lspace = __native_entry__() + 379
_subr__lzero = __native_entry__() + 13
_subr__quote = __native_entry__() + 5328
_subr__skip_array = __native_entry__() + 21558
_subr__skip_number = __native_entry__() + 25206
_subr__skip_object = __native_entry__() + 21595
_subr__skip_one = __native_entry__() + 18458
_subr__u64toa = __native_entry__() + 4008
_subr__unquote = __native_entry__() + 7080
_subr__validate_one = __native_entry__() + 25323
_subr__value = __native_entry__() + 13781
_subr__vnumber = __native_entry__() + 16616
_subr__vsigned = __native_entry__() + 17930
_subr__vstring = __native_entry__() + 15739
_subr__vunsigned = __native_entry__() + 18189
_subr__quote = __native_entry__() + 4842
_subr__skip_array = __native_entry__() + 21271
_subr__skip_number = __native_entry__() + 24742
_subr__skip_object = __native_entry__() + 21308
_subr__skip_one = __native_entry__() + 19499
_subr__u64toa = __native_entry__() + 3544
_subr__unquote = __native_entry__() + 7467
_subr__validate_one = __native_entry__() + 24859
_subr__value = __native_entry__() + 14548
_subr__vnumber = __native_entry__() + 17612
_subr__vsigned = __native_entry__() + 18917
_subr__vstring = __native_entry__() + 16708
_subr__vunsigned = __native_entry__() + 19197
)
const (
_stack__f64toa = 120
_stack__f64toa = 136
_stack__html_escape = 72
_stack__i64toa = 24
_stack__lspace = 8
_stack__lzero = 8
_stack__quote = 64
_stack__skip_array = 136
_stack__skip_number = 96
_stack__skip_object = 136
_stack__skip_one = 136
_stack__quote = 72
_stack__skip_array = 152
_stack__skip_number = 88
_stack__skip_object = 152
_stack__skip_one = 152
_stack__u64toa = 8
_stack__unquote = 72
_stack__validate_one = 136
_stack__value = 408
_stack__validate_one = 152
_stack__value = 416
_stack__vnumber = 312
_stack__vsigned = 16
_stack__vstring = 112
_stack__vunsigned = 8
_stack__vunsigned = 24
)
var (

View file

@ -18,10 +18,10 @@
#include <stdint.h>
/** String Quoting **/
#define MAX_ESCAPED_BYTES 8
typedef struct {
const long n;
const char s[8];
const char s[MAX_ESCAPED_BYTES];
} quoted_t;
static const quoted_t _SingleQuoteTab[256] = {
@ -250,6 +250,154 @@ static inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_
}
}
/*
 * Byte-indexed lookup table: an entry is true when the scalar paths of
 * memcchr_quote_unsafe() must divert that byte to the escape handler.
 * Flagged bytes are the control characters 0x00-0x1F, the double quote (0x22)
 * and the backslash (0x5C); every index not listed below stays false.
 */
static const bool _EscTab[256] = {
    /* control characters 0x00-0x1F */
    [0x00] = true, [0x01] = true, [0x02] = true, [0x03] = true,
    [0x04] = true, [0x05] = true, [0x06] = true, [0x07] = true,
    [0x08] = true, [0x09] = true, [0x0A] = true, [0x0B] = true,
    [0x0C] = true, [0x0D] = true, [0x0E] = true, [0x0F] = true,
    [0x10] = true, [0x11] = true, [0x12] = true, [0x13] = true,
    [0x14] = true, [0x15] = true, [0x16] = true, [0x17] = true,
    [0x18] = true, [0x19] = true, [0x1A] = true, [0x1B] = true,
    [0x1C] = true, [0x1D] = true, [0x1E] = true, [0x1F] = true,

    /* double quote, 0x22 */
    ['"'] = true,

    /* backslash, 0x5C */
    ['\\'] = true,
};
/*
 * Build a 4-bit mask for the four bytes starting at `sp`: bit i is set when
 * byte sp[i] is flagged in _EscTab. A result of 0 means none of the four
 * bytes is flagged. Reads exactly four bytes.
 */
static inline uint8_t escape_mask4(const char *sp) {
    const uint8_t *bytes = (const uint8_t *)sp;

    unsigned bit0 = _EscTab[bytes[0]];
    unsigned bit1 = _EscTab[bytes[1]];
    unsigned bit2 = _EscTab[bytes[2]];
    unsigned bit3 = _EscTab[bytes[3]];

    return (uint8_t)(bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3));
}
/*
 * Copy `nb` bytes from `sp` to `dp`, replacing every byte that needs escaping
 * with its entry from `tab` (the bytes in tab[ch].s, tab[ch].n of which are
 * kept), and return the number of bytes written to `dp`.
 *
 * "unsafe" means there are no bounds checks on `dp`: whole SIMD blocks,
 * 8-byte words and 4-byte words are stored before they are inspected, and
 * every escape is written as a full 8-byte store. The caller must therefore
 * provide enough room in `dp`; the visible caller, quote(), only takes this
 * path when *dn >= nb * MAX_ESCAPED_BYTES.
 *
 * sp   source bytes
 * nb   number of source bytes to process
 * dp   destination buffer
 * tab  256-entry escape table indexed by byte value
 */
static inline ssize_t memcchr_quote_unsafe(const char *sp, ssize_t nb, char *dp, const quoted_t * tab) {
uint32_t mm;
/* NOTE(review): `ss` is never read in this function. */
const char * ss = sp;
/* start of the output, used to compute the returned length */
const char * ds = dp;
size_t cn = 0;
/* NOTE(review): this `ch` is never read; the escape loop declares its own. */
uint8_t ch;
/* (re)entry point for bulk copying; also re-entered after an escape run */
simd_copy:
if (nb < 16) goto scalar_copy;
#if USE_AVX2
/* 32-byte loop, full store.
 * _mm256_find_quote is defined elsewhere; presumably it flags the same bytes
 * as _EscTab -- TODO confirm. */
while (nb >= 32) {
__m256i vv = _mm256_loadu_si256 ((const void *)sp);
__m256i rv = _mm256_find_quote (vv);
_mm256_storeu_si256 ((void *)dp, vv);
/* check for matches */
if ((mm = _mm256_movemask_epi8(rv)) != 0) {
/* lowest set bit = number of bytes before the first flagged byte; only
 * those are kept, the rest of the stored block is overwritten later */
cn = __builtin_ctz(mm);
sp += cn;
nb -= cn;
dp += cn;
goto escape;
}
/* move to next block */
sp += 32;
dp += 32;
nb -= 32;
}
/* clear upper half to avoid AVX-SSE transition penalty.
 * NOTE(review): not reached when the loop above leaves via `goto escape`. */
_mm256_zeroupper();
#endif
/* 16-byte loop, full store; same scheme as the 32-byte loop */
while (nb >= 16) {
__m128i vv = _mm_loadu_si128 ((const void *)sp);
__m128i rv = _mm_find_quote (vv);
_mm_storeu_si128 ((void *)dp, vv);
/* check for matches */
if ((mm = _mm_movemask_epi8(rv)) != 0) {
cn = __builtin_ctz(mm);
sp += cn;
nb -= cn;
dp += cn;
goto escape;
}
/* move to next block */
sp += 16;
dp += 16;
nb -= 16;
}
/* Handle the remaining bytes (fewer than 16) with scalar code.
 * This is an unrolled form of a byte-by-byte loop that jumps to `escape` on
 * a byte flagged in _EscTab and otherwise copies it: one optional 8-byte
 * step, one optional 4-byte step, then single bytes. */
scalar_copy:
if (nb >= 8) {
/* flags for bytes 0..3; all 8 bytes are stored before any check */
uint8_t mask1 = escape_mask4(sp);
*(uint64_t *)dp = *(const uint64_t *)sp;
if (unlikely(mask1)) {
cn = __builtin_ctz(mask1);
sp += cn;
nb -= cn;
dp += cn;
goto escape;
}
/* flags for bytes 4..7; bit positions are relative to sp + 4, hence the
 * extra 4 when advancing */
uint8_t mask2 = escape_mask4(sp + 4);
if (unlikely(mask2)) {
cn = __builtin_ctz(mask2);
sp += cn + 4;
nb -= cn + 4;
dp += cn + 4;
goto escape;
}
dp += 8, sp += 8, nb -= 8;
}
if (nb >= 4) {
/* 4-byte step: store first, then check the four flags */
uint8_t mask2 = escape_mask4(sp);
*(uint32_t *)dp = *(const uint32_t *)sp;
if (unlikely(mask2)) {
cn = __builtin_ctz(mask2);
sp += cn;
nb -= cn;
dp += cn;
goto escape;
}
dp += 4, sp += 4, nb -= 4;
}
/* at most 3 bytes are left: copy them one at a time */
while (nb > 0) {
if (unlikely(_EscTab[*(uint8_t *)(sp)])) goto escape;
*dp++ = *sp++, nb--;
}
/* all input consumed: return the number of bytes written */
return dp - ds;
escape:
/* sp points at a byte that needs escaping; emit its table entry and keep
 * going for as long as the following bytes also need escaping */
do {
uint8_t ch = *(uint8_t *)sp;
int nc = tab[ch].n;
/* Copy the quoted value.
 * Note: the caller guarantees that dp has at least 8 bytes
 * (MAX_ESCAPED_BYTES) of room here, so a fixed 8-byte store is used instead
 * of memcpy_p8(dp, tab[ch].s, nc); dp then advances by only nc bytes and the
 * surplus is overwritten by later output.
 */
*(uint64_t *)dp = *(const uint64_t *)tab[ch].s;
sp++;
nb--;
dp += nc;
if (nb <= 0) break;
/* next byte needs no escaping: go back to bulk copying */
if (_EscTab[*(uint8_t *)(sp)] == 0) {
goto simd_copy;
}
} while (true);
/* input ended right after an escaped byte */
return dp - ds;
}
ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags) {
ssize_t nd = *dn;
const char * ds = dp;
@ -263,6 +411,11 @@ ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags)
tab = _DoubleQuoteTab;
}
if (*dn >= nb * MAX_ESCAPED_BYTES) {
*dn = memcchr_quote_unsafe(sp, nb, dp, tab);
return nb;
}
/* find the special characters, copy on the fly */
while (nb != 0) {
int nc;
@ -827,4 +980,6 @@ ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn) {
/* all done */
*dn = dp - ds;
return sp - ss;
}
}
#undef MAX_ESCAPED_BYTES