From 43e4a00e9014e68313aca33e03d3d1e30a5f5f7a Mon Sep 17 00:00:00 2001 From: Yi Duan Date: Wed, 17 Nov 2021 11:21:20 +0800 Subject: [PATCH] fix: add gc_write_barrier checks (#131) * fix: add gcWriteBarrier for decode * add gcWriteBarrier for encoder Co-authored-by: duanyi.aster --- README.md | 48 +++++++++--------- ast/encode_test.go | 3 ++ ast/parser_test.go | 22 ++++++-- ast/search_test.go | 3 ++ bench.sh | 2 + decoder/assembler_amd64.go | 100 ++++++++++++++++++++++++++++++------- decoder/compiler_test.go | 2 +- decoder/debug.go | 12 +++-- decoder/decoder_test.go | 12 +++-- decoder/generic_amd64.go | 80 ++++++++++++++++++++++------- decoder/pools.go | 2 +- decoder/stubs.go | 13 +++++ encode_test.go | 17 +++++-- encoder/assembler_amd64.go | 52 ++++++++++++++----- encoder/debug.go | 9 ++-- encoder/encoder_test.go | 16 ++++-- encoder/stubs.go | 14 ++++++ 17 files changed, 307 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 63c4901..4be3727 100644 --- a/README.md +++ b/README.md @@ -26,31 +26,31 @@ For **all sizes** of json and **all cases** of usage, **Sonic performs best**. goos: darwin goarch: amd64 cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz -BenchmarkEncoder_Generic_Sonic-16 100000 25911 ns/op 503.06 MB/s 13542 B/op 4 allocs/op -BenchmarkEncoder_Generic_JsonIter-16 100000 46693 ns/op 279.16 MB/s 13434 B/op 77 allocs/op -BenchmarkEncoder_Generic_StdLib-16 100000 143080 ns/op 91.10 MB/s 48177 B/op 827 allocs/op -BenchmarkEncoder_Binding_Sonic-16 100000 6851 ns/op 1902.68 MB/s 14229 B/op 4 allocs/op -BenchmarkEncoder_Binding_JsonIter-16 100000 22264 ns/op 585.49 MB/s 9488 B/op 2 allocs/op -BenchmarkEncoder_Binding_StdLib-16 100000 18685 ns/op 697.61 MB/s 9479 B/op 1 allocs/op -BenchmarkEncoder_Parallel_Generic_Sonic-16 100000 4981 ns/op 2617.14 MB/s 10747 B/op 4 allocs/op -BenchmarkEncoder_Parallel_Generic_JsonIter-16 100000 11225 ns/op 1161.24 MB/s 13447 B/op 77 allocs/op -BenchmarkEncoder_Parallel_Generic_StdLib-16 100000 55846 ns/op 233.41 MB/s 48215 B/op 827 allocs/op -BenchmarkEncoder_Parallel_Binding_Sonic-16 100000 1767 ns/op 7375.09 MB/s 11514 B/op 4 allocs/op -BenchmarkEncoder_Parallel_Binding_JsonIter-16 100000 4904 ns/op 2657.84 MB/s 9487 B/op 2 allocs/op -BenchmarkEncoder_Parallel_Binding_StdLib-16 100000 3958 ns/op 3293.18 MB/s 9477 B/op 1 allocs/op +BenchmarkEncoder_Generic_Sonic-16 100000 25547 ns/op 510.23 MB/s 13762 B/op 4 allocs/op +BenchmarkEncoder_Generic_JsonIter-16 100000 44526 ns/op 292.75 MB/s 13433 B/op 77 allocs/op +BenchmarkEncoder_Generic_StdLib-16 100000 134480 ns/op 96.93 MB/s 48177 B/op 827 allocs/op +BenchmarkEncoder_Binding_Sonic-16 100000 6658 ns/op 1957.74 MB/s 14156 B/op 4 allocs/op +BenchmarkEncoder_Binding_JsonIter-16 100000 21367 ns/op 610.05 MB/s 9487 B/op 2 allocs/op +BenchmarkEncoder_Binding_StdLib-16 100000 17558 ns/op 742.41 MB/s 9480 B/op 1 allocs/op +BenchmarkEncoder_Parallel_Generic_Sonic-16 100000 4562 ns/op 2857.18 MB/s 10957 B/op 4 allocs/op +BenchmarkEncoder_Parallel_Generic_JsonIter-16 100000 10943 ns/op 1191.21 MB/s 13449 B/op 77 allocs/op +BenchmarkEncoder_Parallel_Generic_StdLib-16 100000 52174 ns/op 249.84 MB/s 48218 B/op 827 allocs/op +BenchmarkEncoder_Parallel_Binding_Sonic-16 100000 1422 ns/op 9168.12 MB/s 11030 B/op 4 allocs/op +BenchmarkEncoder_Parallel_Binding_JsonIter-16 100000 4630 ns/op 2815.35 MB/s 9496 B/op 2 allocs/op +BenchmarkEncoder_Parallel_Binding_StdLib-16 100000 4977 ns/op 2619.08 MB/s 9488 B/op 1 allocs/op -BenchmarkDecoder_Generic_Sonic-16 100000 55680 ns/op 234.11 MB/s 49755 B/op 313 allocs/op -BenchmarkDecoder_Generic_StdLib-16 100000 144991 ns/op 89.90 MB/s 50897 B/op 772 allocs/op -BenchmarkDecoder_Generic_JsonIter-16 100000 103197 ns/op 126.31 MB/s 55786 B/op 1068 allocs/op -BenchmarkDecoder_Binding_Sonic-16 100000 28399 ns/op 458.99 MB/s 24984 B/op 34 allocs/op -BenchmarkDecoder_Binding_StdLib-16 100000 132178 ns/op 98.62 MB/s 10560 B/op 207 allocs/op -BenchmarkDecoder_Binding_JsonIter-16 100000 39963 ns/op 326.18 MB/s 14674 B/op 385 allocs/op -BenchmarkDecoder_Parallel_Generic_Sonic-16 100000 10999 ns/op 1185.11 MB/s 49658 B/op 313 allocs/op -BenchmarkDecoder_Parallel_Generic_StdLib-16 100000 67083 ns/op 194.31 MB/s 50907 B/op 772 allocs/op -BenchmarkDecoder_Parallel_Generic_JsonIter-16 100000 54292 ns/op 240.09 MB/s 55809 B/op 1068 allocs/op -BenchmarkDecoder_Parallel_Binding_Sonic-16 100000 5699 ns/op 2287.37 MB/s 24968 B/op 34 allocs/op -BenchmarkDecoder_Parallel_Binding_StdLib-16 100000 35801 ns/op 364.09 MB/s 10559 B/op 207 allocs/op -BenchmarkDecoder_Parallel_Binding_JsonIter-16 100000 13783 ns/op 945.74 MB/s 14678 B/op 385 allocs/op +BenchmarkDecoder_Generic_Sonic-16 100000 57247 ns/op 227.70 MB/s 49727 B/op 313 allocs/op +BenchmarkDecoder_Generic_StdLib-16 100000 139698 ns/op 93.31 MB/s 50898 B/op 772 allocs/op +BenchmarkDecoder_Generic_JsonIter-16 100000 101967 ns/op 127.84 MB/s 55787 B/op 1068 allocs/op +BenchmarkDecoder_Binding_Sonic-16 100000 28254 ns/op 461.35 MB/s 25062 B/op 34 allocs/op +BenchmarkDecoder_Binding_StdLib-16 100000 123779 ns/op 105.31 MB/s 10560 B/op 207 allocs/op +BenchmarkDecoder_Binding_JsonIter-16 100000 38253 ns/op 340.75 MB/s 14674 B/op 385 allocs/op +BenchmarkDecoder_Parallel_Generic_Sonic-16 100000 10171 ns/op 1281.59 MB/s 49458 B/op 313 allocs/op +BenchmarkDecoder_Parallel_Generic_StdLib-16 100000 54916 ns/op 237.36 MB/s 50907 B/op 772 allocs/op +BenchmarkDecoder_Parallel_Generic_JsonIter-16 100000 48286 ns/op 269.95 MB/s 55811 B/op 1068 allocs/op +BenchmarkDecoder_Parallel_Binding_Sonic-16 100000 5282 ns/op 2467.83 MB/s 24683 B/op 34 allocs/op +BenchmarkDecoder_Parallel_Binding_StdLib-16 100000 31875 ns/op 408.94 MB/s 10559 B/op 207 allocs/op +BenchmarkDecoder_Parallel_Binding_JsonIter-16 100000 13810 ns/op 943.90 MB/s 14679 B/op 385 allocs/op BenchmarkSearchOne_Gjson-16 100000 8992 ns/op 1448.28 MB/s 0 B/op 0 allocs/op BenchmarkSearchOne_Jsoniter-16 100000 58313 ns/op 223.33 MB/s 27936 B/op 647 allocs/op diff --git a/ast/encode_test.go b/ast/encode_test.go index 614a9d4..3c9302d 100644 --- a/ast/encode_test.go +++ b/ast/encode_test.go @@ -27,6 +27,9 @@ import ( ) func TestGC_Encode(t *testing.T) { + if debugSyncGC { + return + } root, err := NewSearcher(_TwitterJson).GetByPath() if err != nil { t.Fatal(err) diff --git a/ast/parser_test.go b/ast/parser_test.go index 13bf6ac..2d5ddf9 100644 --- a/ast/parser_test.go +++ b/ast/parser_test.go @@ -17,6 +17,7 @@ package ast import ( + `os` `encoding/json` `testing` `runtime` @@ -28,19 +29,30 @@ import ( `github.com/tidwall/gjson` ) +var ( + debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" + debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" +) + func TestMain(m *testing.M) { go func () { + if !debugAsyncGC { + return + } println("Begin GC looping...") - for { - runtime.GC() - debug.FreeOSMemory() - } - println("stop GC looping!") + for { + runtime.GC() + debug.FreeOSMemory() + } + println("stop GC looping!") }() m.Run() } func TestGC_Parse(t *testing.T) { + if debugSyncGC { + return + } _, _, err := Loads(_TwitterJson) if err != nil { t.Fatal(err) diff --git a/ast/search_test.go b/ast/search_test.go index 6eabecc..4fa7306 100644 --- a/ast/search_test.go +++ b/ast/search_test.go @@ -29,6 +29,9 @@ import ( func TestGC_Search(t *testing.T) { + if debugSyncGC { + return + } _, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0, "id") if err != nil { t.Fatal(err) diff --git a/bench.sh b/bench.sh index e90e51c..36b692b 100644 --- a/bench.sh +++ b/bench.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash pwd=$(pwd) +export SONIC_NO_ASYNC_GC=1 cd $pwd/encoder go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkEncoder_Generic_Sonic|BenchmarkEncoder_Generic_StdLib|BenchmarkEncoder_Generic_JsonIter|BenchmarkEncoder_Generic_GoJson|BenchmarkEncoder_Binding_Sonic|BenchmarkEncoder_Binding_StdLib|BenchmarkEncoder_Binding_JsonIter|BenchmarkEncoder_Binding_GoJson|BenchmarkEncoder_Parallel_Generic_Sonic|BenchmarkEncoder_Parallel_Generic_StdLib|BenchmarkEncoder_Parallel_Generic_JsonIter|BenchmarkEncoder_Parallel_Generic_GoJson|BenchmarkEncoder_Parallel_Binding_Sonic|BenchmarkEncoder_Parallel_Binding_StdLib|BenchmarkEncoder_Parallel_Binding_JsonIter|BenchmarkEncoder_Parallel_Binding_GoJson)$" @@ -15,4 +16,5 @@ go test -benchmem -run=^$ -benchtime=100000x -bench '^(BenchmarkEncodeRaw|Benchm go test -benchmem -run=^$ -benchtime=10000000x -bench "^(BenchmarkNodeGetByPath|BenchmarkStructGetByPath_Jsoniter|BenchmarkNodeIndex|BenchmarkStructIndex|BenchmarkSliceIndex|BenchmarkMapIndex|BenchmarkNodeGet|BenchmarkSliceGet|BenchmarkMapGet|BenchmarkNodeSet|BenchmarkMapSet|BenchmarkNodeSetByIndex|BenchmarkSliceSetByIndex|BenchmarkStructSetByIndex|BenchmarkNodeUnset|BenchmarkMapUnset|BenchmarkNodUnsetByIndex|BenchmarkSliceUnsetByIndex|BenchmarkNodeAdd|BenchmarkSliceAdd|BenchmarkMapAdd)$" +unset SONIC_NO_ASYNC_GC cd $pwd \ No newline at end of file diff --git a/decoder/assembler_amd64.go b/decoder/assembler_amd64.go index 5f689f4..679e84b 100644 --- a/decoder/assembler_amd64.go +++ b/decoder/assembler_amd64.go @@ -21,8 +21,9 @@ import ( `fmt` `math` `reflect` + `strconv` `unsafe` - + `github.com/bytedance/sonic/internal/caching` `github.com/bytedance/sonic/internal/cpu` `github.com/bytedance/sonic/internal/jit` @@ -30,6 +31,7 @@ import ( `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` `github.com/twitchyliquid64/golang-asm/obj` + `github.com/twitchyliquid64/golang-asm/obj/x86` ) /** Register Allocations @@ -67,7 +69,7 @@ const ( _FP_args = 72 // 72 bytes to pass arguments and return values for this function _FP_fargs = 80 // 80 bytes for passing arguments to other Go functions _FP_saves = 40 // 40 bytes for saving the registers before CALL instructions - _FP_locals = 96 // 96 bytes for local variables + _FP_locals = 96 // 96 bytes for local variables ) const ( @@ -133,9 +135,10 @@ var ( ) var ( - _DF = jit.Reg("R10") // reuse R10 in generic decoder for flags - _ET = jit.Reg("R10") - _EP = jit.Reg("R11") + _R10 = jit.Reg("R10") // used for gcWriteBarrier + _DF = jit.Reg("R10") // reuse R10 in generic decoder for flags + _ET = jit.Reg("R10") + _EP = jit.Reg("R11") ) var ( @@ -536,7 +539,7 @@ func (self *_Assembler) vfollow(vt reflect.Type) { self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JNZ" , "_end_{n}") // JNZ _end_{n} self.valloc(vt, _AX) // VALLOC ${vt}, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) + self.WritePtrAX(1, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) self.Link("_end_{n}") // _end_{n}: self.Emit("MOVQ" , _AX, _VP) // MOVQ AX, VP } @@ -664,7 +667,7 @@ func (self *_Assembler) unquote_once(p obj.Addr, n obj.Addr) { self.malloc(_SI, _DX) // MALLOC SI, DX self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI self.Emit("MOVQ" , n, _SI) // MOVQ ${n}, SI - self.Emit("MOVQ" , _DX, p) // MOVQ DX, ${p} + self.WriteRecNotAX(2, _DX, p, true, true) // MOVQ DX, ${p} self.Emit("LEAQ" , _VAR_sr, _CX) // LEAQ sr, CX self.Emit("XORL" , _R8, _R8) // XORL R8, R8 self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, fv @@ -696,7 +699,7 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) { self.malloc(_SI, _DX) // MALLOC SI, DX self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI self.Emit("MOVQ" , n, _SI) // MOVQ ${n}, SI - self.Emit("MOVQ" , _DX, p) // MOVQ DX, ${p} + self.WriteRecNotAX(6, _DX, p, true, true) // MOVQ DX, ${p} self.Emit("LEAQ" , _VAR_sr, _CX) // LEAQ sr, CX self.Emit("MOVL" , jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8 self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, AX @@ -866,7 +869,7 @@ func (self *_Assembler) unmarshal_func(t reflect.Type, fn obj.Addr, deref bool) self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JNZ" , "_deref_{n}") // JNZ _deref_{n} self.valloc(t.Elem(), _AX) // VALLOC ${t.Elem()}, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) + self.WritePtrAX(3, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) self.Link("_deref_{n}") // _deref_{n}: } @@ -1029,7 +1032,11 @@ func (self *_Assembler) _asm_OP_bin(_ *_Instr) { /* call the decoder */ self.Emit("XORL" , _DX, _DX) // XORL DX, DX self.Emit("MOVQ" , _VP, _DI) // MOVQ VP, DI - self.Emit("XCHGQ", _SI, jit.Ptr(_VP, 0)) // XCHGQ SI, (VP) + + self.Emit("MOVQ" , jit.Ptr(_VP, 0), _R9) // MOVQ SI, (VP) + self.WriteRecNotAX(4, _SI, jit.Ptr(_VP, 0), true, false) // XCHGQ SI, (VP) + self.Emit("MOVQ" , _R9, _SI) + self.Emit("XCHGQ", _DX, jit.Ptr(_VP, 8)) // XCHGQ DX, 8(VP) self.call(_F_b64decode) // CALL b64decode self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX @@ -1066,7 +1073,7 @@ func (self *_Assembler) _asm_OP_bool(_ *_Instr) { func (self *_Assembler) _asm_OP_num(_ *_Instr) { self.parse_number() // PARSE NUMBER self.slice_from(_VAR_st_Ep, 0) // SLICE st.Ep, $0 - self.Emit("MOVQ", _DI, jit.Ptr(_VP, 0)) // MOVQ DI, (VP) + self.WriteRecNotAX(5, _DI, jit.Ptr(_VP, 0), false, false) // MOVQ DI, (VP) self.Emit("MOVQ", _SI, jit.Ptr(_VP, 8)) // MOVQ SI, 8(VP) } @@ -1183,7 +1190,7 @@ func (self *_Assembler) _asm_OP_map_init(_ *_Instr) { self.Sjmp("JNZ" , "_end_{n}") // JNZ _end_{n} self.call_go(_F_makemap_small) // CALL_GO makemap_small self.Emit("MOVQ" , jit.Ptr(_SP, 0), _AX) // MOVQ (SP), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) + self.WritePtrAX(6, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) self.Link("_end_{n}") // _end_{n}: self.Emit("MOVQ" , _AX, _VP) // MOVQ AX, VP } @@ -1316,7 +1323,7 @@ func (self *_Assembler) _asm_OP_slice_init(p *_Instr) { self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 16)) // MOVQ CX, 16(SP) self.call_go(_F_makeslice) // CALL_GO makeslice self.Emit("MOVQ" , jit.Ptr(_SP, 24), _AX) // MOVQ 24(SP), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) + self.WritePtrAX(7, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) self.Link("_done_{n}") // _done_{n}: self.Emit("XORL" , _AX, _AX) // XORL AX, AX self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) @@ -1338,7 +1345,7 @@ func (self *_Assembler) _asm_OP_slice_append(p *_Instr) { self.Emit("MOVQ" , jit.Ptr(_SP, 40), _DI) // MOVQ 40(SP), DI self.Emit("MOVQ" , jit.Ptr(_SP, 48), _AX) // MOVQ 48(SP), AX self.Emit("MOVQ" , jit.Ptr(_SP, 56), _SI) // MOVQ 56(SP), SI - self.Emit("MOVQ" , _DI, jit.Ptr(_VP, 0)) // MOVQ DI, (VP) + self.WriteRecNotAX(8, _DI, jit.Ptr(_VP, 0), true, true)// MOVQ DI, (VP) self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) self.Emit("MOVQ" , _SI, jit.Ptr(_VP, 16)) // MOVQ SI, 16(VP) self.Link("_index_{n}") // _index_{n}: @@ -1502,12 +1509,12 @@ func (self *_Assembler) _asm_OP_load(_ *_Instr) { } func (self *_Assembler) _asm_OP_save(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("CMPQ", _AX, jit.Imm(_MaxStack)) // CMPQ AX, ${_MaxStack} + self.Emit("MOVQ", jit.Ptr(_ST, 0), _CX) // MOVQ (ST), CX + self.Emit("CMPQ", _CX, jit.Imm(_MaxStack)) // CMPQ CX, ${_MaxStack} self.Sjmp("JA" , _LB_stack_error) // JA _stack_error - self.Emit("MOVQ", _VP, jit.Sib(_ST, _AX, 1, 8)) // MOVQ VP, 8(ST)(AX) - self.Emit("ADDQ", jit.Imm(8), _AX) // ADDQ $8, AX - self.Emit("MOVQ", _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) + self.WriteRecNotAX(0 , _VP, jit.Sib(_ST, _CX, 1, 8), false, false) // MOVQ VP, 8(ST)(CX) + self.Emit("ADDQ", jit.Imm(8), _CX) // ADDQ $8, CX + self.Emit("MOVQ", _CX, jit.Ptr(_ST, 0)) // MOVQ CX, (ST) } func (self *_Assembler) _asm_OP_drop(_ *_Instr) { @@ -1559,3 +1566,58 @@ func (self *_Assembler) _asm_OP_switch(p *_Instr) { self.Link("_default_{n}") self.NOP() } + +var ( + _V_writeBarrier = jit.Imm(int64(uintptr(unsafe.Pointer(&_runtime_writeBarrier)))) + + _F_gcWriteBarrierAX = jit.Func(gcWriteBarrierAX) +) + +func (self *_Assembler) WritePtrAX(i int, rec obj.Addr, saveDI bool) { + self.Emit("MOVQ", _V_writeBarrier, _R10) + self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) + self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + if saveDI { + self.save(_DI) + } + self.Emit("LEAQ", rec, _DI) + self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX + self.Rjmp("CALL", _R10) + if saveDI { + self.load(_DI) + } + self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", _AX, rec) + self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") +} + +func (self *_Assembler) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr, saveDI bool, saveAX bool) { + if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { + panic("rec contains AX!") + } + self.Emit("MOVQ", _V_writeBarrier, _R10) + self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) + self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + if saveAX { + self.Emit("XCHGQ", ptr, _AX) + } else { + self.Emit("MOVQ", ptr, _AX) + } + if saveDI { + self.save(_DI) + } + self.Emit("LEAQ", rec, _DI) + self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX + self.Rjmp("CALL", _R10) + if saveDI { + self.load(_DI) + } + if saveAX { + self.Emit("XCHGQ", ptr, _AX) + } + self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", ptr, rec) + self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") +} \ No newline at end of file diff --git a/decoder/compiler_test.go b/decoder/compiler_test.go index 2c168cb..fb8ef76 100644 --- a/decoder/compiler_test.go +++ b/decoder/compiler_test.go @@ -26,5 +26,5 @@ import ( func TestCompiler_Compile(t *testing.T) { prg, err := make(_Compiler).compile(reflect.TypeOf(TwitterStruct{})) assert.Nil(t, err) - println(prg.disassemble()) + prg.disassemble() } diff --git a/decoder/debug.go b/decoder/debug.go index 7da6d4b..73bfdee 100644 --- a/decoder/debug.go +++ b/decoder/debug.go @@ -17,15 +17,19 @@ package decoder import ( - `strings` + `os` `runtime` `runtime/debug` + `strings` `github.com/bytedance/sonic/internal/jit` ) -//WARN: MUST set false after release -var debugGC = false + +var ( + debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" + debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" +) var ( _Instr_End _Instr = newInsOp(_OP_nil_1) @@ -52,7 +56,7 @@ func (self *_Assembler) force_gc() { } func (self *_Assembler) debug_instr(i int, v *_Instr) { - if debugGC { + if debugSyncGC { if (i+1 == len(self.p)) { self.print_gc(i, v, &_Instr_End) } else { diff --git a/decoder/decoder_test.go b/decoder/decoder_test.go index dd1a0ad..51b36d1 100644 --- a/decoder/decoder_test.go +++ b/decoder/decoder_test.go @@ -32,17 +32,23 @@ import ( func TestMain(m *testing.M) { go func () { + if !debugAsyncGC { + return + } println("Begin GC looping...") - for { + for { runtime.GC() debug.FreeOSMemory() - } - println("stop GC looping!") + } + println("stop GC looping!") }() m.Run() } func TestGC(t *testing.T) { + if debugSyncGC { + return + } var w interface{} out, err := decode(TwitterJson, &w) if err != nil { diff --git a/decoder/generic_amd64.go b/decoder/generic_amd64.go index de32a4b..134a512 100644 --- a/decoder/generic_amd64.go +++ b/decoder/generic_amd64.go @@ -20,11 +20,13 @@ import ( `encoding/json` `fmt` `reflect` + `strconv` `github.com/bytedance/sonic/internal/jit` `github.com/bytedance/sonic/internal/native` `github.com/bytedance/sonic/internal/native/types` `github.com/twitchyliquid64/golang-asm/obj` + `github.com/twitchyliquid64/golang-asm/obj/x86` ) /** Crucial Registers: @@ -68,7 +70,7 @@ type _ValueDecoder struct { func (self *_ValueDecoder) build() uintptr { self.Init(self.compile) - return *(*uintptr)(self.LoadWithFaker("decode_value", _VD_size, _VD_fargs, _Decoder_Generic_Shadow)) + return *(*uintptr)(self.LoadWithFaker("decode_value", _VD_size, _VD_args, _Decoder_Generic_Shadow)) } /** Function Calling Helpers **/ @@ -180,7 +182,7 @@ func (self *_ValueDecoder) compile() { self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.Emit("MOVQ", _VP, jit.Ptr(_ST, _ST_Vp)) // MOVQ VP, ST.Vp[0] + self.WriteRecNotAX(0, _VP, jit.Ptr(_ST, _ST_Vp), false) // MOVQ VP, ST.Vp[0] self.Emit("MOVQ", jit.Imm(_S_val), jit.Ptr(_ST, _ST_Vt)) // MOVQ _S_val, ST.Vt[0] self.Sjmp("JMP" , "_next") // JMP _next @@ -191,11 +193,11 @@ func (self *_ValueDecoder) compile() { self.Emit("MOVQ" , jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX self.Sjmp("JNC" , "_vtype_error") // JNC _vtype_error - self.Emit("XORL" , _AX, _AX) // XORL AX, AX + self.Emit("XORL" , _SI, _SI) // XORL SI, SI self.Emit("SUBQ" , jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp - self.Emit("XCHGQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _AX) // XCHGQ ST.Vp[CX], AX - self.Emit("MOVQ" , _R8, jit.Ptr(_AX, 0)) // MOVQ R8, (AX) - self.Emit("MOVQ" , _R9, jit.Ptr(_AX, 8)) // MOVQ R9, 8(AX) + self.Emit("XCHGQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // XCHGQ ST.Vp[CX], SI + self.Emit("MOVQ" , _R8, jit.Ptr(_SI, 0)) // MOVQ R8, (SI) + self.WriteRecNotAX(1, _R9, jit.Ptr(_SI, 8), false) // MOVQ R9, 8(SI) /* check for value stack */ self.Link("_next") // _next: @@ -282,7 +284,8 @@ func (self *_ValueDecoder) compile() { /** V_TRUE **/ self.Link("_decode_V_TRUE") // _decode_V_TRUE: self.Emit("MOVQ", _T_bool, _R8) // MOVQ _T_bool, R8 - self.Emit("MOVQ", _V_true, _R9) // MOVQ _V_true, R9 + // TODO: maybe modified by users? + self.Emit("MOVQ", _V_true, _R9) // MOVQ _V_true, R9 self.Emit("LEAQ", jit.Ptr(_IC, -4), _DI) // LEAQ -4(IC), DI self.Sjmp("JMP" , "_set_value") // JMP _set_value @@ -294,7 +297,7 @@ func (self *_ValueDecoder) compile() { self.Sjmp("JMP" , "_set_value") // JMP _set_value /** V_ARRAY **/ - self.Link("_decode_V_ARRAY") // _decode_V_ARRAY: + self.Link("_decode_V_ARRAY") // _decode_V_ARRAY self.Emit("MOVL", jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX @@ -322,7 +325,7 @@ func (self *_ValueDecoder) compile() { self.Emit("MOVQ", jit.Imm(_S_arr), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr, ST.Vt[CX] self.Emit("MOVQ", _T_slice, _AX) // MOVQ _T_slice, AX self.Emit("MOVQ", _AX, jit.Ptr(_SI, 0)) // MOVQ AX, (SI) - self.Emit("MOVQ", _R8, jit.Ptr(_SI, 8)) // MOVQ R8, 8(SI) + self.WriteRecNotAX(2, _R8, jit.Ptr(_SI, 8), false) // MOVQ R8, 8(SI) /* add a new slot for the first element */ self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX @@ -330,7 +333,7 @@ func (self *_ValueDecoder) compile() { self.Sjmp("JA" , "_stack_overflow") // JA _stack_overflow self.Emit("MOVQ", jit.Ptr(_R8, 0), _AX) // MOVQ (R8), AX self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] + self.WritePtrAX(3, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ AX, ST.Vp[CX] self.Emit("MOVQ", jit.Imm(_S_arr_0), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr_0, ST.Vt[CX] self.Sjmp("JMP" , "_next") // JMP _next @@ -344,11 +347,11 @@ func (self *_ValueDecoder) compile() { self.call_go(_F_makemap_small) // CALL_GO runtime.makemap_small self.Emit("MOVQ", jit.Ptr(_SP, 0), _AX) // MOVQ (SP), AX self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.Emit("MOVQ", jit.Imm(_S_obj), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj, ST.Vt[CX] self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI self.Emit("MOVQ", _T_map, _DX) // MOVQ _T_map, DX self.Emit("MOVQ", _DX, jit.Ptr(_SI, 0)) // MOVQ DX, (SI) - self.Emit("MOVQ", _AX, jit.Ptr(_SI, 8)) // MOVQ AX, 8(SI) - self.Emit("MOVQ", jit.Imm(_S_obj), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj, ST.Vt[CX] + self.WritePtrAX(4, jit.Ptr(_SI, 8), false) // MOVQ AX, 8(SI) self.Sjmp("JMP" , "_next") // JMP _next /** V_STRING **/ @@ -410,9 +413,9 @@ func (self *_ValueDecoder) compile() { self.Emit("MOVQ", jit.Ptr(_SP, 32), _AX) // MOVQ 32(SP), AX /* add to the pointer stack */ - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] - self.Sjmp("JMP" , "_next") // JMP _next + self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX + self.WritePtrAX(6, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ AX, ST.Vp[CX] + self.Sjmp("JMP" , "_next") // JMP _next /* allocate memory to store the string header and unquoted result */ self.Link("_unquote") // _unquote: @@ -501,6 +504,7 @@ func (self *_ValueDecoder) compile() { /** V_KEY_SEP **/ self.Link("_decode_V_KEY_SEP") // _decode_V_KEY_SEP: + // self.Byte(0xcc) self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX self.Emit("CMPQ", _AX, jit.Imm(_S_obj_delim)) // CMPQ AX, _S_obj_delim @@ -534,7 +538,7 @@ func (self *_ValueDecoder) compile() { self.Emit("SHLQ", jit.Imm(1), _DX) // SHLQ $1, DX self.Emit("LEAQ", jit.Sib(_SI, _DX, 8, 0), _SI) // LEAQ (SI)(DX*8), SI self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.Emit("MOVQ", _SI, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ SI, ST.Vp[CX] + self.WriteRecNotAX(7 , _SI, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ SI, ST.Vp[CX] self.Emit("MOVQ", jit.Imm(_S_val), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_val, ST.Vt[CX} self.Sjmp("JMP" , "_next") // JMP _next @@ -602,9 +606,9 @@ func (self *_ValueDecoder) compile() { self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI - self.Emit("MOVQ", _DI, jit.Ptr(_SI, 0)) // MOVQ DI, (SI) self.Emit("MOVQ", _DX, jit.Ptr(_SI, 8)) // MOVQ DX, 8(SI) self.Emit("MOVQ", _AX, jit.Ptr(_SI, 16)) // MOVQ AX, 16(AX) + self.WriteRecNotAX(8 , _DI, jit.Ptr(_SI, 0), false) // MOVQ R10, (SI) self.Sjmp("JMP" , "_array_append") // JMP _array_append /* error handlers */ @@ -666,6 +670,48 @@ func (self *_ValueDecoder) compile() { } } +func (self *_ValueDecoder) WritePtrAX(i int, rec obj.Addr, saveDI bool) { + self.Emit("MOVQ", _V_writeBarrier, _R10) + self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) + self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + if saveDI { + self.save(_DI) + } + self.Emit("LEAQ", rec, _DI) + self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX + self.Rjmp("CALL", _R10) + if saveDI { + self.load(_DI) + } + self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", _AX, rec) + self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") +} + +func (self *_ValueDecoder) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr, saveDI bool) { + if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { + panic("rec contains AX!") + } + self.Emit("MOVQ", _V_writeBarrier, _R10) + self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) + self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", ptr, _AX) + if saveDI { + self.save(_DI) + } + self.Emit("LEAQ", rec, _DI) + self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX + self.Rjmp("CALL", _R10) + if saveDI { + self.load(_DI) + } + self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", ptr, rec) + self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") +} + /** Generic Decoder **/ var ( diff --git a/decoder/pools.go b/decoder/pools.go index b43e8cd..81cb2fa 100644 --- a/decoder/pools.go +++ b/decoder/pools.go @@ -49,7 +49,7 @@ type _Stack struct { sp uintptr sb [_MaxStack]unsafe.Pointer mm types.StateMachine - vp [types.MAX_RECURSE]*interface{} + vp [types.MAX_RECURSE]unsafe.Pointer } type _Decoder func( diff --git a/decoder/stubs.go b/decoder/stubs.go index dcf1c83..f595e80 100644 --- a/decoder/stubs.go +++ b/decoder/stubs.go @@ -35,6 +35,19 @@ func mapfast(vt reflect.Type) bool { return vt.Elem().Size() <= _max_map_element_size } +type writeBarrier struct { + enabled bool // compiler emits a check of this before calling write barrier + pad [3]byte // compiler uses 32-bit load for "enabled" field + needed bool // whether we need a write barrier for current GC phase + cgo bool // whether we need a write barrier for a cgo check + alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load +} + +//go:linkname _runtime_writeBarrier runtime.writeBarrier +var _runtime_writeBarrier writeBarrier + +//go:linkname gcWriteBarrierAX runtime.gcWriteBarrier +func gcWriteBarrierAX() //go:nosplit //go:linkname throw runtime.throw diff --git a/encode_test.go b/encode_test.go index 34d5526..03a5982 100644 --- a/encode_test.go +++ b/encode_test.go @@ -17,6 +17,7 @@ package sonic import ( + `os` `bytes` `encoding` `encoding/json` @@ -34,14 +35,20 @@ import ( `github.com/bytedance/sonic/encoder` ) +var ( + debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" +) func TestMain(m *testing.M) { go func () { + if !debugAsyncGC { + return + } println("Begin GC looping...") - for { - runtime.GC() - debug.FreeOSMemory() - } - println("stop GC looping!") + for { + runtime.GC() + debug.FreeOSMemory() + } + println("stop GC looping!") }() m.Run() } diff --git a/encoder/assembler_amd64.go b/encoder/assembler_amd64.go index a5ef29c..dd906e1 100644 --- a/encoder/assembler_amd64.go +++ b/encoder/assembler_amd64.go @@ -130,9 +130,10 @@ var ( ) var ( - _LR = jit.Reg("R9") - _ET = jit.Reg("R10") - _EP = jit.Reg("R11") + _LR = jit.Reg("R9") + _R10 = jit.Reg("R10") // used for gcWriterBarrier + _ET = jit.Reg("R10") + _EP = jit.Reg("R11") ) var ( @@ -406,14 +407,14 @@ const ( ) func (self *_Assembler) save_state() { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("LEAQ", jit.Ptr(_AX, _StateSize), _R8) // LEAQ _StateSize(AX), R8 + self.Emit("MOVQ", jit.Ptr(_ST, 0), _CX) // MOVQ (ST), CX + self.Emit("LEAQ", jit.Ptr(_CX, _StateSize), _R8) // LEAQ _StateSize(CX), R8 self.Emit("CMPQ", _R8, jit.Imm(_StackLimit)) // CMPQ R8, $_StackLimit self.Sjmp("JA" , _LB_error_too_deep) // JA _error_too_deep - self.Emit("MOVQ", _SP_x, jit.Sib(_ST, _AX, 1, 8)) // MOVQ SP.x, 8(ST)(AX) - self.Emit("MOVQ", _SP_f, jit.Sib(_ST, _AX, 1, 16)) // MOVQ SP.f, 16(ST)(AX) - self.Emit("MOVQ", _SP_p, jit.Sib(_ST, _AX, 1, 24)) // MOVQ SP.p, 24(ST)(AX) - self.Emit("MOVQ", _SP_q, jit.Sib(_ST, _AX, 1, 32)) // MOVQ SP.q, 32(ST)(AX) + self.Emit("MOVQ", _SP_x, jit.Sib(_ST, _CX, 1, 8)) // MOVQ SP.x, 8(ST)(CX) + self.Emit("MOVQ", _SP_f, jit.Sib(_ST, _CX, 1, 16)) // MOVQ SP.f, 16(ST)(CX) + self.WriteRecNotAX(0, _SP_p, jit.Sib(_ST, _CX, 1, 24)) // MOVQ SP.p, 24(ST)(CX) + self.WriteRecNotAX(1, _SP_q, jit.Sib(_ST, _CX, 1, 32)) // MOVQ SP.q, 32(ST)(CX) self.Emit("MOVQ", _R8, jit.Ptr(_ST, 0)) // MOVQ R8, (ST) } @@ -459,10 +460,10 @@ func (self *_Assembler) prep_buffer_c() { } func (self *_Assembler) save_buffer() { - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", _RP, jit.Ptr(_AX, 0)) // MOVQ RP, (AX) - self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX) - self.Emit("MOVQ", _RC, jit.Ptr(_AX, 16)) // MOVQ RC, 16(AX) + self.Emit("MOVQ", _ARG_rb, _CX) // MOVQ rb<>+0(FP), CX + self.WriteRecNotAX(2, _RP, jit.Ptr(_CX, 0)) // MOVQ RP, (CX) + self.Emit("MOVQ", _RL, jit.Ptr(_CX, 8)) // MOVQ RL, 8(CX) + self.Emit("MOVQ", _RC, jit.Ptr(_CX, 16)) // MOVQ RC, 16(CX) } func (self *_Assembler) load_buffer() { @@ -1110,3 +1111,28 @@ func (self *_Assembler) _asm_OP_cond_testc(p *_Instr) { self.Emit("BTRQ", jit.Imm(_S_cond), _SP_f) // BTRQ $_S_cond, SP.f self.Xjmp("JC" , p.vi()) } + +var ( + _V_writeBarrier = jit.Imm(int64(uintptr(unsafe.Pointer(&_runtime_writeBarrier)))) + + _F_gcWriteBarrierAX = jit.Func(gcWriteBarrierAX) +) + +func (self *_Assembler) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr) { + if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { + panic("rec contains AX!") + } + self.Emit("MOVQ", _V_writeBarrier, _R10) + self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) + self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", ptr, _AX) + self.xsave(_DI) + self.Emit("LEAQ", rec, _DI) + self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX + self.Rjmp("CALL", _R10) + self.xload(_DI) + self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") + self.Emit("MOVQ", ptr, rec) + self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") +} diff --git a/encoder/debug.go b/encoder/debug.go index a65d0db..d7c355f 100644 --- a/encoder/debug.go +++ b/encoder/debug.go @@ -17,6 +17,7 @@ package encoder import ( + `os` `strings` `runtime` `runtime/debug` @@ -24,8 +25,10 @@ import ( `github.com/bytedance/sonic/internal/jit` ) -//WARN: MUST set false after release -var debugGC = false +var ( + debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" + debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" +) var ( _Instr_End _Instr = newInsOp(_OP_null) @@ -52,7 +55,7 @@ func (self *_Assembler) force_gc() { } func (self *_Assembler) debug_instr(i int, v *_Instr) { - if debugGC { + if debugSyncGC { if (i+1 == len(self.p)) { self.print_gc(i, v, &_Instr_End) } else { diff --git a/encoder/encoder_test.go b/encoder/encoder_test.go index 262b0d5..f6b246e 100644 --- a/encoder/encoder_test.go +++ b/encoder/encoder_test.go @@ -31,17 +31,23 @@ import ( func TestMain(m *testing.M) { go func () { + if !debugAsyncGC { + return + } println("Begin GC looping...") - for { - runtime.GC() - debug.FreeOSMemory() - } - println("stop GC looping!") + for { + runtime.GC() + debug.FreeOSMemory() + } + println("stop GC looping!") }() m.Run() } func TestGC(t *testing.T) { + if debugSyncGC { + return + } out, err := Encode(_GenericValue, 0) if err != nil { t.Fatal(err) diff --git a/encoder/stubs.go b/encoder/stubs.go index 0c2850c..96a3749 100644 --- a/encoder/stubs.go +++ b/encoder/stubs.go @@ -27,6 +27,20 @@ import ( //go:linkname _subr__b64encode github.com/chenzhuoyu/base64x._subr__b64encode var _subr__b64encode uintptr +type writeBarrier struct { + enabled bool // compiler emits a check of this before calling write barrier + pad [3]byte // compiler uses 32-bit load for "enabled" field + needed bool // whether we need a write barrier for current GC phase + cgo bool // whether we need a write barrier for a cgo check + alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load +} + +//go:linkname _runtime_writeBarrier runtime.writeBarrier +var _runtime_writeBarrier writeBarrier + +//go:linkname gcWriteBarrierAX runtime.gcWriteBarrier +func gcWriteBarrierAX() + //go:noescape //go:linkname memmove runtime.memmove //goland:noinspection GoUnusedParameter