2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-20 16:45:22 +08:00

fix: move large-size local array to _Stack (#162)

* fix: move large-size local array to _Stack

* fix: adjust jsonstate and add alwaysinline

* fix: initialize types.JsonState's dbuf at prologue

* fix: replace `always_inline` with `inline`

* fix: remove unused types

* feat: update asm2asm

* fix: check stack size before calling JIT function

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
liu 2021-12-30 14:51:23 +08:00 committed by GitHub
parent c3cb5de704
commit 1443eb3bcf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 2707 additions and 8251 deletions

View file

@ -14,7 +14,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.17
go-version: 1.17.1
- uses: actions/cache@v2
with:
@ -24,4 +24,4 @@ jobs:
${{ runner.os }}-go-
- name: Benchmark
run: GOMAXPROCS=4 go test -bench=. -benchmem -run=none ./...
run: sh bench.sh

View file

@ -149,7 +149,7 @@ var atoftests = []atofTest{
func TestDecodeFloat(t *testing.T) {
for i, tt := range atoftests {
// default float64
var sonicout, stdout interface{}
var sonicout, stdout float64
sonicerr := decoder.NewDecoder(tt.in).Decode(&sonicout)
stderr := json.NewDecoder(strings.NewReader(tt.in)).Decode(&stdout)
if !reflect.DeepEqual(sonicout, stdout) {

View file

@ -70,7 +70,7 @@
_FP_args = 96 // 96 bytes to pass arguments and return values for this function
_FP_fargs = 80 // 80 bytes for passing arguments to other Go functions
_FP_saves = 40 // 40 bytes for saving the registers before CALL instructions
_FP_locals = 72 // 72 bytes for local variables
_FP_locals = 88 // 88 bytes for local variables
)
const (
@ -174,14 +174,16 @@
_VAR_st_Dv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8)
_VAR_st_Iv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16)
_VAR_st_Ep = jit.Ptr(_SP, _FP_fargs + _FP_saves + 24)
_VAR_st_Db = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_st_Dc = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
)
var (
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 72)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 80)
)
type _Assembler struct {
@ -320,6 +322,10 @@
self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC
self.Emit("MOVQ", _ARG_vp, _VP) // MOVQ vp<>+24(FP), VP
self.Emit("MOVQ", _ARG_sb, _ST) // MOVQ vp<>+32(FP), ST
// initialize digital buffer first
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_st_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_st_Db) // MOVQ AX, ss.Dbuf
}
/** Function Calling Helpers **/
@ -578,8 +584,8 @@
}
func (self *_Assembler) parse_number() {
self.call_vf(_F_vnumber)
self.check_err()
self.call_vf(_F_vnumber) // call vnumber
self.check_err()
}
func (self *_Assembler) parse_signed() {

View file

@ -71,7 +71,7 @@ const (
_FP_args = 96 // 96 bytes to pass arguments and return values for this function
_FP_fargs = 80 // 80 bytes for passing arguments to other Go functions
_FP_saves = 40 // 40 bytes for saving the registers before CALL instructions
_FP_locals = 72 // 72 bytes for local variables
_FP_locals = 88 // 88 bytes for local variables
)
const (
@ -177,14 +177,16 @@ var (
_VAR_st_Dv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8)
_VAR_st_Iv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16)
_VAR_st_Ep = jit.Ptr(_SP, _FP_fargs + _FP_saves + 24)
_VAR_st_Db = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_st_Dc = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
)
var (
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 72)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 80)
)
type _Assembler struct {
@ -323,6 +325,10 @@ func (self *_Assembler) prologue() {
self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC
self.Emit("MOVQ", _ARG_vp, _VP) // MOVQ vp<>+24(FP), VP
self.Emit("MOVQ", _ARG_sb, _ST) // MOVQ vp<>+32(FP), ST
// initialize digital buffer first
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_st_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_st_Db) // MOVQ AX, ss.Dbuf
}
/** Function Calling Helpers **/
@ -581,7 +587,7 @@ func (self *_Assembler) parse_string() {
}
func (self *_Assembler) parse_number() {
self.call_vf(_F_vnumber)
self.call_vf(_F_vnumber) // call vnumber
self.check_err()
}

View file

@ -71,7 +71,7 @@ const (
_FP_args = 72 // 72 bytes to pass and spill register arguements
_FP_fargs = 80 // 80 bytes for passing arguments to other Go functions
_FP_saves = 48 // 48 bytes for saving the registers before CALL instructions
_FP_locals = 72 // 72 bytes for local variables
_FP_locals = 88 // 88 bytes for local variables
)
const (
@ -172,14 +172,16 @@ var (
_VAR_st_Dv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8)
_VAR_st_Iv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16)
_VAR_st_Ep = jit.Ptr(_SP, _FP_fargs + _FP_saves + 24)
_VAR_st_Db = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_st_Dc = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
)
var (
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48)
_VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56)
_VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64)
_VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 72)
_VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 80)
)
type _Assembler struct {
@ -331,6 +333,10 @@ func (self *_Assembler) prologue() {
self.Emit("MOVQ", jit.Imm(0), _VAR_sv_p) // MOVQ $0, sv.p<>+48(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_sv_n) // MOVQ $0, sv.n<>+56(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_vk) // MOVQ $0, vk<>+64(FP)
// initialize digital buffer first
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_st_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_st_Db) // MOVQ AX, ss.Dbuf
}
/** Function Calling Helpers **/

View file

@ -17,7 +17,6 @@
package decoder
import (
`runtime`
`testing`
`github.com/bytedance/sonic/internal/native/types`
@ -61,7 +60,7 @@ func TestErrors_EmptyDescription(t *testing.T) {
func TestDecoderErrorStackOverflower(t *testing.T) {
src := `{"a":[]}`
N := _MaxStack * runtime.GOMAXPROCS(0)
N := _MaxStack
for i:=0; i<N; i++ {
var obj map[string]string
err := NewDecoder(src).Decode(&obj)

View file

@ -44,7 +44,7 @@
_VD_args = 8 // 8 bytes for passing arguments to this functions
_VD_fargs = 64 // 64 bytes for passing arguments to other Go functions
_VD_saves = 40 // 40 bytes for saving the registers before CALL instructions
_VD_locals = 40 // 40 bytes for local variables
_VD_locals = 56 // 56 bytes for local variables
)
const (
@ -62,6 +62,8 @@
_VAR_ss_Dv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 16)
_VAR_ss_Iv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 24)
_VAR_ss_Ep = jit.Ptr(_SP, _VD_fargs + _VD_saves + 32)
_VAR_ss_Db = jit.Ptr(_SP, _VD_fargs + _VD_saves + 40)
_VAR_ss_Dc = jit.Ptr(_SP, _VD_fargs + _VD_saves + 48)
)
type _ValueDecoder struct {
@ -180,6 +182,11 @@
/* initialize the state machine */
self.Emit("XORL", _CX, _CX) // XORL CX, CX
self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df
/* initialize digital buffer first */
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_ss_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_ss_Db) // MOVQ AX, ss.Dbuf
/* add ST offset */
self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST
self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp
self.Emit("MOVQ", _VP, jit.Ptr(_ST, _ST_Vp)) // MOVQ VP, ST.Vp[0]

View file

@ -46,7 +46,7 @@ const (
_VD_args = 8 // 8 bytes for passing arguments to this functions
_VD_fargs = 64 // 64 bytes for passing arguments to other Go functions
_VD_saves = 40 // 40 bytes for saving the registers before CALL instructions
_VD_locals = 40 // 40 bytes for local variables
_VD_locals = 56 // 56 bytes for local variables
)
const (
@ -64,6 +64,8 @@ var (
_VAR_ss_Dv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 16)
_VAR_ss_Iv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 24)
_VAR_ss_Ep = jit.Ptr(_SP, _VD_fargs + _VD_saves + 32)
_VAR_ss_Db = jit.Ptr(_SP, _VD_fargs + _VD_saves + 40)
_VAR_ss_Dc = jit.Ptr(_SP, _VD_fargs + _VD_saves + 48)
)
type _ValueDecoder struct {
@ -182,6 +184,11 @@ func (self *_ValueDecoder) compile() {
/* initialize the state machine */
self.Emit("XORL", _CX, _CX) // XORL CX, CX
self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df
/* initialize digital buffer first */
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_ss_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_ss_Db) // MOVQ AX, ss.Dbuf
/* add ST offset */
self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST
self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp
self.WriteRecNotAX(0, _VP, jit.Ptr(_ST, _ST_Vp), false) // MOVQ VP, ST.Vp[0]

View file

@ -46,7 +46,7 @@ const (
_VD_args = 8 // 8 bytes for passing arguments to this functions
_VD_fargs = 64 // 64 bytes for passing arguments to other Go functions
_VD_saves = 48 // 48 bytes for saving the registers before CALL instructions
_VD_locals = 40 // 40 bytes for local variables
_VD_locals = 64 // 64 bytes for local variables
)
const (
@ -64,10 +64,12 @@ var (
_VAR_ss_Dv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 16)
_VAR_ss_Iv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 24)
_VAR_ss_Ep = jit.Ptr(_SP, _VD_fargs + _VD_saves + 32)
_VAR_ss_Db = jit.Ptr(_SP, _VD_fargs + _VD_saves + 40)
_VAR_ss_Dc = jit.Ptr(_SP, _VD_fargs + _VD_saves + 48)
)
var (
_VAR_R9 = jit.Ptr(_SP, _VD_fargs + _VD_saves +40)
_VAR_R9 = jit.Ptr(_SP, _VD_fargs + _VD_saves + 56)
)
type _ValueDecoder struct {
jit.BaseAssembler
@ -197,6 +199,11 @@ func (self *_ValueDecoder) compile() {
/* initialize the state machine */
self.Emit("XORL", _CX, _CX) // XORL CX, CX
self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df
/* initialize digital buffer first */
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_ss_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
self.Emit("MOVQ", _AX, _VAR_ss_Db) // MOVQ AX, ss.Dbuf
/* add ST offset */
self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST
self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp
self.WriteRecNotAX(0, _VP, jit.Ptr(_ST, _ST_Vp), false) // MOVQ VP, ST.Vp[0]

View file

@ -30,12 +30,14 @@ const (
_MinSlice = 16
_MaxStack = 65536 // 64k slots
_MaxStackBytes = _MaxStack * _PtrBytes
_MaxDigitNums = 800 // used in atof fallback algorithm
)
const (
_PtrBytes = _PTR_SIZE / 8
_FsmOffset = (_MaxStack + 1) * _PtrBytes
_StackSize = unsafe.Sizeof(_Stack{})
_PtrBytes = _PTR_SIZE / 8
_FsmOffset = (_MaxStack + 1) * _PtrBytes
_DbufOffset = _FsmOffset + int64(unsafe.Sizeof(types.StateMachine{})) + types.MAX_RECURSE * _PtrBytes
_StackSize = unsafe.Sizeof(_Stack{})
)
var (
@ -51,6 +53,7 @@ type _Stack struct {
sb [_MaxStack]unsafe.Pointer
mm types.StateMachine
vp [types.MAX_RECURSE]unsafe.Pointer
dp [_MaxDigitNums]byte
}
type _Decoder func(

View file

@ -17,17 +17,19 @@
package decoder
import (
`encoding`
`encoding/json`
`unsafe`
`encoding`
`encoding/json`
`unsafe`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/rt`
)
// decodeTypedPointer obtains the decoder function for vt through
// findOrCompile and runs it on s starting at index i, writing through vp.
//
// It returns (0, err) when findOrCompile fails; otherwise it returns whatever
// the decoder function returns. sb and fv are passed to the decoder unchanged.
func decodeTypedPointer(s string, i int, vt *rt.GoType, vp unsafe.Pointer, sb *_Stack, fv uint64) (int, error) {
	decode, err := findOrCompile(vt)
	if err != nil {
		return 0, err
	}

	// Request stack headroom before entering the decoder function.
	// NOTE(review): presumably the JIT-generated/native code cannot grow the
	// goroutine stack on its own, hence the up-front reservation — confirm.
	rt.MoreStack(_FP_size + _VD_size + native.MaxFrameSize)
	return decode(s, i, vp, sb, fv, "", nil)
}

View file

@ -71,8 +71,10 @@ func encodeTypedPointer(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *_Sta
} else if fn, err := findOrCompile(vt); err != nil {
return err
} else if (vt.KindFlags & rt.F_direct) == 0 {
rt.MoreStack(_FP_size + native.MaxFrameSize)
return fn(buf, *vp, sb, fv)
} else {
rt.MoreStack(_FP_size + native.MaxFrameSize)
return fn(buf, unsafe.Pointer(vp), sb, fv)
}
}

File diff suppressed because it is too large Load diff

View file

@ -14,16 +14,34 @@ var (
_subr__lspace = __native_entry__() + 301
_subr__lzero = __native_entry__() + 13
_subr__quote = __native_entry__() + 4955
_subr__skip_array = __native_entry__() + 17304
_subr__skip_object = __native_entry__() + 17339
_subr__skip_one = __native_entry__() + 15525
_subr__skip_array = __native_entry__() + 16074
_subr__skip_object = __native_entry__() + 16109
_subr__skip_one = __native_entry__() + 14295
_subr__u64toa = __native_entry__() + 3735
_subr__unquote = __native_entry__() + 5888
_subr__value = __native_entry__() + 10928
_subr__vnumber = __native_entry__() + 13724
_subr__vsigned = __native_entry__() + 14997
_subr__vstring = __native_entry__() + 12689
_subr__vunsigned = __native_entry__() + 15256
_subr__value = __native_entry__() + 9657
_subr__vnumber = __native_entry__() + 12453
_subr__vsigned = __native_entry__() + 13767
_subr__vstring = __native_entry__() + 11418
_subr__vunsigned = __native_entry__() + 14026
)
// One _stack__<name> constant per native subroutine, declared next to the
// matching _subr__<name> entry offsets.
// NOTE(review): presumably each value is the stack space in bytes that the
// subroutine needs — confirm against the generator.
const (
_stack__f64toa = 120
_stack__i64toa = 24
_stack__lspace = 8
_stack__lzero = 8
_stack__quote = 64
_stack__skip_array = 136
_stack__skip_object = 136
_stack__skip_one = 136
_stack__u64toa = 8
_stack__unquote = 88
_stack__value = 400
_stack__vnumber = 312
_stack__vsigned = 16
_stack__vstring = 128
_stack__vunsigned = 8
)
var (
@ -43,3 +61,21 @@ var (
_ = _subr__vstring
_ = _subr__vunsigned
)
// Blank declarations that reference every _stack__* constant, mirroring the
// `_ = _subr__*` block that precedes them.
// NOTE(review): presumably kept so unused-identifier linters stay quiet for
// generated names — confirm.
const (
_ = _stack__f64toa
_ = _stack__i64toa
_ = _stack__lspace
_ = _stack__lzero
_ = _stack__quote
_ = _stack__skip_array
_ = _stack__skip_object
_ = _stack__skip_one
_ = _stack__u64toa
_ = _stack__unquote
_ = _stack__value
_ = _stack__vnumber
_ = _stack__vsigned
_ = _stack__vstring
_ = _stack__vunsigned
)

File diff suppressed because it is too large Load diff

View file

@ -14,16 +14,34 @@ var (
_subr__lspace = __native_entry__() + 429
_subr__lzero = __native_entry__() + 13
_subr__quote = __native_entry__() + 5328
_subr__skip_array = __native_entry__() + 20330
_subr__skip_object = __native_entry__() + 20365
_subr__skip_one = __native_entry__() + 17473
_subr__skip_array = __native_entry__() + 19163
_subr__skip_object = __native_entry__() + 19198
_subr__skip_one = __native_entry__() + 16306
_subr__u64toa = __native_entry__() + 4008
_subr__unquote = __native_entry__() + 7125
_subr__value = __native_entry__() + 13020
_subr__vnumber = __native_entry__() + 15672
_subr__vsigned = __native_entry__() + 16945
_subr__vstring = __native_entry__() + 14795
_subr__vunsigned = __native_entry__() + 17204
_subr__value = __native_entry__() + 11812
_subr__vnumber = __native_entry__() + 14464
_subr__vsigned = __native_entry__() + 15778
_subr__vstring = __native_entry__() + 13587
_subr__vunsigned = __native_entry__() + 16037
)
// One _stack__<name> constant per native subroutine, declared next to the
// matching _subr__<name> entry offsets.
// NOTE(review): presumably each value is the stack space in bytes that the
// subroutine needs — confirm against the generator.
const (
_stack__f64toa = 120
_stack__i64toa = 24
_stack__lspace = 8
_stack__lzero = 8
_stack__quote = 80
_stack__skip_array = 128
_stack__skip_object = 128
_stack__skip_one = 128
_stack__u64toa = 8
_stack__unquote = 72
_stack__value = 392
_stack__vnumber = 312
_stack__vsigned = 16
_stack__vstring = 112
_stack__vunsigned = 8
)
var (
@ -43,3 +61,21 @@ var (
_ = _subr__vstring
_ = _subr__vunsigned
)
// Blank declarations that reference every _stack__* constant, mirroring the
// `_ = _subr__*` block that precedes them.
// NOTE(review): presumably kept so unused-identifier linters stay quiet for
// generated names — confirm.
const (
_ = _stack__f64toa
_ = _stack__i64toa
_ = _stack__lspace
_ = _stack__lzero
_ = _stack__quote
_ = _stack__skip_array
_ = _stack__skip_object
_ = _stack__skip_one
_ = _stack__u64toa
_ = _stack__unquote
_ = _stack__value
_ = _stack__vnumber
_ = _stack__vsigned
_ = _stack__vstring
_ = _stack__vunsigned
)

View file

@ -25,6 +25,8 @@ import (
`github.com/bytedance/sonic/internal/native/types`
)
// MaxFrameSize is the byte count that the encoder and decoder add to their own
// frame sizes when calling rt.MoreStack before entering compiled code.
// NOTE(review): 400 equals the largest generated _stack__* constant
// (_stack__value) — presumably it must be kept in sync when the native code is
// regenerated; confirm.
const MaxFrameSize uintptr = 400
var (
S_f64toa uintptr
S_i64toa uintptr

View file

@ -92,9 +92,11 @@ func (self ParsingError) Message() string {
type JsonState struct {
Vt ValueType
Dv float64
Iv int64
Ep int
Dv float64
Iv int64
Ep int
Dbuf *byte
Dcap int
}
type StateMachine struct {

View file

@ -0,0 +1,20 @@
// +build !noasm !appengine
// Code generated by asm2asm, DO NOT EDIT.
#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
// func MoreStack(size uintptr)
//
// Loops until SP - size - 1 is strictly above the word stored at offset 16 of
// the pointer read from TLS, calling runtime·morestack_noctxt each time the
// check fails.
// NOTE(review): (TLS) is presumably the current g and 16(R14) its stack guard
// — confirm against the runtime's g layout for the supported Go versions.
TEXT ·MoreStack(SB), NOSPLIT, $0 - 8
NO_LOCAL_POINTERS
_entry:
// R14 = pointer loaded from thread-local storage
MOVQ (TLS), R14
// R12 = size argument
MOVQ size+0(FP), R12
// R12 = ^size, i.e. -size - 1 in two's complement
NOTQ R12
// R12 = SP - size - 1
LEAQ (SP)(R12*1), R12
// compare the prospective stack pointer with the word at 16(R14)
CMPQ R12, 16(R14)
// unsigned below-or-equal: not enough room, go grow
JBE _stack_grow
RET
_stack_grow:
CALL runtime·morestack_noctxt<>(SB)
// re-run the check after the call returns
JMP _entry

View file

@ -49,3 +49,6 @@ func Str2Mem(s string) (v []byte) {
(*GoSlice)(unsafe.Pointer(&v)).Ptr = (*GoString)(unsafe.Pointer(&s)).Ptr
return
}
// MoreStack is implemented in assembly. It compares SP - size - 1 with a limit
// read through TLS and calls runtime·morestack_noctxt, then re-checks, for as
// long as the result is not above that limit.
// NOTE(review): presumably this guarantees at least size bytes of goroutine
// stack before JIT/native code is entered — confirm.
//go:nosplit
func MoreStack(size uintptr)

View file

@ -22,25 +22,10 @@ typedef struct u128_output {
uint64_t lo;
} u128_output;
static const uint8_t U8_LEN_TAB[256];
static const uint64_t POW10_M128_TAB[697][2];
static inline int count_len_u64(uint64_t val) {
if (val >> 32) {
return count_len_u64(val >> 32) + 32;
}
if (val >> 16) {
return count_len_u64(val >> 16) + 16;
}
if (val >> 8) {
return count_len_u64(val >> 8) + 8;
}
return U8_LEN_TAB[val];
}
static inline int count_leading_zeros_u64(uint64_t val) {
return 64 - count_len_u64(val);
/* Count the leading zero bits of a 64-bit value.
 *
 * Returns 64 for u == 0, since the compiler builtin's result is undefined for
 * a zero argument. Uses __builtin_clzll (operand: unsigned long long, at least
 * 64 bits wide) instead of __builtin_clzl, whose `unsigned long` operand is
 * only 32 bits on LLP64 targets and would silently truncate u there. */
static inline int count_leading_zeroes_u64(uint64_t u) {
    return u ? __builtin_clzll(u) : 64;
}
static inline u128_output mul_u64(uint64_t x, uint64_t y) {
@ -60,7 +45,7 @@ bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val) {
}
/* Calculate the 2-base exponent of float */
int clz = count_leading_zeros_u64(mant);
int clz = count_leading_zeroes_u64(mant);
mant <<= clz;
/* lg10/lg2 ≈ 217706>>16 */
uint64_t ret_exp2 = ((uint64_t)((217706 * exp10) >> 16) + 64 + 1023) - ((uint64_t)clz);
@ -134,27 +119,6 @@ bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val) {
return true;
}
/* the len of significant digits for unit8 */
static const uint8_t U8_LEN_TAB[256] = {
0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
};
/* Including 128-bit mantissa approximations (rounded down) of the powers of 10.
* For example:
* 1e-348 0xFA8FD5A0081C0288_1732C869CD60E453 * (2 ** (-348 * lg10 / lg2 - 127)).

View file

@ -16,7 +16,6 @@
#include "native.h"
#define DECIMAL_MAX_DNUM 800
/* decimal shift without overflow, e.g. 9 << 61 overflows */
#define MAX_SHIFT 60
@ -26,11 +25,12 @@
* example 3: 999 {"999", 3, 3, 0}
*/
typedef struct Decimal {
char d[DECIMAL_MAX_DNUM];
int nd;
int dp;
int neg;
int trunc;
char* d;
size_t cap;
int nd;
int dp;
int neg;
int trunc;
} Decimal;
/* decimal power of ten to binary power of two.
@ -44,7 +44,7 @@ static const int POW_TAB[9] = {1, 3, 6, 9, 13, 16, 19, 23, 26};
*/
typedef struct lshift_cheat {
int delta; // number of added digits when left shift
const char cutoff[DECIMAL_MAX_DNUM]; // minus one digit if under the half(cutoff).
const char cutoff[100]; // minus one digit if under the half(cutoff).
} lshift_cheat;
/* Look up for the decimal shift information by binary shift bits.
@ -56,8 +56,10 @@ typedef struct lshift_cheat {
*/
const static lshift_cheat LSHIFT_TAB[61];
static inline void decimal_init(Decimal *d) {
for (int i = 0; i < DECIMAL_MAX_DNUM; ++i) {
static inline void decimal_init(Decimal *d, char *dbuf, size_t cap) {
d->d = dbuf;
d->cap = cap;
for (int i = 0; i < d->cap; ++i) {
d->d[i] = 0;
}
d->dp = 0;
@ -66,10 +68,10 @@ static inline void decimal_init(Decimal *d) {
d->trunc = 0;
}
static inline void decimal_set(Decimal *d, const char *s, int len) {
static inline void decimal_set(Decimal *d, const char *s, ssize_t len, char *dbuf, ssize_t cap) {
int i = 0;
decimal_init(d);
decimal_init(d, dbuf, cap);
if (s[i] == '-') {
i++;
d->neg = 1;
@ -82,7 +84,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
d->dp--;
continue;
}
if (d->nd < DECIMAL_MAX_DNUM) {
if (d->nd < d->cap) {
d->d[d->nd] = s[i];
d->nd++;
} else if (s[i] != '0') {
@ -173,7 +175,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
while (n > 0) {
dig = n >> k;
n &= mask;
if (w < DECIMAL_MAX_DNUM) {
if (w < d->cap) {
d->d[w] = (char)(dig + '0');
w++;
} else if (dig > 0) {
@ -221,7 +223,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
quo = n / 10;
rem = n - 10 * quo;
w--;
if (w < DECIMAL_MAX_DNUM) {
if (w < d->cap) {
d->d[w] = (char)(rem + '0');
} else if (rem != 0) {
/* truncated */
@ -235,7 +237,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
quo = n / 10;
rem = n - 10 * quo;
w--;
if (w < DECIMAL_MAX_DNUM) {
if (w < d->cap) {
d->d[w] = (char)(rem + '0');
} else if (rem != 0) {
/* truncated */
@ -245,8 +247,8 @@ static inline void left_shift(Decimal *d, uint32_t k) {
}
d->nd += delta;
if (d->nd >= DECIMAL_MAX_DNUM) {
d->nd = DECIMAL_MAX_DNUM;
if (d->nd >= d->cap) {
d->nd = d->cap;
}
d->dp += delta;
trim(d);
@ -413,15 +415,14 @@ out:
return 0;
}
double atof_native_decimal(const char *buf, int len) {
double atof_native(const char *sp, ssize_t nb, char* dbuf, ssize_t cap) {
Decimal d;
double val = 0;
decimal_set(&d, buf, len);
decimal_set(&d, sp, nb, dbuf, cap);
decimal_to_f64(&d, &val);
return val;
}
#undef DECIMAL_MAX_DNUM
#undef MAX_SHIFT
const static lshift_cheat LSHIFT_TAB[61] = {

View file

@ -35,6 +35,7 @@
#define V_ELEM_SEP 11
#define V_ARRAY_END 12
#define V_OBJECT_END 13
#define V_ATOF_NEED_FALLBACK 14
#define F_DBLUNQ (1 << 0)
#define F_UNIREP (1 << 1)
@ -56,11 +57,14 @@
#define likely(v) (__builtin_expect((v), 1))
#define unlikely(v) (__builtin_expect((v), 0))
#define always_inline inline __attribute__((always_inline))
#define as_m128p(v) ((__m128i *)(v))
#define as_m128c(v) ((const __m128i *)(v))
#define as_m256c(v) ((const __m256i *)(v))
#define as_m128v(v) (*(const __m128i *)(v))
#define as_uint64v(p) (*(uint64_t *)(p))
#define is_infinity(v) ((as_uint64v(&v) << 1) == 0xFFE0000000000000)
typedef struct {
char * buf;
@ -84,6 +88,8 @@ typedef struct {
double dv;
int64_t iv;
int64_t ep;
char* dbuf;
ssize_t dcap;
} JsonState;
typedef struct {
@ -116,6 +122,6 @@ long skip_negative(const GoString *src, long *p);
long skip_positive(const GoString *src, long *p);
bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val);
double atof_native_decimal(const char *buf, int len);
double atof_native(const char *sp, ssize_t nb, char* dbuf, ssize_t cap);
#endif

View file

@ -528,27 +528,22 @@ static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
return false;
}
static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, const GoString *src, long idx) {
double val = 0.0;
static inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val) {
double val_up = 0.0;
/* look-up for fast atof if the conversion can be exactly */
if (is_atof_exact(man, exp, sgn, &val)) {
return val;
if (is_atof_exact(man, exp, sgn, val)) {
return true;
}
/* A fast atof algorithm for high precision */
if (atof_eisel_lemire64(man, exp, sgn, &val)) {
if (!trunc) {
return val;
}
if (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == val) {
return val;
if (atof_eisel_lemire64(man, exp, sgn, val)) {
if (!trunc || (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == *val)) {
return true;
}
}
/* when above algorithms failed, fallback. It is slow. */
return atof_native_decimal(src->buf + idx, src->len - idx);
return false;
}
static bool inline is_overflow(uint64_t man, int sgn, int exp10) {
@ -564,12 +559,14 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
int man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t
int exp10 = 0; // val = sgn * man * 10 ^ exp10
int trunc = 0;
double val = 0;
/* initial buffer pointers */
long i = *p;
size_t n = src->len;
const char * s = src->buf;
long si = *p; // record the idx for fall-back when parsing float.
char *dbuf = ret->dbuf;
ssize_t dcap = ret->dcap;
/* initialize the result, and check for EOF */
init_ret(V_INTEGER)
@ -600,11 +597,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
/* skip the leading zeros of 0.000xxxx */
if (man == 0 && exp10 == 0) {
int idx = i;
while (i < n && s[i] == '0') {
i++;
exp10--;
}
exp10 = idx - i;
man = 0;
man_nd = 0;
}
@ -657,12 +653,18 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
}
parse_float:
ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
/* if the float number is infinity */
if (((*(uint64_t *)&ret->dv) << 1) == 0xFFE0000000000000) {
/* when fast algorithms failed, use slow fallback.*/
if(!atof_fast(man, exp10, sgn, trunc, &val)) {
val = atof_native(s + *p, i - *p, dbuf, dcap);
}
/* check parsed double val */
if (is_infinity(val)) {
ret->vt = -ERR_FLOAT_INF;
}
/* update the result */
ret->dv = val;
*p = i;
}

@ -1 +1 @@
Subproject commit a9988b2b8191ac9b8bc879ff8db18c650753a067
Subproject commit 09224ab8c109bdb8da13af04abd7c01cb6e38d87