mirror of
https://github.com/ii64/sonic.git
synced 2026-06-20 16:45:22 +08:00
add ValidateString option into decode (#253)
* fix: add escape validate * feat: add validatestring option * add print debug * add jit debug in decoder * fix go1.16 decoder debug * fix: not change others * fix generic parse bug * remove debug info * test twitter json * fix: option typos * test: add bug test * fix validate bugs * fix: validate * remove files * re-compile * update license * add flags on `skip_xxx()` * fix internal native tests * re-compile * add validstring for perftest * modify api * fix readme * fix comment * add license * rename to ValidateString * fix xprintf.h * add debug Co-authored-by: liuqiang <liuqiang.06@bytedance.com> Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
This commit is contained in:
parent
b36771ba37
commit
de2dc2c35a
28 changed files with 11818 additions and 11911 deletions
10
README.md
10
README.md
|
|
@ -3,7 +3,7 @@
|
|||
A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data).
|
||||
|
||||
## Requirement
|
||||
- Go 1.15/1.16/1.17/1.18
|
||||
- Go 1.15/1.16/1.17/1.18/1.19
|
||||
- Linux/MacOS/Windows
|
||||
- Amd64 CPU with AVX instruction set
|
||||
|
||||
|
|
@ -261,10 +261,10 @@ println(string(buf) == string(exp)) // true
|
|||
- modification: `Set()`, `SetByIndex()`, `Add()`
|
||||
|
||||
## Compatibility
|
||||
Sonic **DOSE NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. For developers who use sonic to build their applications in different environments (ex: developing on M1 Mac but running on linux server), or those who want to handle JSON strictly consistent with `encoding/json`, we provide some compatible APIs as `sonic.API`
|
||||
- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with corresponding config , and some options like `SortKeys=false` will be invalid.
|
||||
- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It whill fall back to `encoding/json`.
|
||||
- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with corresponding config , and some options will be invalid.
|
||||
Sonic **DOES NOT** guarantee support for all environments, due to the difficulty of developing high-performance code. For developers who use sonic to build their applications in different environments (e.g. developing on an M1 Mac but running on a Linux server), or those who want to handle JSON strictly consistently with `encoding/json`, we provide some compatible APIs as `sonic.API`
|
||||
- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options like `SortKeys=false` will be invalid.
|
||||
- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It will fall back to `encoding/json`.
|
||||
- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options will be invalid.
|
||||
|
||||
## Tips
|
||||
|
||||
|
|
|
|||
3
api.go
3
api.go
|
|
@ -64,6 +64,9 @@ import (
|
|||
// CopyString indicates decoder to decode string values by copying instead of referring.
|
||||
CopyString bool
|
||||
|
||||
// ValidateString indicates decoder to validate string values: decoder will return errors when
|
||||
// invalid UTF-8 chars or unescaped control chars (\u0000-\u001f) appear in the string value of JSON.
|
||||
ValidateString bool
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
|
|||
|
||||
func (self *Parser) skip() (int, types.ParsingError) {
|
||||
fsm := types.NewStateMachine()
|
||||
start := native.SkipOne(&self.s, &self.p, fsm)
|
||||
start := native.SkipOne(&self.s, &self.p, fsm, uint64(0))
|
||||
types.FreeStateMachine(fsm)
|
||||
|
||||
if start < 0 {
|
||||
|
|
|
|||
|
|
@ -35,8 +35,10 @@ import (
|
|||
`testing`
|
||||
`time`
|
||||
`unsafe`
|
||||
`unicode/utf8`
|
||||
|
||||
`github.com/bytedance/sonic/decoder`
|
||||
`github.com/davecgh/go-spew/spew`
|
||||
)
|
||||
|
||||
type T struct {
|
||||
|
|
@ -410,6 +412,7 @@ type unmarshalTest struct {
|
|||
useNumber bool
|
||||
golden bool
|
||||
disallowUnknownFields bool
|
||||
validateString bool
|
||||
}
|
||||
|
||||
type B struct {
|
||||
|
|
@ -696,11 +699,13 @@ var unmarshalTests = []unmarshalTest{
|
|||
in: "\"hello\xffworld\"",
|
||||
ptr: new(string),
|
||||
out: "hello\xffworld",
|
||||
validateString: false,
|
||||
},
|
||||
{
|
||||
in: "\"hello\xc2\xc2world\"",
|
||||
ptr: new(string),
|
||||
out: "hello\xc2\xc2world",
|
||||
validateString: false,
|
||||
},
|
||||
{
|
||||
in: "\"hello\xc2\xffworld\"",
|
||||
|
|
@ -999,6 +1004,17 @@ var unmarshalTests = []unmarshalTest{
|
|||
ptr: new(map[string]json.Number),
|
||||
err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`),
|
||||
},
|
||||
{in: `\u`, ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
{in: `\u`, ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
|
||||
{in: "\"\x00\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
{in: "\"\x00\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
{in: "\"\xff\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
{in: "\"\xff\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||
{in: "\"\x00\"", ptr: new(interface{}), out: interface{}("\x00"), validateString: false},
|
||||
{in: "\"\x00\"", ptr: new(string), out: "\x00", validateString: false},
|
||||
{in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\xff"), validateString: false},
|
||||
{in: "\"\xff\"", ptr: new(string), out: "\xff", validateString: false},
|
||||
}
|
||||
|
||||
func trim(b []byte) []byte {
|
||||
|
|
@ -1128,14 +1144,20 @@ func TestUnmarshal(t *testing.T) {
|
|||
}
|
||||
|
||||
dec := decoder.NewDecoder(tt.in)
|
||||
validUtf8 := true
|
||||
if tt.useNumber {
|
||||
dec.UseNumber()
|
||||
}
|
||||
if tt.disallowUnknownFields {
|
||||
dec.DisallowUnknownFields()
|
||||
}
|
||||
if err := dec.Decode(v.Interface()); (err == nil) != (tt.err == nil) {
|
||||
t.Errorf("#%d: %v, want %v", i, err, tt.err)
|
||||
if tt.validateString {
|
||||
dec.ValidateString()
|
||||
validUtf8 = utf8.Valid([]byte(tt.in))
|
||||
}
|
||||
if err := dec.Decode(v.Interface()); (err == nil) != (tt.err == nil && validUtf8) {
|
||||
spew.Dump(tt.in)
|
||||
t.Fatalf("#%d: %v, want %v", i, err, tt.err)
|
||||
continue
|
||||
} else if err != nil {
|
||||
continue
|
||||
|
|
@ -2203,7 +2225,6 @@ func TestInvalidStringOption(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatalf("Marshal: %v", err)
|
||||
}
|
||||
|
||||
err = Unmarshal(data, &item)
|
||||
if err != nil {
|
||||
t.Fatalf("Unmarshal: %v", err)
|
||||
|
|
|
|||
|
|
@ -387,6 +387,7 @@ func (self *_Assembler) call_sf(fn obj.Addr) {
|
|||
self.Emit("MOVQ", _IC, _ARG_ic) // MOVQ IC, ic<>+16(FP)
|
||||
self.Emit("LEAQ", _ARG_ic, _SI) // LEAQ ic<>+16(FP), SI
|
||||
self.Emit("LEAQ", jit.Ptr(_ST, _FsmOffset), _DX) // LEAQ _FsmOffset(ST), DX
|
||||
self.Emit("MOVQ", _ARG_fv, _CX)
|
||||
self.call(fn) // CALL ${fn}
|
||||
self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC
|
||||
}
|
||||
|
|
@ -591,7 +592,8 @@ func (self *_Assembler) check_eof(d int64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (self *_Assembler) parse_string() {
|
||||
func (self *_Assembler) parse_string() { // parse_string passes the validate flag as the last parameter
|
||||
self.Emit("MOVQ", _ARG_fv, _CX)
|
||||
self.call_vf(_F_vstring)
|
||||
self.check_err()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -404,6 +404,7 @@ func (self *_Assembler) call_sf(fn obj.Addr) {
|
|||
self.Emit("MOVQ", _IC, _ARG_ic) // MOVQ IC, ic<>+16(FP)
|
||||
self.Emit("LEAQ", _ARG_ic, _SI) // LEAQ ic<>+16(FP), SI
|
||||
self.Emit("LEAQ", jit.Ptr(_ST, _FsmOffset), _DX) // LEAQ _FsmOffset(ST), DX
|
||||
self.Emit("MOVQ", _ARG_fv, _CX)
|
||||
self.callc(fn)
|
||||
self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC
|
||||
}
|
||||
|
|
@ -602,6 +603,7 @@ func (self *_Assembler) check_eof(d int64) {
|
|||
}
|
||||
|
||||
func (self *_Assembler) parse_string() {
|
||||
self.Emit("MOVQ", _ARG_fv, _CX)
|
||||
self.call_vf(_F_vstring)
|
||||
self.check_err()
|
||||
}
|
||||
|
|
@ -933,6 +935,8 @@ func (self *_Assembler) mapassign_utext(t reflect.Type, addressable bool) {
|
|||
|
||||
var (
|
||||
_F_skip_one = jit.Imm(int64(native.S_skip_one))
|
||||
_F_skip_array = jit.Imm(int64(native.S_skip_array))
|
||||
_F_skip_object = jit.Imm(int64(native.S_skip_object))
|
||||
_F_skip_number = jit.Imm(int64(native.S_skip_number))
|
||||
)
|
||||
|
||||
|
|
@ -1031,11 +1035,6 @@ var (
|
|||
_F_decodeValue = jit.Imm(int64(_subr_decode_value))
|
||||
)
|
||||
|
||||
var (
|
||||
_F_skip_array = jit.Imm(int64(native.S_skip_array))
|
||||
_F_skip_object = jit.Imm(int64(native.S_skip_object))
|
||||
)
|
||||
|
||||
var (
|
||||
_F_FieldMap_GetCaseInsensitive obj.Addr
|
||||
)
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ const (
|
|||
_F_disable_urc
|
||||
_F_disable_unknown
|
||||
_F_copy_string
|
||||
_F_validate_string
|
||||
|
||||
_F_allow_control = 31
|
||||
)
|
||||
|
|
@ -45,6 +46,7 @@ const (
|
|||
OptionUseUnicodeErrors Options = 1 << _F_disable_urc
|
||||
OptionDisableUnknown Options = 1 << _F_disable_unknown
|
||||
OptionCopyString Options = 1 << _F_copy_string
|
||||
OptionValidateString Options = 1 << _F_validate_string
|
||||
)
|
||||
|
||||
func (self *Decoder) SetOptions(opts Options) {
|
||||
|
|
@ -139,6 +141,13 @@ func (self *Decoder) CopyString() {
|
|||
self.f |= 1 << _F_copy_string
|
||||
}
|
||||
|
||||
// ValidateString causes the Decoder to validate string values when decoding string values
|
||||
// in JSON. Validation means returning an error when unescaped control chars (0x00-0x1f) or
|
||||
// invalid UTF-8 chars appear in the string value of JSON.
|
||||
func (self *Decoder) ValidateString() {
|
||||
self.f |= 1 << _F_validate_string
|
||||
}
|
||||
|
||||
// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
|
||||
// order to reduce the first-hit latency.
|
||||
//
|
||||
|
|
@ -199,7 +208,7 @@ func Skip(data []byte) (start int, end int) {
|
|||
s := rt.Mem2Str(data)
|
||||
p := 0
|
||||
m := types.NewStateMachine()
|
||||
ret := native.SkipOne(&s, &p, m)
|
||||
ret := native.SkipOne(&s, &p, m, uint64(0))
|
||||
types.FreeStateMachine(m)
|
||||
return ret, p
|
||||
}
|
||||
|
|
@ -67,12 +67,12 @@ func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int) (ret int)
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __vstring(s *string, p *int, v *types.JsonState)
|
||||
func __vstring(s *string, p *int, v *types.JsonState, flags uint64)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -92,17 +92,17 @@ func __vunsigned(s *string, p *int, v *types.JsonState)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -237,21 +237,39 @@ func TestNative_Vstring(t *testing.T) {
|
|||
var v types.JsonState
|
||||
i := 0
|
||||
s := `test"test\n2"`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 5, i)
|
||||
assert.Equal(t, -1, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 13, i)
|
||||
assert.Equal(t, 9, v.Ep)
|
||||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\x1f\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\xff\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
|
|
@ -312,7 +330,7 @@ func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
|||
p := 1
|
||||
s := rt.Mem2Str(v)
|
||||
var js types.JsonState
|
||||
__vstring(&s, &p, &js)
|
||||
__vstring(&s, &p, &js, 0)
|
||||
fmt.Printf("js: %s\n", spew.Sdump(js))
|
||||
}
|
||||
|
||||
|
|
@ -506,36 +524,36 @@ func TestNative_Vunsigned(t *testing.T) {
|
|||
func TestNative_SkipOne(t *testing.T) {
|
||||
p := 0
|
||||
s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5`
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 42, p)
|
||||
assert.Equal(t, 1, q)
|
||||
p = 0
|
||||
s = `1 2.5 -3 "asdf\nqwer" true false null {} []`
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 1, p)
|
||||
assert.Equal(t, 0, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 5, p)
|
||||
assert.Equal(t, 2, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 8, p)
|
||||
assert.Equal(t, 6, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 21, p)
|
||||
assert.Equal(t, 9, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 26, p)
|
||||
assert.Equal(t, 22, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 32, p)
|
||||
assert.Equal(t, 27, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 37, p)
|
||||
assert.Equal(t, 33, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 40, p)
|
||||
assert.Equal(t, 38, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 43, p)
|
||||
assert.Equal(t, 41, q)
|
||||
}
|
||||
|
|
@ -547,7 +565,7 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
`"asdf`, `"\\\"`,
|
||||
}) {
|
||||
p := 0
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.True(t, q < 0)
|
||||
}
|
||||
}
|
||||
|
|
@ -555,14 +573,14 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
func TestNative_SkipArray(t *testing.T) {
|
||||
p := 0
|
||||
s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],`
|
||||
__skip_array(&s, &p, &types.StateMachine{})
|
||||
__skip_array(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 48)
|
||||
}
|
||||
|
||||
func TestNative_SkipObject(t *testing.T) {
|
||||
p := 0
|
||||
s := `"asdf": "wqer"},`
|
||||
__skip_object(&s, &p, &types.StateMachine{})
|
||||
__skip_object(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 15)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,45 +9,45 @@ package avx
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f64toa = __native_entry__() + 630
|
||||
_subr__html_escape = __native_entry__() + 8581
|
||||
_subr__i64toa = __native_entry__() + 3642
|
||||
_subr__lspace = __native_entry__() + 301
|
||||
_subr__f64toa = __native_entry__() + 570
|
||||
_subr__html_escape = __native_entry__() + 9062
|
||||
_subr__i64toa = __native_entry__() + 3205
|
||||
_subr__lspace = __native_entry__() + 251
|
||||
_subr__lzero = __native_entry__() + 13
|
||||
_subr__quote = __native_entry__() + 4955
|
||||
_subr__skip_array = __native_entry__() + 17819
|
||||
_subr__skip_number = __native_entry__() + 20937
|
||||
_subr__skip_object = __native_entry__() + 17856
|
||||
_subr__skip_one = __native_entry__() + 16120
|
||||
_subr__u64toa = __native_entry__() + 3735
|
||||
_subr__unquote = __native_entry__() + 6426
|
||||
_subr__validate_one = __native_entry__() + 21054
|
||||
_subr__value = __native_entry__() + 11301
|
||||
_subr__vnumber = __native_entry__() + 14278
|
||||
_subr__vsigned = __native_entry__() + 15592
|
||||
_subr__vstring = __native_entry__() + 13243
|
||||
_subr__vunsigned = __native_entry__() + 15851
|
||||
_subr__quote = __native_entry__() + 4498
|
||||
_subr__skip_array = __native_entry__() + 19852
|
||||
_subr__skip_number = __native_entry__() + 21130
|
||||
_subr__skip_object = __native_entry__() + 19887
|
||||
_subr__skip_one = __native_entry__() + 18078
|
||||
_subr__u64toa = __native_entry__() + 3300
|
||||
_subr__unquote = __native_entry__() + 6037
|
||||
_subr__validate_one = __native_entry__() + 21247
|
||||
_subr__value = __native_entry__() + 11651
|
||||
_subr__vnumber = __native_entry__() + 16191
|
||||
_subr__vsigned = __native_entry__() + 17496
|
||||
_subr__vstring = __native_entry__() + 13546
|
||||
_subr__vunsigned = __native_entry__() + 17776
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f64toa = 120
|
||||
_stack__html_escape = 72
|
||||
_stack__f64toa = 136
|
||||
_stack__html_escape = 64
|
||||
_stack__i64toa = 24
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 56
|
||||
_stack__quote = 80
|
||||
_stack__skip_array = 144
|
||||
_stack__skip_number = 96
|
||||
_stack__skip_number = 80
|
||||
_stack__skip_object = 144
|
||||
_stack__skip_one = 144
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 88
|
||||
_stack__unquote = 72
|
||||
_stack__validate_one = 144
|
||||
_stack__value = 416
|
||||
_stack__vnumber = 312
|
||||
_stack__vsigned = 16
|
||||
_stack__vstring = 128
|
||||
_stack__vunsigned = 8
|
||||
_stack__vstring = 136
|
||||
_stack__vunsigned = 24
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
|
|
@ -67,12 +67,12 @@ func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int) (ret int)
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __vstring(s *string, p *int, v *types.JsonState)
|
||||
func __vstring(s *string, p *int, v *types.JsonState, flags uint64)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -92,17 +92,17 @@ func __vunsigned(s *string, p *int, v *types.JsonState)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -237,21 +237,39 @@ func TestNative_Vstring(t *testing.T) {
|
|||
var v types.JsonState
|
||||
i := 0
|
||||
s := `test"test\n2"`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 5, i)
|
||||
assert.Equal(t, -1, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 13, i)
|
||||
assert.Equal(t, 9, v.Ep)
|
||||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\x1f\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\xff\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
|
|
@ -312,7 +330,7 @@ func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
|||
p := 1
|
||||
s := rt.Mem2Str(v)
|
||||
var js types.JsonState
|
||||
__vstring(&s, &p, &js)
|
||||
__vstring(&s, &p, &js, 0)
|
||||
fmt.Printf("js: %s\n", spew.Sdump(js))
|
||||
}
|
||||
|
||||
|
|
@ -506,36 +524,36 @@ func TestNative_Vunsigned(t *testing.T) {
|
|||
func TestNative_SkipOne(t *testing.T) {
|
||||
p := 0
|
||||
s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5`
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 42, p)
|
||||
assert.Equal(t, 1, q)
|
||||
p = 0
|
||||
s = `1 2.5 -3 "asdf\nqwer" true false null {} []`
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 1, p)
|
||||
assert.Equal(t, 0, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 5, p)
|
||||
assert.Equal(t, 2, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 8, p)
|
||||
assert.Equal(t, 6, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 21, p)
|
||||
assert.Equal(t, 9, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 26, p)
|
||||
assert.Equal(t, 22, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 32, p)
|
||||
assert.Equal(t, 27, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 37, p)
|
||||
assert.Equal(t, 33, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 40, p)
|
||||
assert.Equal(t, 38, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 43, p)
|
||||
assert.Equal(t, 41, q)
|
||||
}
|
||||
|
|
@ -547,7 +565,7 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
`"asdf`, `"\\\"`,
|
||||
}) {
|
||||
p := 0
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.True(t, q < 0)
|
||||
}
|
||||
}
|
||||
|
|
@ -555,14 +573,14 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
func TestNative_SkipArray(t *testing.T) {
|
||||
p := 0
|
||||
s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],`
|
||||
__skip_array(&s, &p, &types.StateMachine{})
|
||||
__skip_array(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 48)
|
||||
}
|
||||
|
||||
func TestNative_SkipObject(t *testing.T) {
|
||||
p := 0
|
||||
s := `"asdf": "wqer"},`
|
||||
__skip_object(&s, &p, &types.StateMachine{})
|
||||
__skip_object(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 15)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,45 +9,45 @@ package avx2
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f64toa = __native_entry__() + 903
|
||||
_subr__html_escape = __native_entry__() + 10249
|
||||
_subr__i64toa = __native_entry__() + 3915
|
||||
_subr__lspace = __native_entry__() + 429
|
||||
_subr__f64toa = __native_entry__() + 814
|
||||
_subr__html_escape = __native_entry__() + 10717
|
||||
_subr__i64toa = __native_entry__() + 3449
|
||||
_subr__lspace = __native_entry__() + 379
|
||||
_subr__lzero = __native_entry__() + 13
|
||||
_subr__quote = __native_entry__() + 5328
|
||||
_subr__skip_array = __native_entry__() + 21867
|
||||
_subr__skip_number = __native_entry__() + 25515
|
||||
_subr__skip_object = __native_entry__() + 21904
|
||||
_subr__skip_one = __native_entry__() + 19172
|
||||
_subr__u64toa = __native_entry__() + 4008
|
||||
_subr__unquote = __native_entry__() + 7794
|
||||
_subr__validate_one = __native_entry__() + 25632
|
||||
_subr__value = __native_entry__() + 14495
|
||||
_subr__vnumber = __native_entry__() + 17330
|
||||
_subr__vsigned = __native_entry__() + 18644
|
||||
_subr__vstring = __native_entry__() + 16453
|
||||
_subr__vunsigned = __native_entry__() + 18903
|
||||
_subr__quote = __native_entry__() + 4842
|
||||
_subr__skip_array = __native_entry__() + 22748
|
||||
_subr__skip_number = __native_entry__() + 24641
|
||||
_subr__skip_object = __native_entry__() + 22783
|
||||
_subr__skip_one = __native_entry__() + 20939
|
||||
_subr__u64toa = __native_entry__() + 3544
|
||||
_subr__unquote = __native_entry__() + 7467
|
||||
_subr__validate_one = __native_entry__() + 24758
|
||||
_subr__value = __native_entry__() + 14548
|
||||
_subr__vnumber = __native_entry__() + 19052
|
||||
_subr__vsigned = __native_entry__() + 20357
|
||||
_subr__vstring = __native_entry__() + 16711
|
||||
_subr__vunsigned = __native_entry__() + 20637
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f64toa = 120
|
||||
_stack__f64toa = 136
|
||||
_stack__html_escape = 72
|
||||
_stack__i64toa = 24
|
||||
_stack__lspace = 8
|
||||
_stack__lzero = 8
|
||||
_stack__quote = 56
|
||||
_stack__quote = 72
|
||||
_stack__skip_array = 152
|
||||
_stack__skip_number = 96
|
||||
_stack__skip_number = 88
|
||||
_stack__skip_object = 152
|
||||
_stack__skip_one = 152
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 72
|
||||
_stack__validate_one = 152
|
||||
_stack__value = 408
|
||||
_stack__value = 416
|
||||
_stack__vnumber = 312
|
||||
_stack__vsigned = 16
|
||||
_stack__vstring = 112
|
||||
_stack__vunsigned = 8
|
||||
_stack__vstring = 136
|
||||
_stack__vunsigned = 24
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
|
|
@ -77,12 +77,12 @@ func HTMLEscape(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int) int
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int) int
|
||||
func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) int
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func SkipOne(s *string, p *int, m *types.StateMachine) int
|
||||
func SkipOne(s *string, p *int, m *types.StateMachine, flags uint64) int
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ TEXT ·Value(SB), NOSPLIT, $0 - 48
|
|||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__value(SB)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__value(SB)
|
||||
|
||||
TEXT ·SkipOne(SB), NOSPLIT, $0 - 32
|
||||
TEXT ·SkipOne(SB), NOSPLIT, $0 - 40
|
||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||
JE 2(PC)
|
||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__skip_one(SB)
|
||||
|
|
|
|||
|
|
@ -65,12 +65,12 @@ func __unquote(sp unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, allow_control int) (ret int)
|
||||
func __value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __vstring(s *string, p *int, v *types.JsonState)
|
||||
func __vstring(s *string, p *int, v *types.JsonState, flags uint64)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
@ -90,17 +90,17 @@ func __vunsigned(s *string, p *int, v *types.JsonState)
|
|||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_array(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
//goland:noinspection GoUnusedParameter
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine) (ret int)
|
||||
func __skip_object(s *string, p *int, m *types.StateMachine, flags uint64) (ret int)
|
||||
|
||||
//go:nosplit
|
||||
//go:noescape
|
||||
|
|
|
|||
|
|
@ -235,21 +235,39 @@ func TestNative_Vstring(t *testing.T) {
|
|||
var v types.JsonState
|
||||
i := 0
|
||||
s := `test"test\n2"`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 5, i)
|
||||
assert.Equal(t, -1, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 13, i)
|
||||
assert.Equal(t, 9, v.Ep)
|
||||
assert.Equal(t, int64(5), v.Iv)
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\x1f\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
||||
var v types.JsonState
|
||||
valid := uint64(types.F_VALIDATE_STRING)
|
||||
i := 0
|
||||
s := "test\xff\""
|
||||
__vstring(&s, &i, &v, valid)
|
||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||
}
|
||||
|
||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||
var v types.JsonState
|
||||
i := 0
|
||||
s := `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"x`
|
||||
__vstring(&s, &i, &v)
|
||||
__vstring(&s, &i, &v, 0)
|
||||
assert.Equal(t, 95, i)
|
||||
assert.Equal(t, 63, v.Ep)
|
||||
assert.Equal(t, int64(0), v.Iv)
|
||||
|
|
@ -310,7 +328,7 @@ func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
|||
p := 1
|
||||
s := rt.Mem2Str(v)
|
||||
var js types.JsonState
|
||||
__vstring(&s, &p, &js)
|
||||
__vstring(&s, &p, &js, 0)
|
||||
fmt.Printf("js: %s\n", spew.Sdump(js))
|
||||
}
|
||||
|
||||
|
|
@ -504,36 +522,36 @@ func TestNative_Vunsigned(t *testing.T) {
|
|||
func TestNative_SkipOne(t *testing.T) {
|
||||
p := 0
|
||||
s := ` {"asdf": [null, true, false, 1, 2.0, -3]}, 1234.5`
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 42, p)
|
||||
assert.Equal(t, 1, q)
|
||||
p = 0
|
||||
s = `1 2.5 -3 "asdf\nqwer" true false null {} []`
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 1, p)
|
||||
assert.Equal(t, 0, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 5, p)
|
||||
assert.Equal(t, 2, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 8, p)
|
||||
assert.Equal(t, 6, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 21, p)
|
||||
assert.Equal(t, 9, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 26, p)
|
||||
assert.Equal(t, 22, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 32, p)
|
||||
assert.Equal(t, 27, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 37, p)
|
||||
assert.Equal(t, 33, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 40, p)
|
||||
assert.Equal(t, 38, q)
|
||||
q = __skip_one(&s, &p, &types.StateMachine{})
|
||||
q = __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, 43, p)
|
||||
assert.Equal(t, 41, q)
|
||||
}
|
||||
|
|
@ -545,7 +563,7 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
`"asdf`, `"\\\"`,
|
||||
}) {
|
||||
p := 0
|
||||
q := __skip_one(&s, &p, &types.StateMachine{})
|
||||
q := __skip_one(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.True(t, q < 0)
|
||||
}
|
||||
}
|
||||
|
|
@ -553,14 +571,14 @@ func TestNative_SkipOne_Error(t *testing.T) {
|
|||
func TestNative_SkipArray(t *testing.T) {
|
||||
p := 0
|
||||
s := `null, true, false, 1, 2.0, -3, {"asdf": "wqer"}],`
|
||||
__skip_array(&s, &p, &types.StateMachine{})
|
||||
__skip_array(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 48)
|
||||
}
|
||||
|
||||
func TestNative_SkipObject(t *testing.T) {
|
||||
p := 0
|
||||
s := `"asdf": "wqer"},`
|
||||
__skip_object(&s, &p, &types.StateMachine{})
|
||||
__skip_object(&s, &p, &types.StateMachine{}, uint64(0))
|
||||
assert.Equal(t, p, 15)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,11 +45,13 @@ const (
|
|||
const (
|
||||
B_DOUBLE_UNQUOTE = 0
|
||||
B_UNICODE_REPLACE = 1
|
||||
B_VALIDATE_STRING = 5
|
||||
)
|
||||
|
||||
const (
|
||||
F_DOUBLE_UNQUOTE = 1 << B_DOUBLE_UNQUOTE
|
||||
F_UNICODE_REPLACE = 1 << B_UNICODE_REPLACE
|
||||
F_VALIDATE_STRING = 1 << B_VALIDATE_STRING
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
|
|||
21
licenses/LICENSE-yyjson
Normal file
21
licenses/LICENSE-yyjson
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2020 YaoYuan <ibireme@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -13,7 +13,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "native.h"
|
||||
#include "fastbytes.c"
|
||||
#include "fastfloat.c"
|
||||
|
|
@ -21,5 +20,4 @@
|
|||
#include "parsing.c"
|
||||
#include "atof_eisel_lemire.c"
|
||||
#include "atof_native.c"
|
||||
#include "scanning.c"
|
||||
#include "utf8.c"
|
||||
#include "scanning.c"
|
||||
|
|
@ -109,22 +109,22 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
|||
ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn);
|
||||
|
||||
long value(const char *s, size_t n, long p, JsonState *ret, uint64_t flags);
|
||||
void vstring(const GoString *src, long *p, JsonState *ret);
|
||||
void vstring(const GoString *src, long *p, JsonState *ret, uint64_t flags);
|
||||
void vnumber(const GoString *src, long *p, JsonState *ret);
|
||||
void vsigned(const GoString *src, long *p, JsonState *ret);
|
||||
void vunsigned(const GoString *src, long *p, JsonState *ret);
|
||||
|
||||
long skip_one(const GoString *src, long *p, StateMachine *m);
|
||||
long skip_array(const GoString *src, long *p, StateMachine *m);
|
||||
long skip_object(const GoString *src, long *p, StateMachine *m);
|
||||
long skip_one(const GoString *src, long *p, StateMachine *m, uint64_t flags);
|
||||
long skip_array(const GoString *src, long *p, StateMachine *m, uint64_t flags);
|
||||
long skip_object(const GoString *src, long *p, StateMachine *m, uint64_t flags);
|
||||
|
||||
long skip_string(const GoString *src, long *p);
|
||||
long skip_string(const GoString *src, long *p, uint64_t flags);
|
||||
long skip_negative(const GoString *src, long *p);
|
||||
long skip_positive(const GoString *src, long *p);
|
||||
long skip_number(const GoString *src, long *p);
|
||||
|
||||
bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val);
|
||||
double atof_native(const char *sp, ssize_t nb, char* dbuf, ssize_t cap);
|
||||
double atof_native(const char *sp, ssize_t nb, char *dbuf, ssize_t cap);
|
||||
|
||||
ssize_t utf8_validate(const char *sp, ssize_t nb);
|
||||
long validate_string(const GoString *src, long *p);
|
||||
|
|
|
|||
|
|
@ -15,13 +15,16 @@
|
|||
*/
|
||||
|
||||
#include "native.h"
|
||||
|
||||
static const char *CS_ARRAY = "[]{},\"[]{},\"[]{}";
|
||||
static const char *CS_OBJECT = "[]{},:\"[]{}:,\"[]";
|
||||
#include "utf8.h"
|
||||
#include "test/xprintf.h"
|
||||
|
||||
static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
|
||||
static const uint64_t EVEN_MASK = 0x5555555555555555;
|
||||
|
||||
// NOTE: mask referenced from decoder/decoder.go
|
||||
static const uint64_t MASK_VALIDATE_STRING = 1ull << 5;
|
||||
static const uint64_t MASK_ALLOW_CONTROL = 1ull << 31;
|
||||
|
||||
static const double P10_TAB[23] = {
|
||||
/* <= the conversion to double is not exact when less than 1 => */ 1e-000,
|
||||
1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010,
|
||||
|
|
@ -110,7 +113,7 @@ static inline int64_t advance_dword(const GoString *src, long *p, long dec, int6
|
|||
}
|
||||
}
|
||||
|
||||
static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
||||
static inline ssize_t advance_string_default(const GoString *src, long p, int64_t *ep) {
|
||||
char ch;
|
||||
uint64_t es;
|
||||
uint64_t fe;
|
||||
|
|
@ -228,7 +231,6 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
|||
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
#endif
|
||||
|
||||
/** update first escape (backslash) position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
|
|
@ -238,7 +240,7 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
|||
if (unlikely(m1 != 0 || cr != 0)) {
|
||||
m0_mask(add64)
|
||||
}
|
||||
|
||||
|
||||
/* check for end quote */
|
||||
if (m0 != 0) {
|
||||
return sp - ss + __builtin_ctzll(m0) + 1;
|
||||
|
|
@ -273,7 +275,7 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
|||
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
#endif
|
||||
|
||||
|
||||
/** update first escape (backslash) position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
|
|
@ -283,7 +285,7 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
|||
if (unlikely(m1 != 0 || cr != 0)) {
|
||||
m0_mask(add32)
|
||||
}
|
||||
|
||||
|
||||
/* check for end quote */
|
||||
if (m0 != 0) {
|
||||
return sp - ss + __builtin_ctzll(m0) + 1;
|
||||
|
|
@ -329,17 +331,6 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
|
|||
}
|
||||
}
|
||||
|
||||
static inline int _mm_get_mask(__m128i v, __m128i t) {
|
||||
return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t));
|
||||
}
|
||||
|
||||
// control chars: 0x00 ~ 0x1F
|
||||
static inline int _mm_cchars_mask(__m128i v) {
|
||||
__m128i e1 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(-1));
|
||||
__m128i e2 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(31));
|
||||
return _mm_movemask_epi8 (_mm_andnot_si128 (e2, e1));
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
|
||||
static inline int _mm256_get_mask(__m256i v, __m256i t) {
|
||||
|
|
@ -353,33 +344,52 @@ static inline int _mm256_cchars_mask(__m256i v) {
|
|||
return _mm256_movemask_epi8 (_mm256_andnot_si256 (e2, e1));
|
||||
}
|
||||
|
||||
// non-ascii mask: one bit per byte whose high bit is set (0x80 ~ 0xFF)
|
||||
static inline int _mm256_nonascii_mask(__m256i v) {
|
||||
return _mm256_movemask_epi8(v);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline ssize_t advance_validate_string(const GoString *src, long p, int64_t *ep) {
|
||||
static inline int _mm_get_mask(__m128i v, __m128i t) {
|
||||
return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t));
|
||||
}
|
||||
|
||||
// control chars: 0x00 ~ 0x1F
|
||||
static inline int _mm_cchars_mask(__m128i v) {
|
||||
__m128i e1 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(-1));
|
||||
__m128i e2 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(31));
|
||||
return _mm_movemask_epi8 (_mm_andnot_si128 (e2, e1));
|
||||
}
|
||||
|
||||
// non-ascii mask: one bit per byte whose high bit is set (0x80 ~ 0xFF)
|
||||
static inline int _mm_nonascii_mask(__m128i v) {
|
||||
return _mm_movemask_epi8(v);
|
||||
}
|
||||
|
||||
static inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) {
|
||||
char ch;
|
||||
uint64_t es;
|
||||
uint64_t fe;
|
||||
uint64_t os;
|
||||
uint64_t m0;
|
||||
uint64_t m1;
|
||||
uint64_t m2;
|
||||
uint64_t m0, m1, m2, m3;
|
||||
uint64_t es, fe, os;
|
||||
uint64_t cr = 0;
|
||||
long qp = 0;
|
||||
long np = 0;
|
||||
|
||||
/* prevent out-of-bounds accessing */
|
||||
if (unlikely(src->len == p)) {
|
||||
return -ERR_EOF;
|
||||
}
|
||||
long up = 0;
|
||||
|
||||
/* buffer pointers */
|
||||
size_t nb = src->len;
|
||||
const char * sp = src->buf;
|
||||
const char * ss = src->buf;
|
||||
|
||||
#define ep_init() *ep = -1;
|
||||
#define ep_setc() ep_setx(sp - ss - 1)
|
||||
#define ep_setx(x) if (*ep == -1) { *ep = (x); }
|
||||
/* prevent out-of-bounds accessing */
|
||||
if (unlikely(nb == p)) {
|
||||
return -ERR_EOF;
|
||||
}
|
||||
|
||||
#define ep_init() *ep = -1;
|
||||
#define ep_setc() ep_setx(sp - ss - 1)
|
||||
#define ep_setx(x) if (*ep == -1) { *ep = (x); }
|
||||
#define ep_seterr(x) *ep = (x);
|
||||
|
||||
/* seek to `p` */
|
||||
nb -= p;
|
||||
|
|
@ -397,6 +407,7 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
uint32_t s0, s1;
|
||||
uint32_t t0, t1;
|
||||
uint32_t c0, c1;
|
||||
uint32_t u0, u1;
|
||||
#else
|
||||
/* initialize vectors */
|
||||
__m128i v0;
|
||||
|
|
@ -410,6 +421,7 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
uint32_t s0, s1, s2, s3;
|
||||
uint32_t t0, t1, t2, t3;
|
||||
uint32_t c0, c1, c2, c3;
|
||||
uint32_t u0, u1, u2, u3;
|
||||
#endif
|
||||
|
||||
#define m0_mask(add) \
|
||||
|
|
@ -419,6 +431,7 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
es = add(os, m1, &cr) << 1; \
|
||||
m0 &= ~(fe & (es ^ EVEN_MASK));
|
||||
|
||||
simd_advance:
|
||||
/* 64-byte SIMD loop */
|
||||
while (likely(nb >= 64)) {
|
||||
#if USE_AVX2
|
||||
|
|
@ -430,9 +443,12 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
t1 = _mm256_get_mask(v1, cx);
|
||||
c0 = _mm256_cchars_mask(v0);
|
||||
c1 = _mm256_cchars_mask(v1);
|
||||
u0 = _mm256_nonascii_mask(v0);
|
||||
u1 = _mm256_nonascii_mask(v1);
|
||||
m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
|
||||
m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
|
||||
m3 = ((uint64_t)u1 << 32) | (uint64_t)u0;
|
||||
#else
|
||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||
|
|
@ -450,12 +466,17 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
c1 = _mm_cchars_mask(v1);
|
||||
c2 = _mm_cchars_mask(v2);
|
||||
c3 = _mm_cchars_mask(v3);
|
||||
u0 = _mm_nonascii_mask(v0);
|
||||
u1 = _mm_nonascii_mask(v1);
|
||||
u2 = _mm_nonascii_mask(v2);
|
||||
u3 = _mm_nonascii_mask(v3);
|
||||
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
|
||||
m3 = ((uint64_t)u3 << 48) | ((uint64_t)u2 << 32) | ((uint64_t)u1 << 16) | (uint64_t)u0;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/** update first escape (backslash) position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
|
|
@ -466,23 +487,35 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
m0_mask(add64)
|
||||
}
|
||||
|
||||
qp = m0 ? __builtin_ctzll(m0) : 64;
|
||||
np = m2 ? __builtin_ctzll(m2) : 64;
|
||||
up = m3 ? __builtin_ctzll(m3) : 64;
|
||||
|
||||
/* get the position of end quote */
|
||||
if (m0 != 0) {
|
||||
qp = sp - ss + __builtin_ctzll(m0) + 1;
|
||||
/* check control chars in JSON string */
|
||||
if (unlikely(m2 !=0 && (np = sp - ss + __builtin_ctzll(m2)) < qp)) {
|
||||
ep_setx(np) // set error position
|
||||
if (unlikely(np < qp)) {
|
||||
ep_seterr(sp - ss + np)
|
||||
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
return qp;
|
||||
if (up < qp) {
|
||||
goto valid_utf8;
|
||||
}
|
||||
return sp - ss + qp + 1;
|
||||
}
|
||||
|
||||
/* check control chars in JSON string */
|
||||
if (unlikely(m2 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m2))
|
||||
ep_setx(sp - ss + np)
|
||||
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
if (unlikely(m3 != 0)) {
|
||||
goto valid_utf8;
|
||||
}
|
||||
|
||||
/* move to the next block */
|
||||
sp += 64;
|
||||
nb -= 64;
|
||||
|
|
@ -495,9 +528,11 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
s0 = _mm256_get_mask (v0, cq);
|
||||
t0 = _mm256_get_mask (v0, cx);
|
||||
c0 = _mm256_cchars_mask(v0);
|
||||
u0 = _mm256_nonascii_mask(v0);
|
||||
m0 = (uint64_t)s0;
|
||||
m1 = (uint64_t)t0;
|
||||
m2 = (uint64_t)c0;
|
||||
m3 = (uint64_t)u0;
|
||||
#else
|
||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||
|
|
@ -507,11 +542,14 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
t1 = _mm_get_mask(v1, cx);
|
||||
c0 = _mm_cchars_mask(v0);
|
||||
c1 = _mm_cchars_mask(v1);
|
||||
u0 = _mm_nonascii_mask(v0);
|
||||
u1 = _mm_nonascii_mask(v1);
|
||||
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||
m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
|
||||
m3 = ((uint64_t)u1 << 16) | (uint64_t)u0;
|
||||
#endif
|
||||
|
||||
|
||||
/** update first escape (backslash) position */
|
||||
if (unlikely(m1 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m1))
|
||||
|
|
@ -521,24 +559,34 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
if (unlikely(m1 != 0 || cr != 0)) {
|
||||
m0_mask(add32)
|
||||
}
|
||||
|
||||
|
||||
qp = m0 ? __builtin_ctzll(m0) : 64;
|
||||
up = m3 ? __builtin_ctzll(m3) : 64;
|
||||
np = m2 ? __builtin_ctzll(m2) : 64;
|
||||
|
||||
|
||||
/* get the position of end quote */
|
||||
if (m0 != 0) {
|
||||
qp = sp - ss + __builtin_ctzll(m0) + 1;
|
||||
/* check control chars in JSON string */
|
||||
if (unlikely(m2 !=0 && (np = sp - ss + __builtin_ctzll(m2)) < qp)) {
|
||||
ep_setx(np) // set error position
|
||||
if (unlikely(np < qp)) {
|
||||
ep_seterr(sp - ss + np)
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
return qp;
|
||||
if (up < qp) {
|
||||
goto valid_utf8;
|
||||
}
|
||||
return sp - ss + qp + 1;
|
||||
}
|
||||
|
||||
/* check control chars in JSON string */
|
||||
if (unlikely(m2 != 0)) {
|
||||
ep_setx(sp - ss + __builtin_ctzll(m2))
|
||||
ep_seterr(sp - ss + __builtin_ctzll(m2))
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
if (m3 != 0) {
|
||||
goto valid_utf8;
|
||||
}
|
||||
|
||||
/* move to the next block */
|
||||
sp += 32;
|
||||
nb -= 32;
|
||||
|
|
@ -554,43 +602,89 @@ static inline ssize_t advance_validate_string(const GoString *src, long p, int64
|
|||
}
|
||||
}
|
||||
|
||||
remain:
|
||||
/* handle the remaining bytes with scalar code */
|
||||
while (nb-- > 0 && (ch = *sp++) != '"') {
|
||||
while (nb > 0) {
|
||||
ch = *sp;
|
||||
if (ch == '"') {
|
||||
|
||||
return sp - ss + 1;
|
||||
}
|
||||
|
||||
/* validate the escaped chars */
|
||||
if (unlikely(ch == '\\')) {
|
||||
if (nb == 0) {
|
||||
if (nb == 1) {
|
||||
return -ERR_EOF;
|
||||
} else {
|
||||
ep_setc()
|
||||
sp++, nb--;
|
||||
}
|
||||
} else if (unlikely( ch >= 0 && ch <= 0x1f)) { // control chars
|
||||
ep_setc()
|
||||
ep_setx(sp - ss)
|
||||
sp += 2, nb -= 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* validate unescaped chars */
|
||||
if (unlikely( ch >= 0 && ch <= 0x1f)) { // control chars
|
||||
ep_seterr(sp - ss)
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* validate utf8 chars */
|
||||
if (ch & 0x80) {
|
||||
uint32_t ubin = nb >= 4 ? *(uint32_t*)sp : less4byte_to_uint32(sp, nb);
|
||||
if ((up = valid_utf8_4byte(ubin))) {
|
||||
sp += up, nb -= up;
|
||||
continue;
|
||||
}
|
||||
ep_seterr(sp - ss)
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
sp++, nb--;
|
||||
}
|
||||
return -ERR_EOF;
|
||||
|
||||
valid_utf8:
|
||||
sp += up, nb -= up;
|
||||
while (likely(nb >= 4)) {
|
||||
up = valid_utf8_4byte(*(uint32_t*)sp);
|
||||
if (unlikely(up == 0)) {
|
||||
ep_seterr(sp - ss)
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
/* check continuous utf-8: keep validating while the next byte is still non-ascii */
|
||||
sp += up, nb -= up;
|
||||
if (nb > 0 && (*(uint8_t*)sp & 0x80)) {
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* clear the last carried bit */
|
||||
cr = 0;
|
||||
goto simd_advance;
|
||||
}
|
||||
goto remain;
|
||||
|
||||
#undef ep_init
|
||||
#undef ep_setc
|
||||
#undef ep_setx
|
||||
#undef ep_seterr
|
||||
#undef m0_mask
|
||||
}
|
||||
|
||||
/* check for quotes */
|
||||
if (ch == '"') {
|
||||
return sp - ss;
|
||||
static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep, uint64_t flags) {
|
||||
if ((flags & MASK_VALIDATE_STRING) != 0) {
|
||||
return advance_string_validate(src, p, ep);
|
||||
} else {
|
||||
return -ERR_EOF;
|
||||
return advance_string_default(src, p, ep);
|
||||
}
|
||||
}
|
||||
|
||||
/** Value Scanning Routines **/
|
||||
|
||||
const uint64_t MASK_ALLOW_CONTROL = 1ul<<31;
|
||||
|
||||
long value(const char *s, size_t n, long p, JsonState *ret, uint64_t flags) {
|
||||
long q = p;
|
||||
GoString m = {.buf = s, .len = n};
|
||||
bool allow_control = (flags&MASK_ALLOW_CONTROL) != 0;
|
||||
|
||||
bool allow_control = (flags & MASK_ALLOW_CONTROL) != 0;
|
||||
/* parse the next identifier, q is UNSAFE, may cause out-of-bounds accessing */
|
||||
switch (advance_ns(&m, &q)) {
|
||||
case '-' : /* fallthrough */
|
||||
|
|
@ -604,7 +698,7 @@ long value(const char *s, size_t n, long p, JsonState *ret, uint64_t flags) {
|
|||
case '7' : /* fallthrough */
|
||||
case '8' : /* fallthrough */
|
||||
case '9' : vdigits(&m, &q, ret, flags) ; return q;
|
||||
case '"' : vstring(&m, &q, ret) ; return q;
|
||||
case '"' : vstring(&m, &q, ret, flags) ; return q;
|
||||
case 'n' : ret->vt = advance_dword(&m, &q, 1, V_NULL, VS_NULL) ; return q;
|
||||
case 't' : ret->vt = advance_dword(&m, &q, 1, V_TRUE, VS_TRUE) ; return q;
|
||||
case 'f' : ret->vt = advance_dword(&m, &q, 0, V_FALSE, VS_ALSE) ; return q;
|
||||
|
|
@ -619,11 +713,11 @@ long value(const char *s, size_t n, long p, JsonState *ret, uint64_t flags) {
|
|||
}
|
||||
}
|
||||
|
||||
void vstring(const GoString *src, long *p, JsonState *ret) {
|
||||
void vstring(const GoString *src, long *p, JsonState *ret, uint64_t flags) {
|
||||
int64_t v = -1;
|
||||
int64_t i = *p;
|
||||
ssize_t e = advance_string(src, i, &v);
|
||||
|
||||
ssize_t e = advance_string(src, i, &v, flags);
|
||||
|
||||
/* check for errors */
|
||||
if (e < 0) {
|
||||
*p = src->len;
|
||||
|
|
@ -992,10 +1086,7 @@ static inline long fsm_push(StateMachine *self, int vt) {
|
|||
}
|
||||
}
|
||||
|
||||
#define VALID_DEFAULT 0 // basic validate, except JSON string.
|
||||
#define VALID_FULL 1 // also validate JSON string, including control chars or invalid UTF-8.
|
||||
|
||||
static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, int validate_flag) {
|
||||
static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, uint64_t flags) {
|
||||
int vt;
|
||||
char ch;
|
||||
long vi = -1;
|
||||
|
|
@ -1042,7 +1133,7 @@ static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, in
|
|||
case FSM_KEY: {
|
||||
FSM_CHAR('"');
|
||||
FSM_REPL(self, FSM_ELEM);
|
||||
FSM_XERR(skip_string(src, p));
|
||||
FSM_XERR(skip_string(src, p, flags));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1080,11 +1171,7 @@ static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, in
|
|||
/* the quote of the first key */
|
||||
case '"': {
|
||||
FSM_REPL(self, FSM_OBJ);
|
||||
if (validate_flag == VALID_DEFAULT) {
|
||||
FSM_XERR(skip_string(src, p));
|
||||
} else if (validate_flag == VALID_FULL) {
|
||||
FSM_XERR(validate_string(src, p));
|
||||
}
|
||||
FSM_XERR(skip_string(src, p, flags));
|
||||
FSM_XERR(fsm_push(self, FSM_ELEM));
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1110,14 +1197,8 @@ static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, in
|
|||
case 'f' : FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE)); break;
|
||||
case '[' : FSM_XERR(fsm_push(self, FSM_ARR_0)); break;
|
||||
case '{' : FSM_XERR(fsm_push(self, FSM_OBJ_0)); break;
|
||||
case '"' : {
|
||||
if (validate_flag == VALID_DEFAULT) {
|
||||
FSM_XERR(skip_string(src, p));
|
||||
} else if (validate_flag == VALID_FULL) {
|
||||
FSM_XERR(validate_string(src, p));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '"' : FSM_XERR(skip_string(src, p, flags)); break;
|
||||
case 0 : return -ERR_EOF;
|
||||
default : return -ERR_INVAL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1343,58 +1424,36 @@ check_index:
|
|||
#undef check_sidx
|
||||
#undef check_vidx
|
||||
|
||||
long skip_one(const GoString *src, long *p, StateMachine *m) {
|
||||
long skip_one(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
|
||||
fsm_init(m, FSM_VAL);
|
||||
return fsm_exec(m, src, p, VALID_DEFAULT);
|
||||
return fsm_exec(m, src, p, flags);
|
||||
}
|
||||
|
||||
long skip_array(const GoString *src, long *p, StateMachine *m) {
|
||||
long skip_array(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
|
||||
fsm_init(m, FSM_ARR_0);
|
||||
return fsm_exec(m, src, p, VALID_DEFAULT);
|
||||
return fsm_exec(m, src, p, flags);
|
||||
}
|
||||
|
||||
long skip_object(const GoString *src, long *p, StateMachine *m) {
|
||||
long skip_object(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
|
||||
fsm_init(m, FSM_OBJ_0);
|
||||
return fsm_exec(m, src, p, VALID_DEFAULT);
|
||||
return fsm_exec(m, src, p, flags);
|
||||
}
|
||||
|
||||
long skip_string(const GoString *src, long *p) {
|
||||
long skip_string(const GoString *src, long *p, uint64_t flags) {
|
||||
int64_t v;
|
||||
ssize_t q = *p - 1;
|
||||
ssize_t e = advance_string(src, *p, &v);
|
||||
ssize_t e = advance_string(src, *p, &v, flags);
|
||||
|
||||
/* check for errors, and update the position */
|
||||
if (e >= 0) {
|
||||
*p = e;
|
||||
return q;
|
||||
} else {
|
||||
*p = src->len;
|
||||
*p = v;
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
long validate_string(const GoString *src, long *p) {
|
||||
int64_t v;
|
||||
ssize_t q = *p - 1;
|
||||
ssize_t e = advance_validate_string(src, *p, &v);
|
||||
|
||||
/* check for errors in string advance */
|
||||
if (e < 0) {
|
||||
*p = e == -ERR_EOF ? src->len : v;
|
||||
return e;
|
||||
}
|
||||
|
||||
/* check for errors in UTF-8 validate */
|
||||
ssize_t nb = e - *p - 1;
|
||||
ssize_t r = utf8_validate(src->buf + *p, nb);
|
||||
if (r >= 0) {
|
||||
*p += r;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
*p = e;
|
||||
return q;
|
||||
}
|
||||
|
||||
long skip_negative(const GoString *src, long *p) {
|
||||
long i = *p;
|
||||
long r = do_skip_number(src->buf + i, src->len - i);
|
||||
|
|
@ -1456,5 +1515,5 @@ long skip_number(const GoString *src, long *p) {
|
|||
|
||||
long validate_one(const GoString *src, long *p, StateMachine *m) {
|
||||
fsm_init(m, FSM_VAL);
|
||||
return fsm_exec(m, src, p, VALID_FULL);
|
||||
return fsm_exec(m, src, p, MASK_VALIDATE_STRING);
|
||||
}
|
||||
204
native/test/xprintf.h
Normal file
204
native/test/xprintf.h
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
/*
|
||||
* Copyright 2022 ByteDance Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef XPRINTF_H
|
||||
#define XPRINTF_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
static void __attribute__((naked)) write_syscall(const char *s, size_t n)
|
||||
{
|
||||
asm volatile(
|
||||
"movq %rsi, %rdx"
|
||||
"\n"
|
||||
"movq %rdi, %rsi"
|
||||
"\n"
|
||||
"movq $1, %rdi"
|
||||
"\n"
|
||||
"movq $0x02000004, %rax"
|
||||
"\n"
|
||||
"syscall"
|
||||
"\n"
|
||||
"retq"
|
||||
"\n");
|
||||
}
|
||||
|
||||
/* Print a single character by writing one byte through write_syscall. */
static void printch(const char ch)
{
    const char *one = &ch;
    write_syscall(one, 1);
}
|
||||
|
||||
/*
 * Print a NUL-terminated C string. The length is counted by hand (no libc
 * strlen) and the bytes before the terminator are written in a single call.
 */
static void printstr(const char *s)
{
    size_t len;
    for (len = 0; s[len] != 0; len++)
    {
        /* just counting */
    }
    write_syscall(s, len);
}
|
||||
|
||||
/*
 * Print a signed 64-bit integer in decimal.
 *
 * Digits are generated right-to-left into a zero-filled 32-byte buffer whose
 * last byte stays 0, so the result is always NUL-terminated for printstr.
 *
 * The magnitude is computed in unsigned arithmetic: negating INT64_MIN as a
 * signed value overflows (undefined behaviour), whereas `0 - (unsigned)v`
 * is well defined and yields the correct magnitude for every input.
 */
static void printint(int64_t v)
{
    char buf[32] = {0};
    char *p = &buf[31];
    unsigned long long u = (v < 0) ? 0ULL - (unsigned long long)v
                                   : (unsigned long long)v;

    /* do/while so that zero still produces the single digit "0" */
    do
    {
        *--p = (char)('0' + (u % 10));
        u /= 10;
    } while (u != 0);

    if (v < 0)
    {
        *--p = '-';
    }
    printstr(p);
}
|
||||
|
||||
static const char tab[] = "0123456789abcdef";
|
||||
|
||||
static void printhex(uintptr_t v)
|
||||
{
|
||||
if (v == 0)
|
||||
{
|
||||
printch('0');
|
||||
return;
|
||||
}
|
||||
char buf[32] = {};
|
||||
char *p = &buf[31];
|
||||
|
||||
while (v)
|
||||
{
|
||||
*--p = tab[v & 0x0f];
|
||||
v >>= 4;
|
||||
}
|
||||
printstr(p);
|
||||
}
|
||||
|
||||
#define MAX_BUF_LEN 100
|
||||
|
||||
/*
 * Print a byte slice as "[xx,yy,...]" with two lowercase hex digits per byte.
 * When the slice is longer than MAX_BUF_LEN only its last MAX_BUF_LEN bytes
 * are printed.
 */
static void printbytes(GoSlice *s)
{
    printch('[');
    int i = 0;
    if (s->len > MAX_BUF_LEN)
        i = s->len - MAX_BUF_LEN; /* skip everything but the tail */

    while (i < s->len)
    {
        printch(tab[(s->buf[i] & 0xf0) >> 4]); /* high nibble */
        printch(tab[s->buf[i] & 0x0f]);        /* low nibble */
        if (i != s->len - 1)
        {
            printch(',');
        }
        i++;
    }
    printch(']');
}
|
||||
|
||||
/*
 * Print a Go string surrounded by double quotes. Strings of MAX_BUF_LEN bytes
 * or more are truncated to their last MAX_BUF_LEN bytes; no escaping is done.
 */
static void printgostr(GoString *s)
{
    printch('"');
    if (s->len >= MAX_BUF_LEN)
    {
        /* long string: only the tail is shown */
        write_syscall(&s->buf[s->len - MAX_BUF_LEN], MAX_BUF_LEN);
    }
    else
    {
        write_syscall(s->buf, s->len);
    }
    printch('"');
}
|
||||
|
||||
/*
 * Minimal printf-style debug printer. Compiles to an empty function unless
 * DEBUG is defined.
 *
 * Supported conversions:
 *   %%  literal percent sign
 *   %s  GoString *  (printed quoted, see printgostr)
 *   %d  int64_t     (the caller must pass a 64-bit value)
 *   %f  double      (converted to int64_t, so only the integer part prints)
 *   %c  character   (read as int, which is what a char is promoted to)
 *   %x  uintptr_t   (hex, no prefix)
 *   %l  GoSlice *   (hex byte dump, see printbytes)
 * Any other conversion character is consumed and prints nothing.
 *
 * Literal text is staged in a fixed buffer and flushed whenever it fills up,
 * before each conversion, and at the end of the format string.
 */
static void xprintf(const char *fmt, ...)
{
#ifdef DEBUG
    __builtin_va_list va;
    char buf[256];
    size_t n = 0;

    __builtin_va_start(va, fmt);
    while (*fmt != 0)
    {
        if (*fmt != '%')
        {
            /* flush first so a long literal run cannot overflow the buffer */
            if (n == sizeof(buf))
            {
                write_syscall(buf, n);
                n = 0;
            }
            buf[n++] = *fmt++;
            continue;
        }

        /* emit pending literal text so output order matches the format */
        if (n != 0)
        {
            write_syscall(buf, n);
            n = 0;
        }

        fmt++;
        /* a lone '%' at the very end: stop rather than step past the NUL */
        if (*fmt == 0)
        {
            break;
        }

        switch (*fmt++)
        {
        case '%':
            printch('%');
            break;
        case 's':
            printgostr(__builtin_va_arg(va, GoString *));
            break;
        case 'd':
            printint(__builtin_va_arg(va, int64_t));
            break;
        case 'f':
            /* no floating-point formatter: print the truncated integer part */
            printint((int64_t)__builtin_va_arg(va, double));
            break;
        case 'c':
            /* char arguments arrive as int after default argument promotion */
            printch((char)__builtin_va_arg(va, int));
            break;
        case 'x':
            printhex(__builtin_va_arg(va, uintptr_t));
            break;
        case 'l':
            printbytes(__builtin_va_arg(va, GoSlice *));
            break;
        default:
            /* unknown conversion: ignored, no argument is consumed */
            break;
        }
    }
    __builtin_va_end(va);

    if (n != 0)
    {
        write_syscall(buf, n);
    }
#endif
}
|
||||
|
||||
#endif // XPRINTF_H
|
||||
183
native/utf8.c
183
native/utf8.c
|
|
@ -1,183 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
* Modifications Copyright 2021 ByteDance Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "native.h"
|
||||
|
||||
// ascii: 0x00 ~ 0x7F
// Collects the top bit of each of the 16 bytes in vv into an integer mask;
// a set bit therefore marks a byte outside the ASCII range.
static inline int _mm_ascii_mask(__m128i vv) {
    int high_bits = _mm_movemask_epi8(vv);
    return high_bits;
}
|
||||
|
||||
#if USE_AVX2
|
||||
|
||||
// ascii: 0x00 ~ 0x7F
// Collects the top bit of each of the 32 bytes in vv into an integer mask;
// a set bit therefore marks a byte outside the ASCII range.
static inline int _mm256_ascii_mask(__m256i vv) {
    int high_bits = _mm256_movemask_epi8(vv);
    return high_bits;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Reports whether ch is a 7-bit ASCII byte, i.e. its top bit is clear.
static inline bool is_ascii(uint8_t ch) {
    return (ch & 0x80) == 0;
}
|
||||
|
||||
// The default lowest and highest continuation byte.
static const uint8_t locb = 0x80;
static const uint8_t hicb = 0xBF;

// Per-first-byte descriptors stored in the `first` table.
// The low 3 bits hold the sequence size; the high nibble selects the entry of
// `ranges` that bounds the second byte ("accept N" below).
static const uint8_t xx = 0xF1; // invalid: size 1
static const uint8_t as = 0xF0; // ASCII: low bits are 0, treated as a 1-byte char
static const uint8_t s1 = 0x02; // accept 0, size 2
static const uint8_t s2 = 0x13; // accept 1, size 3
static const uint8_t s3 = 0x03; // accept 0, size 3
static const uint8_t s4 = 0x23; // accept 2, size 3
static const uint8_t s5 = 0x34; // accept 3, size 4
static const uint8_t s6 = 0x04; // accept 0, size 4
static const uint8_t s7 = 0x44; // accept 4, size 4
|
||||
|
||||
// first is information about the first byte in a UTF-8 sequence.
|
||||
static const uint8_t first[256] = {
|
||||
// 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
|
||||
// 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
|
||||
xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
|
||||
s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
|
||||
s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
|
||||
s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
|
||||
};
|
||||
|
||||
// AcceptRange gives the range of valid values for the second byte in a UTF-8
|
||||
// sequence.
|
||||
struct AcceptRange {
|
||||
uint8_t lo; // lowest value for second byte.
|
||||
uint8_t hi; // highest value for second byte.
|
||||
};
|
||||
|
||||
// ranges has size 16 to avoid bounds checks in the code that uses it.
|
||||
const static struct AcceptRange ranges[5] = {
|
||||
{locb, hicb}, // 0
|
||||
{0xA0, hicb}, // 1
|
||||
{locb, 0x9F}, // 2
|
||||
{0x90, hicb}, // 3
|
||||
{locb, 0x8F}, // 4
|
||||
};
|
||||
|
||||
// UTF-8 code point | first byte | second byte | third byte | fourth byte
|
||||
// U+0000 - U+007F | 0___ ____
|
||||
// U+0080 - U+07FF | 110_ ____ | 10__ ____
|
||||
// U+0800 - U+D7FF | 1110 ____ | 10__ ____ | 10__ ____
|
||||
// U+D800 - U+DFFF | reserved for UTF-16 surrogate pairs
|
||||
// U+E000 - U+FFFF | 1110 ____ | 10__ ____ | 10__ ____
|
||||
// U+10000 - U+10FFFF | 1111 0___ | 10__ ____ | 10__ ____ | 10__ ____
|
||||
// checks non-ascii characters, and returns the utf-8 length
|
||||
static inline ssize_t nonascii_is_utf8(const uint8_t* sp, size_t n) {
|
||||
uint8_t mask = first[sp[0]];
|
||||
uint8_t size = mask & 7;
|
||||
if (n < size) {
|
||||
return 0;
|
||||
}
|
||||
struct AcceptRange accept = ranges[mask >> 4];
|
||||
switch (size) {
|
||||
case 4 : if (sp[3] < locb || hicb < sp[3]) return 0;
|
||||
case 3 : if (sp[2] < locb || hicb < sp[2]) return 0;
|
||||
case 2 : if (sp[1] < accept.lo || accept.hi < sp[1]) return 0; break;
|
||||
case 1 : return 0; // invalid chars
|
||||
case 0 : return 1; // ascii chars
|
||||
default: return 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
// Scans nb bytes starting at sp and returns the offset of the first byte
// whose top bit is set, or -1 when every byte is ASCII.
ssize_t find_non_ascii(const uint8_t*sp, ssize_t nb) {
    const uint8_t* cur = sp;
    int64_t bits;

#if USE_AVX2
    /* 32 bytes per step */
    for (; nb >= 32; nb -= 32, cur += 32) {
        __m256i chunk = _mm256_loadu_si256 ((const void *)(cur));
        bits = _mm256_ascii_mask(chunk);
        if (unlikely(bits != 0)) {
            /* lowest set bit is the first offending byte of this chunk */
            return cur - sp + __builtin_ctzll(bits);
        }
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* 16 bytes per step */
    for (; nb >= 16; nb -= 16, cur += 16) {
        __m128i chunk = _mm_loadu_si128 ((const void *)(cur));
        bits = _mm_ascii_mask(chunk);
        if (unlikely(bits != 0)) {
            return cur - sp + __builtin_ctzll(bits);
        }
    }

    /* remaining bytes, do with scalar code */
    for (; nb > 0; nb--, cur++) {
        if (!is_ascii(*cur)) {
            return cur - sp;
        }
    }

    /* nothing found */
    return -1;
}
|
||||
|
||||
// utf8_validate checks whether the nb bytes at sp are valid UTF-8.
// Returns -1 when the input is valid; otherwise returns the offset of the
// first byte of the offending character.
ssize_t utf8_validate(const char *sp, ssize_t nb) {
    const uint8_t* start = (const uint8_t*)sp;
    const uint8_t* cur   = start;

    while (nb > 0) {
        /* Runs of consecutive non-ASCII characters skip the search entirely;
         * otherwise jump over the ASCII prefix in one go. */
        ssize_t skip = is_ascii(*cur) ? find_non_ascii(cur, nb) : 0;

        /* no non-ASCII byte left in the string */
        if (skip == -1 || skip >= nb) {
            return -1;
        }
        cur += skip;
        nb  -= skip;

        /* validate the non-ASCII character found at cur */
        ssize_t len = nonascii_is_utf8(cur, nb);
        if (unlikely(len == 0)) {
            return cur - start;
        }
        cur += len;
        nb  -= len;
    }

    return -1;
}
|
||||
121
native/utf8.h
Normal file
121
native/utf8.h
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
#ifndef UTF8_H
|
||||
#define UTF8_H
|
||||
/*
|
||||
* Copyright (C) 2019 Yaoyuan <ibireme@gmail.com>.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* This file may have been modified by ByteDance authors. All ByteDance
|
||||
* Modifications are Copyright 2022 ByteDance Authors.
|
||||
*/
|
||||
|
||||
/*
 * valid_utf8_4byte classifies a multi-byte UTF-8 sequence that has been loaded
 * into a 32-bit word with the first byte in the lowest 8 bits.
 *
 * Returns 2, 3 or 4 (the sequence length) when ubin starts with a valid
 * 2/3/4-byte sequence, and 0 otherwise. Single-byte (ASCII) input is not
 * recognised here and also yields 0.
 *
 * Each unicode code point is encoded as 1 to 4 bytes in UTF-8 encoding; a
 * 4-byte mask and pattern value is used to validate the byte sequence, which
 * requires the input data to have 4-byte zero padding.
 *
 *   length | code points          | mask (bytes 1..4) | pattern     | required bits
 *   -------+----------------------+-------------------+-------------+--------------
 *   1      | U+0000  .. U+007F    | 80                | 00          | -
 *   2      | U+0080  .. U+07FF    | E0 C0             | C0 80       | 1E 00
 *   3      | U+0800  .. U+FFFF    | F0 C0 C0          | E0 80 80    | 0F 20 00
 *   4      | U+10000 .. U+10FFFF  | F8 C0 C0 C0       | F0 80 80 80 | 07 30 00 00
 *
 * "required bits" must not all be zero, which rejects overlong encodings.
 * In addition:
 *   - 3-byte sequences whose required bits equal 0D 20 00 encode the
 *     surrogate halves U+D800 .. U+DFFF and are rejected;
 *   - 4-byte sequences are rejected when they have both the 04 00 00 00 bit
 *     and any of the 03 30 00 00 bits set (code points above U+10FFFF).
 */
static inline ssize_t valid_utf8_4byte(uint32_t ubin) {
    /* All constants are the byte patterns above, read as a little-endian word. */
    const uint32_t b1_mask = 0x00000080UL;
    const uint32_t b1_patt = 0x00000000UL;

    const uint32_t b2_mask = 0x0000C0E0UL;
    const uint32_t b2_patt = 0x000080C0UL;
    const uint32_t b2_requ = 0x0000001EUL;

    const uint32_t b3_mask = 0x00C0C0F0UL;
    const uint32_t b3_patt = 0x008080E0UL;
    const uint32_t b3_requ = 0x0000200FUL;
    const uint32_t b3_erro = 0x0000200DUL;

    const uint32_t b4_mask = 0xC0C0C0F8UL;
    const uint32_t b4_patt = 0x808080F0UL;
    const uint32_t b4_requ = 0x00003007UL;
    const uint32_t b4_err0 = 0x00000004UL;
    const uint32_t b4_err1 = 0x00003003UL;

#define is_valid_seq_1(uni) ( \
    (((uni) & b1_mask) == b1_patt) \
)

#define is_valid_seq_2(uni) ( \
    (((uni) & b2_mask) == b2_patt) && \
    (((uni) & b2_requ)) \
)

#define is_valid_seq_3(uni) ( \
    (((uni) & b3_mask) == b3_patt) && \
    ((tmp = ((uni) & b3_requ))) && \
    ((tmp != b3_erro)) \
)

#define is_valid_seq_4(uni) ( \
    (((uni) & b4_mask) == b4_patt) && \
    ((tmp = ((uni) & b4_requ))) && \
    ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0) \
)
    /* scratch word written by the 3- and 4-byte macros */
    uint32_t tmp = 0;
    ssize_t  len = 0;

    /* 3-byte sequences are tried first, then 2-byte, then 4-byte. */
    if (is_valid_seq_3(ubin)) {
        len = 3;
    } else if (is_valid_seq_2(ubin)) {
        len = 2;
    } else if (is_valid_seq_4(ubin)) {
        len = 4;
    }
    return len;
}
|
||||
|
||||
/*
 * less4byte_to_uint32 packs up to three bytes from sp into a 32-bit word with
 * sp[0] in the lowest 8 bits, the layout valid_utf8_4byte expects.
 *
 *   sp: input bytes.
 *   nb: number of readable bytes; 1 and 2 read exactly that many bytes, any
 *       larger value reads three. 0 reads nothing and yields 0.
 *
 * Bytes are combined one at a time, so the result does not depend on the
 * alignment of sp and no byte beyond the ones listed above is touched.
 */
static inline uint32_t less4byte_to_uint32(const char* sp, size_t nb) {
    const uint8_t* bytes = (const uint8_t*)sp;
    uint32_t word;

    if (nb == 0) return 0;

    word = bytes[0];
    if (nb == 1) return word;

    word |= (uint32_t)bytes[1] << 8;
    if (nb == 2) return word;

    return word | ((uint32_t)bytes[2] << 16);
}
|
||||
|
||||
#endif
|
||||
12
sonic.go
12
sonic.go
|
|
@ -61,6 +61,8 @@ type frozenConfig struct {
|
|||
// Froze convert the Config to API
|
||||
func (cfg Config) Froze() API {
|
||||
api := &frozenConfig{Config: cfg}
|
||||
|
||||
// configure encoder options:
|
||||
if cfg.EscapeHTML {
|
||||
api.encoderOpts |= encoder.EscapeHTML
|
||||
}
|
||||
|
|
@ -73,6 +75,11 @@ func (cfg Config) Froze() API {
|
|||
if cfg.NoQuoteTextMarshaler {
|
||||
api.encoderOpts |= encoder.NoQuoteTextMarshaler
|
||||
}
|
||||
if cfg.NoNullSliceOrMap {
|
||||
api.encoderOpts |= encoder.NoNullSliceOrMap
|
||||
}
|
||||
|
||||
// configure decoder options:
|
||||
if cfg.UseInt64 {
|
||||
api.decoderOpts |= decoder.OptionUseInt64
|
||||
}
|
||||
|
|
@ -85,8 +92,8 @@ func (cfg Config) Froze() API {
|
|||
if cfg.CopyString {
|
||||
api.decoderOpts |= decoder.OptionCopyString
|
||||
}
|
||||
if cfg.NoNullSliceOrMap {
|
||||
api.encoderOpts |= encoder.NoNullSliceOrMap
|
||||
if cfg.ValidateString {
|
||||
api.decoderOpts |= decoder.OptionValidateString
|
||||
}
|
||||
return api
|
||||
}
|
||||
|
|
@ -111,7 +118,6 @@ func (cfg *frozenConfig) MarshalIndent(val interface{}, prefix, indent string) (
|
|||
func (cfg *frozenConfig) UnmarshalFromString(buf string, val interface{}) error {
|
||||
dec := decoder.NewDecoder(buf)
|
||||
dec.SetOptions(cfg.decoderOpts)
|
||||
|
||||
err := dec.Decode(val)
|
||||
pos := dec.Pos()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue