mirror of
https://github.com/ii64/sonic.git
synced 2026-06-20 16:45:22 +08:00
feat: repl invalid utf8 in serde by option (#357)
This commit is contained in:
parent
f87d87de7a
commit
02865de676
52 changed files with 23478 additions and 20789 deletions
2
.github/workflows/license-check.yml
vendored
2
.github/workflows/license-check.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: License Check
|
name: License Check
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
2
.github/workflows/push-check-go118.yml
vendored
2
.github/workflows/push-check-go118.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: Push Check Go1.18-Linux-X64
|
name: Push Check Go1.18-Linux-X64
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
2
.github/workflows/push-check-linux-arm64.yml
vendored
2
.github/workflows/push-check-linux-arm64.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: Push Check Linux-ARM
|
name: Push Check Linux-ARM
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
2
.github/workflows/push-check-linux-x64.yml
vendored
2
.github/workflows/push-check-linux-x64.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: Push Check Linux-X64
|
name: Push Check Linux-X64
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
2
.github/workflows/push-check-qemu.yml
vendored
2
.github/workflows/push-check-qemu.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: Push Check Linux-Qemu
|
name: Push Check Linux-Qemu
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
2
.github/workflows/push-check-windows.yml
vendored
2
.github/workflows/push-check-windows.yml
vendored
|
|
@ -1,6 +1,6 @@
|
||||||
name: Push Check Windows-X64
|
name: Push Check Windows-X64
|
||||||
|
|
||||||
on: push
|
on: pull_request
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
|
||||||
8
Makefile
8
Makefile
|
|
@ -35,7 +35,7 @@ CC_amd64 := clang
|
||||||
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py
|
ASM2ASM_amd64 := tools/asm2asm/asm2asm.py
|
||||||
|
|
||||||
CFLAGS := -mno-red-zone
|
CFLAGS := -mno-red-zone
|
||||||
CFLAGS += -arch x86_64
|
CFLAGS += -target x86_64-apple-macos11
|
||||||
CFLAGS += -fno-asynchronous-unwind-tables
|
CFLAGS += -fno-asynchronous-unwind-tables
|
||||||
CFLAGS += -fno-builtin
|
CFLAGS += -fno-builtin
|
||||||
CFLAGS += -fno-exceptions
|
CFLAGS += -fno-exceptions
|
||||||
|
|
@ -100,8 +100,10 @@ endef
|
||||||
all: ${ARCH}
|
all: ${ARCH}
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -vfr ${TMP_DIR}/{sse,avx,avx2}
|
for arch in ${ARCH}; do \
|
||||||
rm -vfr ${OUT_DIR}/{sse,avx,avx2}
|
rm -vfr ${TMP_DIR}/$${arch}; \
|
||||||
|
rm -vfr ${OUT_DIR}/$${arch}; \
|
||||||
|
done
|
||||||
|
|
||||||
$(foreach \
|
$(foreach \
|
||||||
arch, \
|
arch, \
|
||||||
|
|
|
||||||
10
api.go
10
api.go
|
|
@ -66,8 +66,8 @@ type Config struct {
|
||||||
// CopyString indicates decoder to decode string values by copying instead of referring.
|
// CopyString indicates decoder to decode string values by copying instead of referring.
|
||||||
CopyString bool
|
CopyString bool
|
||||||
|
|
||||||
// ValidateString indicates decoder to valid string values: decoder will return errors when
|
// ValidateString indicates decoder and encoder to validate string values: decoder will return errors
|
||||||
// invalid UTF-8 chars or unescaped control chars(\u0000-\u001f) in the string value of JSON.
|
// when there are unescaped control chars(\u0000-\u001f) in the string value of JSON.
|
||||||
ValidateString bool
|
ValidateString bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -81,6 +81,7 @@ var (
|
||||||
SortMapKeys: true,
|
SortMapKeys: true,
|
||||||
CompactMarshaler: true,
|
CompactMarshaler: true,
|
||||||
CopyString : true,
|
CopyString : true,
|
||||||
|
ValidateString : true,
|
||||||
}.Froze()
|
}.Froze()
|
||||||
|
|
||||||
// ConfigFastest is the fastest config of APIs, aiming at speed.
|
// ConfigFastest is the fastest config of APIs, aiming at speed.
|
||||||
|
|
@ -164,12 +165,15 @@ func UnmarshalString(buf string, val interface{}) error {
|
||||||
return ConfigDefault.UnmarshalFromString(buf, val)
|
return ConfigDefault.UnmarshalFromString(buf, val)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get searches the given path json,
|
// Get searches the given path from json,
|
||||||
// and returns its representing ast.Node.
|
// and returns its representing ast.Node.
|
||||||
//
|
//
|
||||||
// Each path arg must be integer or string:
|
// Each path arg must be integer or string:
|
||||||
// - Integer means searching current node as array
|
// - Integer means searching current node as array
|
||||||
// - String means searching current node as object
|
// - String means searching current node as object
|
||||||
|
//
|
||||||
|
// Note, the api expects the json is well-formed at least,
|
||||||
|
// otherwise it may return unexpected result.
|
||||||
func Get(src []byte, path ...interface{}) (ast.Node, error) {
|
func Get(src []byte, path ...interface{}) (ast.Node, error) {
|
||||||
return GetFromString(string(src), path...)
|
return GetFromString(string(src), path...)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -691,7 +691,10 @@ func (self *Node) AddAny(val interface{}) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetByPath load given path on demands,
|
// GetByPath load given path on demands,
|
||||||
// which only ensure nodes before this path got parsed
|
// which only ensure nodes before this path got parsed.
|
||||||
|
//
|
||||||
|
// Note, the api expects the json is well-formed at least,
|
||||||
|
// otherwise it may return unexpected result.
|
||||||
func (self *Node) GetByPath(path ...interface{}) *Node {
|
func (self *Node) GetByPath(path ...interface{}) *Node {
|
||||||
if !self.Valid() {
|
if !self.Valid() {
|
||||||
return self
|
return self
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,6 @@ import (
|
||||||
`strings`
|
`strings`
|
||||||
`testing`
|
`testing`
|
||||||
`time`
|
`time`
|
||||||
`unicode/utf8`
|
|
||||||
`unsafe`
|
`unsafe`
|
||||||
|
|
||||||
`github.com/bytedance/sonic/decoder`
|
`github.com/bytedance/sonic/decoder`
|
||||||
|
|
@ -1011,8 +1010,8 @@ var unmarshalTests = []unmarshalTest{
|
||||||
|
|
||||||
{in: "\"\x00\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
{in: "\"\x00\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||||
{in: "\"\x00\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
{in: "\"\x00\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
||||||
{in: "\"\xff\"", ptr: new(interface{}), err: fmt.Errorf("json: invald char"), validateString: true},
|
{in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\ufffd"), validateString: true},
|
||||||
{in: "\"\xff\"", ptr: new(string), err: fmt.Errorf("json: invald char"), validateString: true},
|
{in: "\"\xff\"", ptr: new(string), out: "\ufffd", validateString: true},
|
||||||
{in: "\"\x00\"", ptr: new(interface{}), out: interface{}("\x00"), validateString: false},
|
{in: "\"\x00\"", ptr: new(interface{}), out: interface{}("\x00"), validateString: false},
|
||||||
{in: "\"\x00\"", ptr: new(string), out: "\x00", validateString: false},
|
{in: "\"\x00\"", ptr: new(string), out: "\x00", validateString: false},
|
||||||
{in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\xff"), validateString: false},
|
{in: "\"\xff\"", ptr: new(interface{}), out: interface{}("\xff"), validateString: false},
|
||||||
|
|
@ -1147,7 +1146,6 @@ func TestUnmarshal(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
dec := decoder.NewDecoder(tt.in)
|
dec := decoder.NewDecoder(tt.in)
|
||||||
validUtf8 := true
|
|
||||||
if tt.useNumber {
|
if tt.useNumber {
|
||||||
dec.UseNumber()
|
dec.UseNumber()
|
||||||
}
|
}
|
||||||
|
|
@ -1156,10 +1154,9 @@ func TestUnmarshal(t *testing.T) {
|
||||||
}
|
}
|
||||||
if tt.validateString {
|
if tt.validateString {
|
||||||
dec.ValidateString()
|
dec.ValidateString()
|
||||||
validUtf8 = utf8.Valid([]byte(tt.in))
|
|
||||||
}
|
}
|
||||||
if err := dec.Decode(v.Interface()); (err == nil) != (tt.err == nil && validUtf8) {
|
if err := dec.Decode(v.Interface()); (err == nil) != (tt.err == nil) {
|
||||||
spew.Dump(tt.in)
|
spew.Dump(tt)
|
||||||
t.Fatalf("#%d: %v, want %v", i, err, tt.err)
|
t.Fatalf("#%d: %v, want %v", i, err, tt.err)
|
||||||
continue
|
continue
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
|
@ -2524,3 +2521,68 @@ func TestChangeTool(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDecoder_LongestInvalidUtf8(t *testing.T) {
|
||||||
|
for _, data := range([]string{
|
||||||
|
"\"" + strings.Repeat("\x80", 4096) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 4095) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 4097) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 12345) + "\"",
|
||||||
|
}) {
|
||||||
|
testDecodeInvalidUtf8(t, []byte(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testDecodeInvalidUtf8(t *testing.T, data []byte) {
|
||||||
|
var sgot, jgot string
|
||||||
|
serr := ConfigStd.Unmarshal(data, &sgot)
|
||||||
|
jerr := json.Unmarshal(data, &jgot)
|
||||||
|
assert.Equal(t, serr != nil, jerr != nil)
|
||||||
|
if jerr == nil {
|
||||||
|
assert.Equal(t, sgot, jgot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func needEscape(b byte) bool {
|
||||||
|
return b == '"' || b == '\\' || b < '\x20'
|
||||||
|
}
|
||||||
|
|
||||||
|
func genRandJsonBytes(length int) []byte {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
buf.WriteByte('"')
|
||||||
|
for j := 0; j < length; j++ {
|
||||||
|
r := rand.Intn(0xff + 1)
|
||||||
|
if needEscape(byte(r)) {
|
||||||
|
buf.WriteByte('\\')
|
||||||
|
}
|
||||||
|
buf.WriteByte(byte(r))
|
||||||
|
}
|
||||||
|
buf.WriteByte('"')
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
func genRandJsonRune(length int) []byte {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
buf.WriteByte('"')
|
||||||
|
for j := 0; j < length; j++ {
|
||||||
|
r := rand.Intn(0x10FFFF + 1)
|
||||||
|
if r < 0x80 && needEscape(byte(r)) {
|
||||||
|
buf.WriteByte('\\')
|
||||||
|
buf.WriteByte(byte(r))
|
||||||
|
} else {
|
||||||
|
buf.WriteRune(rune(r))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf.WriteByte('"')
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDecoder_RandomInvalidUtf8(t *testing.T) {
|
||||||
|
nums := 1000
|
||||||
|
maxLen := 1000
|
||||||
|
for i := 0; i < nums; i++ {
|
||||||
|
length := rand.Intn(maxLen)
|
||||||
|
testDecodeInvalidUtf8(t, genRandJsonBytes(length))
|
||||||
|
testDecodeInvalidUtf8(t, genRandJsonRune(length))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import (
|
||||||
`github.com/bytedance/sonic/internal/native/types`
|
`github.com/bytedance/sonic/internal/native/types`
|
||||||
`github.com/bytedance/sonic/internal/rt`
|
`github.com/bytedance/sonic/internal/rt`
|
||||||
`github.com/bytedance/sonic/option`
|
`github.com/bytedance/sonic/option`
|
||||||
|
`github.com/bytedance/sonic/utf8`
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -80,9 +81,39 @@ func (self *Decoder) Reset(s string) {
|
||||||
// self.f = 0
|
// self.f = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self *Decoder) CheckTrailings() error {
|
||||||
|
pos := self.i
|
||||||
|
buf := self.s
|
||||||
|
/* skip all the trailing spaces */
|
||||||
|
if pos != len(buf) {
|
||||||
|
for pos < len(buf) && (types.SPACE_MASK & (1 << buf[pos])) != 0 {
|
||||||
|
pos++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* then it must be at EOF */
|
||||||
|
if pos == len(buf) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
/* junk after JSON value */
|
||||||
|
return SyntaxError {
|
||||||
|
Src : buf,
|
||||||
|
Pos : pos,
|
||||||
|
Code : types.ERR_INVALID_CHAR,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Decode parses the JSON-encoded data from current position and stores the result
|
// Decode parses the JSON-encoded data from current position and stores the result
|
||||||
// in the value pointed to by val.
|
// in the value pointed to by val.
|
||||||
func (self *Decoder) Decode(val interface{}) error {
|
func (self *Decoder) Decode(val interface{}) error {
|
||||||
|
/* validate json if needed */
|
||||||
|
if (self.f & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(self.s){
|
||||||
|
dbuf := utf8.CorrectWith(nil, rt.Str2Mem(self.s), "\ufffd")
|
||||||
|
self.s = rt.Mem2Str(dbuf)
|
||||||
|
}
|
||||||
|
|
||||||
vv := rt.UnpackEface(val)
|
vv := rt.UnpackEface(val)
|
||||||
vp := vv.Value
|
vp := vv.Value
|
||||||
|
|
||||||
|
|
@ -99,7 +130,6 @@ func (self *Decoder) Decode(val interface{}) error {
|
||||||
/* create a new stack, and call the decoder */
|
/* create a new stack, and call the decoder */
|
||||||
sb, etp := newStack(), rt.PtrElem(vv.Type)
|
sb, etp := newStack(), rt.PtrElem(vv.Type)
|
||||||
nb, err := decodeTypedPointer(self.s, self.i, etp, vp, sb, self.f)
|
nb, err := decodeTypedPointer(self.s, self.i, etp, vp, sb, self.f)
|
||||||
|
|
||||||
/* return the stack back */
|
/* return the stack back */
|
||||||
self.i = nb
|
self.i = nb
|
||||||
freeStack(sb)
|
freeStack(sb)
|
||||||
|
|
|
||||||
|
|
@ -345,7 +345,6 @@ func TestDecoder_Generic(t *testing.T) {
|
||||||
pos, err := decode(TwitterJson, &v, false)
|
pos, err := decode(TwitterJson, &v, false)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, len(TwitterJson), pos)
|
assert.Equal(t, len(TwitterJson), pos)
|
||||||
spew.Dump(v)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDecoder_Binding(t *testing.T) {
|
func TestDecoder_Binding(t *testing.T) {
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import (
|
||||||
`testing`
|
`testing`
|
||||||
`time`
|
`time`
|
||||||
`unsafe`
|
`unsafe`
|
||||||
|
`strings`
|
||||||
|
|
||||||
`github.com/bytedance/sonic/encoder`
|
`github.com/bytedance/sonic/encoder`
|
||||||
`github.com/stretchr/testify/assert`
|
`github.com/stretchr/testify/assert`
|
||||||
|
|
@ -52,7 +53,6 @@ func TestMain(m *testing.M) {
|
||||||
runtime.GC()
|
runtime.GC()
|
||||||
debug.FreeOSMemory()
|
debug.FreeOSMemory()
|
||||||
}
|
}
|
||||||
println("stop GC looping!")
|
|
||||||
}()
|
}()
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond)
|
||||||
m.Run()
|
m.Run()
|
||||||
|
|
@ -1168,4 +1168,33 @@ func TestMarshalNullNil(t *testing.T) {
|
||||||
}.Froze().Marshal(v)
|
}.Froze().Marshal(v)
|
||||||
assert.Nil(t, e)
|
assert.Nil(t, e)
|
||||||
assert.Equal(t, `{"A":[],"B":{}}`, string(o))
|
assert.Equal(t, `{"A":[],"B":{}}`, string(o))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncoder_LongestInvalidUtf8(t *testing.T) {
|
||||||
|
for _, data := range([]string{
|
||||||
|
"\"" + strings.Repeat("\x80", 4096) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 4095) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 4097) + "\"",
|
||||||
|
"\"" + strings.Repeat("\x80", 12345) + "\"",
|
||||||
|
}) {
|
||||||
|
testEncodeInvalidUtf8(t, []byte(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testEncodeInvalidUtf8(t *testing.T, data []byte) {
|
||||||
|
jgot, jerr := json.Marshal(data)
|
||||||
|
sgot, serr := ConfigStd.Marshal(data)
|
||||||
|
assert.Equal(t, serr != nil, jerr != nil)
|
||||||
|
if jerr == nil {
|
||||||
|
assert.Equal(t, sgot, jgot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncoder_RandomInvalidUtf8(t *testing.T) {
|
||||||
|
nums := 1000
|
||||||
|
maxLen := 1000
|
||||||
|
for i := 0; i < nums; i++ {
|
||||||
|
testEncodeInvalidUtf8(t, genRandJsonBytes(maxLen))
|
||||||
|
testEncodeInvalidUtf8(t, genRandJsonRune(maxLen))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -21,11 +21,11 @@ import (
|
||||||
`encoding/json`
|
`encoding/json`
|
||||||
`reflect`
|
`reflect`
|
||||||
`runtime`
|
`runtime`
|
||||||
`unsafe`
|
|
||||||
|
|
||||||
`github.com/bytedance/sonic/internal/native`
|
`github.com/bytedance/sonic/internal/native`
|
||||||
`github.com/bytedance/sonic/internal/native/types`
|
`github.com/bytedance/sonic/internal/native/types`
|
||||||
`github.com/bytedance/sonic/internal/rt`
|
`github.com/bytedance/sonic/internal/rt`
|
||||||
|
`github.com/bytedance/sonic/utf8`
|
||||||
`github.com/bytedance/sonic/option`
|
`github.com/bytedance/sonic/option`
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -38,6 +38,7 @@ const (
|
||||||
bitCompactMarshaler
|
bitCompactMarshaler
|
||||||
bitNoQuoteTextMarshaler
|
bitNoQuoteTextMarshaler
|
||||||
bitNoNullSliceOrMap
|
bitNoNullSliceOrMap
|
||||||
|
bitValidateString
|
||||||
|
|
||||||
// used for recursive compile
|
// used for recursive compile
|
||||||
bitPointerValue = 63
|
bitPointerValue = 63
|
||||||
|
|
@ -65,6 +66,10 @@ const (
|
||||||
// NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
|
// NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
|
||||||
// instead of 'null'
|
// instead of 'null'
|
||||||
NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap
|
NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap
|
||||||
|
|
||||||
|
// ValidateString indicates that encoder should validate the input string
|
||||||
|
// before encoding it into JSON.
|
||||||
|
ValidateString Options = 1 << bitValidateString
|
||||||
|
|
||||||
// CompatibleWithStd is used to be compatible with std encoder.
|
// CompatibleWithStd is used to be compatible with std encoder.
|
||||||
CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
|
CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
|
||||||
|
|
@ -100,6 +105,15 @@ func (self *Encoder) SetEscapeHTML(f bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetValidateString specifies if option ValidateString opens
|
||||||
|
func (self *Encoder) SetValidateString(f bool) {
|
||||||
|
if f {
|
||||||
|
self.Opts |= ValidateString
|
||||||
|
} else {
|
||||||
|
self.Opts &= ^ValidateString
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SetCompactMarshaler specifies if option CompactMarshaler opens
|
// SetCompactMarshaler specifies if option CompactMarshaler opens
|
||||||
func (self *Encoder) SetCompactMarshaler(f bool) {
|
func (self *Encoder) SetCompactMarshaler(f bool) {
|
||||||
if f {
|
if f {
|
||||||
|
|
@ -156,7 +170,7 @@ func Encode(val interface{}, opts Options) ([]byte, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts & EscapeHTML != 0 {
|
if opts & EscapeHTML != 0 || opts & ValidateString != 0 {
|
||||||
return buf, nil
|
return buf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -189,6 +203,12 @@ func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
|
||||||
*buf = dest
|
*buf = dest
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts & ValidateString != 0 && !utf8.Validate(*buf) {
|
||||||
|
dest := utf8.CorrectWith(nil, *buf, `\ufffd`)
|
||||||
|
freeBytes(*buf) // free origin used buffer
|
||||||
|
*buf = dest
|
||||||
|
}
|
||||||
|
|
||||||
/* avoid GC ahead */
|
/* avoid GC ahead */
|
||||||
runtime.KeepAlive(buf)
|
runtime.KeepAlive(buf)
|
||||||
runtime.KeepAlive(efv)
|
runtime.KeepAlive(efv)
|
||||||
|
|
@ -203,38 +223,8 @@ var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
|
||||||
// For historical reasons, web browsers don't honor standard HTML
|
// For historical reasons, web browsers don't honor standard HTML
|
||||||
// escaping within <script> tags, so an alternative JSON encoding must
|
// escaping within <script> tags, so an alternative JSON encoding must
|
||||||
// be used.
|
// be used.
|
||||||
func HTMLEscape(dest []byte, src []byte) []byte {
|
func HTMLEscape(dst []byte, src []byte) []byte {
|
||||||
nb := len(src)
|
return htmlEscape(dst, src)
|
||||||
|
|
||||||
// initilize dest buffer
|
|
||||||
cap := nb * 6 / 5
|
|
||||||
if dest == nil {
|
|
||||||
dest = make([]byte, 0, cap)
|
|
||||||
}
|
|
||||||
ds := (*rt.GoSlice)(unsafe.Pointer(&dest))
|
|
||||||
sp := (*rt.GoSlice)(unsafe.Pointer(&src)).Ptr
|
|
||||||
ds.Len = 0
|
|
||||||
if (ds.Cap < cap) {
|
|
||||||
*ds = growslice(typeByte, *ds, cap)
|
|
||||||
}
|
|
||||||
|
|
||||||
for nb > 0 {
|
|
||||||
dp := unsafe.Pointer(uintptr(ds.Ptr) + uintptr(ds.Len))
|
|
||||||
dn := ds.Cap - ds.Len
|
|
||||||
|
|
||||||
ret := native.HTMLEscape(sp, nb, dp, &dn)
|
|
||||||
ds.Len += dn
|
|
||||||
|
|
||||||
if ret >= 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
ret = ^ret
|
|
||||||
nb -= ret
|
|
||||||
|
|
||||||
*ds = growslice(typeByte, *ds, ds.Cap * 2)
|
|
||||||
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
|
|
||||||
}
|
|
||||||
return dest
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// EncodeIndented is like Encode but applies Indent to format the output.
|
// EncodeIndented is like Encode but applies Indent to format the output.
|
||||||
|
|
@ -293,6 +283,8 @@ func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
|
||||||
// Valid validates json and returns first non-blank character position,
|
// Valid validates json and returns first non-blank character position,
|
||||||
// if it is only one valid json value.
|
// if it is only one valid json value.
|
||||||
// Otherwise returns invalid character position using start.
|
// Otherwise returns invalid character position using start.
|
||||||
|
//
|
||||||
|
// Note: it does not check for invalid UTF-8 characters.
|
||||||
func Valid(data []byte) (ok bool, start int) {
|
func Valid(data []byte) (ok bool, start int) {
|
||||||
n := len(data)
|
n := len(data)
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
|
|
@ -302,14 +294,18 @@ func Valid(data []byte) (ok bool, start int) {
|
||||||
p := 0
|
p := 0
|
||||||
m := types.NewStateMachine()
|
m := types.NewStateMachine()
|
||||||
ret := native.ValidateOne(&s, &p, m)
|
ret := native.ValidateOne(&s, &p, m)
|
||||||
types.FreeStateMachine(m)
|
types.FreeStateMachine(m)
|
||||||
|
|
||||||
if ret < 0 {
|
if ret < 0 {
|
||||||
return false, p-1
|
return false, p-1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* check for trailing spaces */
|
||||||
for ;p < n; p++ {
|
for ;p < n; p++ {
|
||||||
if (types.SPACE_MASK & (1 << data[p])) == 0 {
|
if (types.SPACE_MASK & (1 << data[p])) == 0 {
|
||||||
return false, p
|
return false, p
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true, ret
|
return true, ret
|
||||||
}
|
}
|
||||||
|
|
@ -113,6 +113,39 @@ func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler, opt Options) e
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func htmlEscape(dst []byte, src []byte) []byte {
|
||||||
|
var sidx int
|
||||||
|
|
||||||
|
dst = append(dst, src[:0]...) // avoid check nil dst
|
||||||
|
sbuf := (*rt.GoSlice)(unsafe.Pointer(&src))
|
||||||
|
dbuf := (*rt.GoSlice)(unsafe.Pointer(&dst))
|
||||||
|
|
||||||
|
/* grow dst if it is shorter */
|
||||||
|
if cap(dst) - len(dst) < len(src) + native.BufPaddingSize {
|
||||||
|
cap := len(src) * 3 / 2 + native.BufPaddingSize
|
||||||
|
*dbuf = growslice(typeByte, *dbuf, cap)
|
||||||
|
}
|
||||||
|
|
||||||
|
for sidx < sbuf.Len {
|
||||||
|
sp := padd(sbuf.Ptr, sidx)
|
||||||
|
dp := padd(dbuf.Ptr, dbuf.Len)
|
||||||
|
|
||||||
|
sn := sbuf.Len - sidx
|
||||||
|
dn := dbuf.Cap - dbuf.Len
|
||||||
|
nb := native.HTMLEscape(sp, sn, dp, &dn)
|
||||||
|
|
||||||
|
/* check for errors */
|
||||||
|
if dbuf.Len += dn; nb >= 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
/* not enough space, grow the slice and try again */
|
||||||
|
sidx += ^nb
|
||||||
|
*dbuf = growslice(typeByte, *dbuf, dbuf.Cap * 2)
|
||||||
|
}
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
argPtrs = []bool { true, true, true, false }
|
argPtrs = []bool { true, true, true, false }
|
||||||
localPtrs = []bool{}
|
localPtrs = []bool{}
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
|
testname := FuzzMain
|
||||||
|
corpusdir := ./testdata/fuzz/${testname}
|
||||||
|
|
||||||
fuzz:
|
fuzz:
|
||||||
mkdir -p ./testdata/fuzz/FuzzMain
|
mkdir -p ${corpusdir}
|
||||||
rm -rf ./go-fuzz-corpus
|
rm -rf ./go-fuzz-corpus
|
||||||
git clone https://github.com/dvyukov/go-fuzz-corpus.git ./go-fuzz-corpus/
|
git clone https://github.com/dvyukov/go-fuzz-corpus.git ./go-fuzz-corpus/
|
||||||
file2fuzz -o ./testdata/fuzz/FuzzMain ./go-fuzz-corpus/json/corpus/* ./corpus/*
|
file2fuzz -o ${corpusdir} ./go-fuzz-corpus/json/corpus/* ./corpus/*
|
||||||
|
|
||||||
run:
|
run:
|
||||||
GOARCH=amd64 go test -fuzz=Fuzz -v
|
GOARCH=amd64 go test -fuzz=${testname} -v
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf ./go-fuzz-corpus/
|
rm -rf ./go-fuzz-corpus/
|
||||||
|
|
|
||||||
|
|
@ -19,17 +19,21 @@
|
||||||
package sonic_fuzz
|
package sonic_fuzz
|
||||||
|
|
||||||
import (
|
import (
|
||||||
`encoding/json`
|
`encoding/json`
|
||||||
`testing`
|
`testing`
|
||||||
`unicode/utf8`
|
_ `unicode/utf8`
|
||||||
`reflect`
|
|
||||||
`os`
|
`os`
|
||||||
`runtime`
|
`runtime`
|
||||||
`runtime/debug`
|
`runtime/debug`
|
||||||
`time`
|
`time`
|
||||||
|
`io`
|
||||||
|
`log`
|
||||||
|
`strconv`
|
||||||
|
|
||||||
`github.com/bytedance/sonic`
|
`github.com/bytedance/sonic`
|
||||||
`github.com/stretchr/testify/require`
|
`github.com/stretchr/testify/require`
|
||||||
|
`github.com/davecgh/go-spew/spew`
|
||||||
|
`github.com/bytedance/gopkg/util/gctuner`
|
||||||
)
|
)
|
||||||
|
|
||||||
func FuzzMain(f *testing.F) {
|
func FuzzMain(f *testing.F) {
|
||||||
|
|
@ -39,11 +43,18 @@ func FuzzMain(f *testing.F) {
|
||||||
f.Fuzz(fuzzMain)
|
f.Fuzz(fuzzMain)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used for debugging a failed fuzz corpus
|
||||||
|
func TestCorpus(t *testing.T) {
|
||||||
|
fuzzMain(t, []byte("[1\x00"))
|
||||||
|
}
|
||||||
|
|
||||||
|
var target = sonic.ConfigStd
|
||||||
|
|
||||||
func fuzzMain(t *testing.T, data []byte) {
|
func fuzzMain(t *testing.T, data []byte) {
|
||||||
fuzzValidate(t, data)
|
fuzzValidate(t, data)
|
||||||
fuzzHtmlEscape(t, data)
|
fuzzHtmlEscape(t, data)
|
||||||
// Only fuzz the validate json here, because the default configuration does not have validation in SONIC.
|
// Only fuzz valid JSON here.
|
||||||
if !utf8.Valid(data) || !json.Valid(data) {
|
if !json.Valid(data) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, typ := range []func() interface{}{
|
for _, typ := range []func() interface{}{
|
||||||
|
|
@ -54,31 +65,34 @@ func fuzzMain(t *testing.T, data []byte) {
|
||||||
func() interface{} { return new(int64) },
|
func() interface{} { return new(int64) },
|
||||||
func() interface{} { return new(uint64) },
|
func() interface{} { return new(uint64) },
|
||||||
func() interface{} { return new(float64) },
|
func() interface{} { return new(float64) },
|
||||||
func() interface{} { return new(json.Number) },
|
// func() interface{} { return new(json.Number) },
|
||||||
func() interface{} { return new(S) },
|
// func() interface{} { return new(S) },
|
||||||
} {
|
} {
|
||||||
sv, jv := typ(), typ()
|
sv, jv := typ(), typ()
|
||||||
serr := sonic.Unmarshal([]byte(data), sv)
|
serr := target.Unmarshal([]byte(data), sv)
|
||||||
jerr := json.Unmarshal([]byte(data), jv)
|
jerr := json.Unmarshal([]byte(data), jv)
|
||||||
require.Equalf(t, serr != nil, jerr != nil, "different error in sonic unmarshal %v", reflect.TypeOf(jv))
|
require.Equal(t, serr != nil, jerr != nil,
|
||||||
|
dump(data, jv, jerr, sv, serr))
|
||||||
if jerr != nil {
|
if jerr != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
require.Equal(t, sv, jv, "different result in sonic unmarshal %v", reflect.TypeOf(jv))
|
require.Equal(t, sv, jv, dump(data, jv, jerr, sv, serr))
|
||||||
sout, serr := sonic.Marshal(sv)
|
|
||||||
jout, jerr := json.Marshal(jv)
|
v := jv
|
||||||
require.NoError(t, serr, "error in sonic marshal %v", reflect.TypeOf(jv))
|
sout, serr := target.Marshal(v)
|
||||||
require.NoError(t, jerr, "error in json marshal %v", reflect.TypeOf(jv))
|
jout, jerr := json.Marshal(v)
|
||||||
|
require.NoError(t, serr, dump(v, jout, jerr, sout, serr))
|
||||||
|
require.NoError(t, jerr, dump(v, jout, jerr, sout, serr))
|
||||||
|
|
||||||
{
|
{
|
||||||
sv, jv := typ(), typ()
|
sv, jv := typ(), typ()
|
||||||
serr := sonic.Unmarshal(sout, sv)
|
serr := target.Unmarshal(sout, sv)
|
||||||
jerr := json.Unmarshal(jout, jv)
|
jerr := json.Unmarshal(jout, jv)
|
||||||
require.Equalf(t, serr != nil, jerr != nil, "different error in sonic unmarshal again %v", reflect.TypeOf(jv))
|
require.Equalf(t, serr != nil, jerr != nil, dump(data, jv, jerr, sv, serr))
|
||||||
if jerr != nil {
|
if jerr != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
require.Equal(t, sv, jv, "different result in sonic unmarshal again %v", reflect.TypeOf(jv))
|
require.Equal(t, sv, jv, dump(data, jv, jerr, sv, serr))
|
||||||
}
|
}
|
||||||
|
|
||||||
if m, ok := sv.(*map[string]interface{}); ok {
|
if m, ok := sv.(*map[string]interface{}); ok {
|
||||||
|
|
@ -89,52 +103,54 @@ func fuzzMain(t *testing.T, data []byte) {
|
||||||
fuzzASTGetFromArray(t, jout, *a)
|
fuzzASTGetFromArray(t, jout, *a)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type S struct {
|
type S struct {
|
||||||
A int `json:",omitempty"`
|
A int `json:",omitempty"`
|
||||||
B string `json:"B1,omitempty"`
|
B string `json:"B1,omitempty"`
|
||||||
C float64
|
C float64
|
||||||
D bool
|
D bool
|
||||||
E uint8
|
E uint8
|
||||||
// F []byte // unmarshal []byte is different with encoding/json
|
// F []byte // unmarshal []byte is different with encoding/json
|
||||||
G interface{}
|
G interface{}
|
||||||
H map[string]interface{}
|
H map[string]interface{}
|
||||||
I map[string]string
|
I map[string]string
|
||||||
J []interface{}
|
J []interface{}
|
||||||
K []string
|
K []string
|
||||||
L S1
|
L S1
|
||||||
M *S1
|
M *S1
|
||||||
N *int
|
N *int
|
||||||
O **int
|
O **int
|
||||||
P int `json:",string"`
|
P int `json:",string"`
|
||||||
Q float64 `json:",string"`
|
Q float64 `json:",string"`
|
||||||
R int `json:"-"`
|
R int `json:"-"`
|
||||||
T struct {}
|
T struct {}
|
||||||
U [2]int
|
U [2]int
|
||||||
V uintptr
|
V uintptr
|
||||||
W json.Number
|
W json.Number
|
||||||
// X json.RawMessage
|
// X json.RawMessage
|
||||||
Y Marshaller
|
Y Marshaller
|
||||||
Z TextMarshaller
|
Z TextMarshaller
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type S1 struct {
|
type S1 struct {
|
||||||
A int
|
A int
|
||||||
B string
|
B string
|
||||||
}
|
}
|
||||||
|
|
||||||
type Marshaller struct {
|
type Marshaller struct {
|
||||||
v string
|
v string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Marshaller) MarshalJSON() ([]byte, error) {
|
func (m *Marshaller) MarshalJSON() ([]byte, error) {
|
||||||
return json.Marshal(m.v)
|
return json.Marshal(m.v)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Marshaller) UnmarshalJSON(data []byte) error {
|
func (m *Marshaller) UnmarshalJSON(data []byte) error {
|
||||||
return json.Unmarshal(data, &m.v)
|
return json.Unmarshal(data, &m.v)
|
||||||
}
|
}
|
||||||
|
|
||||||
type TextMarshaller struct {
|
type TextMarshaller struct {
|
||||||
|
|
@ -149,19 +165,54 @@ func (k *TextMarshaller) UnmarshalText(data []byte) error {
|
||||||
return json.Unmarshal(data, &k.v)
|
return json.Unmarshal(data, &k.v)
|
||||||
}
|
}
|
||||||
|
|
||||||
var debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == ""
|
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func dump(args ...interface{}) string {
|
||||||
|
return spew.Sdump(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fdump(w io.Writer, args ...interface{}) {
|
||||||
|
spew.Fdump(w, args)
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
MemoryLimitEnv = "SONIC_FUZZ_MEM_LIMIT"
|
||||||
|
AsynyncGCEnv = "SONIC_NO_ASYNC_GC"
|
||||||
|
KB uint64 = 1024
|
||||||
|
MB uint64 = 1024 * KB
|
||||||
|
GB uint64 = 1024 * MB
|
||||||
|
)
|
||||||
|
|
||||||
|
func setMemLimit(limit uint64) {
|
||||||
|
threshold := uint64(float64(limit) * 0.7)
|
||||||
|
numWorker := uint64(runtime.GOMAXPROCS(0))
|
||||||
|
if os.Getenv(MemoryLimitEnv) != "" {
|
||||||
|
if memGB, err := strconv.ParseUint(os.Getenv(MemoryLimitEnv), 10, 64); err == nil {
|
||||||
|
limit = memGB * GB
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gctuner.Tuning(threshold / numWorker)
|
||||||
|
log.Printf("[%d] Memory Limit: %d GB, Memory Threshold: %d MB\n", os.Getpid(), limit/GB, threshold/MB)
|
||||||
|
log.Printf("[%d] Memory Threshold Per Worker: %d MB\n", os.Getpid(), threshold/numWorker/MB)
|
||||||
|
}
|
||||||
|
|
||||||
|
func enableSyncGC() {
|
||||||
|
var debugAsyncGC = os.Getenv("AsynyncGCEnv") == ""
|
||||||
go func () {
|
go func () {
|
||||||
if !debugAsyncGC {
|
if !debugAsyncGC {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
println("Begin GC looping...")
|
log.Printf("Begin GC looping...")
|
||||||
for {
|
for {
|
||||||
runtime.GC()
|
runtime.GC()
|
||||||
debug.FreeOSMemory()
|
debug.FreeOSMemory()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
// Avoid OOM
|
||||||
|
setMemLimit(8 * GB)
|
||||||
|
enableSyncGC()
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond)
|
||||||
m.Run()
|
m.Run()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
11
fuzz/go.mod
11
fuzz/go.mod
|
|
@ -3,18 +3,19 @@ module github.com/bytedance/sonic/fuzz
|
||||||
go 1.18
|
go 1.18
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/bytedance/sonic v1.0.0
|
github.com/bytedance/gopkg v0.0.0-20221122125632-68358b8ecec6
|
||||||
github.com/stretchr/testify v1.7.0
|
github.com/bytedance/sonic v1.5.0
|
||||||
|
github.com/davecgh/go-spew v1.1.1
|
||||||
|
github.com/stretchr/testify v1.8.1
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06 // indirect
|
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
|
||||||
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
|
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
replace github.com/bytedance/sonic => ../.
|
replace github.com/bytedance/sonic => ../.
|
||||||
|
|
|
||||||
37
fuzz/go.sum
37
fuzz/go.sum
|
|
@ -1,40 +1,31 @@
|
||||||
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06 h1:1sDoSuDPWzhkdzNVxCxtIaKiAe96ESVPv8coGwc1gZ4=
|
github.com/bytedance/gopkg v0.0.0-20221122125632-68358b8ecec6 h1:FCLDGi1EmB7JzjVVYNZiqc/zAJj2BQ5M0lfkVOxbfs8=
|
||||||
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
|
github.com/bytedance/gopkg v0.0.0-20221122125632-68358b8ecec6/go.mod h1:5FoAH5xUHHCMDvQPy1rnj8moqLkLHFaDVBjHhcFwEi0=
|
||||||
|
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
|
||||||
|
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/goccy/go-json v0.9.4 h1:L8MLKG2mvVXiQu07qB6hmfqeSYQdOnqPot2GhsIwIaI=
|
|
||||||
github.com/goccy/go-json v0.9.4/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
|
||||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
|
||||||
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
|
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
|
||||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
|
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
|
||||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
|
||||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/tidwall/gjson v1.12.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/tidwall/gjson v1.13.0 h1:3TFY9yxOQShrvmjdM76K+jc66zJeT6D3/VFFYCGQf7M=
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
github.com/tidwall/gjson v1.13.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
|
||||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
|
||||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
|
||||||
github.com/tidwall/sjson v1.2.4 h1:cuiLzLnaMeBhRmEv00Lpk3tkYrcxpmbU81tAY4Dw0tc=
|
|
||||||
github.com/tidwall/sjson v1.2.4/go.mod h1:098SZ494YoMWPmMO6ct4dcFnqxwj9r/gF0Etp19pSNM=
|
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU=
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
|
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||||
|
|
|
||||||
|
|
@ -71,10 +71,10 @@ func generateJSONTag(name string) reflect.StructTag {
|
||||||
name = strings.Split(name, ",")[0] // remove origin "," in tag name
|
name = strings.Split(name, ",")[0] // remove origin "," in tag name
|
||||||
switch int(rand.Int() % 5) {
|
switch int(rand.Int() % 5) {
|
||||||
case 0: return reflect.StructTag(`json:"-"`) // always omitted
|
case 0: return reflect.StructTag(`json:"-"`) // always omitted
|
||||||
case 1: return reflect.StructTag("") // empty tag
|
case 1: opt = "" // empty opt
|
||||||
case 2: opt = "" // empty opt
|
case 2: opt = "omitempty"
|
||||||
case 3: opt = "omitempty"
|
// case 3: opt = "string"
|
||||||
case 4: opt = "string"
|
default: return reflect.StructTag("") // empty tag
|
||||||
}
|
}
|
||||||
return reflect.StructTag(fmt.Sprintf(`json:"%s,%s"`, name, opt))
|
return reflect.StructTag(fmt.Sprintf(`json:"%s,%s"`, name, opt))
|
||||||
}
|
}
|
||||||
|
|
@ -146,7 +146,7 @@ func fuzzDynamicStruct(t *testing.T, data []byte, v map[string]interface{}) {
|
||||||
require.NoErrorf(t, err, "error in sonic pretouch struct %v", typ)
|
require.NoErrorf(t, err, "error in sonic pretouch struct %v", typ)
|
||||||
|
|
||||||
// Unmarshal fuzz
|
// Unmarshal fuzz
|
||||||
serr := sonic.Unmarshal(data, &sv)
|
serr := target.Unmarshal(data, &sv)
|
||||||
jerr := json.Unmarshal(data, &jv)
|
jerr := json.Unmarshal(data, &jv)
|
||||||
require.Equalf(t, serr != nil, jerr != nil, "different error in sonic unmarshal %v", typ)
|
require.Equalf(t, serr != nil, jerr != nil, "different error in sonic unmarshal %v", typ)
|
||||||
if serr != nil {
|
if serr != nil {
|
||||||
|
|
@ -155,7 +155,7 @@ func fuzzDynamicStruct(t *testing.T, data []byte, v map[string]interface{}) {
|
||||||
require.Equal(t, sv, jv, "different result in sonic unmarshal %v", typ)
|
require.Equal(t, sv, jv, "different result in sonic unmarshal %v", typ)
|
||||||
|
|
||||||
// Marshal fuzz
|
// Marshal fuzz
|
||||||
sout, serr := sonic.Marshal(sv)
|
sout, serr := target.Marshal(sv)
|
||||||
jout, jerr := json.Marshal(jv)
|
jout, jerr := json.Marshal(jv)
|
||||||
require.NoError(t, serr, "error in sonic marshal %v", typ)
|
require.NoError(t, serr, "error in sonic marshal %v", typ)
|
||||||
require.NoError(t, jerr, "error in json marshal %v", typ)
|
require.NoError(t, jerr, "error in json marshal %v", typ)
|
||||||
|
|
|
||||||
|
|
@ -122,4 +122,14 @@ func __validate_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
//go:noescape
|
//go:noescape
|
||||||
//goland:noinspection GoUnusedParameter
|
//goland:noinspection GoUnusedParameter
|
||||||
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8_fast(s *string) (ret int)
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -256,15 +256,6 @@ func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
|
||||||
var v types.JsonState
|
|
||||||
valid := uint64(types.F_VALIDATE_STRING)
|
|
||||||
i := 0
|
|
||||||
s := "test\xff\""
|
|
||||||
__vstring(&s, &i, &v, valid)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
var v types.JsonState
|
var v types.JsonState
|
||||||
i := 0
|
i := 0
|
||||||
|
|
@ -275,51 +266,6 @@ func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
assert.Equal(t, int64(0), v.Iv)
|
assert.Equal(t, int64(0), v.Iv)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_ValidateOne(t *testing.T) {
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\n\\r\\b\\f😁ſ景\xef\xbf\xbf\xf4\x8f\xbf\xbf\xc2\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\""
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, len(s), p)
|
|
||||||
assert.Equal(t, 0, r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\bxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x00\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x80\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\xed\xbf\xbf\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||||
v, e := hex.DecodeString(
|
v, e := hex.DecodeString(
|
||||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||||
|
|
|
||||||
|
|
@ -9,32 +9,34 @@ package avx
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 28656
|
_subr__f32toa = __native_entry__() + 29744
|
||||||
_subr__f64toa = __native_entry__() + 496
|
_subr__f64toa = __native_entry__() + 496
|
||||||
_subr__get_by_path = __native_entry__() + 26848
|
_subr__get_by_path = __native_entry__() + 27424
|
||||||
_subr__html_escape = __native_entry__() + 10480
|
_subr__html_escape = __native_entry__() + 9968
|
||||||
_subr__i64toa = __native_entry__() + 4176
|
_subr__i64toa = __native_entry__() + 4112
|
||||||
_subr__lspace = __native_entry__() + 80
|
_subr__lspace = __native_entry__() + 80
|
||||||
_subr__quote = __native_entry__() + 5552
|
_subr__quote = __native_entry__() + 5792
|
||||||
_subr__skip_array = __native_entry__() + 20160
|
_subr__skip_array = __native_entry__() + 20576
|
||||||
_subr__skip_number = __native_entry__() + 23472
|
_subr__skip_number = __native_entry__() + 23920
|
||||||
_subr__skip_object = __native_entry__() + 22048
|
_subr__skip_object = __native_entry__() + 22496
|
||||||
_subr__skip_one = __native_entry__() + 23616
|
_subr__skip_one = __native_entry__() + 24080
|
||||||
_subr__skip_one_fast = __native_entry__() + 23824
|
_subr__skip_one_fast = __native_entry__() + 24320
|
||||||
_subr__u64toa = __native_entry__() + 4288
|
_subr__u64toa = __native_entry__() + 4384
|
||||||
_subr__unquote = __native_entry__() + 7296
|
_subr__unquote = __native_entry__() + 7488
|
||||||
_subr__validate_one = __native_entry__() + 23648
|
_subr__validate_one = __native_entry__() + 24144
|
||||||
_subr__value = __native_entry__() + 13728
|
_subr__validate_utf8 = __native_entry__() + 28464
|
||||||
_subr__vnumber = __native_entry__() + 17904
|
_subr__validate_utf8_fast = __native_entry__() + 29136
|
||||||
_subr__vsigned = __native_entry__() + 19456
|
_subr__value = __native_entry__() + 14672
|
||||||
_subr__vstring = __native_entry__() + 15808
|
_subr__vnumber = __native_entry__() + 18320
|
||||||
_subr__vunsigned = __native_entry__() + 19808
|
_subr__vsigned = __native_entry__() + 19856
|
||||||
|
_subr__vstring = __native_entry__() + 16864
|
||||||
|
_subr__vunsigned = __native_entry__() + 20208
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_stack__f32toa = 64
|
_stack__f32toa = 56
|
||||||
_stack__f64toa = 80
|
_stack__f64toa = 80
|
||||||
_stack__get_by_path = 296
|
_stack__get_by_path = 312
|
||||||
_stack__html_escape = 64
|
_stack__html_escape = 64
|
||||||
_stack__i64toa = 16
|
_stack__i64toa = 16
|
||||||
_stack__lspace = 8
|
_stack__lspace = 8
|
||||||
|
|
@ -45,10 +47,12 @@ const (
|
||||||
_stack__skip_one = 128
|
_stack__skip_one = 128
|
||||||
_stack__skip_one_fast = 208
|
_stack__skip_one_fast = 208
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 72
|
_stack__unquote = 128
|
||||||
_stack__validate_one = 128
|
_stack__validate_one = 128
|
||||||
_stack__value = 336
|
_stack__validate_utf8 = 48
|
||||||
_stack__vnumber = 248
|
_stack__validate_utf8_fast = 24
|
||||||
|
_stack__value = 368
|
||||||
|
_stack__vnumber = 280
|
||||||
_stack__vsigned = 16
|
_stack__vsigned = 16
|
||||||
_stack__vstring = 128
|
_stack__vstring = 128
|
||||||
_stack__vunsigned = 24
|
_stack__vunsigned = 24
|
||||||
|
|
@ -70,6 +74,8 @@ var (
|
||||||
_ = _subr__u64toa
|
_ = _subr__u64toa
|
||||||
_ = _subr__unquote
|
_ = _subr__unquote
|
||||||
_ = _subr__validate_one
|
_ = _subr__validate_one
|
||||||
|
_ = _subr__validate_utf8
|
||||||
|
_ = _subr__validate_utf8_fast
|
||||||
_ = _subr__value
|
_ = _subr__value
|
||||||
_ = _subr__vnumber
|
_ = _subr__vnumber
|
||||||
_ = _subr__vsigned
|
_ = _subr__vsigned
|
||||||
|
|
@ -93,6 +99,8 @@ const (
|
||||||
_ = _stack__u64toa
|
_ = _stack__u64toa
|
||||||
_ = _stack__unquote
|
_ = _stack__unquote
|
||||||
_ = _stack__validate_one
|
_ = _stack__validate_one
|
||||||
|
_ = _stack__validate_utf8
|
||||||
|
_ = _stack__validate_utf8_fast
|
||||||
_ = _stack__value
|
_ = _stack__value
|
||||||
_ = _stack__vnumber
|
_ = _stack__vnumber
|
||||||
_ = _stack__vsigned
|
_ = _stack__vsigned
|
||||||
|
|
|
||||||
|
|
@ -122,4 +122,14 @@ func __validate_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
//go:noescape
|
//go:noescape
|
||||||
//goland:noinspection GoUnusedParameter
|
//goland:noinspection GoUnusedParameter
|
||||||
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8_fast(s *string) (ret int)
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -256,15 +256,6 @@ func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
|
||||||
var v types.JsonState
|
|
||||||
valid := uint64(types.F_VALIDATE_STRING)
|
|
||||||
i := 0
|
|
||||||
s := "test\xff\""
|
|
||||||
__vstring(&s, &i, &v, valid)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
var v types.JsonState
|
var v types.JsonState
|
||||||
i := 0
|
i := 0
|
||||||
|
|
@ -275,51 +266,6 @@ func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
assert.Equal(t, int64(0), v.Iv)
|
assert.Equal(t, int64(0), v.Iv)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_ValidateOne(t *testing.T) {
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\n\\r\\b\\f😁ſ景\xef\xbf\xbf\xf4\x8f\xbf\xbf\xc2\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\""
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, len(s), p)
|
|
||||||
assert.Equal(t, 0, r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\bxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x00\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x80\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\xed\xbf\xbf\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||||
v, e := hex.DecodeString(
|
v, e := hex.DecodeString(
|
||||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||||
|
|
|
||||||
|
|
@ -9,48 +9,52 @@ package avx2
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 32816
|
_subr__f32toa = __native_entry__() + 35216
|
||||||
_subr__f64toa = __native_entry__() + 752
|
_subr__f64toa = __native_entry__() + 752
|
||||||
_subr__get_by_path = __native_entry__() + 30896
|
_subr__get_by_path = __native_entry__() + 30384
|
||||||
_subr__html_escape = __native_entry__() + 12320
|
_subr__html_escape = __native_entry__() + 11712
|
||||||
_subr__i64toa = __native_entry__() + 4432
|
_subr__i64toa = __native_entry__() + 4368
|
||||||
_subr__lspace = __native_entry__() + 224
|
_subr__lspace = __native_entry__() + 224
|
||||||
_subr__quote = __native_entry__() + 5904
|
_subr__quote = __native_entry__() + 6160
|
||||||
_subr__skip_array = __native_entry__() + 23472
|
_subr__skip_array = __native_entry__() + 22864
|
||||||
_subr__skip_number = __native_entry__() + 27440
|
_subr__skip_number = __native_entry__() + 26928
|
||||||
_subr__skip_object = __native_entry__() + 25392
|
_subr__skip_object = __native_entry__() + 24864
|
||||||
_subr__skip_one = __native_entry__() + 27584
|
_subr__skip_one = __native_entry__() + 27088
|
||||||
_subr__skip_one_fast = __native_entry__() + 27984
|
_subr__skip_one_fast = __native_entry__() + 27504
|
||||||
_subr__u64toa = __native_entry__() + 4544
|
_subr__u64toa = __native_entry__() + 4640
|
||||||
_subr__unquote = __native_entry__() + 8848
|
_subr__unquote = __native_entry__() + 8960
|
||||||
_subr__validate_one = __native_entry__() + 27616
|
_subr__validate_one = __native_entry__() + 27152
|
||||||
_subr__value = __native_entry__() + 16896
|
_subr__validate_utf8 = __native_entry__() + 31552
|
||||||
_subr__vnumber = __native_entry__() + 21216
|
_subr__validate_utf8_fast = __native_entry__() + 32496
|
||||||
_subr__vsigned = __native_entry__() + 22768
|
_subr__value = __native_entry__() + 16816
|
||||||
_subr__vstring = __native_entry__() + 19280
|
_subr__vnumber = __native_entry__() + 20608
|
||||||
_subr__vunsigned = __native_entry__() + 23120
|
_subr__vsigned = __native_entry__() + 22144
|
||||||
|
_subr__vstring = __native_entry__() + 19312
|
||||||
|
_subr__vunsigned = __native_entry__() + 22496
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_stack__f32toa = 64
|
_stack__f32toa = 56
|
||||||
_stack__f64toa = 80
|
_stack__f64toa = 80
|
||||||
_stack__get_by_path = 304
|
_stack__get_by_path = 320
|
||||||
_stack__html_escape = 72
|
_stack__html_escape = 72
|
||||||
_stack__i64toa = 16
|
_stack__i64toa = 16
|
||||||
_stack__lspace = 8
|
_stack__lspace = 8
|
||||||
_stack__quote = 72
|
_stack__quote = 72
|
||||||
_stack__skip_array = 136
|
_stack__skip_array = 120
|
||||||
_stack__skip_number = 80
|
_stack__skip_number = 80
|
||||||
_stack__skip_object = 136
|
_stack__skip_object = 120
|
||||||
_stack__skip_one = 136
|
_stack__skip_one = 120
|
||||||
_stack__skip_one_fast = 216
|
_stack__skip_one_fast = 216
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 72
|
_stack__unquote = 128
|
||||||
_stack__validate_one = 136
|
_stack__validate_one = 120
|
||||||
_stack__value = 336
|
_stack__validate_utf8 = 48
|
||||||
_stack__vnumber = 248
|
_stack__validate_utf8_fast = 200
|
||||||
|
_stack__value = 368
|
||||||
|
_stack__vnumber = 280
|
||||||
_stack__vsigned = 16
|
_stack__vsigned = 16
|
||||||
_stack__vstring = 136
|
_stack__vstring = 104
|
||||||
_stack__vunsigned = 24
|
_stack__vunsigned = 24
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -70,6 +74,8 @@ var (
|
||||||
_ = _subr__u64toa
|
_ = _subr__u64toa
|
||||||
_ = _subr__unquote
|
_ = _subr__unquote
|
||||||
_ = _subr__validate_one
|
_ = _subr__validate_one
|
||||||
|
_ = _subr__validate_utf8
|
||||||
|
_ = _subr__validate_utf8_fast
|
||||||
_ = _subr__value
|
_ = _subr__value
|
||||||
_ = _subr__vnumber
|
_ = _subr__vnumber
|
||||||
_ = _subr__vsigned
|
_ = _subr__vsigned
|
||||||
|
|
@ -93,6 +99,8 @@ const (
|
||||||
_ = _stack__u64toa
|
_ = _stack__u64toa
|
||||||
_ = _stack__unquote
|
_ = _stack__unquote
|
||||||
_ = _stack__validate_one
|
_ = _stack__validate_one
|
||||||
|
_ = _stack__validate_utf8
|
||||||
|
_ = _stack__validate_utf8_fast
|
||||||
_ = _stack__value
|
_ = _stack__value
|
||||||
_ = _stack__vnumber
|
_ = _stack__vnumber
|
||||||
_ = _stack__vsigned
|
_ = _stack__vsigned
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,10 @@ import (
|
||||||
`github.com/bytedance/sonic/internal/native/types`
|
`github.com/bytedance/sonic/internal/native/types`
|
||||||
)
|
)
|
||||||
|
|
||||||
const MaxFrameSize uintptr = 400
|
const (
|
||||||
|
MaxFrameSize uintptr = 400
|
||||||
|
BufPaddingSize int = 64
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
S_f64toa uintptr
|
S_f64toa uintptr
|
||||||
|
|
@ -113,6 +116,16 @@ func U64toa(out *byte, val uint64) (ret int)
|
||||||
//goland:noinspection GoUnusedParameter
|
//goland:noinspection GoUnusedParameter
|
||||||
func F64toa(out *byte, val float64) (ret int)
|
func F64toa(out *byte, val float64) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func ValidateUTF8(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func ValidateUTF8Fast(s *string) (ret int)
|
||||||
|
|
||||||
func useAVX() {
|
func useAVX() {
|
||||||
S_f64toa = avx.S_f64toa
|
S_f64toa = avx.S_f64toa
|
||||||
S_f32toa = avx.S_f32toa
|
S_f32toa = avx.S_f32toa
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,6 @@ TEXT ·HTMLEscape(SB), NOSPLIT, $0 - 40
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__html_escape(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__html_escape(SB)
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__html_escape(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__html_escape(SB)
|
||||||
|
|
||||||
|
|
||||||
TEXT ·Value(SB), NOSPLIT, $0 - 48
|
TEXT ·Value(SB), NOSPLIT, $0 - 48
|
||||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||||
JE 2(PC)
|
JE 2(PC)
|
||||||
|
|
@ -81,6 +80,7 @@ TEXT ·GetByPath(SB), NOSPLIT, $0 - 32
|
||||||
JE 2(PC)
|
JE 2(PC)
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__get_by_path(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__get_by_path(SB)
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__get_by_path(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__get_by_path(SB)
|
||||||
|
|
||||||
TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
|
TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
|
||||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||||
JE 2(PC)
|
JE 2(PC)
|
||||||
|
|
@ -90,6 +90,24 @@ TEXT ·ValidateOne(SB), NOSPLIT, $0 - 32
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__validate_one(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__validate_one(SB)
|
||||||
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__validate_one(SB)
|
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__validate_one(SB)
|
||||||
|
|
||||||
|
TEXT ·ValidateUTF8(SB), NOSPLIT, $0 - 40
|
||||||
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||||
|
JE 2(PC)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__validate_utf8(SB)
|
||||||
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||||
|
JE 2(PC)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__validate_utf8(SB)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__validate_utf8(SB)
|
||||||
|
|
||||||
|
TEXT ·ValidateUTF8Fast(SB), NOSPLIT, $0 - 16
|
||||||
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||||
|
JE 2(PC)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__validate_utf8_fast(SB)
|
||||||
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX(SB), $0
|
||||||
|
JE 2(PC)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕avx·__validate_utf8_fast(SB)
|
||||||
|
JMP github·com∕bytedance∕sonic∕internal∕native∕sse·__validate_utf8_fast(SB)
|
||||||
|
|
||||||
TEXT ·I64toa(SB), NOSPLIT, $0 - 32
|
TEXT ·I64toa(SB), NOSPLIT, $0 - 32
|
||||||
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
CMPB github·com∕bytedance∕sonic∕internal∕cpu·HasAVX2(SB), $0
|
||||||
JE 2(PC)
|
JE 2(PC)
|
||||||
|
|
|
||||||
|
|
@ -120,4 +120,14 @@ func __validate_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
//go:noescape
|
//go:noescape
|
||||||
//goland:noinspection GoUnusedParameter
|
//goland:noinspection GoUnusedParameter
|
||||||
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8_fast(s *string) (ret int)
|
||||||
|
|
@ -254,15 +254,6 @@ func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
|
||||||
var v types.JsonState
|
|
||||||
valid := uint64(types.F_VALIDATE_STRING)
|
|
||||||
i := 0
|
|
||||||
s := "test\xff\""
|
|
||||||
__vstring(&s, &i, &v, valid)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
var v types.JsonState
|
var v types.JsonState
|
||||||
i := 0
|
i := 0
|
||||||
|
|
@ -273,51 +264,6 @@ func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
assert.Equal(t, int64(0), v.Iv)
|
assert.Equal(t, int64(0), v.Iv)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_ValidateOne(t *testing.T) {
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\n\\r\\b\\f😁ſ景\xef\xbf\xbf\xf4\x8f\xbf\xbf\xc2\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\""
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, len(s), p)
|
|
||||||
assert.Equal(t, 0, r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\bxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x00\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x80\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\xed\xbf\xbf\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||||
v, e := hex.DecodeString(
|
v, e := hex.DecodeString(
|
||||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||||
|
|
|
||||||
|
|
@ -122,4 +122,14 @@ func __validate_one(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
//go:noescape
|
//go:noescape
|
||||||
//goland:noinspection GoUnusedParameter
|
//goland:noinspection GoUnusedParameter
|
||||||
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
func __get_by_path(s *string, p *int, path *[]interface{}) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8(s *string, p *int, m *types.StateMachine) (ret int)
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
|
//go:noescape
|
||||||
|
//goland:noinspection GoUnusedParameter
|
||||||
|
func __validate_utf8_fast(s *string) (ret int)
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -256,15 +256,6 @@ func TestNative_Vstring_ValidUnescapedChars(t *testing.T) {
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_Vstring_ValidUtf8(t *testing.T) {
|
|
||||||
var v types.JsonState
|
|
||||||
valid := uint64(types.F_VALIDATE_STRING)
|
|
||||||
i := 0
|
|
||||||
s := "test\xff\""
|
|
||||||
__vstring(&s, &i, &v, valid)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), int(v.Vt))
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringEscapeEOF(t *testing.T) {
|
func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
var v types.JsonState
|
var v types.JsonState
|
||||||
i := 0
|
i := 0
|
||||||
|
|
@ -275,51 +266,6 @@ func TestNative_VstringEscapeEOF(t *testing.T) {
|
||||||
assert.Equal(t, int64(0), v.Iv)
|
assert.Equal(t, int64(0), v.Iv)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNative_ValidateOne(t *testing.T) {
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\n\\r\\b\\f😁ſ景\xef\xbf\xbf\xf4\x8f\xbf\xbf\xc2\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\""
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, len(s), p)
|
|
||||||
assert.Equal(t, 0, r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\bxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x00\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 64, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\x80\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
{
|
|
||||||
p := 0
|
|
||||||
s := "\"\xed\xbf\xbf\"x"
|
|
||||||
r := __validate_one(&s, &p, &types.StateMachine{})
|
|
||||||
assert.Equal(t, 1, p)
|
|
||||||
assert.Equal(t, -int(types.ERR_INVALID_CHAR), r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
func TestNative_VstringHangUpOnRandomData(t *testing.T) {
|
||||||
v, e := hex.DecodeString(
|
v, e := hex.DecodeString(
|
||||||
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
"228dc61efd54ef80a908fb6026b7f2d5f92a257ba8b347c995f259eb8685376a" +
|
||||||
|
|
|
||||||
|
|
@ -9,32 +9,34 @@ package sse
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 29152
|
_subr__f32toa = __native_entry__() + 28688
|
||||||
_subr__f64toa = __native_entry__() + 464
|
_subr__f64toa = __native_entry__() + 464
|
||||||
_subr__get_by_path = __native_entry__() + 27392
|
_subr__get_by_path = __native_entry__() + 26432
|
||||||
_subr__html_escape = __native_entry__() + 10416
|
_subr__html_escape = __native_entry__() + 9584
|
||||||
_subr__i64toa = __native_entry__() + 4048
|
_subr__i64toa = __native_entry__() + 3744
|
||||||
_subr__lspace = __native_entry__() + 80
|
_subr__lspace = __native_entry__() + 80
|
||||||
_subr__quote = __native_entry__() + 5456
|
_subr__quote = __native_entry__() + 5472
|
||||||
_subr__skip_array = __native_entry__() + 20144
|
_subr__skip_array = __native_entry__() + 19184
|
||||||
_subr__skip_number = __native_entry__() + 23488
|
_subr__skip_number = __native_entry__() + 22528
|
||||||
_subr__skip_object = __native_entry__() + 22032
|
_subr__skip_object = __native_entry__() + 21088
|
||||||
_subr__skip_one = __native_entry__() + 23632
|
_subr__skip_one = __native_entry__() + 22688
|
||||||
_subr__skip_one_fast = __native_entry__() + 23840
|
_subr__skip_one_fast = __native_entry__() + 22912
|
||||||
_subr__u64toa = __native_entry__() + 4176
|
_subr__u64toa = __native_entry__() + 4016
|
||||||
_subr__unquote = __native_entry__() + 7232
|
_subr__unquote = __native_entry__() + 7184
|
||||||
_subr__validate_one = __native_entry__() + 23664
|
_subr__validate_one = __native_entry__() + 22736
|
||||||
_subr__value = __native_entry__() + 13680
|
_subr__validate_utf8 = __native_entry__() + 27456
|
||||||
_subr__vnumber = __native_entry__() + 17888
|
_subr__validate_utf8_fast = __native_entry__() + 28128
|
||||||
_subr__vsigned = __native_entry__() + 19440
|
_subr__value = __native_entry__() + 13216
|
||||||
_subr__vstring = __native_entry__() + 15760
|
_subr__vnumber = __native_entry__() + 16928
|
||||||
_subr__vunsigned = __native_entry__() + 19792
|
_subr__vsigned = __native_entry__() + 18464
|
||||||
|
_subr__vstring = __native_entry__() + 15408
|
||||||
|
_subr__vunsigned = __native_entry__() + 18816
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_stack__f32toa = 64
|
_stack__f32toa = 56
|
||||||
_stack__f64toa = 80
|
_stack__f64toa = 80
|
||||||
_stack__get_by_path = 232
|
_stack__get_by_path = 264
|
||||||
_stack__html_escape = 64
|
_stack__html_escape = 64
|
||||||
_stack__i64toa = 16
|
_stack__i64toa = 16
|
||||||
_stack__lspace = 8
|
_stack__lspace = 8
|
||||||
|
|
@ -43,14 +45,16 @@ const (
|
||||||
_stack__skip_number = 72
|
_stack__skip_number = 72
|
||||||
_stack__skip_object = 128
|
_stack__skip_object = 128
|
||||||
_stack__skip_one = 128
|
_stack__skip_one = 128
|
||||||
_stack__skip_one_fast = 144
|
_stack__skip_one_fast = 160
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 72
|
_stack__unquote = 128
|
||||||
_stack__validate_one = 128
|
_stack__validate_one = 128
|
||||||
_stack__value = 336
|
_stack__validate_utf8 = 48
|
||||||
_stack__vnumber = 248
|
_stack__validate_utf8_fast = 24
|
||||||
|
_stack__value = 368
|
||||||
|
_stack__vnumber = 280
|
||||||
_stack__vsigned = 16
|
_stack__vsigned = 16
|
||||||
_stack__vstring = 144
|
_stack__vstring = 128
|
||||||
_stack__vunsigned = 24
|
_stack__vunsigned = 24
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -70,6 +74,8 @@ var (
|
||||||
_ = _subr__u64toa
|
_ = _subr__u64toa
|
||||||
_ = _subr__unquote
|
_ = _subr__unquote
|
||||||
_ = _subr__validate_one
|
_ = _subr__validate_one
|
||||||
|
_ = _subr__validate_utf8
|
||||||
|
_ = _subr__validate_utf8_fast
|
||||||
_ = _subr__value
|
_ = _subr__value
|
||||||
_ = _subr__vnumber
|
_ = _subr__vnumber
|
||||||
_ = _subr__vsigned
|
_ = _subr__vsigned
|
||||||
|
|
@ -93,6 +99,8 @@ const (
|
||||||
_ = _stack__u64toa
|
_ = _stack__u64toa
|
||||||
_ = _stack__unquote
|
_ = _stack__unquote
|
||||||
_ = _stack__validate_one
|
_ = _stack__validate_one
|
||||||
|
_ = _stack__validate_utf8
|
||||||
|
_ = _stack__validate_utf8_fast
|
||||||
_ = _stack__value
|
_ = _stack__value
|
||||||
_ = _stack__vnumber
|
_ = _stack__vnumber
|
||||||
_ = _stack__vsigned
|
_ = _stack__vsigned
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ type ValueType int
|
||||||
type ParsingError uint
|
type ParsingError uint
|
||||||
type SearchingError uint
|
type SearchingError uint
|
||||||
|
|
||||||
// !NOT MODIFIED ONLY.
|
// NOTE: !NOT MODIFIED ONLY.
|
||||||
// This definitions are followed in native/types.h.
|
// This definitions are followed in native/types.h.
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -75,6 +75,7 @@ const (
|
||||||
ERR_RECURSE_EXCEED_MAX ParsingError = 7
|
ERR_RECURSE_EXCEED_MAX ParsingError = 7
|
||||||
ERR_FLOAT_INFINITY ParsingError = 8
|
ERR_FLOAT_INFINITY ParsingError = 8
|
||||||
ERR_MISMATCH ParsingError = 9
|
ERR_MISMATCH ParsingError = 9
|
||||||
|
ERR_INVALID_UTF8 ParsingError = 10
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ParsingErrors = []string{
|
var _ParsingErrors = []string{
|
||||||
|
|
@ -88,6 +89,7 @@ var _ParsingErrors = []string{
|
||||||
ERR_RECURSE_EXCEED_MAX : "recursion exceeded max depth",
|
ERR_RECURSE_EXCEED_MAX : "recursion exceeded max depth",
|
||||||
ERR_FLOAT_INFINITY : "float number is infinity",
|
ERR_FLOAT_INFINITY : "float number is infinity",
|
||||||
ERR_MISMATCH : "mismatched type with value",
|
ERR_MISMATCH : "mismatched type with value",
|
||||||
|
ERR_INVALID_UTF8 : "invalid UTF8",
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self ParsingError) Error() string {
|
func (self ParsingError) Error() string {
|
||||||
|
|
|
||||||
201
licenses/LICENSE-simdjson
Normal file
201
licenses/LICENSE-simdjson
Normal file
|
|
@ -0,0 +1,201 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2018-2023 The simdjson authors
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
@ -48,13 +48,13 @@ static inline unsigned ctz10_u32(const uint32_t v) {
|
||||||
if (v < 1000000) return 6;
|
if (v < 1000000) return 6;
|
||||||
if (v < 10000000) return 7;
|
if (v < 10000000) return 7;
|
||||||
if (v < 100000000) return 8;
|
if (v < 100000000) return 8;
|
||||||
return 9;
|
else return 9;
|
||||||
} else {
|
} else {
|
||||||
if (v < 10) return 1;
|
if (v < 10) return 1;
|
||||||
if (v < 100) return 2;
|
if (v < 100) return 2;
|
||||||
if (v < 1000) return 3;
|
if (v < 1000) return 3;
|
||||||
if (v < 10000) return 4;
|
if (v < 10000) return 4;
|
||||||
return 5;
|
else return 5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ static inline unsigned ctz10(const uint64_t v) {
|
||||||
if (v < 100000000000000ull) return 14;
|
if (v < 100000000000000ull) return 14;
|
||||||
if (v < 1000000000000000ull) return 15;
|
if (v < 1000000000000000ull) return 15;
|
||||||
if (v < 10000000000000000ull) return 16;
|
if (v < 10000000000000000ull) return 16;
|
||||||
return 17;
|
else return 17;
|
||||||
}
|
}
|
||||||
if (v < 10ull) return 1;
|
if (v < 10ull) return 1;
|
||||||
if (v < 100ull) return 2;
|
if (v < 100ull) return 2;
|
||||||
|
|
@ -65,7 +65,7 @@ static inline unsigned ctz10(const uint64_t v) {
|
||||||
if (v < 10000000ull) return 7;
|
if (v < 10000000ull) return 7;
|
||||||
if (v < 100000000ull) return 8;
|
if (v < 100000000ull) return 8;
|
||||||
if (v < 1000000000ull) return 9;
|
if (v < 1000000000ull) return 9;
|
||||||
return 10;
|
else return 10;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
#include "native.h"
|
#include "native.h"
|
||||||
|
#include "test/xprintf.h"
|
||||||
|
#include "test/xassert.h"
|
||||||
#include "fastbytes.c"
|
#include "fastbytes.c"
|
||||||
#include "fastfloat.c"
|
#include "fastfloat.c"
|
||||||
#include "fastint.c"
|
#include "fastint.c"
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,8 @@ typedef struct {
|
||||||
} JsonState;
|
} JsonState;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int sp;
|
int64_t sp;
|
||||||
int vt[MAX_RECURSE];
|
int64_t vt[MAX_RECURSE];
|
||||||
} StateMachine;
|
} StateMachine;
|
||||||
|
|
||||||
int f64toa(char *out, double val);
|
int f64toa(char *out, double val);
|
||||||
|
|
@ -144,9 +144,10 @@ long skip_number(const GoString *src, long *p);
|
||||||
bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val);
|
bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val);
|
||||||
double atof_native(const char *sp, ssize_t nb, char *dbuf, ssize_t cap);
|
double atof_native(const char *sp, ssize_t nb, char *dbuf, ssize_t cap);
|
||||||
|
|
||||||
ssize_t utf8_validate(const char *sp, ssize_t nb);
|
|
||||||
long validate_string(const GoString *src, long *p);
|
long validate_string(const GoString *src, long *p);
|
||||||
long validate_one(const GoString *src, long *p, StateMachine *m);
|
long validate_one(const GoString *src, long *p, StateMachine *m);
|
||||||
|
long validate_utf8(const GoString *src, long *p, StateMachine *m);
|
||||||
|
long validate_utf8_fast(const GoString *src);
|
||||||
|
|
||||||
long skip_one_fast(const GoString *src, long *p);
|
long skip_one_fast(const GoString *src, long *p);
|
||||||
long get_by_path(const GoString *src, long *p, const GoSlice *path);
|
long get_by_path(const GoString *src, long *p, const GoSlice *path);
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "native.h"
|
#include "native.h"
|
||||||
|
#include "utils.h"
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
/** String Quoting **/
|
/** String Quoting **/
|
||||||
|
|
@ -108,27 +109,6 @@ static const quoted_t _HtmlQuoteTab[256] = {
|
||||||
[0xa9] = { .n = 6, .s = "\\u2029" },
|
[0xa9] = { .n = 6, .s = "\\u2029" },
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void memcpy_p8(char *dp, const char *sp, ssize_t nb) {
|
|
||||||
if (nb >= 4) { *(uint32_t *)dp = *(const uint32_t *)sp; sp += 4, dp += 4, nb -= 4; }
|
|
||||||
if (nb >= 2) { *(uint16_t *)dp = *(const uint16_t *)sp; sp += 2, dp += 2, nb -= 2; }
|
|
||||||
if (nb >= 1) { *dp = *sp; }
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void memcpy_p16(char *dp, const char *sp, size_t nb) {
|
|
||||||
if (nb >= 8) { *(uint64_t *)dp = *(const uint64_t *)sp; sp += 8, dp += 8, nb -= 8; }
|
|
||||||
if (nb >= 4) { *(uint32_t *)dp = *(const uint32_t *)sp; sp += 4, dp += 4, nb -= 4; }
|
|
||||||
if (nb >= 2) { *(uint16_t *)dp = *(const uint16_t *)sp; sp += 2, dp += 2, nb -= 2; }
|
|
||||||
if (nb >= 1) { *dp = *sp; }
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void memcpy_p32(char *dp, const char *sp, size_t nb) {
|
|
||||||
if (nb >= 16) { _mm_storeu_si128((void *)dp, _mm_loadu_si128((const void *)sp)); sp += 16, dp += 16, nb -= 16; }
|
|
||||||
if (nb >= 8) { *(uint64_t *)dp = *(const uint64_t *)sp; sp += 8, dp += 8, nb -= 8; }
|
|
||||||
if (nb >= 4) { *(uint32_t *)dp = *(const uint32_t *)sp; sp += 4, dp += 4, nb -= 4; }
|
|
||||||
if (nb >= 2) { *(uint16_t *)dp = *(const uint16_t *)sp; sp += 2, dp += 2, nb -= 2; }
|
|
||||||
if (nb >= 1) { *dp = *sp; }
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline __m128i _mm_find_quote(__m128i vv) {
|
static inline __m128i _mm_find_quote(__m128i vv) {
|
||||||
__m128i e1 = _mm_cmpgt_epi8 (vv, _mm_set1_epi8(-1));
|
__m128i e1 = _mm_cmpgt_epi8 (vv, _mm_set1_epi8(-1));
|
||||||
__m128i e2 = _mm_cmpgt_epi8 (vv, _mm_set1_epi8(31));
|
__m128i e2 = _mm_cmpgt_epi8 (vv, _mm_set1_epi8(31));
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "native.h"
|
#include "native.h"
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
|
static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
|
||||||
static const uint64_t EVEN_MASK = 0x5555555555555555;
|
static const uint64_t EVEN_MASK = 0x5555555555555555;
|
||||||
|
|
@ -41,7 +42,7 @@ static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
|
static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
|
||||||
uint64_t v;
|
unsigned long long v;
|
||||||
uint64_t c = __builtin_uaddll_overflow(v1, v2, &v);
|
uint64_t c = __builtin_uaddll_overflow(v1, v2, &v);
|
||||||
|
|
||||||
/* set the carry */
|
/* set the carry */
|
||||||
|
|
@ -107,7 +108,7 @@ static inline int64_t advance_dword(const GoString *src, long *p, long dec, int6
|
||||||
return ret;
|
return ret;
|
||||||
} else {
|
} else {
|
||||||
*p -= dec;
|
*p -= dec;
|
||||||
for (int i = 0; src->buf[*p] == (val & 0xff); i++, ++*p) { val >>= 8; }
|
for (int i = 0; src->buf[*p] == (val & 0xff) && i < 4; i++, ++*p) { val >>= 8; }
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -368,12 +369,11 @@ static inline int _mm_nonascii_mask(__m128i v) {
|
||||||
|
|
||||||
static inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) {
|
static inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) {
|
||||||
char ch;
|
char ch;
|
||||||
uint64_t m0, m1, m2, m3;
|
uint64_t m0, m1, m2;
|
||||||
uint64_t es, fe, os;
|
uint64_t es, fe, os;
|
||||||
uint64_t cr = 0;
|
uint64_t cr = 0;
|
||||||
long qp = 0;
|
long qp = 0;
|
||||||
long np = 0;
|
long np = 0;
|
||||||
long up = 0;
|
|
||||||
|
|
||||||
/* buffer pointers */
|
/* buffer pointers */
|
||||||
size_t nb = src->len;
|
size_t nb = src->len;
|
||||||
|
|
@ -406,7 +406,6 @@ static inline ssize_t advance_string_validate(const GoString *src, long p, int64
|
||||||
uint32_t s0, s1;
|
uint32_t s0, s1;
|
||||||
uint32_t t0, t1;
|
uint32_t t0, t1;
|
||||||
uint32_t c0, c1;
|
uint32_t c0, c1;
|
||||||
uint32_t u0, u1;
|
|
||||||
#else
|
#else
|
||||||
/* initialize vectors */
|
/* initialize vectors */
|
||||||
__m128i v0;
|
__m128i v0;
|
||||||
|
|
@ -420,7 +419,6 @@ static inline ssize_t advance_string_validate(const GoString *src, long p, int64
|
||||||
uint32_t s0, s1, s2, s3;
|
uint32_t s0, s1, s2, s3;
|
||||||
uint32_t t0, t1, t2, t3;
|
uint32_t t0, t1, t2, t3;
|
||||||
uint32_t c0, c1, c2, c3;
|
uint32_t c0, c1, c2, c3;
|
||||||
uint32_t u0, u1, u2, u3;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define m0_mask(add) \
|
#define m0_mask(add) \
|
||||||
|
|
@ -430,7 +428,6 @@ static inline ssize_t advance_string_validate(const GoString *src, long p, int64
|
||||||
es = add(os, m1, &cr) << 1; \
|
es = add(os, m1, &cr) << 1; \
|
||||||
m0 &= ~(fe & (es ^ EVEN_MASK));
|
m0 &= ~(fe & (es ^ EVEN_MASK));
|
||||||
|
|
||||||
simd_advance:
|
|
||||||
/* 64-byte SIMD loop */
|
/* 64-byte SIMD loop */
|
||||||
while (likely(nb >= 64)) {
|
while (likely(nb >= 64)) {
|
||||||
#if USE_AVX2
|
#if USE_AVX2
|
||||||
|
|
@ -442,12 +439,9 @@ simd_advance:
|
||||||
t1 = _mm256_get_mask(v1, cx);
|
t1 = _mm256_get_mask(v1, cx);
|
||||||
c0 = _mm256_cchars_mask(v0);
|
c0 = _mm256_cchars_mask(v0);
|
||||||
c1 = _mm256_cchars_mask(v1);
|
c1 = _mm256_cchars_mask(v1);
|
||||||
u0 = _mm256_nonascii_mask(v0);
|
|
||||||
u1 = _mm256_nonascii_mask(v1);
|
|
||||||
m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
|
m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
|
||||||
m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
|
m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
|
||||||
m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
|
m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
|
||||||
m3 = ((uint64_t)u1 << 32) | (uint64_t)u0;
|
|
||||||
#else
|
#else
|
||||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||||
|
|
@ -465,14 +459,9 @@ simd_advance:
|
||||||
c1 = _mm_cchars_mask(v1);
|
c1 = _mm_cchars_mask(v1);
|
||||||
c2 = _mm_cchars_mask(v2);
|
c2 = _mm_cchars_mask(v2);
|
||||||
c3 = _mm_cchars_mask(v3);
|
c3 = _mm_cchars_mask(v3);
|
||||||
u0 = _mm_nonascii_mask(v0);
|
|
||||||
u1 = _mm_nonascii_mask(v1);
|
|
||||||
u2 = _mm_nonascii_mask(v2);
|
|
||||||
u3 = _mm_nonascii_mask(v3);
|
|
||||||
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
|
m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||||
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
|
m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||||
m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
|
m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
|
||||||
m3 = ((uint64_t)u3 << 48) | ((uint64_t)u2 << 32) | ((uint64_t)u1 << 16) | (uint64_t)u0;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -488,7 +477,6 @@ simd_advance:
|
||||||
|
|
||||||
qp = m0 ? __builtin_ctzll(m0) : 64;
|
qp = m0 ? __builtin_ctzll(m0) : 64;
|
||||||
np = m2 ? __builtin_ctzll(m2) : 64;
|
np = m2 ? __builtin_ctzll(m2) : 64;
|
||||||
up = m3 ? __builtin_ctzll(m3) : 64;
|
|
||||||
|
|
||||||
/* get the position of end quote */
|
/* get the position of end quote */
|
||||||
if (m0 != 0) {
|
if (m0 != 0) {
|
||||||
|
|
@ -498,9 +486,6 @@ simd_advance:
|
||||||
|
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
if (up < qp) {
|
|
||||||
goto valid_utf8;
|
|
||||||
}
|
|
||||||
return sp - ss + qp + 1;
|
return sp - ss + qp + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -511,10 +496,6 @@ simd_advance:
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(m3 != 0)) {
|
|
||||||
goto valid_utf8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* move to the next block */
|
/* move to the next block */
|
||||||
sp += 64;
|
sp += 64;
|
||||||
nb -= 64;
|
nb -= 64;
|
||||||
|
|
@ -527,11 +508,9 @@ simd_advance:
|
||||||
s0 = _mm256_get_mask (v0, cq);
|
s0 = _mm256_get_mask (v0, cq);
|
||||||
t0 = _mm256_get_mask (v0, cx);
|
t0 = _mm256_get_mask (v0, cx);
|
||||||
c0 = _mm256_cchars_mask(v0);
|
c0 = _mm256_cchars_mask(v0);
|
||||||
u0 = _mm256_nonascii_mask(v0);
|
|
||||||
m0 = (uint64_t)s0;
|
m0 = (uint64_t)s0;
|
||||||
m1 = (uint64_t)t0;
|
m1 = (uint64_t)t0;
|
||||||
m2 = (uint64_t)c0;
|
m2 = (uint64_t)c0;
|
||||||
m3 = (uint64_t)u0;
|
|
||||||
#else
|
#else
|
||||||
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
v0 = _mm_loadu_si128 ((const void *)(sp + 0));
|
||||||
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
v1 = _mm_loadu_si128 ((const void *)(sp + 16));
|
||||||
|
|
@ -541,12 +520,9 @@ simd_advance:
|
||||||
t1 = _mm_get_mask(v1, cx);
|
t1 = _mm_get_mask(v1, cx);
|
||||||
c0 = _mm_cchars_mask(v0);
|
c0 = _mm_cchars_mask(v0);
|
||||||
c1 = _mm_cchars_mask(v1);
|
c1 = _mm_cchars_mask(v1);
|
||||||
u0 = _mm_nonascii_mask(v0);
|
|
||||||
u1 = _mm_nonascii_mask(v1);
|
|
||||||
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
|
m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
|
||||||
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
|
m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
|
||||||
m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
|
m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
|
||||||
m3 = ((uint64_t)u1 << 16) | (uint64_t)u0;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** update first quote position */
|
/** update first quote position */
|
||||||
|
|
@ -560,19 +536,14 @@ simd_advance:
|
||||||
}
|
}
|
||||||
|
|
||||||
qp = m0 ? __builtin_ctzll(m0) : 64;
|
qp = m0 ? __builtin_ctzll(m0) : 64;
|
||||||
up = m3 ? __builtin_ctzll(m3) : 64;
|
|
||||||
np = m2 ? __builtin_ctzll(m2) : 64;
|
np = m2 ? __builtin_ctzll(m2) : 64;
|
||||||
|
|
||||||
|
|
||||||
/* get the position of end quote */
|
/* get the position of end quote */
|
||||||
if (m0 != 0) {
|
if (m0 != 0) {
|
||||||
if (unlikely(np < qp)) {
|
if (unlikely(np < qp)) {
|
||||||
ep_seterr(sp - ss + np)
|
ep_seterr(sp - ss + np)
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
if (up < qp) {
|
|
||||||
goto valid_utf8;
|
|
||||||
}
|
|
||||||
return sp - ss + qp + 1;
|
return sp - ss + qp + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -582,10 +553,6 @@ simd_advance:
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m3 != 0) {
|
|
||||||
goto valid_utf8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* move to the next block */
|
/* move to the next block */
|
||||||
sp += 32;
|
sp += 32;
|
||||||
nb -= 32;
|
nb -= 32;
|
||||||
|
|
@ -601,7 +568,6 @@ simd_advance:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
remain:
|
|
||||||
/* handle the remaining bytes with scalar code */
|
/* handle the remaining bytes with scalar code */
|
||||||
while (nb > 0) {
|
while (nb > 0) {
|
||||||
ch = *sp;
|
ch = *sp;
|
||||||
|
|
@ -626,43 +592,9 @@ remain:
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* valid utf8 chars */
|
|
||||||
if (ch & 0x80) {
|
|
||||||
uint32_t ubin = nb >= 4 ? *(uint32_t*)sp : less4byte_to_uint32(sp, nb);
|
|
||||||
if ((up = valid_utf8_4byte(ubin))) {
|
|
||||||
sp += up, nb -= up;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
ep_seterr(sp - ss)
|
|
||||||
return -ERR_INVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
sp++, nb--;
|
sp++, nb--;
|
||||||
}
|
}
|
||||||
return -ERR_EOF;
|
return -ERR_EOF;
|
||||||
|
|
||||||
valid_utf8:
|
|
||||||
sp += up, nb -= up;
|
|
||||||
while (likely(nb >= 4)) {
|
|
||||||
up = valid_utf8_4byte(*(uint32_t*)sp);
|
|
||||||
if (unlikely(up == 0)) {
|
|
||||||
ep_seterr(sp - ss)
|
|
||||||
return -ERR_INVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check continous utf-8 */
|
|
||||||
sp += up, nb -= up;
|
|
||||||
if (nb > 0 && (*(uint8_t*)sp & 0x80)) {
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* clear the last carried bit */
|
|
||||||
cr = 0;
|
|
||||||
goto simd_advance;
|
|
||||||
}
|
|
||||||
goto remain;
|
|
||||||
|
|
||||||
#undef ep_init
|
#undef ep_init
|
||||||
#undef ep_setc
|
#undef ep_setc
|
||||||
#undef ep_setx
|
#undef ep_setx
|
||||||
|
|
@ -1640,24 +1572,6 @@ static always_inline long skip_number_fast(const GoString *src, long *p) {
|
||||||
return vi;
|
return vi;
|
||||||
}
|
}
|
||||||
|
|
||||||
static always_inline void memcpy_p64(char * restrict dp, const char * restrict sp, size_t n) {
|
|
||||||
long nb = n;
|
|
||||||
#if USE_AVX2
|
|
||||||
if (nb >= 32) { _mm256_storeu_si256((void *)dp, _mm256_loadu_si256((const void *)sp)); sp += 32, dp += 32, nb -= 32; }
|
|
||||||
#endif
|
|
||||||
while (nb >= 16) { _mm_storeu_si128((void *)dp, _mm_loadu_si128((const void *)sp)); sp += 16, dp += 16, nb -= 16; }
|
|
||||||
if (nb >= 8) { *(uint64_t *)dp = *(const uint64_t *)sp; sp += 8, dp += 8, nb -= 8; }
|
|
||||||
if (nb >= 4) { *(uint32_t *)dp = *(const uint32_t *)sp; sp += 4, dp += 4, nb -= 4; }
|
|
||||||
if (nb >= 2) { *(uint16_t *)dp = *(const uint16_t *)sp; sp += 2, dp += 2, nb -= 2; }
|
|
||||||
if (nb >= 1) { *dp = *sp; }
|
|
||||||
}
|
|
||||||
|
|
||||||
static always_inline bool vec_cross_page(const void * p, size_t n) {
|
|
||||||
#define PAGE_SIZE 4096
|
|
||||||
return (((size_t)(p)) & (PAGE_SIZE - 1)) > (PAGE_SIZE - n);
|
|
||||||
#undef PAGE_SIZE
|
|
||||||
}
|
|
||||||
|
|
||||||
static always_inline long skip_container_fast(const GoString *src, long *p, char lc, char rc) {
|
static always_inline long skip_container_fast(const GoString *src, long *p, char lc, char rc) {
|
||||||
long nb = src->len - *p;
|
long nb = src->len - *p;
|
||||||
const char *s = src->buf + *p;
|
const char *s = src->buf + *p;
|
||||||
|
|
@ -1955,4 +1869,21 @@ skip_in_arr:
|
||||||
err_inval:
|
err_inval:
|
||||||
*p -= 1; // backward error position
|
*p -= 1; // backward error position
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
long validate_utf8(const GoString *src, long *p, StateMachine *m) {
|
||||||
|
xassert(*p >= 0 && src->len > *p);
|
||||||
|
return validate_utf8_with_errors(src->buf, src->len, p, m);
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate_utf8_fast returns zero if the input is valid UTF-8; otherwise it returns a negative value, -(pos) - 1, where pos is the byte offset of the first invalid sequence.
|
||||||
|
long validate_utf8_fast(const GoString *s) {
|
||||||
|
#if USE_AVX2
|
||||||
|
/* fast path for valid utf8 */
|
||||||
|
if (validate_utf8_avx2(s) == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return validate_utf8_errors(s);
|
||||||
}
|
}
|
||||||
|
|
@ -17,6 +17,7 @@
|
||||||
#ifndef XASSERT_H
|
#ifndef XASSERT_H
|
||||||
#define XASSERT_H
|
#define XASSERT_H
|
||||||
|
|
||||||
|
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
#define xassert(expr) ((void)0)
|
#define xassert(expr) ((void)0)
|
||||||
#else
|
#else
|
||||||
|
|
|
||||||
|
|
@ -14,11 +14,23 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XPRINTF_H
|
#pragma once
|
||||||
#define XPRINTF_H
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#ifdef LOG_LEVEL
|
||||||
|
#define DEBUG
|
||||||
|
#define LOG_TRACE(_VA_ARGS__...) do { if (LOG_LEVEL >= 0) xprintf(_VA_ARGS__ ); } while (0)
|
||||||
|
#define LOG_DEBUG(_VA_ARGS__...) do { if (LOG_LEVEL >= 1) xprintf(_VA_ARGS__ ); } while (0)
|
||||||
|
#define LOG_INFO(_VA_ARGS__...) do { if (LOG_LEVEL >= 2) xprintf(_VA_ARGS__ ); } while (0)
|
||||||
|
#else
|
||||||
|
#define LOG_TRACE(_VA_ARGS__...) ((void)0)
|
||||||
|
#define LOG_DEBUG(_VA_ARGS__...) ((void)0)
|
||||||
|
#define LOG_INFO(_VA_ARGS__...) ((void)0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Note: this code is cross-compiled, so we can't use system-specific predefined macros here.
|
||||||
|
#if USE_APPLE
|
||||||
static inline void __attribute__((naked)) write_syscall(const char *s, size_t n)
|
static inline void __attribute__((naked)) write_syscall(const char *s, size_t n)
|
||||||
{
|
{
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
@ -35,6 +47,24 @@ static inline void __attribute__((naked)) write_syscall(const char *s, size_t n)
|
||||||
"retq"
|
"retq"
|
||||||
"\n");
|
"\n");
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static inline void __attribute__((naked)) write_syscall(const char *s, size_t n)
|
||||||
|
{
|
||||||
|
asm volatile(
|
||||||
|
"movq %rsi, %rdx"
|
||||||
|
"\n"
|
||||||
|
"movq %rdi, %rsi"
|
||||||
|
"\n"
|
||||||
|
"movq $1, %rdi"
|
||||||
|
"\n"
|
||||||
|
"movq $1, %rax"
|
||||||
|
"\n"
|
||||||
|
"syscall"
|
||||||
|
"\n"
|
||||||
|
"retq"
|
||||||
|
"\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline void printch(const char ch)
|
static inline void printch(const char ch)
|
||||||
{
|
{
|
||||||
|
|
@ -115,7 +145,7 @@ static inline void printhex(uintptr_t v)
|
||||||
printstr(p);
|
printstr(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_BUF_LEN 100
|
#define MAX_BUF_LEN 1000
|
||||||
|
|
||||||
static inline void printbytes(GoSlice *s)
|
static inline void printbytes(GoSlice *s)
|
||||||
{
|
{
|
||||||
|
|
@ -150,9 +180,8 @@ static inline void printgostr(GoString *s)
|
||||||
printch('"');
|
printch('"');
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void xprintf(const char *fmt, ...)
|
static inline void do_xprintf(const char *fmt, ...)
|
||||||
{
|
{
|
||||||
#ifdef DEBUG
|
|
||||||
__builtin_va_list va;
|
__builtin_va_list va;
|
||||||
char buf[256] = {};
|
char buf[256] = {};
|
||||||
char *p = buf;
|
char *p = buf;
|
||||||
|
|
@ -227,7 +256,26 @@ static inline void xprintf(const char *fmt, ...)
|
||||||
*p = 0;
|
*p = 0;
|
||||||
printstr(buf);
|
printstr(buf);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // XPRINTF_H
|
#ifdef DEBUG
|
||||||
|
#define xprintf(_VA_ARGS__...) do_xprintf(_VA_ARGS__)
|
||||||
|
#else
|
||||||
|
#define xprintf(_VA_ARGS__...) ((void)0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static always_inline void print_longhex(const void *input, const char* s, int bytes) {
|
||||||
|
const uint8_t* p = (const uint8_t*)(input);
|
||||||
|
xprintf("%s : ", s);
|
||||||
|
for (int i = 0; i < bytes; i++) {
|
||||||
|
uintptr_t u = p[i];
|
||||||
|
if (u < 0x10) xprintf("0");
|
||||||
|
xprintf("%x", u);
|
||||||
|
if ((i + 1) < bytes && (i + 1) % 4 == 0) {
|
||||||
|
xprintf("-");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xprintf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#define psimd(simd) print_longhex((const void *)(simd), #simd, sizeof(*simd))
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
#ifndef TYPES_H
|
#ifndef TYPES_H
|
||||||
#define TYPES_H
|
#define TYPES_H
|
||||||
|
|
||||||
// !NOT MODIFIED ONLY.
|
// NOTE: !NOT MODIFIED ONLY.
|
||||||
// These definitions are copied from internal/native/types/types.go.
|
// These definitions are copied from internal/native/types/types.go.
|
||||||
|
|
||||||
#define V_EOF 1
|
#define V_EOF 1
|
||||||
|
|
@ -34,6 +34,8 @@
|
||||||
#define ERR_NUMBER_FMT 6
|
#define ERR_NUMBER_FMT 6
|
||||||
#define ERR_RECURSE_MAX 7
|
#define ERR_RECURSE_MAX 7
|
||||||
#define ERR_FLOAT_INF 8
|
#define ERR_FLOAT_INF 8
|
||||||
|
#define ERR_MISMATCH 9
|
||||||
|
#define ERR_INVAL_UTF8 10
|
||||||
|
|
||||||
#define MAX_RECURSE 4096
|
#define MAX_RECURSE 4096
|
||||||
|
|
||||||
|
|
|
||||||
396
native/utf8.h
396
native/utf8.h
|
|
@ -1,5 +1,3 @@
|
||||||
#ifndef UTF8_H
|
|
||||||
#define UTF8_H
|
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2019 Yaoyuan <ibireme@gmail.com>.
|
* Copyright (C) 2019 Yaoyuan <ibireme@gmail.com>.
|
||||||
*
|
*
|
||||||
|
|
@ -15,10 +13,31 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*
|
*
|
||||||
|
* Copyright 2018-2023 The simdjson authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
* This file may have been modified by ByteDance authors. All ByteDance
|
* This file may have been modified by ByteDance authors. All ByteDance
|
||||||
* Modifications are Copyright 2022 ByteDance Authors.
|
* Modifications are Copyright 2022 ByteDance Authors.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "native.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#include "test/xassert.h"
|
||||||
|
#include "test/xprintf.h"
|
||||||
|
|
||||||
static inline ssize_t valid_utf8_4byte(uint32_t ubin) {
|
static inline ssize_t valid_utf8_4byte(uint32_t ubin) {
|
||||||
/*
|
/*
|
||||||
Each unicode code point is encoded as 1 to 4 bytes in UTF-8 encoding,
|
Each unicode code point is encoded as 1 to 4 bytes in UTF-8 encoding,
|
||||||
|
|
@ -104,12 +123,371 @@ static inline ssize_t valid_utf8_4byte(uint32_t ubin) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint32_t less4byte_to_uint32(const char* sp, size_t nb) {
|
static always_inline long write_error(int pos, StateMachine *m, size_t msize) {
|
||||||
if (nb == 1) return *(uint8_t*)sp;
|
if (m->sp >= msize) {
|
||||||
if (nb == 2) return *(uint16_t*)sp;
|
return -1;
|
||||||
uint32_t hi_1 = (*(uint8_t*)(sp + 2));
|
}
|
||||||
uint32_t lo_2 = *(uint16_t*)(sp);
|
m->vt[m->sp++] = pos;
|
||||||
return hi_1 << 16 | lo_2;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
// Scalar code: records each invalid position via write_error; at most MAX_RECURSE (4096) error positions can be recorded before it bails out.
|
||||||
|
static always_inline long validate_utf8_with_errors(const char *src, long len, long *p, StateMachine *m) {
|
||||||
|
const char* start = src + *p;
|
||||||
|
const char* end = src + len;
|
||||||
|
while (start < end - 3) {
|
||||||
|
uint32_t u = (*(uint32_t*)(start));
|
||||||
|
if ((unsigned)(*start) < 0x80) {
|
||||||
|
start += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
size_t n = valid_utf8_4byte(u);
|
||||||
|
if (n != 0) { // valid utf
|
||||||
|
start += n;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
long err = write_error(start - src, m, MAX_RECURSE);
|
||||||
|
if (err) {
|
||||||
|
*p = start - src;
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
start += 1;
|
||||||
|
}
|
||||||
|
while (start < end) {
|
||||||
|
if ((unsigned)(*start) < 0x80) {
|
||||||
|
start += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t u = 0;
|
||||||
|
memcpy_p4(&u, start, end - start);
|
||||||
|
size_t n = valid_utf8_4byte(u);
|
||||||
|
if (n != 0) { // valid utf
|
||||||
|
start += n;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
long err = write_error(start - src, m, MAX_RECURSE);
|
||||||
|
if (err) {
|
||||||
|
*p = start - src;
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
start += 1;
|
||||||
|
}
|
||||||
|
*p = start - src;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate_utf8_errors returns zero if the input is valid UTF-8; otherwise it returns -(pos) - 1, where pos is the byte offset of the first invalid sequence.
|
||||||
|
static always_inline long validate_utf8_errors(const GoString* s) {
|
||||||
|
const char* start = s->buf;
|
||||||
|
const char* end = s->buf + s->len;
|
||||||
|
while (start < end - 3) {
|
||||||
|
uint32_t u = (*(uint32_t*)(start));
|
||||||
|
if ((unsigned)(*start) < 0x80) {
|
||||||
|
start += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
size_t n = valid_utf8_4byte(u);
|
||||||
|
if (n == 0) { // invalid utf
|
||||||
|
return -(start - s->buf) - 1;
|
||||||
|
}
|
||||||
|
start += n;
|
||||||
|
}
|
||||||
|
while (start < end) {
|
||||||
|
if ((unsigned)(*start) < 0x80) {
|
||||||
|
start += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t u = 0;
|
||||||
|
memcpy_p4(&u, start, end - start);
|
||||||
|
size_t n = valid_utf8_4byte(u);
|
||||||
|
if (n == 0) { // invalid utf
|
||||||
|
return -(start - s->buf) - 1;
|
||||||
|
}
|
||||||
|
start += n;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// SIMD implementation
|
||||||
|
#if USE_AVX2
|
||||||
|
|
||||||
|
static always_inline __m256i simd256_shr(const __m256i input, const int shift) {
|
||||||
|
__m256i shifted = _mm256_srli_epi16(input, shift);
|
||||||
|
__m256i mask = _mm256_set1_epi8(0xFFu >> shift);
|
||||||
|
return _mm256_and_si256(shifted, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define simd256_prev(input, prev, N) _mm256_alignr_epi8(input, _mm256_permute2x128_si256(prev, input, 0x21), 16 - (N));
|
||||||
|
|
||||||
|
static always_inline __m256i must_be_2_3_continuation(const __m256i prev2, const __m256i prev3) {
|
||||||
|
__m256i is_third_byte = _mm256_subs_epu8(prev2, _mm256_set1_epi8(0b11100000u-1)); // Only 111_____ will be > 0
|
||||||
|
__m256i is_fourth_byte = _mm256_subs_epu8(prev3, _mm256_set1_epi8(0b11110000u-1)); // Only 1111____ will be > 0
|
||||||
|
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
|
||||||
|
__m256i or = _mm256_or_si256(is_third_byte, is_fourth_byte);
|
||||||
|
return _mm256_cmpgt_epi8(or, _mm256_set1_epi8(0));;
|
||||||
|
}
|
||||||
|
|
||||||
|
static always_inline __m256i simd256_lookup16(const __m256i input, const uint8_t* table) {
|
||||||
|
return _mm256_shuffle_epi8(_mm256_setr_epi8(table[0], table[1], table[2], table[3], table[4], table[5], table[6], table[7], table[8], table[9], table[10], table[11], table[12], table[13], table[14], table[15], table[0], table[1], table[2], table[3], table[4], table[5], table[6], table[7], table[8], table[9], table[10], table[11], table[12], table[13], table[14], table[15]), input);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Return nonzero if there are incomplete multibyte characters at the end of the block:
|
||||||
|
// e.g. if there is a 4-byte character, but it's 3 bytes from the end.
|
||||||
|
//
|
||||||
|
static always_inline __m256i is_incomplete(const __m256i input) {
|
||||||
|
// If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
|
||||||
|
// ... 1111____ 111_____ 11______
|
||||||
|
const uint8_t tab[32] = {
|
||||||
|
255, 255, 255, 255, 255, 255, 255, 255,
|
||||||
|
255, 255, 255, 255, 255, 255, 255, 255,
|
||||||
|
255, 255, 255, 255, 255, 255, 255, 255,
|
||||||
|
255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1};
|
||||||
|
const __m256i max_value = _mm256_loadu_si256((const __m256i_u *)(&tab[0]));
|
||||||
|
return _mm256_subs_epu8(input, max_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static always_inline __m256i check_special_cases(const __m256i input, const __m256i prev1) {
|
||||||
|
// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
|
||||||
|
// Bit 1 = Too Long (ASCII followed by continuation)
|
||||||
|
// Bit 2 = Overlong 3-byte
|
||||||
|
// Bit 4 = Surrogate
|
||||||
|
// Bit 5 = Overlong 2-byte
|
||||||
|
// Bit 7 = Two Continuations
|
||||||
|
const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______
|
||||||
|
// 11______ 11______
|
||||||
|
const uint8_t TOO_LONG = 1<<1; // 0_______ 10______
|
||||||
|
const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____
|
||||||
|
const uint8_t SURROGATE = 1<<4; // 11101101 101_____
|
||||||
|
const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______
|
||||||
|
const uint8_t TWO_CONTS = 1<<7; // 10______ 10______
|
||||||
|
const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____
|
||||||
|
// 11110100 101_____
|
||||||
|
// 11110101 1001____
|
||||||
|
// 11110101 101_____
|
||||||
|
// 1111011_ 1001____
|
||||||
|
// 1111011_ 101_____
|
||||||
|
// 11111___ 1001____
|
||||||
|
// 11111___ 101_____
|
||||||
|
const uint8_t TOO_LARGE_1000 = 1<<6;
|
||||||
|
// 11110101 1000____
|
||||||
|
// 1111011_ 1000____
|
||||||
|
// 11111___ 1000____
|
||||||
|
const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____
|
||||||
|
|
||||||
|
const __m256i prev1_shr4 = simd256_shr(prev1, 4);
|
||||||
|
static const uint8_t tab1[16] = {
|
||||||
|
// 0_______ ________ <ASCII in byte 1>
|
||||||
|
TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
|
||||||
|
TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
|
||||||
|
// 10______ ________ <continuation in byte 1>
|
||||||
|
TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
|
||||||
|
// 1100____ ________ <two byte lead in byte 1>
|
||||||
|
TOO_SHORT | OVERLONG_2,
|
||||||
|
// 1101____ ________ <two byte lead in byte 1>
|
||||||
|
TOO_SHORT,
|
||||||
|
// 1110____ ________ <three byte lead in byte 1>
|
||||||
|
TOO_SHORT | OVERLONG_3 | SURROGATE,
|
||||||
|
// 1111____ ________ <four+ byte lead in byte 1>
|
||||||
|
TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4,
|
||||||
|
};
|
||||||
|
__m256i byte_1_high = simd256_lookup16(prev1_shr4, tab1);
|
||||||
|
|
||||||
|
|
||||||
|
const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
|
||||||
|
__m256i prev1_low = _mm256_and_si256(prev1, _mm256_set1_epi8(0x0F));
|
||||||
|
static const uint8_t tab2[16] = {
|
||||||
|
// ____0000 ________
|
||||||
|
CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
|
||||||
|
// ____0001 ________
|
||||||
|
CARRY | OVERLONG_2,
|
||||||
|
// ____001_ ________
|
||||||
|
CARRY,
|
||||||
|
CARRY,
|
||||||
|
|
||||||
|
// ____0100 ________
|
||||||
|
CARRY | TOO_LARGE,
|
||||||
|
// ____0101 ________
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
// ____011_ ________
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
|
||||||
|
// ____1___ ________
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
// ____1101 ________
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000,
|
||||||
|
CARRY | TOO_LARGE | TOO_LARGE_1000
|
||||||
|
};
|
||||||
|
__m256i byte_1_low = simd256_lookup16(prev1_low, tab2);
|
||||||
|
|
||||||
|
|
||||||
|
const __m256i input_shr4 = simd256_shr(input, 4);
|
||||||
|
static const uint8_t tab3[16] = {
|
||||||
|
// ________ 0_______ <ASCII in byte 2>
|
||||||
|
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
|
||||||
|
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
|
||||||
|
|
||||||
|
// ________ 1000____
|
||||||
|
TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
|
||||||
|
// ________ 1001____
|
||||||
|
TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
|
||||||
|
// ________ 101_____
|
||||||
|
TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
|
||||||
|
TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
|
||||||
|
|
||||||
|
// ________ 11______
|
||||||
|
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
|
||||||
|
};
|
||||||
|
__m256i byte_2_high = simd256_lookup16(input_shr4, tab3);
|
||||||
|
|
||||||
|
|
||||||
|
return _mm256_and_si256(_mm256_and_si256(byte_1_high, byte_1_low), byte_2_high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static always_inline __m256i check_multibyte_lengths(const __m256i input, const __m256i prev_input, const __m256i sc) {
|
||||||
|
__m256i prev2 = simd256_prev(input, prev_input, 2);
|
||||||
|
__m256i prev3 = simd256_prev(input, prev_input, 3);
|
||||||
|
|
||||||
|
|
||||||
|
__m256i must23 = must_be_2_3_continuation(prev2, prev3);
|
||||||
|
|
||||||
|
__m256i must23_80 = _mm256_and_si256(must23, _mm256_set1_epi8(0x80));
|
||||||
|
|
||||||
|
return _mm256_xor_si256(must23_80, sc);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check whether the current bytes are valid UTF-8.
|
||||||
|
static always_inline __m256i check_utf8_bytes(const __m256i input, const __m256i prev_input) {
|
||||||
|
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
|
||||||
|
// (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
|
||||||
|
__m256i prev1 = simd256_prev(input, prev_input, 1);
|
||||||
|
__m256i sc = check_special_cases(input, prev1);
|
||||||
|
__m256i ret = check_multibyte_lengths(input, prev_input, sc);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when no byte of `input` has its most significant bit set,
// i.e. every byte is in the 7-bit ASCII range.
static always_inline bool is_ascii(const __m256i input) {
    const int high_bits = _mm256_movemask_epi8(input);  // one bit per byte: its MSB
    return high_bits == 0;
}
|
||||||
|
|
||||||
|
// Running state of the block-wise UTF-8 validator.
typedef struct {
    // Sticky error accumulator: results are only ever ORed in, and any
    // nonzero bit means invalid UTF-8 has been seen.
    __m256i error;
    // Most recent 32-byte block that went through the full (non-ASCII)
    // check; it provides the "previous bytes" for the next block.
    __m256i prev_input_block;
    // Nonzero when that block ended in the middle of a multi-byte sequence.
    // The ASCII fast paths and check_eof() OR this into `error`.
    __m256i prev_incomplete;
} utf8_checker;
|
||||||
|
|
||||||
|
// Resets `checker` to its start state: no error recorded, an all-zero
// previous block, and no pending incomplete sequence.
static always_inline void utf8_checker_init(utf8_checker* checker) {
    const __m256i zero = _mm256_setzero_si256();
    checker->error            = zero;
    checker->prev_input_block = zero;
    checker->prev_incomplete  = zero;
}
|
||||||
|
|
||||||
|
// Returns true if any bit of the accumulated error vector is set.
static always_inline bool check_error(utf8_checker* checker) {
    // testz(a, a) yields 1 only when (a & a) is all zeros.
    const int all_clear = _mm256_testz_si256(checker->error, checker->error);
    return all_clear == 0;
}
|
||||||
|
|
||||||
|
// Fully validates the 64 bytes at `start` as two 32-byte blocks.
//
// The first block is checked against the checker's remembered previous
// block, the second against the first. Both results are ORed into
// checker->error, and the second block becomes the new "previous" state
// (together with its is_incomplete() result).
static always_inline void check64_utf(utf8_checker* checker, const uint8_t* start) {
    const __m256i lo = _mm256_loadu_si256((const __m256i*)start);
    const __m256i hi = _mm256_loadu_si256((const __m256i*)(start + 32));

    __m256i err = check_utf8_bytes(lo, checker->prev_input_block);
    err = _mm256_or_si256(err, check_utf8_bytes(hi, lo));
    checker->error = _mm256_or_si256(checker->error, err);

    // carry the tail of this chunk over to the next call
    checker->prev_input_block = hi;
    checker->prev_incomplete  = is_incomplete(hi);
}
|
||||||
|
|
||||||
|
// Validates the 64 bytes at `start`, with a fast path for pure ASCII.
//
// When all 64 bytes are ASCII, the only possible error is a multi-byte
// sequence left unfinished by the previous chunk, so prev_incomplete is
// folded into the error state and nothing else is checked. Otherwise the
// chunk goes through check64_utf().
static always_inline void check64(utf8_checker* checker, const uint8_t* start) {
    const __m256i lo = _mm256_loadu_si256((const __m256i*)start);
    const __m256i hi = _mm256_loadu_si256((const __m256i*)(start + 32));

    if (unlikely(!is_ascii(_mm256_or_si256(lo, hi)))) {
        check64_utf(checker, start);
        return;
    }

    // contiguous ASCII: only a dangling sequence from before can be wrong
    checker->error = _mm256_or_si256(checker->error, checker->prev_incomplete);
}
|
||||||
|
|
||||||
|
// Validates the 128 bytes at `start` as two 64-byte halves, skipping the
// full check for any half that is pure ASCII.
//
// For an ASCII half, only checker->prev_incomplete is ORed into the error
// state; a non-ASCII half goes through check64_utf().
static always_inline void check128(utf8_checker* checker, const uint8_t* start) {
    const __m256i b0 = _mm256_loadu_si256((const __m256i*)start);
    const __m256i b1 = _mm256_loadu_si256((const __m256i*)(start + 32));
    const __m256i b2 = _mm256_loadu_si256((const __m256i*)(start + 64));
    const __m256i b3 = _mm256_loadu_si256((const __m256i*)(start + 96));

    const __m256i front = _mm256_or_si256(b0, b1);  // first 64 bytes
    const __m256i back  = _mm256_or_si256(b2, b3);  // last 64 bytes

    // all 128 bytes are ASCII
    if (likely(is_ascii(_mm256_or_si256(front, back)))) {
        checker->error = _mm256_or_si256(checker->error, checker->prev_incomplete);
        return;
    }

    // first half is ASCII, so the non-ASCII bytes are in the second half
    if (likely(is_ascii(front))) {
        checker->error = _mm256_or_si256(checker->error, checker->prev_incomplete);
        check64_utf(checker, start + 64);
        return;
    }

    // first half contains non-ASCII bytes; the second half may or may not
    check64_utf(checker, start);
    if (likely(!is_ascii(back))) {
        check64_utf(checker, start + 64);
        return;
    }
    checker->error = _mm256_or_si256(checker->error, checker->prev_incomplete);
}
|
||||||
|
|
||||||
|
// Finalizes validation at end of input: a multi-byte sequence that is still
// pending (prev_incomplete) is folded into the error state.
static always_inline void check_eof(utf8_checker* checker) {
    const __m256i pending = checker->prev_incomplete;
    checker->error = _mm256_or_si256(checker->error, pending);
}
|
||||||
|
|
||||||
|
// Validates the final bytes [start, end) of the input.
//
// The bytes are copied into a zero-filled 64-byte scratch buffer so they can
// be checked as one full chunk (the zero padding is ASCII), and the
// end-of-input check is applied afterwards.
//
// The copy is not bounds-checked against the 64-byte buffer: the caller must
// pass at most 64 bytes.
static always_inline void check_remain(utf8_checker* checker, const uint8_t* start, const uint8_t* end) {
    uint8_t padded[64] = {0};
    uint8_t* out = padded;
    for (const uint8_t* in = start; in < end; in++) {
        *out++ = *in;
    }
    check64(checker, padded);
    check_eof(checker);
}
|
||||||
|
|
||||||
|
static always_inline long validate_utf8_avx2(const GoString* s) {
|
||||||
|
xassert(s->buf != NULL || s->len != 0);
|
||||||
|
const uint8_t* start = (const uint8_t*)(s->buf);
|
||||||
|
const uint8_t* end = (const uint8_t*)(s->buf + s->len);
|
||||||
|
/* check eof */
|
||||||
|
if (s->len == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
utf8_checker checker;
|
||||||
|
utf8_checker_init(&checker);
|
||||||
|
while (start < (end - 128)) {
|
||||||
|
check128(&checker, start);
|
||||||
|
if (check_error(&checker)) {
|
||||||
|
}
|
||||||
|
start += 128;
|
||||||
|
};
|
||||||
|
while (start < end - 64) {
|
||||||
|
check64(&checker, start);
|
||||||
|
start += 64;
|
||||||
|
}
|
||||||
|
check_remain(&checker, start, end);
|
||||||
|
return check_error(&checker) ? -1 : 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
||||||
78
native/utils.h
Normal file
78
native/utils.h
Normal file
|
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2022 ByteDance Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "native.h"
|
||||||
|
|
||||||
|
// Reports whether an access of `n` bytes starting at `p` extends past the end
// of the 4096-byte page that contains `p`.
//
// The check is phrased as "n is larger than the room left in this page",
// which cannot wrap around. An access longer than a whole page is therefore
// always reported as crossing, and n == 0 never crosses.
static always_inline bool vec_cross_page(const void * p, size_t n) {
    // A local constant avoids defining/undefining a PAGE_SIZE macro that
    // system headers may already provide.
    const size_t page_size = 4096;
    const size_t offset    = ((size_t)(p)) & (page_size - 1);
    return n > page_size - offset;
}
|
||||||
|
|
||||||
|
// Copies exactly 4 bytes from `sp` to `dp` with a single 32-bit load/store.
static always_inline void memcpy4 (void *__restrict dp, const void *__restrict sp) {
    *(uint32_t *)dp = *(const uint32_t *)sp;
}
|
||||||
|
|
||||||
|
// Copies exactly 8 bytes from `sp` to `dp` with a single 64-bit load/store.
static always_inline void memcpy8 (void *__restrict dp, const void *__restrict sp) {
    *(uint64_t *)dp = *(const uint64_t *)sp;
}
|
||||||
|
|
||||||
|
// Copies exactly 16 bytes from `sp` to `dp` using one unaligned 128-bit
// load followed by one unaligned 128-bit store.
static always_inline void memcpy16 (void *__restrict dp, const void *__restrict sp) {
    const __m128i chunk = _mm_loadu_si128((const __m128i *)sp);
    _mm_storeu_si128((__m128i *)dp, chunk);
}
|
||||||
|
|
||||||
|
// Copies exactly 32 bytes from `sp` to `dp`.
//
// With USE_AVX2 this is a single unaligned 256-bit load/store; otherwise it
// is done as two unaligned 128-bit copies.
static always_inline void memcpy32(void *__restrict dp, const void *__restrict sp) {
#if USE_AVX2
    _mm256_storeu_si256((__m256i *)dp, _mm256_loadu_si256((const __m256i *)sp));
#else
    // Offsets are applied through byte pointers: arithmetic directly on
    // `void *` is a GNU extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    _mm_storeu_si128((__m128i *)dst,        _mm_loadu_si128((const __m128i *)src));
    _mm_storeu_si128((__m128i *)(dst + 16), _mm_loadu_si128((const __m128i *)(src + 16)));
#endif
}
|
||||||
|
|
||||||
|
// Copies exactly 64 bytes from `sp` to `dp` as two 32-byte copies.
static always_inline void memcpy64(void *__restrict dp, const void *__restrict sp) {
    // Byte pointers are used for the +32 offsets because arithmetic on
    // `void *` is a GNU extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    memcpy32(dst, src);
    memcpy32(dst + 32, src + 32);
}
|
||||||
|
|
||||||
|
// Copies a short tail from `sp` to `dp`: one 2-byte piece if at least two
// bytes remain, then one final byte if any is left.
//
// At most 3 bytes are copied, so the copy is complete only for nb < 4.
static always_inline void memcpy_p4(void *__restrict dp, const void *__restrict sp, size_t nb) {
    // Advance through byte pointers: arithmetic on `void *` is a GNU
    // extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    if (nb >= 2) { *(uint16_t *)dst = *(const uint16_t *)src; src += 2; dst += 2; nb -= 2; }
    if (nb >= 1) { *dst = *src; }
}
|
||||||
|
|
||||||
|
// Copies a short tail from `sp` to `dp`: one 4-byte piece if at least four
// bytes remain, then the rest via memcpy_p4().
//
// At most 7 bytes are copied, so the copy is complete only for nb < 8.
// NOTE(review): `nb` is ssize_t here but size_t in the sibling memcpy_p*
// helpers; the type is kept as-is for caller compatibility.
static always_inline void memcpy_p8(void *__restrict dp, const void *__restrict sp, ssize_t nb) {
    // Advance through byte pointers: arithmetic on `void *` is a GNU
    // extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    if (nb >= 4) { memcpy4(dst, src); src += 4; dst += 4; nb -= 4; }
    memcpy_p4(dst, src, nb);
}
|
||||||
|
|
||||||
|
// Copies a short tail from `sp` to `dp`: one 8-byte piece if at least eight
// bytes remain, then the rest via memcpy_p8().
//
// At most 15 bytes are copied, so the copy is complete only for nb < 16.
static always_inline void memcpy_p16(void *__restrict dp, const void *__restrict sp, size_t nb) {
    // Advance through byte pointers: arithmetic on `void *` is a GNU
    // extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    if (nb >= 8) { memcpy8(dst, src); src += 8; dst += 8; nb -= 8; }
    memcpy_p8(dst, src, nb);
}
|
||||||
|
|
||||||
|
// Copies a short tail from `sp` to `dp`: one 16-byte piece if at least
// sixteen bytes remain, then the rest via memcpy_p16().
//
// At most 31 bytes are copied, so the copy is complete only for nb < 32.
static always_inline void memcpy_p32(void *__restrict dp, const void *__restrict sp, size_t nb) {
    // Advance through byte pointers: arithmetic on `void *` is a GNU
    // extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    if (nb >= 16) { memcpy16(dst, src); src += 16; dst += 16; nb -= 16; }
    memcpy_p16(dst, src, nb);
}
|
||||||
|
|
||||||
|
// Copies a short tail from `sp` to `dp`: one 32-byte piece if at least
// thirty-two bytes remain, then the rest via memcpy_p32().
//
// At most 63 bytes are copied, so the copy is complete only for nb < 64.
static always_inline void memcpy_p64(void *__restrict dp, const void *__restrict sp, size_t nb) {
    // Advance through byte pointers: arithmetic on `void *` is a GNU
    // extension, not ISO C.
    uint8_t       *dst = (uint8_t *)dp;
    const uint8_t *src = (const uint8_t *)sp;
    if (nb >= 32) { memcpy32(dst, src); src += 32; dst += 32; nb -= 32; }
    memcpy_p32(dst, src, nb);
}
|
||||||
29
sonic.go
29
sonic.go
|
|
@ -26,31 +26,9 @@ import (
|
||||||
`github.com/bytedance/sonic/decoder`
|
`github.com/bytedance/sonic/decoder`
|
||||||
`github.com/bytedance/sonic/encoder`
|
`github.com/bytedance/sonic/encoder`
|
||||||
`github.com/bytedance/sonic/option`
|
`github.com/bytedance/sonic/option`
|
||||||
`github.com/bytedance/sonic/internal/native/types`
|
|
||||||
`github.com/bytedance/sonic/internal/rt`
|
`github.com/bytedance/sonic/internal/rt`
|
||||||
)
|
)
|
||||||
|
|
||||||
func checkTrailings(buf string, pos int) error {
|
|
||||||
/* skip all the trailing spaces */
|
|
||||||
if pos != len(buf) {
|
|
||||||
for pos < len(buf) && (types.SPACE_MASK & (1 << buf[pos])) != 0 {
|
|
||||||
pos++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* then it must be at EOF */
|
|
||||||
if pos == len(buf) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
/* junk after JSON value */
|
|
||||||
return decoder.SyntaxError {
|
|
||||||
Src : buf,
|
|
||||||
Pos : pos,
|
|
||||||
Code : types.ERR_INVALID_CHAR,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type frozenConfig struct {
|
type frozenConfig struct {
|
||||||
Config
|
Config
|
||||||
encoderOpts encoder.Options
|
encoderOpts encoder.Options
|
||||||
|
|
@ -77,6 +55,9 @@ func (cfg Config) Froze() API {
|
||||||
if cfg.NoNullSliceOrMap {
|
if cfg.NoNullSliceOrMap {
|
||||||
api.encoderOpts |= encoder.NoNullSliceOrMap
|
api.encoderOpts |= encoder.NoNullSliceOrMap
|
||||||
}
|
}
|
||||||
|
if cfg.ValidateString {
|
||||||
|
api.encoderOpts |= encoder.ValidateString
|
||||||
|
}
|
||||||
|
|
||||||
// configure decoder options:
|
// configure decoder options:
|
||||||
if cfg.UseInt64 {
|
if cfg.UseInt64 {
|
||||||
|
|
@ -118,13 +99,13 @@ func (cfg frozenConfig) UnmarshalFromString(buf string, val interface{}) error {
|
||||||
dec := decoder.NewDecoder(buf)
|
dec := decoder.NewDecoder(buf)
|
||||||
dec.SetOptions(cfg.decoderOpts)
|
dec.SetOptions(cfg.decoderOpts)
|
||||||
err := dec.Decode(val)
|
err := dec.Decode(val)
|
||||||
pos := dec.Pos()
|
|
||||||
|
|
||||||
/* check for errors */
|
/* check for errors */
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return checkTrailings(buf, pos)
|
|
||||||
|
return dec.CheckTrailings()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unmarshal is implemented by sonic
|
// Unmarshal is implemented by sonic
|
||||||
|
|
|
||||||
71
utf8/utf8.go
Normal file
71
utf8/utf8.go
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2022 ByteDance Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package utf8
|
||||||
|
|
||||||
|
import (
|
||||||
|
`github.com/bytedance/sonic/internal/rt`
|
||||||
|
`github.com/bytedance/sonic/internal/native/types`
|
||||||
|
`github.com/bytedance/sonic/internal/native`
|
||||||
|
)
|
||||||
|
|
||||||
|
// CorrectWith corrects the invalid utf8 byte with repl string.
|
||||||
|
func CorrectWith(dst []byte, src []byte, repl string) []byte {
|
||||||
|
sstr := rt.Mem2Str(src)
|
||||||
|
sidx := 0
|
||||||
|
|
||||||
|
/* state machine records the invalid postions */
|
||||||
|
m := types.NewStateMachine()
|
||||||
|
m.Sp = 0 // invalid utf8 numbers
|
||||||
|
|
||||||
|
for sidx < len(sstr) {
|
||||||
|
scur := sidx
|
||||||
|
ecode := native.ValidateUTF8(&sstr, &sidx, m)
|
||||||
|
|
||||||
|
if m.Sp != 0 {
|
||||||
|
if m.Sp > len(sstr) {
|
||||||
|
panic("numbers of invalid utf8 exceed the string len!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < m.Sp; i++ {
|
||||||
|
ipos := m.Vt[i] // invalid utf8 position
|
||||||
|
dst = append(dst, sstr[scur:ipos]...)
|
||||||
|
dst = append(dst, repl...)
|
||||||
|
scur = m.Vt[i] + 1
|
||||||
|
}
|
||||||
|
/* append the remained valid utf8 bytes */
|
||||||
|
dst = append(dst, sstr[scur:sidx]...)
|
||||||
|
|
||||||
|
/* not enough space, reset and continue */
|
||||||
|
if ecode != 0 {
|
||||||
|
m.Sp = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
types.FreeStateMachine(m)
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate is a simd-accelereated drop-in replacement for the standard library's utf8.Valid.
|
||||||
|
func Validate(src []byte) bool {
|
||||||
|
return ValidateString(rt.Mem2Str(src))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateString as Validate, but for string.
|
||||||
|
func ValidateString(src string) bool {
|
||||||
|
return native.ValidateUTF8Fast(&src) == 0
|
||||||
|
}
|
||||||
138
utf8/utf8_test.go
Normal file
138
utf8/utf8_test.go
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2022 ByteDance Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package utf8
|
||||||
|
|
||||||
|
import (
|
||||||
|
`testing`
|
||||||
|
`strings`
|
||||||
|
`github.com/stretchr/testify/assert`
|
||||||
|
`unicode/utf8`
|
||||||
|
`bytes`
|
||||||
|
`math/rand`
|
||||||
|
)
|
||||||
|
|
||||||
|
// Byte sequences used to build invalid UTF-8 inputs.
//
// NOTE(review): in Unicode terminology U+D800 lies in the high (leading)
// surrogate range and U+DC00 in the low (trailing) range, so the two
// surrogate names below are swapped. They are left as-is because the tests
// refer to them by name.
var (
	_Header_2Bytes  = "\xC0"         // lead byte of a 2-byte sequence
	_Header_3Bytes  = "\xE0"         // lead byte of a 3-byte sequence
	_Header_4Bytes  = "\xF0"         // lead byte of a 4-byte sequence
	_Low_Surrogate  = "\xED\xA0\x80" // \ud800
	_High_Surrogate = "\xED\xB0\x80" // \udc00
	_Cont           = string([]byte{0xB0})
)
|
||||||
|
|
||||||
|
func TestCorrectWith_InvalidUtf8(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expect string
|
||||||
|
errpos int
|
||||||
|
} {
|
||||||
|
{"basic", `abc`, "abc", -1},
|
||||||
|
{"long", strings.Repeat("helloα,景😊", 1000), strings.Repeat("helloα,景😊", 1000), -1},
|
||||||
|
|
||||||
|
// invalid utf8 - single byte
|
||||||
|
{"single_Cont", _Cont, "\ufffd", 0},
|
||||||
|
{"single_Header_2Bytes", _Header_2Bytes, "\ufffd", 0},
|
||||||
|
{"single_Header_3Bytes", _Header_3Bytes, "\ufffd", 0},
|
||||||
|
{"single_Header_4Bytes", _Header_4Bytes, "\ufffd", 0},
|
||||||
|
|
||||||
|
// invalid utf8 - two bytes
|
||||||
|
{"two_Header_2Bytes + _Cont", _Header_2Bytes + _Cont, "\ufffd\ufffd", 0},
|
||||||
|
{`two_Header_4Bytes + _Cont+ "xx"`, _Header_4Bytes + _Cont + "xx", "\ufffd\ufffdxx", 0},
|
||||||
|
{ `"xx" + three_Header_4Bytes + _Cont + _Cont`, "xx" + _Header_4Bytes + _Cont + _Cont, "xx\ufffd\ufffd\ufffd", 2},
|
||||||
|
|
||||||
|
// invalid utf8 - three bytes
|
||||||
|
{`three_Low_Surrogate`, _Low_Surrogate, "\ufffd\ufffd\ufffd", 0},
|
||||||
|
{`three__High_Surrogate`, _High_Surrogate, "\ufffd\ufffd\ufffd", 0},
|
||||||
|
|
||||||
|
// invalid utf8 - multi bytes
|
||||||
|
{`_High_Surrogate + _Low_Surrogate`, _High_Surrogate + _Low_Surrogate, "\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd", 0},
|
||||||
|
{`"\x80\x80\x80\x80"`, "\x80\x80\x80\x80", "\ufffd\ufffd\ufffd\ufffd", 0},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
got := CorrectWith(nil, []byte(test.input), "\ufffd")
|
||||||
|
assert.Equal(t, []byte(test.expect), got, test.name)
|
||||||
|
assert.Equal(t,test.errpos == -1, utf8.ValidString(test.input), test.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// genRandBytes returns length pseudo-random bytes, each drawn uniformly from
// the full 0x00..0xFF range, so the result is usually not valid UTF-8.
func genRandBytes(length int) []byte {
	out := new(bytes.Buffer)
	for remaining := length; remaining > 0; remaining-- {
		b := byte(rand.Intn(256))
		out.WriteByte(b)
	}
	return out.Bytes()
}
|
||||||
|
|
||||||
|
// genRandAscii returns length pseudo-random bytes, each drawn uniformly from
// the 7-bit ASCII range 0x00..0x7F.
func genRandAscii(length int) []byte {
	out := new(bytes.Buffer)
	for remaining := length; remaining > 0; remaining-- {
		b := byte(rand.Intn(128))
		out.WriteByte(b)
	}
	return out.Bytes()
}
|
||||||
|
|
||||||
|
// genRandRune returns the encoding, as written by bytes.Buffer.WriteRune, of
// length pseudo-random code points drawn uniformly from 0..0x10FFFF.
func genRandRune(length int) []byte {
	out := new(bytes.Buffer)
	for remaining := length; remaining > 0; remaining-- {
		r := rune(rand.Intn(0x110000))
		out.WriteRune(r)
	}
	return out.Bytes()
}
|
||||||
|
|
||||||
|
func TestValidate_Random(t *testing.T) {
|
||||||
|
// compare with stdlib
|
||||||
|
compare := func(t *testing.T, data []byte) {
|
||||||
|
assert.Equal(t, utf8.Valid(data), Validate(data), string(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// random testing
|
||||||
|
nums := 1000
|
||||||
|
maxLen := 1000
|
||||||
|
for i := 0; i < nums; i++ {
|
||||||
|
length := rand.Intn(maxLen)
|
||||||
|
compare(t, genRandBytes(length))
|
||||||
|
compare(t, genRandRune(length))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkValidate(b *testing.B) {
|
||||||
|
bench := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
} {
|
||||||
|
{"ValidAscii", genRandAscii(1000)},
|
||||||
|
{"ValidUTF8", genRandRune(1000)},
|
||||||
|
{"RandomBytes", genRandBytes(1000)},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range bench {
|
||||||
|
if utf8.Valid(test.data) != Validate(test.data) {
|
||||||
|
b.Fatalf("sonic utf8 validate wrong for %s string: %v", test.name, test.data)
|
||||||
|
}
|
||||||
|
b.Run("Sonic_" + test.name, func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Validate(test.data)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
b.Run("StdLib_" + test.name, func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
utf8.Valid(test.data)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in a new issue