2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-20 16:45:22 +08:00

feat:(ast) add fallback api on not-amd64 env (#341)

* feat:(ast) add  fallback api on `not-amd64` env

* test: add native `linux-arm64` CI

* opt: just skip number chars when `decodeFloat64`

* fmt

* fix: check EOF
This commit is contained in:
Yi Duan 2023-01-03 19:47:55 +08:00 committed by GitHub
parent 134fba2c1d
commit 67cffb15bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 975 additions and 190 deletions

View file

@ -0,0 +1,27 @@
name: Benchmark Linux-ARM
on: pull_request
jobs:
build:
runs-on: [arm]
steps:
- uses: actions/checkout@v2
- name: Check Branch
run: ./check_branch_name.sh ${{ github.head_ref }}
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.17.1
- uses: actions/cache@v2
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Benchmark sonic
run: sh bench-arm.sh

View file

@ -1,4 +1,4 @@
name: Pull Request Benchmark name: Benchmark Linux-X64
on: pull_request on: pull_request
@ -24,10 +24,4 @@ jobs:
${{ runner.os }}-go- ${{ runner.os }}-go-
- name: Benchmark sonic - name: Benchmark sonic
run: sh bench.sh run: sh bench.sh
# - name: Benchmark third-party
# run: go test -benchmem -run=^$ -bench . -v ./generic_test
# - name: Diff
# run: ./bench.py -b '"^Benchmark.*Sonic"' -c

View file

@ -1,4 +1,4 @@
name: Push Check Go1.18 name: Push Check Go1.18-Linux-X64
on: push on: push

View file

@ -0,0 +1,31 @@
name: Push Check Linux-ARM
on: push
jobs:
build:
strategy:
matrix:
go-version: [1.15.x, 1.19.x]
os: [arm]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- uses: actions/cache@v2
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Compatibility Test - main
run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race github.com/bytedance/sonic
- name: Compatibility Test - ast
run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race github.com/bytedance/sonic/ast

View file

@ -1,4 +1,4 @@
name: Push Check All name: Push Check Linux-X64
on: push on: push

View file

@ -1,4 +1,4 @@
name: Push Check ARM name: Push Check Linux-Qemu
on: push on: push
@ -24,10 +24,10 @@ jobs:
restore-keys: | restore-keys: |
${{ runner.os }}-go- ${{ runner.os }}-go-
- name: Compatibility Test - name: Compatibility Test - qemu
run: | run: |
printf ' #!/bin/bash\n if [ ! -x "/usr/bin/qemu-x86_64" ];then\n sudo apt-get update\n sudo apt-get -y install make gcc g++ libglib2.0-dev libpixman-1-dev libfdt-dev python3-pip ninja-build\n sudo pip3 install meson\n wget https://download.qemu.org/qemu-6.2.0.tar.xz\n tar -xvf qemu-6.2.0.tar.xz\n cd qemu-6.2.0\n sudo ./configure\n sudo make -j 4\n sudo make install\n cd ..\n cp /usr/local/bin/qemu-x86_64 /usr/bin/qemu-x86_64\n fi\n' > qemu_install.sh printf ' #!/bin/bash\n if [ ! -x "/usr/bin/qemu-x86_64" ];then\n sudo apt-get update\n sudo apt-get -y install make gcc g++ libglib2.0-dev libpixman-1-dev libfdt-dev python3-pip ninja-build\n sudo pip3 install meson\n wget https://download.qemu.org/qemu-6.2.0.tar.xz\n tar -xvf qemu-6.2.0.tar.xz\n cd qemu-6.2.0\n sudo ./configure\n sudo make -j 4\n sudo make install\n cd ..\n cp /usr/local/bin/qemu-x86_64 /usr/bin/qemu-x86_64\n fi\n' > qemu_install.sh
chmod +x qemu_install.sh chmod +x qemu_install.sh
./qemu_install.sh ./qemu_install.sh
GOARCH=amd64 go test -gcflags=-d=checkptr=0 -c . GOARCH=amd64 go test -gcflags=-d=checkptr=0 -c .
qemu-x86_64 -cpu max ./sonic.test -test.v qemu-x86_64 -cpu max ./sonic.test -test.v

View file

@ -1,4 +1,4 @@
name: Push Check Windows name: Push Check Windows-X64
on: push on: push

91
ast/api_amd64.go Normal file
View file

@ -0,0 +1,91 @@
//go:build amd64
// +build amd64
package ast
import (
`runtime`
`unsafe`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
uq `github.com/bytedance/sonic/unquote`
`github.com/chenzhuoyu/base64x`
)
// typeByte is the type descriptor obtained by unpacking a byte value; quote
// passes it to growslice as the element type when enlarging its output buffer.
var typeByte = rt.UnpackEface(byte(0)).Type
// quote appends val to *buf as a double-quoted JSON string.
//
// Escaping is delegated to native.Quote, which writes directly into the spare
// capacity of *buf. Whenever native.Quote reports that the output space ran
// out, the buffer is grown to twice its capacity and quoting resumes at the
// first input byte that has not been consumed yet.
func quote(buf *[]byte, val string) {
	*buf = append(*buf, '"')
	if len(val) == 0 {
		// Nothing to escape: close the quotes and stop. Without this return
		// the closing quote at the end of the function would be appended as
		// well, producing `"""` for an empty string.
		*buf = append(*buf, '"')
		return
	}

	sp := rt.IndexChar(val, 0)
	nb := len(val)
	b := (*rt.GoSlice)(unsafe.Pointer(buf))

	// input buffer
	for nb > 0 {
		// output buffer
		dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len))
		dn := b.Cap - b.Len
		// call native.Quote, dn is byte count it outputs
		ret := native.Quote(sp, nb, dp, &dn, 0)
		// update *buf length
		b.Len += dn

		// no need more output
		if ret >= 0 {
			break
		}

		// double buf size
		*b = growslice(typeByte, *b, b.Cap*2)
		// ret is the complement of consumed input
		ret = ^ret
		// update input buffer
		nb -= ret
		sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
	}

	// keep the buffer and the input alive while raw pointers into them are in use
	runtime.KeepAlive(buf)
	runtime.KeepAlive(sp)
	*buf = append(*buf, '"')
}
// unquote decodes src by delegating to String from the sonic unquote package
// (imported as uq) and returns its result and parsing error unchanged.
func unquote(src string) (string, types.ParsingError) {
return uq.String(src)
}
// decodeBase64 decodes src with base64x.StdEncoding.
func decodeBase64(src string) ([]byte, error) {
return base64x.StdEncoding.DecodeString(src)
}
// encodeBase64 encodes src with base64x.StdEncoding and returns the text.
func encodeBase64(src []byte) string {
return base64x.StdEncoding.EncodeToString(src)
}
// decodeValue decodes the value at the parser's current position with
// native.Value, filling val and storing the position native.Value returns
// back into self.p.
func (self *Parser) decodeValue() (val types.JsonState) {
// view self.s as its raw (pointer, length) header for the native call
sv := (*rt.GoString)(unsafe.Pointer(&self.s))
self.p = native.Value(sv.Ptr, sv.Len, self.p, &val, 0)
return
}
// skip skips one value with native.SkipOne, using a state machine borrowed
// from types.NewStateMachine and released right after the call.
//
// When native.SkipOne returns a negative number, skip returns self.p together
// with the negated number as the parsing error; otherwise it returns the
// reported start and a zero error.
func (self *Parser) skip() (int, types.ParsingError) {
fsm := types.NewStateMachine()
start := native.SkipOne(&self.s, &self.p, fsm, 0)
types.FreeStateMachine(fsm)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}
// encodeInterface encodes the value returned by self.packAny() into buf using
// the sonic encoder with no options set.
func (self *Node) encodeInterface(buf *[]byte) error {
//WARN: NOT compatible with json.Encoder
return encoder.EncodeInto(buf, self.packAny(), 0)
}

34
ast/api_amd64_test.go Normal file
View file

@ -0,0 +1,34 @@
//go:build amd64
// +build amd64
package ast
import (
`testing`
`github.com/bytedance/sonic/encoder`
`github.com/stretchr/testify/assert`
)
// TestSortNodeTwitter checks that sorting the keys of the parsed Twitter
// document and marshalling it yields the same bytes as encoding the
// equivalent map with encoder.SortMapKeys.
func TestSortNodeTwitter(t *testing.T) {root, err := NewSearcher(_TwitterJson).GetByPath()
if err != nil {
t.Fatal(err)
}
obj, err := root.MapUseNumber()
if err != nil {
t.Fatal(err)
}
// expected output: the sonic encoder with sorted map keys
exp, err := encoder.Encode(obj, encoder.SortMapKeys)
if err != nil {
t.Fatal(err)
}
if err := root.SortKeys(true); err != nil {
t.Fatal(err)
}
act, err := root.MarshalJSON()
if err != nil {
t.Fatal(err)
}
// compare lengths first so a size mismatch is reported before the full diff
assert.Equal(t, len(exp), len(act))
assert.Equal(t, string(exp), string(act))
}

62
ast/api_compat.go Normal file
View file

@ -0,0 +1,62 @@
//go:build !amd64
// +build !amd64
package ast
import (
`encoding/base64`
`encoding/json`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
// quote appends val to *buf as a quoted JSON string by delegating to
// quoteString.
func quote(buf *[]byte, val string) {
quoteString(buf, val)
}
// unquote decodes src through encoding/json's unquoteBytes and returns
// types.ERR_INVALID_ESCAPE when that function reports failure.
//
// NOTE(review): the byte slice handed to unquoteBytes starts one byte BEFORE
// src and is two bytes longer than src. This presumably relies on src always
// being a sub-string of a JSON document that sits between its two quote
// characters; confirm that every caller guarantees this.
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
}
// decodeBase64 decodes src, which must be standard padded base64 text, and
// returns the raw bytes or the decoding error.
func decodeBase64(src string) ([]byte, error) {
	codec := base64.StdEncoding
	raw, err := codec.DecodeString(src)
	return raw, err
}
// encodeBase64 returns src rendered as standard padded base64 text.
func encodeBase64(src []byte) string {
	codec := base64.StdEncoding
	out := make([]byte, codec.EncodedLen(len(src)))
	codec.Encode(out, src)
	return string(out)
}
// decodeValue decodes the value at the parser's current position with the
// pure-Go decodeValue function. The parser position only advances when
// decoding succeeded; the decoded state is returned in either case.
func (self *Parser) decodeValue() (val types.JsonState) {
	next, state := decodeValue(self.s, self.p)
	if next >= 0 {
		self.p = next
	}
	return state
}
// skip skips one value with the pure-Go skipValue function.
//
// On success the parser is moved past the value and the value's start offset
// is returned with a zero error. On failure the parser is left where it was
// and its position is returned with the negated result as the parsing error.
func (self *Parser) skip() (int, types.ParsingError) {
	end, start := skipValue(self.s, self.p)
	if end >= 0 {
		self.p = end
		return start, 0
	}
	return self.p, types.ParsingError(-end)
}
// encodeInterface marshals the value returned by self.packAny() with
// encoding/json and appends the result to *buf. The buffer is left untouched
// when marshalling fails.
func (self *Node) encodeInterface(buf *[]byte) error {
	encoded, err := json.Marshal(self.packAny())
	if err == nil {
		*buf = append(*buf, encoded...)
	}
	return err
}

430
ast/decode.go Normal file
View file

@ -0,0 +1,430 @@
package ast
import (
`encoding/base64`
`runtime`
`strconv`
`unsafe`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
// _blankCharsMask has bit c set for each byte c that counts as blank:
// space, tab, carriage return and line feed.
const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

// JSON literal texts.
const (
	bytesNull   = "null"
	bytesTrue   = "true"
	bytesFalse  = "false"
	bytesObject = "{}"
	bytesArray  = "[]"
)

// isSpace reports whether c is one of the blank bytes in _blankCharsMask.
//
// The test is done in uint64 rather than int: the mask needs bit 32 (for ' '),
// which does not fit into int on 32-bit targets and would make the expression
// fail to compile there. For c >= 64 the shift yields 0, so such bytes never
// match.
func isSpace(c byte) bool {
	return (uint64(1)<<c)&_blankCharsMask != 0
}
// skipBlank returns the index of the first byte at or after pos in src for
// which isSpace is false, or -int(types.ERR_EOF) when no such byte exists
// before the end of src.
//
// The scan uses raw addresses instead of indexing, so pos is not
// bounds-checked against the start of src.
func skipBlank(src string, pos int) int {
// se is the address one past the last byte, sp the address of src[pos]
se := uintptr(rt.IndexChar(src, len(src)))
sp := uintptr(rt.IndexChar(src, pos))
for sp < se {
if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
break
}
sp += 1
}
if sp >= se {
return -int(types.ERR_EOF)
}
runtime.KeepAlive(src)
// convert the address back into an offset from the start of src
return int(sp - uintptr(rt.IndexChar(src, 0)))
}
// decodeNull checks that the literal "null" starts at src[pos].
//
// It returns the position just past the literal, -int(types.ERR_EOF) when
// fewer than four bytes remain, or -int(types.ERR_INVALID_CHAR) when the
// bytes differ.
func decodeNull(src string, pos int) (ret int) {
	end := pos + 4
	switch {
	case end > len(src):
		return -int(types.ERR_EOF)
	case src[pos:end] != bytesNull:
		return -int(types.ERR_INVALID_CHAR)
	default:
		return end
	}
}
// decodeTrue checks that the literal "true" starts at src[pos].
//
// It returns the position just past the literal, -int(types.ERR_EOF) when
// fewer than four bytes remain, or -int(types.ERR_INVALID_CHAR) when the
// bytes differ.
func decodeTrue(src string, pos int) (ret int) {
	if ret = pos + 4; ret > len(src) {
		return -int(types.ERR_EOF)
	}
	if src[pos:ret] != bytesTrue {
		return -int(types.ERR_INVALID_CHAR)
	}
	return ret
}
func decodeFalse(src string, pos int) (ret int) {
ret = pos + 5
if ret > len(src) {
return -int(types.ERR_EOF)
}
if src[pos:ret] == bytesFalse {
return ret
}
return -int(types.ERR_INVALID_CHAR)
}
// decodeString decodes the JSON string whose opening quote is at src[pos].
//
// On success it returns the position just past the closing quote and the
// string content. A string without escape sequences is returned as a view
// into src (no copy); one with escapes is decoded through unquoteBytes.
// On failure it returns a negative error code and "".
func decodeString(src string, pos int) (ret int, v string) {
	ret, ep := skipString(src, pos)
	if ret < 0 {
		// skipString reports errors with ep == -1 as well, so the error must
		// be caught here; otherwise the no-escape branch below would build a
		// string header with a negative length from the error code.
		return ret, ""
	}
	if ep == -1 {
		// no escapes: alias the bytes between the two quotes
		(*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
		(*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
		return ret, v
	}

	// escapes present: hand the quoted text (quotes included) to unquoteBytes
	vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
	if !ok {
		return -int(types.ERR_INVALID_CHAR), ""
	}

	runtime.KeepAlive(src)
	return ret, rt.Mem2Str(vv)
}
// decodeBinary decodes the JSON string at src[pos] with decodeString and then
// base64-decodes its content with the standard encoding.
//
// It returns the position reported by decodeString and the raw bytes, the
// negative code from decodeString when that fails, or
// -int(types.ERR_INVALID_CHAR) when the content is not valid base64.
func decodeBinary(src string, pos int) (ret int, v []byte) {
	end, text := decodeString(src, pos)
	if end < 0 {
		return end, nil
	}

	raw, err := base64.StdEncoding.DecodeString(text)
	if err != nil {
		return -int(types.ERR_INVALID_CHAR), nil
	}
	return end, raw
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Bytes below '0' wrap around to large values, so one unsigned
	// comparison covers both ends of the range.
	return c-'0' < 10
}
// decodeInt64 parses a decimal integer starting at src[pos]: an optional '-'
// followed by ASCII digits.
//
// Returns:
//   - the position just past the digits, the value and a nil error on success;
//   - -int(types.ERR_EOF) when pos is at or past the end of src, or when only
//     a '-' remains;
//   - -int(types.ERR_INVALID_NUMBER_FMT) when the digits are directly followed
//     by '.', 'e' or 'E';
//   - the end position, 0 and the strconv error when the value is out of
//     range for int64;
//   - -int(types.ERR_INVALID_CHAR) and the strconv error for any other
//     parsing failure.
func decodeInt64(src string, pos int) (ret int, v int64, err error) {
// sp walks the input, ss remembers where the number starts, se is the end of src
sp := uintptr(rt.IndexChar(src, pos))
ss := uintptr(sp)
se := uintptr(rt.IndexChar(src, len(src)))
if uintptr(sp) >= se {
return -int(types.ERR_EOF), 0, nil
}
if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
if sp == se {
return -int(types.ERR_EOF), 0, nil
}
for ; sp < se; sp += uintptr(1) {
if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
break
}
}
if sp < se {
if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
}
}
// build a string header covering [ss, sp) without copying
var vv string
ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
(*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
(*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
v, err = strconv.ParseInt(vv, 10, 64)
if err != nil {
//NOTICE: allow overflow here
if err.(*strconv.NumError).Err == strconv.ErrRange {
return ret, 0, err
}
return -int(types.ERR_INVALID_CHAR), 0, err
}
runtime.KeepAlive(src)
return ret, v, nil
}
// isNumberChars reports whether c may appear inside a number: a decimal
// digit, a sign, a decimal point or an exponent marker.
func isNumberChars(c byte) bool {
	switch c {
	case '+', '-', '.', 'e', 'E':
		return true
	default:
		return '0' <= c && c <= '9'
	}
}
// decodeFloat64 parses a floating-point number starting at src[pos].
//
// It does not validate the number's grammar itself: after an optional '-' it
// consumes every byte accepted by isNumberChars and leaves validation to
// strconv.ParseFloat.
//
// Returns:
//   - the position just past the consumed bytes, the value and a nil error on
//     success;
//   - -int(types.ERR_EOF) when pos is at or past the end of src, or when only
//     a '-' remains;
//   - the end position, 0 and the strconv error when the value is out of
//     range;
//   - -int(types.ERR_INVALID_CHAR) and the strconv error for any other
//     parsing failure.
func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
// sp walks the input, ss remembers where the number starts, se is the end of src
sp := uintptr(rt.IndexChar(src, pos))
ss := uintptr(sp)
se := uintptr(rt.IndexChar(src, len(src)))
if uintptr(sp) >= se {
return -int(types.ERR_EOF), 0, nil
}
if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
if sp == se {
return -int(types.ERR_EOF), 0, nil
}
for ; sp < se; sp += uintptr(1) {
if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
break
}
}
// build a string header covering [ss, sp) without copying
var vv string
ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
(*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
(*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
v, err = strconv.ParseFloat(vv, 64)
if err != nil {
//NOTICE: allow overflow here
if err.(*strconv.NumError).Err == strconv.ErrRange {
return ret, 0, err
}
return -int(types.ERR_INVALID_CHAR), 0, err
}
runtime.KeepAlive(src)
return ret, v, nil
}
// decodeValue decodes the value that starts at the first non-blank byte at or
// after pos and dispatches on that byte.
//
// On success ret is the position after the consumed text and v describes the
// value. For '{' and '[' only the opening bracket is consumed. For strings,
// Iv holds the index of the first content byte and Ep the value reported by
// skipString. For numbers, Ep holds the start position; an integer parse is
// tried first and a float parse only when decodeInt64 reports
// ERR_INVALID_NUMBER_FMT.
//
// On failure ret is a negative error code and v.Vt carries the same code.
func decodeValue(src string, pos int) (ret int, v types.JsonState) {
pos = skipBlank(src, pos)
if pos < 0 {
return pos, types.JsonState{Vt: types.ValueType(pos)}
}
switch c := src[pos]; c {
case 'n':
ret = decodeNull(src, pos)
if ret < 0 {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
return ret, types.JsonState{Vt: types.V_NULL}
case '"':
var ep int
ret, ep = skipString(src, pos)
if ret < 0 {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
case '{':
return pos + 1, types.JsonState{Vt: types.V_OBJECT}
case '[':
return pos + 1, types.JsonState{Vt: types.V_ARRAY}
case 't':
ret = decodeTrue(src, pos)
if ret < 0 {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
return ret, types.JsonState{Vt: types.V_TRUE}
case 'f':
ret = decodeFalse(src, pos)
if ret < 0 {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
return ret, types.JsonState{Vt: types.V_FALSE}
case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
// NOTE(review): the error from decodeInt64 is discarded, so an out-of-range
// integer (ret >= 0, iv == 0) is reported as V_INTEGER with Iv 0 - confirm
// that callers expect this.
var iv int64
ret, iv, _ = decodeInt64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
} else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
// not a plain integer: retry as a float
var fv float64
ret, fv, _ = decodeFloat64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
} else {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
default:
return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
}
}
// skipNumber validates and skips the number starting at src[pos] without
// converting it.
//
// It returns the position of the first byte that is not part of the number,
// -int(types.ERR_EOF) when the input ends where more digits are required, or
// -int(types.ERR_INVALID_CHAR) when a byte appears where it is not allowed.
//
// State flags used by the scan:
//   - pointer:       a '.' has been seen
//   - exponent:      an 'e'/'E' has been seen
//   - lastIsDigit:   the previous byte was a digit
//   - nextNeedDigit: the next byte must be a digit
func skipNumber(src string, pos int) (ret int) {
sp := uintptr(rt.IndexChar(src, pos))
se := uintptr(rt.IndexChar(src, len(src)))
if uintptr(sp) >= se {
return -int(types.ERR_EOF)
}
if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
// ss marks the first byte after the optional sign
ss := sp
var pointer bool
var exponent bool
var lastIsDigit bool
var nextNeedDigit = true
for ; sp < se; sp += uintptr(1) {
c := *(*byte)(unsafe.Pointer(sp))
if isDigit(c) {
lastIsDigit = true
nextNeedDigit = false
continue
} else if nextNeedDigit {
return -int(types.ERR_INVALID_CHAR)
} else if c == '.' {
if !lastIsDigit || pointer || sp == ss {
return -int(types.ERR_INVALID_CHAR)
}
pointer = true
lastIsDigit = false
nextNeedDigit = true
continue
} else if c == 'e' || c == 'E' {
if !lastIsDigit || exponent {
return -int(types.ERR_INVALID_CHAR)
}
if sp == se-1 {
return -int(types.ERR_EOF)
}
// NOTE(review): nextNeedDigit is cleared here so that a sign may follow;
// as a result an exponent marker directly followed by a non-number byte
// (e.g. "1e,") appears to be accepted - confirm whether that is intended.
exponent = true
lastIsDigit = false
nextNeedDigit = false
continue
} else if c == '-' || c == '+' {
// a sign is only valid directly after the exponent marker
if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
return -int(types.ERR_INVALID_CHAR)
}
lastIsDigit = false
nextNeedDigit = true
continue
} else {
break
}
}
if nextNeedDigit {
return -int(types.ERR_EOF)
}
runtime.KeepAlive(src)
return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
}
// skipString skips the JSON string whose opening quote is at src[pos].
//
// On success ret is the position just past the closing quote and ep is the
// index of the first backslash in the string, or -1 when it has no escapes.
// Escape sequences are not validated; the byte following a backslash is
// simply skipped.
//
// Errors are reported as (negative code, -1):
//   - types.ERR_EOF when fewer than two bytes remain at pos, or when the
//     input ends before a closing quote is found;
//   - types.ERR_INVALID_CHAR when src[pos] is not a quote.
func skipString(src string, pos int) (ret int, ep int) {
	if pos+1 >= len(src) {
		return -int(types.ERR_EOF), -1
	}

	sp := uintptr(rt.IndexChar(src, pos))
	se := uintptr(rt.IndexChar(src, len(src)))

	if *(*byte)(unsafe.Pointer(sp)) != '"' {
		return -int(types.ERR_INVALID_CHAR), -1
	}
	sp += 1

	ep = -1
	closed := false
	for sp < se {
		c := *(*byte)(unsafe.Pointer(sp))
		if c == '\\' {
			// remember only the first escape position
			if ep == -1 {
				ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
			}
			// skip the backslash and the byte it escapes
			sp += 2
			continue
		}
		sp += 1
		if c == '"' {
			closed = true
			break
		}
	}

	runtime.KeepAlive(src)

	// Running off the end without seeing the closing quote is an error.
	// Checking only "sp > se" would accept an unterminated string such as
	// `"abc`, where the loop stops with sp == se.
	if !closed {
		return -int(types.ERR_EOF), -1
	}
	return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
}
// skipPair skips a bracketed span that opens with lchar at src[pos] and ends
// at the matching rchar, counting nested lchar/rchar pairs and ignoring both
// while inside a quoted string.
//
// It returns the position just past the matching rchar,
// -int(types.ERR_EOF) when fewer than two bytes remain at pos, or
// -int(types.ERR_INVALID_CHAR) when src[pos] is not lchar or the input ends
// before the brackets balance. The content between the brackets is not
// validated.
func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
if pos+1 >= len(src) {
return -int(types.ERR_EOF)
}
sp := uintptr(rt.IndexChar(src, pos))
se := uintptr(rt.IndexChar(src, len(src)))
if *(*byte)(unsafe.Pointer(sp)) != lchar {
return -int(types.ERR_INVALID_CHAR)
}
sp += 1
// nbrace is the current nesting depth; inquote is true between two quotes
nbrace := 1
inquote := false
for sp < se {
c := *(*byte)(unsafe.Pointer(sp))
if c == '\\' {
// skip the backslash and the byte it escapes
sp += 2
continue
} else if c == '"' {
inquote = !inquote
} else if c == lchar {
if !inquote {
nbrace += 1
}
} else if c == rchar {
if !inquote {
nbrace -= 1
if nbrace == 0 {
sp += 1
break
}
}
}
sp += 1
}
if nbrace != 0 {
return -int(types.ERR_INVALID_CHAR)
}
runtime.KeepAlive(src)
return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
}
// skipValue skips the value that starts at the first non-blank byte at or
// after pos, dispatching on that byte to the matching decode/skip helper.
//
// ret is the position after the value or a negative error code; start is the
// index of the value's first byte. When only blanks remain, the negative code
// from skipBlank is returned with start == -1.
func skipValue(src string, pos int) (ret int, start int) {
pos = skipBlank(src, pos)
if pos < 0 {
return pos, -1
}
switch c := src[pos]; c {
case 'n':
ret = decodeNull(src, pos)
case '"':
ret, _ = skipString(src, pos)
case '{':
ret = skipPair(src, pos, '{', '}')
case '[':
ret = skipPair(src, pos, '[', ']')
case 't':
ret = decodeTrue(src, pos)
case 'f':
ret = decodeFalse(src, pos)
case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
ret = skipNumber(src, pos)
default:
ret = -int(types.ERR_INVALID_CHAR)
}
return ret, pos
}

View file

@ -17,26 +17,77 @@
package ast package ast
import ( import (
`reflect`
`sync` `sync`
`unsafe` `unicode/utf8`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/rt`
) )
const ( const (
_MaxBuffer = 1024 // 1KB buffer size _MaxBuffer = 1024 // 1KB buffer size
) )
const ( func quoteString(e *[]byte, s string) {
bytesNull = "null" *e = append(*e, '"')
bytesTrue = "true" start := 0
bytesFalse = "false" for i := 0; i < len(s); {
bytesObject = "{}" if b := s[i]; b < utf8.RuneSelf {
bytesArray = "[]" if safeSet[b] {
) i++
continue
}
if start < i {
*e = append(*e, s[start:i]...)
}
*e = append(*e, '\\')
switch b {
case '\\', '"':
*e = append(*e, b)
case '\n':
*e = append(*e, 'n')
case '\r':
*e = append(*e, 'r')
case '\t':
*e = append(*e, 't')
default:
// This encodes bytes < 0x20 except for \t, \n and \r.
// If escapeHTML is set, it also escapes <, >, and &
// because they can lead to security holes when
// user-controlled strings are rendered into JSON
// and served to some browsers.
*e = append(*e, `u00`...)
*e = append(*e, hex[b>>4])
*e = append(*e, hex[b&0xF])
}
i++
start = i
continue
}
c, size := utf8.DecodeRuneInString(s[i:])
// if c == utf8.RuneError && size == 1 {
// if start < i {
// e.Write(s[start:i])
// }
// e.WriteString(`\ufffd`)
// i += size
// start = i
// continue
// }
if c == '\u2028' || c == '\u2029' {
if start < i {
*e = append(*e, s[start:i]...)
}
*e = append(*e, `\u202`...)
*e = append(*e, hex[c&0xF])
i += size
start = i
continue
}
i += size
}
if start < len(s) {
*e = append(*e, s[start:]...)
}
*e = append(*e, '"')
}
var bytesPool = sync.Pool{} var bytesPool = sync.Pool{}
@ -117,46 +168,13 @@ func (self *Node) encodeNumber(buf *[]byte) error {
return nil return nil
} }
var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
func quote(buf *[]byte, sp unsafe.Pointer, nb int) {
b := (*rt.GoSlice)(unsafe.Pointer(buf))
// input buffer
for nb > 0 {
// output buffer
dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len))
dn := b.Cap - b.Len
// call native.Quote, dn is byte count it outputs
ret := native.Quote(sp, nb, dp, &dn, 0)
// update *buf length
b.Len += dn
// no need more output
if ret >= 0 {
break
}
// double buf size
*b = growslice(typeByte, *b, b.Cap * 2)
// ret is the complement of consumed input
ret = ^ret
// update input buffer
nb -= ret
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
}
}
func (self *Node) encodeString(buf *[]byte) error { func (self *Node) encodeString(buf *[]byte) error {
*buf = append(*buf, '"') if self.v == 0 {
nb := int(self.v) *buf = append(*buf, '"', '"')
if nb == 0 {
*buf = append(*buf, '"')
return nil return nil
} }
quote(buf, self.p, nb) quote(buf, addr2str(self.p, self.v))
*buf = append(*buf, '"')
return nil return nil
} }
@ -194,16 +212,14 @@ func (self *Node) encodeArray(buf *[]byte) error {
} }
func (self *Pair) encode(buf *[]byte) error { func (self *Pair) encode(buf *[]byte) error {
*buf = append(*buf, '"') if len(*buf) == 0 {
sptr := (*rt.GoString)(unsafe.Pointer(&self.Key)) *buf = append(*buf, '"', '"', ':')
if sptr.Len == 0 {
*buf = append(*buf, '"', ':')
return self.Value.encode(buf) return self.Value.encode(buf)
} }
quote(buf, sptr.Ptr, sptr.Len) quote(buf, self.Key)
*buf = append(*buf, ':')
*buf = append(*buf, '"', ':')
return self.Value.encode(buf) return self.Value.encode(buf)
} }
@ -238,8 +254,4 @@ func (self *Node) encodeObject(buf *[]byte) error {
*buf = append(*buf, '}') *buf = append(*buf, '}')
return nil return nil
}
func (self *Node) encodeInterface(buf *[]byte) error {
return encoder.EncodeInto(buf, self.packAny(), 0)
} }

View file

@ -17,12 +17,11 @@
package ast package ast
import ( import (
`encoding/json`
`runtime` `runtime`
`sync` `sync`
`testing` `testing`
`github.com/bytedance/sonic/decoder`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/native/types`
`github.com/stretchr/testify/assert` `github.com/stretchr/testify/assert`
) )
@ -63,14 +62,15 @@ func TestGC_Encode(t *testing.T) {
func TestEncodeValue(t *testing.T) { func TestEncodeValue(t *testing.T) {
obj := new(_TwitterStruct) obj := new(_TwitterStruct)
if err := decoder.NewDecoder(_TwitterJson).Decode(obj); err != nil { if err := json.Unmarshal([]byte(_TwitterJson), obj); err != nil {
t.Fatal(err) t.Fatal(err)
} }
buf, err := encoder.Encode(obj, 0) // buf, err := encoder.Encode(obj, encoder.EscapeHTML|encoder.SortMapKeys)
buf, err := json.Marshal(obj)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
quote, err := encoder.Encode(_TwitterJson, 0) quote, err := json.Marshal(_TwitterJson)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -90,16 +90,17 @@ func TestEncodeValue(t *testing.T) {
{NewArray([]Node{}), "[]", false}, {NewArray([]Node{}), "[]", false},
{NewArray([]Node{NewBool(true), NewString("true"), NewString("\t")}), `[true,"true","\t"]`, false}, {NewArray([]Node{NewBool(true), NewString("true"), NewString("\t")}), `[true,"true","\t"]`, false},
{NewObject([]Pair{Pair{"a", NewNull()}, Pair{"b", NewNumber("0")}}), `{"a":null,"b":0}`, false}, {NewObject([]Pair{Pair{"a", NewNull()}, Pair{"b", NewNumber("0")}}), `{"a":null,"b":0}`, false},
{NewObject([]Pair{Pair{"\ta", NewString("\t")}, Pair{"\bb", NewString("\b")}, Pair{"\nb", NewString("\n")}, Pair{"\ra", NewString("\r")}}), `{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false}, {NewObject([]Pair{Pair{"\ta", NewString("\t")}, Pair{"\bb", NewString("\b")}, Pair{"\nb", NewString("\n")}, Pair{"\ra", NewString("\r")}}),`{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false},
{NewObject([]Pair{}), `{}`, false}, {NewObject([]Pair{}), `{}`, false},
{NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, false}, {NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, false},
{NewAny(obj), string(buf), false}, {NewAny(obj), string(buf), false},
{NewRaw(`[{ }]`), "[{ }]", false}, {NewRaw(`[{ }]`), "[{}]", false},
{Node{}, "", true}, {Node{}, "", true},
{Node{t: types.ValueType(1)}, "", true}, {Node{t: types.ValueType(1)}, "", true},
} }
for i, c := range input { for i, c := range input {
buf, err := encoder.Encode(&c.node, 0) t.Log(i)
buf, err := json.Marshal(&c.node)
if c.err { if c.err {
if err == nil { if err == nil {
t.Fatal(i) t.Fatal(i)

98
ast/error.go Normal file
View file

@ -0,0 +1,98 @@
package ast
import (
`fmt`
`strings`
`unsafe`
`github.com/bytedance/sonic/internal/native/types`
)
// syntaxError builds a SyntaxError for err from the parser's current position
// and source text.
func (self *Parser) syntaxError(err types.ParsingError) SyntaxError {
return SyntaxError{
Pos : self.p,
Src : self.s,
Code: err,
}
}
// newSyntaxError wraps err in a V_ERROR node: v holds the error code and p
// points at a string containing err.Description().
func newSyntaxError(err SyntaxError) *Node {
msg := err.Description()
return &Node{
t: V_ERROR,
v: int64(err.Code),
p: unsafe.Pointer(&msg),
}
}
// SyntaxError describes a parsing failure at a position in a source text.
type SyntaxError struct {
// Pos is the index in Src that the error message points at.
Pos int
// Src is the source text; description shows an excerpt of it around Pos.
Src string
// Code is the parsing error code; it supplies the message when Msg is empty.
Code types.ParsingError
// Msg, when non-empty, is used as the message instead of Code.Message().
Msg string
}
// Error implements the error interface; it returns Description() formatted
// with %q, i.e. as a double-quoted, escaped string.
func (self SyntaxError) Error() string {
return fmt.Sprintf("%q", self.Description())
}
// Description returns the text "Syntax error " followed by the detailed
// description built by description().
func (self SyntaxError) Description() string {
return "Syntax error " + self.description()
}
// description renders the error as "at index N: message", followed by an
// excerpt of Src around Pos and a marker line whose '^' sits under the
// offending position.
//
// The excerpt initially spans 16 bytes on either side of Pos and is shifted
// or trimmed to stay inside Src. p and q are the excerpt bounds and i is the
// offset of Pos within the excerpt.
//
// NOTE(review): a Pos well beyond len(Src) does not appear to be clamped and
// may leave p > q for the slice below - confirm that callers never pass one.
func (self SyntaxError) description() string {
i := 16
p := self.Pos - i
q := self.Pos + i
/* check for empty source */
if self.Src == "" {
return fmt.Sprintf("no sources available: %#v", self)
}
/* prevent slicing before the beginning */
if p < 0 {
p, q, i = 0, q - p, i + p
}
/* prevent slicing beyond the end */
if n := len(self.Src); q > n {
n = q - n
q = len(self.Src)
/* move the left bound if possible */
if p > n {
i += n
p -= n
}
}
/* left and right length */
x := clamp_zero(i)
y := clamp_zero(q - p - i - 1)
/* compose the error description */
return fmt.Sprintf(
"at index %d: %s\n\n\t%s\n\t%s^%s\n",
self.Pos,
self.Message(),
self.Src[p:q],
strings.Repeat(".", x),
strings.Repeat(".", y),
)
}
// Message returns the custom message Msg when one is set and otherwise the
// standard message belonging to Code.
func (self SyntaxError) Message() string {
	if custom := self.Msg; custom != "" {
		return custom
	}
	return self.Code.Message()
}
// clamp_zero returns v unchanged when it is non-negative and 0 otherwise.
func clamp_zero(v int) int {
	if v >= 0 {
		return v
	}
	return 0
}

View file

@ -22,10 +22,8 @@ import (
`strconv` `strconv`
`unsafe` `unsafe`
`github.com/bytedance/sonic/decoder`
`github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/internal/rt`
`github.com/chenzhuoyu/base64x`
) )
const ( const (
@ -1566,7 +1564,7 @@ func NewBytes(src []byte) Node {
if len(src) == 0 { if len(src) == 0 {
panic("empty src bytes") panic("empty src bytes")
} }
out := base64x.StdEncoding.EncodeToString(src) out := encodeBase64(src)
return NewString(out) return NewString(out)
} }
@ -1756,15 +1754,6 @@ func newError(err types.ParsingError, msg string) *Node {
} }
} }
func newSyntaxError(err *decoder.SyntaxError) *Node {
msg := err.Description()
return &Node{
t: V_ERROR,
v: int64(err.Code),
p: unsafe.Pointer(&msg),
}
}
var typeJumpTable = [256]types.ValueType{ var typeJumpTable = [256]types.ValueType{
'"' : types.V_STRING, '"' : types.V_STRING,
'-' : _V_NUMBER, '-' : _V_NUMBER,

View file

@ -26,7 +26,6 @@ import (
`strconv` `strconv`
`testing` `testing`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/internal/rt`
`github.com/stretchr/testify/assert` `github.com/stretchr/testify/assert`
@ -34,7 +33,8 @@ import (
func TestNodeSortKeys(t *testing.T) { func TestNodeSortKeys(t *testing.T) {
root, err := NewSearcher(_TwitterJson).GetByPath() var src = `{"b":1,"a":2,"c":3}`
root, err := NewSearcher(src).GetByPath()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -42,7 +42,7 @@ func TestNodeSortKeys(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
exp, err := encoder.Encode(obj, encoder.SortMapKeys) exp, err := json.Marshal(obj)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -18,13 +18,9 @@ package ast
import ( import (
`fmt` `fmt`
`unsafe`
`github.com/bytedance/sonic/decoder`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/unquote`
) )
const _DEFAULT_NODE_CAP int = 16 const _DEFAULT_NODE_CAP int = 16
@ -112,12 +108,6 @@ func (self *Parser) lspace(sp int) int {
return sp return sp
} }
func (self *Parser) decodeValue() (val types.JsonState) {
sv := (*rt.GoString)(unsafe.Pointer(&self.s))
self.p = native.Value(sv.Ptr, sv.Len, self.p, &val, 0)
return
}
func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) { func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) {
sp := self.p sp := self.p
ns := len(self.s) ns := len(self.s)
@ -213,7 +203,7 @@ func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) {
/* check for escape sequence */ /* check for escape sequence */
if njs.Ep != -1 { if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 { if key, err = unquote(key); err != 0 {
return Node{}, err return Node{}, err
} }
} }
@ -277,14 +267,13 @@ func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
} }
/* unquote the string */ /* unquote the string */
buf := make([]byte, 0, len(s)) out, err := unquote(s)
err := unquote.IntoBytes(s, &buf)
/* check for errors */ /* check for errors */
if err != 0 { if err != 0 {
return Node{}, err return Node{}, err
} else { } else {
return newBytes(buf), 0 return newBytes(rt.Str2Mem(out)), 0
} }
} }
@ -317,17 +306,6 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
} }
} }
func (self *Parser) skip() (int, types.ParsingError) {
fsm := types.NewStateMachine()
start := native.SkipOne(&self.s, &self.p, fsm, uint64(0))
types.FreeStateMachine(fsm)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}
func (self *Parser) searchKey(match string) types.ParsingError { func (self *Parser) searchKey(match string) types.ParsingError {
ns := len(self.s) ns := len(self.s)
if err := self.object(); err != 0 { if err := self.object(); err != 0 {
@ -361,7 +339,7 @@ func (self *Parser) searchKey(match string) types.ParsingError {
/* check for escape sequence */ /* check for escape sequence */
if njs.Ep != -1 { if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 { if key, err = unquote(key); err != 0 {
return err return err
} }
} }
@ -542,7 +520,7 @@ func (self *Node) skipNextPair() (*Pair) {
/* check for escape sequence */ /* check for escape sequence */
if njs.Ep != -1 { if njs.Ep != -1 {
if key, err = unquote.String(key); err != 0 { if key, err = unquote(key); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))} return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
} }
} }
@ -633,17 +611,9 @@ func (self *Parser) ExportError(err types.ParsingError) error {
if err == _ERR_NOT_FOUND { if err == _ERR_NOT_FOUND {
return ErrNotExist return ErrNotExist
} }
return fmt.Errorf("%q", decoder.SyntaxError{ return fmt.Errorf("%q", SyntaxError{
Pos : self.p, Pos : self.p,
Src : self.s, Src : self.s,
Code: err, Code: err,
}.Description()) }.Description())
}
func (self *Parser) syntaxError(err types.ParsingError) *decoder.SyntaxError {
return &decoder.SyntaxError{
Pos : self.p,
Src : self.s,
Code: err,
}
} }

View file

@ -20,6 +20,7 @@ import (
`math` `math`
`runtime` `runtime`
`strconv` `strconv`
`strings`
`sync` `sync`
`testing` `testing`
@ -60,7 +61,7 @@ func TestExportError(t *testing.T) {
if err == nil { if err == nil {
t.Fatal() t.Fatal()
} }
if err.Error() != `"Syntax error at index 6: invalid char\n\n\t{\"a\":]\n\t......^\n"` { if strings.Index(err.Error(), `"Syntax error at `) != 0 {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -19,6 +19,7 @@ package ast
import ( import (
`unsafe` `unsafe`
`reflect` `reflect`
`unicode/utf8`
`github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/internal/rt`
) )
@ -38,4 +39,43 @@ func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer
//go:linkname growslice runtime.growslice //go:linkname growslice runtime.growslice
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice
// mem2ptr returns the data pointer stored in the slice header of s.
//go:nosplit
func mem2ptr(s []byte) unsafe.Pointer {
return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr
}
// ptr2slice builds a new rt.GoSlice header with data pointer s, length l and
// capacity c, and returns a pointer to that header.
//go:nosplit
func ptr2slice(s unsafe.Pointer, l int, c int) unsafe.Pointer {
slice := &rt.GoSlice{
Ptr: s,
Len: l,
Cap: c,
}
return unsafe.Pointer(slice)
}
// str2ptr returns the data pointer stored in the string header of s.
//go:nosplit
func str2ptr(s string) unsafe.Pointer {
return (*rt.GoString)(unsafe.Pointer(&s)).Ptr
}
// addr2str builds a string whose header points at p with length n. The bytes
// are not copied, so the result aliases the memory at p.
//go:nosplit
func addr2str(p unsafe.Pointer, n int64) (s string) {
(*rt.GoString)(unsafe.Pointer(&s)).Ptr = p
(*rt.GoString)(unsafe.Pointer(&s)).Len = int(n)
return
}
// safeSet and hex are linked by name to the unexported variables of the same
// names in encoding/json; quoteString indexes both when escaping strings.
var (
//go:linkname safeSet encoding/json.safeSet
safeSet [utf8.RuneSelf]bool
//go:linkname hex encoding/json.hex
hex string
)
// unquoteBytes is linked by name to the unexported encoding/json function of
// the same name. ok is false when s could not be unquoted.
//go:linkname unquoteBytes encoding/json.unquoteBytes
func unquoteBytes(s []byte) (t []byte, ok bool)

View file

@ -1,56 +0,0 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`unsafe`
`github.com/bytedance/sonic/internal/rt`
)
//go:nosplit
func mem2ptr(s []byte) unsafe.Pointer {
return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr
}
//go:nosplit
func ptr2slice(s unsafe.Pointer, l int, c int) unsafe.Pointer {
slice := &rt.GoSlice{
Ptr: s,
Len: l,
Cap: c,
}
return unsafe.Pointer(slice)
}
//go:nosplit
func str2ptr(s string) unsafe.Pointer {
return (*rt.GoString)(unsafe.Pointer(&s)).Ptr
}
//go:nosplit
func addr2str(p unsafe.Pointer, n int64) (s string) {
(*rt.GoString)(unsafe.Pointer(&s)).Ptr = p
(*rt.GoString)(unsafe.Pointer(&s)).Len = int(n)
return
}
const _SPACE_CHAR_MASK = (1<<' ')|(1<<'\t')|(1<<'\r')|(1<<'\n')
func isSpace(c byte) bool {
return (int(1<<c) & _SPACE_CHAR_MASK) != 0
}

14
bench-arm.sh Normal file
View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Runs the benchmarks of the ast package with SONIC_NO_ASYNC_GC=1 set,
# then restores the environment and the working directory.

pwd=$(pwd)
export SONIC_NO_ASYNC_GC=1

# Quote the path so directories containing spaces work, and stop if the
# directory is missing instead of benchmarking whatever package we are in.
cd "$pwd/ast" || exit 1
go test -benchmem -run='^$' -benchtime=1000000x -bench "^(BenchmarkGet.*|BenchmarkSet.*)$"
go test -benchmem -run='^$' -benchtime=10000x -bench "^(BenchmarkParser_.*|BenchmarkEncode.*)$"
go test -benchmem -run='^$' -benchtime=10000000x -bench "^(BenchmarkNodeGetByPath|BenchmarkStructGetByPath|BenchmarkNodeIndex|BenchmarkStructIndex|BenchmarkSliceIndex|BenchmarkMapIndex|BenchmarkNodeGet|BenchmarkSliceGet|BenchmarkMapGet|BenchmarkNodeSet|BenchmarkMapSet|BenchmarkNodeSetByIndex|BenchmarkSliceSetByIndex|BenchmarkStructSetByIndex|BenchmarkNodeUnset|BenchmarkMapUnset|BenchmarkNodUnsetByIndex|BenchmarkSliceUnsetByIndex|BenchmarkNodeAdd|BenchmarkSliceAdd|BenchmarkMapAdd)$"

unset SONIC_NO_ASYNC_GC
cd "$pwd" || exit 1

View file

@ -96,6 +96,11 @@ func I64toa(out *byte, val int64) (ret int)
//goland:noinspection GoUnusedParameter //goland:noinspection GoUnusedParameter
func U64toa(out *byte, val uint64) (ret int) func U64toa(out *byte, val uint64) (ret int)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func F64toa(out *byte, val float64) (ret int)
func useAVX() { func useAVX() {
S_f64toa = avx.S_f64toa S_f64toa = avx.S_f64toa
S_f32toa = avx.S_f32toa S_f32toa = avx.S_f32toa

View file

@ -91,3 +91,12 @@ TEXT ·U64toa(SB), NOSPLIT, $0 - 32
JMP github·combytedancesonicinternalnativeavx·__u64toa(SB) JMP github·combytedancesonicinternalnativeavx·__u64toa(SB)
JMP github·combytedancesonicinternalnativesse·__u64toa(SB) JMP github·combytedancesonicinternalnativesse·__u64toa(SB)
TEXT ·F64toa(SB), NOSPLIT, $0 - 32
CMPB github·combytedancesonicinternalcpu·HasAVX2(SB), $0
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx2·__f64toa(SB)
CMPB github·combytedancesonicinternalcpu·HasAVX(SB), $0
JE 2(PC)
JMP github·combytedancesonicinternalnativeavx·__f64toa(SB)
JMP github·combytedancesonicinternalnativesse·__f64toa(SB)

10
internal/rt/asm_arm64.s Normal file
View file

@ -0,0 +1,10 @@
// +build !noasm !appengine
// Code generated by asm2asm, DO NOT EDIT.
#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
TEXT ·MoreStack(SB), NOSPLIT, $0 - 8
NO_LOCAL_POINTERS
RET

View file

@ -65,3 +65,26 @@ func FuncAddr(f interface{}) unsafe.Pointer {
return *(*unsafe.Pointer)(vv.Value) return *(*unsafe.Pointer)(vv.Value)
} }
} }
// IndexChar returns the address index bytes past the data pointer of src.
// No bounds check is performed, so the result may lie outside the string.
func IndexChar(src string, index int) unsafe.Pointer {
return unsafe.Pointer(uintptr((*GoString)(unsafe.Pointer(&src)).Ptr) + uintptr(index))
}
// IndexByte returns the address index bytes past the data pointer of ptr.
// No bounds check is performed, so the result may lie outside the slice.
func IndexByte(ptr []byte, index int) unsafe.Pointer {
return unsafe.Pointer(uintptr((*GoSlice)(unsafe.Pointer(&ptr)).Ptr) + uintptr(index))
}
// GuardSlice makes sure *buf has room for at least n more bytes.
//
// When the spare capacity is already sufficient the slice is left untouched.
// Otherwise the contents are copied into a new backing array whose capacity
// is half the old capacity plus the current length plus n, and never less
// than 32.
func GuardSlice(buf *[]byte, n int) {
	used, total := len(*buf), cap(*buf)
	if total-used >= n {
		return
	}

	grown := total>>1 + n + used
	if grown < 32 {
		grown = 32
	}

	next := make([]byte, used, grown)
	copy(next, *buf)
	*buf = next
}