mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
* feat (decoder): support streaming IO * opt: fine tune buffer size factors * feat (encoder): support streaming IO * doc: refactor format and add comments * fix: io.EOF dead loop * build: adjust CI files * doc: update README.md
344 lines
No EOL
9.4 KiB
Go
344 lines
No EOL
9.4 KiB
Go
/*
|
|
* Copyright 2021 ByteDance Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package encoder
|
|
|
|
import (
|
|
`bytes`
|
|
`encoding/json`
|
|
`reflect`
|
|
`runtime`
|
|
`unsafe`
|
|
|
|
`github.com/bytedance/sonic/internal/native`
|
|
`github.com/bytedance/sonic/internal/native/types`
|
|
`github.com/bytedance/sonic/internal/rt`
|
|
`github.com/bytedance/sonic/option`
|
|
)
|
|
|
|
// Options is a bit set of encoder flags; combine individual flags with
// bitwise OR.
type Options uint64

// Bit positions of the individual flags inside an Options value.
const (
	bitSortMapKeys = iota
	bitEscapeHTML
	bitCompactMarshaler
	bitNoQuoteTextMarshaler
)

const (
	// SortMapKeys asks the encoder to sort the keys of a map before
	// serializing it into JSON.
	// WARNING: This hurts performance A LOT, USE WITH CARE.
	SortMapKeys Options = 1 << bitSortMapKeys

	// EscapeHTML asks the encoder to escape all HTML characters after
	// serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
	// WARNING: This hurts performance A LOT, USE WITH CARE.
	EscapeHTML Options = 1 << bitEscapeHTML

	// CompactMarshaler declares that the JSON produced by json.Marshaler
	// implementations is always compact and needs no validation.
	CompactMarshaler Options = 1 << bitCompactMarshaler

	// NoQuoteTextMarshaler declares that the text produced by
	// encoding.TextMarshaler implementations is always an escaped string
	// and needs no quoting.
	NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler
)
|
|
|
|
// Encoder represents a specific set of encoder configurations.
|
|
type Encoder struct {
|
|
Opts Options
|
|
prefix string
|
|
indent string
|
|
}
|
|
|
|
// Encode returns the JSON encoding of v.
|
|
func (self *Encoder) Encode(v interface{}) ([]byte, error) {
|
|
if self.indent != "" || self.prefix != "" {
|
|
return EncodeIndented(v, self.prefix, self.indent, self.Opts)
|
|
}
|
|
return Encode(v, self.Opts)
|
|
}
|
|
|
|
// SortKeys enables the SortMapKeys option.
|
|
func (self *Encoder) SortKeys() *Encoder {
|
|
self.Opts |= SortMapKeys
|
|
return self
|
|
}
|
|
|
|
// SetEscapeHTML specifies if option EscapeHTML opens
|
|
func (self *Encoder) SetEscapeHTML(f bool) {
|
|
if f {
|
|
self.Opts |= EscapeHTML
|
|
} else {
|
|
self.Opts &= ^EscapeHTML
|
|
}
|
|
}
|
|
|
|
// SetCompactMarshaler specifies if option CompactMarshaler opens
|
|
func (self *Encoder) SetCompactMarshaler(f bool) {
|
|
if f {
|
|
self.Opts |= CompactMarshaler
|
|
} else {
|
|
self.Opts &= ^CompactMarshaler
|
|
}
|
|
}
|
|
|
|
// SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
|
|
func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
|
|
if f {
|
|
self.Opts |= NoQuoteTextMarshaler
|
|
} else {
|
|
self.Opts &= ^NoQuoteTextMarshaler
|
|
}
|
|
}
|
|
|
|
// SetIndent instructs the encoder to format each subsequent encoded
|
|
// value as if indented by the package-level function EncodeIndent().
|
|
// Calling SetIndent("", "") disables indentation.
|
|
func (enc *Encoder) SetIndent(prefix, indent string) {
|
|
enc.prefix = prefix
|
|
enc.indent = indent
|
|
}
|
|
|
|
// Quote returns the JSON-quoted version of s.
|
|
func Quote(s string) string {
|
|
var n int
|
|
var p []byte
|
|
|
|
/* check for empty string */
|
|
if s == "" {
|
|
return `""`
|
|
}
|
|
|
|
/* allocate space for result */
|
|
n = len(s) + 2
|
|
p = make([]byte, 0, n)
|
|
|
|
/* call the encoder */
|
|
_ = encodeString(&p, s)
|
|
return rt.Mem2Str(p)
|
|
}
|
|
|
|
// Encode returns the JSON encoding of val, encoded with opts.
|
|
func Encode(val interface{}, opts Options) ([]byte, error) {
|
|
buf := newBytes()
|
|
err := EncodeInto(&buf, val, opts)
|
|
|
|
/* check for errors */
|
|
if err != nil {
|
|
freeBytes(buf)
|
|
return nil, err
|
|
}
|
|
|
|
if opts & EscapeHTML != 0 {
|
|
return buf, nil
|
|
}
|
|
|
|
/* make a copy of the result */
|
|
ret := make([]byte, len(buf))
|
|
copy(ret, buf)
|
|
|
|
freeBytes(buf)
|
|
/* return the buffer into pool */
|
|
return ret, nil
|
|
}
|
|
|
|
// EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
|
|
// a new one.
|
|
func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
|
|
stk := newStack()
|
|
efv := rt.UnpackEface(val)
|
|
err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
|
|
|
|
/* return the stack into pool */
|
|
if err != nil {
|
|
resetStack(stk)
|
|
}
|
|
freeStack(stk)
|
|
|
|
/* EscapeHTML needs to allocate a new buffer*/
|
|
if opts & EscapeHTML != 0 {
|
|
dest := HTMLEscape(nil, *buf)
|
|
freeBytes(*buf) // free origin used buffer
|
|
*buf = dest
|
|
}
|
|
|
|
/* avoid GC ahead */
|
|
runtime.KeepAlive(buf)
|
|
runtime.KeepAlive(efv)
|
|
return err
|
|
}
|
|
|
|
// typeByte is the unpacked runtime type of byte; HTMLEscape passes it to
// growslice as the element type when enlarging its destination buffer.
var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
|
|
|
|
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
|
|
// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
|
|
// so that the JSON will be safe to embed inside HTML <script> tags.
|
|
// For historical reasons, web browsers don't honor standard HTML
|
|
// escaping within <script> tags, so an alternative JSON encoding must
|
|
// be used.
|
|
func HTMLEscape(dest []byte, src []byte) []byte {
|
|
nb := len(src)
|
|
|
|
// initilize dest buffer
|
|
cap := nb * 6 / 5
|
|
if dest == nil {
|
|
dest = make([]byte, 0, cap)
|
|
}
|
|
ds := (*rt.GoSlice)(unsafe.Pointer(&dest))
|
|
sp := (*rt.GoSlice)(unsafe.Pointer(&src)).Ptr
|
|
ds.Len = 0
|
|
if (ds.Cap < cap) {
|
|
*ds = growslice(typeByte, *ds, cap)
|
|
}
|
|
|
|
for nb > 0 {
|
|
dp := unsafe.Pointer(uintptr(ds.Ptr) + uintptr(ds.Len))
|
|
dn := ds.Cap - ds.Len
|
|
|
|
ret := native.HTMLEscape(sp, nb, dp, &dn)
|
|
ds.Len += dn
|
|
|
|
if ret >= 0 {
|
|
break
|
|
}
|
|
ret = ^ret
|
|
nb -= ret
|
|
|
|
*ds = growslice(typeByte, *ds, ds.Cap * 2)
|
|
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
|
|
}
|
|
return dest
|
|
}
|
|
|
|
// EncodeIndented is like Encode but applies Indent to format the output.
|
|
// Each JSON element in the output will begin on a new line beginning with prefix
|
|
// followed by one or more copies of indent according to the indentation nesting.
|
|
func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
|
|
var err error
|
|
var out []byte
|
|
var buf *bytes.Buffer
|
|
|
|
/* encode into the buffer */
|
|
out = newBytes()
|
|
err = EncodeInto(&out, val, opts)
|
|
|
|
/* check for errors */
|
|
if err != nil {
|
|
freeBytes(out)
|
|
return nil, err
|
|
}
|
|
|
|
/* indent the JSON */
|
|
buf = newBuffer()
|
|
err = json.Indent(buf, out, prefix, indent)
|
|
|
|
/* check for errors */
|
|
if err != nil {
|
|
freeBytes(out)
|
|
freeBuffer(buf)
|
|
return nil, err
|
|
}
|
|
|
|
/* copy to the result buffer */
|
|
ret := make([]byte, buf.Len())
|
|
copy(ret, buf.Bytes())
|
|
|
|
/* return the buffers into pool */
|
|
freeBytes(out)
|
|
freeBuffer(buf)
|
|
return ret, nil
|
|
}
|
|
|
|
// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
|
|
// order to reduce the first-hit latency.
|
|
//
|
|
// Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
|
|
// a compile option to set the depth of recursive compile for the nested struct type.
|
|
func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
|
|
cfg := option.DefaultCompileOptions()
|
|
for _, opt := range opts {
|
|
opt(&cfg)
|
|
break
|
|
}
|
|
return pretouchRec(map[reflect.Type]bool{vt:true}, cfg)
|
|
}
|
|
|
|
func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) {
|
|
/* compile function */
|
|
compiler := newCompiler().apply(opts)
|
|
encoder := func(vt *rt.GoType) (interface{}, error) {
|
|
if pp, err := compiler.compile(_vt); err != nil {
|
|
return nil, err
|
|
} else {
|
|
return newAssembler(pp).Load(), nil
|
|
}
|
|
}
|
|
|
|
/* find or compile */
|
|
vt := rt.UnpackType(_vt)
|
|
if val := programCache.Get(vt); val != nil {
|
|
return nil, nil
|
|
} else if _, err := programCache.Compute(vt, encoder); err == nil {
|
|
return compiler.rec, nil
|
|
} else {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error {
|
|
if opts.RecursiveDepth < 0 || len(vtm) == 0 {
|
|
return nil
|
|
}
|
|
next := make(map[reflect.Type]bool)
|
|
for vt, _ := range(vtm) {
|
|
sub, err := pretouchType(vt, opts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for svt, _ := range(sub) {
|
|
next[svt] = true
|
|
}
|
|
}
|
|
opts.RecursiveDepth -= 1
|
|
return pretouchRec(next, opts)
|
|
}
|
|
|
|
// Valid validates json and returns first non-blank character position,
|
|
// if it is only one valid json value.
|
|
// Otherwise returns invalid character position using start.
|
|
func Valid(data []byte) (ok bool, start int) {
|
|
n := len(data)
|
|
if n == 0 {
|
|
return false, -1
|
|
}
|
|
s := rt.Mem2Str(data)
|
|
p := 0
|
|
m := types.NewStateMachine()
|
|
ret := native.ValidateOne(&s, &p, m)
|
|
types.FreeStateMachine(m)
|
|
if ret < 0 {
|
|
return false, p-1
|
|
}
|
|
for ;p < n; p++ {
|
|
if (types.SPACE_MASK & (1 << data[p])) == 0 {
|
|
return false, p
|
|
}
|
|
}
|
|
return true, ret
|
|
} |