2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00
sonic/encoder/encoder.go
liu d75ce3f730
opt: use simd to optimize htmlescape (#171)
* opt: use simd to optimize htmlescape

* opt: reuse escaped buffer

* feat: cmake with Clang12

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
Co-authored-by: duanyi.aster <duanyi.aster@bytedance.com>
2022-01-18 11:30:28 +08:00

279 lines
No EOL
7.6 KiB
Go

/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package encoder
import (
`bytes`
`encoding/json`
`reflect`
`runtime`
`unsafe`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/option`
)
// Options is a set of encoding options.
type Options uint64
const (
bitSortMapKeys = iota
bitEscapeHTML
bitNoCompactMarshaler
bitNoQuoteTextMarshaler
)
const (
// SortMapKeys indicates that the keys of a map needs to be sorted
// before serializing into JSON.
// WARNING: This hurts performance A LOT, USE WITH CARE.
SortMapKeys Options = 1 << bitSortMapKeys
// EscapeHTML indicates encoder to escape all HTML characters
// after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
// WARNING: This hurts performance A LOT, USE WITH CARE.
EscapeHTML Options = 1 << bitEscapeHTML
// NoCompactMarshaler indicates that the output JSON from json.Marshaler
// is always compact and needs no validation
NoCompactMarshaler Options = 1 << bitNoCompactMarshaler
// NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
// is always escaped string and needs no quoting
NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler
)
// Encoder represents a specific set of encoder configurations.
type Encoder struct {
Opts Options
}
// Encode returns the JSON encoding of v.
func (self *Encoder) Encode(v interface{}) ([]byte, error) {
return Encode(v, self.Opts)
}
// SortKeys enables the SortMapKeys option.
func (self *Encoder) SortKeys() *Encoder {
self.Opts |= SortMapKeys
return self
}
// Quote returns the JSON-quoted version of s.
func Quote(s string) string {
var n int
var p []byte
/* check for empty string */
if s == "" {
return `""`
}
/* allocate space for result */
n = len(s) + 2
p = make([]byte, 0, n)
/* call the encoder */
_ = encodeString(&p, s)
return rt.Mem2Str(p)
}
// Encode returns the JSON encoding of val, encoded with opts.
func Encode(val interface{}, opts Options) ([]byte, error) {
buf := newBytes()
err := EncodeInto(&buf, val, opts)
/* check for errors */
if err != nil {
freeBytes(buf)
return nil, err
}
if opts & EscapeHTML != 0 {
return buf, nil
}
/* make a copy of the result */
ret := make([]byte, len(buf))
copy(ret, buf)
freeBytes(buf)
/* return the buffer into pool */
return ret, nil
}
// EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
// a new one.
func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
stk := newStack()
efv := rt.UnpackEface(val)
err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
/* return the stack into pool */
if err != nil {
resetStack(stk)
}
freeStack(stk)
/* EscapeHTML needs to allocate a new buffer*/
if opts & EscapeHTML != 0 {
dest := HTMLEscape(nil, *buf)
freeBytes(*buf) // free origin used buffer
*buf = dest
}
/* avoid GC ahead */
runtime.KeepAlive(buf)
runtime.KeepAlive(efv)
return err
}
var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
// so that the JSON will be safe to embed inside HTML <script> tags.
// For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must
// be used.
func HTMLEscape(dest []byte, src []byte) []byte {
nb := len(src)
// initilize dest buffer
cap := nb * 6 / 5
if dest == nil {
dest = make([]byte, 0, cap)
}
ds := (*rt.GoSlice)(unsafe.Pointer(&dest))
sp := (*rt.GoSlice)(unsafe.Pointer(&src)).Ptr
ds.Len = 0
if (ds.Cap < cap) {
*ds = growslice(typeByte, *ds, cap)
}
for nb > 0 {
dp := unsafe.Pointer(uintptr(ds.Ptr) + uintptr(ds.Len))
dn := ds.Cap - ds.Len
ret := native.HTMLEscape(sp, nb, dp, &dn)
ds.Len += dn
if ret >= 0 {
break
}
ret = ^ret
nb -= ret
*ds = growslice(typeByte, *ds, ds.Cap * 2)
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
}
return dest
}
// EncodeIndented is like Encode but applies Indent to format the output.
// Each JSON element in the output will begin on a new line beginning with prefix
// followed by one or more copies of indent according to the indentation nesting.
func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
var err error
var out []byte
var buf *bytes.Buffer
/* encode into the buffer */
out = newBytes()
err = EncodeInto(&out, val, opts)
/* check for errors */
if err != nil {
freeBytes(out)
return nil, err
}
/* indent the JSON */
buf = newBuffer()
err = json.Indent(buf, out, prefix, indent)
/* check for errors */
if err != nil {
freeBytes(out)
freeBuffer(buf)
return nil, err
}
/* copy to the result buffer */
ret := make([]byte, buf.Len())
copy(ret, buf.Bytes())
/* return the buffers into pool */
freeBytes(out)
freeBuffer(buf)
return ret, nil
}
// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
// order to reduce the first-hit latency.
//
// Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
// a compile option to set the depth of recursive compile for the nested struct type.
func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
cfg := option.DefaultCompileOptions()
for _, opt := range opts {
opt(&cfg)
break
}
return pretouchRec(map[reflect.Type]bool{vt:true}, cfg)
}
func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) {
/* compile function */
compiler := newCompiler().apply(opts)
encoder := func(vt *rt.GoType) (interface{}, error) {
if pp, err := compiler.compile(_vt); err != nil {
return nil, err
} else {
return newAssembler(pp).Load(), nil
}
}
/* find or compile */
vt := rt.UnpackType(_vt)
if val := programCache.Get(vt); val != nil {
return nil, nil
} else if _, err := programCache.Compute(vt, encoder); err == nil {
return compiler.rec, nil
} else {
return nil, err
}
}
func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error {
if opts.RecursiveDepth < 0 || len(vtm) == 0 {
return nil
}
next := make(map[reflect.Type]bool)
for vt, _ := range(vtm) {
sub, err := pretouchType(vt, opts)
if err != nil {
return err
}
for svt, _ := range(sub) {
next[svt] = true
}
}
opts.RecursiveDepth -= 1
return pretouchRec(next, opts)
}