From d75ce3f730e481fcf1febbcca117cc735cf29597 Mon Sep 17 00:00:00 2001 From: liu Date: Tue, 18 Jan 2022 11:30:28 +0800 Subject: [PATCH] opt: use simd to optimize htmlescape (#171) * opt: use simd to optimize htmlescape * opt: reuse escaped buffer * feat: cmake with Clang12 Co-authored-by: liuqiang Co-authored-by: duanyi.aster --- encoder/encoder.go | 52 +- encoder/encoder_test.go | 44 + internal/native/avx/native_amd64.go | 5 + internal/native/avx/native_amd64.s | 3622 ++++++++++------- internal/native/avx/native_amd64_test.go | 25 + internal/native/avx/native_subr_amd64.go | 24 +- internal/native/avx2/native_amd64.go | 5 + internal/native/avx2/native_amd64.s | 4460 ++++++++++++--------- internal/native/avx2/native_amd64_test.go | 25 + internal/native/avx2/native_subr_amd64.go | 24 +- internal/native/dispatch_amd64.go | 5 + internal/native/dispatch_amd64.s | 6 + internal/native/native_amd64.tmpl | 5 + internal/native/native_amd64_test.tmpl | 25 + native/native.h | 1 + native/parsing.c | 192 +- 16 files changed, 5203 insertions(+), 3317 deletions(-) diff --git a/encoder/encoder.go b/encoder/encoder.go index 5df4bff..1536416 100644 --- a/encoder/encoder.go +++ b/encoder/encoder.go @@ -21,7 +21,9 @@ import ( `encoding/json` `reflect` `runtime` + `unsafe` + `github.com/bytedance/sonic/internal/native` `github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/option` ) @@ -102,7 +104,6 @@ func Encode(val interface{}, opts Options) ([]byte, error) { return nil, err } - /* EscapeHTML has already returned a new buffer*/ if opts & EscapeHTML != 0 { return buf, nil } @@ -131,10 +132,9 @@ func EncodeInto(buf *[]byte, val interface{}, opts Options) error { /* EscapeHTML needs to allocate a new buffer*/ if opts & EscapeHTML != 0 { - dst := bytes.NewBuffer(make([]byte, 0, len(*buf))) - json.HTMLEscape(dst, *buf) - freeBytes(*buf) - *buf = dst.Bytes() + dest := HTMLEscape(nil, *buf) + freeBytes(*buf) // free origin used buffer + *buf = dest } /* avoid GC ahead */ @@ -143,6 +143,48 @@ func EncodeInto(buf *[]byte, val interface{}, opts Options) error { return err } +var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) + +// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 +// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 +// so that the JSON will be safe to embed inside HTML