From 14121d64f1737afe68ecac840796ccfce63a1190 Mon Sep 17 00:00:00 2001 From: Yi Duan Date: Wed, 5 Jan 2022 14:20:46 +0800 Subject: [PATCH] feat:(encoder) add optimization options (#168) * feat:(encoder) add option `NoCompactMarshaler` and `NoQuoteTextMarshaler` * feat: add `EscapeHTML` to align with std lib --- encoder/assembler_amd64_go115.go | 12 ++++--- encoder/assembler_amd64_go116.go | 12 ++++--- encoder/assembler_amd64_go117.go | 4 ++- encoder/encoder.go | 39 +++++++++++++++++++--- encoder/encoder_test.go | 55 ++++++++++++++++++++++++++++++-- encoder/primitives.go | 14 ++++++-- 6 files changed, 117 insertions(+), 19 deletions(-) diff --git a/encoder/assembler_amd64_go115.go b/encoder/assembler_amd64_go115.go index 9d42e2b..a0a099b 100644 --- a/encoder/assembler_amd64_go115.go +++ b/encoder/assembler_amd64_go115.go @@ -528,9 +528,11 @@ self.prep_buffer() // MOVE {buf}, (SP) self.Emit("MOVOU", jit.Ptr(_SP, 24), _X0) // MOVOU 24(SP), X0 self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) + self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _ET) // MOVQ 24(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _EP) // MOVQ 32(SP), EP + self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET + self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} @@ -555,9 +557,11 @@ } /* call the encoder, and perform error checks */ + self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _ET) // MOVQ 24(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _EP) // MOVQ 32(SP), EP + self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET + self.Emit("MOVQ" , jit.Ptr(_SP, 40), 
_EP) // MOVQ 40(SP), EP self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error } diff --git a/encoder/assembler_amd64_go116.go b/encoder/assembler_amd64_go116.go index a4c4d85..bdb1b6d 100644 --- a/encoder/assembler_amd64_go116.go +++ b/encoder/assembler_amd64_go116.go @@ -529,9 +529,11 @@ func (self *_Assembler) call_marshaler_i(fn obj.Addr, it *rt.GoType) { self.prep_buffer() // MOVE {buf}, (SP) self.Emit("MOVOU", jit.Ptr(_SP, 24), _X0) // MOVOU 24(SP), X0 self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) + self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _ET) // MOVQ 24(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _EP) // MOVQ 32(SP), EP + self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET + self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} @@ -556,9 +558,11 @@ func (self *_Assembler) call_marshaler_v(fn obj.Addr, it *rt.GoType, vt reflect. 
} /* call the encoder, and perform error checks */ + self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX + self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _ET) // MOVQ 24(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _EP) // MOVQ 32(SP), EP + self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET + self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error } diff --git a/encoder/assembler_amd64_go117.go b/encoder/assembler_amd64_go117.go index 818e1d3..7339d77 100644 --- a/encoder/assembler_amd64_go117.go +++ b/encoder/assembler_amd64_go117.go @@ -547,6 +547,7 @@ func (self *_Assembler) call_marshaler_i(fn obj.Addr, it *rt.GoType) { self.Emit("MOVQ", _BX, _CX) // MOVQ BX, CX self.Emit("MOVQ", _AX, _BX) // MOVQ AX, BX self.prep_buffer_AX() + self.Emit("MOVQ", _ARG_fv, _DI) // MOVQ ARG.fv, DI self.call_go(fn) // CALL $fn self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error @@ -571,7 +572,8 @@ func (self *_Assembler) call_marshaler_v(fn obj.Addr, it *rt.GoType, vt reflect. } /* call the encoder, and perform error checks */ - self.call_go(fn) // CALL $fn + self.Emit("MOVQ", _ARG_fv, _DI) // MOVQ ARG.fv, DI + self.call_go(fn) // CALL $fn self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error self.load_buffer_AX() diff --git a/encoder/encoder.go b/encoder/encoder.go index 7b0bf6d..5df4bff 100644 --- a/encoder/encoder.go +++ b/encoder/encoder.go @@ -30,14 +30,30 @@ import ( type Options uint64 const ( - bitSortMapKeys = iota + bitSortMapKeys = iota + bitEscapeHTML + bitNoCompactMarshaler + bitNoQuoteTextMarshaler ) const ( - // SortMapKeys indicate that the keys of a map needs to be sorted before - // serializing into JSON. + // SortMapKeys indicates that the keys of a map needs to be sorted + // before serializing into JSON. 
// WARNING: This hurts performance A LOT, USE WITH CARE. - SortMapKeys Options = 1 << bitSortMapKeys + SortMapKeys Options = 1 << bitSortMapKeys + + // EscapeHTML indicates encoder to escape all HTML characters + // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). + // WARNING: This hurts performance A LOT, USE WITH CARE. + EscapeHTML Options = 1 << bitEscapeHTML + + // NoCompactMarshaler indicates that the output JSON from json.Marshaler + // is always compact and needs no validation + NoCompactMarshaler Options = 1 << bitNoCompactMarshaler + + // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler + // is always escaped string and needs no quoting + NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler ) // Encoder represents a specific set of encoder configurations. @@ -86,12 +102,17 @@ func Encode(val interface{}, opts Options) ([]byte, error) { return nil, err } + /* EscapeHTML has already returned a new buffer*/ + if opts & EscapeHTML != 0 { + return buf, nil + } + /* make a copy of the result */ ret := make([]byte, len(buf)) copy(ret, buf) - /* return the buffer into pool */ freeBytes(buf) + /* return the buffer into pool */ return ret, nil } @@ -108,6 +129,14 @@ func EncodeInto(buf *[]byte, val interface{}, opts Options) error { } freeStack(stk) + /* EscapeHTML needs to allocate a new buffer*/ + if opts & EscapeHTML != 0 { + dst := bytes.NewBuffer(make([]byte, 0, len(*buf))) + json.HTMLEscape(dst, *buf) + freeBytes(*buf) + *buf = dst.Bytes() + } + /* avoid GC ahead */ runtime.KeepAlive(buf) runtime.KeepAlive(efv) diff --git a/encoder/encoder_test.go b/encoder/encoder_test.go index 1cad451..f4bd48f 100644 --- a/encoder/encoder_test.go +++ b/encoder/encoder_test.go @@ -109,7 +109,8 @@ type MarshalerImpl struct { } func (self *MarshalerImpl) MarshalJSON() ([]byte, error) { - return []byte(strconv.Itoa(self.X)), nil + ret := []byte(strconv.Itoa(self.X)) + return append(ret, " "...), nil } type 
MarshalerStruct struct { @@ -124,6 +125,13 @@ func TestEncoder_Marshaler(t *testing.T) { ret, err = Encode(v, 0) require.NoError(t, err) require.Equal(t, `{"V":{"X":12345}}`, string(ret)) + + ret2, err2 := Encode(&v, NoCompactMarshaler) + require.NoError(t, err2) + require.Equal(t, `{"V":12345 }`, string(ret2)) + ret3, err3 := Encode(v, NoCompactMarshaler) + require.NoError(t, err3) + require.Equal(t, `{"V":{"X":12345}}`, string(ret3)) } type RawMessageStruct struct { @@ -132,11 +140,54 @@ type RawMessageStruct struct { func TestEncoder_RawMessage(t *testing.T) { rms := RawMessageStruct{ - X: json.RawMessage("123456"), + X: json.RawMessage("123456 "), } ret, err := Encode(&rms, 0) require.NoError(t, err) require.Equal(t, `{"X":123456}`, string(ret)) + + ret, err = Encode(&rms, NoCompactMarshaler) + require.NoError(t, err) + require.Equal(t, `{"X":123456 }`, string(ret)) +} + +type TextMarshalerImpl struct { + X string +} + +func (self *TextMarshalerImpl) MarshalText() ([]byte, error) { + return []byte(self.X), nil +} + +type TextMarshalerStruct struct { + V TextMarshalerImpl +} + +func TestEncoder_TextMarshaler(t *testing.T) { + v := TextMarshalerStruct{V: TextMarshalerImpl{X: (`{"a"}`)}} + ret, err := Encode(&v, 0) + require.NoError(t, err) + require.Equal(t, `{"V":"{\"a\"}"}`, string(ret)) + ret, err = Encode(v, 0) + require.NoError(t, err) + require.Equal(t, `{"V":{"X":"{\"a\"}"}}`, string(ret)) + + ret2, err2 := Encode(&v, NoQuoteTextMarshaler) + require.NoError(t, err2) + require.Equal(t, `{"V":{"a"}}`, string(ret2)) + ret3, err3 := Encode(v, NoQuoteTextMarshaler) + require.NoError(t, err3) + require.Equal(t, `{"V":{"X":"{\"a\"}"}}`, string(ret3)) +} + +func TestEncoder_EscapeHTML(t *testing.T) { + v := map[string]TextMarshalerImpl{"&&":{"<>"}} + ret, err := Encode(v, EscapeHTML) + require.NoError(t, err) + require.Equal(t, `{"\u0026\u0026":{"X":"\u003c\u003e"}}`, string(ret)) + ret, err = Encode(v, 0) + require.NoError(t, err) + require.Equal(t, 
`{"&&":{"X":"<>"}}`, string(ret)) } var _GenericValue interface{} diff --git a/encoder/primitives.go b/encoder/primitives.go index fdb4f73..09f1b32 100644 --- a/encoder/primitives.go +++ b/encoder/primitives.go @@ -79,18 +79,26 @@ func encodeTypedPointer(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *_Sta } } -func encodeJsonMarshaler(buf *[]byte, val json.Marshaler) error { +func encodeJsonMarshaler(buf *[]byte, val json.Marshaler, opt Options) error { if ret, err := val.MarshalJSON(); err != nil { return err } else { + if opt & NoCompactMarshaler != 0 { + *buf = append(*buf, ret...) + return nil + } return compact(buf, ret) } } -func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler) error { +func encodeTextMarshaler(buf *[]byte, val encoding.TextMarshaler, opt Options) error { if ret, err := val.MarshalText(); err != nil { return err } else { - return encodeString(buf, rt.Mem2Str(ret)) + if opt & NoQuoteTextMarshaler != 0 { + *buf = append(*buf, ret...) + return nil + } + return encodeString(buf, rt.Mem2Str(ret) ) } } \ No newline at end of file