From 3585ae1a30a29e13aebf1fb6f63b6c54fb8c45be Mon Sep 17 00:00:00 2001 From: liu Date: Tue, 23 May 2023 18:47:36 +0800 Subject: [PATCH] fix: quote empty string key in ast (#427) * fix: quote empty string key in ast * test: enhance ast fuzz * fix: unquote as default encoding json --- ast/api_amd64.go | 1 + ast/encode_test.go | 2 ++ fuzz/fuzz_test.go | 27 ++++++++++++++++++++++----- unquote/unquote.go | 5 ++++- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/ast/api_amd64.go b/ast/api_amd64.go index 0e902be..3047f59 100644 --- a/ast/api_amd64.go +++ b/ast/api_amd64.go @@ -37,6 +37,7 @@ func quote(buf *[]byte, val string) { *buf = append(*buf, '"') if len(val) == 0 { *buf = append(*buf, '"') + return } sp := rt.IndexChar(val, 0) diff --git a/ast/encode_test.go b/ast/encode_test.go index 848600d..4e3aed8 100644 --- a/ast/encode_test.go +++ b/ast/encode_test.go @@ -88,10 +88,12 @@ func TestEncodeValue(t *testing.T) { {NewString(`\"\"`), `"\\\"\\\""`, false}, {NewString(_TwitterJson), string(quote), false}, {NewArray([]Node{}), "[]", false}, + {NewArray([]Node{NewString(""), NewNull()}), `["",null]`, false}, {NewArray([]Node{NewBool(true), NewString("true"), NewString("\t")}), `[true,"true","\t"]`, false}, {NewObject([]Pair{Pair{"a", NewNull()}, Pair{"b", NewNumber("0")}}), `{"a":null,"b":0}`, false}, {NewObject([]Pair{Pair{"\ta", NewString("\t")}, Pair{"\bb", NewString("\b")}, Pair{"\nb", NewString("\n")}, Pair{"\ra", NewString("\r")}}),`{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false}, {NewObject([]Pair{}), `{}`, false}, + {NewObject([]Pair{Pair{Key: "", Value: NewNull()}}), `{"":null}`, false}, {NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, false}, {NewAny(obj), string(buf), false}, {NewRaw(`[{ }]`), "[{}]", false}, diff --git a/fuzz/fuzz_test.go b/fuzz/fuzz_test.go index fc0b476..c6d32f2 100644 --- a/fuzz/fuzz_test.go +++ b/fuzz/fuzz_test.go @@ -46,6 +46,8 @@ func FuzzMain(f *testing.F) { // Used for debug falied fuzz corpus func TestCorpus(t *testing.T) { fuzzMain(t, []byte("[1\x00")) + fuzzMain(t, []byte("\"\\uDE1D\\uDE1D\\uDEDD\\uDE1D\\uDE1D\\uDE1D\\uDE1D\\uDEDD\\uDE1D\"")) + // fuzzMain(t, []byte(`{"":null}`)) } var target = sonic.ConfigStd @@ -59,7 +61,7 @@ func fuzzMain(t *testing.T, data []byte) { if !json.Valid(data) { return } - for _, typ := range []func() interface{}{ + for i, typ := range []func() interface{}{ func() interface{} { return new(interface{}) }, func() interface{} { return new(map[string]interface{}) }, func() interface{} { return new([]interface{}) }, @@ -70,9 +72,10 @@ func fuzzMain(t *testing.T, data []byte) { // func() interface{} { return new(json.Number) }, // func() interface{} { return new(S) }, } { - sv, jv := typ(), typ() - serr := target.Unmarshal([]byte(data), sv) - jerr := json.Unmarshal([]byte(data), jv) + var sv = typ() + var jv = typ() + serr := target.Unmarshal(data, sv) + jerr := json.Unmarshal(data, jv) require.Equal(t, serr != nil, jerr != nil, dump(data, jv, jerr, sv, serr)) if jerr != nil { @@ -87,7 +90,7 @@ func fuzzMain(t *testing.T, data []byte) { require.NoError(t, jerr, dump(v, jout, jerr, sout, serr)) { - sv, jv := typ(), typ() + sv, jv = typ(), typ() serr := target.Unmarshal(sout, sv) jerr := json.Unmarshal(jout, jv) require.Equalf(t, serr != nil, jerr != nil, dump(data, jv, jerr, sv, serr)) @@ -97,6 +100,20 @@ func fuzzMain(t *testing.T, data []byte) { require.Equal(t, sv, jv, dump(data, jv, jerr, sv, serr)) } + // fuzz ast MarshalJSON API + if i == 0 { + root, aerr := sonic.Get(data) + require.Equal(t, aerr, nil) + aerr = root.LoadAll() + require.Equal(t, aerr, nil, dump(data, jv, jerr, root, aerr)) + aout, aerr := root.MarshalJSON() + require.Equal(t, aerr, nil) + sv = typ() + serr := json.Unmarshal(aout, sv) + require.Equal(t, serr, nil) + require.Equal(t, sv, jv, dump(data, jv, jerr, sv, serr)) + } + if m, ok := sv.(*map[string]interface{}); ok { fuzzDynamicStruct(t, jout, *m) fuzzASTGetFromObject(t, jout, *m) diff --git a/unquote/unquote.go b/unquote/unquote.go index 0ffdaac..23fca73 100644 --- a/unquote/unquote.go +++ b/unquote/unquote.go @@ -18,6 +18,7 @@ package unquote import ( `unsafe` + `runtime` `github.com/bytedance/sonic/internal/native` `github.com/bytedance/sonic/internal/native/types` @@ -43,7 +44,8 @@ func intoBytesUnsafe(s string, m *[]byte) types.ParsingError { pos := -1 slv := (*rt.GoSlice)(unsafe.Pointer(m)) str := (*rt.GoString)(unsafe.Pointer(&s)) - ret := native.Unquote(str.Ptr, str.Len, slv.Ptr, &pos, 0) + /* unquote as the default configuration, replace invalid unicode with \ufffd */ + ret := native.Unquote(str.Ptr, str.Len, slv.Ptr, &pos, types.F_UNICODE_REPLACE) /* check for errors */ if ret < 0 { @@ -52,5 +54,6 @@ func intoBytesUnsafe(s string, m *[]byte) types.ParsingError { /* update the length */ slv.Len = ret + runtime.KeepAlive(s) return 0 }