2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-23 01:46:44 +08:00

fix: handling of surrogate-half characters within double-quoted strings (#89)

This commit is contained in:
Oxygen 2021-09-07 14:14:06 +08:00 committed by GitHub
parent 12e088f9e9
commit 6aff4b1ad6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1461 additions and 952 deletions

View file

@ -688,7 +688,9 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) {
self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3 self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3
self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p} self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p}
self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n} self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n}
self.Emit("CMPQ" , _VAR_st_Ep, _SI) // CMPQ st.Ep, SI self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX
self.Emit("ADDQ" , _VAR_st_Iv, _AX) // ADDQ st.Iv, AX
self.Emit("CMPQ" , _VAR_st_Ep, _AX) // CMPQ st.Ep, AX
self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n} self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n}
self.malloc(_SI, _DX) // MALLOC SI, DX self.malloc(_SI, _DX) // MALLOC SI, DX
self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI

View file

@ -478,7 +478,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0 self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0
/* represent number as `float64` */ /* represent numbers as `float64` */
self.Link("_use_float64") // _use_float64: self.Link("_use_float64") // _use_float64:
self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP) self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP)
self.call_go(_F_convT64) // CALL_GO runtime.convT64 self.call_go(_F_convT64) // CALL_GO runtime.convT64
@ -487,7 +487,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
self.Sjmp("JMP" , "_set_value") // JMP _set_value self.Sjmp("JMP" , "_set_value") // JMP _set_value
/* represent number as `json.Number` */ /* represent numbers as `json.Number` */
self.Link("_use_number") // _use_number self.Link("_use_number") // _use_number
self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX
self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI
@ -501,7 +501,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
self.Sjmp("JMP" , "_set_value") // JMP _set_value self.Sjmp("JMP" , "_set_value") // JMP _set_value
/* represent number as `int64` */ /* represent numbers as `int64` */
self.Link("_use_int64") // _use_int64: self.Link("_use_int64") // _use_int64:
self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP)

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238 _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854 _subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15888 _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15923 _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13566 _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634 _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875 _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9279 _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11854 _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13016 _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10885 _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13293 _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
) )
var ( var (

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366 _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202 _subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18308 _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18343 _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15508 _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882 _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039 _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11232 _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13796 _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14958 _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12943 _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15235 _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
) )
var ( var (

38
issue83_test.go Normal file
View file

@ -0,0 +1,38 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sonic
import (
`testing`
`github.com/bytedance/sonic/decoder`
`github.com/stretchr/testify/require`
)
// Issue83Struct is the decode target for the issue #83 regression test.
// The `string` option in the JSON tag means the value of "x" is itself a
// quoted string carried inside a JSON string, so decoding it unquotes twice
// (see encoding/json's documentation of the `string` tag option).
type Issue83Struct struct {
X string `json:"x,string"`
}
// TestIssue83_SurrogateHalfInDoubleQuotedString is the regression test for
// issue #83. The "x" field holds a quoted string nested inside a JSON string,
// whose content is the escape \ud800 (a lone surrogate half) followed by
// \u1234. Decoding must succeed, replacing the unpaired surrogate with U+FFFD
// and keeping U+1234 intact.
func TestIssue83_SurrogateHalfInDoubleQuotedString(t *testing.T) {
	var v Issue83Struct
	err := Unmarshal([]byte(`{"x":"\"\\ud800\\u1234\""}`), &v)
	if err != nil {
		// Use a checked assertion: an unchecked err.(decoder.SyntaxError)
		// would panic on any other error type and mask the real failure.
		if se, ok := err.(decoder.SyntaxError); ok {
			t.Log(se.Description())
		}
		require.NoError(t, err)
	}
	require.Equal(t, Issue83Struct{X: "\ufffd\u1234"}, v)
}

View file

@ -512,6 +512,9 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
sp += 4; sp += 4;
nb -= 4; nb -= 4;
/* from line 598 */
retry_decode:
/* ASCII characters, unlikely */ /* ASCII characters, unlikely */
if (unlikely(r0 <= 0x7f)) { if (unlikely(r0 <= 0x7f)) {
*dp++ = (char)r0; *dp++ = (char)r0;
@ -536,14 +539,24 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* check for double unquote */ /* check for double unquote */
if (unlikely(flags & F_DBLUNQ)) { if (unlikely(flags & F_DBLUNQ)) {
if (nb < 1) { if (nb < 1) {
*ep = x; if (likely(flags & F_UNIREP)) {
return -ERR_EOF; unirep(&dp);
} else if (sp[0] != '\\') { continue;
*ep = sp - s - 4; } else {
return -ERR_UNICODE; *ep = x;
return -ERR_EOF;
}
} else { } else {
nb--; if (sp[0] == '\\') {
sp++; nb--;
sp++;
} else if (likely(flags & F_UNIREP)) {
unirep(&dp);
continue;
} else {
*ep = sp - s - 4;
return -ERR_UNICODE;
}
} }
} }
@ -561,7 +574,7 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* check the hexadecimal escape */ /* check the hexadecimal escape */
if (!unhex16_is(sp + 2)) { if (!unhex16_is(sp + 2)) {
*ep = sp - s + 2; *ep = sp - s + 2;
for (int i = 0; i < 4 && ishex(sp[2]); i++, sp++) ++*ep; for (int i = 2; i < 6 && ishex(sp[i]); i++) ++*ep;
return -ERR_INVAL; return -ERR_INVAL;
} }
@ -572,13 +585,17 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* it must be the other half */ /* it must be the other half */
if (r1 < 0xdc00 || r1 > 0xdfff) { if (r1 < 0xdc00 || r1 > 0xdfff) {
if (likely(!(flags & F_UNIREP))) { if (unlikely(!(flags & F_UNIREP))) {
*ep = sp - s - 4; *ep = sp - s - 4;
return -ERR_UNICODE; return -ERR_UNICODE;
} else { } else if (likely(r1 >= 0xd800 && r1 <= 0xdfff)) {
unirep(&dp); unirep(&dp);
unirep(&dp); unirep(&dp);
continue; continue;
} else {
r0 = r1;
unirep(&dp);
goto retry_decode;
} }
} }