2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

fix: handling of surrogate-half characters within double-quoted strings (#89)

This commit is contained in:
Oxygen 2021-09-07 14:14:06 +08:00 committed by GitHub
parent 12e088f9e9
commit 6aff4b1ad6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1461 additions and 952 deletions

View file

@ -688,7 +688,9 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) {
self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3
self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p}
self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n}
self.Emit("CMPQ" , _VAR_st_Ep, _SI) // CMPQ st.Ep, SI
self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX
self.Emit("ADDQ" , _VAR_st_Iv, _AX) // ADDQ st.Iv, AX
self.Emit("CMPQ" , _VAR_st_Ep, _AX) // CMPQ st.Ep, AX
self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n}
self.malloc(_SI, _DX) // MALLOC SI, DX
self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI

View file

@ -478,7 +478,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0
/* represent number as `float64` */
/* represent numbers as `float64` */
self.Link("_use_float64") // _use_float64:
self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP)
self.call_go(_F_convT64) // CALL_GO runtime.convT64
@ -487,7 +487,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
self.Sjmp("JMP" , "_set_value") // JMP _set_value
/* represent number as `json.Number` */
/* represent numbers as `json.Number` */
self.Link("_use_number") // _use_number
self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX
self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI
@ -501,7 +501,7 @@ func (self *_ValueDecoder) compile() {
self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
self.Sjmp("JMP" , "_set_value") // JMP _set_value
/* represent number as `int64` */
/* represent numbers as `int64` */
self.Link("_use_int64") // _use_int64:
self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP)

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15888
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15923
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13566
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9279
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11854
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13016
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10885
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13293
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
)
var (

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18308
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18343
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15508
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11232
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13796
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14958
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12943
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15235
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
)
var (

38
issue83_test.go Normal file
View file

@ -0,0 +1,38 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sonic
import (
`testing`
`github.com/bytedance/sonic/decoder`
`github.com/stretchr/testify/require`
)
// Issue83Struct is the decode target used by the issue #83 regression test.
type Issue83Struct struct {
	// X carries the `string` tag option; the test below feeds it a JSON
	// string whose content is itself a quoted JSON string, so the value
	// has to be unquoted twice before it is stored.
	X string `json:"x,string"`
}
// TestIssue83_SurrogateHalfInDoubleQuotedString is the regression test for
// issue #83: a lone high surrogate escape (\ud800) that is followed by a
// non-surrogate escape (\u1234) inside a double-quoted (`,string`) field.
// The expected result is that the unpaired surrogate is replaced by U+FFFD
// and the following code point is decoded normally.
func TestIssue83_SurrogateHalfInDoubleQuotedString(t *testing.T) {
	var v Issue83Struct
	err := Unmarshal([]byte(`{"x":"\"\\ud800\\u1234\""}`), &v)
	if err != nil {
		// Only syntax errors carry a detailed description. Use the comma-ok
		// form so that any other error type is reported by require.NoError
		// below instead of panicking on a failed type assertion.
		if se, ok := err.(decoder.SyntaxError); ok {
			t.Log(se.Description())
		}
		require.NoError(t, err)
	}
	require.Equal(t, Issue83Struct{X: "\ufffd\u1234"}, v)
}

View file

@ -512,6 +512,9 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
sp += 4;
nb -= 4;
/* from line 598 */
retry_decode:
/* ASCII characters, unlikely */
if (unlikely(r0 <= 0x7f)) {
*dp++ = (char)r0;
@ -536,14 +539,24 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* check for double unquote */
if (unlikely(flags & F_DBLUNQ)) {
if (nb < 1) {
*ep = x;
return -ERR_EOF;
} else if (sp[0] != '\\') {
*ep = sp - s - 4;
return -ERR_UNICODE;
if (likely(flags & F_UNIREP)) {
unirep(&dp);
continue;
} else {
*ep = x;
return -ERR_EOF;
}
} else {
nb--;
sp++;
if (sp[0] == '\\') {
nb--;
sp++;
} else if (likely(flags & F_UNIREP)) {
unirep(&dp);
continue;
} else {
*ep = sp - s - 4;
return -ERR_UNICODE;
}
}
}
@ -561,7 +574,7 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* check the hexadecimal escape */
if (!unhex16_is(sp + 2)) {
*ep = sp - s + 2;
for (int i = 0; i < 4 && ishex(sp[2]); i++, sp++) ++*ep;
for (int i = 2; i < 6 && ishex(sp[i]); i++) ++*ep;
return -ERR_INVAL;
}
@ -572,13 +585,17 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
/* it must be the other half */
if (r1 < 0xdc00 || r1 > 0xdfff) {
if (likely(!(flags & F_UNIREP))) {
if (unlikely(!(flags & F_UNIREP))) {
*ep = sp - s - 4;
return -ERR_UNICODE;
} else {
} else if (likely(r1 >= 0xd800 && r1 <= 0xdfff)) {
unirep(&dp);
unirep(&dp);
continue;
} else {
r0 = r1;
unirep(&dp);
goto retry_decode;
}
}