mirror of
https://github.com/ii64/sonic.git
synced 2026-06-23 01:46:44 +08:00
fix: handling of surrogate-half characters within double-quoted strings (#89)
This commit is contained in:
parent
12e088f9e9
commit
6aff4b1ad6
8 changed files with 1461 additions and 952 deletions
|
|
@ -688,7 +688,9 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) {
|
||||||
self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3
|
self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3
|
||||||
self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p}
|
self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p}
|
||||||
self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n}
|
self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n}
|
||||||
self.Emit("CMPQ" , _VAR_st_Ep, _SI) // CMPQ st.Ep, SI
|
self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX
|
||||||
|
self.Emit("ADDQ" , _VAR_st_Iv, _AX) // ADDQ st.Iv, AX
|
||||||
|
self.Emit("CMPQ" , _VAR_st_Ep, _AX) // CMPQ st.Ep, AX
|
||||||
self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n}
|
self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n}
|
||||||
self.malloc(_SI, _DX) // MALLOC SI, DX
|
self.malloc(_SI, _DX) // MALLOC SI, DX
|
||||||
self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI
|
self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI
|
||||||
|
|
|
||||||
|
|
@ -478,7 +478,7 @@ func (self *_ValueDecoder) compile() {
|
||||||
self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
||||||
self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0
|
self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0
|
||||||
|
|
||||||
/* represent number as `float64` */
|
/* represent numbers as `float64` */
|
||||||
self.Link("_use_float64") // _use_float64:
|
self.Link("_use_float64") // _use_float64:
|
||||||
self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP)
|
self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP)
|
||||||
self.call_go(_F_convT64) // CALL_GO runtime.convT64
|
self.call_go(_F_convT64) // CALL_GO runtime.convT64
|
||||||
|
|
@ -487,7 +487,7 @@ func (self *_ValueDecoder) compile() {
|
||||||
self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
||||||
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
||||||
|
|
||||||
/* represent number as `json.Number` */
|
/* represent numbers as `json.Number` */
|
||||||
self.Link("_use_number") // _use_number
|
self.Link("_use_number") // _use_number
|
||||||
self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX
|
self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX
|
||||||
self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI
|
self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI
|
||||||
|
|
@ -501,7 +501,7 @@ func (self *_ValueDecoder) compile() {
|
||||||
self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
||||||
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
||||||
|
|
||||||
/* represent number as `int64` */
|
/* represent numbers as `int64` */
|
||||||
self.Link("_use_int64") // _use_int64:
|
self.Link("_use_int64") // _use_int64:
|
||||||
self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
||||||
self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP)
|
self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -19,16 +19,16 @@ var (
|
||||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
|
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
|
||||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
|
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
|
||||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15888
|
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
|
||||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15923
|
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
|
||||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13566
|
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
|
||||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
|
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
|
||||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
|
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
|
||||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9279
|
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
|
||||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11854
|
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
|
||||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13016
|
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
|
||||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10885
|
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
|
||||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13293
|
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -19,16 +19,16 @@ var (
|
||||||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
|
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
|
||||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
|
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
|
||||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18308
|
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
|
||||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18343
|
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
|
||||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15508
|
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
|
||||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
|
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
|
||||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
|
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
|
||||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11232
|
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
|
||||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13796
|
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
|
||||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14958
|
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
|
||||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12943
|
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
|
||||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15235
|
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
||||||
38
issue83_test.go
Normal file
38
issue83_test.go
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 ByteDance Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sonic
|
||||||
|
|
||||||
|
import (
|
||||||
|
`testing`
|
||||||
|
|
||||||
|
`github.com/bytedance/sonic/decoder`
|
||||||
|
`github.com/stretchr/testify/require`
|
||||||
|
)
|
||||||
|
|
||||||
|
// Issue83Struct is the decode target used by the issue #83 regression test.
// Its single field is tagged `json:"x,string"`, so the test feeds it a JSON
// string whose content is itself a quoted string (decoded twice).
type Issue83Struct struct {
|
||||||
|
X string `json:"x,string"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIssue83_SurrogateHalfInDoubleQuotedString(t *testing.T) {
|
||||||
|
var v Issue83Struct
|
||||||
|
err := Unmarshal([]byte(`{"x":"\"\\ud800\\u1234\""}`), &v)
|
||||||
|
if err != nil {
|
||||||
|
println(err.(decoder.SyntaxError).Description())
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
require.Equal(t, Issue83Struct{"\ufffd\u1234"}, v)
|
||||||
|
}
|
||||||
|
|
@ -512,6 +512,9 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
||||||
sp += 4;
|
sp += 4;
|
||||||
nb -= 4;
|
nb -= 4;
|
||||||
|
|
||||||
|
/* from line 598 */
|
||||||
|
retry_decode:
|
||||||
|
|
||||||
/* ASCII characters, unlikely */
|
/* ASCII characters, unlikely */
|
||||||
if (unlikely(r0 <= 0x7f)) {
|
if (unlikely(r0 <= 0x7f)) {
|
||||||
*dp++ = (char)r0;
|
*dp++ = (char)r0;
|
||||||
|
|
@ -536,14 +539,24 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
||||||
/* check for double unquote */
|
/* check for double unquote */
|
||||||
if (unlikely(flags & F_DBLUNQ)) {
|
if (unlikely(flags & F_DBLUNQ)) {
|
||||||
if (nb < 1) {
|
if (nb < 1) {
|
||||||
*ep = x;
|
if (likely(flags & F_UNIREP)) {
|
||||||
return -ERR_EOF;
|
unirep(&dp);
|
||||||
} else if (sp[0] != '\\') {
|
continue;
|
||||||
*ep = sp - s - 4;
|
} else {
|
||||||
return -ERR_UNICODE;
|
*ep = x;
|
||||||
|
return -ERR_EOF;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
nb--;
|
if (sp[0] == '\\') {
|
||||||
sp++;
|
nb--;
|
||||||
|
sp++;
|
||||||
|
} else if (likely(flags & F_UNIREP)) {
|
||||||
|
unirep(&dp);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
*ep = sp - s - 4;
|
||||||
|
return -ERR_UNICODE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -561,7 +574,7 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
||||||
/* check the hexadecimal escape */
|
/* check the hexadecimal escape */
|
||||||
if (!unhex16_is(sp + 2)) {
|
if (!unhex16_is(sp + 2)) {
|
||||||
*ep = sp - s + 2;
|
*ep = sp - s + 2;
|
||||||
for (int i = 0; i < 4 && ishex(sp[2]); i++, sp++) ++*ep;
|
for (int i = 2; i < 6 && ishex(sp[i]); i++) ++*ep;
|
||||||
return -ERR_INVAL;
|
return -ERR_INVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -572,13 +585,17 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
||||||
|
|
||||||
/* it must be the other half */
|
/* it must be the other half */
|
||||||
if (r1 < 0xdc00 || r1 > 0xdfff) {
|
if (r1 < 0xdc00 || r1 > 0xdfff) {
|
||||||
if (likely(!(flags & F_UNIREP))) {
|
if (unlikely(!(flags & F_UNIREP))) {
|
||||||
*ep = sp - s - 4;
|
*ep = sp - s - 4;
|
||||||
return -ERR_UNICODE;
|
return -ERR_UNICODE;
|
||||||
} else {
|
} else if (likely(r1 >= 0xd800 && r1 <= 0xdfff)) {
|
||||||
unirep(&dp);
|
unirep(&dp);
|
||||||
unirep(&dp);
|
unirep(&dp);
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
|
r0 = r1;
|
||||||
|
unirep(&dp);
|
||||||
|
goto retry_decode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue