mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
fix: handling of surrogate-half characters within double-quoted strings (#89)
This commit is contained in:
parent
12e088f9e9
commit
6aff4b1ad6
8 changed files with 1461 additions and 952 deletions
|
|
@ -688,7 +688,9 @@ func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr) {
|
|||
self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3
|
||||
self.Emit("MOVQ" , _DI, p) // MOVQ DI, ${p}
|
||||
self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n}
|
||||
self.Emit("CMPQ" , _VAR_st_Ep, _SI) // CMPQ st.Ep, SI
|
||||
self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX
|
||||
self.Emit("ADDQ" , _VAR_st_Iv, _AX) // ADDQ st.Iv, AX
|
||||
self.Emit("CMPQ" , _VAR_st_Ep, _AX) // CMPQ st.Ep, AX
|
||||
self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n}
|
||||
self.malloc(_SI, _DX) // MALLOC SI, DX
|
||||
self.Emit("MOVQ" , p, _DI) // MOVQ ${p}, DI
|
||||
|
|
|
|||
|
|
@ -478,7 +478,7 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
||||
self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0
|
||||
|
||||
/* represent number as `float64` */
|
||||
/* represent numbers as `float64` */
|
||||
self.Link("_use_float64") // _use_float64:
|
||||
self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP)
|
||||
self.call_go(_F_convT64) // CALL_GO runtime.convT64
|
||||
|
|
@ -487,7 +487,7 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
||||
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
||||
|
||||
/* represent number as `json.Number` */
|
||||
/* represent numbers as `json.Number` */
|
||||
self.Link("_use_number") // _use_number:
|
||||
self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX
|
||||
self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI
|
||||
|
|
@ -501,7 +501,7 @@ func (self *_ValueDecoder) compile() {
|
|||
self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI
|
||||
self.Sjmp("JMP" , "_set_value") // JMP _set_value
|
||||
|
||||
/* represent number as `int64` */
|
||||
/* represent numbers as `int64` */
|
||||
self.Link("_use_int64") // _use_int64:
|
||||
self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX
|
||||
self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -19,16 +19,16 @@ var (
|
|||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
|
||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15888
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15923
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13566
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9279
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11854
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13016
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10885
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13293
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -19,16 +19,16 @@ var (
|
|||
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
|
||||
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
|
||||
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18308
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18343
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15508
|
||||
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
|
||||
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
|
||||
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
|
||||
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
|
||||
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11232
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13796
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14958
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12943
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15235
|
||||
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
|
||||
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
|
||||
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
|
||||
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
|
||||
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
38
issue83_test.go
Normal file
38
issue83_test.go
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright 2021 ByteDance Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sonic
|
||||
|
||||
import (
|
||||
`testing`
|
||||
|
||||
`github.com/bytedance/sonic/decoder`
|
||||
`github.com/stretchr/testify/require`
|
||||
)
|
||||
|
||||
// Issue83Struct is the decode target for the issue #83 regression test.
// Field X carries the `string` option in its json tag; the test feeds it a
// value that is itself a quoted string nested inside a JSON string.
type Issue83Struct struct {
|
||||
X string `json:"x,string"`
|
||||
}
|
||||
|
||||
func TestIssue83_SurrogateHalfInDoubleQuotedString(t *testing.T) {
|
||||
var v Issue83Struct
|
||||
err := Unmarshal([]byte(`{"x":"\"\\ud800\\u1234\""}`), &v)
|
||||
if err != nil {
|
||||
println(err.(decoder.SyntaxError).Description())
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.Equal(t, Issue83Struct{"\ufffd\u1234"}, v)
|
||||
}
|
||||
|
|
@ -512,6 +512,9 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
|||
sp += 4;
|
||||
nb -= 4;
|
||||
|
||||
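/* re-entry point: the surrogate-pair check further below jumps back here with `r0` set to the following code point, after emitting a replacement character for the unpaired half */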
|
||||
retry_decode:
|
||||
|
||||
/* ASCII characters, unlikely */
|
||||
if (unlikely(r0 <= 0x7f)) {
|
||||
*dp++ = (char)r0;
|
||||
|
|
@ -536,14 +539,24 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
|||
/* check for double unquote */
|
||||
if (unlikely(flags & F_DBLUNQ)) {
|
||||
if (nb < 1) {
|
||||
*ep = x;
|
||||
return -ERR_EOF;
|
||||
} else if (sp[0] != '\\') {
|
||||
*ep = sp - s - 4;
|
||||
return -ERR_UNICODE;
|
||||
if (likely(flags & F_UNIREP)) {
|
||||
unirep(&dp);
|
||||
continue;
|
||||
} else {
|
||||
*ep = x;
|
||||
return -ERR_EOF;
|
||||
}
|
||||
} else {
|
||||
nb--;
|
||||
sp++;
|
||||
if (sp[0] == '\\') {
|
||||
nb--;
|
||||
sp++;
|
||||
} else if (likely(flags & F_UNIREP)) {
|
||||
unirep(&dp);
|
||||
continue;
|
||||
} else {
|
||||
*ep = sp - s - 4;
|
||||
return -ERR_UNICODE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -561,7 +574,7 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
|||
/* check the hexadecimal escape */
|
||||
if (!unhex16_is(sp + 2)) {
|
||||
*ep = sp - s + 2;
|
||||
for (int i = 0; i < 4 && ishex(sp[2]); i++, sp++) ++*ep;
|
||||
for (int i = 2; i < 6 && ishex(sp[i]); i++) ++*ep;
|
||||
return -ERR_INVAL;
|
||||
}
|
||||
|
||||
|
|
@ -572,13 +585,17 @@ ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flag
|
|||
|
||||
/* it must be the other half */
|
||||
if (r1 < 0xdc00 || r1 > 0xdfff) {
|
||||
if (likely(!(flags & F_UNIREP))) {
|
||||
if (unlikely(!(flags & F_UNIREP))) {
|
||||
*ep = sp - s - 4;
|
||||
return -ERR_UNICODE;
|
||||
} else {
|
||||
} else if (likely(r1 >= 0xd800 && r1 <= 0xdfff)) {
|
||||
unirep(&dp);
|
||||
unirep(&dp);
|
||||
continue;
|
||||
} else {
|
||||
r0 = r1;
|
||||
unirep(&dp);
|
||||
goto retry_decode;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue