2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-24 18:36:43 +08:00

fix: enhance float parsing as Go encoding/json

This commit is contained in:
liuqiang 2021-07-21 18:52:23 +08:00 committed by Oxygen
parent 6b4022a19f
commit f9632ab873
7 changed files with 3164 additions and 3586 deletions

158
decode_float_test.go Normal file
View file

@ -0,0 +1,158 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sonic
import (
"encoding/json"
"reflect"
"strings"
"testing"
"github.com/bytedance/sonic/decoder"
)
// atofTest is a single row of the float-parsing table below.
// Fields are kept in (in, out, err) order so positional literals stay valid.
type atofTest struct {
	// in is the raw number text handed to the decoders.
	// out is presumably the expected shortest formatting of the parsed value,
	// as in the strconv table these rows come from — TODO confirm; it is not
	// read by TestDecodeFloat in this file.
	in, out string
	// err is the expected error; it is nil in every row of atoftests.
	err error
}
// atoftests is the table of number literals driven through TestDecodeFloat.
// The rows are taken from the Go strconv package,
// https://github.com/golang/go/blob/master/src/strconv/atof_test.go
// TestDecodeFloat decodes each `in` with both sonic and Go's encoding/json and
// requires the two to agree; the `out` and `err` columns are not consulted there.
var atoftests = []atofTest{
// not well-formed JSON numbers (dangling exponent marker, trailing letter);
// these exercise agreement on the error path
{"1.234e", "1.234", nil},
{"1i", "1", nil},
// ordinary integers, decimals and exponents
{"1", "1", nil},
{"1e23", "1e+23", nil},
{"1E23", "1e+23", nil},
{"100000000000000000000000", "1e+23", nil},
{"1e-100", "1e-100", nil},
{"123456700", "1.234567e+08", nil},
{"99999999999999974834176", "9.999999999999997e+22", nil},
{"100000000000000000000001", "1.0000000000000001e+23", nil},
{"100000000000000008388608", "1.0000000000000001e+23", nil},
{"100000000000000016777215", "1.0000000000000001e+23", nil},
{"100000000000000016777216", "1.0000000000000003e+23", nil},
{"-1", "-1", nil},
{"-0.1", "-0.1", nil},
{"-0", "-0", nil},
{"1e-20", "1e-20", nil},
{"625e-3", "0.625", nil},
// zeros
{"0", "0", nil},
{"0e0", "0", nil},
{"-0e0", "-0", nil},
{"0e-0", "0", nil},
{"-0e-0", "-0", nil},
{"0e+0", "0", nil},
{"-0e+0", "-0", nil},
{"0e+01234567890123456789", "0", nil},
{"0.00e-01234567890123456789", "0", nil},
{"-0e+01234567890123456789", "-0", nil},
{"-0.00e-01234567890123456789", "-0", nil},
{"0e291", "0", nil}, // issue 15364
{"0e292", "0", nil}, // issue 15364
{"0e347", "0", nil}, // issue 15364
{"0e348", "0", nil}, // issue 15364
{"-0e291", "-0", nil},
{"-0e292", "-0", nil},
{"-0e347", "-0", nil},
{"-0e348", "-0", nil},
// largest float64
{"1.7976931348623157e308", "1.7976931348623157e+308", nil},
{"-1.7976931348623157e308", "-1.7976931348623157e+308", nil},
// the border is ...158079
// borderline - okay
{"1.7976931348623158e308", "1.7976931348623157e+308", nil},
{"-1.7976931348623158e308", "-1.7976931348623157e+308", nil},
// large, but still below the largest float64 listed above
{"1e308", "1e+308", nil},
// denormalized
{"1e-305", "1e-305", nil},
{"1e-306", "1e-306", nil},
{"1e-307", "1e-307", nil},
{"1e-308", "1e-308", nil},
{"1e-309", "1e-309", nil},
{"1e-310", "1e-310", nil},
{"1e-322", "1e-322", nil},
// smallest denormal
{"5e-324", "5e-324", nil},
{"4e-324", "5e-324", nil},
{"3e-324", "5e-324", nil},
// too small
{"2e-324", "0", nil},
// way too small
{"1e-350", "0", nil},
{"1e-400000", "0", nil},
// try to overflow exponent
{"1e-4294967296", "0", nil},
{"1e-18446744073709551616", "0", nil},
// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
{"2.2250738585072012e-308", "2.2250738585072014e-308", nil},
// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
{"2.2250738585072011e-308", "2.225073858507201e-308", nil},
// A very large number (initially wrongly parsed by the fast algorithm).
{"4.630813248087435e+307", "4.630813248087435e+307", nil},
// A different kind of "very large" number: modest in value, but written
// with far more digits than appear in the expected output.
{"22.222222222222222", "22.22222222222222", nil},
{"2." + strings.Repeat("2", 800) + "e+1", "22.22222222222222", nil},
// Exactly halfway between 1 and math.Nextafter(1, 2).
// Round to even (down).
{"1.00000000000000011102230246251565404236316680908203125", "1", nil},
// Slightly lower; still round down.
{"1.00000000000000011102230246251565404236316680908203124", "1", nil},
// Slightly higher; round up.
{"1.00000000000000011102230246251565404236316680908203126", "1.0000000000000002", nil},
// Slightly higher, but you have to read all the way to the end.
{"1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", "1.0000000000000002", nil},
// Halfway between x := math.Nextafter(1, 2) and math.Nextafter(x, 2)
// Round to even (up).
{"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil},
// Halfway between 1090544144181609278303144771584 and 1090544144181609419040633126912
// (15497564393479157p+46, should round to even 15497564393479156p+46, issue 36657)
{"1090544144181609348671888949248", "1.0905441441816093e+30", nil},
// slightly above, rounds up
{"1090544144181609348835077142190", "1.0905441441816094e+30", nil},
}
// TestDecodeFloat checks that sonic decodes every literal in atoftests the
// same way encoding/json does. For each input, both decoders must agree on
// whether the text is accepted and, when decoding into an empty interface,
// on the resulting value.
func TestDecodeFloat(t *testing.T) {
	for _, tt := range atoftests {
		// Fresh destinations for every case: a decoder that fails, or that
		// returns without storing anything, must not be compared against a
		// value left over from the previous iteration.
		var sonicout, stdout interface{}

		// Decoding into interface{} exercises the default number
		// representation (float64 in encoding/json).
		sonicerr := decoder.NewDecoder(tt.in).Decode(&sonicout)
		stderr := json.NewDecoder(strings.NewReader(tt.in)).Decode(&stdout)

		// Only success versus failure is compared, not the error values.
		if (sonicerr == nil) != (stderr == nil) {
			t.Fatalf("Test %#v\ngot err:\n %v\nexp err:\n %v\n", tt.in, sonicerr, stderr)
		}

		// NOTE(review): reflect.DeepEqual compares floats with ==, so this
		// does not distinguish -0 from 0.
		if !reflect.DeepEqual(sonicout, stdout) {
			t.Fatalf("Test %#v\ngot:\n %#v\nexp:\n %#v\n", tt.in, sonicout, stdout)
		}
	}
}

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238 _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951 _subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16916 _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16081
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16951 _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16116
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14594 _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13759
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731 _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972 _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10112 _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9426
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12979 _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11985
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14044 _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13209
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11718 _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11032
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14321 _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13486
) )
var ( var (

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366 _subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0 _subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299 _subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19341 _subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18501
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19376 _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18536
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16541 _subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15701
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979 _subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136 _subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12070 _subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11379
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14926 _subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13927
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15991 _subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15151
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13781 _subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13090
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16268 _subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15428
) )
var ( var (

View file

@ -30,6 +30,7 @@ typedef struct Decimal {
int nd; int nd;
int dp; int dp;
int neg; int neg;
int trunc;
} Decimal; } Decimal;
/* decimal power of ten to binary power of two. /* decimal power of ten to binary power of two.
@ -62,6 +63,7 @@ static inline void decimal_init(Decimal *d) {
d->dp = 0; d->dp = 0;
d->nd = 0; d->nd = 0;
d->neg = 0; d->neg = 0;
d->trunc = 0;
} }
static inline void decimal_set(Decimal *d, const char *s, int len) { static inline void decimal_set(Decimal *d, const char *s, int len) {
@ -88,6 +90,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
d->nd++; d->nd++;
} else if (s[i] != '0') { } else if (s[i] != '0') {
/* truncat the remaining digits */ /* truncat the remaining digits */
d->trunc = 1;
} }
} else if (s[i] == '.') { } else if (s[i] == '.') {
saw_dot = 1; saw_dot = 1;
@ -115,7 +118,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
esgn = -1; esgn = -1;
} }
for (; i < len && ('0' <= s[i] && s[i] <= '9'); i++) { for (; i < len && ('0' <= s[i] && s[i] <= '9') && exp < 10000; i++) {
exp = exp * 10 + (s[i] - '0'); exp = exp * 10 + (s[i] - '0');
} }
d->dp += exp * esgn; d->dp += exp * esgn;
@ -178,6 +181,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
w++; w++;
} else if (dig > 0) { } else if (dig > 0) {
/* truncated */ /* truncated */
d->trunc = 1;
} }
n *= 10; n *= 10;
} }
@ -227,6 +231,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
w--; w--;
} else if (rem != 0) { } else if (rem != 0) {
/* truncated */ /* truncated */
d->trunc = 1;
} }
n = quo; n = quo;
} }
@ -240,6 +245,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
w--; w--;
} else if (rem != 0) { } else if (rem != 0) {
/* truncated */ /* truncated */
d->trunc = 1;
} }
n = quo; n = quo;
} }
@ -286,6 +292,9 @@ static inline int should_roundup(Decimal *d, int nd) {
/* Exactly halfway - round to even */ /* Exactly halfway - round to even */
if (d->d[nd] == '5' && nd+1 == d->nd) { if (d->d[nd] == '5' && nd+1 == d->nd) {
if (d->trunc) {
return 1;
}
return nd > 0 && (d->d[nd-1]-'0')%2 != 0; return nd > 0 && (d->d[nd-1]-'0')%2 != 0;
} }

View file

@ -493,7 +493,7 @@ static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
f *= sgn; f *= sgn;
*val = 0; *val = 0;
if (exp == 0) { if (exp == 0 || man == 0) {
*val = f; *val = f;
return 1; return 1;
} else if (exp > 0 && exp <= 15+22) { } else if (exp > 0 && exp <= 15+22) {
@ -520,8 +520,9 @@ static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
return 0; return 0;
} }
double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long idx) { static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, const GoString *src, long idx) {
double val = 0.0; double val = 0.0;
double val_up = 0.0;
/* look-up for fast atof if the conversion can be exactly */ /* look-up for fast atof if the conversion can be exactly */
if (is_atof_exact(man, exp, sgn, &val)) { if (is_atof_exact(man, exp, sgn, &val)) {
@ -530,8 +531,13 @@ double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long i
/* A fast atof algorithm for high percison */ /* A fast atof algorithm for high percison */
if (atof_eisel_lemire64(man, exp, sgn, &val)) { if (atof_eisel_lemire64(man, exp, sgn, &val)) {
if (!trunc) {
return val; return val;
} }
if (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == val) {
return val;
}
}
/* when above algorithms failed, fallback. It is slow. */ /* when above algorithms failed, fallback. It is slow. */
return atof_native_decimal(src->buf + idx, src->len - idx); return atof_native_decimal(src->buf + idx, src->len - idx);
@ -540,12 +546,11 @@ double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long i
void vnumber(const GoString *src, long *p, JsonState *ret) { void vnumber(const GoString *src, long *p, JsonState *ret) {
int dig; int dig;
int ovf = 0;
int sgn = 1; int sgn = 1;
double val = 0;
uint64_t man = 0; // mantissa for double (float64) uint64_t man = 0; // mantissa for double (float64)
int man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t int man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t
int exp10 = 0; // man * exp10 represents the true value int exp10 = 0; // man * exp10 represents the true value
int trunc = 0;
/* initial buffer pointers */ /* initial buffer pointers */
long i = *p; long i = *p;
@ -574,6 +579,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
i++; i++;
} }
if (exp10 > 0) {
trunc = 1;
}
/* check for decimal points */ /* check for decimal points */
if (i < n && s[i] == '.') { if (i < n && s[i] == '.') {
i++; i++;
@ -601,6 +610,7 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
/* skip the remaining digits */ /* skip the remaining digits */
while (i < n && is_digit(s[i])) { while (i < n && is_digit(s[i])) {
trunc = 1;
i++; i++;
} }
@ -616,7 +626,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
parse_sign(esm) parse_sign(esm)
check_digit() check_digit()
while (i < n && is_digit(s[i])) { while (i < n && is_digit(s[i])) {
exp = exp * 10 + (s[i++] - '0'); if (exp < 10000) {
exp = exp * 10 + (s[i] - '0');
}
i++;
} }
exp10 += exp * esm; exp10 += exp * esm;
} }
@ -633,7 +646,7 @@ out:
} }
if (ret->vt == V_DOUBLE) { if (ret->vt == V_DOUBLE) {
ret->dv = parse_float64(man, exp10, sgn, src, si); ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
} }
/* update the result */ /* update the result */