2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00

fix: enhance float parsing as Go encoding/json

This commit is contained in:
liuqiang 2021-07-21 18:52:23 +08:00 committed by Oxygen
parent 6b4022a19f
commit f9632ab873
7 changed files with 3164 additions and 3586 deletions

158
decode_float_test.go Normal file
View file

@ -0,0 +1,158 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sonic
import (
"encoding/json"
"reflect"
"strings"
"testing"
"github.com/bytedance/sonic/decoder"
)
// atofTest is a single row of the float-parsing table.
//
// in is the textual number handed to the decoders. out and err follow the
// column layout of the strconv table the cases were taken from; presumably
// they are the expected formatted value and the expected error (confirm
// against the upstream table before relying on them).
type atofTest struct {
	in, out string
	err     error
}
// atoftests is a table of number literals adapted from the Go strconv package,
// https://github.com/golang/go/blob/master/src/strconv/atof_test.go
// Go's encoding/json passes all of these cases.
//
// Each row is {in, out, err}.
// NOTE(review): in the visible part of this file only the `in` column is read
// (by TestDecodeFloat, which uses encoding/json as the reference); `out` and
// `err` are not checked there.
var atoftests = []atofTest{
// trailing exponent marker / trailing letter after the digits
{"1.234e", "1.234", nil},
{"1i", "1", nil},
// plain integers, exponents and values around 1e23
{"1", "1", nil},
{"1e23", "1e+23", nil},
{"1E23", "1e+23", nil},
{"100000000000000000000000", "1e+23", nil},
{"1e-100", "1e-100", nil},
{"123456700", "1.234567e+08", nil},
{"99999999999999974834176", "9.999999999999997e+22", nil},
{"100000000000000000000001", "1.0000000000000001e+23", nil},
{"100000000000000008388608", "1.0000000000000001e+23", nil},
{"100000000000000016777215", "1.0000000000000001e+23", nil},
{"100000000000000016777216", "1.0000000000000003e+23", nil},
// negative values and small fractions
{"-1", "-1", nil},
{"-0.1", "-0.1", nil},
{"-0", "-0", nil},
{"1e-20", "1e-20", nil},
{"625e-3", "0.625", nil},
// zeros, including signed zeros and zeros with huge exponents
{"0", "0", nil},
{"0e0", "0", nil},
{"-0e0", "-0", nil},
{"0e-0", "0", nil},
{"-0e-0", "-0", nil},
{"0e+0", "0", nil},
{"-0e+0", "-0", nil},
{"0e+01234567890123456789", "0", nil},
{"0.00e-01234567890123456789", "0", nil},
{"-0e+01234567890123456789", "-0", nil},
{"-0.00e-01234567890123456789", "-0", nil},
{"0e291", "0", nil}, // issue 15364
{"0e292", "0", nil}, // issue 15364
{"0e347", "0", nil}, // issue 15364
{"0e348", "0", nil}, // issue 15364
{"-0e291", "-0", nil},
{"-0e292", "-0", nil},
{"-0e347", "-0", nil},
{"-0e348", "-0", nil},
// largest float64
{"1.7976931348623157e308", "1.7976931348623157e+308", nil},
{"-1.7976931348623157e308", "-1.7976931348623157e+308", nil},
// the border is ...158079
// borderline - okay
{"1.7976931348623158e308", "1.7976931348623157e+308", nil},
{"-1.7976931348623158e308", "-1.7976931348623157e+308", nil},
// large exponent that is still below the largest float64 listed above
{"1e308", "1e+308", nil},
// denormalized
{"1e-305", "1e-305", nil},
{"1e-306", "1e-306", nil},
{"1e-307", "1e-307", nil},
{"1e-308", "1e-308", nil},
{"1e-309", "1e-309", nil},
{"1e-310", "1e-310", nil},
{"1e-322", "1e-322", nil},
// smallest denormal
{"5e-324", "5e-324", nil},
{"4e-324", "5e-324", nil},
{"3e-324", "5e-324", nil},
// too small
{"2e-324", "0", nil},
// way too small
{"1e-350", "0", nil},
{"1e-400000", "0", nil},
// try to overflow exponent
{"1e-4294967296", "0", nil},
{"1e-18446744073709551616", "0", nil},
// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
{"2.2250738585072012e-308", "2.2250738585072014e-308", nil},
// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
{"2.2250738585072011e-308", "2.225073858507201e-308", nil},
// A very large number (initially wrongly parsed by the fast algorithm).
{"4.630813248087435e+307", "4.630813248087435e+307", nil},
// Inputs carrying more significant digits than the expected output keeps
// (17 digits, then 801 digits).
{"22.222222222222222", "22.22222222222222", nil},
{"2." + strings.Repeat("2", 800) + "e+1", "22.22222222222222", nil},
// Exactly halfway between 1 and math.Nextafter(1, 2).
// Round to even (down).
{"1.00000000000000011102230246251565404236316680908203125", "1", nil},
// Slightly lower; still round down.
{"1.00000000000000011102230246251565404236316680908203124", "1", nil},
// Slightly higher; round up.
{"1.00000000000000011102230246251565404236316680908203126", "1.0000000000000002", nil},
// Slightly higher, but you have to read all the way to the end.
{"1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", "1.0000000000000002", nil},
// Halfway between x := math.Nextafter(1, 2) and math.Nextafter(x, 2)
// Round to even (up).
{"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil},
// Halfway between 1090544144181609278303144771584 and 1090544144181609419040633126912
// (15497564393479157p+46, should round to even 15497564393479156p+46, issue 36657)
{"1090544144181609348671888949248", "1.0905441441816093e+30", nil},
// slightly above, rounds up
{"1090544144181609348835077142190", "1.0905441441816094e+30", nil},
}
// TestDecodeFloat feeds every input of atoftests to both the sonic decoder and
// encoding/json, decoding into an empty interface, and requires the two to
// agree: first on whether the input is accepted at all, then on the decoded
// value. encoding/json is the reference; the out/err columns of the table are
// not consulted.
//
// All mismatches are reported (t.Errorf) instead of stopping at the first one,
// so a single run shows the full extent of a regression.
func TestDecodeFloat(t *testing.T) {
	for i, tt := range atoftests {
		// Fresh targets for every case: if a decoder fails and leaves its
		// target untouched, the previous case's value must not be left behind
		// to be compared or printed.
		var sonicout, stdout interface{}

		// Decoding into interface{} uses each decoder's default number
		// representation (float64 for encoding/json).
		sonicerr := decoder.NewDecoder(tt.in).Decode(&sonicout)
		stderr := json.NewDecoder(strings.NewReader(tt.in)).Decode(&stdout)

		// Check acceptance first; the decoded values are only meaningful when
		// both decoders succeeded.
		if (sonicerr == nil) != (stderr == nil) {
			t.Errorf("Test #%d %#v (error mismatch)\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sonicerr, stderr)
			continue
		}
		if sonicerr != nil {
			continue
		}
		if !reflect.DeepEqual(sonicout, stdout) {
			t.Errorf("Test #%d %#v\ngot:\n %#v\nexp:\n %#v\n", i, tt.in, sonicout, stdout)
		}
	}
}

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16916
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16951
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14594
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16081
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16116
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13759
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10112
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12979
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14044
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11718
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14321
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9426
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11985
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13209
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11032
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13486
)
var (

File diff suppressed because it is too large Load diff

View file

@ -19,16 +19,16 @@ var (
_subr__lspace = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
_subr__lzero = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
_subr__quote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19341
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19376
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16541
_subr__skip_array = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18501
_subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18536
_subr__skip_one = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15701
_subr__u64toa = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979
_subr__unquote = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12070
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14926
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15991
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13781
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16268
_subr__value = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11379
_subr__vnumber = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13927
_subr__vsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15151
_subr__vstring = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13090
_subr__vunsigned = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15428
)
var (

View file

@ -23,16 +23,17 @@
/* Decimal represent the integer or float
* example 1: 1.1 {"11", 2, 1, 0}
* example 2: -0.1 {"1", 1, 0, 1}
* example 3: 999 {"999", 3, 3, 0}
* example 3: 999 {"999", 3, 3, 0}
*/
typedef struct Decimal {
char d[DECIMAL_MAX_DNUM];
int nd;
int dp;
int neg;
int neg;
int trunc;
} Decimal;
/* decimal power of ten to binary power of two.
/* decimal power of ten to binary power of two.
* For example: POW_TAB[1]: 10 ** 1 ~ 2 ** 3
*/
static const int POW_TAB[9] = {1, 3, 6, 9, 13, 16, 19, 23, 26};
@ -50,7 +51,7 @@ typedef struct lshift_cheat {
* idx is shift bits for binary.
* value is the shift information for decimal.
* For example, idx is 4, the value is {2, "625"}.
* That means the binary shift 4 bits left, will cause add 2 digits to the decimal
* That means the binary shift 4 bits left, will cause add 2 digits to the decimal
* if the prefix of decimal is under "625".
*/
const static lshift_cheat LSHIFT_TAB[61];
@ -59,9 +60,10 @@ static inline void decimal_init(Decimal *d) {
for (int i = 0; i < DECIMAL_MAX_DNUM; ++i) {
d->d[i] = 0;
}
d->dp = 0;
d->nd = 0;
d->neg = 0;
d->dp = 0;
d->nd = 0;
d->neg = 0;
d->trunc = 0;
}
static inline void decimal_set(Decimal *d, const char *s, int len) {
@ -70,7 +72,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
decimal_init(d);
if (s[i] == '+') {
i++;
}
}
else if (s[i] == '-') {
i++;
d->neg = 1;
@ -88,17 +90,18 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
d->nd++;
} else if (s[i] != '0') {
/* truncate the remaining digits */
d->trunc = 1;
}
} else if (s[i] == '.') {
saw_dot = 1;
d->dp = d->nd;
} else {
break;
}
}
}
/* integer */
if (saw_dot == 0) {
if (saw_dot == 0) {
d->dp = d->nd;
}
@ -115,7 +118,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
esgn = -1;
}
for (; i < len && ('0' <= s[i] && s[i] <= '9'); i++) {
for (; i < len && ('0' <= s[i] && s[i] <= '9') && exp < 10000; i++) {
exp = exp * 10 + (s[i] - '0');
}
d->dp += exp * esgn;
@ -148,7 +151,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
return;
}
/* until n has enough bits for right shift */
while (n>>k == 0) {
while (n>>k == 0) {
n *= 10;
r++;
}
@ -165,7 +168,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
for (; r < d->nd; r++) {
dig = n >> k;
n &= mask;
d->d[w++] = (char)(dig + '0');
d->d[w++] = (char)(dig + '0');
n = n * 10 + d->d[r] - '0';
}
@ -178,6 +181,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
w++;
} else if (dig > 0) {
/* truncated */
d->trunc = 1;
}
n *= 10;
}
@ -227,6 +231,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
w--;
} else if (rem != 0) {
/* truncated */
d->trunc = 1;
}
n = quo;
}
@ -240,6 +245,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
w--;
} else if (rem != 0) {
/* truncated */
d->trunc = 1;
}
n = quo;
}
@ -276,7 +282,7 @@ static inline void decimal_shift(Decimal *d, int k) {
right_shift(d, -k);
}
}
}
static inline int should_roundup(Decimal *d, int nd) {
@ -285,7 +291,10 @@ static inline int should_roundup(Decimal *d, int nd) {
}
/* Exactly halfway - round to even */
if (d->d[nd] == '5' && nd+1 == d->nd) {
if (d->d[nd] == '5' && nd+1 == d->nd) {
if (d->trunc) {
return 1;
}
return nd > 0 && (d->d[nd-1]-'0')%2 != 0;
}
@ -344,7 +353,7 @@ int decimal_to_f64(Decimal *d, double *val) {
} else {
n = POW_TAB[d->dp];
}
decimal_shift(d, -n); // shift right
decimal_shift(d, -n); // shift right
exp2 += n;
}
while ((d->dp < 0) || (d->dp == 0) && (d->d[0] < '5')) { // d < 0.5
@ -376,12 +385,12 @@ int decimal_to_f64(Decimal *d, double *val) {
}
/* Extract 53 bits. */
decimal_shift(d, 53); // shift left
decimal_shift(d, 53); // shift left
mant = rounded_integer(d);
/* Rounding might have added a bit; shift down. */
if (mant == (((uint64_t)2) << 52)) { // mant has 54 bits
mant >>= 1;
mant >>= 1;
exp2 ++;
if ((exp2 + 1023) >= 0x7FF) {
goto overflow;
@ -496,4 +505,4 @@ const static lshift_cheat LSHIFT_TAB[61] = {
{18, "34694469519536141888238489627838134765625"}, // * 288230376151711744
{18, "173472347597680709441192448139190673828125"}, // * 576460752303423488
{19, "867361737988403547205962240695953369140625"}, // * 1152921504606846976
};
};

View file

@ -486,14 +486,14 @@ void vstring(const GoString *src, long *p, JsonState *ret) {
static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
double f = (double)man;
if (man >> 52 != 0) {
if (man >> 52 != 0) {
return 0;
}
f *= sgn;
*val = 0;
if (exp == 0) {
if (exp == 0 || man == 0) {
*val = f;
return 1;
} else if (exp > 0 && exp <= 15+22) {
@ -515,13 +515,14 @@ static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
} else if (exp < 0 && exp >= -22) {
*val = f / P10_TAB[-exp];
return 1;
}
}
return 0;
}
double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long idx) {
double val = 0.0;
static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, const GoString *src, long idx) {
double val = 0.0;
double val_up = 0.0;
/* look-up for fast atof if the conversion can be exactly */
if (is_atof_exact(man, exp, sgn, &val)) {
@ -530,22 +531,26 @@ double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long i
/* A fast atof algorithm for high precision */
if (atof_eisel_lemire64(man, exp, sgn, &val)) {
return val;
if (!trunc) {
return val;
}
if (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == val) {
return val;
}
}
/* when above algorithms failed, fallback. It is slow. */
return atof_native_decimal(src->buf + idx, src->len - idx);
}
}
void vnumber(const GoString *src, long *p, JsonState *ret) {
int dig;
int ovf = 0;
int sgn = 1;
double val = 0;
uint64_t man = 0; // mantissa for double (float64)
int man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t
int exp10 = 0; // man * exp10 represents the true value
int trunc = 0;
/* initial buffer pointers */
long i = *p;
@ -559,7 +564,7 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
check_sign(sgn = -1)
/* zero */
if (i + 1 == n && s[i] == '0') {
if (i + 1 == n && s[i] == '0') {
i++;
goto out;
}
@ -574,6 +579,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
i++;
}
if (exp10 > 0) {
trunc = 1;
}
/* check for decimal points */
if (i < n && s[i] == '.') {
i++;
@ -582,8 +591,8 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
check_digit()
}
/* skip the leading zeros of 0.000xxxx */
if (man == 0 && exp10 == 0) {
/* skip the leading zeros of 0.000xxxx */
if (man == 0 && exp10 == 0) {
int idx = i;
while (i < n && s[i] == '0') {
i++;
@ -601,6 +610,7 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
/* skip the remaining digits */
while (i < n && is_digit(s[i])) {
trunc = 1;
i++;
}
@ -616,12 +626,15 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
parse_sign(esm)
check_digit()
while (i < n && is_digit(s[i])) {
exp = exp * 10 + (s[i++] - '0');
if (exp < 10000) {
exp = exp * 10 + (s[i] - '0');
}
i++;
}
exp10 += exp * esm;
}
out:
out:
if (ret->vt == V_INTEGER) {
/* if INT64_MIN <= man * sgn <= INT64_MAX */
if ( exp10 == 0 && (((man & ((uint64_t)1 << 63)) == 0) || ((man & sgn) == man))) {
@ -633,7 +646,7 @@ out:
}
if (ret->vt == V_DOUBLE) {
ret->dv = parse_float64(man, exp10, sgn, src, si);
ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
}
/* update the result */