fix: enhance float parsing as Go encoding/json

2026-06-24 18:36:43 +08:00 · 2021-07-21 18:52:23 +08:00 · 2021-07-21 18:52:23 +08:00 · f9632ab873
commit f9632ab873
parent 6b4022a19f
7 changed files with 3164 additions and 3586 deletions
--- a/decode_float_test.go
+++ b/decode_float_test.go
@ -0,0 +1,158 @@
 /*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package sonic
 import (
 	"encoding/json"
 	"reflect"
 	"strings"
 	"testing"
 	"github.com/bytedance/sonic/decoder"
 )
 // atofTest is one row of the float-parsing table.
 //
 // in is the raw text handed to the decoders. out and err carry the expected
 // formatted value and error recorded alongside each input; TestDecodeFloat in
 // this file reads only in.
 type atofTest struct {
 	in, out string
 	err     error
 }
 // atoftests lists float inputs taken from the Go strconv package tests,
 // https://github.com/golang/go/blob/master/src/strconv/atof_test.go
 // All of these cases pass with Go encoding/json. TestDecodeFloat feeds each
 // in string to both sonic and encoding/json and requires the two to agree.
 var atoftests = []atofTest{
 	// inputs ending in a non-digit tail ("e" with no exponent digits, "i")
 	{"1.234e", "1.234", nil},
 	{"1i", "1", nil},
 	// basic integer, fraction and exponent forms
 	{"1", "1", nil},
 	{"1e23", "1e+23", nil},
 	{"1E23", "1e+23", nil},
 	{"100000000000000000000000", "1e+23", nil},
 	{"1e-100", "1e-100", nil},
 	{"123456700", "1.234567e+08", nil},
 	{"99999999999999974834176", "9.999999999999997e+22", nil},
 	{"100000000000000000000001", "1.0000000000000001e+23", nil},
 	{"100000000000000008388608", "1.0000000000000001e+23", nil},
 	{"100000000000000016777215", "1.0000000000000001e+23", nil},
 	{"100000000000000016777216", "1.0000000000000003e+23", nil},
 	{"-1", "-1", nil},
 	{"-0.1", "-0.1", nil},
 	{"-0", "-0", nil},
 	{"1e-20", "1e-20", nil},
 	{"625e-3", "0.625", nil},
 	// zeros, including signed zeros and zeros with huge exponents
 	{"0", "0", nil},
 	{"0e0", "0", nil},
 	{"-0e0", "-0", nil},
 	{"0e-0", "0", nil},
 	{"-0e-0", "-0", nil},
 	{"0e+0", "0", nil},
 	{"-0e+0", "-0", nil},
 	{"0e+01234567890123456789", "0", nil},
 	{"0.00e-01234567890123456789", "0", nil},
 	{"-0e+01234567890123456789", "-0", nil},
 	{"-0.00e-01234567890123456789", "-0", nil},
 	{"0e291", "0", nil}, // issue 15364
 	{"0e292", "0", nil}, // issue 15364
 	{"0e347", "0", nil}, // issue 15364
 	{"0e348", "0", nil}, // issue 15364
 	{"-0e291", "-0", nil},
 	{"-0e292", "-0", nil},
 	{"-0e347", "-0", nil},
 	{"-0e348", "-0", nil},
 	// largest float64
 	{"1.7976931348623157e308", "1.7976931348623157e+308", nil},
 	{"-1.7976931348623157e308", "-1.7976931348623157e+308", nil},
 	// the border is ...158079
 	// borderline - okay: still expected to round to the largest float64
 	{"1.7976931348623158e308", "1.7976931348623157e+308", nil},
 	{"-1.7976931348623158e308", "-1.7976931348623157e+308", nil},
 	// large, but still below the largest float64 listed above
 	{"1e308", "1e+308", nil},
 	// denormalized
 	{"1e-305", "1e-305", nil},
 	{"1e-306", "1e-306", nil},
 	{"1e-307", "1e-307", nil},
 	{"1e-308", "1e-308", nil},
 	{"1e-309", "1e-309", nil},
 	{"1e-310", "1e-310", nil},
 	{"1e-322", "1e-322", nil},
 	// smallest denormal
 	{"5e-324", "5e-324", nil},
 	{"4e-324", "5e-324", nil},
 	{"3e-324", "5e-324", nil},
 	// too small: expected to round to zero
 	{"2e-324", "0", nil},
 	// way too small
 	{"1e-350", "0", nil},
 	{"1e-400000", "0", nil},
 	// try to overflow the exponent accumulator
 	{"1e-4294967296", "0", nil},
 	{"1e-18446744073709551616", "0", nil},
 	// https://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
 	{"2.2250738585072012e-308", "2.2250738585072014e-308", nil},
 	// https://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
 	{"2.2250738585072011e-308", "2.225073858507201e-308", nil},
 	// A very large number (initially wrongly parsed by the fast algorithm).
 	{"4.630813248087435e+307", "4.630813248087435e+307", nil},
 	// Mantissas with more digits than the expected output keeps,
 	// up to 800 fractional digits.
 	{"22.222222222222222", "22.22222222222222", nil},
 	{"2." + strings.Repeat("2", 800) + "e+1", "22.22222222222222", nil},
 	// Exactly halfway between 1 and math.Nextafter(1, 2).
 	// Round to even (down).
 	{"1.00000000000000011102230246251565404236316680908203125", "1", nil},
 	// Slightly lower; still round down.
 	{"1.00000000000000011102230246251565404236316680908203124", "1", nil},
 	// Slightly higher; round up.
 	{"1.00000000000000011102230246251565404236316680908203126", "1.0000000000000002", nil},
 	// Slightly higher, but you have to read all the way to the end.
 	{"1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", "1.0000000000000002", nil},
 	// Halfway between x := math.Nextafter(1, 2) and math.Nextafter(x, 2)
 	// Round to even (up).
 	{"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil},
 	// Halfway between 1090544144181609278303144771584 and 1090544144181609419040633126912
 	// (15497564393479157p+46, should round to even 15497564393479156p+46, issue 36657)
 	{"1090544144181609348671888949248", "1.0905441441816093e+30", nil},
 	// slightly above, rounds up
 	{"1090544144181609348835077142190", "1.0905441441816094e+30", nil},
 }
 // TestDecodeFloat decodes every input in atoftests with both sonic's decoder
 // and encoding/json, and fails on the first input where the two disagree on
 // either success/failure or the decoded value. Only the in field of each
 // table row is used; the reference result always comes from encoding/json.
 func TestDecodeFloat(t *testing.T) {
 	for _, tt := range atoftests {
 		// Fresh destinations for every case: a decode that fails may leave its
 		// target untouched, and a value left over from the previous case must
 		// not be compared (or reported) as if it belonged to this one.
 		var sonicout, stdout interface{}
 
 		// Decode into interface{} so numbers take the default float64 form.
 		sonicerr := decoder.NewDecoder(tt.in).Decode(&sonicout)
 		stderr := json.NewDecoder(strings.NewReader(tt.in)).Decode(&stdout)
 
 		// Check success/failure agreement first: when only one side errors,
 		// that is the more informative failure to report.
 		if (sonicerr == nil) != (stderr == nil) {
 			t.Fatalf("Test %#v\ngot:\n   %#v\nexp:\n   %#v\n", tt.in, sonicerr, stderr)
 		}
 		if !reflect.DeepEqual(sonicout, stdout) {
 			t.Fatalf("Test %#v\ngot:\n   %#v\nexp:\n   %#v\n", tt.in, sonicout, stdout)
 		}
 	}
 }
--- a/internal/native/avx/native_amd64.s
+++ b/internal/native/avx/native_amd64.s
--- a/internal/native/avx/native_subr_amd64.go
+++ b/internal/native/avx/native_subr_amd64.go
@ -19,16 +19,16 @@ var (
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4951
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16916
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16081
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16951
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16116
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14594
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13759
    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3731
    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5972
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10112
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 9426
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12979
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11985
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14044
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13209
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11718
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11032
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14321
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13486
 )
 var (
--- a/internal/native/avx2/native_amd64.s
+++ b/internal/native/avx2/native_amd64.s
--- a/internal/native/avx2/native_subr_amd64.go
+++ b/internal/native/avx2/native_subr_amd64.go
@ -19,16 +19,16 @@ var (
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5299
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19341
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18501
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19376
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 18536
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16541
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15701
    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3979
    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7136
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12070
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11379
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14926
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13927
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15991
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15151
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13781
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13090
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16268
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15428
 )
 var (
--- a/native/atof_native.c
+++ b/native/atof_native.c
@ -30,6 +30,7 @@ typedef struct Decimal {
    int   nd;
    int   dp;
    int   neg;
    int   trunc;
 } Decimal;
 /* decimal power of ten to binary power of two.
@ -62,6 +63,7 @@ static inline void decimal_init(Decimal *d) {
    d->dp    = 0;
    d->nd    = 0;
    d->neg   = 0;
    d->trunc = 0;
 }
 static inline void decimal_set(Decimal *d, const char *s, int len) {
@ -88,6 +90,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
                d->nd++;
            } else if (s[i] != '0') {
                /* truncate the remaining digits */
                d->trunc = 1;
            }
        } else if (s[i] == '.') {
            saw_dot = 1;
@ -115,7 +118,7 @@ static inline void decimal_set(Decimal *d, const char *s, int len) {
            esgn = -1;
        }
-        for (; i < len && ('0' <= s[i] && s[i] <= '9'); i++) {
+        for (; i < len && ('0' <= s[i] && s[i] <= '9') && exp < 10000; i++) {
                exp = exp * 10 + (s[i] - '0');
        }
        d->dp += exp * esgn;
@ -178,6 +181,7 @@ static inline void right_shift(Decimal *d, uint32_t k) {
            w++;
        } else if (dig > 0) {
            /* truncated */
            d->trunc = 1;
        }
        n *= 10;
    }
@ -227,6 +231,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
            w--;
        } else if (rem != 0) {
            /* truncated */
            d->trunc = 1;
        }
        n = quo;
    }
@ -240,6 +245,7 @@ static inline void left_shift(Decimal *d, uint32_t k) {
            w--;
        } else if (rem != 0) {
            /* truncated */
            d->trunc = 1;
        }
        n = quo;
    }
@ -286,6 +292,9 @@ static inline int should_roundup(Decimal *d, int nd) {
    /* Exactly halfway - round to even */
    if (d->d[nd] == '5' && nd+1 == d->nd) {
        if (d->trunc) {
            return 1;
        }
        return nd > 0 && (d->d[nd-1]-'0')%2 != 0;
    }
--- a/native/scanning.c
+++ b/native/scanning.c
@ -493,7 +493,7 @@ static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
    f *= sgn;
    *val = 0;
-    if (exp == 0) {
+    if (exp == 0 || man == 0) {
        *val = f;
        return 1;
    } else if (exp > 0 && exp <= 15+22) {
@ -520,8 +520,9 @@ static inline int is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
    return 0;
 }
-double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long idx) {
+static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, const GoString *src, long idx) {
    double val    = 0.0;
    double val_up = 0.0;
    /* look-up for fast atof if the conversion can be done exactly */
    if (is_atof_exact(man, exp, sgn, &val)) {
@ -530,8 +531,13 @@ double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long i
    /* A fast atof algorithm for high precision */
    if (atof_eisel_lemire64(man, exp, sgn, &val)) {
        if (!trunc) {
            return val;
        }
        if (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == val) {
            return val;
        }
    }
    /* when above algorithms failed, fallback. It is slow. */
    return atof_native_decimal(src->buf + idx, src->len - idx);
@ -540,12 +546,11 @@ double parse_float64(uint64_t man, int exp, int sgn, const GoString *src, long i
 void vnumber(const GoString *src, long *p, JsonState *ret) {
    int      dig;
    int      ovf = 0;
    int      sgn = 1;
    double   val = 0;
    uint64_t man = 0; // mantissa for double (float64)
    int   man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t
    int    exp10 = 0; // man * exp10 represents the true value
    int    trunc = 0;
    /* initial buffer pointers */
    long         i = *p;
@ -574,6 +579,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
        i++;
    }
    if (exp10 > 0) {
        trunc = 1;
    }
    /* check for decimal points */
    if (i < n && s[i] == '.') {
        i++;
@ -601,6 +610,7 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
     /* skip the remaining digits */
    while (i < n && is_digit(s[i])) {
        trunc = 1;
        i++;
    }
@ -616,7 +626,10 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
        parse_sign(esm)
        check_digit()
        while (i < n && is_digit(s[i])) {
-            exp = exp * 10 + (s[i++] - '0');
+            if (exp < 10000) {
                exp = exp * 10 + (s[i] - '0');
            }
            i++;
        }
        exp10 += exp * esm;
    }
@ -633,7 +646,7 @@ out:
    }
    if (ret->vt == V_DOUBLE) {
-        ret->dv = parse_float64(man, exp10, sgn, src, si);
+        ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
    }
    /* update the result */