fix: support IEEE-754 negative zero in both marshal and unmarshal

2026-06-21 00:46:43 +08:00 · 2021-09-12 16:48:34 +08:00 · 2021-09-12 16:48:34 +08:00 · ec15697761
commit ec15697761
parent 514208a4d5
10 changed files with 1489 additions and 1411 deletions
--- a/internal/native/avx/fastfloat_amd64_test.go
+++ b/internal/native/avx/fastfloat_amd64_test.go
@ -29,7 +29,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -64,3 +64,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
+
+// BenchmarkFastFloat_EncodeZero compares formatting of a float64 zero by
+// strconv.AppendFloat against the native __f64toa routine.
+func BenchmarkFastFloat_EncodeZero(b *testing.B) {
+    val := float64(0)
+    benchmarks := []struct {
+        name string
+        test func(*testing.B)
+    }{{
+        name: "StdLib",
+        // Append into buf[:0] so the digits are written into the local buffer;
+        // buf[:] is already full (len == cap == 64) and would make every
+        // iteration grow a fresh slice, skewing the comparison.
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
+    }, {
+        name: "FastFloat",
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
+    }}
+    for _, bm := range benchmarks {
+        b.Run(bm.name, bm.test)
+    }
+}
+
--- a/internal/native/avx/native_amd64.s
+++ b/internal/native/avx/native_amd64.s
--- a/internal/native/avx/native_subr_amd64.go
+++ b/internal/native/avx/native_subr_amd64.go
@ -14,21 +14,21 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()

 var (
    _func__base        = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___ // every _subr__* below is a uintptr read through this func value plus a fixed offset (presumably the code entry address; confirm against Go's func value layout)
-    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 558
-    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3541
+    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 542
+    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3551
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
-    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
-    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
-    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
+    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4864
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16717
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16752
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14395
+    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3644
+    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5885
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10074
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12633
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13845
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11680
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14122
 )

 var (
--- a/internal/native/avx2/fastfloat_amd64_test.go
+++ b/internal/native/avx2/fastfloat_amd64_test.go
@ -29,7 +29,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -64,3 +64,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
+
+// BenchmarkFastFloat_EncodeZero compares formatting of a float64 zero by
+// strconv.AppendFloat against the native __f64toa routine.
+func BenchmarkFastFloat_EncodeZero(b *testing.B) {
+    val := float64(0)
+    benchmarks := []struct {
+        name string
+        test func(*testing.B)
+    }{{
+        name: "StdLib",
+        // Append into buf[:0] so the digits are written into the local buffer;
+        // buf[:] is already full (len == cap == 64) and would make every
+        // iteration grow a fresh slice, skewing the comparison.
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
+    }, {
+        name: "FastFloat",
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
+    }}
+    for _, bm := range benchmarks {
+        b.Run(bm.name, bm.test)
+    }
+}
+
--- a/internal/native/avx2/native_amd64.s
+++ b/internal/native/avx2/native_amd64.s
--- a/internal/native/avx2/native_subr_amd64.go
+++ b/internal/native/avx2/native_subr_amd64.go
@ -14,21 +14,21 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()

 var (
    _func__base        = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___ // every _subr__* below is a uintptr read through this func value plus a fixed offset (presumably the code entry address; confirm against Go's func value layout)
-    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 806
-    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3789
+    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 790
+    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3799
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
-    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
-    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
-    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
+    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5212
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19173
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19208
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16373
+    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3892
+    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7049
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12063
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14611
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15823
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13774
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16100
 )

 var (
--- a/internal/native/fastfloat_amd64_test.tmpl
+++ b/internal/native/fastfloat_amd64_test.tmpl
@ -27,7 +27,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -62,3 +62,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
+
+// BenchmarkFastFloat_EncodeZero compares formatting of a float64 zero by
+// strconv.AppendFloat against the native __f64toa routine.
+func BenchmarkFastFloat_EncodeZero(b *testing.B) {
+    val := float64(0)
+    benchmarks := []struct {
+        name string
+        test func(*testing.B)
+    }{{
+        name: "StdLib",
+        // Append into buf[:0] so the digits are written into the local buffer;
+        // buf[:] is already full (len == cap == 64) and would make every
+        // iteration grow a fresh slice, skewing the comparison.
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
+    }, {
+        name: "FastFloat",
+        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
+    }}
+    for _, bm := range benchmarks {
+        b.Run(bm.name, bm.test)
+    }
+}
+
--- a/issue93_test.go
+++ b/issue93_test.go
@ -0,0 +1,46 @@
+/*
+ * Copyright 2021 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package sonic
+
+import (
+    `testing`
+    `math`
+    `encoding/json`
+
+    `github.com/bytedance/sonic/decoder`
+    `github.com/stretchr/testify/require`
+)
+
+
+// TestNegZeroInIEEE754 checks that sonic agrees with encoding/json on IEEE-754
+// negative zero: "-0.0" must decode to the same bit pattern, and that value
+// must encode to the same bytes.
+func TestNegZeroInIEEE754(t *testing.T) {
+    var sonicobj, stdobj float64
+    sonicerr := Unmarshal([]byte("-0.0"), &sonicobj)
+    stderr := json.Unmarshal([]byte("-0.0"), &stdobj)
+    require.NoError(t, stderr)
+    if sonicerr != nil {
+        // Log the extended description when the error is a syntax error; use the
+        // comma-ok form so any other error type cannot panic the test.
+        if se, ok := sonicerr.(decoder.SyntaxError); ok {
+            t.Log(se.Description())
+        }
+    }
+    require.NoError(t, sonicerr)
+    // Compare raw bits: 0.0 == -0.0 is true for floats, so a value comparison
+    // could not tell the two zeros apart.
+    require.Equal(t, math.Float64bits(stdobj), math.Float64bits(sonicobj))
+
+    sonicout, sonicerr2 := Marshal(&stdobj)
+    stdout, stderr2 := json.Marshal(&stdobj)
+    require.NoError(t, stderr2)
+    require.NoError(t, sonicerr2)
+    require.Equal(t, stdout, sonicout)
+}
--- a/native/fastfloat.c
+++ b/native/fastfloat.c
@ -429,9 +429,8 @@ static inline bool f64tod_exct_int(const uint64_t man, const uint32_t exp,
    return true;
 }

-static int inline ryu(double val, char *out) {
+static int inline ryu(uint64_t bits, char *out) {
    /* Step 1: Decode the floating-point number */
-    uint64_t bits = *(uint64_t *)(&val);
    uint64_t man = bits & ((1ull << 52) - 1);
    uint32_t exp = (uint32_t) ((bits >> 52) & ((1u << 11) - 1));

@ -459,29 +458,28 @@ static int inline ryu(double val, char *out) {
    else      // decimal format
        idx += print_decimal(v, out + idx, mlen);

-    /* Terminate the string */
-    out[idx] = '\0';
    return idx;
 }

 int f64toa(char *out, double val) {
    int   i = 0;
    char *p = out;
-
-    /* simple case of 0.0 */
-    if (val == 0.0) {
-        *p = '0';
-        return 1;
-    }
+    uint64_t uval = *(uint64_t *)&val; /* raw bit pattern of val, so the sign bit can be tested even for -0.0 */

    /* negative numbers */
-    if (val < 0.0) {
+    if (unlikely(uval >> 63) == 1) { /* sign bit set (this includes -0.0); NOTE(review): the `== 1` sits outside unlikely(...) - confirm the hint is applied as intended */
        i    = 1;
-        val  = -val;
+        uval &= ((1ull << 63) - 1); /* clear the sign bit; uval now holds the bits of |val| */
        *p++ = '-';
    }

+    /* simple case of 0.0: every remaining bit is zero, so emit "0" (preceded by
+     * the '-' written above for negative zero) and include that sign in the
+     * returned length */
+    if (uval ==  0) {
+        *p = '0';
+        return i + 1;
+    }
+
    /* print the number with Ryu algorithm */
-    int n = ryu(val, p);
+    int n = ryu(uval, p); /* ryu receives the sign-stripped bit pattern rather than a double */
    return n + i;
 }
--- a/native/scanning.c
+++ b/native/scanning.c
@ -100,7 +100,6 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
    uint64_t os;
    uint64_t m0;
    uint64_t m1;
-    uint64_t mx;
    uint64_t cr = 0;

    /* buffer pointers */
@ -484,38 +483,35 @@ void vstring(const GoString *src, long *p, JsonState *ret) {

 /** check whether float can represent the val exactly **/
 static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
-    double f = (double)man;
+    *val = (double)man;

    if (man >> 52 != 0) {
        return false;
    }

-    if (sgn == -1) {
-        f = -f;
-    }
-    *val = 0;
+    /* equal to if (sgn == -1) { *val *= -1; } */
+    *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63);

    if (exp == 0 || man == 0) {
-        *val = f;
        return true;
    } else if (exp > 0 && exp <= 15+22) {
        /* uint64 integers: accurate range <= 10^15          *
         * Powers of 10: accurate range <= 10^22, as P10_TAB *
         * Example: man 1, exp 36, is ok                     */
        if (exp > 22) {
-            f *= P10_TAB[exp-22];
+            *val *= P10_TAB[exp-22];
            exp = 22;
        }

        /* f is not accurate when too larger */
-        if (f > 1e15 || f < -1e15) {
+        if (*val > 1e15 || *val < -1e15) {
            return false;
        }

-        *val = f * P10_TAB[exp];
+        *val *= P10_TAB[exp];
        return true;
    } else if (exp < 0 && exp >= -22) {
-        *val = f / P10_TAB[-exp];
+        *val /=  P10_TAB[-exp];
        return true;
    }

@ -546,15 +542,16 @@ static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, co
 }

 static bool inline is_overflow(uint64_t man, int sgn, int exp10) {
+    /* The first test, exp10 != 0, means man has overflowed.
+     * The second is equivalent to man * sgn < INT64_MIN or > INT64_MAX:
+     * bit 63 of man is set and the value is not exactly INT64_MIN
+     * (sgn == -1 with man == 1 << 63). Assumes sgn is either 1 or -1. */
    return exp10 != 0 ||
        ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63));
 }

 void vnumber(const GoString *src, long *p, JsonState *ret) {
-    int      dig;
    int      sgn = 1;
    uint64_t man = 0; // mantissa for double (float64)
-    int   man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t
+    int   man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t
    int    exp10 = 0; // val = sgn * man * 10 ^ exp10
    int    trunc = 0;

@ -632,21 +629,25 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
            i++;
        }
        exp10 += exp * esm;
+        goto parse_float;
    }

    if (ret->vt == V_INTEGER) {
        if (!is_overflow(man, sgn, exp10)) {
            ret->iv = (int64_t)man * sgn;
-            ret->dv = (double)(ret->iv);
-        } else {
-            set_vt(V_DOUBLE)
+            
+            /* following lines equal to ret->dv = (double)(man) * sgn */
+            ret->dv = (double)(man);
+            *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63);
+
+            *p = i;
+            return;
        }
+        set_vt(V_DOUBLE)
    }

-    if (ret->vt == V_DOUBLE) {
-        ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
-    }
-
+parse_float:
+    ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
    /* update the result */
    *p = i;
 }
@ -946,7 +947,6 @@ static inline long skip_number(const char *sp, size_t nb) {
        __m128i eu = _mm_set1_epi8('E');
        __m128i xp = _mm_set1_epi8('+');
        __m128i xm = _mm_set1_epi8('-');
-        __m128i v1 = _mm_set1_epi8(0xff);

        /* 16-byte loop */
        do {