fix: support IEEE-754 negative zero in both marshal and unmarshal

2026-06-24 02:16:45 +08:00 · 2021-09-12 16:48:34 +08:00 · 2021-09-12 16:48:34 +08:00 · ec15697761
commit ec15697761
parent 514208a4d5
10 changed files with 1489 additions and 1411 deletions
--- a/internal/native/avx/fastfloat_amd64_test.go
+++ b/internal/native/avx/fastfloat_amd64_test.go
@ -29,7 +29,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -64,3 +64,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
 // BenchmarkFastFloat_EncodeZero measures the cost of encoding float64 zero with
 // strconv.AppendFloat ("StdLib") and with the native __f64toa ("FastFloat").
 func BenchmarkFastFloat_EncodeZero(b *testing.B) {
    val := float64(0)
    benchmarks := []struct {
        name string
        test func(*testing.B)
    }{{
        name: "StdLib",
        // Append into buf[:0] so the digits are written at the start of buf.
        // Appending to the full-length buf[:] (len == cap) makes append grow
        // into a new backing array on every iteration, which skews the result.
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
    }, {
        name: "FastFloat",
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
    }}
    for _, bm := range benchmarks {
        b.Run(bm.name, bm.test)
    }
 }
--- a/internal/native/avx/native_amd64.s
+++ b/internal/native/avx/native_amd64.s
--- a/internal/native/avx/native_subr_amd64.go
+++ b/internal/native/avx/native_subr_amd64.go
@ -14,21 +14,21 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
 var (
    _func__base        = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
-    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 558
+    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 542
-    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3541
+    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3551
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 238
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
-    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4854
+    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 4864
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16673
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16717
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16708
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16752
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14351
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14395
-    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3634
+    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3644
-    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5875
+    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5885
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10064
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 10074
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12639
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12633
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13801
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13845
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11670
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 11680
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14078
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14122
 )
 var (
--- a/internal/native/avx2/fastfloat_amd64_test.go
+++ b/internal/native/avx2/fastfloat_amd64_test.go
@ -29,7 +29,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -64,3 +64,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
 // BenchmarkFastFloat_EncodeZero measures the cost of encoding float64 zero with
 // strconv.AppendFloat ("StdLib") and with the native __f64toa ("FastFloat").
 func BenchmarkFastFloat_EncodeZero(b *testing.B) {
    val := float64(0)
    benchmarks := []struct {
        name string
        test func(*testing.B)
    }{{
        name: "StdLib",
        // Append into buf[:0] so the digits are written at the start of buf.
        // Appending to the full-length buf[:] (len == cap) makes append grow
        // into a new backing array on every iteration, which skews the result.
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
    }, {
        name: "FastFloat",
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
    }}
    for _, bm := range benchmarks {
        b.Run(bm.name, bm.test)
    }
 }
--- a/internal/native/avx2/native_amd64.s
+++ b/internal/native/avx2/native_amd64.s
--- a/internal/native/avx2/native_subr_amd64.go
+++ b/internal/native/avx2/native_subr_amd64.go
@ -14,21 +14,21 @@ func ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___()
 var (
    _func__base        = ___asm2asm_compiled_code__DO_NOT_CALL_THIS_SYMBOL___
-    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 806
+    _subr__f64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 790
-    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3789
+    _subr__i64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3799
    _subr__lspace      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 366
    _subr__lzero       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 0
-    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5202
+    _subr__quote       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 5212
-    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19129
+    _subr__skip_array  = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19173
-    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19164
+    _subr__skip_object = **(**uintptr)(unsafe.Pointer(&_func__base)) + 19208
-    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16329
+    _subr__skip_one    = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16373
-    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3882
+    _subr__u64toa      = **(**uintptr)(unsafe.Pointer(&_func__base)) + 3892
-    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7039
+    _subr__unquote     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 7049
-    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12053
+    _subr__value       = **(**uintptr)(unsafe.Pointer(&_func__base)) + 12063
-    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14617
+    _subr__vnumber     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 14611
-    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15779
+    _subr__vsigned     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 15823
-    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13764
+    _subr__vstring     = **(**uintptr)(unsafe.Pointer(&_func__base)) + 13774
-    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16056
+    _subr__vunsigned   = **(**uintptr)(unsafe.Pointer(&_func__base)) + 16100
 )
 var (
--- a/internal/native/fastfloat_amd64_test.tmpl
+++ b/internal/native/fastfloat_amd64_test.tmpl
@ -27,7 +27,7 @@ import (
 func TestFastFloat_Encode(t *testing.T) {
    var buf [64]byte
    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], 0)]))
-    assert.Equal(t, "0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
+    assert.Equal(t, "-0"                         , string(buf[:__f64toa(&buf[0], math.Float64frombits(0x8000000000000000))]))
    assert.Equal(t, "12340000000"               , string(buf[:__f64toa(&buf[0], 1234e7)]))
    assert.Equal(t, "12.34"                     , string(buf[:__f64toa(&buf[0], 1234e-2)]))
    assert.Equal(t, "0.001234"                  , string(buf[:__f64toa(&buf[0], 1234e-6)]))
@ -62,3 +62,21 @@ func BenchmarkFastFloat_Encode(b *testing.B) {
        b.Run(bm.name, bm.test)
    }
 }
 // BenchmarkFastFloat_EncodeZero measures the cost of encoding float64 zero with
 // strconv.AppendFloat ("StdLib") and with the native __f64toa ("FastFloat").
 func BenchmarkFastFloat_EncodeZero(b *testing.B) {
    val := float64(0)
    benchmarks := []struct {
        name string
        test func(*testing.B)
    }{{
        name: "StdLib",
        // Append into buf[:0] so the digits are written at the start of buf.
        // Appending to the full-length buf[:] (len == cap) makes append grow
        // into a new backing array on every iteration, which skews the result.
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { strconv.AppendFloat(buf[:0], val, 'g', -1, 64) }},
    }, {
        name: "FastFloat",
        test: func(b *testing.B) { var buf [64]byte; for i := 0; i < b.N; i++ { __f64toa(&buf[0], val) }},
    }}
    for _, bm := range benchmarks {
        b.Run(bm.name, bm.test)
    }
 }
--- a/issue93_test.go
+++ b/issue93_test.go
@ -0,0 +1,46 @@
 /*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package sonic
 import (
    `testing`
    `math`
    `encoding/json`
    `github.com/bytedance/sonic/decoder`
    `github.com/stretchr/testify/require`
 )
 // TestNegZeroInIEEE754 checks that sonic agrees with encoding/json on IEEE-754
 // negative zero: decoding "-0.0" must yield the same bit pattern as the
 // standard library, and encoding that value must produce the same bytes.
 func TestNegZeroInIEEE754(t *testing.T) {
    var sonicobj, stdobj float64
    sonicerr := Unmarshal([]byte("-0.0"), &sonicobj)
    stderr := json.Unmarshal([]byte("-0.0"), &stdobj)
    /* the standard library is the reference, it must accept the input */
    require.NoError(t, stderr)
    if sonicerr != nil {
        /* checked assertion: an error of another type must not panic the test */
        if se, ok := sonicerr.(decoder.SyntaxError); ok {
            t.Log(se.Description())
        }
        require.NoError(t, sonicerr)
    }
    /* compare raw bits, since -0.0 == 0.0 under floating-point comparison */
    require.Equal(t, math.Float64bits(stdobj), math.Float64bits(sonicobj))
    sonicout, sonicerr2 := Marshal(&stdobj)
    stdout, stderr2 := json.Marshal(&stdobj)
    require.NoError(t, stderr2)
    require.NoError(t, sonicerr2)
    /* expected value first; strings give a readable failure message */
    require.Equal(t, string(stdout), string(sonicout))
 }
--- a/native/fastfloat.c
+++ b/native/fastfloat.c
@ -429,9 +429,8 @@ static inline bool f64tod_exct_int(const uint64_t man, const uint32_t exp,
    return true;
 }
-static int inline ryu(double val, char *out) {
+static int inline ryu(uint64_t bits, char *out) {
    /* Step 1: Decode the floating-point number */
    uint64_t bits = *(uint64_t *)(&val);
    uint64_t man = bits & ((1ull << 52) - 1);
    uint32_t exp = (uint32_t) ((bits >> 52) & ((1u << 11) - 1));
@ -459,29 +458,28 @@ static int inline ryu(double val, char *out) {
    else      // decimal format
        idx += print_decimal(v, out + idx, mlen);
    /* Terminate the string */
    out[idx] = '\0';
    return idx;
 }
 int f64toa(char *out, double val) {
    int   i = 0;
    char *p = out;
-
+    uint64_t uval = *(uint64_t *)&val;
    /* simple case of 0.0 */
    if (val == 0.0) {
        *p = '0';
        return 1;
    }
    /* negative numbers */
-    if (val < 0.0) {
+    if (unlikely(uval >> 63) == 1) {
        i    = 1;
-        val  = -val;
+        uval &= ((1ull << 63) - 1);
        *p++ = '-';
    }
    /* simple case of 0.0 */
    if (uval ==  0) {
        *p = '0';
        return i + 1;
    }
    /* print the number with Ryu algorithm */
-    int n = ryu(val, p);
+    int n = ryu(uval, p);
    return n + i;
 }
--- a/native/scanning.c
+++ b/native/scanning.c
@ -100,7 +100,6 @@ static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) {
    uint64_t os;
    uint64_t m0;
    uint64_t m1;
    uint64_t mx;
    uint64_t cr = 0;
    /* buffer pointers */
@ -484,38 +483,35 @@ void vstring(const GoString *src, long *p, JsonState *ret) {
 /** check whether float can represent the val exactly **/
 static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
-    double f = (double)man;
+    *val = (double)man;
    if (man >> 52 != 0) {
        return false;
    }
-    if (sgn == -1) {
+    /* equal to if (sgn == -1) { *val *= -1; } */
-        f = -f;
+    *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63);
    }
    *val = 0;
    if (exp == 0 || man == 0) {
        *val = f;
        return true;
    } else if (exp > 0 && exp <= 15+22) {
        /* uint64 integers: accurate range <= 10^15          *
         * Powers of 10: accurate range <= 10^22, as P10_TAB *
         * Example: man 1, exp 36, is ok                     */
        if (exp > 22) {
-            f *= P10_TAB[exp-22];
+            *val *= P10_TAB[exp-22];
            exp = 22;
        }
        /* f is not accurate when too large */
-        if (f > 1e15 || f < -1e15) {
+        if (*val > 1e15 || *val < -1e15) {
            return false;
        }
-        *val = f * P10_TAB[exp];
+        *val *= P10_TAB[exp];
        return true;
    } else if (exp < 0 && exp >= -22) {
-        *val = f / P10_TAB[-exp];
+        *val /=  P10_TAB[-exp];
        return true;
    }
@ -546,15 +542,16 @@ static inline double parse_float64(uint64_t man, int exp, int sgn, int trunc, co
 }
 /** check whether the parsed integer (sign sgn, magnitude man) cannot be
  *  stored in an int64_t; returns true on overflow **/
 static bool inline is_overflow(uint64_t man, int sgn, int exp10) {
    /* the former exp10 != 0 means man has overflowed
     * the latter equals to man*sgn < INT64_MIN or > INT64_MAX:
     * with bit 63 of man set, the only value that still fits is
     * man == 2^63 with sgn == -1 (INT64_MIN), because (uint64_t)(-1) is
     * all ones and leaves man unchanged, while sgn == 1 masks man down
     * to its lowest bit (assumes sgn is either 1 or -1) */
    return exp10 != 0 ||
        ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63));
 }
 void vnumber(const GoString *src, long *p, JsonState *ret) {
    int      dig;
    int      sgn = 1;
    uint64_t man = 0; // mantissa for double (float64)
-    int   man_nd = 0; // # digits of mantissa, 10^19 fits uint64_t
+    int   man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t
    int    exp10 = 0; // val = sgn * man * 10 ^ exp10
    int    trunc = 0;
@ -632,21 +629,25 @@ void vnumber(const GoString *src, long *p, JsonState *ret) {
            i++;
        }
        exp10 += exp * esm;
        goto parse_float;
    }
    if (ret->vt == V_INTEGER) {
        if (!is_overflow(man, sgn, exp10)) {
            ret->iv = (int64_t)man * sgn;
-            ret->dv = (double)(ret->iv);
+            
-        } else {
+            /* following lines equal to ret->dv = (double)(man) * sgn */
            ret->dv = (double)(man);
            *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63);
            *p = i;
            return;
        }
        set_vt(V_DOUBLE)
    }
    }
-    if (ret->vt == V_DOUBLE) {
+parse_float:
    ret->dv = parse_float64(man, exp10, sgn, trunc, src, si);
    }
    /* update the result */
    *p = i;
 }
@ -946,7 +947,6 @@ static inline long skip_number(const char *sp, size_t nb) {
        __m128i eu = _mm_set1_epi8('E');
        __m128i xp = _mm_set1_epi8('+');
        __m128i xm = _mm_set1_epi8('-');
        __m128i v1 = _mm_set1_epi8(0xff);
        /* 16-byte loop */
        do {