2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00
sonic/native/fastfloat.c
2021-05-28 23:58:58 +08:00

456 lines
13 KiB
C

/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** This is the Florian's Grisu2 Algorithm implemented in C.
* See https://legacy.cs.indiana.edu/~dyb/pubs/FP-Printing-PLDI96.pdf for more info.
*/
#include "native.h"
struct f64_t {
int32_t e;
uint64_t f;
};
#define FP_SSIZE 64
#define DP_SSIZE 52
#define DP_1_LG10 0.30102999566398114 // = 1 / log2(10)
#define F64_EBIAS 1075
#define F64_EXMIN -F64_EBIAS
#define F64_HBIT 0x0010000000000000
#define F64_EMASK 0x7ff0000000000000
#define F64_SMASK 0x000fffffffffffff
static const int16_t TabPowE[87] = {
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
-954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
-688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
-422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
-157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
907, 933, 960, 986, 1013, 1039, 1066
};
static const uint64_t TabPowF[87] = {
0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76,
0x8b16fb203055ac76, 0xcf42894a5dce35ea,
0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f,
0xbe5691ef416bd60c, 0x8dd01fad907ffc3c,
0xd3515c2831559a83, 0x9d71ac8fada6c9b5,
0xea9c227723ee8bcb, 0xaecc49914078536d,
0x823c12795db6ce57, 0xc21094364dfb5637,
0x9096ea6f3848984f, 0xd77485cb25823ac7,
0xa086cfcd97bf97f4, 0xef340a98172aace5,
0xb23867fb2a35b28e, 0x84c8d4dfd2c63f3b,
0xc5dd44271ad3cdba, 0x936b9fcebb25c996,
0xdbac6c247d62a584, 0xa3ab66580d5fdaf6,
0xf3e2f893dec3f126, 0xb5b5ada8aaff80b8,
0x87625f056c7c4a8b, 0xc9bcff6034c13053,
0x964e858c91ba2655, 0xdff9772470297ebd,
0xa6dfbd9fb8e5b88f, 0xf8a95fcf88747d94,
0xb94470938fa89bcf, 0x8a08f0f8bf0f156b,
0xcdb02555653131b6, 0x993fe2c6d07b7fac,
0xe45c10c42a2b3b06, 0xaa242499697392d3,
0xfd87b5f28300ca0e, 0xbce5086492111aeb,
0x8cbccc096f5088cc, 0xd1b71758e219652c,
0x9c40000000000000, 0xe8d4a51000000000,
0xad78ebc5ac620000, 0x813f3978f8940984,
0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70,
0xd5d238a4abe98068, 0x9f4f2726179a2245,
0xed63a231d4c4fb27, 0xb0de65388cc8ada8,
0x83c7088e1aab65db, 0xc45d1df942711d9a,
0x924d692ca61be758, 0xda01ee641a708dea,
0xa26da3999aef774a, 0xf209787bb47d6b85,
0xb454e4a179dd1877, 0x865b86925b9bc5c2,
0xc83553c5c8965d3d, 0x952ab45cfa97a0b3,
0xde469fbd99a05fe3, 0xa59bc234db398c25,
0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece,
0x88fcf317f22241e2, 0xcc20ce9bd35c78a5,
0x98165af37b2153df, 0xe2a0b5dc971f303a,
0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c,
0xbb764c4ca7a44410, 0x8bab8eefb6409c1a,
0xd01fef10a657842c, 0x9b10a4e5e9913129,
0xe7109bfba19c0c9d, 0xac2820d9623bf429,
0x80444b5e7aa7cf85, 0xbf21e44003acdd2d,
0x8e679c2f5e44ff8f, 0xd433179d9c8cb841,
0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9,
0xaf87023b9bf0ee6b
};
static const uint64_t TabPow10[10] = {
1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000
};
/** FP-64 Helper **/
static inline void f64_set(struct f64_t *r, double v) {
uint64_t bv = *(uint64_t *)&v;
uint64_t sv = bv & F64_SMASK;
int32_t ev = (bv & F64_EMASK) >> DP_SSIZE;
/* check for denormalized values */
if (ev == 0) {
r->f = sv;
r->e = F64_EXMIN + 1;
} else {
r->f = sv + F64_HBIT;
r->e = ev - F64_EBIAS;
}
}
static inline void f64_raw(struct f64_t *r, uint64_t f, int32_t e) {
r->e = e;
r->f = f;
}
static inline void f64_sub(struct f64_t *r, const struct f64_t *a, const struct f64_t *b) {
r->e = a->e;
r->f = a->f - b->f;
}
static inline void f64_mul(struct f64_t *r, const struct f64_t *a, const struct f64_t *b) {
__int128_t v0 = a->f;
__int128_t v1 = b->f;
__int128_t v2 = v0 * v1;
uint64_t vh = v2 >> 64;
uint64_t vl = (uint64_t)v2;
/* rounding */
if (vl & (1ull << 63)) {
vh++;
}
/* save the result */
r->f = vh;
r->e = a->e + b->e + 64;
}
static inline void f64_norm(struct f64_t *r, const struct f64_t *v) {
uint64_t f = v->f;
uint32_t s = __builtin_clzll(f);
/* remove the leading zeros, and adjust the exponent */
r->f = f << s;
r->e = v->e - s;
}
static inline void f64_normb(struct f64_t *m, struct f64_t *p, const struct f64_t *v) {
int32_t dv = v->f != F64_HBIT ? 1 : 2;
int32_t e0 = v->e - 1;
int32_t e1 = v->e - dv;
uint64_t f0 = (v->f << 1) + 1;
uint64_t f1 = (v->f << dv) - 1;
uint32_t sh = __builtin_clzll(f0);
/* calculate the m+ */
p->e = e0 - sh;
p->f = f0 << sh;
/* calculate the m- */
m->e = p->e;
m->f = f1 << (e1 - p->e);
}
static inline void f64_power(struct f64_t *v, int e, int *k) {
double dk = (-61 - e) * DP_1_LG10 + 347;
int32_t ik = (int32_t)dk;
uint32_t id;
/* ceil the logrithmic result */
if (dk - ik > 0.0) {
ik++;
}
/* calculate the K value */
id = (uint32_t)(ik >> 3) + 1;
*k = 348 - (int32_t)(id << 3);
/* lookup the power */
v->e = TabPowE[id];
v->f = TabPowF[id];
}
/** Florian's Grisu2 Algorithm **/
static const char TabDigits[200] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
};
static inline int ctz10(uint64_t n) {
if (n < 10) return 1;
if (n < 100) return 2;
if (n < 1000) return 3;
if (n < 10000) return 4;
if (n < 100000) return 5;
if (n < 1000000) return 6;
if (n < 10000000) return 7;
if (n < 100000000) return 8;
if (n < 1000000000) return 9;
return 10;
}
static inline int divmod(uint64_t *p1, int kp) {
switch (kp) {
case 10: kp = *p1 / 1000000000; *p1 %= 1000000000; return kp;
case 9: kp = *p1 / 100000000; *p1 %= 100000000; return kp;
case 8: kp = *p1 / 10000000; *p1 %= 10000000; return kp;
case 7: kp = *p1 / 1000000; *p1 %= 1000000; return kp;
case 6: kp = *p1 / 100000; *p1 %= 100000; return kp;
case 5: kp = *p1 / 10000; *p1 %= 10000; return kp;
case 4: kp = *p1 / 1000; *p1 %= 1000; return kp;
case 3: kp = *p1 / 100; *p1 %= 100; return kp;
case 2: kp = *p1 / 10; *p1 %= 10; return kp;
case 1: kp = *p1; *p1 = 0; return kp;
default: __builtin_unreachable();
}
}
static inline void roundg(char *p, uint64_t d, uint64_t r, uint64_t kp10, uint64_t dpw) {
while (r < dpw && d - r >= kp10 && (r + kp10 < dpw || dpw - r > r + kp10 - dpw)) {
r += kp10;
p[-1] -= 1;
}
}
static inline int digits(char *p, int *k, const struct f64_t *w, const struct f64_t *m, uint64_t d) {
uint32_t dv;
uint64_t vt;
struct f64_t dpw;
struct f64_t one;
/* initial state */
f64_sub(&dpw, m, w);
f64_raw(&one, 1ull << -m->e, m->e);
/* m+ cutoff */
uint64_t p1 = m->f >> -one.e;
uint64_t p2 = m->f & (one.f - 1);
/* count the integer part length */
char * pb = p;
int32_t kp = ctz10(p1);
/* small values */
while (kp > 0) {
dv = divmod(&p1, kp);
kp--;
/* write one digit */
if (dv || p > pb) {
*p++ = (char)(dv + '0');
}
/* calculate the error */
vt = p1 << -one.e;
vt += p2;
/* check the precision */
if (vt <= d) {
*k += kp;
roundg(p, d, vt, TabPow10[kp] << -one.e, dpw.f);
return p - pb;
}
}
/* large values (longer than 6 leading digits) */
for (;;) {
d *= 10;
p2 *= 10;
dv = (p2 >> -one.e) & 0xff;
p2 &= one.f - 1;
kp--;
/* write one digit */
if (dv || p > pb) {
*p++ = (char)(dv + '0');
}
/* check the precision */
if (p2 < d) {
*k += kp;
roundg(p, d, p2, one.f, dpw.f * TabPow10[-kp]);
return p - pb;
}
}
}
static inline int grisu2(char *p, double v, int *k) {
struct f64_t vv;
struct f64_t mk;
struct f64_t wm;
struct f64_t wp;
struct f64_t ww;
/* calculate m+ and m- */
f64_set (&vv, v);
f64_normb (&wm, &wp, &vv);
f64_power (&mk, wp.e, k);
f64_norm (&ww, &vv);
f64_mul (&ww, &ww, &mk);
f64_mul (&wp, &wp, &mk);
f64_mul (&wm, &wm, &mk);
/* generate the digits */
wm.f++;
wp.f--;
return digits(p, k, &ww, &wp, wp.f - wm.f);
}
static inline void movchar(char *p, int n, int v, int d) {
for (int i = n + d - 1; i >= v; i--) {
p[i] = p[i - d];
}
}
static inline void setchar(char *p, char c, int n) {
while (n--) {
*p++ = c;
}
}
static inline void inschr1(char *p, char c, int n, int v) {
movchar(p, n, v, 1);
p[v] = c;
}
static inline void inschr2(char *p, char c0, char c1, int n, int v, int d) {
movchar(p, n, v, d + 2);
p[v + 0] = c0;
p[v + 1] = c1;
}
static inline void setexpo(char *p, int *n, int k) {
int ex = k;
int n0 = *n;
char * p0 = p;
/* negative exponent */
if (ex < 0) {
ex = -ex;
*p++ = '-';
}
/* single digit exponent */
if (ex < 10) {
*n = p - p0 + n0 + 1;
*p = (char)(ex + '0');
return;
}
/* 2-digit exponent */
if (ex < 100) {
*n = p - p0 + n0 + 2;
*p++ = TabDigits[ex * 2];
*p++ = TabDigits[ex * 2 + 1];
return;
}
/* 3-digit exponent */
*n = p - p0 + n0 + 3;
*p++ = (char)(ex / 100 + '0');
*p++ = TabDigits[(ex % 100) * 2];
*p++ = TabDigits[(ex % 100) * 2 + 1];
}
static inline void normalize(char *p, int *np, int k) {
int n = *np;
int nk = n + k;
/* case 1: p = "1234", k = 7 -> "12340000000" */
if (n <= nk && nk <= 21) {
*np = nk;
setchar(p + n, '0', k);
return;
}
/* case 2: p = "1234", k = -2 -> "12.34" */
if (0 < nk && nk <= 21) {
*np = n + 1;
inschr1(p, '.', n, nk);
return;
}
/* case 3: p = "1234", k = -6 -> "0.001234" */
if (-6 < nk && nk <= 0) {
*np = 2 - k;
inschr2(p, '0', '.', n, 0, -nk);
setchar(p + 2, '0', -nk);
return;
}
/* case 4: p = "1", k = 30 -> "1e30" */
if (n == 1) {
(*np)++;
p[1] = 'e';
setexpo(p + 2, np, nk - 1);
return;
}
/* case 5 (final case): p = "1234", k = 30 -> "1.234e33" */
*np += 2;
inschr1(p, '.', n, 1);
setchar(p + n + 1, 'e', 1);
setexpo(p + n + 2, np, nk - 1);
}
int f64toa(char *out, double val) {
int i = 0;
char * p = out;
/* simple case of 0.0 */
if (val == 0.0) {
*p = '0';
return 1;
}
/* negative numbers */
if (val < 0.0) {
i = 1;
val = -val;
*p++ = '-';
}
/* print the number with Grisu2 algorithm */
int k;
int n = grisu2(p, val, &k);
/* normalize the output, and adjust the length */
normalize(p, &n, k);
return n + i;
}