mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
opt: skip space use shuffle (#416)
This commit is contained in:
parent
ebbe7589ca
commit
d83abb5435
7 changed files with 9710 additions and 8575 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -9,28 +9,28 @@ package avx
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 28800
|
_subr__f32toa = __native_entry__() + 31264
|
||||||
_subr__f64toa = __native_entry__() + 448
|
_subr__f64toa = __native_entry__() + 192
|
||||||
_subr__get_by_path = __native_entry__() + 25664
|
_subr__get_by_path = __native_entry__() + 25856
|
||||||
_subr__html_escape = __native_entry__() + 9296
|
_subr__html_escape = __native_entry__() + 9040
|
||||||
_subr__i64toa = __native_entry__() + 3744
|
_subr__i64toa = __native_entry__() + 3488
|
||||||
_subr__lspace = __native_entry__() + 80
|
_subr__lspace = __native_entry__() + 16
|
||||||
_subr__quote = __native_entry__() + 5136
|
_subr__quote = __native_entry__() + 4880
|
||||||
_subr__skip_array = __native_entry__() + 18592
|
_subr__skip_array = __native_entry__() + 17952
|
||||||
_subr__skip_number = __native_entry__() + 22224
|
_subr__skip_number = __native_entry__() + 21952
|
||||||
_subr__skip_object = __native_entry__() + 20640
|
_subr__skip_object = __native_entry__() + 20368
|
||||||
_subr__skip_one = __native_entry__() + 22384
|
_subr__skip_one = __native_entry__() + 22112
|
||||||
_subr__skip_one_fast = __native_entry__() + 22624
|
_subr__skip_one_fast = __native_entry__() + 22352
|
||||||
_subr__u64toa = __native_entry__() + 3856
|
_subr__u64toa = __native_entry__() + 3600
|
||||||
_subr__unquote = __native_entry__() + 6928
|
_subr__unquote = __native_entry__() + 6672
|
||||||
_subr__validate_one = __native_entry__() + 22448
|
_subr__validate_one = __native_entry__() + 22176
|
||||||
_subr__validate_utf8 = __native_entry__() + 27552
|
_subr__validate_utf8 = __native_entry__() + 30000
|
||||||
_subr__validate_utf8_fast = __native_entry__() + 28224
|
_subr__validate_utf8_fast = __native_entry__() + 30672
|
||||||
_subr__value = __native_entry__() + 12480
|
_subr__value = __native_entry__() + 12224
|
||||||
_subr__vnumber = __native_entry__() + 16256
|
_subr__vnumber = __native_entry__() + 15616
|
||||||
_subr__vsigned = __native_entry__() + 17872
|
_subr__vsigned = __native_entry__() + 17232
|
||||||
_subr__vstring = __native_entry__() + 14704
|
_subr__vstring = __native_entry__() + 14064
|
||||||
_subr__vunsigned = __native_entry__() + 18240
|
_subr__vunsigned = __native_entry__() + 17600
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -45,7 +45,7 @@ const (
|
||||||
_stack__skip_number = 72
|
_stack__skip_number = 72
|
||||||
_stack__skip_object = 128
|
_stack__skip_object = 128
|
||||||
_stack__skip_one = 128
|
_stack__skip_one = 128
|
||||||
_stack__skip_one_fast = 216
|
_stack__skip_one_fast = 200
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 88
|
_stack__unquote = 88
|
||||||
_stack__validate_one = 128
|
_stack__validate_one = 128
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,34 @@ package avx2
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 34720
|
_subr__f32toa = __native_entry__() + 33888
|
||||||
_subr__f64toa = __native_entry__() + 736
|
_subr__f64toa = __native_entry__() + 288
|
||||||
_subr__get_by_path = __native_entry__() + 29248
|
_subr__get_by_path = __native_entry__() + 28336
|
||||||
_subr__html_escape = __native_entry__() + 10944
|
_subr__html_escape = __native_entry__() + 10496
|
||||||
_subr__i64toa = __native_entry__() + 4032
|
_subr__i64toa = __native_entry__() + 3584
|
||||||
_subr__lspace = __native_entry__() + 224
|
_subr__lspace = __native_entry__() + 64
|
||||||
_subr__quote = __native_entry__() + 5520
|
_subr__quote = __native_entry__() + 5072
|
||||||
_subr__skip_array = __native_entry__() + 21616
|
_subr__skip_array = __native_entry__() + 20688
|
||||||
_subr__skip_number = __native_entry__() + 25840
|
_subr__skip_number = __native_entry__() + 24912
|
||||||
_subr__skip_object = __native_entry__() + 23648
|
_subr__skip_object = __native_entry__() + 22736
|
||||||
_subr__skip_one = __native_entry__() + 26000
|
_subr__skip_one = __native_entry__() + 25072
|
||||||
_subr__skip_one_fast = __native_entry__() + 26416
|
_subr__skip_one_fast = __native_entry__() + 25488
|
||||||
_subr__u64toa = __native_entry__() + 4144
|
_subr__u64toa = __native_entry__() + 3696
|
||||||
_subr__unquote = __native_entry__() + 8336
|
_subr__unquote = __native_entry__() + 7888
|
||||||
_subr__validate_one = __native_entry__() + 26064
|
_subr__validate_one = __native_entry__() + 25136
|
||||||
_subr__validate_utf8 = __native_entry__() + 31152
|
_subr__validate_utf8 = __native_entry__() + 30320
|
||||||
_subr__validate_utf8_fast = __native_entry__() + 32112
|
_subr__validate_utf8_fast = __native_entry__() + 31280
|
||||||
_subr__value = __native_entry__() + 15472
|
_subr__value = __native_entry__() + 15024
|
||||||
_subr__vnumber = __native_entry__() + 19280
|
_subr__vnumber = __native_entry__() + 18352
|
||||||
_subr__vsigned = __native_entry__() + 20896
|
_subr__vsigned = __native_entry__() + 19968
|
||||||
_subr__vstring = __native_entry__() + 17952
|
_subr__vstring = __native_entry__() + 17024
|
||||||
_subr__vunsigned = __native_entry__() + 21264
|
_subr__vunsigned = __native_entry__() + 20336
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_stack__f32toa = 48
|
_stack__f32toa = 48
|
||||||
_stack__f64toa = 80
|
_stack__f64toa = 80
|
||||||
_stack__get_by_path = 312
|
_stack__get_by_path = 296
|
||||||
_stack__html_escape = 72
|
_stack__html_escape = 72
|
||||||
_stack__i64toa = 16
|
_stack__i64toa = 16
|
||||||
_stack__lspace = 8
|
_stack__lspace = 8
|
||||||
|
|
@ -45,7 +45,7 @@ const (
|
||||||
_stack__skip_number = 72
|
_stack__skip_number = 72
|
||||||
_stack__skip_object = 128
|
_stack__skip_object = 128
|
||||||
_stack__skip_one = 128
|
_stack__skip_one = 128
|
||||||
_stack__skip_one_fast = 224
|
_stack__skip_one_fast = 208
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 72
|
_stack__unquote = 72
|
||||||
_stack__validate_one = 128
|
_stack__validate_one = 128
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,34 @@ package sse
|
||||||
func __native_entry__() uintptr
|
func __native_entry__() uintptr
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_subr__f32toa = __native_entry__() + 29440
|
_subr__f32toa = __native_entry__() + 31760
|
||||||
_subr__f64toa = __native_entry__() + 448
|
_subr__f64toa = __native_entry__() + 160
|
||||||
_subr__get_by_path = __native_entry__() + 26304
|
_subr__get_by_path = __native_entry__() + 26384
|
||||||
_subr__html_escape = __native_entry__() + 9360
|
_subr__html_escape = __native_entry__() + 9072
|
||||||
_subr__i64toa = __native_entry__() + 3712
|
_subr__i64toa = __native_entry__() + 3424
|
||||||
_subr__lspace = __native_entry__() + 80
|
_subr__lspace = __native_entry__() + 16
|
||||||
_subr__quote = __native_entry__() + 5152
|
_subr__quote = __native_entry__() + 4864
|
||||||
_subr__skip_array = __native_entry__() + 18800
|
_subr__skip_array = __native_entry__() + 18112
|
||||||
_subr__skip_number = __native_entry__() + 22448
|
_subr__skip_number = __native_entry__() + 22128
|
||||||
_subr__skip_object = __native_entry__() + 20832
|
_subr__skip_object = __native_entry__() + 20512
|
||||||
_subr__skip_one = __native_entry__() + 22608
|
_subr__skip_one = __native_entry__() + 22288
|
||||||
_subr__skip_one_fast = __native_entry__() + 22832
|
_subr__skip_one_fast = __native_entry__() + 22512
|
||||||
_subr__u64toa = __native_entry__() + 3840
|
_subr__u64toa = __native_entry__() + 3552
|
||||||
_subr__unquote = __native_entry__() + 6992
|
_subr__unquote = __native_entry__() + 6704
|
||||||
_subr__validate_one = __native_entry__() + 22656
|
_subr__validate_one = __native_entry__() + 22336
|
||||||
_subr__validate_utf8 = __native_entry__() + 28208
|
_subr__validate_utf8 = __native_entry__() + 30528
|
||||||
_subr__validate_utf8_fast = __native_entry__() + 28880
|
_subr__validate_utf8_fast = __native_entry__() + 31200
|
||||||
_subr__value = __native_entry__() + 12560
|
_subr__value = __native_entry__() + 12272
|
||||||
_subr__vnumber = __native_entry__() + 16416
|
_subr__vnumber = __native_entry__() + 15728
|
||||||
_subr__vsigned = __native_entry__() + 18064
|
_subr__vsigned = __native_entry__() + 17376
|
||||||
_subr__vstring = __native_entry__() + 14800
|
_subr__vstring = __native_entry__() + 14112
|
||||||
_subr__vunsigned = __native_entry__() + 18448
|
_subr__vunsigned = __native_entry__() + 17760
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_stack__f32toa = 48
|
_stack__f32toa = 48
|
||||||
_stack__f64toa = 80
|
_stack__f64toa = 80
|
||||||
_stack__get_by_path = 256
|
_stack__get_by_path = 240
|
||||||
_stack__html_escape = 64
|
_stack__html_escape = 64
|
||||||
_stack__i64toa = 16
|
_stack__i64toa = 16
|
||||||
_stack__lspace = 8
|
_stack__lspace = 8
|
||||||
|
|
@ -45,7 +45,7 @@ const (
|
||||||
_stack__skip_number = 72
|
_stack__skip_number = 72
|
||||||
_stack__skip_object = 128
|
_stack__skip_object = 128
|
||||||
_stack__skip_one = 128
|
_stack__skip_one = 128
|
||||||
_stack__skip_one_fast = 168
|
_stack__skip_one_fast = 136
|
||||||
_stack__u64toa = 8
|
_stack__u64toa = 8
|
||||||
_stack__unquote = 88
|
_stack__unquote = 88
|
||||||
_stack__validate_one = 128
|
_stack__validate_one = 128
|
||||||
|
|
|
||||||
|
|
@ -16,14 +16,7 @@
|
||||||
|
|
||||||
#include "native.h"
|
#include "native.h"
|
||||||
|
|
||||||
#if USE_AVX2
|
|
||||||
static const uintptr_t ALIGN_MASK = 31;
|
|
||||||
#else
|
|
||||||
static const uintptr_t ALIGN_MASK = 15;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
size_t lspace(const char *sp, size_t nb, size_t p) {
|
size_t lspace(const char *sp, size_t nb, size_t p) {
|
||||||
int32_t ms;
|
|
||||||
const char * ss = sp;
|
const char * ss = sp;
|
||||||
|
|
||||||
/* seek to `p` */
|
/* seek to `p` */
|
||||||
|
|
@ -31,64 +24,28 @@ size_t lspace(const char *sp, size_t nb, size_t p) {
|
||||||
nb -= p;
|
nb -= p;
|
||||||
|
|
||||||
/* likely to run into non-spaces within a few characters, try scalar code first */
|
/* likely to run into non-spaces within a few characters, try scalar code first */
|
||||||
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
|
|
||||||
switch ((nb--, *sp++)) {
|
|
||||||
case ' ' : break;
|
|
||||||
case '\r' : break;
|
|
||||||
case '\n' : break;
|
|
||||||
case '\t' : break;
|
|
||||||
default : return sp - ss - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#if USE_AVX2
|
#if USE_AVX2
|
||||||
|
__m256i space_tab = _mm256_setr_epi8(
|
||||||
|
'\x20', 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0,
|
||||||
|
'\x20', 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0
|
||||||
|
);
|
||||||
|
|
||||||
/* 32-byte loop */
|
/* 32-byte loop */
|
||||||
while (likely(nb >= 32)) {
|
while (likely(nb >= 32)) {
|
||||||
__m256i x = _mm256_load_si256 ((const void *)sp);
|
__m256i input = _mm256_loadu_si256((__m256i*)sp);
|
||||||
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
|
__m256i shuffle = _mm256_shuffle_epi8(space_tab, input);
|
||||||
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
|
__m256i result = _mm256_cmpeq_epi8(input, shuffle);
|
||||||
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
|
int32_t mask = _mm256_movemask_epi8(result);
|
||||||
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
|
if (mask != -1) {
|
||||||
__m256i u = _mm256_or_si256 (a, b);
|
return sp - ss + __builtin_ctzll(~(uint64_t)mask);
|
||||||
__m256i v = _mm256_or_si256 (c, d);
|
|
||||||
__m256i w = _mm256_or_si256 (u, v);
|
|
||||||
|
|
||||||
/* check for matches */
|
|
||||||
if ((ms = _mm256_movemask_epi8(w)) != -1) {
|
|
||||||
_mm256_zeroupper();
|
|
||||||
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* move to next block */
|
|
||||||
sp += 32;
|
sp += 32;
|
||||||
nb -= 32;
|
nb -= 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
|
||||||
_mm256_zeroupper();
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* 16-byte loop */
|
|
||||||
while (likely(nb >= 16)) {
|
|
||||||
__m128i x = _mm_load_si128 ((const void *)sp);
|
|
||||||
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
|
|
||||||
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
|
|
||||||
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
|
|
||||||
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
|
|
||||||
__m128i u = _mm_or_si128 (a, b);
|
|
||||||
__m128i v = _mm_or_si128 (c, d);
|
|
||||||
__m128i w = _mm_or_si128 (u, v);
|
|
||||||
|
|
||||||
/* check for matches */
|
|
||||||
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
|
|
||||||
return sp - ss + __builtin_ctz(~ms);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* move to next block */
|
|
||||||
sp += 16;
|
|
||||||
nb -= 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* remaining bytes, do with scalar code */
|
/* remaining bytes, do with scalar code */
|
||||||
while (nb-- > 0) {
|
while (nb-- > 0) {
|
||||||
switch (*sp++) {
|
switch (*sp++) {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue