mirror of
https://github.com/ii64/sonic.git
synced 2026-06-20 16:45:22 +08:00
opt: skip space use shuffle (#416)
This commit is contained in:
parent
ebbe7589ca
commit
d83abb5435
7 changed files with 9710 additions and 8575 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -9,28 +9,28 @@ package avx
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 28800
|
||||
_subr__f64toa = __native_entry__() + 448
|
||||
_subr__get_by_path = __native_entry__() + 25664
|
||||
_subr__html_escape = __native_entry__() + 9296
|
||||
_subr__i64toa = __native_entry__() + 3744
|
||||
_subr__lspace = __native_entry__() + 80
|
||||
_subr__quote = __native_entry__() + 5136
|
||||
_subr__skip_array = __native_entry__() + 18592
|
||||
_subr__skip_number = __native_entry__() + 22224
|
||||
_subr__skip_object = __native_entry__() + 20640
|
||||
_subr__skip_one = __native_entry__() + 22384
|
||||
_subr__skip_one_fast = __native_entry__() + 22624
|
||||
_subr__u64toa = __native_entry__() + 3856
|
||||
_subr__unquote = __native_entry__() + 6928
|
||||
_subr__validate_one = __native_entry__() + 22448
|
||||
_subr__validate_utf8 = __native_entry__() + 27552
|
||||
_subr__validate_utf8_fast = __native_entry__() + 28224
|
||||
_subr__value = __native_entry__() + 12480
|
||||
_subr__vnumber = __native_entry__() + 16256
|
||||
_subr__vsigned = __native_entry__() + 17872
|
||||
_subr__vstring = __native_entry__() + 14704
|
||||
_subr__vunsigned = __native_entry__() + 18240
|
||||
_subr__f32toa = __native_entry__() + 31264
|
||||
_subr__f64toa = __native_entry__() + 192
|
||||
_subr__get_by_path = __native_entry__() + 25856
|
||||
_subr__html_escape = __native_entry__() + 9040
|
||||
_subr__i64toa = __native_entry__() + 3488
|
||||
_subr__lspace = __native_entry__() + 16
|
||||
_subr__quote = __native_entry__() + 4880
|
||||
_subr__skip_array = __native_entry__() + 17952
|
||||
_subr__skip_number = __native_entry__() + 21952
|
||||
_subr__skip_object = __native_entry__() + 20368
|
||||
_subr__skip_one = __native_entry__() + 22112
|
||||
_subr__skip_one_fast = __native_entry__() + 22352
|
||||
_subr__u64toa = __native_entry__() + 3600
|
||||
_subr__unquote = __native_entry__() + 6672
|
||||
_subr__validate_one = __native_entry__() + 22176
|
||||
_subr__validate_utf8 = __native_entry__() + 30000
|
||||
_subr__validate_utf8_fast = __native_entry__() + 30672
|
||||
_subr__value = __native_entry__() + 12224
|
||||
_subr__vnumber = __native_entry__() + 15616
|
||||
_subr__vsigned = __native_entry__() + 17232
|
||||
_subr__vstring = __native_entry__() + 14064
|
||||
_subr__vunsigned = __native_entry__() + 17600
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -45,7 +45,7 @@ const (
|
|||
_stack__skip_number = 72
|
||||
_stack__skip_object = 128
|
||||
_stack__skip_one = 128
|
||||
_stack__skip_one_fast = 216
|
||||
_stack__skip_one_fast = 200
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 88
|
||||
_stack__validate_one = 128
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,34 @@ package avx2
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 34720
|
||||
_subr__f64toa = __native_entry__() + 736
|
||||
_subr__get_by_path = __native_entry__() + 29248
|
||||
_subr__html_escape = __native_entry__() + 10944
|
||||
_subr__i64toa = __native_entry__() + 4032
|
||||
_subr__lspace = __native_entry__() + 224
|
||||
_subr__quote = __native_entry__() + 5520
|
||||
_subr__skip_array = __native_entry__() + 21616
|
||||
_subr__skip_number = __native_entry__() + 25840
|
||||
_subr__skip_object = __native_entry__() + 23648
|
||||
_subr__skip_one = __native_entry__() + 26000
|
||||
_subr__skip_one_fast = __native_entry__() + 26416
|
||||
_subr__u64toa = __native_entry__() + 4144
|
||||
_subr__unquote = __native_entry__() + 8336
|
||||
_subr__validate_one = __native_entry__() + 26064
|
||||
_subr__validate_utf8 = __native_entry__() + 31152
|
||||
_subr__validate_utf8_fast = __native_entry__() + 32112
|
||||
_subr__value = __native_entry__() + 15472
|
||||
_subr__vnumber = __native_entry__() + 19280
|
||||
_subr__vsigned = __native_entry__() + 20896
|
||||
_subr__vstring = __native_entry__() + 17952
|
||||
_subr__vunsigned = __native_entry__() + 21264
|
||||
_subr__f32toa = __native_entry__() + 33888
|
||||
_subr__f64toa = __native_entry__() + 288
|
||||
_subr__get_by_path = __native_entry__() + 28336
|
||||
_subr__html_escape = __native_entry__() + 10496
|
||||
_subr__i64toa = __native_entry__() + 3584
|
||||
_subr__lspace = __native_entry__() + 64
|
||||
_subr__quote = __native_entry__() + 5072
|
||||
_subr__skip_array = __native_entry__() + 20688
|
||||
_subr__skip_number = __native_entry__() + 24912
|
||||
_subr__skip_object = __native_entry__() + 22736
|
||||
_subr__skip_one = __native_entry__() + 25072
|
||||
_subr__skip_one_fast = __native_entry__() + 25488
|
||||
_subr__u64toa = __native_entry__() + 3696
|
||||
_subr__unquote = __native_entry__() + 7888
|
||||
_subr__validate_one = __native_entry__() + 25136
|
||||
_subr__validate_utf8 = __native_entry__() + 30320
|
||||
_subr__validate_utf8_fast = __native_entry__() + 31280
|
||||
_subr__value = __native_entry__() + 15024
|
||||
_subr__vnumber = __native_entry__() + 18352
|
||||
_subr__vsigned = __native_entry__() + 19968
|
||||
_subr__vstring = __native_entry__() + 17024
|
||||
_subr__vunsigned = __native_entry__() + 20336
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f32toa = 48
|
||||
_stack__f64toa = 80
|
||||
_stack__get_by_path = 312
|
||||
_stack__get_by_path = 296
|
||||
_stack__html_escape = 72
|
||||
_stack__i64toa = 16
|
||||
_stack__lspace = 8
|
||||
|
|
@ -45,7 +45,7 @@ const (
|
|||
_stack__skip_number = 72
|
||||
_stack__skip_object = 128
|
||||
_stack__skip_one = 128
|
||||
_stack__skip_one_fast = 224
|
||||
_stack__skip_one_fast = 208
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 72
|
||||
_stack__validate_one = 128
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -9,34 +9,34 @@ package sse
|
|||
func __native_entry__() uintptr
|
||||
|
||||
var (
|
||||
_subr__f32toa = __native_entry__() + 29440
|
||||
_subr__f64toa = __native_entry__() + 448
|
||||
_subr__get_by_path = __native_entry__() + 26304
|
||||
_subr__html_escape = __native_entry__() + 9360
|
||||
_subr__i64toa = __native_entry__() + 3712
|
||||
_subr__lspace = __native_entry__() + 80
|
||||
_subr__quote = __native_entry__() + 5152
|
||||
_subr__skip_array = __native_entry__() + 18800
|
||||
_subr__skip_number = __native_entry__() + 22448
|
||||
_subr__skip_object = __native_entry__() + 20832
|
||||
_subr__skip_one = __native_entry__() + 22608
|
||||
_subr__skip_one_fast = __native_entry__() + 22832
|
||||
_subr__u64toa = __native_entry__() + 3840
|
||||
_subr__unquote = __native_entry__() + 6992
|
||||
_subr__validate_one = __native_entry__() + 22656
|
||||
_subr__validate_utf8 = __native_entry__() + 28208
|
||||
_subr__validate_utf8_fast = __native_entry__() + 28880
|
||||
_subr__value = __native_entry__() + 12560
|
||||
_subr__vnumber = __native_entry__() + 16416
|
||||
_subr__vsigned = __native_entry__() + 18064
|
||||
_subr__vstring = __native_entry__() + 14800
|
||||
_subr__vunsigned = __native_entry__() + 18448
|
||||
_subr__f32toa = __native_entry__() + 31760
|
||||
_subr__f64toa = __native_entry__() + 160
|
||||
_subr__get_by_path = __native_entry__() + 26384
|
||||
_subr__html_escape = __native_entry__() + 9072
|
||||
_subr__i64toa = __native_entry__() + 3424
|
||||
_subr__lspace = __native_entry__() + 16
|
||||
_subr__quote = __native_entry__() + 4864
|
||||
_subr__skip_array = __native_entry__() + 18112
|
||||
_subr__skip_number = __native_entry__() + 22128
|
||||
_subr__skip_object = __native_entry__() + 20512
|
||||
_subr__skip_one = __native_entry__() + 22288
|
||||
_subr__skip_one_fast = __native_entry__() + 22512
|
||||
_subr__u64toa = __native_entry__() + 3552
|
||||
_subr__unquote = __native_entry__() + 6704
|
||||
_subr__validate_one = __native_entry__() + 22336
|
||||
_subr__validate_utf8 = __native_entry__() + 30528
|
||||
_subr__validate_utf8_fast = __native_entry__() + 31200
|
||||
_subr__value = __native_entry__() + 12272
|
||||
_subr__vnumber = __native_entry__() + 15728
|
||||
_subr__vsigned = __native_entry__() + 17376
|
||||
_subr__vstring = __native_entry__() + 14112
|
||||
_subr__vunsigned = __native_entry__() + 17760
|
||||
)
|
||||
|
||||
const (
|
||||
_stack__f32toa = 48
|
||||
_stack__f64toa = 80
|
||||
_stack__get_by_path = 256
|
||||
_stack__get_by_path = 240
|
||||
_stack__html_escape = 64
|
||||
_stack__i64toa = 16
|
||||
_stack__lspace = 8
|
||||
|
|
@ -45,7 +45,7 @@ const (
|
|||
_stack__skip_number = 72
|
||||
_stack__skip_object = 128
|
||||
_stack__skip_one = 128
|
||||
_stack__skip_one_fast = 168
|
||||
_stack__skip_one_fast = 136
|
||||
_stack__u64toa = 8
|
||||
_stack__unquote = 88
|
||||
_stack__validate_one = 128
|
||||
|
|
|
|||
|
|
@ -16,14 +16,7 @@
|
|||
|
||||
#include "native.h"
|
||||
|
||||
#if USE_AVX2
|
||||
static const uintptr_t ALIGN_MASK = 31;
|
||||
#else
|
||||
static const uintptr_t ALIGN_MASK = 15;
|
||||
#endif
|
||||
|
||||
size_t lspace(const char *sp, size_t nb, size_t p) {
|
||||
int32_t ms;
|
||||
const char * ss = sp;
|
||||
|
||||
/* seek to `p` */
|
||||
|
|
@ -31,64 +24,28 @@ size_t lspace(const char *sp, size_t nb, size_t p) {
|
|||
nb -= p;
|
||||
|
||||
/* likely to run into non-spaces within a few characters, try scalar code first */
|
||||
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
|
||||
switch ((nb--, *sp++)) {
|
||||
case ' ' : break;
|
||||
case '\r' : break;
|
||||
case '\n' : break;
|
||||
case '\t' : break;
|
||||
default : return sp - ss - 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_AVX2
|
||||
__m256i space_tab = _mm256_setr_epi8(
|
||||
'\x20', 0, 0, 0, 0, 0, 0, 0,
|
||||
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0,
|
||||
'\x20', 0, 0, 0, 0, 0, 0, 0,
|
||||
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0
|
||||
);
|
||||
|
||||
/* 32-byte loop */
|
||||
while (likely(nb >= 32)) {
|
||||
__m256i x = _mm256_load_si256 ((const void *)sp);
|
||||
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
|
||||
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
|
||||
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
|
||||
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
|
||||
__m256i u = _mm256_or_si256 (a, b);
|
||||
__m256i v = _mm256_or_si256 (c, d);
|
||||
__m256i w = _mm256_or_si256 (u, v);
|
||||
|
||||
/* check for matches */
|
||||
if ((ms = _mm256_movemask_epi8(w)) != -1) {
|
||||
_mm256_zeroupper();
|
||||
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
|
||||
__m256i input = _mm256_loadu_si256((__m256i*)sp);
|
||||
__m256i shuffle = _mm256_shuffle_epi8(space_tab, input);
|
||||
__m256i result = _mm256_cmpeq_epi8(input, shuffle);
|
||||
int32_t mask = _mm256_movemask_epi8(result);
|
||||
if (mask != -1) {
|
||||
return sp - ss + __builtin_ctzll(~(uint64_t)mask);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 32;
|
||||
nb -= 32;
|
||||
}
|
||||
|
||||
/* clear upper half to avoid AVX-SSE transition penalty */
|
||||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
/* 16-byte loop */
|
||||
while (likely(nb >= 16)) {
|
||||
__m128i x = _mm_load_si128 ((const void *)sp);
|
||||
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
|
||||
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
|
||||
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
|
||||
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
|
||||
__m128i u = _mm_or_si128 (a, b);
|
||||
__m128i v = _mm_or_si128 (c, d);
|
||||
__m128i w = _mm_or_si128 (u, v);
|
||||
|
||||
/* check for matches */
|
||||
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
|
||||
return sp - ss + __builtin_ctz(~ms);
|
||||
}
|
||||
|
||||
/* move to next block */
|
||||
sp += 16;
|
||||
nb -= 16;
|
||||
}
|
||||
|
||||
/* remaining bytes, do with scalar code */
|
||||
while (nb-- > 0) {
|
||||
switch (*sp++) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue