2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-20 16:45:22 +08:00

opt: skip space use shuffle (#416)

This commit is contained in:
liu 2023-05-16 13:24:22 +08:00 committed by GitHub
parent ebbe7589ca
commit d83abb5435
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 9710 additions and 8575 deletions

File diff suppressed because it is too large Load diff

View file

@ -9,28 +9,28 @@ package avx
func __native_entry__() uintptr
var (
_subr__f32toa = __native_entry__() + 28800
_subr__f64toa = __native_entry__() + 448
_subr__get_by_path = __native_entry__() + 25664
_subr__html_escape = __native_entry__() + 9296
_subr__i64toa = __native_entry__() + 3744
_subr__lspace = __native_entry__() + 80
_subr__quote = __native_entry__() + 5136
_subr__skip_array = __native_entry__() + 18592
_subr__skip_number = __native_entry__() + 22224
_subr__skip_object = __native_entry__() + 20640
_subr__skip_one = __native_entry__() + 22384
_subr__skip_one_fast = __native_entry__() + 22624
_subr__u64toa = __native_entry__() + 3856
_subr__unquote = __native_entry__() + 6928
_subr__validate_one = __native_entry__() + 22448
_subr__validate_utf8 = __native_entry__() + 27552
_subr__validate_utf8_fast = __native_entry__() + 28224
_subr__value = __native_entry__() + 12480
_subr__vnumber = __native_entry__() + 16256
_subr__vsigned = __native_entry__() + 17872
_subr__vstring = __native_entry__() + 14704
_subr__vunsigned = __native_entry__() + 18240
_subr__f32toa = __native_entry__() + 31264
_subr__f64toa = __native_entry__() + 192
_subr__get_by_path = __native_entry__() + 25856
_subr__html_escape = __native_entry__() + 9040
_subr__i64toa = __native_entry__() + 3488
_subr__lspace = __native_entry__() + 16
_subr__quote = __native_entry__() + 4880
_subr__skip_array = __native_entry__() + 17952
_subr__skip_number = __native_entry__() + 21952
_subr__skip_object = __native_entry__() + 20368
_subr__skip_one = __native_entry__() + 22112
_subr__skip_one_fast = __native_entry__() + 22352
_subr__u64toa = __native_entry__() + 3600
_subr__unquote = __native_entry__() + 6672
_subr__validate_one = __native_entry__() + 22176
_subr__validate_utf8 = __native_entry__() + 30000
_subr__validate_utf8_fast = __native_entry__() + 30672
_subr__value = __native_entry__() + 12224
_subr__vnumber = __native_entry__() + 15616
_subr__vsigned = __native_entry__() + 17232
_subr__vstring = __native_entry__() + 14064
_subr__vunsigned = __native_entry__() + 17600
)
const (
@ -45,7 +45,7 @@ const (
_stack__skip_number = 72
_stack__skip_object = 128
_stack__skip_one = 128
_stack__skip_one_fast = 216
_stack__skip_one_fast = 200
_stack__u64toa = 8
_stack__unquote = 88
_stack__validate_one = 128

File diff suppressed because it is too large Load diff

View file

@ -9,34 +9,34 @@ package avx2
func __native_entry__() uintptr
var (
_subr__f32toa = __native_entry__() + 34720
_subr__f64toa = __native_entry__() + 736
_subr__get_by_path = __native_entry__() + 29248
_subr__html_escape = __native_entry__() + 10944
_subr__i64toa = __native_entry__() + 4032
_subr__lspace = __native_entry__() + 224
_subr__quote = __native_entry__() + 5520
_subr__skip_array = __native_entry__() + 21616
_subr__skip_number = __native_entry__() + 25840
_subr__skip_object = __native_entry__() + 23648
_subr__skip_one = __native_entry__() + 26000
_subr__skip_one_fast = __native_entry__() + 26416
_subr__u64toa = __native_entry__() + 4144
_subr__unquote = __native_entry__() + 8336
_subr__validate_one = __native_entry__() + 26064
_subr__validate_utf8 = __native_entry__() + 31152
_subr__validate_utf8_fast = __native_entry__() + 32112
_subr__value = __native_entry__() + 15472
_subr__vnumber = __native_entry__() + 19280
_subr__vsigned = __native_entry__() + 20896
_subr__vstring = __native_entry__() + 17952
_subr__vunsigned = __native_entry__() + 21264
_subr__f32toa = __native_entry__() + 33888
_subr__f64toa = __native_entry__() + 288
_subr__get_by_path = __native_entry__() + 28336
_subr__html_escape = __native_entry__() + 10496
_subr__i64toa = __native_entry__() + 3584
_subr__lspace = __native_entry__() + 64
_subr__quote = __native_entry__() + 5072
_subr__skip_array = __native_entry__() + 20688
_subr__skip_number = __native_entry__() + 24912
_subr__skip_object = __native_entry__() + 22736
_subr__skip_one = __native_entry__() + 25072
_subr__skip_one_fast = __native_entry__() + 25488
_subr__u64toa = __native_entry__() + 3696
_subr__unquote = __native_entry__() + 7888
_subr__validate_one = __native_entry__() + 25136
_subr__validate_utf8 = __native_entry__() + 30320
_subr__validate_utf8_fast = __native_entry__() + 31280
_subr__value = __native_entry__() + 15024
_subr__vnumber = __native_entry__() + 18352
_subr__vsigned = __native_entry__() + 19968
_subr__vstring = __native_entry__() + 17024
_subr__vunsigned = __native_entry__() + 20336
)
const (
_stack__f32toa = 48
_stack__f64toa = 80
_stack__get_by_path = 312
_stack__get_by_path = 296
_stack__html_escape = 72
_stack__i64toa = 16
_stack__lspace = 8
@ -45,7 +45,7 @@ const (
_stack__skip_number = 72
_stack__skip_object = 128
_stack__skip_one = 128
_stack__skip_one_fast = 224
_stack__skip_one_fast = 208
_stack__u64toa = 8
_stack__unquote = 72
_stack__validate_one = 128

File diff suppressed because it is too large Load diff

View file

@ -9,34 +9,34 @@ package sse
func __native_entry__() uintptr
var (
_subr__f32toa = __native_entry__() + 29440
_subr__f64toa = __native_entry__() + 448
_subr__get_by_path = __native_entry__() + 26304
_subr__html_escape = __native_entry__() + 9360
_subr__i64toa = __native_entry__() + 3712
_subr__lspace = __native_entry__() + 80
_subr__quote = __native_entry__() + 5152
_subr__skip_array = __native_entry__() + 18800
_subr__skip_number = __native_entry__() + 22448
_subr__skip_object = __native_entry__() + 20832
_subr__skip_one = __native_entry__() + 22608
_subr__skip_one_fast = __native_entry__() + 22832
_subr__u64toa = __native_entry__() + 3840
_subr__unquote = __native_entry__() + 6992
_subr__validate_one = __native_entry__() + 22656
_subr__validate_utf8 = __native_entry__() + 28208
_subr__validate_utf8_fast = __native_entry__() + 28880
_subr__value = __native_entry__() + 12560
_subr__vnumber = __native_entry__() + 16416
_subr__vsigned = __native_entry__() + 18064
_subr__vstring = __native_entry__() + 14800
_subr__vunsigned = __native_entry__() + 18448
_subr__f32toa = __native_entry__() + 31760
_subr__f64toa = __native_entry__() + 160
_subr__get_by_path = __native_entry__() + 26384
_subr__html_escape = __native_entry__() + 9072
_subr__i64toa = __native_entry__() + 3424
_subr__lspace = __native_entry__() + 16
_subr__quote = __native_entry__() + 4864
_subr__skip_array = __native_entry__() + 18112
_subr__skip_number = __native_entry__() + 22128
_subr__skip_object = __native_entry__() + 20512
_subr__skip_one = __native_entry__() + 22288
_subr__skip_one_fast = __native_entry__() + 22512
_subr__u64toa = __native_entry__() + 3552
_subr__unquote = __native_entry__() + 6704
_subr__validate_one = __native_entry__() + 22336
_subr__validate_utf8 = __native_entry__() + 30528
_subr__validate_utf8_fast = __native_entry__() + 31200
_subr__value = __native_entry__() + 12272
_subr__vnumber = __native_entry__() + 15728
_subr__vsigned = __native_entry__() + 17376
_subr__vstring = __native_entry__() + 14112
_subr__vunsigned = __native_entry__() + 17760
)
const (
_stack__f32toa = 48
_stack__f64toa = 80
_stack__get_by_path = 256
_stack__get_by_path = 240
_stack__html_escape = 64
_stack__i64toa = 16
_stack__lspace = 8
@ -45,7 +45,7 @@ const (
_stack__skip_number = 72
_stack__skip_object = 128
_stack__skip_one = 128
_stack__skip_one_fast = 168
_stack__skip_one_fast = 136
_stack__u64toa = 8
_stack__unquote = 88
_stack__validate_one = 128

View file

@ -16,14 +16,7 @@
#include "native.h"
#if USE_AVX2
static const uintptr_t ALIGN_MASK = 31;
#else
static const uintptr_t ALIGN_MASK = 15;
#endif
size_t lspace(const char *sp, size_t nb, size_t p) {
int32_t ms;
const char * ss = sp;
/* seek to `p` */
@ -31,64 +24,28 @@ size_t lspace(const char *sp, size_t nb, size_t p) {
nb -= p;
/* likely to run into non-spaces within a few characters, try scalar code first */
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
switch ((nb--, *sp++)) {
case ' ' : break;
case '\r' : break;
case '\n' : break;
case '\t' : break;
default : return sp - ss - 1;
}
}
#if USE_AVX2
__m256i space_tab = _mm256_setr_epi8(
'\x20', 0, 0, 0, 0, 0, 0, 0,
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0,
'\x20', 0, 0, 0, 0, 0, 0, 0,
0, '\x09', '\x0A', 0, 0, '\x0D', 0, 0
);
/* 32-byte loop */
while (likely(nb >= 32)) {
__m256i x = _mm256_load_si256 ((const void *)sp);
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
__m256i u = _mm256_or_si256 (a, b);
__m256i v = _mm256_or_si256 (c, d);
__m256i w = _mm256_or_si256 (u, v);
/* check for matches */
if ((ms = _mm256_movemask_epi8(w)) != -1) {
_mm256_zeroupper();
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
__m256i input = _mm256_loadu_si256((__m256i*)sp);
__m256i shuffle = _mm256_shuffle_epi8(space_tab, input);
__m256i result = _mm256_cmpeq_epi8(input, shuffle);
int32_t mask = _mm256_movemask_epi8(result);
if (mask != -1) {
return sp - ss + __builtin_ctzll(~(uint64_t)mask);
}
/* move to next block */
sp += 32;
nb -= 32;
}
/* clear upper half to avoid AVX-SSE transition penalty */
_mm256_zeroupper();
#endif
/* 16-byte loop */
while (likely(nb >= 16)) {
__m128i x = _mm_load_si128 ((const void *)sp);
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
__m128i u = _mm_or_si128 (a, b);
__m128i v = _mm_or_si128 (c, d);
__m128i w = _mm_or_si128 (u, v);
/* check for matches */
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
return sp - ss + __builtin_ctz(~ms);
}
/* move to next block */
sp += 16;
nb -= 16;
}
/* remaining bytes, do with scalar code */
while (nb-- > 0) {
switch (*sp++) {