mirror of
https://github.com/ii64/sonic.git
synced 2026-06-21 00:46:43 +08:00
* fix: use sse instead of sse4 * fix: use dispatch * fix: remove lzero Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
105 lines
No EOL
3.1 KiB
C
105 lines
No EOL
3.1 KiB
C
/*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#include "native.h"
|
|
|
|
/*
 * Low address bits that must be zero before lspace() leaves its scalar
 * prologue and starts issuing aligned vector loads: 32-byte alignment when
 * AVX2 is enabled, 16-byte alignment for the SSE path.
 */
#if USE_AVX2
static const uintptr_t ALIGN_MASK = 31;
#else
static const uintptr_t ALIGN_MASK = 15;
#endif
|
|
|
|
size_t lspace(const char *sp, size_t nb, size_t p) {
|
|
int32_t ms;
|
|
const char * ss = sp;
|
|
|
|
/* seek to `p` */
|
|
sp += p;
|
|
nb -= p;
|
|
|
|
/* likely to run into non-spaces within a few characters, try scalar code first */
|
|
while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) {
|
|
switch ((nb--, *sp++)) {
|
|
case ' ' : break;
|
|
case '\r' : break;
|
|
case '\n' : break;
|
|
case '\t' : break;
|
|
default : return sp - ss - 1;
|
|
}
|
|
}
|
|
|
|
#if USE_AVX2
|
|
/* 32-byte loop */
|
|
while (likely(nb >= 32)) {
|
|
__m256i x = _mm256_load_si256 ((const void *)sp);
|
|
__m256i a = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8(' '));
|
|
__m256i b = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\t'));
|
|
__m256i c = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\n'));
|
|
__m256i d = _mm256_cmpeq_epi8 (x, _mm256_set1_epi8('\r'));
|
|
__m256i u = _mm256_or_si256 (a, b);
|
|
__m256i v = _mm256_or_si256 (c, d);
|
|
__m256i w = _mm256_or_si256 (u, v);
|
|
|
|
/* check for matches */
|
|
if ((ms = _mm256_movemask_epi8(w)) != -1) {
|
|
_mm256_zeroupper();
|
|
return sp - ss + __builtin_ctzll(~(uint64_t)ms);
|
|
}
|
|
|
|
/* move to next block */
|
|
sp += 32;
|
|
nb -= 32;
|
|
}
|
|
|
|
/* clear upper half to avoid AVX-SSE transition penalty */
|
|
_mm256_zeroupper();
|
|
#endif
|
|
|
|
/* 16-byte loop */
|
|
while (likely(nb >= 16)) {
|
|
__m128i x = _mm_load_si128 ((const void *)sp);
|
|
__m128i a = _mm_cmpeq_epi8 (x, _mm_set1_epi8(' '));
|
|
__m128i b = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\t'));
|
|
__m128i c = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\n'));
|
|
__m128i d = _mm_cmpeq_epi8 (x, _mm_set1_epi8('\r'));
|
|
__m128i u = _mm_or_si128 (a, b);
|
|
__m128i v = _mm_or_si128 (c, d);
|
|
__m128i w = _mm_or_si128 (u, v);
|
|
|
|
/* check for matches */
|
|
if ((ms = _mm_movemask_epi8(w)) != 0xffff) {
|
|
return sp - ss + __builtin_ctz(~ms);
|
|
}
|
|
|
|
/* move to next block */
|
|
sp += 16;
|
|
nb -= 16;
|
|
}
|
|
|
|
/* remaining bytes, do with scalar code */
|
|
while (nb-- > 0) {
|
|
switch (*sp++) {
|
|
case ' ' : break;
|
|
case '\r' : break;
|
|
case '\n' : break;
|
|
case '\t' : break;
|
|
default : return sp - ss - 1;
|
|
}
|
|
}
|
|
|
|
/* all the characters are spaces */
|
|
return sp - ss;
|
|
} |