Skip to content

Commit c3299d7

Browse files
frankdjxGirgias
authored andcommitted
X86: Fast CRC32 computation using PCLMULQDQ instruction
Based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 Signed-off-by: Frank Du <[email protected]> Closes phpGH-6018
1 parent cb284f6 commit c3299d7

File tree

12 files changed

+620
-10
lines changed

12 files changed

+620
-10
lines changed

Zend/zend_cpuinfo.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ static zend_always_inline int zend_cpu_supports_sse42() {
159159
return __builtin_cpu_supports("sse4.2");
160160
}
161161

162+
/* __builtin_cpu_supports has pclmul from gcc9 */
163+
#if (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
164+
ZEND_NO_SANITIZE_ADDRESS
165+
static zend_always_inline int zend_cpu_supports_pclmul() {
166+
#if PHP_HAVE_BUILTIN_CPU_INIT
167+
__builtin_cpu_init();
168+
#endif
169+
return __builtin_cpu_supports("pclmul");
170+
}
171+
#endif
172+
162173
ZEND_NO_SANITIZE_ADDRESS
163174
static zend_always_inline int zend_cpu_supports_avx() {
164175
#if PHP_HAVE_BUILTIN_CPU_INIT
@@ -196,6 +207,10 @@ static zend_always_inline int zend_cpu_supports_sse42() {
196207
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42);
197208
}
198209

210+
static zend_always_inline int zend_cpu_supports_pclmul() {
211+
return zend_cpu_supports(ZEND_CPU_FEATURE_PCLMULQDQ);
212+
}
213+
199214
static zend_always_inline int zend_cpu_supports_avx() {
200215
return zend_cpu_supports(ZEND_CPU_FEATURE_AVX);
201216
}

Zend/zend_portability.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,10 @@ extern "C++" {
487487
# define PHP_HAVE_SSE4_2
488488
# endif
489489

490+
# if defined(HAVE_WMMINTRIN_H)
491+
# define PHP_HAVE_PCLMUL
492+
# endif
493+
490494
/*
491495
* AVX2 support was added in gcc 4.7, but AVX2 intrinsics don't work in
492496
* __attribute__((target("avx2"))) functions until gcc 4.9.
@@ -547,6 +551,58 @@ extern "C++" {
547551
# define ZEND_INTRIN_SSE4_2_FUNC_DECL(func)
548552
#endif
549553

554+
#ifdef __PCLMUL__
555+
/* Instructions compiled directly. */
556+
# define ZEND_INTRIN_PCLMUL_NATIVE 1
557+
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_PCLMUL)) || defined(ZEND_WIN32)
558+
/* Function resolved by ifunc or MINIT. */
559+
# define ZEND_INTRIN_PCLMUL_RESOLVER 1
560+
#endif
561+
562+
/* Do not use for conditional declaration of API functions! */
563+
#if defined(ZEND_INTRIN_PCLMUL_RESOLVER) && defined(ZEND_INTRIN_HAVE_IFUNC_TARGET) && (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
564+
/* __builtin_cpu_supports has pclmul from gcc9 */
565+
# define ZEND_INTRIN_PCLMUL_FUNC_PROTO 1
566+
#elif defined(ZEND_INTRIN_PCLMUL_RESOLVER)
567+
# define ZEND_INTRIN_PCLMUL_FUNC_PTR 1
568+
#endif
569+
570+
#ifdef ZEND_INTRIN_PCLMUL_RESOLVER
571+
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
572+
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func) ZEND_API func __attribute__((target("pclmul")))
573+
# else
574+
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func) func
575+
# endif
576+
#else
577+
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func)
578+
#endif
579+
580+
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) && defined(ZEND_INTRIN_PCLMUL_NATIVE)
581+
/* Instructions compiled directly. */
582+
# define ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE 1
583+
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2) && defined(PHP_HAVE_PCLMUL)) || defined(ZEND_WIN32)
584+
/* Function resolved by ifunc or MINIT. */
585+
# define ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER 1
586+
#endif
587+
588+
/* Do not use for conditional declaration of API functions! */
589+
#if defined(ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER) && defined(ZEND_INTRIN_HAVE_IFUNC_TARGET) && (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
590+
/* __builtin_cpu_supports has pclmul from gcc9 */
591+
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PROTO 1
592+
#elif defined(ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER)
593+
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR 1
594+
#endif
595+
596+
#ifdef ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
597+
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
598+
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func) ZEND_API func __attribute__((target("sse4.2,pclmul")))
599+
# else
600+
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func) func
601+
# endif
602+
#else
603+
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func)
604+
#endif
605+
550606
#ifdef __AVX2__
551607
# define ZEND_INTRIN_AVX2_NATIVE 1
552608
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32)

configure.ac

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ sys/ipc.h \
409409
dlfcn.h \
410410
tmmintrin.h \
411411
nmmintrin.h \
412+
wmmintrin.h \
412413
immintrin.h
413414
],[],[],[
414415
#ifdef HAVE_SYS_PARAM_H

ext/hash/hash_crc32.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "php_hash.h"
1919
#include "php_hash_crc32.h"
2020
#include "php_hash_crc32_tables.h"
21+
#include "ext/standard/crc32_x86.h"
2122

2223
PHP_HASH_API void PHP_CRC32Init(PHP_CRC32_CTX *context)
2324
{
@@ -26,27 +27,39 @@ PHP_HASH_API void PHP_CRC32Init(PHP_CRC32_CTX *context)
2627

2728
PHP_HASH_API void PHP_CRC32Update(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
2829
{
29-
size_t i;
30+
size_t i = 0;
3031

31-
for (i = 0; i < len; ++i) {
32+
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
33+
i += crc32_x86_simd_update(X86_CRC32, &context->state, input, len);
34+
#endif
35+
36+
for (; i < len; ++i) {
3237
context->state = (context->state << 8) ^ crc32_table[(context->state >> 24) ^ (input[i] & 0xff)];
3338
}
3439
}
3540

3641
PHP_HASH_API void PHP_CRC32BUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
3742
{
38-
size_t i;
43+
size_t i = 0;
44+
45+
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
46+
i += crc32_x86_simd_update(X86_CRC32B, &context->state, input, len);
47+
#endif
3948

40-
for (i = 0; i < len; ++i) {
49+
for (; i < len; ++i) {
4150
context->state = (context->state >> 8) ^ crc32b_table[(context->state ^ input[i]) & 0xff];
4251
}
4352
}
4453

4554
PHP_HASH_API void PHP_CRC32CUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
4655
{
47-
size_t i;
56+
size_t i = 0;
57+
58+
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
59+
i += crc32_x86_simd_update(X86_CRC32C, &context->state, input, len);
60+
#endif
4861

49-
for (i = 0; i < len; ++i) {
62+
for (; i < len; ++i) {
5063
context->state = (context->state >> 8) ^ crc32c_table[(context->state ^ input[i]) & 0xff];
5164
}
5265
}

ext/hash/tests/crc32.phpt

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@ echo hash('crc32', 'message digest'), "\n";
1010
echo hash('crc32', 'abcdefghijklmnopqrstuvwxyz'), "\n";
1111
echo hash('crc32', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
1212
echo hash('crc32', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
13+
echo hash('crc32', '1234567890123456'), "\n";
14+
echo hash('crc32', '1234567890123456abc'), "\n";
15+
echo hash('crc32', '12345678901234561234567890123456'), "\n";
16+
echo hash('crc32', '12345678901234561234567890123456abc'), "\n";
17+
echo hash('crc32', '123456789012345612345678901234561234567890123456'), "\n";
18+
echo hash('crc32', '123456789012345612345678901234561234567890123456abc'), "\n";
19+
echo hash('crc32', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
20+
echo hash('crc32', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
21+
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
22+
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
23+
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
24+
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
25+
echo hash('crc32', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
26+
echo hash('crc32', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
1327

1428
echo "crc32b\n";
1529
echo hash('crc32b', ''), "\n";
@@ -19,6 +33,20 @@ echo hash('crc32b', 'message digest'), "\n";
1933
echo hash('crc32b', 'abcdefghijklmnopqrstuvwxyz'), "\n";
2034
echo hash('crc32b', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
2135
echo hash('crc32b', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
36+
echo hash('crc32b', '1234567890123456'), "\n";
37+
echo hash('crc32b', '1234567890123456abc'), "\n";
38+
echo hash('crc32b', '12345678901234561234567890123456'), "\n";
39+
echo hash('crc32b', '12345678901234561234567890123456abc'), "\n";
40+
echo hash('crc32b', '123456789012345612345678901234561234567890123456'), "\n";
41+
echo hash('crc32b', '123456789012345612345678901234561234567890123456abc'), "\n";
42+
echo hash('crc32b', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
43+
echo hash('crc32b', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
44+
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
45+
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
46+
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
47+
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
48+
echo hash('crc32b', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
49+
echo hash('crc32b', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
2250

2351
echo "crc32c\n";
2452
echo hash('crc32c', ''), "\n";
@@ -59,6 +87,20 @@ echo hash('crc32c', "Even if I could be Shakespeare, I think I should still choo
5987
echo hash('crc32c', "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"), "\n";
6088
echo hash('crc32c', "How can you write a big system without C++? -Paul Glick"), "\n";
6189
echo hash('crc32c', "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), "\n";
90+
echo hash('crc32c', '1234567890123456'), "\n";
91+
echo hash('crc32c', '1234567890123456abc'), "\n";
92+
echo hash('crc32c', '12345678901234561234567890123456'), "\n";
93+
echo hash('crc32c', '12345678901234561234567890123456abc'), "\n";
94+
echo hash('crc32c', '123456789012345612345678901234561234567890123456'), "\n";
95+
echo hash('crc32c', '123456789012345612345678901234561234567890123456abc'), "\n";
96+
echo hash('crc32c', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
97+
echo hash('crc32c', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
98+
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
99+
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
100+
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
101+
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
102+
echo hash('crc32c', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
103+
echo hash('crc32c', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
62104

63105
?>
64106
--EXPECT--
@@ -70,6 +112,20 @@ crc32
70112
9693bf77
71113
882174a0
72114
96790816
115+
98b0e78d
116+
a6f33d71
117+
900a1d38
118+
396978fe
119+
adfc6afe
120+
d3ef9388
121+
c53911dc
122+
37006f1b
123+
4a54af3a
124+
98d05c71
125+
5a26f5b4
126+
b9108715
127+
cc684112
128+
b2ac45af
73129
crc32b
74130
00000000
75131
e8b7be43
@@ -78,6 +134,20 @@ e8b7be43
78134
4c2750bd
79135
1fc2e6d2
80136
7ca94a72
137+
1e5fcdb7
138+
70b54c2f
139+
094fb11e
140+
38210c49
141+
7399c6ef
142+
83e98d04
143+
1f26a94e
144+
e2e8634a
145+
0642542d
146+
43b42c9b
147+
262e1ded
148+
b7a463c4
149+
dfa1bbae
150+
4022d57a
81151
crc32c
82152
00000000
83153
c1d04330
@@ -116,4 +186,18 @@ de2e65c5
116186
297a88ed
117187
66ed1d8b
118188
dcded527
119-
9c44184b
189+
9c44184b
190+
9aa4287f
191+
ab2761c5
192+
cd486b4b
193+
c19c4a41
194+
1ea5b441
195+
36d20512
196+
31d11ffa
197+
65d5bb9e
198+
a0e3e317
199+
8dc10a7c
200+
7ab04135
201+
c292a38d
202+
e3e558ec
203+
b6c5e13e

ext/standard/basic_functions.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "ext/standard/php_dns.h"
3434
#include "ext/standard/php_uuencode.h"
3535
#include "ext/standard/php_mt_rand.h"
36+
#include "ext/standard/crc32_x86.h"
3637

3738
#ifdef PHP_WIN32
3839
#include "win32/php_win32_globals.h"
@@ -363,6 +364,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
363364
BASIC_MINIT_SUBMODULE(string_intrin)
364365
#endif
365366

367+
#if ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR
368+
BASIC_MINIT_SUBMODULE(crc32_x86_intrin)
369+
#endif
370+
366371
#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR
367372
BASIC_MINIT_SUBMODULE(base64_intrin)
368373
#endif

ext/standard/config.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ PHP_NEW_EXTENSION(standard, array.c base64.c basic_functions.c browscap.c crc32.
449449
http_fopen_wrapper.c php_fopen_wrapper.c credits.c css.c \
450450
var_unserializer.c ftok.c sha1.c user_filters.c uuencode.c \
451451
filters.c proc_open.c streamsfuncs.c http.c password.c \
452-
random.c net.c hrtime.c,,,
452+
random.c net.c hrtime.c crc32_x86.c,,,
453453
-DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
454454

455455
PHP_ADD_MAKEFILE_FRAGMENT

ext/standard/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ ADD_FLAG("LIBS_STANDARD", "iphlpapi.lib");
2525

2626
EXTENSION("standard", "array.c base64.c basic_functions.c browscap.c \
2727
crc32.c crypt.c crypt_freesec.c crypt_blowfish.c crypt_sha256.c \
28-
crypt_sha512.c php_crypt_r.c \
28+
crypt_sha512.c php_crypt_r.c crc32_x86.c \
2929
datetime.c dir.c dl.c dns.c dns_win32.c exec.c \
3030
file.c filestat.c formatted_print.c fsock.c head.c html.c image.c \
3131
info.c iptc.c lcg.c link.c mail.c math.c md5.c metaphone.c microtime.c \

ext/standard/crc32.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "php.h"
1818
#include "basic_functions.h"
1919
#include "crc32.h"
20+
#include "crc32_x86.h"
2021

2122
#if HAVE_AARCH64_CRC32
2223
# include <arm_acle.h>
@@ -74,7 +75,7 @@ PHP_FUNCTION(crc32)
7475
char *p;
7576
size_t nr;
7677
uint32_t crcinit = 0;
77-
register uint32_t crc;
78+
uint32_t crc;
7879

7980
ZEND_PARSE_PARAMETERS_START(1, 1)
8081
Z_PARAM_STRING(p, nr)
@@ -89,6 +90,12 @@ PHP_FUNCTION(crc32)
8990
}
9091
#endif
9192

93+
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
94+
size_t nr_simd = crc32_x86_simd_update(X86_CRC32B, &crc, (const unsigned char *)p, nr);
95+
nr -= nr_simd;
96+
p += nr_simd;
97+
#endif
98+
9299
for (; nr--; ++p) {
93100
crc = ((crc >> 8) & 0x00FFFFFF) ^ crc32tab[(crc ^ (*p)) & 0xFF ];
94101
}

0 commit comments

Comments
 (0)