Skip to content

Commit 18c48d5

Browse files
committed
Always take BLOSC_STRICT_ALIGN path
Unaligned access is undefined behaviour (UB), even on x86. UBSan will also detect this (-fsanitize=undefined or -fsanitize=alignment).

On arm, I hit SIGBUS crashes in pandas's test suite via pandas -> pytables -> c-blosc2 because of unaligned loads and stores.

Modern compilers are capable of optimising the "slow" path (bit shifts and memcpy) into faster alternatives where it is legal.

Bug: https://bugs.gentoo.org/911660
Bug: pandas-dev/pandas#54391
Bug: pandas-dev/pandas#54396
Signed-off-by: Sam James <[email protected]>
1 parent 79a6e96 commit 18c48d5

File tree

6 files changed

+6
-123
lines changed

6 files changed

+6
-123
lines changed

blosc/blosc2.c

-4
Original file line numberDiff line numberDiff line change
@@ -1045,11 +1045,7 @@ static bool get_run(const uint8_t* ip, const uint8_t* ip_bound) {
10451045
/* Broadcast the value for every byte in a 64-bit register */
10461046
memset(&value, x, 8);
10471047
while (ip < (ip_bound - 8)) {
1048-
#if defined(BLOSC_STRICT_ALIGN)
10491048
memcpy(&value2, ip, 8);
1050-
#else
1051-
value2 = *(int64_t*)ip;
1052-
#endif
10531049
if (value != value2) {
10541050
// Values differ. We don't have a run.
10551051
return false;

blosc/blosclz.c

+2-24
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,8 @@
4646
#define MAX_DISTANCE 8191
4747
#define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
4848

49-
#ifdef BLOSC_STRICT_ALIGN
50-
#define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8)
51-
#define BLOSCLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
52-
#else
53-
#define BLOSCLZ_READU16(p) *((const uint16_t*)(p))
54-
#define BLOSCLZ_READU32(p) *((const uint32_t*)(p))
55-
#endif
49+
#define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8)
50+
#define BLOSCLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
5651

5752
#define HASH_LOG (14U)
5853

@@ -123,11 +118,7 @@ static uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref
123118
memset(&value, x, 8);
124119
/* safe because the outer check against ip limit */
125120
while (ip < (ip_bound - sizeof(int64_t))) {
126-
#if defined(BLOSC_STRICT_ALIGN)
127121
memcpy(&value2, ref, 8);
128-
#else
129-
value2 = ((int64_t*)ref)[0];
130-
#endif
131122
if (value != value2) {
132123
/* Return the byte that starts to differ */
133124
while (*ref++ == x) ip++;
@@ -146,19 +137,6 @@ static uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref
146137

147138
/* Return the byte that starts to differ */
148139
uint8_t *get_match(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
149-
#if !defined(BLOSC_STRICT_ALIGN)
150-
while (ip < (ip_bound - sizeof(int64_t))) {
151-
if (*(int64_t*)ref != *(int64_t*)ip) {
152-
/* Return the byte that starts to differ */
153-
while (*ref++ == *ip++) {}
154-
return ip;
155-
}
156-
else {
157-
ip += sizeof(int64_t);
158-
ref += sizeof(int64_t);
159-
}
160-
}
161-
#endif
162140
/* Look into the remainder */
163141
while ((ip < ip_bound) && (*ref++ == *ip++)) {}
164142
return ip;

blosc/fastcopy.c

+4-57
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222

2323
#include <assert.h>
2424
#include <stdint.h>
25-
#if defined(BLOSC_STRICT_ALIGN)
2625
#include <string.h>
27-
#endif
2826

2927
/*
3028
* Use inlined functions for supported systems.
@@ -40,13 +38,9 @@ static inline unsigned char *copy_1_bytes(unsigned char *out, const unsigned cha
4038
}
4139

4240
static inline unsigned char *copy_2_bytes(unsigned char *out, const unsigned char *from) {
43-
#if defined(BLOSC_STRICT_ALIGN)
4441
uint16_t chunk;
4542
memcpy(&chunk, from, 2);
4643
memcpy(out, &chunk, 2);
47-
#else
48-
*(uint16_t *) out = *(uint16_t *) from;
49-
#endif
5044
return out + 2;
5145
}
5246

@@ -56,13 +50,9 @@ static inline unsigned char *copy_3_bytes(unsigned char *out, const unsigned cha
5650
}
5751

5852
static inline unsigned char *copy_4_bytes(unsigned char *out, const unsigned char *from) {
59-
#if defined(BLOSC_STRICT_ALIGN)
6053
uint32_t chunk;
6154
memcpy(&chunk, from, 4);
6255
memcpy(out, &chunk, 4);
63-
#else
64-
*(uint32_t *) out = *(uint32_t *) from;
65-
#endif
6656
return out + 4;
6757
}
6858

@@ -82,13 +72,9 @@ static inline unsigned char *copy_7_bytes(unsigned char *out, const unsigned cha
8272
}
8373

8474
static inline unsigned char *copy_8_bytes(unsigned char *out, const unsigned char *from) {
85-
#if defined(BLOSC_STRICT_ALIGN)
8675
uint64_t chunk;
8776
memcpy(&chunk, from, 8);
8877
memcpy(out, &chunk, 8);
89-
#else
90-
*(uint64_t *) out = *(uint64_t *) from;
91-
#endif
9278
return out + 8;
9379
}
9480

@@ -99,17 +85,10 @@ static inline unsigned char *copy_16_bytes(unsigned char *out, const unsigned ch
9985
chunk = _mm_loadu_si128((__m128i*)from);
10086
_mm_storeu_si128((__m128i*)out, chunk);
10187
out += 16;
102-
#elif !defined(BLOSC_STRICT_ALIGN)
103-
*(uint64_t*)out = *(uint64_t*)from;
104-
from += 8; out += 8;
105-
*(uint64_t*)out = *(uint64_t*)from;
106-
from += 8; out += 8;
107-
#else
108-
int i;
109-
for (i = 0; i < 16; i++) {
110-
*out++ = *from++;
111-
}
112-
#endif
88+
int i;
89+
for (i = 0; i < 16; i++) {
90+
*out++ = *from++;
91+
}
11392
return out;
11493
}
11594

@@ -127,15 +106,6 @@ static inline unsigned char *copy_32_bytes(unsigned char *out, const unsigned ch
127106
chunk = _mm_loadu_si128((__m128i*)from);
128107
_mm_storeu_si128((__m128i*)out, chunk);
129108
out += 16;
130-
#elif !defined(BLOSC_STRICT_ALIGN)
131-
*(uint64_t*)out = *(uint64_t*)from;
132-
from += 8; out += 8;
133-
*(uint64_t*)out = *(uint64_t*)from;
134-
from += 8; out += 8;
135-
*(uint64_t*)out = *(uint64_t*)from;
136-
from += 8; out += 8;
137-
*(uint64_t*)out = *(uint64_t*)from;
138-
from += 8; out += 8;
139109
#else
140110
int i;
141111
for (i = 0; i < 32; i++) {
@@ -159,32 +129,9 @@ static inline unsigned char *copy_32_bytes(unsigned char *out, const unsigned ch
159129
static inline unsigned char *copy_bytes(unsigned char *out, const unsigned char *from, unsigned len) {
160130
assert(len < 8);
161131

162-
#ifdef BLOSC_STRICT_ALIGN
163132
while (len--) {
164133
*out++ = *from++;
165134
}
166-
#else
167-
switch (len) {
168-
case 7:
169-
return copy_7_bytes(out, from);
170-
case 6:
171-
return copy_6_bytes(out, from);
172-
case 5:
173-
return copy_5_bytes(out, from);
174-
case 4:
175-
return copy_4_bytes(out, from);
176-
case 3:
177-
return copy_3_bytes(out, from);
178-
case 2:
179-
return copy_2_bytes(out, from);
180-
case 1:
181-
return copy_1_bytes(out, from);
182-
case 0:
183-
return out;
184-
default:
185-
assert(0);
186-
}
187-
#endif /* BLOSC_STRICT_ALIGN */
188135
return out;
189136
}
190137

include/blosc2/blosc2-common.h

-28
Original file line numberDiff line numberDiff line change
@@ -42,34 +42,6 @@
4242
#define __SSE2__
4343
#endif
4444

45-
/*
46-
* Detect if the architecture is fine with unaligned access.
47-
*/
48-
#if !defined(BLOSC_STRICT_ALIGN)
49-
#define BLOSC_STRICT_ALIGN
50-
#if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */
51-
#undef BLOSC_STRICT_ALIGN
52-
#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
53-
#undef BLOSC_STRICT_ALIGN
54-
#elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */
55-
#undef BLOSC_STRICT_ALIGN
56-
#elif defined(__386)
57-
#undef BLOSC_STRICT_ALIGN
58-
#elif defined(_X86_) /* MinGW */
59-
#undef BLOSC_STRICT_ALIGN
60-
#elif defined(__I86__) /* Digital Mars */
61-
#undef BLOSC_STRICT_ALIGN
62-
/* Modern ARM systems (like ARM64) should support unaligned access
63-
quite efficiently. */
64-
#elif defined(__ARM_FEATURE_UNALIGNED) /* ARM, GNU C */
65-
#undef BLOSC_STRICT_ALIGN
66-
#elif defined(_ARCH_PPC) || defined(__PPC__)
67-
/* Modern PowerPC systems (like POWER8) should support unaligned access
68-
quite efficiently. */
69-
#undef BLOSC_STRICT_ALIGN
70-
#endif
71-
#endif
72-
7345
#if defined(__SSE2__)
7446
#include <emmintrin.h>
7547
#endif

plugins/codecs/ndlz/ndlz4x4.c

-5
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,8 @@
4545
#define MAX_DISTANCE 65535
4646

4747

48-
#ifdef BLOSC_STRICT_ALIGN
4948
#define NDLZ_READU16(p) ((p)[0] | (p)[1]<<8)
5049
#define NDLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
51-
#else
52-
#define NDLZ_READU16(p) *((const uint16_t*)(p))
53-
#define NDLZ_READU32(p) *((const uint32_t*)(p))
54-
#endif
5550

5651
#define HASH_LOG (12)
5752

plugins/codecs/ndlz/ndlz8x8.c

-5
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,8 @@
4545
#define MAX_DISTANCE 65535
4646

4747

48-
#ifdef BLOSC_STRICT_ALIGN
4948
#define NDLZ_READU16(p) ((p)[0] | (p)[1]<<8)
5049
#define NDLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
51-
#else
52-
#define NDLZ_READU16(p) *((const uint16_t*)(p))
53-
#define NDLZ_READU32(p) *((const uint32_t*)(p))
54-
#endif
5550

5651
#define HASH_LOG (12)
5752

0 commit comments

Comments
 (0)