Skip to content

Commit a266af7

Browse files
committed
[InstCombine] Canonicalize SPF to min/max intrinsics
Now that integer min/max intrinsics have good support in both InstCombine and other passes, start canonicalizing SPF min/max to intrinsic min/max. Once this sticks, we can stop matching SPF min/max in various places, and can remove the hacks we have for preventing infinite loops and breaking of SPF canonicalization.

Differential Revision: https://reviews.llvm.org/D98152
1 parent aa551ad commit a266af7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

62 files changed

+1708
-2167
lines changed

clang/test/CodeGen/builtins-wasm.c

+24-36
Original file line numberDiff line numberDiff line change
@@ -262,86 +262,74 @@ i64x2 abs_i64x2(i64x2 v) {
262262

263263
i8x16 min_s_i8x16(i8x16 x, i8x16 y) {
264264
return __builtin_wasm_min_s_i8x16(x, y);
265-
// WEBASSEMBLY: %0 = icmp slt <16 x i8> %x, %y
266-
// WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
267-
// WEBASSEMBLY-NEXT: ret <16 x i8> %1
265+
// WEBASSEMBLY: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
266+
// WEBASSEMBLY-NEXT: ret
268267
}
269268

270269
u8x16 min_u_i8x16(u8x16 x, u8x16 y) {
271270
return __builtin_wasm_min_u_i8x16(x, y);
272-
// WEBASSEMBLY: %0 = icmp ult <16 x i8> %x, %y
273-
// WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
274-
// WEBASSEMBLY-NEXT: ret <16 x i8> %1
271+
// WEBASSEMBLY: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
272+
// WEBASSEMBLY-NEXT: ret
275273
}
276274

277275
i8x16 max_s_i8x16(i8x16 x, i8x16 y) {
278276
return __builtin_wasm_max_s_i8x16(x, y);
279-
// WEBASSEMBLY: %0 = icmp sgt <16 x i8> %x, %y
280-
// WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
281-
// WEBASSEMBLY-NEXT: ret <16 x i8> %1
277+
// WEBASSEMBLY: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
278+
// WEBASSEMBLY-NEXT: ret
282279
}
283280

284281
u8x16 max_u_i8x16(u8x16 x, u8x16 y) {
285282
return __builtin_wasm_max_u_i8x16(x, y);
286-
// WEBASSEMBLY: %0 = icmp ugt <16 x i8> %x, %y
287-
// WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
288-
// WEBASSEMBLY-NEXT: ret <16 x i8> %1
283+
// WEBASSEMBLY: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
284+
// WEBASSEMBLY-NEXT: ret
289285
}
290286

291287
i16x8 min_s_i16x8(i16x8 x, i16x8 y) {
292288
return __builtin_wasm_min_s_i16x8(x, y);
293-
// WEBASSEMBLY: %0 = icmp slt <8 x i16> %x, %y
294-
// WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
295-
// WEBASSEMBLY-NEXT: ret <8 x i16> %1
289+
// WEBASSEMBLY: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
290+
// WEBASSEMBLY-NEXT: ret
296291
}
297292

298293
u16x8 min_u_i16x8(u16x8 x, u16x8 y) {
299294
return __builtin_wasm_min_u_i16x8(x, y);
300-
// WEBASSEMBLY: %0 = icmp ult <8 x i16> %x, %y
301-
// WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
302-
// WEBASSEMBLY-NEXT: ret <8 x i16> %1
295+
// WEBASSEMBLY: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
296+
// WEBASSEMBLY-NEXT: ret
303297
}
304298

305299
i16x8 max_s_i16x8(i16x8 x, i16x8 y) {
306300
return __builtin_wasm_max_s_i16x8(x, y);
307-
// WEBASSEMBLY: %0 = icmp sgt <8 x i16> %x, %y
308-
// WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
309-
// WEBASSEMBLY-NEXT: ret <8 x i16> %1
301+
// WEBASSEMBLY: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
302+
// WEBASSEMBLY-NEXT: ret
310303
}
311304

312305
u16x8 max_u_i16x8(u16x8 x, u16x8 y) {
313306
return __builtin_wasm_max_u_i16x8(x, y);
314-
// WEBASSEMBLY: %0 = icmp ugt <8 x i16> %x, %y
315-
// WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
316-
// WEBASSEMBLY-NEXT: ret <8 x i16> %1
307+
// WEBASSEMBLY: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
308+
// WEBASSEMBLY-NEXT: ret
317309
}
318310

319311
i32x4 min_s_i32x4(i32x4 x, i32x4 y) {
320312
return __builtin_wasm_min_s_i32x4(x, y);
321-
// WEBASSEMBLY: %0 = icmp slt <4 x i32> %x, %y
322-
// WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
323-
// WEBASSEMBLY-NEXT: ret <4 x i32> %1
313+
// WEBASSEMBLY: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
314+
// WEBASSEMBLY-NEXT: ret
324315
}
325316

326317
u32x4 min_u_i32x4(u32x4 x, u32x4 y) {
327318
return __builtin_wasm_min_u_i32x4(x, y);
328-
// WEBASSEMBLY: %0 = icmp ult <4 x i32> %x, %y
329-
// WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
330-
// WEBASSEMBLY-NEXT: ret <4 x i32> %1
319+
// WEBASSEMBLY: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
320+
// WEBASSEMBLY-NEXT: ret
331321
}
332322

333323
i32x4 max_s_i32x4(i32x4 x, i32x4 y) {
334324
return __builtin_wasm_max_s_i32x4(x, y);
335-
// WEBASSEMBLY: %0 = icmp sgt <4 x i32> %x, %y
336-
// WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
337-
// WEBASSEMBLY-NEXT: ret <4 x i32> %1
325+
// WEBASSEMBLY: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
326+
// WEBASSEMBLY-NEXT: ret
338327
}
339328

340329
u32x4 max_u_i32x4(u32x4 x, u32x4 y) {
341330
return __builtin_wasm_max_u_i32x4(x, y);
342-
// WEBASSEMBLY: %0 = icmp ugt <4 x i32> %x, %y
343-
// WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
344-
// WEBASSEMBLY-NEXT: ret <4 x i32> %1
331+
// WEBASSEMBLY: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
332+
// WEBASSEMBLY-NEXT: ret
345333
}
346334

347335
i16x8 sub_sat_s_i16x8(i16x8 x, i16x8 y) {

clang/test/Headers/wasm.c

+32-44
Original file line numberDiff line numberDiff line change
@@ -1711,10 +1711,9 @@ v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
17111711
// CHECK-NEXT: entry:
17121712
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
17131713
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1714-
// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
1715-
// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1716-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1717-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1714+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]]
1715+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1716+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
17181717
//
17191718
v128_t test_i8x16_min(v128_t a, v128_t b) {
17201719
return wasm_i8x16_min(a, b);
@@ -1724,10 +1723,9 @@ v128_t test_i8x16_min(v128_t a, v128_t b) {
17241723
// CHECK-NEXT: entry:
17251724
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
17261725
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1727-
// CHECK-NEXT: [[TMP2:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
1728-
// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1729-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1730-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1726+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]]
1727+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1728+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
17311729
//
17321730
v128_t test_u8x16_min(v128_t a, v128_t b) {
17331731
return wasm_u8x16_min(a, b);
@@ -1737,10 +1735,9 @@ v128_t test_u8x16_min(v128_t a, v128_t b) {
17371735
// CHECK-NEXT: entry:
17381736
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
17391737
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1740-
// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
1741-
// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1742-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1743-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1738+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]]
1739+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1740+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
17441741
//
17451742
v128_t test_i8x16_max(v128_t a, v128_t b) {
17461743
return wasm_i8x16_max(a, b);
@@ -1750,10 +1747,9 @@ v128_t test_i8x16_max(v128_t a, v128_t b) {
17501747
// CHECK-NEXT: entry:
17511748
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
17521749
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1753-
// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
1754-
// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1755-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1756-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1750+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]]
1751+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1752+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
17571753
//
17581754
v128_t test_u8x16_max(v128_t a, v128_t b) {
17591755
return wasm_u8x16_max(a, b);
@@ -1944,10 +1940,9 @@ v128_t test_i16x8_mul(v128_t a, v128_t b) {
19441940
// CHECK-NEXT: entry:
19451941
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
19461942
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1947-
// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
1948-
// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1949-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1950-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1943+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]]
1944+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1945+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
19511946
//
19521947
v128_t test_i16x8_min(v128_t a, v128_t b) {
19531948
return wasm_i16x8_min(a, b);
@@ -1957,10 +1952,9 @@ v128_t test_i16x8_min(v128_t a, v128_t b) {
19571952
// CHECK-NEXT: entry:
19581953
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
19591954
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1960-
// CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
1961-
// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1962-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1963-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1955+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]]
1956+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1957+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
19641958
//
19651959
v128_t test_u16x8_min(v128_t a, v128_t b) {
19661960
return wasm_u16x8_min(a, b);
@@ -1970,10 +1964,9 @@ v128_t test_u16x8_min(v128_t a, v128_t b) {
19701964
// CHECK-NEXT: entry:
19711965
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
19721966
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1973-
// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
1974-
// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1975-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1976-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1967+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]]
1968+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1969+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
19771970
//
19781971
v128_t test_i16x8_max(v128_t a, v128_t b) {
19791972
return wasm_i16x8_max(a, b);
@@ -1983,10 +1976,9 @@ v128_t test_i16x8_max(v128_t a, v128_t b) {
19831976
// CHECK-NEXT: entry:
19841977
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
19851978
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1986-
// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
1987-
// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1988-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1989-
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
1979+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]]
1980+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1981+
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
19901982
//
19911983
v128_t test_u16x8_max(v128_t a, v128_t b) {
19921984
return wasm_u16x8_max(a, b);
@@ -2103,39 +2095,35 @@ v128_t test_i32x4_mul(v128_t a, v128_t b) {
21032095

21042096
// CHECK-LABEL: @test_i32x4_min(
21052097
// CHECK-NEXT: entry:
2106-
// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
2107-
// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
2108-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2098+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]]
2099+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
21092100
//
21102101
v128_t test_i32x4_min(v128_t a, v128_t b) {
21112102
return wasm_i32x4_min(a, b);
21122103
}
21132104

21142105
// CHECK-LABEL: @test_u32x4_min(
21152106
// CHECK-NEXT: entry:
2116-
// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
2117-
// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
2118-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2107+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]]
2108+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
21192109
//
21202110
v128_t test_u32x4_min(v128_t a, v128_t b) {
21212111
return wasm_u32x4_min(a, b);
21222112
}
21232113

21242114
// CHECK-LABEL: @test_i32x4_max(
21252115
// CHECK-NEXT: entry:
2126-
// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
2127-
// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
2128-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2116+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]]
2117+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
21292118
//
21302119
v128_t test_i32x4_max(v128_t a, v128_t b) {
21312120
return wasm_i32x4_max(a, b);
21322121
}
21332122

21342123
// CHECK-LABEL: @test_u32x4_max(
21352124
// CHECK-NEXT: entry:
2136-
// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
2137-
// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
2138-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2125+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]]
2126+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
21392127
//
21402128
v128_t test_u32x4_max(v128_t a, v128_t b) {
21412129
return wasm_u32x4_max(a, b);

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+47-21
Original file line numberDiff line numberDiff line change
@@ -1142,29 +1142,55 @@ static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel,
11421142
return &Sel;
11431143
}
11441144

1145-
static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
1146-
InstCombinerImpl &IC) {
1147-
if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
1148-
return nullptr;
1149-
1145+
static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
1146+
InstCombinerImpl &IC) {
11501147
Value *LHS, *RHS;
1151-
SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
1152-
if (SPF != SelectPatternFlavor::SPF_ABS &&
1153-
SPF != SelectPatternFlavor::SPF_NABS)
1148+
// TODO: What to do with pointer min/max patterns?
1149+
if (!Sel.getType()->isIntOrIntVectorTy())
11541150
return nullptr;
11551151

1156-
// Note that NSW flag can only be propagated for normal, non-negated abs!
1157-
bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
1158-
match(RHS, m_NSWNeg(m_Specific(LHS)));
1159-
Constant *IntMinIsPoisonC =
1160-
ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
1161-
Instruction *Abs =
1162-
IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
1163-
1164-
if (SPF == SelectPatternFlavor::SPF_NABS)
1165-
return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
1152+
SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
1153+
if (SPF == SelectPatternFlavor::SPF_ABS ||
1154+
SPF == SelectPatternFlavor::SPF_NABS) {
1155+
if (!Cmp.hasOneUse())
1156+
return nullptr; // TODO: Relax this restriction.
1157+
1158+
// Note that NSW flag can only be propagated for normal, non-negated abs!
1159+
bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
1160+
match(RHS, m_NSWNeg(m_Specific(LHS)));
1161+
Constant *IntMinIsPoisonC =
1162+
ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
1163+
Instruction *Abs =
1164+
IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
1165+
1166+
if (SPF == SelectPatternFlavor::SPF_NABS)
1167+
return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
1168+
return IC.replaceInstUsesWith(Sel, Abs);
1169+
}
1170+
1171+
if (SelectPatternResult::isMinOrMax(SPF)) {
1172+
Intrinsic::ID IntrinsicID;
1173+
switch (SPF) {
1174+
case SelectPatternFlavor::SPF_UMIN:
1175+
IntrinsicID = Intrinsic::umin;
1176+
break;
1177+
case SelectPatternFlavor::SPF_UMAX:
1178+
IntrinsicID = Intrinsic::umax;
1179+
break;
1180+
case SelectPatternFlavor::SPF_SMIN:
1181+
IntrinsicID = Intrinsic::smin;
1182+
break;
1183+
case SelectPatternFlavor::SPF_SMAX:
1184+
IntrinsicID = Intrinsic::smax;
1185+
break;
1186+
default:
1187+
llvm_unreachable("Unexpected SPF");
1188+
}
1189+
return IC.replaceInstUsesWith(
1190+
Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS));
1191+
}
11661192

1167-
return IC.replaceInstUsesWith(Sel, Abs);
1193+
return nullptr;
11681194
}
11691195

11701196
/// If we have a select with an equality comparison, then we know the value in
@@ -1540,8 +1566,8 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
15401566
if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this))
15411567
return NewSel;
15421568

1543-
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this))
1544-
return NewAbs;
1569+
if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this))
1570+
return NewSPF;
15451571

15461572
if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
15471573
return replaceInstUsesWith(SI, V);

llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll

+4-7
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,7 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(i64 addrspace(1)*
198198
; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
199199
; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
200200
; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x
201-
; CHECK: %cmp = icmp slt i32 %sub, 8
202-
; CHECK: %select = select i1 %cmp, i32 %sub, i32 8
201+
; CHECK: %1 = call i32 @llvm.smin.i32(i32 %sub, i32 8)
203202
define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
204203
%dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
205204
%gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4
@@ -222,9 +221,8 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(i64 addrspace(1)*
222221
; CHECK-LABEL: @local_size_x_8_16_2_wrong_select(
223222
; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
224223
; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x
225-
; CHECK: %1 = icmp ugt i32 %sub, 8
226-
; CHECK: %select = select i1 %1, i32 %sub, i32 8
227-
; CHECK: %zext = zext i32 %select to i64
224+
; CHECK: %1 = call i32 @llvm.umax.i32(i32 %sub, i32 8)
225+
; CHECK: %zext = zext i32 %1 to i64
228226
define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
229227
%dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
230228
%gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4
@@ -472,8 +470,7 @@ define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(i64 addrspac
472470
}
473471

474472
; CHECK-LABEL: @use_local_size_x_uniform_work_group_size_false(
475-
; CHECK: icmp ult
476-
; CHECK: select
473+
; CHECK: call i32 @llvm.umin
477474
define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(i64 addrspace(1)* %out) #3 {
478475
%dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
479476
%gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4

0 commit comments

Comments (0)