Skip to content

Commit 6568ceb

Browse files
authored
[CodeGenPrepare] Drop nsw flags in optimizeLoadExt (#118180)
Alive2: https://alive2.llvm.org/ce/z/pMcD7q Closes #118172.
1 parent 1c7e407 commit 6568ceb

File tree

2 files changed

+93
-0
lines changed

2 files changed

+93
-0
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7138,6 +7138,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
71387138
SmallVector<Instruction *, 8> WorkList;
71397139
SmallPtrSet<Instruction *, 16> Visited;
71407140
SmallVector<Instruction *, 8> AndsToMaybeRemove;
7141+
SmallVector<Instruction *, 8> DropFlags;
71417142
for (auto *U : Load->users())
71427143
WorkList.push_back(cast<Instruction>(U));
71437144

@@ -7185,13 +7186,15 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
71857186
return false;
71867187
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
71877188
DemandBits.setLowBits(BitWidth - ShiftAmt);
7189+
DropFlags.push_back(I);
71887190
break;
71897191
}
71907192

71917193
case Instruction::Trunc: {
71927194
EVT TruncVT = TLI->getValueType(*DL, I->getType());
71937195
unsigned TruncBitWidth = TruncVT.getSizeInBits();
71947196
DemandBits.setLowBits(TruncBitWidth);
7197+
DropFlags.push_back(I);
71957198
break;
71967199
}
71977200

@@ -7249,6 +7252,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
72497252
++NumAndUses;
72507253
}
72517254

7255+
// NSW flags may no longer hold.
7256+
for (auto *Inst : DropFlags)
7257+
Inst->setHasNoSignedWrap(false);
7258+
72527259
++NumAndsAdded;
72537260
return true;
72547261
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
3+
4+
; Make sure the nsw flag is dropped when the load ext is combined.
5+
define i32 @simplify_load_ext_drop_trunc_nsw(ptr %p) {
6+
; CHECK-LABEL: define i32 @simplify_load_ext_drop_trunc_nsw(
7+
; CHECK-SAME: ptr [[P:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
10+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X]], 255
11+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[TMP0]] to i8
12+
; CHECK-NEXT: [[EXT1:%.*]] = sext i8 [[TRUNC]] to i16
13+
; CHECK-NEXT: call void @use(i32 [[TMP0]])
14+
; CHECK-NEXT: [[EXT2:%.*]] = zext i16 [[EXT1]] to i32
15+
; CHECK-NEXT: ret i32 [[EXT2]]
16+
;
17+
entry:
18+
%x = load i32, ptr %p, align 4
19+
%trunc = trunc nsw i32 %x to i8
20+
%ext1 = sext i8 %trunc to i16
21+
%conv2 = and i32 %x, 255
22+
call void @use(i32 %conv2)
23+
%ext2 = zext i16 %ext1 to i32
24+
ret i32 %ext2
25+
}
26+
27+
; Make sure the nsw flag is dropped when the load ext is combined.
28+
define i32 @simplify_load_ext_drop_shl_nsw(ptr %p) {
29+
; CHECK-LABEL: define i32 @simplify_load_ext_drop_shl_nsw(
30+
; CHECK-SAME: ptr [[P:%.*]]) {
31+
; CHECK-NEXT: [[ENTRY:.*:]]
32+
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
33+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X]], 255
34+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP0]], 24
35+
; CHECK-NEXT: call void @use(i32 [[TMP0]])
36+
; CHECK-NEXT: ret i32 [[SHL]]
37+
;
38+
entry:
39+
%x = load i32, ptr %p, align 4
40+
%shl = shl nsw i32 %x, 24
41+
%conv2 = and i32 %x, 255
42+
call void @use(i32 %conv2)
43+
ret i32 %shl
44+
}
45+
46+
define i32 @simplify_load_ext_keep_trunc_nuw(ptr %p) {
47+
; CHECK-LABEL: define i32 @simplify_load_ext_keep_trunc_nuw(
48+
; CHECK-SAME: ptr [[P:%.*]]) {
49+
; CHECK-NEXT: [[ENTRY:.*:]]
50+
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
51+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X]], 255
52+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[TMP0]] to i8
53+
; CHECK-NEXT: [[EXT1:%.*]] = sext i8 [[TRUNC]] to i16
54+
; CHECK-NEXT: call void @use(i32 [[TMP0]])
55+
; CHECK-NEXT: [[EXT2:%.*]] = zext i16 [[EXT1]] to i32
56+
; CHECK-NEXT: ret i32 [[EXT2]]
57+
;
58+
entry:
59+
%x = load i32, ptr %p, align 4
60+
%trunc = trunc nuw i32 %x to i8
61+
%ext1 = sext i8 %trunc to i16
62+
%conv2 = and i32 %x, 255
63+
call void @use(i32 %conv2)
64+
%ext2 = zext i16 %ext1 to i32
65+
ret i32 %ext2
66+
}
67+
68+
define i32 @simplify_load_ext_drop_shl_nuw(ptr %p) {
69+
; CHECK-LABEL: define i32 @simplify_load_ext_drop_shl_nuw(
70+
; CHECK-SAME: ptr [[P:%.*]]) {
71+
; CHECK-NEXT: [[ENTRY:.*:]]
72+
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
73+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X]], 255
74+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[TMP0]], 24
75+
; CHECK-NEXT: call void @use(i32 [[TMP0]])
76+
; CHECK-NEXT: ret i32 [[SHL]]
77+
;
78+
entry:
79+
%x = load i32, ptr %p, align 4
80+
%shl = shl nuw i32 %x, 24
81+
%conv2 = and i32 %x, 255
82+
call void @use(i32 %conv2)
83+
ret i32 %shl
84+
}
85+
86+
declare void @use(i32)

0 commit comments

Comments
 (0)