Skip to content

Commit 5e28d30

Browse files
committed
[XCOFF][AIX] Peephole optimization for toc-data.
Follow-up to D101178: a peephole optimization that converts a load-address instruction and a consuming load/store into just the load/store when it's safe to do so. E.g., it converts the two-instruction code sequence `la 4, i[TD](2)` / `stw 3, 0(4)` into the single instruction `stw 3, i[TD](2)`. Differential Revision: https://reviews.llvm.org/D101470
1 parent ddd18d0 commit 5e28d30

File tree

5 files changed

+119
-61
lines changed

5 files changed

+119
-61
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7633,6 +7633,20 @@ void PPCDAGToDAGISel::PeepholePPC64() {
76337633
case PPC::ADDItocL:
76347634
Flags = PPCII::MO_TOC_LO;
76357635
break;
7636+
case PPC::ADDItoc:
7637+
case PPC::ADDItoc8:
7638+
if (RequiresMod4Offset) {
7639+
if (GlobalAddressSDNode *GA =
7640+
dyn_cast<GlobalAddressSDNode>(Base.getOperand(0))) {
7641+
const GlobalValue *GV = GA->getGlobal();
7642+
Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7643+
// XMC_TD global that is underaligned being accessed with a DS form
7644+
// instruction.
7645+
if (Alignment < 4)
7646+
continue;
7647+
}
7648+
}
7649+
break;
76367650
}
76377651

76387652
SDValue ImmOpnd = Base.getOperand(1);
@@ -7727,12 +7741,27 @@ void PPCDAGToDAGISel::PeepholePPC64() {
77277741
}
77287742
}
77297743

7744+
const unsigned BaseOpcode = Base.getMachineOpcode();
7745+
// ADDItoc and ADDItoc8 are pseudos used exclusively by AIX small code
7746+
// model when a global is defined in the TOC.
7747+
const bool OpcodeIsAIXTocData =
7748+
BaseOpcode == PPC::ADDItoc || BaseOpcode == PPC::ADDItoc8;
7749+
77307750
if (FirstOp == 1) // Store
7731-
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7732-
Base.getOperand(0), N->getOperand(3));
7751+
if (OpcodeIsAIXTocData)
7752+
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0),
7753+
Base.getOperand(0), Base.getOperand(1),
7754+
N->getOperand(3));
7755+
else
7756+
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7757+
Base.getOperand(0), N->getOperand(3));
77337758
else // Load
7734-
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7735-
N->getOperand(2));
7759+
if (OpcodeIsAIXTocData)
7760+
(void)CurDAG->UpdateNodeOperands(N, Base.getOperand(0),
7761+
Base.getOperand(1), N->getOperand(2));
7762+
else
7763+
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7764+
N->getOperand(2));
77367765

77377766
if (UpdateHBase)
77387767
(void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),

llvm/lib/Target/PowerPC/PPCMCInstLower.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
4242
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
4343
} else {
4444
const GlobalValue *GV = MO.getGlobal();
45+
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
46+
if (GVar->hasAttribute("toc-data"))
47+
return TM.getSymbol(GV);
48+
4549
TM.getNameWithPrefix(Name, GV, Mang);
4650
}
4751

llvm/test/CodeGen/PowerPC/toc-data-const.ll

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
2-
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
1+
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
2+
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,NOOPT
3+
4+
; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,OPT
5+
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,OPT
36

47
; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
58
; RUN: llvm-readobj %t32.o --syms --relocs | FileCheck %s --check-prefix=OBJ32
@@ -23,27 +26,31 @@ define ptr @retptr() {
2326

2427
attributes #0 = { "toc-data" }
2528

26-
; CHECK: .read:
27-
; CHECK: la 3, i1[TD](2)
29+
; CHECK-LABEL: .read:
30+
; NOOPT: la 3, i1[TD](2)
31+
; NOOPT: lwz 3, 0(3)
32+
; OPT: lwz 3, i1[TD](2)
2833

29-
; CHECK: .retptr:
30-
; CHECK: la 3, i2[TD](2)
34+
; CHECK-LABEL: .retptr:
35+
; CHECK: la 3, i2[TD](2)
36+
; CHECK-NEXT: blr
3137

3238
; CHECK-DAG: .toc
3339
; CHECK: .extern i1[TD]
34-
; CHECK: .csect i2[TD]
40+
; CHECK32: .csect i2[TD],2
41+
; CHECK64: .csect i2[TD],3
3542

3643
; OBJ32: Relocations [
3744
; OBJ32-NEXT: Section (index: 1) .text {
3845
; OBJ32-NEXT: 0x2 R_TOC i1(1) 0xF
39-
; OBJ32-NEXT: 0x26 R_TOC i2(15) 0xF
46+
; OBJ32-NEXT: 0x22 R_TOC i2(15) 0xF
4047
; OBJ32-NEXT: }
4148
; OBJ32-NEXT: Section (index: 2) .data {
42-
; OBJ32-NEXT: 0x44 R_POS .read(5) 0x1F
43-
; OBJ32-NEXT: 0x48 R_POS TOC(13) 0x1F
44-
; OBJ32-NEXT: 0x50 R_POS .retptr(7) 0x1F
45-
; OBJ32-NEXT: 0x54 R_POS TOC(13) 0x1F
46-
; OBJ32-NEXT: 0x5C R_POS i1(1) 0x1F
49+
; OBJ32-NEXT: 0x40 R_POS .read(5) 0x1F
50+
; OBJ32-NEXT: 0x44 R_POS TOC(13) 0x1F
51+
; OBJ32-NEXT: 0x4C R_POS .retptr(7) 0x1F
52+
; OBJ32-NEXT: 0x50 R_POS TOC(13) 0x1F
53+
; OBJ32-NEXT: 0x58 R_POS i1(1) 0x1F
4754
; OBJ32-NEXT: }
4855
; OBJ32-NEXT: ]
4956

@@ -70,7 +77,7 @@ attributes #0 = { "toc-data" }
7077
; OBJ32: Symbol {
7178
; OBJ32: Index: 13
7279
; OBJ32-NEXT: Name: TOC
73-
; OBJ32-NEXT: Value (RelocatableAddress): 0x5C
80+
; OBJ32-NEXT: Value (RelocatableAddress): 0x58
7481
; OBJ32-NEXT: Section: .data
7582
; OBJ32-NEXT: Type: 0x0
7683
; OBJ32-NEXT: StorageClass: C_HIDEXT (0x6B)
@@ -90,7 +97,7 @@ attributes #0 = { "toc-data" }
9097
; OBJ32: Symbol {
9198
; OBJ32: Index: 15
9299
; OBJ32-NEXT: Name: i2
93-
; OBJ32-NEXT: Value (RelocatableAddress): 0x5C
100+
; OBJ32-NEXT: Value (RelocatableAddress): 0x58
94101
; OBJ32-NEXT: Section: .data
95102
; OBJ32-NEXT: Type: 0x0
96103
; OBJ32-NEXT: StorageClass: C_EXT (0x2)
@@ -111,14 +118,14 @@ attributes #0 = { "toc-data" }
111118
; OBJ64: Relocations [
112119
; OBJ64-NEXT: Section (index: 1) .text {
113120
; OBJ64-NEXT: 0x2 R_TOC i1(1) 0xF
114-
; OBJ64-NEXT: 0x26 R_TOC i2(15) 0xF
121+
; OBJ64-NEXT: 0x22 R_TOC i2(15) 0xF
115122
; OBJ64-NEXT: }
116123
; OBJ64-NEXT: Section (index: 2) .data {
117-
; OBJ64-NEXT: 0x48 R_POS .read(5) 0x3F
118-
; OBJ64-NEXT: 0x50 R_POS TOC(13) 0x3F
119-
; OBJ64-NEXT: 0x60 R_POS .retptr(7) 0x3F
120-
; OBJ64-NEXT: 0x68 R_POS TOC(13) 0x3F
121-
; OBJ64-NEXT: 0x78 R_POS i1(1) 0x3F
124+
; OBJ64-NEXT: 0x40 R_POS .read(5) 0x3F
125+
; OBJ64-NEXT: 0x48 R_POS TOC(13) 0x3F
126+
; OBJ64-NEXT: 0x58 R_POS .retptr(7) 0x3F
127+
; OBJ64-NEXT: 0x60 R_POS TOC(13) 0x3F
128+
; OBJ64-NEXT: 0x70 R_POS i1(1) 0x3F
122129
; OBJ64-NEXT: }
123130
; OBJ64-NEXT: ]
124131

@@ -144,7 +151,7 @@ attributes #0 = { "toc-data" }
144151
; OBJ64: Symbol {
145152
; OBJ64: Index: 13
146153
; OBJ64-NEXT: Name: TOC
147-
; OBJ64-NEXT: Value (RelocatableAddress): 0x78
154+
; OBJ64-NEXT: Value (RelocatableAddress): 0x70
148155
; OBJ64-NEXT: Section: .data
149156
; OBJ64-NEXT: Type: 0x0
150157
; OBJ64-NEXT: StorageClass: C_HIDEXT (0x6B)
@@ -163,7 +170,7 @@ attributes #0 = { "toc-data" }
163170
; OBJ64: Symbol {
164171
; OBJ64: Index: 15
165172
; OBJ64-NEXT: Name: i2
166-
; OBJ64-NEXT: Value (RelocatableAddress): 0x78
173+
; OBJ64-NEXT: Value (RelocatableAddress): 0x70
167174
; OBJ64-NEXT: Section: .data
168175
; OBJ64-NEXT: Type: 0x0
169176
; OBJ64-NEXT: StorageClass: C_EXT (0x2)
@@ -181,15 +188,15 @@ attributes #0 = { "toc-data" }
181188
; OBJ64-NEXT: }
182189

183190
; DIS32: 00000000 <.read>:
184-
; DIS32-NEXT: 0: 38 62 00 00 addi 3, 2, 0
191+
; DIS32-NEXT: 0: 80 62 00 00 lwz 3, 0(2)
185192
; DIS32-NEXT: 00000002: R_TOC i1
186-
; DIS32: 00000024 <.retptr>:
187-
; DIS32-NEXT: 24: 38 62 00 00 addi 3, 2, 0
188-
; DIS32-NEXT: 00000026: R_TOC i2
193+
; DIS32: 00000020 <.retptr>:
194+
; DIS32-NEXT: 20: 38 62 00 00 addi 3, 2, 0
195+
; DIS32-NEXT: 00000022: R_TOC i2
189196

190197
; DIS64: 0000000000000000 <.read>:
191-
; DIS64-NEXT: 0: 38 62 00 00 addi 3, 2, 0
198+
; DIS64-NEXT: 0: 80 62 00 00 lwz 3, 0(2)
192199
; DIS64-NEXT: 0000000000000002: R_TOC i1
193-
; DIS64: 0000000000000024 <.retptr>:
194-
; DIS64-NEXT: 24: 38 62 00 00 addi 3, 2, 0
195-
; DIS64-NEXT: 0000000000000026: R_TOC i2
200+
; DIS64: 0000000000000020 <.retptr>:
201+
; DIS64-NEXT: 20: 38 62 00 00 addi 3, 2, 0
202+
; DIS64-NEXT: 0000000000000022: R_TOC i2
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s
2+
3+
@underaligned = dso_local global i32 123, align 1 #0
4+
5+
define i64 @read() {
6+
entry:
7+
%0 = load i32, ptr @underaligned, align 1
8+
%1 = sext i32 %0 to i64
9+
ret i64 %1
10+
}
11+
12+
attributes #0 = { "toc-data" }
13+
14+
; CHECK-LABEL: .read
15+
; CHECK: la [[DEF:[0-9]+]], underaligned[TD](2)
16+
; CHCEK: lwa {{[0-9]+}}, 0([[DEF]])

llvm/test/CodeGen/PowerPC/toc-data.ll

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
2-
; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK32
2+
; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefixes=CHECK32,OPT32
33
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
44
; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK64
5-
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32
6-
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST64
5+
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST32,ASMOPT32
6+
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST64,ASMOPT64
77

88
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
9-
; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK32
9+
; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefixes=CHECK32,NOOPT32
1010
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
1111
; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK64-NOOPT
12-
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
13-
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
12+
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST32,ASMNOOPT32
13+
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST64,ASMNOOPT64
1414

1515
@i = dso_local global i32 0, align 4 #0
1616
@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
@@ -24,26 +24,27 @@ define dso_local void @write_int(i32 signext %in) {
2424
ret void
2525
}
2626
; CHECK32: name: write_int
27-
; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
28-
; CHECK32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
27+
; NOOPT32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
28+
; NOOPT32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
29+
; OPT32: STW %{{[0-9]+}}, @i, $r2 :: (store (s32) into @i)
2930

3031
; TEST32: .write_int:
31-
; TEST32: la 4, i[TD](2)
32-
; TEST32-NEXT: stw 3, 0(4)
32+
; ASMNOOPT32: la 4, i[TD](2)
33+
; ASMNOOPT32-NEXT: stw 3, 0(4)
34+
; ASMOPT32: stw 3, i[TD](2)
3335

3436
; CHECK64: name: write_int
35-
; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
36-
; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
37+
; CHECK64: STW8 %{{[0-9]+}}, @i, $x2 :: (store (s32) into @i)
3738

3839
; CHECK64-NOOPT: name: write_int
3940
; CHECK64-NOOPT: %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
4041
; CHECK64-NOOPT: %[[ADDR:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2 :: (load (s64) from got)
4142
; CHECK64-NOOPT: STW %[[SUBREG]], 0, killed %[[ADDR]] :: (store (s32) into @i)
4243

4344
; TEST64: .write_int:
44-
; TEST64: la 4, i[TD](2)
45-
; TEST64-NEXT: stw 3, 0(4)
46-
45+
; ASMNOOPT64: la 4, i[TD](2)
46+
; ASMNOOPT64-NEXT: stw 3, 0(4)
47+
; ASMOPT64: stw 3, i[TD](2)
4748

4849
define dso_local i64 @read_ll() {
4950
entry:
@@ -77,25 +78,26 @@ define dso_local float @read_float() {
7778
ret float %0
7879
}
7980
; CHECK32: name: read_float
80-
; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
81-
; CHECK32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
81+
; NOOPT32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
82+
; NOOPT32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
83+
; OPT32: %{{[0-9]+}}:f4rc = LFS @f, $r2 :: (dereferenceable load (s32) from @f)
8284

83-
; TEST32: .read_float:
84-
; TEST32: la 3, f[TD](2)
85-
; TEST32-NEXT: lfs 1, 0(3)
85+
; TEST32: .read_float:
86+
; ASMNOOPT32: la 3, f[TD](2)
87+
; ASMNOOPT32-NEXT: lfs 1, 0(3)
88+
; ASMOPT32: lfs 1, f[TD](2)
8689

8790
; CHECK64: name: read_float
88-
; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
89-
; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
91+
; CHECK64: %{{[0-9]+}}:f4rc = LFS @f, $x2 :: (dereferenceable load (s32) from @f)
9092

9193
; CHECK64-NOOPT: name: read_float
9294
; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
9395
; CHECK64-NOOPT: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]]
9496

95-
; TEST64: .read_float:
96-
; TEST64: la 3, f[TD](2)
97-
; TEST64-NEXT: lfs 1, 0(3)
98-
97+
; TEST64: .read_float:
98+
; ASMNOOPT64: la 3, f[TD](2)
99+
; ASMNOOPT64-NEXT: lfs 1, 0(3)
100+
; ASMOPT64: lfs 1, f[TD](2)
99101

100102
define dso_local void @write_double(double %in) {
101103
entry:

0 commit comments

Comments
 (0)