Skip to content

Commit cfbe205

Browse files
aykevldylanmckay
authored andcommitted
[AVR] Remove faulty stack pushing behavior
An instruction like this will need to allocate some stack space for the last parameter: %x = call addrspace(1) i16 @bar(i64 undef, i64 undef, i16 undef, i16 0) This worked fine when passing an actual value (in this case 0). However, when passing undef, no value was pushed to the stack and therefore no push instructions were created. This caused an unbalanced stack leading to interesting results. This commit fixes that by replacing the push logic with a regular stack adjustment and stack-relative load/stores. This is less efficient but at least it correctly compiles the code. I can think of a few improvements in the future: * The stack should have been adjusted in the function prologue when there are no allocas in the function. * Many (if not most) stack adjustments can be replaced by pushing/popping the values directly. Exactly like the previous code attempted but didn't do correctly. * Small stack adjustments can be done more efficiently with a few push/pop instructions (pushing/popping bogus values), both for code size and for speed. All in all, as long as there are no allocas in the function I think that it is almost always more efficient to emit regular push/pop instructions. This is however left for future optimizations. Differential Revision: https://reviews.llvm.org/D78581
1 parent 143e146 commit cfbe205

File tree

4 files changed

+73
-64
lines changed

4 files changed

+73
-64
lines changed

llvm/lib/Target/AVR/AVRFrameLowering.cpp

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -285,15 +285,10 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters(
285285
}
286286

287287
/// Replace pseudo store instructions that pass arguments through the stack with
288-
/// real instructions. If insertPushes is true then all instructions are
289-
/// replaced with push instructions, otherwise regular std instructions are
290-
/// inserted.
288+
/// real instructions.
291289
static void fixStackStores(MachineBasicBlock &MBB,
292290
MachineBasicBlock::iterator MI,
293-
const TargetInstrInfo &TII, bool insertPushes) {
294-
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
295-
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
296-
291+
const TargetInstrInfo &TII, Register FP) {
297292
// Iterate through the BB until we hit a call instruction or we reach the end.
298293
for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) {
299294
MachineBasicBlock::iterator NextMI = std::next(I);
@@ -308,37 +303,14 @@ static void fixStackStores(MachineBasicBlock &MBB,
308303

309304
assert(MI.getOperand(0).getReg() == AVR::SP &&
310305
"Invalid register, should be SP!");
311-
if (insertPushes) {
312-
// Replace this instruction with a push.
313-
Register SrcReg = MI.getOperand(2).getReg();
314-
bool SrcIsKill = MI.getOperand(2).isKill();
315-
316-
// We can't use PUSHWRr here because when expanded the order of the new
317-
// instructions are reversed from what we need. Perform the expansion now.
318-
if (Opcode == AVR::STDWSPQRr) {
319-
BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
320-
.addReg(TRI.getSubReg(SrcReg, AVR::sub_hi),
321-
getKillRegState(SrcIsKill));
322-
BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
323-
.addReg(TRI.getSubReg(SrcReg, AVR::sub_lo),
324-
getKillRegState(SrcIsKill));
325-
} else {
326-
BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
327-
.addReg(SrcReg, getKillRegState(SrcIsKill));
328-
}
329-
330-
MI.eraseFromParent();
331-
I = NextMI;
332-
continue;
333-
}
334306

335307
// Replace this instruction with a regular store. Use Y as the base
336308
// pointer since it is guaranteed to contain a copy of SP.
337309
unsigned STOpc =
338310
(Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr;
339311

340312
MI.setDesc(TII.get(STOpc));
341-
MI.getOperand(0).setReg(AVR::R29R28);
313+
MI.getOperand(0).setReg(FP);
342314

343315
I = NextMI;
344316
}
@@ -354,26 +326,45 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
354326
// function entry. Delete the call frame pseudo and replace all pseudo stores
355327
// with real store instructions.
356328
if (hasReservedCallFrame(MF)) {
357-
fixStackStores(MBB, MI, TII, false);
329+
fixStackStores(MBB, MI, TII, AVR::R29R28);
358330
return MBB.erase(MI);
359331
}
360332

361333
DebugLoc DL = MI->getDebugLoc();
362334
unsigned int Opcode = MI->getOpcode();
363335
int Amount = TII.getFrameSize(*MI);
364336

365-
// Adjcallstackup does not need to allocate stack space for the call, instead
366-
// we insert push instructions that will allocate the necessary stack.
367-
// For adjcallstackdown we convert it into an 'adiw reg, <amt>' handling
368-
// the read and write of SP in I/O space.
337+
// ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi
338+
// instructions to read and write the stack pointer in I/O space.
369339
if (Amount != 0) {
370340
assert(getStackAlignment() == 1 && "Unsupported stack alignment");
371341

372342
if (Opcode == TII.getCallFrameSetupOpcode()) {
373-
fixStackStores(MBB, MI, TII, true);
343+
// Update the stack pointer.
344+
// In many cases this can be done far more efficiently by pushing the
345+
// relevant values directly to the stack. However, doing that correctly
346+
// (in the right order, possibly skipping some empty space for undef
347+
// values, etc) is tricky and thus left to be optimized in the future.
348+
BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
349+
350+
MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
351+
.addReg(AVR::R31R30, RegState::Kill)
352+
.addImm(Amount);
353+
New->getOperand(3).setIsDead();
354+
355+
BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
356+
.addReg(AVR::R31R30, RegState::Kill);
357+
358+
// Make sure the remaining stack stores are converted to real store
359+
// instructions.
360+
fixStackStores(MBB, MI, TII, AVR::R31R30);
374361
} else {
375362
assert(Opcode == TII.getCallFrameDestroyOpcode());
376363

364+
// Note that small stack changes could be implemented more efficiently
365+
// with a few pop instructions instead of the 8-9 instructions now
366+
// required.
367+
377368
// Select the best opcode to adjust SP based on the offset size.
378369
unsigned addOpcode;
379370
if (isUInt<6>(Amount)) {

llvm/test/CodeGen/AVR/call.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ define i8 @calli8_stack() {
3232
; CHECK-LABEL: calli8_stack:
3333
; CHECK: ldi [[REG1:r[0-9]+]], 10
3434
; CHECK: ldi [[REG2:r[0-9]+]], 11
35-
; CHECK: push [[REG2]]
36-
; CHECK: push [[REG1]]
35+
; CHECK: std Z+1, [[REG1]]
36+
; CHECK: std Z+2, [[REG2]]
3737
; CHECK: call foo8_3
3838
%result1 = call i8 @foo8_3(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11)
3939
ret i8 %result1
@@ -54,12 +54,12 @@ define i16 @calli16_stack() {
5454
; CHECK-LABEL: calli16_stack:
5555
; CHECK: ldi [[REG1:r[0-9]+]], 9
5656
; CHECK: ldi [[REG2:r[0-9]+]], 2
57-
; CHECK: push [[REG2]]
58-
; CHECK: push [[REG1]]
57+
; CHECK: std Z+1, [[REG1]]
58+
; CHECK: std Z+2, [[REG2]]
5959
; CHECK: ldi [[REG1:r[0-9]+]], 10
6060
; CHECK: ldi [[REG2:r[0-9]+]], 2
61-
; CHECK: push [[REG2]]
62-
; CHECK: push [[REG1]]
61+
; CHECK: std Z+3, [[REG1]]
62+
; CHECK: std Z+4, [[REG2]]
6363
; CHECK: call foo16_2
6464
%result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522)
6565
ret i16 %result1
@@ -84,12 +84,12 @@ define i32 @calli32_stack() {
8484
; CHECK-LABEL: calli32_stack:
8585
; CHECK: ldi [[REG1:r[0-9]+]], 64
8686
; CHECK: ldi [[REG2:r[0-9]+]], 66
87-
; CHECK: push [[REG2]]
88-
; CHECK: push [[REG1]]
87+
; CHECK: std Z+1, [[REG1]]
88+
; CHECK: std Z+2, [[REG2]]
8989
; CHECK: ldi [[REG1:r[0-9]+]], 15
9090
; CHECK: ldi [[REG2:r[0-9]+]], 2
91-
; CHECK: push [[REG2]]
92-
; CHECK: push [[REG1]]
91+
; CHECK: std Z+3, [[REG1]]
92+
; CHECK: std Z+4, [[REG2]]
9393
; CHECK: call foo32_2
9494
%result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432)
9595
ret i32 %result1
@@ -115,20 +115,20 @@ define i64 @calli64_stack() {
115115

116116
; CHECK: ldi [[REG1:r[0-9]+]], 76
117117
; CHECK: ldi [[REG2:r[0-9]+]], 73
118-
; CHECK: push [[REG2]]
119-
; CHECK: push [[REG1]]
118+
; CHECK: std Z+5, [[REG1]]
119+
; CHECK: std Z+6, [[REG2]]
120120
; CHECK: ldi [[REG1:r[0-9]+]], 31
121121
; CHECK: ldi [[REG2:r[0-9]+]], 242
122-
; CHECK: push [[REG2]]
123-
; CHECK: push [[REG1]]
122+
; CHECK: std Z+7, [[REG1]]
123+
; CHECK: std Z+8, [[REG2]]
124124
; CHECK: ldi [[REG1:r[0-9]+]], 155
125125
; CHECK: ldi [[REG2:r[0-9]+]], 88
126-
; CHECK: push [[REG2]]
127-
; CHECK: push [[REG1]]
126+
; CHECK: std Z+3, [[REG1]]
127+
; CHECK: std Z+4, [[REG2]]
128128
; CHECK: ldi [[REG1:r[0-9]+]], 255
129129
; CHECK: ldi [[REG2:r[0-9]+]], 255
130-
; CHECK: push [[REG2]]
131-
; CHECK: push [[REG1]]
130+
; CHECK: std Z+1, [[REG1]]
131+
; CHECK: std Z+2, [[REG2]]
132132
; CHECK: call foo64_2
133133
%result1 = call i64 @foo64_2(i64 1, i64 2, i64 17446744073709551615)
134134
ret i64 %result1

llvm/test/CodeGen/AVR/dynalloca.ll

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,27 @@ define void @dynalloca2(i16 %x) {
5353
; CHECK-LABEL: dynalloca2:
5454
; CHECK: in [[SPCOPY1:r[0-9]+]], 61
5555
; CHECK: in [[SPCOPY2:r[0-9]+]], 62
56-
; CHECK: push
57-
; CHECK-NOT: st
58-
; CHECK-NOT: std
56+
; Allocate stack space for call
57+
; CHECK: in {{.*}}, 61
58+
; CHECK: in {{.*}}, 62
59+
; CHECK: subi
60+
; CHECK: sbci
61+
; CHECK: in r0, 63
62+
; CHECK-NEXT: cli
63+
; CHECK-NEXT: out 62, {{.*}}
64+
; CHECK-NEXT: out 63, r0
65+
; CHECK-NEXT: out 61, {{.*}}
66+
; Store values on the stack
67+
; CHECK: ldi r16, 0
68+
; CHECK: ldi r17, 0
69+
; CHECK: std Z+5, r16
70+
; CHECK: std Z+6, r17
71+
; CHECK: std Z+7, r16
72+
; CHECK: std Z+8, r17
73+
; CHECK: std Z+3, r16
74+
; CHECK: std Z+4, r17
75+
; CHECK: std Z+1, r16
76+
; CHECK: std Z+2, r17
5977
; CHECK: call
6078
; Call frame restore
6179
; CHECK-NEXT: in r30, 61

llvm/test/CodeGen/AVR/varargs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,16 @@ define void @varargcall() {
4242
; CHECK-LABEL: varargcall:
4343
; CHECK: ldi [[REG1:r[0-9]+]], 189
4444
; CHECK: ldi [[REG2:r[0-9]+]], 205
45-
; CHECK: push [[REG2]]
46-
; CHECK: push [[REG1]]
45+
; CHECK: std Z+3, [[REG1]]
46+
; CHECK: std Z+4, [[REG2]]
4747
; CHECK: ldi [[REG1:r[0-9]+]], 191
4848
; CHECK: ldi [[REG2:r[0-9]+]], 223
49-
; CHECK: push [[REG2]]
50-
; CHECK: push [[REG1]]
49+
; CHECK: std Z+5, [[REG1]]
50+
; CHECK: std Z+6, [[REG2]]
5151
; CHECK: ldi [[REG1:r[0-9]+]], 205
5252
; CHECK: ldi [[REG2:r[0-9]+]], 171
53-
; CHECK: push [[REG2]]
54-
; CHECK: push [[REG1]]
53+
; CHECK: std Z+1, [[REG1]]
54+
; CHECK: std Z+2, [[REG2]]
5555
; CHECK: call
5656
; CHECK: adiw r30, 6
5757
tail call void (i16, ...) @var1223(i16 -21555, i16 -12867, i16 -8257)

0 commit comments

Comments
 (0)