Skip to content

Commit 926a71f

Browse files
authored
[CodeGen][WinEH] Update saved esp for inlined inallocas (llvm#116585)
This fixes issue llvm#116583. When inalloca calls are inlined, the static stack-pointer-saving prolog of X86WinEHState breaks due to dynamic allocas. In this case we need to update the saved esp for every inalloca and for every stackrestore that is also related to an inalloca.
1 parent 8e65b72 commit 926a71f

File tree

3 files changed

+226
-13
lines changed

3 files changed

+226
-13
lines changed

llvm/lib/Target/X86/X86WinEHState.cpp

+41-13
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/Function.h"
2424
#include "llvm/IR/IRBuilder.h"
2525
#include "llvm/IR/Instructions.h"
26+
#include "llvm/IR/IntrinsicInst.h"
2627
#include "llvm/IR/Intrinsics.h"
2728
#include "llvm/IR/IntrinsicsX86.h"
2829
#include "llvm/IR/Module.h"
@@ -41,7 +42,7 @@ class WinEHStatePass : public FunctionPass {
4142
public:
4243
static char ID; // Pass identification, replacement for typeid.
4344

44-
WinEHStatePass() : FunctionPass(ID) { }
45+
WinEHStatePass() : FunctionPass(ID) {}
4546

4647
bool runOnFunction(Function &Fn) override;
4748

@@ -75,6 +76,8 @@ class WinEHStatePass : public FunctionPass {
7576
int getStateForCall(DenseMap<BasicBlock *, ColorVector> &BlockColors,
7677
WinEHFuncInfo &FuncInfo, CallBase &Call);
7778

79+
void updateEspForInAllocas(Function &F);
80+
7881
// Module-level type getters.
7982
Type *getEHLinkRegistrationType();
8083
Type *getSEHRegistrationType();
@@ -100,6 +103,9 @@ class WinEHStatePass : public FunctionPass {
100103
/// fs:00 chain and the current state.
101104
AllocaInst *RegNode = nullptr;
102105

106+
// Struct type of RegNode. Used for GEPing.
107+
Type *RegNodeTy = nullptr;
108+
103109
// The allocation containing the EH security guard.
104110
AllocaInst *EHGuardNode = nullptr;
105111

@@ -152,8 +158,7 @@ bool WinEHStatePass::runOnFunction(Function &F) {
152158
// Check the personality. Do nothing if this personality doesn't use funclets.
153159
if (!F.hasPersonalityFn())
154160
return false;
155-
PersonalityFn =
156-
dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
161+
PersonalityFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
157162
if (!PersonalityFn)
158163
return false;
159164
Personality = classifyEHPersonality(PersonalityFn);
@@ -188,11 +193,13 @@ bool WinEHStatePass::runOnFunction(Function &F) {
188193
// numbers into an immutable analysis pass.
189194
WinEHFuncInfo FuncInfo;
190195
addStateStores(F, FuncInfo);
196+
updateEspForInAllocas(F);
191197

192198
// Reset per-function state.
193199
PersonalityFn = nullptr;
194200
Personality = EHPersonality::Unknown;
195201
UseStackGuard = false;
202+
RegNodeTy = nullptr;
196203
RegNode = nullptr;
197204
EHGuardNode = nullptr;
198205

@@ -269,9 +276,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
269276
assert(Personality == EHPersonality::MSVC_CXX ||
270277
Personality == EHPersonality::MSVC_X86SEH);
271278

272-
// Struct type of RegNode. Used for GEPing.
273-
Type *RegNodeTy;
274-
275279
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
276280
Type *Int8PtrType = Builder.getPtrTy();
277281
Type *Int32Ty = Builder.getInt32Ty();
@@ -387,11 +391,11 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
387391
FunctionType *TargetFuncTy =
388392
FunctionType::get(Int32Ty, ArrayRef(&ArgTys[0], 5),
389393
/*isVarArg=*/false);
390-
Function *Trampoline =
391-
Function::Create(TrampolineTy, GlobalValue::InternalLinkage,
392-
Twine("__ehhandler$") + GlobalValue::dropLLVMManglingEscape(
393-
ParentFunc->getName()),
394-
TheModule);
394+
Function *Trampoline = Function::Create(
395+
TrampolineTy, GlobalValue::InternalLinkage,
396+
Twine("__ehhandler$") +
397+
GlobalValue::dropLLVMManglingEscape(ParentFunc->getName()),
398+
TheModule);
395399
if (auto *C = ParentFunc->getComdat())
396400
Trampoline->setComdat(C);
397401
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline);
@@ -482,8 +486,8 @@ void WinEHStatePass::rewriteSetJmpCall(IRBuilder<> &Builder, Function &F,
482486
NewCall = NewCI;
483487
} else {
484488
auto *II = cast<InvokeInst>(&Call);
485-
NewCall = Builder.CreateInvoke(
486-
SetJmp3, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles);
489+
NewCall = Builder.CreateInvoke(SetJmp3, II->getNormalDest(),
490+
II->getUnwindDest(), Args, OpBundles);
487491
}
488492
NewCall->setCallingConv(Call.getCallingConv());
489493
NewCall->setAttributes(Call.getAttributes());
@@ -774,3 +778,27 @@ void WinEHStatePass::insertStateNumberStore(Instruction *IP, int State) {
774778
RegNode, StateFieldIndex);
775779
Builder.CreateStore(Builder.getInt32(State), StateField);
776780
}
781+
782+
/// Refreshes the saved stack pointer held in field 0 of the EH registration
/// node (RegNode) throughout \p F.
///
/// Two kinds of instruction trigger a refresh: an alloca that is not a static
/// alloca, and a call to the llvm.stackrestore intrinsic. Right after each of
/// them (in front of the next non-debug instruction) a new llvm.stacksave is
/// emitted and its result is stored into RegNode's first field.
void WinEHStatePass::updateEspForInAllocas(Function &F) {
  // Emits "SavedESP = llvm.stacksave()" in front of InsertPt.
  auto EmitSavedEspUpdate = [this](Instruction *InsertPt) {
    IRBuilder<> Builder(InsertPt);
    Value *CurrentSP = Builder.CreateStackSave();
    Value *SavedEspSlot = Builder.CreateStructGEP(RegNodeTy, RegNode, 0);
    Builder.CreateStore(CurrentSP, SavedEspSlot);
  };

  for (BasicBlock &Block : F) {
    for (Instruction &Inst : Block) {
      // An instruction is either an alloca or an intrinsic call, never both,
      // so at most one of these checks can request a refresh.
      bool NeedsRefresh = false;
      if (const auto *AI = dyn_cast<AllocaInst>(&Inst))
        NeedsRefresh = !AI->isStaticAlloca();
      else if (const auto *Intrin = dyn_cast<IntrinsicInst>(&Inst))
        NeedsRefresh = Intrin->getIntrinsicID() == Intrinsic::stackrestore;

      if (NeedsRefresh)
        EmitSavedEspUpdate(Inst.getNextNonDebugInstruction());
    }
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
4+
target triple = "i386-pc-windows-msvc"
5+
6+
%struct.Foo = type { i32, i32 }
7+
8+
@bar = external global i1
9+
10+
define dso_local noundef i32 @foo() local_unnamed_addr #0 personality ptr @__CxxFrameHandler3 {
11+
; CHECK-LABEL: foo:
12+
; CHECK: # %bb.0: # %entry
13+
; CHECK-NEXT: pushl %ebp
14+
; CHECK-NEXT: movl %esp, %ebp
15+
; CHECK-NEXT: pushl %ebx
16+
; CHECK-NEXT: pushl %edi
17+
; CHECK-NEXT: pushl %esi
18+
; CHECK-NEXT: subl $16, %esp
19+
; CHECK-NEXT: movl %esp, -28(%ebp)
20+
; CHECK-NEXT: movl $-1, -16(%ebp)
21+
; CHECK-NEXT: leal -24(%ebp), %eax
22+
; CHECK-NEXT: movl $___ehhandler$foo, -20(%ebp)
23+
; CHECK-NEXT: movl %fs:0, %ecx
24+
; CHECK-NEXT: movl %ecx, -24(%ebp)
25+
; CHECK-NEXT: movl %eax, %fs:0
26+
; CHECK-NEXT: cmpb $1, _bar
27+
; CHECK-NEXT: je LBB0_1
28+
; CHECK-NEXT: LBB0_5: # %exit
29+
; CHECK-NEXT: $ehgcr_0_5:
30+
; CHECK-NEXT: movl -24(%ebp), %eax
31+
; CHECK-NEXT: movl %eax, %fs:0
32+
; CHECK-NEXT: xorl %eax, %eax
33+
; CHECK-NEXT: leal -12(%ebp), %esp
34+
; CHECK-NEXT: popl %esi
35+
; CHECK-NEXT: popl %edi
36+
; CHECK-NEXT: popl %ebx
37+
; CHECK-NEXT: popl %ebp
38+
; CHECK-NEXT: retl
39+
; CHECK-NEXT: LBB0_1: # %if.then
40+
; CHECK-NEXT: pushl %eax
41+
; CHECK-NEXT: pushl %eax
42+
; CHECK-NEXT: movl %esp, %eax
43+
; CHECK-NEXT: movl %esp, -28(%ebp)
44+
; CHECK-NEXT: movl $123, (%eax)
45+
; CHECK-NEXT: movl $0, -16(%ebp)
46+
; CHECK-NEXT: calll _alwaysthrows
47+
; CHECK-NEXT: # %bb.4: # %unreachable.i
48+
; CHECK-NEXT: LBB0_3: # Block address taken
49+
; CHECK-NEXT: # %catch.i
50+
; CHECK-NEXT: addl $12, %ebp
51+
; CHECK-NEXT: jmp LBB0_5
52+
; CHECK-NEXT: .def "?catch$2@?0?foo@4HA";
53+
; CHECK-NEXT: .scl 3;
54+
; CHECK-NEXT: .type 32;
55+
; CHECK-NEXT: .endef
56+
; CHECK-NEXT: .p2align 4
57+
; CHECK-NEXT: "?catch$2@?0?foo@4HA":
58+
; CHECK-NEXT: LBB0_2: # %catch.i
59+
; CHECK-NEXT: pushl %ebp
60+
; CHECK-NEXT: addl $12, %ebp
61+
; CHECK-NEXT: movl %esp, -28(%ebp)
62+
; CHECK-NEXT: movl $LBB0_3, %eax
63+
; CHECK-NEXT: popl %ebp
64+
; CHECK-NEXT: retl # CATCHRET
65+
; CHECK-NEXT: Lfunc_end0:
66+
entry:
67+
%cmp = load i1, ptr @bar
68+
br i1 %cmp, label %if.then, label %exit
69+
70+
if.then: ; preds = %entry
71+
%foo = alloca <{ %struct.Foo }>, align 4
72+
store i32 123, ptr %foo, align 4
73+
invoke void @alwaysthrows() #1
74+
to label %unreachable.i unwind label %catch.dispatch.i
75+
76+
catch.dispatch.i: ; preds = %if.then
77+
%3 = catchswitch within none [label %catch.i] unwind to caller
78+
79+
catch.i: ; preds = %catch.dispatch.i
80+
%4 = catchpad within %3 [ptr null, i32 64, ptr null]
81+
catchret from %4 to label %exit
82+
83+
unreachable.i: ; preds = %if.then
84+
unreachable
85+
86+
exit: ; preds = %entry, %catch.i
87+
ret i32 0
88+
}
89+
90+
declare dso_local i32 @__CxxFrameHandler3(...)
91+
92+
declare dso_local void @alwaysthrows() local_unnamed_addr
93+
94+
attributes #0 = { norecurse "min-legal-vector-width"="0" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
95+
attributes #1 = { noreturn }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
4+
target triple = "i386-pc-windows-msvc"
5+
6+
%struct.Foo = type { i32, i32 }
7+
8+
define dso_local noundef i32 @foo() local_unnamed_addr #0 personality ptr @__CxxFrameHandler3 {
9+
; CHECK-LABEL: foo:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: pushl %ebp
12+
; CHECK-NEXT: movl %esp, %ebp
13+
; CHECK-NEXT: pushl %ebx
14+
; CHECK-NEXT: pushl %edi
15+
; CHECK-NEXT: pushl %esi
16+
; CHECK-NEXT: subl $16, %esp
17+
; CHECK-NEXT: movl %esp, -28(%ebp)
18+
; CHECK-NEXT: movl $-1, -16(%ebp)
19+
; CHECK-NEXT: leal -24(%ebp), %eax
20+
; CHECK-NEXT: movl $___ehhandler$foo, -20(%ebp)
21+
; CHECK-NEXT: movl %fs:0, %ecx
22+
; CHECK-NEXT: movl %ecx, -24(%ebp)
23+
; CHECK-NEXT: movl %eax, %fs:0
24+
; CHECK-NEXT: pushl %eax
25+
; CHECK-NEXT: pushl %eax
26+
; CHECK-NEXT: movl %esp, %ecx
27+
; CHECK-NEXT: movl %esp, -28(%ebp)
28+
; CHECK-NEXT: movl $123, (%ecx)
29+
; CHECK-NEXT: calll _bar
30+
; CHECK-NEXT: movl $0, -16(%ebp)
31+
; CHECK-NEXT: calll _alwaysthrows
32+
; CHECK-NEXT: # %bb.3: # %unreachable.i
33+
; CHECK-NEXT: LBB0_2: # Block address taken
34+
; CHECK-NEXT: # %catch.i
35+
; CHECK-NEXT: addl $12, %ebp
36+
; CHECK-NEXT: jmp LBB0_4
37+
; CHECK-NEXT: LBB0_4: # %exit
38+
; CHECK-NEXT: $ehgcr_0_4:
39+
; CHECK-NEXT: movl -24(%ebp), %eax
40+
; CHECK-NEXT: movl %eax, %fs:0
41+
; CHECK-NEXT: xorl %eax, %eax
42+
; CHECK-NEXT: leal -12(%ebp), %esp
43+
; CHECK-NEXT: popl %esi
44+
; CHECK-NEXT: popl %edi
45+
; CHECK-NEXT: popl %ebx
46+
; CHECK-NEXT: popl %ebp
47+
; CHECK-NEXT: retl
48+
; CHECK-NEXT: .def "?catch$1@?0?foo@4HA";
49+
; CHECK-NEXT: .scl 3;
50+
; CHECK-NEXT: .type 32;
51+
; CHECK-NEXT: .endef
52+
; CHECK-NEXT: .p2align 4
53+
; CHECK-NEXT: "?catch$1@?0?foo@4HA":
54+
; CHECK-NEXT: LBB0_1: # %catch.i
55+
; CHECK-NEXT: pushl %ebp
56+
; CHECK-NEXT: addl $12, %ebp
57+
; CHECK-NEXT: movl %esp, -28(%ebp)
58+
; CHECK-NEXT: movl $LBB0_2, %eax
59+
; CHECK-NEXT: popl %ebp
60+
; CHECK-NEXT: retl # CATCHRET
61+
; CHECK-NEXT: Lfunc_end0:
62+
entry:
63+
%argmem = alloca inalloca <{ %struct.Foo }>, align 4
64+
store i32 123, ptr %argmem, align 4
65+
call x86_thiscallcc void @bar(ptr noundef nonnull align 4 dereferenceable(8) %argmem)
66+
invoke void @alwaysthrows() #1
67+
to label %unreachable.i unwind label %catch.dispatch.i
68+
69+
catch.dispatch.i: ; preds = %entry
70+
%3 = catchswitch within none [label %catch.i] unwind to caller
71+
72+
catch.i: ; preds = %catch.dispatch.i
73+
%4 = catchpad within %3 [ptr null, i32 64, ptr null]
74+
catchret from %4 to label %exit
75+
76+
unreachable.i: ; preds = %entry
77+
unreachable
78+
79+
exit: ; preds = %catch.i
80+
ret i32 0
81+
}
82+
83+
declare dso_local x86_thiscallcc void @bar(ptr noundef nonnull align 4 dereferenceable(8) %this) local_unnamed_addr
84+
85+
declare dso_local i32 @__CxxFrameHandler3(...)
86+
87+
declare dso_local void @alwaysthrows() local_unnamed_addr
88+
89+
attributes #0 = { norecurse "min-legal-vector-width"="0" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
90+
attributes #1 = { noreturn }

0 commit comments

Comments
 (0)