|
| 1 | +; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -instsimplify -simplifycfg -correlated-propagation -adce -S | FileCheck %s |
| 2 | +source_filename = "text" |
| 3 | +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" |
| 4 | +target triple = "x86_64-pc-linux-gnu" |
| 5 | + |
| 6 | +; Function Attrs: cold noreturn nounwind |
| 7 | +declare void @llvm.trap() #0 |
| 8 | + |
| 9 | +declare dso_local i32 @MPI_Comm_rank(i64, i64) |
| 10 | + |
; @sum: primal function handed to __enzyme_autodiff by @dsum in this test.
; For idx = 0..9 it loads %arg[idx], squares it, scales it by the i32 read
; back from a stack slot after calling @MPI_Comm_rank, and accumulates the
; products. A non-zero return from @MPI_Comm_rank leads to @llvm.trap.
; Returns the accumulated double.
; NOTE(review): the CHECK lines at the end of this file reference value
; names such as %cf_unwrap, %i13_unwrap, %i14_unwrap and %m0diffei16, so
; the value names in this function should not be renamed without updating
; those checks.
| 11 | +define double @sum(double* %arg, i64 %comm) { |
| 12 | +bb: |
; Stack slot whose address is passed, as an i64, to @MPI_Comm_rank below.
| 13 | + %alloc = alloca i32, align 8 |
| 14 | + %i5 = ptrtoint i32* %alloc to i64 |
| 15 | + br label %bb11 |
| 16 | + |
; Loop header and body: %idx is the induction variable, %sum the running total.
| 17 | +bb11: ; preds = %bb22, %bb |
| 18 | + %idx = phi i64 [ 0, %bb ], [ %inc, %bb22 ] |
| 19 | + %sum = phi double [ 0.000000e+00, %bb ], [ %add, %bb22 ] |
| 20 | + %inc = add i64 %idx, 1 |
| 21 | + %i13 = getelementptr inbounds double, double* %arg, i64 %idx |
| 22 | + %i14 = load double, double* %i13, align 8 |
| 23 | + %i16 = fmul double %i14, %i14 |
; The call receives the slot address as an integer; the slot is read right
; after it (presumably the call writes the rank there - confirm against the
; MPI_Comm_rank contract, only a declaration is visible in this file).
| 24 | + %i19 = call i32 @MPI_Comm_rank(i64 %comm, i64 %i5) |
| 25 | + %ld = load i32, i32* %alloc |
; The loaded i32 is converted as an unsigned value.
| 26 | + %cf = uitofp i32 %ld to double |
| 27 | + %mm = fmul double %i16, %cf |
| 28 | + %add = fadd double %sum, %mm |
; A zero return value continues the loop; anything else goes to the trap block.
| 29 | + %i20 = icmp eq i32 %i19, 0 |
| 30 | + br i1 %i20, label %bb22, label %bb21 |
| 31 | + |
| 32 | +bb21: ; preds = %bb11 |
| 33 | + call void @llvm.trap() #1 |
| 34 | + unreachable |
| 35 | + |
; Loop latch: leave after the iteration with %idx == 9 (ten iterations total).
| 36 | +bb22: |
| 37 | + %cmp = icmp eq i64 %idx, 9 |
| 38 | + br i1 %cmp, label %exit, label %bb11 |
| 39 | + |
; %add from the final iteration is the function result.
| 40 | +exit: |
| 41 | + ret double %add |
| 42 | +} |
| 43 | + |
; @dsum: test driver. Calls the variadic @__enzyme_autodiff with @sum as the
; function to differentiate, followed by %x, %xp and %n; the double result
; is discarded and the function returns void.
; NOTE(review): %n is forwarded in the position of @sum's i64 %comm
; parameter, and %xp presumably is the shadow buffer of %x (the CHECK
; lines expect a generated @diffesum taking %arg, %"arg'", %comm) - confirm.
| 44 | +define void @dsum(double* %x, double* %xp, i64 %n) { |
| 45 | +entry: |
| 46 | + %0 = tail call double (double (double*, i64)*, ...) @__enzyme_autodiff(double (double*, i64)* nonnull @sum, double* %x, double* %xp, i64 %n) |
| 47 | + ret void |
| 48 | +} |
| 49 | + |
| 50 | +declare double @__enzyme_autodiff(double (double*, i64)*, ...) |
| 51 | + |
| 52 | +attributes #0 = { cold noreturn nounwind } |
| 53 | +attributes #1 = { noreturn } |
| 54 | + |
| 55 | +; CHECK: define internal void @diffesum(double* %arg, double* %"arg'", i64 %comm, double %differeturn) |
| 56 | +; CHECK-NEXT: bb: |
| 57 | +; CHECK-NEXT: %0 = alloca i32 |
| 58 | +; CHECK-NEXT: %1 = alloca i32 |
| 59 | +; CHECK-NEXT: br label %bb11 |
| 60 | + |
| 61 | +; CHECK: bb11: ; preds = %bb11, %bb |
| 62 | +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %bb11 ], [ 0, %bb ] |
| 63 | +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 |
| 64 | +; CHECK-NEXT: %2 = bitcast i32* %1 to i8* |
| 65 | +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) |
| 66 | +; CHECK-NEXT: %3 = ptrtoint i32* %1 to i64 |
| 67 | +; CHECK-NEXT: %4 = call i32 @MPI_Comm_rank(i64 %comm, i64 %3) |
| 68 | +; CHECK-NEXT: %5 = bitcast i32* %1 to i8* |
| 69 | +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %5) |
| 70 | +; CHECK-NEXT: %cmp = icmp eq i64 %iv, 9 |
| 71 | +; CHECK-NEXT: br i1 %cmp, label %invertbb22, label %bb11 |
| 72 | + |
| 73 | +; CHECK: invertbb: ; preds = %invertbb22 |
| 74 | +; CHECK-NEXT: ret void |
| 75 | + |
| 76 | +; CHECK: incinvertbb11: ; preds = %invertbb22 |
| 77 | +; CHECK-NEXT: %6 = add nsw i64 %"iv'ac.0", -1 |
| 78 | +; CHECK-NEXT: br label %invertbb22 |
| 79 | + |
| 80 | +; CHECK: invertbb22: ; preds = %bb11, %incinvertbb11 |
| 81 | +; CHECK-NEXT: %"iv'ac.0" = phi i64 [ %6, %incinvertbb11 ], [ 9, %bb11 ] |
| 82 | +; CHECK-NEXT: %7 = bitcast i32* %0 to i8* |
| 83 | +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %7) |
| 84 | +; CHECK-NEXT: %8 = ptrtoint i32* %0 to i64 |
| 85 | +; CHECK-NEXT: %9 = call i32 @MPI_Comm_rank(i64 %comm, i64 %8) |
| 86 | +; CHECK-NEXT: %10 = load i32, i32* %0 |
| 87 | +; CHECK-NEXT: %11 = bitcast i32* %0 to i8* |
| 88 | +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %11) |
| 89 | +; CHECK-NEXT: %cf_unwrap = uitofp i32 %10 to double |
| 90 | +; CHECK-NEXT: %m0diffei16 = fmul fast double %differeturn, %cf_unwrap |
| 91 | +; CHECK-NEXT: %i13_unwrap = getelementptr inbounds double, double* %arg, i64 %"iv'ac.0" |
| 92 | +; CHECK-NEXT: %i14_unwrap = load double, double* %i13_unwrap, align 8, !invariant.group !0 |
| 93 | +; CHECK-NEXT: %m0diffei14 = fmul fast double %m0diffei16, %i14_unwrap |
| 94 | +; CHECK-NEXT: %m1diffei14 = fmul fast double %m0diffei16, %i14_unwrap |
| 95 | +; CHECK-NEXT: %12 = fadd fast double %m0diffei14, %m1diffei14 |
| 96 | +; CHECK-NEXT: %"i13'ipg_unwrap" = getelementptr inbounds double, double* %"arg'", i64 %"iv'ac.0" |
| 97 | +; CHECK-NEXT: %13 = load double, double* %"i13'ipg_unwrap", align 8 |
| 98 | +; CHECK-NEXT: %14 = fadd fast double %13, %12 |
| 99 | +; CHECK-NEXT: store double %14, double* %"i13'ipg_unwrap", align 8 |
| 100 | +; CHECK-NEXT: %15 = icmp eq i64 %"iv'ac.0", 0 |
| 101 | +; CHECK-NEXT: br i1 %15, label %invertbb, label %incinvertbb11 |
| 102 | +; CHECK-NEXT: } |
0 commit comments