|
| 1 | +;RUN: if [ %llvmver -ge 10 ]; then %clang %s -Xclang -load -Xclang %loadBC -mllvm -bcpath=%BClibdir -S -emit-llvm -o - | %FileCheck %s; fi |
| 2 | + |
| 3 | +;#include <cblas.h> |
| 4 | +;#include <stdio.h> |
| 5 | +; |
| 6 | +;extern double __enzyme_autodiff(void *, double *, double *, double *, |
| 7 | +; double *); |
| 8 | +; |
| 9 | +;double g(double *m, double *n) { |
| 10 | +; double x = cblas_ddot(3, m, 1, n, 1); |
| 11 | +; m[0] = 11.0; |
| 12 | +; m[1] = 12.0; |
| 13 | +; m[2] = 13.0; |
| 14 | +; double y = x * x; |
| 15 | +; return y; |
| 16 | +;} |
| 17 | +; |
| 18 | +;int main() { |
| 19 | +; double m[3] = {1, 2, 3}; |
| 20 | +; double m1[3] = {0, 0, 0}; |
| 21 | +; double n[3] = {4, 5, 6}; |
| 22 | +; double n1[3] = {0, 0, 0}; |
| 23 | +; double val = __enzyme_autodiff((void*)g, m, m1, n, n1); |
| 24 | +; return 1; |
| 25 | +;} |
| 26 | + |
| 27 | +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 28 | +target triple = "x86_64-unknown-linux-gnu" |
| 29 | + |
| 30 | +@__const.main.m = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 16 |
| 31 | +@__const.main.n = private unnamed_addr constant [3 x double] [double 4.000000e+00, double 5.000000e+00, double 6.000000e+00], align 16 |
| 32 | + |
| 33 | +; Function Attrs: noinline nounwind optnone uwtable |
| | +; NOTE(review): the define below references no attribute group, so as written none of the attributes listed above is attached to @g; the line looks like a leftover from the original compiler dump -- confirm. |
| | +; |
| | +; IR for the C function g(m, n) quoted at the top of this file: |
| | +;   x = cblas_ddot(3, m, 1, n, 1); m[0..2] = {11.0, 12.0, 13.0}; return x * x; |
| | +; Parameters: %m and %n are double* arguments forwarded unchanged to @cblas_ddot. |
| | +; Returns: the square of the value returned by @cblas_ddot. |
| 34 | +define dso_local double @g(double* %m, double* %n) { |
| 35 | +entry: |
| | +; Stack slots for the two pointer arguments and the locals x and y. |
| 36 | + %m.addr = alloca double*, align 8 |
| 37 | + %n.addr = alloca double*, align 8 |
| 38 | + %x = alloca double, align 8 |
| 39 | + %y = alloca double, align 8 |
| 40 | + store double* %m, double** %m.addr, align 8 |
| 41 | + store double* %n, double** %n.addr, align 8 |
| 42 | + %0 = load double*, double** %m.addr, align 8 |
| 43 | + %1 = load double*, double** %n.addr, align 8 |
| | +; Call @cblas_ddot with N = 3 and both increments = 1; the function is only declared in this file. |
| 44 | + %call = call double @cblas_ddot(i32 3, double* %0, i32 1, double* %1, i32 1) |
| 45 | + store double %call, double* %x, align 8 |
| | +; Overwrite m[0], m[1], m[2] with 11.0, 12.0, 13.0 AFTER the call that received m. |
| | +; NOTE(review): presumably this checks that differentiation still uses the values m held at the time of the call -- confirm intent. |
| 46 | + %2 = load double*, double** %m.addr, align 8 |
| 47 | + %arrayidx = getelementptr inbounds double, double* %2, i64 0 |
| 48 | + store double 1.100000e+01, double* %arrayidx, align 8 |
| 49 | + %3 = load double*, double** %m.addr, align 8 |
| 50 | + %arrayidx1 = getelementptr inbounds double, double* %3, i64 1 |
| 51 | + store double 1.200000e+01, double* %arrayidx1, align 8 |
| 52 | + %4 = load double*, double** %m.addr, align 8 |
| 53 | + %arrayidx2 = getelementptr inbounds double, double* %4, i64 2 |
| 54 | + store double 1.300000e+01, double* %arrayidx2, align 8 |
| | +; y = x * x: x is loaded twice and the two loads are multiplied. |
| 55 | + %5 = load double, double* %x, align 8 |
| 56 | + %6 = load double, double* %x, align 8 |
| 57 | + %mul = fmul double %5, %6 |
| 58 | + store double %mul, double* %y, align 8 |
| 59 | + %7 = load double, double* %y, align 8 |
| 60 | + ret double %7 |
| 61 | +} |
| 62 | + |
| 63 | +declare dso_local double @cblas_ddot(i32, double*, i32, double*, i32) |
| 64 | + |
| 65 | +; Function Attrs: noinline nounwind optnone uwtable |
| | +; NOTE(review): the define below references no attribute group, so as written none of the attributes listed above is attached to @main; the line looks like a leftover from the original compiler dump -- confirm. |
| | +; |
| | +; Test driver, IR for the C main() quoted at the top of this file: builds four 3-element double |
| | +; arrays on the stack (m, m1, n, n1), passes @g plus the four array pointers to @__enzyme_autodiff, |
| | +; stores the returned double in %val and returns 1. |
| 66 | +define dso_local i32 @main() { |
| 67 | +entry: |
| 68 | + %retval = alloca i32, align 4 |
| 69 | + %m = alloca [3 x double], align 16 |
| 70 | + %m1 = alloca [3 x double], align 16 |
| 71 | + %n = alloca [3 x double], align 16 |
| 72 | + %n1 = alloca [3 x double], align 16 |
| 73 | + %val = alloca double, align 8 |
| 74 | + store i32 0, i32* %retval, align 4 |
| | +; m = {1.0, 2.0, 3.0}: copy 24 bytes from the constant @__const.main.m. |
| 75 | + %0 = bitcast [3 x double]* %m to i8* |
| 76 | + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast ([3 x double]* @__const.main.m to i8*), i64 24, i1 false) |
| | +; m1 = 24 zero bytes. |
| 77 | + %1 = bitcast [3 x double]* %m1 to i8* |
| 78 | + call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 24, i1 false) |
| | +; n = {4.0, 5.0, 6.0}: copy 24 bytes from the constant @__const.main.n. |
| 79 | + %2 = bitcast [3 x double]* %n to i8* |
| 80 | + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %2, i8* align 16 bitcast ([3 x double]* @__const.main.n to i8*), i64 24, i1 false) |
| | +; n1 = 24 zero bytes. |
| 81 | + %3 = bitcast [3 x double]* %n1 to i8* |
| 82 | + call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 false) |
| | +; Pointers to element 0 of each array, in the order they are passed below. |
| 83 | + %arraydecay = getelementptr inbounds [3 x double], [3 x double]* %m, i32 0, i32 0 |
| 84 | + %arraydecay1 = getelementptr inbounds [3 x double], [3 x double]* %m1, i32 0, i32 0 |
| 85 | + %arraydecay2 = getelementptr inbounds [3 x double], [3 x double]* %n, i32 0, i32 0 |
| 86 | + %arraydecay3 = getelementptr inbounds [3 x double], [3 x double]* %n1, i32 0, i32 0 |
| | +; @g is passed as an i8* together with (m, m1, n, n1). |
| | +; NOTE(review): m1 and n1 are presumably the shadow (gradient) buffers paired with m and n -- confirm against the Enzyme calling convention. |
| 87 | + %call = call double @__enzyme_autodiff(i8* bitcast (double (double*, double*)* @g to i8*), double* %arraydecay, double* %arraydecay1, double* %arraydecay2, double* %arraydecay3) |
| 88 | + store double %call, double* %val, align 8 |
| | +; Returns the constant 1, matching "return 1;" in the quoted C source; %retval is never read. |
| 89 | + ret i32 1 |
| 90 | +} |
| 91 | + |
| 92 | +; Function Attrs: argmemonly nounwind |
| 93 | +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) |
| 94 | + |
| 95 | +; Function Attrs: argmemonly nounwind |
| 96 | +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) |
| 97 | + |
| 98 | +declare dso_local double @__enzyme_autodiff(i8*, double*, double*, double*, double*) |
| 99 | + |
| 100 | +;CHECK: define dso_local double @cblas_ddot(i32 %__N, double* %__X, i32 %__incX, double* %__Y, i32 %__incY) |
| 101 | +;CHECK-NEXT: entry: |
| 102 | +;CHECK-NEXT: %__N.addr = alloca i32, align 4 |
| 103 | +;CHECK-NEXT: %__X.addr = alloca double*, align 8 |
| 104 | +;CHECK-NEXT: %__incX.addr = alloca i32, align 4 |
| 105 | +;CHECK-NEXT: %__Y.addr = alloca double*, align 8 |
| 106 | +;CHECK-NEXT: %__incY.addr = alloca i32, align 4 |
| 107 | +;CHECK-NEXT: %sum = alloca double, align 8 |
| 108 | +;CHECK-NEXT: %i = alloca i32, align 4 |
| 109 | +;CHECK-NEXT: store i32 %__N, i32* %__N.addr, align 4 |
| 110 | +;CHECK-NEXT: store double* %__X, double** %__X.addr, align 8 |
| 111 | +;CHECK-NEXT: store i32 %__incX, i32* %__incX.addr, align 4 |
| 112 | +;CHECK-NEXT: store double* %__Y, double** %__Y.addr, align 8 |
| 113 | +;CHECK-NEXT: store i32 %__incY, i32* %__incY.addr, align 4 |
| 114 | +;CHECK-NEXT: store double 0.000000e+00, double* %sum, align 8 |
| 115 | +;CHECK-NEXT: store i32 0, i32* %i, align 4 |
| 116 | +;CHECK-NEXT: br label %for.cond |
| 117 | + |
| 118 | +;CHECK: for.cond: ; preds = %for.inc, %entry |
| 119 | +;CHECK-NEXT: %0 = load i32, i32* %i, align 4 |
| 120 | +;CHECK-NEXT: %1 = load i32, i32* %__N.addr, align 4 |
| 121 | +;CHECK-NEXT: %cmp = icmp slt i32 %0, %1 |
| 122 | +;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end |
| 123 | + |
| 124 | +;CHECK: for.body: ; preds = %for.cond |
| 125 | +;CHECK-NEXT: %2 = load double, double* %sum, align 8 |
| 126 | +;CHECK-NEXT: %3 = load double*, double** %__X.addr, align 8 |
| 127 | +;CHECK-NEXT: %4 = load i32, i32* %i, align 4 |
| 128 | +;CHECK-NEXT: %idxprom = sext i32 %4 to i64 |
| 129 | +;CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom |
| 130 | +;CHECK-NEXT: %5 = load double, double* %arrayidx, align 8 |
| 131 | +;CHECK-NEXT: %6 = load double*, double** %__Y.addr, align 8 |
| 132 | +;CHECK-NEXT: %7 = load i32, i32* %i, align 4 |
| 133 | +;CHECK-NEXT: %idxprom1 = sext i32 %7 to i64 |
| 134 | +;CHECK-NEXT: %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1 |
| 135 | +;CHECK-NEXT: %8 = load double, double* %arrayidx2, align 8 |
| 136 | +;CHECK-NEXT: %mul = fmul double %5, %8 |
| 137 | +;CHECK-NEXT: %add = fadd double %2, %mul |
| 138 | +;CHECK-NEXT: store double %add, double* %sum, align 8 |
| 139 | +;CHECK-NEXT: br label %for.inc |
| 140 | + |
| 141 | +;CHECK: for.inc: ; preds = %for.body |
| 142 | +;CHECK-NEXT: %9 = load i32, i32* %i, align 4 |
| 143 | +;CHECK-NEXT: %inc = add nsw i32 %9, 1 |
| 144 | +;CHECK-NEXT: store i32 %inc, i32* %i, align 4 |
| 145 | +;CHECK-NEXT: br label %for.cond |
| 146 | + |
| 147 | +;CHECK: for.end: ; preds = %for.cond |
| 148 | +;CHECK-NEXT: %10 = load double, double* %sum, align 8 |
| 149 | +;CHECK-NEXT: ret double %10 |
| 150 | +;CHECK-NEXT: } |
0 commit comments