Skip to content

Commit d7f59c8

Browse files
authored
[mlir] Lower math dialect later in gpu-lower-to-nvvm-pipeline (llvm#81489)
This PR moves the lowering of the math dialect later in the pipeline, because the math dialect is lowered correctly for the GPU target by createConvertGpuOpsToNVVMOps, which therefore needs to run first. Relands llvm#78556.
1 parent 785eddd commit d7f59c8

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
5151
pm.addPass(createConvertVectorToSCFPass());
5252
pm.addPass(createConvertSCFToCFPass());
5353
pm.addPass(createConvertNVVMToLLVMPass());
54-
pm.addPass(createConvertMathToLLVMPass());
5554
pm.addPass(createConvertFuncToLLVMPass());
5655
pm.addPass(memref::createExpandStridedMetadataPass());
5756

@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
9897
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
9998
gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
10099
pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
100+
pm.addPass(createConvertMathToLLVMPass());
101101
pm.addPass(createCanonicalizerPass());
102102
pm.addPass(createCSEPass());
103103
pm.addPass(createReconcileUnrealizedCastsPass());
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// REQUIRES: host-supports-nvptx
2+
// RUN: mlir-opt %s \
3+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s \
7+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
8+
// RUN: --mlir-print-ir-after=convert-gpu-to-nvvm 2>&1 \
9+
// RUN: | FileCheck %s --check-prefixes=CHECK-NVVM
10+
11+
// This test checks whether the GPU region is compiled correctly to PTX by the
12+
// pipeline. It doesn't test the GPU-side IR, but it can test the host IR and
13+
// generated PTX.
14+
15+
// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
16+
func.func @test_math(%arg0 : f32) {
17+
%c2 = arith.constant 2 : index
18+
%c1 = arith.constant 1 : index
19+
// CHECK: gpu.launch_func @test_math_kernel::@test_math_kernel
20+
// CHECK: gpu.binary @test_math_kernel [#gpu.object<#nvvm.target
21+
gpu.launch
22+
blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
23+
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
24+
// CHECK-NVVM: __nv_expf
25+
%s1 = math.exp %arg0 : f32
26+
gpu.printf "%f" %s1 : f32
27+
gpu.terminator
28+
}
29+
return
30+
}

0 commit comments

Comments
 (0)