[mlir] Lower math dialect later in gpu-lower-to-nvvm-pipeline (llvm#81489)

grypp · web-flow · commit d7f59c8fb83c · 2024-02-13T08:31:42.000+01:00
This PR moves the lowering of the math dialect later in the pipeline, because the math dialect is lowered correctly for the GPU target by createConvertGpuOpsToNVVMOps, which therefore needs to run first. Relands llvm#78556.
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
   pm.addPass(createConvertVectorToSCFPass());
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertNVVMToLLVMPass());
-  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createConvertFuncToLLVMPass());
   pm.addPass(memref::createExpandStridedMetadataPass());
 
@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
   GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
   gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
   pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
+  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
   pm.addPass(createReconcileUnrealizedCastsPass());
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -0,0 +1,30 @@
+// REQUIRES: host-supports-nvptx
+// RUN: mlir-opt %s \
+// RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN:   | FileCheck %s
+
+// RUN: mlir-opt %s \
+// RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN:    --mlir-print-ir-after=convert-gpu-to-nvvm 2>&1 \
+// RUN:  | FileCheck %s --check-prefixes=CHECK-NVVM
+
+// This test checks whether the GPU region is compiled correctly to PTX by the
+// pipeline. It doesn't test the GPU-side IR, but it can test the host IR and
+// the generated PTX.
+
+// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
+func.func @test_math(%arg0 : f32) {
+    %c2 = arith.constant 2 : index  // used below as the x size of `threads`
+    %c1 = arith.constant 1 : index  // used below for every other launch size
+    // CHECK: gpu.launch_func  @test_math_kernel::@test_math_kernel
+    // CHECK: gpu.binary @test_math_kernel  [#gpu.object<#nvvm.target
+    gpu.launch 
+        blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1) 
+        threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) { 
+        // CHECK-NVVM: __nv_expf 
+        %s1 = math.exp %arg0 : f32  // math op inside the launch region; the NVVM check above expects __nv_expf for it
+        gpu.printf "%f" %s1 : f32  // consumes %s1 inside the region
+        gpu.terminator
+    }
+    return
+}