[PhaseOrdering] Add test for incorrect merge function scheduling

nikic · nikic · commit 5b94037a304e · 2021-12-04T10:12:04.000+01:00
Add an -enable-merge-functions option to allow testing of function
merging as it will actually happen in the optimization pipeline.
Based on that, add a test where we currently produce two identical
functions without merging them, due to incorrect pass scheduling
under the new pass manager.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -178,6 +178,10 @@ static cl::opt<bool> EnableNoRerunSimplificationPipeline(
         "than once in the case that SCC mutations cause a function to be "
         "visited multiple times as long as the function has not been changed"));
 
+static cl::opt<bool> EnableMergeFunctions(
+    "enable-merge-functions", cl::init(false), cl::Hidden,
+    cl::desc("Enable function merging as part of the optimization pipeline"));
+
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -187,7 +191,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
   CallGraphProfile = true;
-  MergeFunctions = false;
+  MergeFunctions = EnableMergeFunctions;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
 }
 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='default<O3>' -enable-merge-functions -S < %s | FileCheck %s
+
+; TODO: These two functions should get merged, but currently aren't, because
+; the function merging pass is scheduled too early.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @test1(i32 %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = add i32 [[C:%.*]], -100
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 20
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i20
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i20 -490991, [[SWITCH_CAST]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i20 [[SWITCH_DOWNSHIFT]], 1
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = icmp ne i20 [[TMP1]], 0
+; CHECK-NEXT:    [[I_0:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false
+; CHECK-NEXT:    ret i1 [[I_0]]
+;
+entry:
+  %_4 = alloca i8, align 1
+  %_3 = alloca i8, align 1
+  %_2 = alloca i8, align 1
+  %i = alloca i8, align 1
+  %i1 = icmp eq i32 %c, 115
+  br i1 %i1, label %bb10, label %bb11
+
+bb10:                                             ; preds = %entry
+  store i8 1, i8* %_4, align 1
+  br label %bb12
+
+bb11:                                             ; preds = %entry
+  %_6 = icmp eq i32 %c, 109
+  %i2 = zext i1 %_6 to i8
+  store i8 %i2, i8* %_4, align 1
+  br label %bb12
+
+bb12:                                             ; preds = %bb11, %bb10
+  %i3 = load i8, i8* %_4, align 1
+  %i4 = trunc i8 %i3 to i1
+  br i1 %i4, label %bb7, label %bb8
+
+bb8:                                              ; preds = %bb12
+  %_8 = icmp eq i32 %c, 104
+  %i5 = zext i1 %_8 to i8
+  store i8 %i5, i8* %_3, align 1
+  br label %bb9
+
+bb7:                                              ; preds = %bb12
+  store i8 1, i8* %_3, align 1
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb8
+  %i6 = load i8, i8* %_3, align 1
+  %i7 = trunc i8 %i6 to i1
+  br i1 %i7, label %bb4, label %bb5
+
+bb5:                                              ; preds = %bb9
+  %_10 = icmp eq i32 %c, 100
+  %i8 = zext i1 %_10 to i8
+  store i8 %i8, i8* %_2, align 1
+  br label %bb6
+
+bb4:                                              ; preds = %bb9
+  store i8 1, i8* %_2, align 1
+  br label %bb6
+
+bb6:                                              ; preds = %bb4, %bb5
+  %i9 = load i8, i8* %_2, align 1
+  %i10 = trunc i8 %i9 to i1
+  br i1 %i10, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb6
+  %_12 = icmp eq i32 %c, 119
+  %i11 = zext i1 %_12 to i8
+  store i8 %i11, i8* %i, align 1
+  br label %bb3
+
+bb1:                                              ; preds = %bb6
+  store i8 1, i8* %i, align 1
+  br label %bb3
+
+bb3:                                              ; preds = %bb1, %bb2
+  %i12 = load i8, i8* %i, align 1
+  %i13 = trunc i8 %i12 to i1
+  ret i1 %i13
+}
+
+define i1 @test2(i32 %c) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = add i32 [[C:%.*]], -100
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 20
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i20
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i20 -490991, [[SWITCH_CAST]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i20 [[SWITCH_DOWNSHIFT]], 1
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = icmp ne i20 [[TMP1]], 0
+; CHECK-NEXT:    [[I_0:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false
+; CHECK-NEXT:    ret i1 [[I_0]]
+;
+entry:
+  %i = alloca i8, align 1
+  switch i32 %c, label %bb1 [
+  i32 115, label %bb2
+  i32 109, label %bb2
+  i32 104, label %bb2
+  i32 100, label %bb2
+  i32 119, label %bb2
+  ]
+
+bb1:                                              ; preds = %entry
+  store i8 0, i8* %i, align 1
+  br label %bb3
+
+bb2:                                              ; preds = %entry, %entry, %entry, %entry, %entry
+  store i8 1, i8* %i, align 1
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1
+  %i1 = load i8, i8* %i, align 1
+  %i2 = trunc i8 %i1 to i1
+  ret i1 %i2
+}
+