llvm
diff --git a/‎llvm/include/llvm/Analysis/ScalarFuncs.def
Lines changed: 117 additions & 0 deletions b/‎llvm/include/llvm/Analysis/ScalarFuncs.def
Lines changed: 117 additions & 0 deletions
diff --git a/‎llvm/include/llvm/CodeGen/CommandFlags.h
Lines changed: 2 additions & 0 deletions b/‎llvm/include/llvm/CodeGen/CommandFlags.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/Attributes.td
Lines changed: 2 additions & 0 deletions b/‎llvm/include/llvm/IR/Attributes.td
Lines changed: 2 additions & 0 deletions
diff --git a/‎llvm/lib/CodeGen/CommandFlags.cpp
Lines changed: 9 additions & 0 deletions b/‎llvm/lib/CodeGen/CommandFlags.cpp
Lines changed: 9 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Target/PowerPC/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPC.h
Lines changed: 4 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPC.h
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
Lines changed: 141 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,117 @@
+//===-- ScalarFuncs.def - Library information ----------*- C++ -*----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file creates a mapping from standard IEEE math functions to
+// their corresponding entries in the IBM MASS (scalar) library.
+// LLVM intrinsic math functions will be handled in PPCISelLowering to
+// allow existing optimizations like pow(x,0.5) --> sqrt(x).
+
+#if defined(TLI_DEFINE_SCALAR_MASS_FUNCS)
+#define TLI_DEFINE_SCALAR_MASS_FUNC(SCAL, MASSENTRY) {SCAL, MASSENTRY},
+#endif
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acosf", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosf_finite", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acos", "__xl_acos")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acos_finite", "__xl_acos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acoshf", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acoshf_finite", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acosh", "__xl_acosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosh_finite", "__xl_acosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinf", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asinf_finite", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asin", "__xl_asin")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asin_finite", "__xl_asin")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinhf", "__xl_asinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asinh", "__xl_asinh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanf", "__xl_atanf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan", "__xl_atan")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2f", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2f_finite", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2", "__xl_atan2")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2_finite", "__xl_atan2")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanhf", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanhf_finite", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atanh", "__xl_atanh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanh_finite", "__xl_atanh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrtf", "__xl_cbrtf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrt", "__xl_cbrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cosf", "__xl_cosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cos", "__xl_cos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("coshf", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__coshf_finite", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cosh", "__xl_cosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__cosh_finite", "__xl_cosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erff", "__xl_erff")
+TLI_DEFINE_SCALAR_MASS_FUNC("erf", "__xl_erf")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erfcf", "__xl_erfcf")
+TLI_DEFINE_SCALAR_MASS_FUNC("erfc", "__xl_erfc")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expf", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__expf_finite", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("exp", "__xl_exp")
+TLI_DEFINE_SCALAR_MASS_FUNC("__exp_finite", "__xl_exp")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1f", "__xl_expm1f")
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1", "__xl_expm1")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("hypotf", "__xl_hypotf")
+TLI_DEFINE_SCALAR_MASS_FUNC("hypot", "__xl_hypot")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("lgammaf", "__xl_lgammaf")
+TLI_DEFINE_SCALAR_MASS_FUNC("lgamma", "__xl_lgamma")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("logf", "__xl_logf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__logf_finite", "__xl_logf")
+TLI_DEFINE_SCALAR_MASS_FUNC("log", "__xl_log")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log_finite", "__xl_log")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("log10f", "__xl_log10f")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log10f_finite", "__xl_log10f")
+TLI_DEFINE_SCALAR_MASS_FUNC("log10", "__xl_log10")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log10_finite", "__xl_log10")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("log1pf", "__xl_log1pf")
+TLI_DEFINE_SCALAR_MASS_FUNC("log1p", "__xl_log1p")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("powf", "__xl_powf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__powf_finite", "__xl_powf")
+TLI_DEFINE_SCALAR_MASS_FUNC("pow", "__xl_pow")
+TLI_DEFINE_SCALAR_MASS_FUNC("__pow_finite", "__xl_pow")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("rsqrt", "__xl_rsqrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sinf", "__xl_sinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("sin", "__xl_sin")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sinhf", "__xl_sinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__sinhf_finite", "__xl_sinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("sinh", "__xl_sinh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__sinh_finite", "__xl_sinh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sqrt", "__xl_sqrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("tanf", "__xl_tanf")
+TLI_DEFINE_SCALAR_MASS_FUNC("tan", "__xl_tan")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("tanhf", "__xl_tanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("tanh", "__xl_tanh")
+
+#undef TLI_DEFINE_SCALAR_MASS_FUNCS
+#undef TLI_DEFINE_SCALAR_MASS_FUNC
@@ -62,6 +62,8 @@ bool getEnableNoNaNsFPMath();
 
 bool getEnableNoSignedZerosFPMath();
 
+bool getEnableApproxFuncFPMath();
+
 bool getEnableNoTrappingFPMath();
 
 DenormalMode::DenormalModeKind getDenormalFPMath();
 
@@ -294,6 +294,7 @@ def MustProgress : EnumAttr<"mustprogress", [FnAttr]>;
 def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">;
 def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
 def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">;
+def ApproxFuncFPMath : StrBoolAttr<"approx-func-fp-math">;
 def NoSignedZerosFPMath : StrBoolAttr<"no-signed-zeros-fp-math">;
 def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">;
 def NoJumpTables : StrBoolAttr<"no-jump-tables">;
@@ -333,6 +334,7 @@ class MergeRule<string F> {
 def : MergeRule<"setAND<LessPreciseFPMADAttr>">;
 def : MergeRule<"setAND<NoInfsFPMathAttr>">;
 def : MergeRule<"setAND<NoNansFPMathAttr>">;
+def : MergeRule<"setAND<ApproxFuncFPMathAttr>">;
 def : MergeRule<"setAND<NoSignedZerosFPMathAttr>">;
 def : MergeRule<"setAND<UnsafeFPMathAttr>">;
 def : MergeRule<"setOR<NoImplicitFloatAttr>">;
 
@@ -58,6 +58,7 @@ CGOPT(bool, EnableUnsafeFPMath)
 CGOPT(bool, EnableNoInfsFPMath)
 CGOPT(bool, EnableNoNaNsFPMath)
 CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
 CGOPT(bool, EnableNoTrappingFPMath)
 CGOPT(bool, EnableAIXExtendedAltivecABI)
 CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
@@ -218,6 +219,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(EnableNoSignedZerosFPMath);
 
+  static cl::opt<bool> EnableApproxFuncFPMath(
+      "enable-approx-func-fp-math",
+      cl::desc("Enable FP math optimizations that assume approx func"),
+      cl::init(false));
+  CGBINDOPT(EnableApproxFuncFPMath);
+
   static cl::opt<bool> EnableNoTrappingFPMath(
       "enable-no-trapping-fp-math",
       cl::desc("Enable setting the FP exceptions build "
@@ -493,6 +500,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.NoInfsFPMath = getEnableNoInfsFPMath();
   Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
   Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+  Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
   Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
 
   DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
@@ -643,6 +651,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
   HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
   HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
   HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+  HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");
 
   if (DenormalFPMathView->getNumOccurrences() > 0 &&
       !F.hasFnAttribute("denormal-fp-math")) {
 
@@ -55,6 +55,7 @@ add_llvm_target(PowerPCCodeGen
   PPCExpandISEL.cpp
   PPCPreEmitPeephole.cpp
   PPCLowerMASSVEntries.cpp
+  PPCGenScalarMASSEntries.cpp
   GISel/PPCCallLowering.cpp
   GISel/PPCRegisterBankInfo.cpp
   GISel/PPCLegalizerInfo.cpp
 
@@ -84,6 +84,10 @@ FunctionPass *createPPCCTRLoops();
   void initializePPCLowerMASSVEntriesPass(PassRegistry &);
   extern char &PPCLowerMASSVEntriesID;
 
+  ModulePass *createPPCGenScalarMASSEntriesPass();
+  void initializePPCGenScalarMASSEntriesPass(PassRegistry &);
+  extern char &PPCGenScalarMASSEntriesID;
+
   InstructionSelector *
   createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &,
                                const PPCRegisterBankInfo &);
 
@@ -0,0 +1,141 @@
+//===-- PPCGenScalarMASSEntries.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation converts standard math functions into their
+// corresponding MASS (scalar) entries for PowerPC targets.
+// The following is an example of such a conversion:
+//     tanh ---> __xl_tanh_finite
+// Such lowering is legal under the fast-math option.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+
+#define DEBUG_TYPE "ppc-gen-scalar-mass"
+
+using namespace llvm;
+
+namespace {
+
+/// Module pass that redirects calls to scalar math functions to the scalar
+/// MASS entries listed in ScalarFuncs.def.
+class PPCGenScalarMASSEntries : public ModulePass {
+  /// Lookup table: scalar math function name -> scalar MASS entry name.
+  std::map<StringRef, StringRef> ScalarMASSFuncs;
+
+  bool isCandidateSafeToLower(const CallInst &CI) const;
+  bool isFiniteCallSafe(const CallInst &CI) const;
+  bool createScalarMASSCall(StringRef MASSEntry, CallInst &CI,
+                            Function &Func) const;
+
+public:
+  static char ID;
+
+  /// Populates the lookup table from the entries of ScalarFuncs.def.
+  PPCGenScalarMASSEntries()
+      : ModulePass(ID), ScalarMASSFuncs{
+#define TLI_DEFINE_SCALAR_MASS_FUNCS
+#include "llvm/Analysis/ScalarFuncs.def"
+                        } {
+  }
+
+  StringRef getPassName() const override {
+    return "PPC Generate Scalar MASS Entries";
+  }
+
+  /// Declares TargetTransformInfoWrapperPass as a required analysis.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+/// Returns true if the call \p CI may be lowered to a scalar MASS entry, i.e.
+/// it is a floating-point math operation that carries the 'afn' flag.
+bool PPCGenScalarMASSEntries::isCandidateSafeToLower(const CallInst &CI) const {
+  // Fast-math flags may only be queried on FPMathOperator instances. A call
+  // that does not produce a floating-point value (for example a declaration
+  // that merely shares its name with a math routine) is rejected up front
+  // instead of tripping the assertion inside hasApproxFunc().
+  if (!isa<FPMathOperator>(CI))
+    return false;
+
+  return CI.hasApproxFunc();
+}
+
+/// Reports whether the call \p CI carries all three of the 'nnan', 'ninf' and
+/// 'nsz' fast-math flags.
+bool PPCGenScalarMASSEntries::isFiniteCallSafe(const CallInst &CI) const {
+  // FIXME: no-errno and trapping-math need to be set for MASS conversion
+  // but they don't have IR representation.
+  if (!CI.hasNoNaNs())
+    return false;
+  if (!CI.hasNoInfs())
+    return false;
+  return CI.hasNoSignedZeros();
+}
+
+/// Redirects the call \p CI from the scalar math function \p Func to the
+/// scalar MASS entry named \p MASSEntry, for example
+///     tanh         --> __xl_tanh_finite or __xl_tanh
+/// The "_finite" suffix is appended when isFiniteCallSafe() holds for \p CI.
+/// The MASS entry is looked up in (or inserted into) the parent module using
+/// the function type and attributes of \p Func. Calls whose result has no
+/// uses are left untouched.
+/// \returns true if the call site was rewritten.
+bool PPCGenScalarMASSEntries::createScalarMASSCall(StringRef MASSEntry,
+                                                   CallInst &CI,
+                                                   Function &Func) const {
+  if (CI.use_empty())
+    return false;
+
+  Module *ParentModule = Func.getParent();
+  assert(ParentModule && "Expecting a valid Module");
+
+  const char *Suffix = isFiniteCallSafe(CI) ? "_finite" : "";
+  const std::string EntryName = MASSEntry.str() + Suffix;
+
+  CI.setCalledFunction(ParentModule->getOrInsertFunction(
+      EntryName, Func.getFunctionType(), Func.getAttributes()));
+  return true;
+}
+
+/// Visits every function declaration in \p M whose name has an entry in the
+/// scalar MASS table and redirects its eligible direct call sites to the
+/// corresponding MASS entry.
+/// \returns true if at least one call site was rewritten.
+bool PPCGenScalarMASSEntries::runOnModule(Module &M) {
+  bool Changed = false;
+
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC || skipModule(M))
+    return false;
+
+  for (Function &Func : M) {
+    // Only declarations are considered; functions with a body are skipped.
+    if (!Func.isDeclaration())
+      continue;
+
+    auto Iter = ScalarMASSFuncs.find(Func.getName());
+    if (Iter == ScalarMASSFuncs.end())
+      continue;
+
+    // The call to createScalarMASSCall() invalidates the iterator over users
+    // upon replacing the users. Precomputing the current list of users allows
+    // us to replace all the call sites.
+    SmallVector<User *, 4> TheUsers;
+    for (User *U : Func.users())
+      TheUsers.push_back(U);
+
+    for (User *U : TheUsers) {
+      auto *CI = dyn_cast<CallInst>(U);
+      // Func may also be used as an operand of a call to some other function
+      // (e.g. passed as a function pointer). Only rewrite call sites whose
+      // callee really is Func; otherwise the callee of an unrelated call
+      // would be replaced.
+      if (!CI || CI->getCalledFunction() != &Func)
+        continue;
+
+      if (isCandidateSafeToLower(*CI))
+        Changed |= createScalarMASSCall(Iter->second, *CI, Func);
+    }
+  }
+
+  return Changed;
+}
+
+// Storage for the pass ID declared in the class.
+char PPCGenScalarMASSEntries::ID = 0;
+
+// Exposes the pass ID through the reference declared in PPC.h.
+char &llvm::PPCGenScalarMASSEntriesID = PPCGenScalarMASSEntries::ID;
+
+INITIALIZE_PASS(PPCGenScalarMASSEntries, DEBUG_TYPE,
+                "Generate Scalar MASS entries", false, false)
+
+/// Allocates a new instance of the scalar MASS entry generation pass and
+/// returns it as a raw pointer.
+ModulePass *llvm::createPPCGenScalarMASSEntriesPass() {
+  ModulePass *NewPass = new PPCGenScalarMASSEntries();
+  return NewPass;
+}