Skip to content

Commit cfe1adc

Browse files
authored
Reland: [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) (#109878)
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This preliminary work adds the intrinsic to llvm and expands using atan intrinsic for DXIL backend, since DXIL has no atan2 op. Part 1 for Implement the atan2 HLSL Function #70096. (reland #108865 reverted in #109842 due to doc build break)
1 parent 81dac7d commit cfe1adc

File tree

5 files changed

+188
-0
lines changed

5 files changed

+188
-0
lines changed

llvm/docs/LangRef.rst

+37
Original file line numberDiff line numberDiff line change
@@ -15583,6 +15583,43 @@ trapping or setting ``errno``.
1558315583
When specified with the fast-math-flag 'afn', the result may be approximated
1558415584
using a less accurate calculation.
1558515585

15586+
'``llvm.atan2.*``' Intrinsic
15587+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15588+
15589+
Syntax:
15590+
"""""""
15591+
15592+
This is an overloaded intrinsic. You can use ``llvm.atan2`` on any
15593+
floating-point or vector of floating-point type. Not all targets support
15594+
all types however.
15595+
15596+
::
15597+
15598+
declare float @llvm.atan2.f32(float %Y, float %X)
15599+
declare double @llvm.atan2.f64(double %Y, double %X)
15600+
declare x86_fp80 @llvm.atan2.f80(x86_fp80 %Y, x86_fp80 %X)
15601+
declare fp128 @llvm.atan2.f128(fp128 %Y, fp128 %X)
15602+
declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %Y, ppc_fp128 %X)
15603+
15604+
Overview:
15605+
"""""""""
15606+
15607+
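The '``llvm.atan2.*``' intrinsics return the arctangent of the first operand divided by the second operand, using the signs of both operands to determine the quadrant of the result.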
15608+
15609+
Arguments:
15610+
""""""""""
15611+
15612+
The arguments and return value are floating-point numbers of the same type.
15613+
15614+
Semantics:
15615+
""""""""""
15616+
15617+
Return the same value as a corresponding libm '``atan2``' function but without
15618+
trapping or setting ``errno``.
15619+
15620+
When specified with the fast-math-flag 'afn', the result may be approximated
15621+
using a less accurate calculation.
15622+
1558615623
'``llvm.sinh.*``' Intrinsic
1558715624
^^^^^^^^^^^^^^^^^^^^^^^^^^^
1558815625

llvm/include/llvm/IR/Intrinsics.td

+1
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
10161016
def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10171017
def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10181018
def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
1019+
def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
10191020
def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10201021
def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
10211022
def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

+52
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ using namespace llvm;
3636
static bool isIntrinsicExpansion(Function &F) {
3737
switch (F.getIntrinsicID()) {
3838
case Intrinsic::abs:
39+
case Intrinsic::atan2:
3940
case Intrinsic::exp:
4041
case Intrinsic::log:
4142
case Intrinsic::log10:
@@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) {
307308
return Builder.CreateFMul(X, MultiplicandVec);
308309
}
309310

311+
// Expands a call to the atan2 intrinsic into atan(y / x) followed by a chain
// of selects that corrects the result depending on the signs of x and y.
// Operand 0 of the call is treated as y and operand 1 as x.
//
// Returns the value produced by the last select. This function does not itself
// replace or erase Orig.
static Value *expandAtan2Intrinsic(CallInst *Orig) {
312+
Value *Y = Orig->getOperand(0);
313+
Value *X = Orig->getOperand(1);
314+
Type *Ty = X->getType();
315+
// New instructions are inserted immediately before the original call.
IRBuilder<> Builder(Orig);
316+
// Reuse the original call's fast-math flags for the instructions built below.
Builder.setFastMathFlags(Orig->getFastMathFlags());
317+
318+
Value *Tan = Builder.CreateFDiv(Y, X);
319+
320+
CallInst *Atan =
321+
Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan");
322+
// Carry the tail-call marker and the attribute list of the original call over
// to the new atan call.
Atan->setTailCall(Orig->isTailCall());
323+
Atan->setAttributes(Orig->getAttributes());
324+
325+
// Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
326+
Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi);
327+
Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2);
328+
Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2);
329+
Constant *Zero = ConstantFP::get(Ty, 0);
330+
// Both adjusted candidates are computed unconditionally; the selects below
// pick which value is actually used.
Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
331+
Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
332+
333+
// x > 0 -> atan.
// This is also the fall-through value when none of the conditions below hold.
334+
Value *Result = Atan;
// All four comparisons use ordered predicates, so none of them holds when the
// compared input is NaN.
335+
Value *XLt0 = Builder.CreateFCmpOLT(X, Zero);
336+
Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero);
337+
Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero);
338+
Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero);
339+
340+
// The four combined conditions below are mutually exclusive, so at most one
// select replaces the plain atan result.
// x < 0, y >= 0 -> atan + pi.
341+
Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
342+
Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
343+
344+
// x < 0, y < 0 -> atan - pi.
345+
Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
346+
Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
347+
348+
// x == 0, y < 0 -> -pi/2
349+
Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
350+
Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
351+
352+
// x == 0, y >= 0 -> pi/2
// NOTE(review): this case includes x == 0, y == 0, which therefore yields
// pi/2; libm atan2(+0, +0) is 0 - confirm the difference is intended.
353+
Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
354+
Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
355+
356+
return Result;
357+
}
358+
310359
static Value *expandPowIntrinsic(CallInst *Orig) {
311360

312361
Value *X = Orig->getOperand(0);
@@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
418467
case Intrinsic::abs:
419468
Result = expandAbs(Orig);
420469
break;
470+
case Intrinsic::atan2:
471+
Result = expandAtan2Intrinsic(Orig);
472+
break;
421473
case Intrinsic::exp:
422474
Result = expandExpIntrinsic(Orig);
423475
break;

llvm/test/CodeGen/DirectX/atan2.ll

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
2+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
3+
4+
; Make sure correct dxil expansions for atan2 are generated for float and half.
5+
6+
define noundef float @atan2_float(float noundef %y, float noundef %x) {
7+
entry:
8+
; CHECK: [[DIV:%.+]] = fdiv float %y, %x
9+
; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]])
10+
; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]])
11+
; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000
12+
; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000
13+
; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00
14+
; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00
15+
; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00
16+
; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00
17+
; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
18+
; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]]
19+
; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
20+
; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]]
21+
; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
22+
; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]]
23+
; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
24+
; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]]
25+
; CHECK: ret float [[SELECT_HPI]]
26+
%elt.atan2 = call float @llvm.atan2.f32(float %y, float %x)
27+
ret float %elt.atan2
28+
}
29+
30+
define noundef half @atan2_half(half noundef %y, half noundef %x) {
31+
entry:
32+
; CHECK: [[DIV:%.+]] = fdiv half %y, %x
33+
; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]])
34+
; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]])
35+
; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248
36+
; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248
37+
; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000
38+
; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000
39+
; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000
40+
; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000
41+
; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
42+
; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]]
43+
; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
44+
; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]]
45+
; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
46+
; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]]
47+
; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
48+
; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]]
49+
; CHECK: ret half [[SELECT_HPI]]
50+
%elt.atan2 = call half @llvm.atan2.f16(half %y, half %x)
51+
ret half %elt.atan2
52+
}
53+
54+
define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) {
55+
entry:
56+
; Just Expansion, no scalarization or lowering:
57+
; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x
58+
; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]])
59+
; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
60+
; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
61+
; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer
62+
; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer
63+
; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer
64+
; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer
65+
; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]]
66+
; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]]
67+
; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]]
68+
; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]]
69+
; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]]
70+
; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]]
71+
; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]]
72+
; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]]
73+
; EXPCHECK: ret <4 x float> [[SELECT_HPI]]
74+
75+
; Scalarization occurs after expansion, so atan scalarization is tested separately.
76+
; Expansion, scalarization and lowering:
77+
; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls.
78+
; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
79+
; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
80+
81+
%elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
82+
ret <4 x float> %elt.atan2
83+
}
84+
85+
declare half @llvm.atan2.f16(half, half)
86+
declare float @llvm.atan2.f32(float, float)
87+
declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
2+
3+
; DXIL operation atan does not support double overload type
4+
; CHECK: in function atan2_double
5+
; CHECK-SAME: Cannot create ATan operation: Invalid overload type
6+
7+
define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
8+
entry:
9+
%1 = call double @llvm.atan2.f64(double %a, double %b)
10+
ret double %1
11+
}

0 commit comments

Comments
 (0)