@@ -151,9 +151,6 @@ def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
151
151
152
152
def doMulWide : Predicate<"doMulWide">;
153
153
154
- def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155
- def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
156
-
157
154
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158
155
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
159
156
@@ -1119,26 +1116,19 @@ def INEG64 :
1119
1116
//-----------------------------------
1120
1117
1121
1118
// Constant 1.0f
1122
- def FloatConst1 : PatLeaf<(fpimm) , [{
1123
- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1124
- N->getValueAPF() .convertToFloat() == 1.0f;
1119
+ def f32imm_1 : FPImmLeaf<f32 , [{
1120
+ return &Imm .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1121
+ Imm .convertToFloat() == 1.0f;
1125
1122
}]>;
1126
1123
// Constant 1.0 (double)
1127
- def DoubleConst1 : PatLeaf<(fpimm) , [{
1128
- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1129
- N->getValueAPF() .convertToDouble() == 1.0;
1124
+ def f64imm_1 : FPImmLeaf<f64 , [{
1125
+ return &Imm .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1126
+ Imm .convertToDouble() == 1.0;
1130
1127
}]>;
1131
1128
// Constant -1.0 (double)
1132
- def DoubleConstNeg1 : PatLeaf<(fpimm), [{
1133
- return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1134
- N->getValueAPF().convertToDouble() == -1.0;
1135
- }]>;
1136
-
1137
-
1138
- // Constant -X -> X (double)
1139
- def NegDoubleConst : SDNodeXForm<fpimm, [{
1140
- return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1141
- SDLoc(N), MVT::f64);
1129
+ def f64imm_neg1 : FPImmLeaf<f64, [{
1130
+ return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
1131
+ Imm.convertToDouble() == -1.0;
1142
1132
}]>;
1143
1133
1144
1134
defm FADD : F3_fma_component<"add", fadd>;
@@ -1189,11 +1179,11 @@ def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
1189
1179
//
1190
1180
// F64 division
1191
1181
//
1192
- def FDIV641r :
1182
+ def FRCP64r :
1193
1183
NVPTXInst<(outs Float64Regs:$dst),
1194
- (ins f64imm:$a, Float64Regs:$b),
1184
+ (ins Float64Regs:$b),
1195
1185
"rcp.rn.f64 \t$dst, $b;",
1196
- [(set f64:$dst, (fdiv DoubleConst1:$a , f64:$b))]>;
1186
+ [(set f64:$dst, (fdiv f64imm_1 , f64:$b))]>;
1197
1187
def FDIV64rr :
1198
1188
NVPTXInst<(outs Float64Regs:$dst),
1199
1189
(ins Float64Regs:$a, Float64Regs:$b),
@@ -1207,109 +1197,114 @@ def FDIV64ri :
1207
1197
1208
1198
// fdiv will be converted to rcp
1209
1199
// fdiv -1.0, X => fneg (rcp.rn X)
1210
- def : Pat<(fdiv DoubleConstNeg1:$a , f64:$b),
1211
- (FNEGf64 (FDIV641r (NegDoubleConst node:$a), $b))>;
1200
+ def : Pat<(fdiv f64imm_neg1 , f64:$b),
1201
+ (FNEGf64 (FRCP64r $b))>;
1212
1202
1213
1203
//
1214
1204
// F32 Approximate reciprocal
1215
1205
//
1216
- def FDIV321r_ftz :
1206
+
1207
+ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
1208
+ (fdiv node:$a, node:$b), [{
1209
+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Approx;
1210
+ }]>;
1211
+
1212
+
1213
+ def FRCP32_approx_r_ftz :
1217
1214
NVPTXInst<(outs Float32Regs:$dst),
1218
- (ins f32imm:$a, Float32Regs:$b),
1215
+ (ins Float32Regs:$b),
1219
1216
"rcp.approx.ftz.f32 \t$dst, $b;",
1220
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1221
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1222
- def FDIV321r :
1217
+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>,
1218
+ Requires<[doF32FTZ]>;
1219
+ def FRCP32_approx_r :
1223
1220
NVPTXInst<(outs Float32Regs:$dst),
1224
- (ins f32imm:$a, Float32Regs:$b),
1221
+ (ins Float32Regs:$b),
1225
1222
"rcp.approx.f32 \t$dst, $b;",
1226
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1227
- Requires<[do_DIVF32_APPROX]>;
1223
+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>;
1224
+
1228
1225
//
1229
1226
// F32 Approximate division
1230
1227
//
1231
1228
def FDIV32approxrr_ftz :
1232
1229
NVPTXInst<(outs Float32Regs:$dst),
1233
1230
(ins Float32Regs:$a, Float32Regs:$b),
1234
1231
"div.approx.ftz.f32 \t$dst, $a, $b;",
1235
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1236
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1232
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1233
+ Requires<[doF32FTZ]>;
1237
1234
def FDIV32approxri_ftz :
1238
1235
NVPTXInst<(outs Float32Regs:$dst),
1239
1236
(ins Float32Regs:$a, f32imm:$b),
1240
1237
"div.approx.ftz.f32 \t$dst, $a, $b;",
1241
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1242
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1238
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1239
+ Requires<[doF32FTZ]>;
1243
1240
def FDIV32approxrr :
1244
1241
NVPTXInst<(outs Float32Regs:$dst),
1245
1242
(ins Float32Regs:$a, Float32Regs:$b),
1246
1243
"div.approx.f32 \t$dst, $a, $b;",
1247
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1248
- Requires<[do_DIVF32_APPROX]>;
1244
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1249
1245
def FDIV32approxri :
1250
1246
NVPTXInst<(outs Float32Regs:$dst),
1251
1247
(ins Float32Regs:$a, f32imm:$b),
1252
1248
"div.approx.f32 \t$dst, $a, $b;",
1253
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1254
- Requires<[do_DIVF32_APPROX]>;
1249
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
1255
1250
//
1256
1251
// F32 Semi-accurate reciprocal
1257
1252
//
1258
1253
// rcp.approx gives the same result as div.full(1.0f, b) and is faster.
1259
1254
//
1260
- def FDIV321r_approx_ftz :
1261
- NVPTXInst<(outs Float32Regs:$dst),
1262
- (ins f32imm:$a, Float32Regs:$b),
1263
- "rcp.approx.ftz.f32 \t$dst, $b;",
1264
- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1265
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1266
- def FDIV321r_approx :
1267
- NVPTXInst<(outs Float32Regs:$dst),
1268
- (ins f32imm:$a, Float32Regs:$b),
1269
- "rcp.approx.f32 \t$dst, $b;",
1270
- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1271
- Requires<[do_DIVF32_FULL]>;
1255
+
1256
+ def fdiv_full : PatFrag<(ops node:$a, node:$b),
1257
+ (fdiv node:$a, node:$b), [{
1258
+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Full;
1259
+ }]>;
1260
+
1261
+
1262
+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1263
+ (FRCP32_approx_r_ftz $b)>,
1264
+ Requires<[doF32FTZ]>;
1265
+
1266
+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1267
+ (FRCP32_approx_r $b)>;
1268
+
1272
1269
//
1273
1270
// F32 Semi-accurate division
1274
1271
//
1275
1272
def FDIV32rr_ftz :
1276
1273
NVPTXInst<(outs Float32Regs:$dst),
1277
1274
(ins Float32Regs:$a, Float32Regs:$b),
1278
1275
"div.full.ftz.f32 \t$dst, $a, $b;",
1279
- [(set f32:$dst, (fdiv Float32Regs :$a, f32:$b))]>,
1280
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1276
+ [(set f32:$dst, (fdiv_full f32 :$a, f32:$b))]>,
1277
+ Requires<[doF32FTZ]>;
1281
1278
def FDIV32ri_ftz :
1282
1279
NVPTXInst<(outs Float32Regs:$dst),
1283
1280
(ins Float32Regs:$a, f32imm:$b),
1284
1281
"div.full.ftz.f32 \t$dst, $a, $b;",
1285
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1286
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1282
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1283
+ Requires<[doF32FTZ]>;
1287
1284
def FDIV32rr :
1288
1285
NVPTXInst<(outs Float32Regs:$dst),
1289
1286
(ins Float32Regs:$a, Float32Regs:$b),
1290
1287
"div.full.f32 \t$dst, $a, $b;",
1291
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1292
- Requires<[do_DIVF32_FULL]>;
1288
+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
1293
1289
def FDIV32ri :
1294
1290
NVPTXInst<(outs Float32Regs:$dst),
1295
1291
(ins Float32Regs:$a, f32imm:$b),
1296
1292
"div.full.f32 \t$dst, $a, $b;",
1297
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1298
- Requires<[do_DIVF32_FULL]>;
1293
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
1299
1294
//
1300
1295
// F32 Accurate reciprocal
1301
1296
//
1302
- def FDIV321r_prec_ftz :
1297
+ def FRCP32r_prec_ftz :
1303
1298
NVPTXInst<(outs Float32Regs:$dst),
1304
- (ins f32imm:$a, Float32Regs:$b),
1299
+ (ins Float32Regs:$b),
1305
1300
"rcp.rn.ftz.f32 \t$dst, $b;",
1306
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1301
+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>,
1307
1302
Requires<[doF32FTZ]>;
1308
- def FDIV321r_prec :
1303
+ def FRCP32r_prec :
1309
1304
NVPTXInst<(outs Float32Regs:$dst),
1310
- (ins f32imm:$a, Float32Regs:$b),
1305
+ (ins Float32Regs:$b),
1311
1306
"rcp.rn.f32 \t$dst, $b;",
1312
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>;
1307
+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>;
1313
1308
//
1314
1309
// F32 Accurate division
1315
1310
//
0 commit comments