make simd_reduce_{mul,add}_unordered use only the 'reassoc' flag, not all fast-math flags

RalfJung · RalfJung · commit 3dc631a61a73 · 2024-02-21T16:28:20.000+01:00
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -1752,7 +1752,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
     }
 
-    pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
@@ -1772,7 +1772,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
     arith_red!(
         simd_reduce_add_unordered: BinaryOp::Plus,
-        vector_reduce_fadd_fast,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0 // TODO: Use this argument.
     );
     arith_red!(
         simd_reduce_mul_unordered: BinaryOp::Mult,
-        vector_reduce_fmul_fast,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1367,17 +1367,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
     }
-    pub fn vector_reduce_fadd_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
-            llvm::LLVMRustSetAlgebraicMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }
-    pub fn vector_reduce_fmul_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
-            llvm::LLVMRustSetAlgebraicMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
     arith_red!(
         simd_reduce_add_unordered: vector_reduce_add,
-        vector_reduce_fadd_algebraic,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0
     );
     arith_red!(
         simd_reduce_mul_unordered: vector_reduce_mul,
-        vector_reduce_fmul_algebraic,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1619,6 +1619,7 @@ extern "C" {
 
     pub fn LLVMRustSetFastMath(Instr: &Value);
     pub fn LLVMRustSetAlgebraicMath(Instr: &Value);
+    pub fn LLVMRustSetAllowReassoc(Instr: &Value);
 
     // Miscellaneous instructions
     pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;
diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -418,11 +418,11 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
   }
 }
 
 // Enable all fast-math flags, including those which will cause floating-point operations
 // to return poison for some well-defined inputs. This function can only be used to build
 // unsafe Rust intrinsics. That unsafety does permit additional optimizations, but at the
 // time of writing, their value is not well-understood relative to those enabled by
 // LLVMRustSetAlgebraicMath.
 //
 // https://llvm.org/docs/LangRef.html#fast-math-flags
 extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
@@ -450,6 +450,15 @@ extern "C" void LLVMRustSetAlgebraicMath(LLVMValueRef V) {
   }
 }
 
+// Enable only the `reassoc` fast-math flag on V; a V that is not an Instruction is ignored.
+//
+// https://llvm.org/docs/LangRef.html#fast-math-flags
+extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
+  if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
+    I->setHasAllowReassoc(true);
+  }
+}
+
 extern "C" LLVMValueRef
 LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
                         const char *Name, LLVMAtomicOrdering Order) {
diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs
@@ -335,16 +335,12 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, add the elements of `x` and accumulate.
     pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
 
-    /// Add elements within a vector in arbitrary order, and without regard
-    /// for signed zeros.
+    /// Add elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered additions on the inputs/outputs.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
     ///
     /// `U` must be the element type of `T`.
-    ///
-    /// # Safety
-    ///
-    /// All input elements must be finite (i.e., not NAN and not +/- INF).
     pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
 
     /// Multiply elements within a vector from left to right.
@@ -356,16 +352,12 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, multiply the elements of `x` and accumulate.
     pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
 
-    /// Multiply elements within a vector in arbitrary order, and without regard
-    /// for signed zeros.
+    /// Multiply elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered multiplications on the inputs/outputs.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
     ///
     /// `U` must be the element type of `T`.
-    ///
-    /// # Safety
-    ///
-    /// All input elements must be finite (i.e., not NAN and not +/- INF).
     pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
 
     /// Check if all mask values are true.

Original file line number	Diff line number	Diff line change
`@@ -1752,7 +1752,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {`
`1752`	`1752`	`self.vector_reduce(src, \|a, b, context\| context.new_binary_op(None, op, a.get_type(), a, b))`
`1753`	`1753`	`}`
`1754`	`1754`
`1755`		`- pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {`
	`1755`	`+ pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {`
`1756`	`1756`	`unimplemented!();`
`1757`	`1757`	`}`
`1758`	`1758`
`@@ -1772,7 +1772,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {`
`1772`	`1772`	`unimplemented!();`
`1773`	`1773`	`}`
`1774`	`1774`
`1775`		`- pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {`
	`1775`	`+ pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {`
`1776`	`1776`	`unimplemented!();`
`1777`	`1777`	`}`
`1778`	`1778`
Original file line number	Diff line number	Diff line change
`@@ -1367,17 +1367,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {`
`1367`	`1367`	`pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {`
`1368`	`1368`	`unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }`
`1369`	`1369`	`}`
`1370`		`- pub fn vector_reduce_fadd_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {`
	`1370`	`+ pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {`
`1371`	`1371`	`unsafe {`
`1372`	`1372`	`let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);`
`1373`		`- llvm::LLVMRustSetAlgebraicMath(instr);`
	`1373`	`+ llvm::LLVMRustSetAllowReassoc(instr);`
`1374`	`1374`	`instr`
`1375`	`1375`	`}`
`1376`	`1376`	`}`
`1377`		`- pub fn vector_reduce_fmul_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {`
	`1377`	`+ pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {`
`1378`	`1378`	`unsafe {`
`1379`	`1379`	`let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);`
`1380`		`- llvm::LLVMRustSetAlgebraicMath(instr);`
	`1380`	`+ llvm::LLVMRustSetAllowReassoc(instr);`
`1381`	`1381`	`instr`
`1382`	`1382`	`}`
`1383`	`1383`	`}`