
Commit f5f7e2a

amirBishamrami authored and committed
[mlir][tosa] Constant optimizations for reduce operations
Replace a reduce operation whose input argument is a constant tensor with the precomputed constant result. Because the argument of the reduce operation is a constant tensor with a single user, the reduced tensor can be computed at compile time, and the operation is replaced with a constant tensor that uses less memory. This optimization is implemented for: tosa.reduce_sum, tosa.reduce_prod, tosa.reduce_any, tosa.reduce_all, tosa.reduce_max, tosa.reduce_min.

Reviewed By: rsuderman

Differential Revision: https://reviews.llvm.org/D154832
1 parent a7612e2 commit f5f7e2a
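
To make the effect concrete, here is a minimal standalone C++ sketch of the idea (an illustration, not code from this patch): when the input of a reduce operation is a compile-time constant, the whole reduction can be evaluated ahead of time and the op replaced by the smaller result tensor. The helper name reduceSumAxis1 and the row-major layout are assumptions made for this example only.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical helper mirroring what folding tosa.reduce_sum (axis = 1) on a
// constant 2-D input computes: one sum per row, producing a rows x 1 tensor.
std::vector<int64_t> reduceSumAxis1(const std::vector<int64_t> &data,
                                    int64_t rows, int64_t cols) {
  std::vector<int64_t> result(rows, 0);
  for (int64_t r = 0; r < rows; ++r)
    for (int64_t c = 0; c < cols; ++c)
      result[r] += data[r * cols + c]; // row-major flat indexing
  return result;
}

int main() {
  // A constant 2x3 tensor; reducing along axis 1 yields a 2x1 tensor.
  std::vector<int64_t> tensor = {1, 2, 3, 4, 5, 6};
  for (int64_t v : reduceSumAxis1(tensor, 2, 3))
    std::cout << v << "\n"; // prints 6 and 15
}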

5 files changed: +636 -1 lines changed


mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td

Lines changed: 32 additions & 1 deletion
@@ -1273,6 +1273,11 @@ def Tosa_ReduceAllOp : Tosa_InferTensorTypeOp<"reduce_all"> {
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the AND result between two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      return leftOperand & rightOperand;
+    }
   }];
 }

@@ -1301,6 +1306,11 @@ def Tosa_ReduceAnyOp : Tosa_InferTensorTypeOp<"reduce_any"> {
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the OR result between two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      return leftOperand | rightOperand;
+    }
   }];
 }

@@ -1329,6 +1339,12 @@ def Tosa_ReduceMaxOp : Tosa_InferTensorTypeOp<"reduce_max"> {
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the max of the two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      const llvm::APInt subtractRes = leftOperand - rightOperand;
+      return (!subtractRes.isNegative()) ? leftOperand : rightOperand;
+    }
   }];
 }

@@ -1357,6 +1373,12 @@ def Tosa_ReduceMinOp : Tosa_InferTensorTypeOp<"reduce_min"> {
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the min of the two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      const llvm::APInt subtractRes = leftOperand - rightOperand;
+      return (!subtractRes.isNegative()) ? rightOperand : leftOperand;
+    }
   }];
 }

@@ -1385,6 +1407,11 @@ def Tosa_ReduceProdOp : Tosa_InferTensorTypeOp<"reduce_prod"> {
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the prod of the two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      return leftOperand * rightOperand;
+    }
   }];
 }

@@ -1406,13 +1433,17 @@ def Tosa_ReduceSumOp : Tosa_InferTensorTypeOp<"reduce_sum"> {
   let results = (outs
     Tosa_Tensor:$output
   );
-
   let hasFolder = 1;

   let extraClassDeclaration = [{
     /// Returns true when two result types are compatible for this op;
     /// Method used by InferTypeOpInterface.
     static bool isCompatibleReturnTypes(TypeRange l, TypeRange r);
+
+    /// Return the sum of the two integer operands
+    static inline APInt calcOneElement(APInt leftOperand, APInt rightOperand) {
+      return leftOperand + rightOperand;
+    }
   }];
 }
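
Each of these ops now exposes the same static calcOneElement hook, which is what lets a single templated routine fold any of the reductions (see TosaFolders.cpp below). A minimal sketch of that static-hook pattern, where FakeMaxOp and foldElements are made up for illustration and only stand in for the generated op classes:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"

// Made-up stand-in for a generated op class; its rule matches ReduceMaxOp's
// calcOneElement above (keep the operand whose difference is non-negative).
struct FakeMaxOp {
  static inline llvm::APInt calcOneElement(llvm::APInt leftOperand,
                                           llvm::APInt rightOperand) {
    return (leftOperand - rightOperand).isNegative() ? rightOperand
                                                     : leftOperand;
  }
};

// Generic left-fold over a list of elements using whatever rule OpTy defines.
template <typename OpTy>
llvm::APInt foldElements(llvm::ArrayRef<llvm::APInt> elems) {
  llvm::APInt acc = elems.front();
  for (const llvm::APInt &e : elems.drop_front())
    acc = OpTy::calcOneElement(acc, e);
  return acc;
}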

mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h

Lines changed: 2 additions & 0 deletions
@@ -34,6 +34,8 @@ void populateTosaFoldConstantReciprocalPatterns(MLIRContext *ctx,
                                                 RewritePatternSet &patterns);
 void populateTosaFoldConstantTransposePatterns(MLIRContext *ctx,
                                                RewritePatternSet &patterns);
+void populateTosaConstantReduction(MLIRContext *ctx,
+                                   RewritePatternSet &patterns);

 std::unique_ptr<Pass> createTosaLayerwiseConstantFoldPass();
 std::unique_ptr<Pass> createTosaInferShapesPass();
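
For callers that want only this folding, the new entry point can also be plugged into a rewrite pattern set and driven with the greedy rewriter, the same way the pass below does. A hedged sketch (the wrapper name foldConstantReductions is made up; the MLIR APIs used are the standard ones visible elsewhere in this diff):

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Tosa/Transforms/Passes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

// Apply only the constant-reduction patterns to a single function.
mlir::LogicalResult foldConstantReductions(mlir::func::FuncOp func) {
  mlir::MLIRContext *ctx = func.getContext();
  mlir::RewritePatternSet patterns(ctx);
  mlir::tosa::populateTosaConstantReduction(ctx, patterns);
  return mlir::applyPatternsAndFoldGreedily(func, std::move(patterns));
}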

mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp

Lines changed: 123 additions & 0 deletions
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//

 #include <functional>
+#include <numeric>

 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Tosa/Transforms/Passes.h"

@@ -289,8 +290,130 @@ struct TosaFoldConstantReciprocal : public OpRewritePattern<ReciprocalOp> {
   }
 };

+/// Getting the axes position of the element which is located
+/// in the tensor at the counter index
+
+llvm::SmallVector<int64_t>
+getPositionFromIndex(int64_t index, llvm::ArrayRef<int64_t> tensorShape) {
+  int64_t remaining = index;
+  llvm::SmallVector<int64_t> position(tensorShape.size(), 0);
+  for (int64_t i = tensorShape.size() - 1; i >= 0; --i) {
+    position[i] = remaining % tensorShape[i];
+    remaining /= tensorShape[i];
+  }
+  return position;
+}
+
+/// Getting the index of the element which is located at the
+/// axes position in the tensor
+
+int64_t getIndexFromPosition(llvm::ArrayRef<int64_t> position,
+                             llvm::ArrayRef<int64_t> tensorShape) {
+  int64_t index = 0;
+  int64_t multiplierTmp = 1;
+  for (int64_t i = position.size() - 1; i >= 0; --i) {
+    index += position[i] * multiplierTmp;
+    multiplierTmp *= tensorShape[i];
+  }
+  return index;
+}
+
+template <typename OperationType>
+llvm::APInt calculateReducedValue(const mlir::ElementsAttr &oldTensorAttr,
+                                  llvm::ArrayRef<int64_t> oldShape,
+                                  int64_t reductionAxis,
+                                  int64_t reductionIndex) {
+
+  llvm::SmallVector<int64_t> newShape(oldShape);
+  newShape[reductionAxis] = 1;
+  /// Let's calculate the position of the index
+  llvm::SmallVector<int64_t> position =
+      getPositionFromIndex(reductionIndex, newShape);
+  auto oldTensor = oldTensorAttr.getValues<llvm::APInt>();
+  /// Starting from the first positon along the reduction axis
+  position[reductionAxis] = 0;
+  int64_t indexAtOldTensor = getIndexFromPosition(position, oldShape);
+  llvm::APInt reducedValue = oldTensor[indexAtOldTensor];
+
+  for (int64_t reductionAxisVal = 1; reductionAxisVal < oldShape[reductionAxis];
+       ++reductionAxisVal) {
+
+    int64_t stride = std::accumulate(oldShape.begin() + reductionAxis + 1,
+                                     oldShape.end(), 1, std::multiplies<int>());
+    int64_t index = indexAtOldTensor + stride * reductionAxisVal;
+    reducedValue =
+        OperationType::calcOneElement(reducedValue, oldTensor[index]);
+  }
+  return reducedValue;
+}
+
+template <typename OperationType>
+struct ReduceConstantOptimization : public OpRewritePattern<OperationType> {
+
+  using OpRewritePattern<OperationType>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(OperationType op,
+                                PatternRewriter &rewriter) const override {
+    Value inputOp = op.getInput();
+    auto constOp = inputOp.getDefiningOp<tosa::ConstOp>();
+
+    if (!constOp)
+      return rewriter.notifyMatchFailure(
+          op, "reduce input must be const operation");
+
+    if (!inputOp.hasOneUse())
+      return rewriter.notifyMatchFailure(
+          op, "input operation has more than one user");
+
+    auto resultType = cast<ShapedType>(op.getOutput().getType());
+
+    if (!resultType.hasStaticShape())
+      return rewriter.notifyMatchFailure(op, "result type shape is not static");
+
+    auto reductionAxis = op.getAxis();
+    const auto denseElementsAttr = constOp.getValue();
+    const auto shapedOldElementsValues =
+        denseElementsAttr.getType().cast<ShapedType>();
+
+    if (!llvm::isa<IntegerType>(shapedOldElementsValues.getElementType()))
+      return rewriter.notifyMatchFailure(
+          op, "reduce input currently supported with integer type");
+
+    auto oldShape = shapedOldElementsValues.getShape();
+    auto newShape = resultType.getShape();
+
+    auto newNumOfElements = std::accumulate(newShape.begin(), newShape.end(), 1,
+                                            std::multiplies<int>());
+    llvm::SmallVector<APInt> newReducedTensor(newNumOfElements);
+
+    for (int64_t reductionIndex = 0; reductionIndex < newNumOfElements;
+         ++reductionIndex) {
+
+      /// Let's reduce all the elements along this reduction axis
+      newReducedTensor[reductionIndex] = calculateReducedValue<OperationType>(
+          denseElementsAttr, oldShape, reductionAxis, reductionIndex);
+    }
+
+    auto rankedTensorType = cast<RankedTensorType>(resultType);
+    auto denseAttr =
+        mlir::DenseElementsAttr::get(rankedTensorType, newReducedTensor);
+    rewriter.replaceOpWithNewOp<tosa::ConstOp>(op, rankedTensorType, denseAttr);
+    return success();
+  }
+};
+
 } // namespace

+void mlir::tosa::populateTosaConstantReduction(MLIRContext *ctx,
+                                               RewritePatternSet &patterns) {
+  patterns.add<ReduceConstantOptimization<ReduceAllOp>>(ctx);
+  patterns.add<ReduceConstantOptimization<ReduceAnyOp>>(ctx);
+  patterns.add<ReduceConstantOptimization<ReduceMaxOp>>(ctx);
+  patterns.add<ReduceConstantOptimization<ReduceMinOp>>(ctx);
+  patterns.add<ReduceConstantOptimization<ReduceProdOp>>(ctx);
+  patterns.add<ReduceConstantOptimization<ReduceSumOp>>(ctx);
+}
+
 void mlir::tosa::populateTosaFoldConstantTransposePatterns(
     MLIRContext *ctx, RewritePatternSet &patterns) {
   patterns.add<TosaFoldConstantTranspose>(ctx);
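
A quick worked example of the index arithmetic above (a standalone sketch, not part of the patch): for an input of shape [2, 3] reduced along axis 1, output index 1 maps to position [1, 0] in the shape-[2, 1] result; translated back into the input shape, the run starts at flat index 3 and advances by the axis stride 1, visiting elements 3, 4 and 5. The snippet hard-codes the 2-D row-major index formula for brevity:

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Row-major flat index -> per-dimension position (same math as getPositionFromIndex).
std::vector<int64_t> positionFromIndex(int64_t index,
                                       const std::vector<int64_t> &shape) {
  std::vector<int64_t> pos(shape.size(), 0);
  for (int64_t i = shape.size() - 1; i >= 0; --i) {
    pos[i] = index % shape[i];
    index /= shape[i];
  }
  return pos;
}

int main() {
  // Input shape [2, 3], reduce along axis 1 -> output shape [2, 1].
  std::vector<int64_t> oldShape = {2, 3}, newShape = {2, 1};
  int64_t axis = 1, reductionIndex = 1; // second element of the output
  std::vector<int64_t> pos = positionFromIndex(reductionIndex, newShape);
  pos[axis] = 0; // start of the run along the reduction axis
  // Flat index of that position in the input (row-major, 2-D case).
  int64_t start = pos[0] * oldShape[1] + pos[1];
  // Stride between consecutive elements along the reduction axis.
  int64_t stride = std::accumulate(oldShape.begin() + axis + 1, oldShape.end(),
                                   int64_t{1}, std::multiplies<int64_t>());
  for (int64_t k = 0; k < oldShape[axis]; ++k)
    std::cout << start + k * stride << " "; // prints: 3 4 5
}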

mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@ struct TosaLayerwiseConstantFoldPass

     mlir::tosa::populateTosaFoldConstantReciprocalPatterns(ctx, patterns);
     mlir::tosa::populateTosaFoldConstantTransposePatterns(ctx, patterns);
+    mlir::tosa::populateTosaConstantReduction(ctx, patterns);
     populateTosaOpsCanonicalizationPatterns(ctx, patterns);

     if (applyPatternsAndFoldGreedily(func, std::move(patterns)).failed())
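
With the patterns registered here, the fold runs whenever the layerwise constant-fold pass runs. A minimal sketch of invoking that pass programmatically (illustrative only; foldTosaConstants is a made-up wrapper, while the pass-manager APIs are the standard MLIR ones):

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Tosa/Transforms/Passes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"

// Schedule the TOSA layerwise constant-fold pass (which now also performs the
// reduce constant optimization) on every function in an existing module.
mlir::LogicalResult foldTosaConstants(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addNestedPass<mlir::func::FuncOp>(
      mlir::tosa::createTosaLayerwiseConstantFoldPass());
  return pm.run(module);
}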
