Commit f248d0b

[mlir][sparse] implement sparse_tensor.reorder_coo (llvm#68916)
As a side effect, the change also unifies the ConvertOp implementation between the library and codegen paths.
1 parent: 220244b

22 files changed (+265, -1345 lines)

mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
Lines changed: 2 additions & 0 deletions

@@ -151,6 +151,8 @@ enum class Action : uint32_t {
   kToCOO = 5,
   kToIterator = 6,
   kPack = 7,
+  // Sort an unordered COO in place.
+  kSortCOOInPlace = 8,
 };
 
 /// This enum defines all the sparse representations supportable by
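The new enumerator is the value that compiler-generated code hands to the sparse runtime to request the in-place sort implemented further down in SparseTensorStorage::sortInPlace(). As a rough illustration of how such an action value gets serviced, consider the sketch below; everything except the Action enumerators themselves is a hypothetical stand-in, not the actual MLIR runtime dispatch.

// Illustrative sketch only: how a runtime shim might act on the new action
// value. The applyAction helper and its signature are invented for this
// example; the real dispatch lives in the MLIR sparse runtime.
#include <cassert>
#include <cstdint>

enum class Action : uint32_t {
  // ... earlier actions elided ...
  kPack = 7,
  // Sort an unordered COO in place (added by this commit).
  kSortCOOInPlace = 8,
};

template <typename Storage>
Storage *applyAction(Action action, Storage *tensor) {
  switch (action) {
  case Action::kSortCOOInPlace:
    // The sort mutates the existing coordinate/value buffers, so the very
    // same storage object is handed back to the caller.
    tensor->sortInPlace();
    return tensor;
  default:
    assert(false && "all other actions elided in this sketch");
    return nullptr;
  }
}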

mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
Lines changed: 0 additions & 4 deletions

@@ -200,10 +200,6 @@ def SparseTensor_ConvertOp : SparseTensor_Op<"convert",
     // Whether the convert can be done by a single step (either a sort or a foreach),
     // or it would require a tmp buffer (sort, then foreach).
     bool directConvertable();
-
-    // Whether the convert is actually a sort coo
-    // TODO: The method will be removed when sort_coo operation is introduced.
-    bool isSortCOOConvert();
  }];
 
  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)";

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
Lines changed: 2 additions & 11 deletions

@@ -88,6 +88,8 @@ struct SparseCompilerOptions
       *this, "enable-buffer-initialization",
       desc("Enable zero-initialization of memory buffers"), init(false)};
 
+  // TODO: Delete the option, it should also be false after switching to
+  // buffer-deallocation-pass
   PassOptions::Option<bool> createSparseDeallocs{
       *this, "create-sparse-deallocs",
       desc("Specify if the temporary buffers created by the sparse "
@@ -100,11 +102,6 @@ struct SparseCompilerOptions
       *this, "vl", desc("Set the vector length (0 disables vectorization)"),
       init(0)};
 
-  // These options must be kept in sync with `SparseTensorConversionBase`.
-  PassOptions::Option<int32_t> sparseToSparse{
-      *this, "s2s-strategy",
-      desc("Set the strategy for sparse-to-sparse conversion"), init(0)};
-
   // These options must be kept in sync with the `ConvertVectorToLLVM`
   // (defined in include/mlir/Dialect/SparseTensor/Pipelines/Passes.h).
   PassOptions::Option<bool> reassociateFPReductions{
@@ -174,12 +171,6 @@ struct SparseCompilerOptions
                                 enableRuntimeLibrary);
   }
 
-  /// Projects out the options for `createSparseTensorConversionPass`.
-  SparseTensorConversionOptions sparseTensorConversionOptions() const {
-    return SparseTensorConversionOptions(
-        sparseToSparseConversionStrategy(sparseToSparse));
-  }
-
   /// Projects out the options for `createConvertVectorToLLVMPass`.
   ConvertVectorToLLVMPassOptions lowerVectorToLLVMOptions() const {
     ConvertVectorToLLVMPassOptions opts{};

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
Lines changed: 2 additions & 29 deletions

@@ -119,37 +119,11 @@ class SparseTensorTypeToPtrConverter : public TypeConverter {
   SparseTensorTypeToPtrConverter();
 };
 
-/// Defines a strategy for implementing sparse-to-sparse conversion.
-/// `kAuto` leaves it up to the compiler to automatically determine
-/// the method used. `kViaCOO` converts the source tensor to COO and
-/// then converts the COO to the target format. `kDirect` converts
-/// directly via the algorithm in <https://arxiv.org/abs/2001.02609>;
-/// however, beware that there are many formats not supported by this
-/// conversion method.
-enum class SparseToSparseConversionStrategy { kAuto, kViaCOO, kDirect };
-
-/// Converts command-line sparse2sparse flag to the strategy enum.
-SparseToSparseConversionStrategy sparseToSparseConversionStrategy(int32_t flag);
-
-/// SparseTensorConversion options.
-struct SparseTensorConversionOptions {
-  SparseTensorConversionOptions(SparseToSparseConversionStrategy s2s)
-      : sparseToSparseStrategy(s2s) {}
-  SparseTensorConversionOptions()
-      : SparseTensorConversionOptions(SparseToSparseConversionStrategy::kAuto) {
-  }
-  SparseToSparseConversionStrategy sparseToSparseStrategy;
-};
-
 /// Sets up sparse tensor conversion rules.
-void populateSparseTensorConversionPatterns(
-    TypeConverter &typeConverter, RewritePatternSet &patterns,
-    const SparseTensorConversionOptions &options =
-        SparseTensorConversionOptions());
+void populateSparseTensorConversionPatterns(TypeConverter &typeConverter,
+                                            RewritePatternSet &patterns);
 
 std::unique_ptr<Pass> createSparseTensorConversionPass();
-std::unique_ptr<Pass>
-createSparseTensorConversionPass(const SparseTensorConversionOptions &options);
 
 //===----------------------------------------------------------------------===//
 // The SparseTensorCodegen pass.
@@ -235,7 +209,6 @@ std::unique_ptr<Pass> createSparsificationAndBufferizationPass();
 std::unique_ptr<Pass> createSparsificationAndBufferizationPass(
     const bufferization::OneShotBufferizationOptions &bufferizationOptions,
     const SparsificationOptions &sparsificationOptions,
-    const SparseTensorConversionOptions &sparseTensorConversionOptions,
     bool createSparseDeallocs, bool enableRuntimeLibrary,
     bool enableBufferInitialization, unsigned vectorLength,
     bool enableVLAVectorization, bool enableSIMDIndex32);
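With the sparse-to-sparse strategy machinery gone, wiring up the conversion needs nothing beyond a type converter and a pattern set. The sketch below shows a driver built on the simplified entry point; only SparseTensorTypeToPtrConverter and populateSparseTensorConversionPatterns are taken from this header, while the surrounding conversion boilerplate (target legality, partial conversion) is generic MLIR usage written here as an assumption, not a copy of the actual pass.

// Hedged sketch of a caller of the simplified API; the legality choices are
// illustrative and differ from the real SparseTensorConversionPass setup.
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;
using namespace mlir::sparse_tensor;

LogicalResult convertSparseTensorsToRuntimeCalls(ModuleOp module) {
  MLIRContext *ctx = module.getContext();
  // Declared earlier in this header: maps sparse tensor types to opaque pointers.
  SparseTensorTypeToPtrConverter converter;

  RewritePatternSet patterns(ctx);
  // New option-free signature introduced by this commit.
  populateSparseTensorConversionPatterns(converter, patterns);

  ConversionTarget target(*ctx);
  // Require all sparse_tensor ops to be rewritten; partial conversion leaves
  // unlisted ops untouched.
  target.addIllegalDialect<SparseTensorDialect>();
  return applyPartialConversion(module, target, std::move(patterns));
}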

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
Lines changed: 0 additions & 4 deletions

@@ -201,10 +201,6 @@ def SparseTensorConversionPass : Pass<"sparse-tensor-conversion", "ModuleOp"> {
     "scf::SCFDialect",
     "sparse_tensor::SparseTensorDialect",
   ];
-  let options = [
-    Option<"sparseToSparse", "s2s-strategy", "int32_t", "0",
-           "Set the strategy for sparse-to-sparse conversion">,
-  ];
 }
 
 def SparseTensorCodegen : Pass<"sparse-tensor-codegen", "ModuleOp"> {

mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
Lines changed: 70 additions & 0 deletions

@@ -374,6 +374,19 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Partially specialize lexicographical insertions based on template types.
   void lexInsert(const uint64_t *lvlCoords, V val) final {
     assert(lvlCoords && "Received nullptr for level-coordinates");
+    // TODO: get rid of this! canonicalize all-dense "sparse" array into dense
+    // tensors.
+    bool allDense = std::all_of(getLvlTypes().begin(), getLvlTypes().end(),
+                                [](DimLevelType lt) { return isDenseDLT(lt); });
+    if (allDense) {
+      uint64_t lvlRank = getLvlRank();
+      uint64_t valIdx = 0;
+      // Linearize the address
+      for (size_t lvl = 0; lvl < lvlRank; lvl++)
+        valIdx = valIdx * getLvlSize(lvl) + lvlCoords[lvl];
+      values[valIdx] = val;
+      return;
+    }
     // First, wrap up pending insertion path.
     uint64_t diffLvl = 0;
     uint64_t full = 0;
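For an all-dense level layout the insertion degenerates to a plain dense store: the loop folds the level coordinates into one offset with the usual row-major recurrence valIdx = valIdx * size(lvl) + coord(lvl). A tiny standalone check of that arithmetic is shown below; the sizes and coordinates are invented example data, independent of the storage class.

// Standalone illustration of the row-major linearization used by the new
// all-dense fast path in lexInsert(); the data here is made up.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> lvlSizes = {2, 3, 4};  // an all-dense 2x3x4 layout
  std::vector<uint64_t> lvlCoords = {1, 2, 3}; // the last element
  uint64_t valIdx = 0;
  // Same recurrence as in lexInsert(): fold coordinates level by level.
  for (size_t lvl = 0; lvl < lvlSizes.size(); lvl++)
    valIdx = valIdx * lvlSizes[lvl] + lvlCoords[lvl];
  // 1*(3*4) + 2*4 + 3 = 23, i.e. the last slot of the values array.
  std::printf("linearized index = %llu\n",
              static_cast<unsigned long long>(valIdx));
  return 0;
}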
@@ -457,6 +470,63 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     return coo;
   }
 
+  /// Sort the unordered tensor in place, the method assumes that it is
+  /// an unordered COO tensor.
+  void sortInPlace() {
+    uint64_t nnz = values.size();
+#ifndef NDEBUG
+    for (uint64_t l = 0; l < getLvlRank(); l++)
+      assert(nnz == coordinates[l].size());
+#endif
+
+    // In-place permutation.
+    auto applyPerm = [this](std::vector<uint64_t> &perm) {
+      size_t length = perm.size();
+      size_t lvlRank = getLvlRank();
+      // Cache for the current level coordinates.
+      std::vector<P> lvlCrds(lvlRank);
+      for (size_t i = 0; i < length; i++) {
+        size_t current = i;
+        if (i != perm[current]) {
+          for (size_t l = 0; l < lvlRank; l++)
+            lvlCrds[l] = coordinates[l][i];
+          V val = values[i];
+          // Deals with a permutation cycle.
+          while (i != perm[current]) {
+            size_t next = perm[current];
+            // Swaps the level coordinates and value.
+            for (size_t l = 0; l < lvlRank; l++)
+              coordinates[l][current] = coordinates[l][next];
+            values[current] = values[next];
+            perm[current] = current;
+            current = next;
+          }
+          for (size_t l = 0; l < lvlRank; l++)
+            coordinates[l][current] = lvlCrds[l];
+          values[current] = val;
+          perm[current] = current;
+        }
+      }
+    };
+
+    std::vector<uint64_t> sortedIdx(nnz, 0);
+    for (uint64_t i = 0; i < nnz; i++)
+      sortedIdx[i] = i;
+
+    std::sort(sortedIdx.begin(), sortedIdx.end(),
+              [this](uint64_t lhs, uint64_t rhs) {
+                for (uint64_t l = 0; l < getLvlRank(); l++) {
+                  if (coordinates[l][lhs] == coordinates[l][rhs])
+                    continue;
+                  return coordinates[l][lhs] < coordinates[l][rhs];
+                }
+                assert(false && "duplicate coordinates");
+                return false;
+              });
+
+    applyPerm(sortedIdx);
+  }
+
 private:
   /// Appends an arbitrary new position to `positions[lvl]`. This method
   /// checks that `pos` is representable in the `P` type; however, it
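The new sortInPlace() avoids materializing a second COO buffer: it sorts an index array with a lexicographic comparator over the level coordinates and then applies that permutation by walking its cycles, so every coordinate tuple and value is written exactly once and only a single tuple is cached at a time. The self-contained program below demonstrates the same cycle-following technique on a toy two-level COO; it mirrors the structure of the committed method but uses invented data and plain std::vectors rather than the MLIR storage class.

// Self-contained sketch of the technique behind sortInPlace(): sort an index
// array lexicographically over the COO coordinates, then apply the resulting
// permutation in place by walking its cycles.  The data below is made up.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Unordered 2-D COO: coordinates per level plus the values (3 nonzeros).
  std::vector<std::vector<uint64_t>> coordinates = {{1, 0, 1}, {0, 2, 1}};
  std::vector<double> values = {3.0, 1.0, 2.0};
  const size_t lvlRank = coordinates.size();
  const uint64_t nnz = values.size();

  // Step 1: sort indices so that perm[i] is the source position of the i-th
  // smallest coordinate tuple (gather semantics, as in the committed code).
  std::vector<uint64_t> perm(nnz);
  for (uint64_t i = 0; i < nnz; i++)
    perm[i] = i;
  std::sort(perm.begin(), perm.end(), [&](uint64_t lhs, uint64_t rhs) {
    for (size_t l = 0; l < lvlRank; l++) {
      if (coordinates[l][lhs] == coordinates[l][rhs])
        continue;
      return coordinates[l][lhs] < coordinates[l][rhs];
    }
    return false; // equal tuples keep their relative order
  });

  // Step 2: apply the permutation in place by following cycles; each entry
  // is written exactly once and only one tuple is cached at a time.
  std::vector<uint64_t> lvlCrds(lvlRank);
  for (uint64_t i = 0; i < nnz; i++) {
    uint64_t current = i;
    if (i != perm[current]) {
      for (size_t l = 0; l < lvlRank; l++)
        lvlCrds[l] = coordinates[l][i];
      double val = values[i];
      while (i != perm[current]) {
        uint64_t next = perm[current];
        for (size_t l = 0; l < lvlRank; l++)
          coordinates[l][current] = coordinates[l][next];
        values[current] = values[next];
        perm[current] = current; // mark slot as settled
        current = next;
      }
      for (size_t l = 0; l < lvlRank; l++)
        coordinates[l][current] = lvlCrds[l];
      values[current] = val;
      perm[current] = current;
    }
  }

  // Expected order: (0,2)->1.0, (1,0)->3.0, (1,1)->2.0.
  for (uint64_t i = 0; i < nnz; i++)
    std::printf("(%llu, %llu) -> %g\n",
                (unsigned long long)coordinates[0][i],
                (unsigned long long)coordinates[1][i], values[i]);
  return 0;
}

One subtlety carried over from the committed comparator: it asserts on exact duplicates, since a unique COO tensor should never contain two identical coordinate tuples.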

mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
Lines changed: 1 addition & 18 deletions

@@ -1060,20 +1060,12 @@ LogicalResult ConvertOp::verify() {
 }
 
 OpFoldResult ConvertOp::fold(FoldAdaptor adaptor) {
-  Type dstType = getType();
-  // Fold trivial dense-to-dense convert and leave trivial sparse-to-sparse
-  // convert for codegen to remove. This is because we use trivial
-  // sparse-to-sparse convert to tell bufferization that the sparse codegen
-  // will expand the tensor buffer into sparse tensor storage.
-  if (!getSparseTensorEncoding(dstType) && dstType == getSource().getType())
+  if (getType() == getSource().getType())
     return getSource();
   return {};
 }
 
 bool ConvertOp::directConvertable() {
-  if (isSortCOOConvert())
-    return false;
-
   SparseTensorType srcStt = getSparseTensorType(getSource());
   SparseTensorType dstStt = getSparseTensorType(getDest());
 
@@ -1099,15 +1091,6 @@ bool ConvertOp::directConvertable() {
   return false;
 }
 
-bool ConvertOp::isSortCOOConvert() {
-  // TODO: we should instead use a different sort_coo operation to handle
-  // the conversion between COOs (but with different ordering).
-  return isUniqueCOOType(getSource().getType()) &&
-         isUniqueCOOType(getDest().getType()) &&
-         !getSparseTensorType(getSource()).isAllOrdered() &&
-         getSparseTensorType(getDest()).isAllOrdered();
-}
-
 LogicalResult ToPositionsOp::verify() {
   auto e = getSparseTensorEncoding(getTensor().getType());
   if (failed(lvlIsInBounds(getLevel(), getTensor())))

mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
Lines changed: 3 additions & 3 deletions

@@ -35,9 +35,9 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addPass(createSparsificationAndBufferizationPass(
       getBufferizationOptionsForSparsification(
           options.testBufferizationAnalysisOnly),
-      options.sparsificationOptions(), options.sparseTensorConversionOptions(),
-      options.createSparseDeallocs, options.enableRuntimeLibrary,
-      options.enableBufferInitialization, options.vectorLength,
+      options.sparsificationOptions(), options.createSparseDeallocs,
+      options.enableRuntimeLibrary, options.enableBufferInitialization,
+      options.vectorLength,
       /*enableVLAVectorization=*/options.armSVE,
       /*enableSIMDIndex32=*/options.force32BitVectorIndices));
   if (options.testBufferizationAnalysisOnly)

mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
Lines changed: 10 additions & 19 deletions

@@ -680,31 +680,26 @@ class SparseDimOpConverter : public OpConversionPattern<tensor::DimOp> {
 };
 
 // TODO: use a new SortCOO operation here instead of reusing convert op.
-struct SparseSortCOOConverter : public OpConversionPattern<ConvertOp> {
+struct SparseReorderCOOConverter : public OpConversionPattern<ReorderCOOOp> {
   using OpConversionPattern::OpConversionPattern;
   LogicalResult
-  matchAndRewrite(ConvertOp op, ConvertOpAdaptor adaptor,
+  matchAndRewrite(ReorderCOOOp op, ReorderCOOOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    // Direct conversion should have already been lowered.
-    if (!op.isSortCOOConvert())
-      return failure();
-
     Location loc = op.getLoc();
     MLIRContext *ctx = op.getContext();
 
-    SparseTensorType srcStt = getSparseTensorType(op.getSource());
-    SparseTensorType dstStt = getSparseTensorType(op.getDest());
+    SparseTensorType srcStt = getSparseTensorType(op.getInputCoo());
+    SparseTensorType dstStt = getSparseTensorType(op.getResultCoo());
 
-    // TODO: This should be verification rules for sort_coo operation.
+    // Should have been verified.
     assert(dstStt.isAllOrdered() && !srcStt.isAllOrdered() &&
            isUniqueCOOType(srcStt.getRankedTensorType()) &&
           isUniqueCOOType(dstStt.getRankedTensorType()));
-
     assert(dstStt.hasSameDimToLvl(srcStt));
 
     // We don't need a mutable descriptor here as we perform sorting in-place.
-    auto nnz = genValMemSize(rewriter, op.getLoc(), adaptor.getSource());
-    auto desc = getDescriptorFromTensorTuple(adaptor.getSource());
+    auto nnz = genValMemSize(rewriter, op.getLoc(), adaptor.getInputCoo());
+    auto desc = getDescriptorFromTensorTuple(adaptor.getInputCoo());
     auto crd = desc.getAOSMemRef();
     auto val = desc.getValMemRef();
 
@@ -715,12 +710,11 @@ struct SparseSortCOOConverter : public OpConversionPattern<ConvertOp> {
     auto id = AffineMap::getMultiDimIdentityMap(srcStt.getLvlRank(), ctx);
 
     rewriter.create<SortOp>(loc, nnz, crd, ValueRange{val}, id,
-                            rewriter.getIndexAttr(0),
-                            SparseTensorSortKind::HybridQuickSort);
+                            rewriter.getIndexAttr(0), op.getAlgorithm());
 
     // Since we do in-place sorting, the destinate tensor will have the same set
    // of memrefs as the source tensor.
-    rewriter.replaceOp(op, adaptor.getSource());
+    rewriter.replaceOp(op, adaptor.getInputCoo());
     return success();
   }
 };
@@ -1147,9 +1141,6 @@ class SparseConvertConverter : public OpConversionPattern<ConvertOp> {
   LogicalResult
   matchAndRewrite(ConvertOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    if (op.isSortCOOConvert())
-      return failure();
-
     SparseTensorEncodingAttr encDst = getSparseTensorEncoding(op.getType());
     SparseTensorEncodingAttr encSrc =
         getSparseTensorEncoding(op.getSource().getType());
@@ -1603,7 +1594,7 @@ void mlir::populateSparseTensorCodegenPatterns(
                SparseCastConverter, SparseExtractSliceConverter,
                SparseTensorLoadConverter, SparseExpandConverter,
                SparseCompressConverter, SparseInsertConverter,
-               SparseSortCOOConverter,
+               SparseReorderCOOConverter,
                SparseSliceGetterOpConverter<ToSliceOffsetOp,
                                             StorageSpecifierKind::DimOffset>,
                SparseSliceGetterOpConverter<ToSliceStrideOp,
