@@ -263,23 +263,15 @@ Value LoopEmitter::genSegmentHigh(OpBuilder &builder, Location loc,
263
263
}
264
264
265
265
// Generates the coordinate of tensor `tid` at level `lvl`. After this change
// the body reads coordinatesBuffers[tid][lvl] at the position currently stored
// in posits[tid][lvl] (via genIndexLoad) and returns that value added to a
// zero index constant; the removed lines below looped over a collapse
// reassociation and linearized the per-level coordinates instead.
Value LoopEmitter::genSparseCrd (OpBuilder &builder, Location loc, TensorId tid,
266
- Level dstLvl ) {
266
+ Level lvl ) {
267
267
Value crd = C_IDX (0 );
268
- const auto reassoc = getCollapseReassociation (tid, dstLvl);
269
- const unsigned reassocSize = reassoc.size ();
270
- for (unsigned i = 0 ; i < reassocSize; i++) {
271
- const Level srcLvl = reassoc[i];
272
- // A load on the coordinates array yields the coordinate.
273
- const Value mem = coordinatesBuffers[tid][srcLvl];
274
- // / FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
275
- const Value pos = posits[tid][dstLvl];
276
- const Value off = genIndexLoad (builder, loc, mem, pos);
277
- // Linearized the coordinates within the same collapse reassociation.
278
- crd = ADDI (crd, off);
279
- if (i != reassocSize - 1 ) {
280
- crd = MULI (crd, this ->lvlSizes [tid][reassoc[i + 1 ]]);
281
- }
282
- }
268
+ // A load on the coordinates array yields the coordinate.
269
+ const Value mem = coordinatesBuffers[tid][lvl];
270
+ /// FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
271
+ const Value pos = posits[tid][lvl];
272
+ const Value off = genIndexLoad (builder, loc, mem, pos);
273
+ // Only a single level is read now, so nothing is linearized: `crd` is still the zero constant here and the sum is just the loaded coordinate.
274
+ crd = ADDI (crd, off);
283
275
return crd;
284
276
}
285
277
@@ -312,7 +304,6 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
312
304
this ->positionsBuffers .assign (numTensors, std::vector<Value>());
313
305
this ->coordinatesBuffers .assign (numTensors, std::vector<Value>());
314
306
this ->valBuffer .assign (numTensors, nullptr );
315
- this ->collapseReassoc .assign (numTensors, nullptr );
316
307
this ->isSparseSlices .assign (numTensors, false );
317
308
this ->sliceOffsets .assign (numTensors, std::vector<Value>());
318
309
this ->sliceStrides .assign (numTensors, std::vector<Value>());
@@ -348,16 +339,6 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
348
339
continue ;
349
340
350
341
auto rtp = getRankedTensorType (t);
351
- if (auto reshape = t.getDefiningOp <tensor::CollapseShapeOp>();
352
- isUniqueCOOType (rtp) && reshape) {
353
- // TODO: Supports more kinds of sparse tensors.
354
- // FIXME: We should instead lower reshape operations on sparse tensors
355
- // to view change.
356
- collapseReassoc[tid] = reshape.getReassociation ();
357
- rtp = reshape.getSrcType ();
358
- // Overwrites the tensor to the source tensor of reshape operations.
359
- tensors[tid] = reshape.getSrc ();
360
- }
361
342
const SparseTensorType stt (rtp);
362
343
lvlRank = stt.getLvlRank ();
363
344
@@ -394,16 +375,11 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
394
375
/* offset=*/ Value (), /* isNonEmpty*/ Value (),
395
376
std::nullopt, 0 );
396
377
if (dimGetter && !isSynTensor (tid)) {
397
- auto reassoc = collapseReassoc[tid];
398
- Level dstRank = reassoc ? reassoc.size () : lvlRank;
399
- for (Level l = 0 ; l < dstRank; l++) {
378
+ for (Level l = 0 ; l < lvlRank; l++) {
400
379
dependentLvlMap[tid][l] = dimGetter (tid, l);
401
380
unsigned depends = dependentLvlMap[tid][l].size ();
402
381
if (depends == 0 )
403
382
continue ;
404
- // TODO: View-base collapse and dependent index reduction are not
405
- // compatible right now.
406
- assert (!reassoc);
407
383
// We need `depends - 1` slices to fully reduce the affine expression.
408
384
sliceSizes[tid][l].assign (depends - 1 , nullptr );
409
385
slicePosBuffer[tid][l].assign (depends - 1 , nullptr );
@@ -645,23 +621,18 @@ Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) {
645
621
}
646
622
647
623
std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl (
648
- OpBuilder &builder, Location loc, TensorId tid, Level dstLvl , Value lo,
624
+ OpBuilder &builder, Location loc, TensorId tid, Level lvl , Value lo,
649
625
Value hi, MutableArrayRef<Value> reduc, bool isParallel) {
650
- bool isSparseCond = isCompressedDLT (lvlTypes[tid][dstLvl]) ||
651
- isCompressedWithHiDLT (lvlTypes[tid][dstLvl]) ||
652
- isSingletonDLT (lvlTypes[tid][dstLvl]);
653
-
654
- const auto reassoc = getCollapseReassociation (tid, dstLvl);
626
+ bool isSparseCond = isCompressedDLT (lvlTypes[tid][lvl]) ||
627
+ isCompressedWithHiDLT (lvlTypes[tid][lvl]) ||
628
+ isSingletonDLT (lvlTypes[tid][lvl]);
655
629
// TODO: support dynamic slices.
656
630
// Uses the first dimension here to build the loop bound (which is also the
657
631
// biggest range).
658
- const Level srcLvl = reassoc.front ();
659
632
Value step = C_IDX (1 );
660
-
661
633
Operation *loop = nullptr ;
662
634
Value iv;
663
635
if (isParallel) {
664
- assert (collapseReassoc[tid] == nullptr );
665
636
scf::ParallelOp parOp =
666
637
builder.create <scf::ParallelOp>(loc, lo, hi, step, reduc);
667
638
builder.setInsertionPointToStart (parOp.getBody ());
@@ -693,12 +664,10 @@ std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
693
664
694
665
Value crd;
695
666
if (isSparseCond) {
696
- assert (reassoc.size () == 1 || isUniqueCOOType (tensors[tid].getType ()));
697
667
// For COO, the position is the same across consecutive levels.
698
668
// / FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
699
- llvm::for_each (reassoc,
700
- [this , tid, iv](Level srcLvl) { posits[tid][srcLvl] = iv; });
701
- crd = genSparseCrd (builder, loc, tid, dstLvl);
669
+ posits[tid][lvl] = iv;
670
+ crd = genSparseCrd (builder, loc, tid, lvl);
702
671
} else {
703
672
// Dense tensor, the coordinate is the induction variable.
704
673
crd = iv;
@@ -711,7 +680,7 @@ std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
711
680
for (Value red : reduc)
712
681
types.push_back (red.getType ());
713
682
714
- auto [trans, pred] = genSliceLegitPredicate (builder, loc, crd, tid, srcLvl );
683
+ auto [trans, pred] = genSliceLegitPredicate (builder, loc, crd, tid, lvl );
715
684
bool hasReduc = !types.empty ();
716
685
scf::IfOp ifOp = builder.create <scf::IfOp>(loc, types, pred,
717
686
/* else*/ hasReduc);
@@ -733,7 +702,7 @@ std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
733
702
}
734
703
735
704
assert (crd);
736
- coords[tid][dstLvl ] = crd;
705
+ coords[tid][lvl ] = crd;
737
706
return {loop, crd};
738
707
}
739
708
@@ -743,11 +712,9 @@ Value LoopEmitter::genWhileLoopConditions(OpBuilder &builder, Location loc,
743
712
744
713
switch (cond.second ) {
745
714
case LoopCondKind::SparseCond: {
746
- const auto reassoc = getCollapseReassociation (tid, lvl);
747
- assert (reassoc.size () == ivs.size ());
748
- assert (reassoc.size () == 1 || isUniqueCOOType (tensors[tid].getType ()));
715
+ assert (ivs.size () == 1 );
749
716
// We used the first level bound as the bound the collapsed set of levels.
750
- return CMPI (ult, ivs.back (), highs[tid][reassoc. front () ]);
717
+ return CMPI (ult, ivs.back (), highs[tid][lvl ]);
751
718
}
752
719
case LoopCondKind::SparseSliceCond: {
753
720
assert (ivs.size () == 1 );
@@ -787,17 +754,9 @@ std::optional<Value> LoopEmitter::genWhileLoopBody(OpBuilder &builder,
787
754
788
755
switch (cond.second ) {
789
756
case LoopCondKind::SparseCond: {
790
- const auto reassoc = getCollapseReassociation (tid, lvl);
791
- assert (reassoc.size () == 1 || isUniqueCOOType (tensors[tid].getType ()));
792
- // Links the SSA chain for segHi.
793
- for (unsigned i = 0 , e = reassoc.size () - 1 ; i < e; i++)
794
- if (!isUniqueDLT (lvlTypes[tid][reassoc[i]]))
795
- segHi[tid][reassoc[i]] = ivs[i];
796
-
797
757
// Updates position. For collapsed COO, the position is the same across
798
758
// consecutive levels.
799
- for (auto srcLvl : reassoc)
800
- posits[tid][srcLvl] = ivs.back ();
759
+ posits[tid][lvl] = ivs.back ();
801
760
802
761
// Update coordinates.
803
762
coords[tid][lvl] = genSparseCrd (builder, loc, tid, lvl);
@@ -883,11 +842,9 @@ std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
883
842
(void )lvlTp;
884
843
885
844
unsigned prevSz = ivs.size ();
886
- const auto reassoc = getCollapseReassociation (tid, lvl);
887
845
if (isAffineIdxCond (cKind)) {
888
846
// TODO: Support view-based reshape on sparse levels with affine index
889
847
// expressions.
890
- assert (reassoc.size () == 1 );
891
848
if (isAffineIdxUnRedCond (cKind)) {
892
849
SliceInfo &sliceInfo = sliceStack[tid].back ();
893
850
// The order matters!
@@ -901,12 +858,7 @@ std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
901
858
levelReducedDep[tid][lvl]++;
902
859
} else {
903
860
assert (dependentLvlMap[tid][lvl].empty ());
904
- for (unsigned i = 0 , e = reassoc.size () - 1 ; i < e; i++) {
905
- // This is the segment high for each non-unique levels.
906
- if (!isUniqueDLT (lvlTypes[tid][reassoc[i]]))
907
- ivs.push_back (C_IDX (0 ));
908
- }
909
- const Value pos = posits[tid][reassoc.front ()];
861
+ const Value pos = posits[tid][lvl];
910
862
ivs.push_back (pos);
911
863
}
912
864
opSegSize.push_back (ivs.size () - prevSz);
@@ -985,49 +937,11 @@ std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
985
937
builder.setInsertionPointToStart (&ifOp.getThenRegion ().front ());
986
938
}
987
939
988
- for (auto [tid, dstLvl] : unpackTensorLevelFromCondRange (spConds)) {
989
- const auto reassoc = getCollapseReassociation (tid, dstLvl);
990
- assert (reassoc.size () == 1 || isUniqueCOOType (tensors[tid].getType ()));
991
- // TODO: Refactors this into smaller functions.
992
- // NOTE: For all the collapsed level (except for the last one, that is why
993
- // the loop ends with `reassoc.size() - 1`), as each iteration is advanced
994
- // by the segment size of the last level, which does not always invalidate
995
- // the segment size for the previous levels, thus we need to propagate the
996
- // segment sizes across loop iterations and only forward if needed.
997
- //
998
- // E.g., for a COO tensor with the following coordinates array.
999
- // (0, 0, 1),
1000
- // (0, 0, 2),
1001
- // (1, 1, 1),
1002
- // segHi[lvl=0] = segHi[lvl=1] = 2
1003
- // segHi[lvl=2] = 1,
1004
- // the first iteration does not invalidate segHi[0] and segHi[1]
1005
- for (unsigned i = 0 , e = reassoc.size () - 1 ; i < e; i++) {
1006
- const Level srcLvl = reassoc[i];
1007
- if (!isUniqueDLT (lvlTypes[tid][srcLvl])) {
1008
- const Value pos = posits[tid][srcLvl];
1009
- const auto oldSegHi = segHi[tid][srcLvl];
1010
- assert (oldSegHi);
1011
- Value newSegHi = builder.create <arith::CmpIOp>(
1012
- loc, arith::CmpIPredicate::uge, pos, oldSegHi);
1013
- auto ifNewSegHi = builder.create <scf::IfOp>(loc, builder.getIndexType (),
1014
- newSegHi, true );
1015
- {
1016
- OpBuilder::InsertionGuard guard (builder);
1017
- builder.setInsertionPointToStart (ifNewSegHi.thenBlock ());
1018
- YIELD (genSegmentHigh (builder, loc, tid, srcLvl, pos,
1019
- highs[tid][srcLvl]));
1020
- // Else, resues the same segment high.
1021
- builder.setInsertionPointToStart (ifNewSegHi.elseBlock ());
1022
- YIELD (oldSegHi);
1023
- }
1024
- highs[tid][srcLvl + 1 ] = segHi[tid][srcLvl] = ifNewSegHi.getResult (0 );
1025
- }
1026
- };
1027
- const auto srcLvl = reassoc.back ();
1028
- if (!isUniqueDLT (lvlTypes[tid][srcLvl])) {
1029
- segHi[tid][srcLvl] = genSegmentHigh (
1030
- builder, loc, tid, srcLvl, posits[tid][srcLvl], highs[tid][srcLvl]);
940
+ for (auto [tid, lvl] : unpackTensorLevelFromCondRange (spConds)) {
941
+ // Generates segment high for non-unique level.
942
+ if (!isUniqueDLT (lvlTypes[tid][lvl])) {
943
+ segHi[tid][lvl] = genSegmentHigh (builder, loc, tid, lvl, posits[tid][lvl],
944
+ highs[tid][lvl]);
1031
945
}
1032
946
}
1033
947
@@ -1074,9 +988,8 @@ bool LoopEmitter::shouldIteratedByForLoop(ArrayRef<TensorLvlCond> sparseConds,
1074
988
// non-unique levels when deduplication is required.
1075
989
if (sparseConds.size () == 1 ) {
1076
990
auto [tid, lvl] = unpackTensorLevel (sparseConds.back ().first );
1077
- auto reassoc = getCollapseReassociation (tid, lvl);
1078
991
return !isAffineIdxCond (sparseConds.back ().second ) &&
1079
- !(genDedup && !isUniqueDLT (lvlTypes[tid][reassoc. back () ]));
992
+ !(genDedup && !isUniqueDLT (lvlTypes[tid][lvl ]));
1080
993
}
1081
994
1082
995
return true ;
@@ -1245,50 +1158,45 @@ void LoopEmitter::genDenseAffineAddress(OpBuilder &builder, Location loc,
1245
1158
}
1246
1159
1247
1160
void LoopEmitter::prepareLoopOverTensorAtLvl (OpBuilder &builder, Location loc,
1248
- TensorId tid, Level dstLvl ) {
1249
- assert (isValidLevel (tid, dstLvl ));
1250
- const auto lvlTp = lvlTypes[tid][dstLvl ];
1161
+ TensorId tid, Level lvl ) {
1162
+ assert (isValidLevel (tid, lvl ));
1163
+ const auto lvlTp = lvlTypes[tid][lvl ];
1251
1164
1252
1165
if (isDenseDLT (lvlTp))
1253
1166
return ;
1254
1167
1255
1168
const Value c0 = C_IDX (0 );
1256
1169
const Value c1 = C_IDX (1 );
1257
- for (const Level srcLvl : getCollapseReassociation (tid, dstLvl)) {
1258
- // Either the first level, or the previous level has been set.
1259
- // / FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
1260
- assert (srcLvl == 0 || posits[tid][srcLvl - 1 ]);
1261
- if (isDenseDLT (lvlTp))
1262
- continue ;
1263
- if (isCompressedDLT (lvlTp) || isCompressedWithHiDLT (lvlTp)) {
1264
- const Value mem = positionsBuffers[tid][srcLvl];
1265
-
1266
- Value pLo = srcLvl == 0 ? c0 : posits[tid][srcLvl - 1 ];
1267
- if (isCompressedWithHiDLT (lvlTp))
1268
- pLo = builder.create <arith::MulIOp>(loc, pLo, C_IDX (2 ));
1269
- posits[tid][srcLvl] = genIndexLoad (builder, loc, mem, pLo);
1270
-
1271
- const Value pHi = ADDI (pLo, c1);
1272
- highs[tid][srcLvl] = genIndexLoad (builder, loc, mem, pHi);
1273
- return ;
1274
- }
1275
- if (isSingletonDLT (lvlTp)) {
1276
- const Value pLo = srcLvl == 0 ? c0 : posits[tid][srcLvl - 1 ];
1277
- posits[tid][srcLvl] = pLo;
1278
-
1279
- // If we are coiterating non-unique levels, then use pHi=segHi;
1280
- // otherwise use pHi=pLo+1.
1281
- // NOTE: Just because the level is non-unique, that does not
1282
- // guarantee that segHi is defined: because we only generate segHi
1283
- // whenever coiterating, in order to improve code quality for the
1284
- // non-coiterating cases.
1285
- const auto parentSegHi = segHi[tid][srcLvl - 1 ];
1286
- highs[tid][srcLvl] =
1287
- (!isUniqueDLT (lvlTypes[tid][srcLvl - 1 ]) && parentSegHi)
1288
- ? parentSegHi
1289
- : ADDI (pLo, c1);
1290
- return ;
1291
- }
1170
+ // Either the first level, or the previous level has been set.
1171
+ // / FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
1172
+ assert (lvl == 0 || posits[tid][lvl - 1 ]);
1173
+ if (isCompressedDLT (lvlTp) || isCompressedWithHiDLT (lvlTp)) {
1174
+ const Value mem = positionsBuffers[tid][lvl];
1175
+
1176
+ Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1 ];
1177
+ if (isCompressedWithHiDLT (lvlTp))
1178
+ pLo = builder.create <arith::MulIOp>(loc, pLo, C_IDX (2 ));
1179
+ posits[tid][lvl] = genIndexLoad (builder, loc, mem, pLo);
1180
+
1181
+ const Value pHi = ADDI (pLo, c1);
1182
+ highs[tid][lvl] = genIndexLoad (builder, loc, mem, pHi);
1183
+ return ;
1184
+ }
1185
+ if (isSingletonDLT (lvlTp)) {
1186
+ const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1 ];
1187
+ posits[tid][lvl] = pLo;
1188
+
1189
+ // If we are coiterating non-unique levels, then use pHi=segHi;
1190
+ // otherwise use pHi=pLo+1.
1191
+ // NOTE: Just because the level is non-unique, that does not
1192
+ // guarantee that segHi is defined: because we only generate segHi
1193
+ // whenever coiterating, in order to improve code quality for the
1194
+ // non-coiterating cases.
1195
+ const auto parentSegHi = segHi[tid][lvl - 1 ];
1196
+ highs[tid][lvl] = (!isUniqueDLT (lvlTypes[tid][lvl - 1 ]) && parentSegHi)
1197
+ ? parentSegHi
1198
+ : ADDI (pLo, c1);
1199
+ return ;
1292
1200
}
1293
1201
1294
1202
llvm_unreachable (" Unrecognized level-type!" );
@@ -1542,28 +1450,18 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
1542
1450
posits[tid][lvl] = whileOp->getResult (o++);
1543
1451
};
1544
1452
1545
- for (auto [tid, dstLvl ] : unpackTensorLevelRange (loopInfo.trivialTidLvls )) {
1546
- const auto lvlTp = lvlTypes[tid][dstLvl ];
1453
+ for (auto [tid, lvl ] : unpackTensorLevelRange (loopInfo.trivialTidLvls )) {
1454
+ const auto lvlTp = lvlTypes[tid][lvl ];
1547
1455
if (isCompressedDLT (lvlTp) || isSingletonDLT (lvlTp) ||
1548
1456
isCompressedWithHiDLT (lvlTp)) {
1549
- const auto reassoc = getCollapseReassociation (tid, dstLvl);
1550
- assert (reassoc.size () == 1 || isUniqueCOOType (tensors[tid].getType ()));
1551
- for (unsigned i = 0 , e = reassoc.size () - 1 ; i < e; i++) {
1552
- const Level srcLvl = reassoc[i];
1553
- if (!isUniqueDLT (lvlTypes[tid][srcLvl])) {
1554
- operands.push_back (segHi[tid][srcLvl]);
1555
- o++;
1556
- }
1557
- }
1558
- const Value crd = coords[tid][dstLvl];
1559
- const Value pos = posits[tid][dstLvl];
1457
+ const Value crd = coords[tid][lvl];
1458
+ const Value pos = posits[tid][lvl];
1560
1459
Value cmp = CMPI (eq, crd, iv);
1561
1460
// If the loop contains a coiteration with non-unique level, we fast
1562
1461
// forward all the duplicated coords by setting the position to the
1563
1462
// segment high.
1564
- Value add = !isUniqueDLT (lvlTypes[tid][reassoc.back ()])
1565
- ? segHi[tid][reassoc.back ()]
1566
- : ADDI (pos, one);
1463
+ Value add =
1464
+ !isUniqueDLT (lvlTypes[tid][lvl]) ? segHi[tid][lvl] : ADDI (pos, one);
1567
1465
1568
1466
operands.push_back (SELECT (cmp, add, pos));
1569
1467
// Following loops continue iteration from the break point of the
@@ -1573,14 +1471,12 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
1573
1471
// warnings about "captured structured bindings are a C++20 extension".
1574
1472
// FIXME(wrengr): define a helper function to capture this idiom!
1575
1473
const TensorId newTid = tid;
1576
- llvm::for_each (reassoc, [this , newTid, newPos](Level srcLvl) {
1577
- posits[newTid][srcLvl] = newPos;
1578
- });
1474
+ posits[newTid][lvl] = newPos;
1579
1475
1580
1476
// The coordinate is invalid now.
1581
- coords[tid][dstLvl ] = nullptr ;
1477
+ coords[tid][lvl ] = nullptr ;
1582
1478
// The segment high is invalid now.
1583
- segHi[tid][dstLvl ] = nullptr ;
1479
+ segHi[tid][lvl ] = nullptr ;
1584
1480
// highs remains unchanged.
1585
1481
}
1586
1482
}
0 commit comments