Skip to content

Commit 0165f88

Browse files
authored
[RISCV] Fix the worst case for VSHA2MS in SiFive P400/P600 scheduling models (llvm#116893)
For each RVV instruction we should have a single WriteRes assignment to the worst-case scheduling class. This assignment is usually equal to that of the largest LMUL + smallest SEW. My earlier patch, llvm#114317, accidentally made two such assignments to `WriteVSHA2MSV_WorstCase`. This affects neither our MachineScheduler nor most of our llvm-mca use cases (assuming you populate the correct LMUL and SEW), but it is not ideal either. This patch fixes the issue by assigning the correct numbers and resource mapping to `WriteVSHA2MSV_WorstCase`, namely those of the largest LMUL + _largest_ SEW (Zvknh's scheduling properties are special). I also added an MCA test to make sure we always pick up the correct worst-case numbers for P600's scheduling model. The original issue was reported by @reidtatge.
1 parent 040f1c7 commit 0165f88

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td

+10-2
Original file line number | Diff line number | Diff line change
@@ -883,8 +883,16 @@ foreach mx = SchedMxList in {
883883
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
884884
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
885885
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
886-
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
887-
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
886+
defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
887+
// Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
888+
// order.
889+
defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
890+
foreach sew = ZvknhSEWs in {
891+
// The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
892+
defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
893+
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew,
894+
IsWorstCaseVSHA2MSV>;
895+
}
888896
}
889897
// Zvkned
890898
let Latency = 2, ReleaseAtCycles = [LMulLat] in {

llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td

+9-2
Original file line number | Diff line number | Diff line change
@@ -1135,9 +1135,16 @@ foreach mx = SchedMxList in {
11351135
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
11361136
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
11371137
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
1138-
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in {
1138+
defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
1139+
// Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
1140+
// order.
1141+
defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
1142+
foreach sew = ZvknhSEWs in {
1143+
// The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
1144+
defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
11391145
let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in
1140-
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>;
1146+
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew,
1147+
IsWorstCaseVSHA2MSV>;
11411148
}
11421149
}
11431150
// Zvkned

llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s

+12-7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
22
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s
33

4+
# Worst case for vsha2ms should be that of LMUL=8 and SEW=64.
5+
vsha2ms.vv v4, v8, v12
6+
47
# SEW is only e32 or e64
58

69
vsetvli zero, zero, e32, m1, tu, mu
@@ -44,14 +47,14 @@ vsha2ch.vv v8, v16, v24
4447
vsha2cl.vv v8, v16, v24
4548

4649
# CHECK: Iterations: 1
47-
# CHECK-NEXT: Instructions: 32
48-
# CHECK-NEXT: Total Cycles: 108
49-
# CHECK-NEXT: Total uOps: 32
50+
# CHECK-NEXT: Instructions: 33
51+
# CHECK-NEXT: Total Cycles: 119
52+
# CHECK-NEXT: Total uOps: 33
5053

5154
# CHECK: Dispatch Width: 4
52-
# CHECK-NEXT: uOps Per Cycle: 0.30
53-
# CHECK-NEXT: IPC: 0.30
54-
# CHECK-NEXT: Block RThroughput: 97.0
55+
# CHECK-NEXT: uOps Per Cycle: 0.28
56+
# CHECK-NEXT: IPC: 0.28
57+
# CHECK-NEXT: Block RThroughput: 109.0
5558

5659
# CHECK: Instruction Info:
5760
# CHECK-NEXT: [1]: #uOps
@@ -62,6 +65,7 @@ vsha2cl.vv v8, v16, v24
6265
# CHECK-NEXT: [6]: HasSideEffects (U)
6366

6467
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
68+
# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v4, v8, v12
6569
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
6670
# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12
6771
# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12
@@ -115,10 +119,11 @@ vsha2cl.vv v8, v16, v24
115119

116120
# CHECK: Resource pressure per iteration:
117121
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
118-
# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - -
122+
# CHECK-NEXT: - - - - 8.00 - - - - - - 109.00 - - - -
119123

120124
# CHECK: Resource pressure by instruction:
121125
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
126+
# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v4, v8, v12
122127
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
123128
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12
124129
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12

0 commit comments

Comments
 (0)