Commit 7832d0f

[mlir] [VectorOps] [integration_test] Sparse matrix times vector (DOT version)

Integration test that illustrates the gather operation with a real-world
operation expressed mostly in the Vector dialect. Uses jagged diagonal storage.

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D84571

1 parent 2a67276 commit 7832d0f

File tree

1 file changed: +270 −0 lines changed
@@ -0,0 +1,270 @@
// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Illustrates an 8x8 Sparse Matrix x Vector implemented with only operations
// of the vector dialect (and some std/scf). Essentially, this example performs
// the following multiplication:
//
//       0  1  2  3  4  5  6  7
//     +------------------------+
//   0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
//   1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
//   2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
//   3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
//   4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
//   5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
//   6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
//   7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
//     +------------------------+
//
// The sparse storage scheme used is an extended column scheme (also referred
// to as jagged diagonal), which is essentially a vector-friendly variant of
// the general sparse row-wise scheme (also called compressed row storage):
// it uses fixed-length vectors and no explicit pointer indexing into the
// value array to find the rows.
//
// The extended column storage for the matrix shown above is as follows.
//
//      VALUE           INDEX
//   +---------+     +---------+
// 0 | 1 2 1 1 |     | 0 2 5 7 |
// 1 | 1 8 3 1 |     | 0 1 4 6 |
// 2 | 1 2 6 2 |     | 2 5 6 7 |
// 3 | 3 1 1 1 |     | 1 3 5 7 |
// 4 | 5 1 1 1 |     | 0 3 4 5 |
// 5 | 3 2 1 2 |     | 1 4 5 6 |
// 6 | 4 7 1 1 |     | 0 2 4 6 |
// 7 | 3 2 1 1 |     | 1 3 6 7 |
//   +---------+     +---------+
//
// This example illustrates a DOT version for the operation. Another example
// in this directory illustrates an effective SAXPY version that operates on
// the transposed jagged diagonal storage to obtain higher vector lengths.
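//
// Every row of this particular matrix has exactly four nonzero entries, so
// each row fits a single vector<4xf32>/vector<4xi32> pair without padding.
// As a sanity check on the expected output, row 0 contributes
//
//   B[0] = 1*X[0] + 2*X[2] + 1*X[5] + 1*X[7]
//        = 1*1 + 2*3 + 1*6 + 1*8 = 21,
//
// the first entry of the result vector shown above.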

#contraction_accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> ()>
]
#dot_trait = {
  indexing_maps = #contraction_accesses,
  iterator_types = ["reduction"]
}
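
// With these indexing maps, vector.contract reduces its single iteration
// dimension i: both vector operands are indexed by i while the accumulator
// carries no dimensions, so the op computes the scalar dot product
// sum_i(a[i] * b[i]) + acc.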

func @spmv8x8(%AVAL: memref<8xvector<4xf32>>,
              %AIDX: memref<8xvector<4xi32>>, %X: memref<?xf32>, %B: memref<?xf32>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %cn = constant 8 : index
  %f0 = constant 0.0 : f32
  %mask = vector.constant_mask [4] : vector<4xi1>
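  // Each iteration computes one output element: load the packed row values
  // and column indices, gather the X elements at those indices, and reduce
  // with a dot product. All four lanes are valid, hence the constant mask.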
  scf.for %i = %c0 to %cn step %c1 {
    %aval = load %AVAL[%i] : memref<8xvector<4xf32>>
    %aidx = load %AIDX[%i] : memref<8xvector<4xi32>>
    %0 = vector.gather %X, %aidx, %mask
        : (memref<?xf32>, vector<4xi32>, vector<4xi1>) -> vector<4xf32>
    %1 = vector.contract #dot_trait %aval, %0, %f0 : vector<4xf32>, vector<4xf32> into f32
    store %1, %B[%i] : memref<?xf32>
  }
  return
}

func @entry() {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c3 = constant 3 : index
  %c4 = constant 4 : index
  %c5 = constant 5 : index
  %c6 = constant 6 : index
  %c7 = constant 7 : index
  %c8 = constant 8 : index

  %f0 = constant 0.0 : f32
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  %f3 = constant 3.0 : f32
  %f4 = constant 4.0 : f32
  %f5 = constant 5.0 : f32
  %f6 = constant 6.0 : f32
  %f7 = constant 7.0 : f32
  %f8 = constant 8.0 : f32

  %i0 = constant 0 : i32
  %i1 = constant 1 : i32
  %i2 = constant 2 : i32
  %i3 = constant 3 : i32
  %i4 = constant 4 : i32
  %i5 = constant 5 : i32
  %i6 = constant 6 : i32
  %i7 = constant 7 : i32

  //
  // Allocate.
  //

  %AVAL = alloc() {alignment = 64} : memref<8xvector<4xf32>>
  %AIDX = alloc() {alignment = 64} : memref<8xvector<4xi32>>
  %X = alloc(%c8) {alignment = 64} : memref<?xf32>
  %B = alloc(%c8) {alignment = 64} : memref<?xf32>
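
  // The buffers are 64-byte aligned (a typical cache-line size), presumably
  // to keep the vector loads and the gather accesses on friendly boundaries.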

  //
  // Initialize.
  //

  %vf1 = vector.broadcast %f1 : f32 to vector<4xf32>
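
  // Each VALUE row starts from a broadcast of 1.0 (the most frequent entry)
  // and overwrites the remaining lanes with vector.insert; the first two
  // stores below produce rows ( 1, 2, 1, 1 ) and ( 1, 8, 3, 1 ).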
  %0 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32>
  store %0, %AVAL[%c0] : memref<8xvector<4xf32>>

  %1 = vector.insert %f8, %vf1[1] : f32 into vector<4xf32>
  %2 = vector.insert %f3, %1[2] : f32 into vector<4xf32>
  store %2, %AVAL[%c1] : memref<8xvector<4xf32>>

  %3 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32>
  %4 = vector.insert %f6, %3[2] : f32 into vector<4xf32>
  %5 = vector.insert %f2, %4[3] : f32 into vector<4xf32>
  store %5, %AVAL[%c2] : memref<8xvector<4xf32>>

  %6 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32>
  store %6, %AVAL[%c3] : memref<8xvector<4xf32>>

  %7 = vector.insert %f5, %vf1[0] : f32 into vector<4xf32>
  store %7, %AVAL[%c4] : memref<8xvector<4xf32>>

  %8 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32>
  %9 = vector.insert %f2, %8[1] : f32 into vector<4xf32>
  %10 = vector.insert %f2, %9[3] : f32 into vector<4xf32>
  store %10, %AVAL[%c5] : memref<8xvector<4xf32>>

  %11 = vector.insert %f4, %vf1[0] : f32 into vector<4xf32>
  %12 = vector.insert %f7, %11[1] : f32 into vector<4xf32>
  store %12, %AVAL[%c6] : memref<8xvector<4xf32>>

  %13 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32>
  %14 = vector.insert %f2, %13[1] : f32 into vector<4xf32>
  store %14, %AVAL[%c7] : memref<8xvector<4xf32>>
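
  // The INDEX rows are built the same way: broadcast column index 0, then
  // insert the remaining column positions of each row.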
  %vi0 = vector.broadcast %i0 : i32 to vector<4xi32>

  %20 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32>
  %21 = vector.insert %i5, %20[2] : i32 into vector<4xi32>
  %22 = vector.insert %i7, %21[3] : i32 into vector<4xi32>
  store %22, %AIDX[%c0] : memref<8xvector<4xi32>>

  %23 = vector.insert %i1, %vi0[1] : i32 into vector<4xi32>
  %24 = vector.insert %i4, %23[2] : i32 into vector<4xi32>
  %25 = vector.insert %i6, %24[3] : i32 into vector<4xi32>
  store %25, %AIDX[%c1] : memref<8xvector<4xi32>>

  %26 = vector.insert %i2, %vi0[0] : i32 into vector<4xi32>
  %27 = vector.insert %i5, %26[1] : i32 into vector<4xi32>
  %28 = vector.insert %i6, %27[2] : i32 into vector<4xi32>
  %29 = vector.insert %i7, %28[3] : i32 into vector<4xi32>
  store %29, %AIDX[%c2] : memref<8xvector<4xi32>>

  %30 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32>
  %31 = vector.insert %i3, %30[1] : i32 into vector<4xi32>
  %32 = vector.insert %i5, %31[2] : i32 into vector<4xi32>
  %33 = vector.insert %i7, %32[3] : i32 into vector<4xi32>
  store %33, %AIDX[%c3] : memref<8xvector<4xi32>>

  %34 = vector.insert %i3, %vi0[1] : i32 into vector<4xi32>
  %35 = vector.insert %i4, %34[2] : i32 into vector<4xi32>
  %36 = vector.insert %i5, %35[3] : i32 into vector<4xi32>
  store %36, %AIDX[%c4] : memref<8xvector<4xi32>>

  %37 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32>
  %38 = vector.insert %i4, %37[1] : i32 into vector<4xi32>
  %39 = vector.insert %i5, %38[2] : i32 into vector<4xi32>
  %40 = vector.insert %i6, %39[3] : i32 into vector<4xi32>
  store %40, %AIDX[%c5] : memref<8xvector<4xi32>>

  %41 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32>
  %42 = vector.insert %i4, %41[2] : i32 into vector<4xi32>
  %43 = vector.insert %i6, %42[3] : i32 into vector<4xi32>
  store %43, %AIDX[%c6] : memref<8xvector<4xi32>>

  %44 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32>
  %45 = vector.insert %i3, %44[1] : i32 into vector<4xi32>
  %46 = vector.insert %i6, %45[2] : i32 into vector<4xi32>
  %47 = vector.insert %i7, %46[3] : i32 into vector<4xi32>
  store %47, %AIDX[%c7] : memref<8xvector<4xi32>>
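
  // Set up the dense operand X = ( 1, 2, ..., 8 ) and clear the output B.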
  scf.for %i = %c0 to %c8 step %c1 {
    %ix = addi %i, %c1 : index
    %kx = index_cast %ix : index to i32
    %fx = sitofp %kx : i32 to f32
    store %fx, %X[%i] : memref<?xf32>
    store %f0, %B[%i] : memref<?xf32>
  }

  //
  // Multiply.
  //

  call @spmv8x8(%AVAL, %AIDX, %X, %B) : (memref<8xvector<4xf32>>,
                                         memref<8xvector<4xi32>>,
                                         memref<?xf32>, memref<?xf32>) -> ()

  //
  // Print and verify.
  //

  scf.for %i = %c0 to %c8 step %c1 {
    %aval = load %AVAL[%i] : memref<8xvector<4xf32>>
    vector.print %aval : vector<4xf32>
  }

  scf.for %i = %c0 to %c8 step %c1 {
    %aidx = load %AIDX[%i] : memref<8xvector<4xi32>>
    vector.print %aidx : vector<4xi32>
  }

  scf.for %i = %c0 to %c8 step %c1 {
    %ldb = load %B[%i] : memref<?xf32>
    vector.print %ldb : f32
  }
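
  // The loops above print the VALUE rows, the INDEX rows, and the result
  // vector B, in exactly the order expected by the FileCheck lines below.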

  //
  // CHECK:      ( 1, 2, 1, 1 )
  // CHECK-NEXT: ( 1, 8, 3, 1 )
  // CHECK-NEXT: ( 1, 2, 6, 2 )
  // CHECK-NEXT: ( 3, 1, 1, 1 )
  // CHECK-NEXT: ( 5, 1, 1, 1 )
  // CHECK-NEXT: ( 3, 2, 1, 2 )
  // CHECK-NEXT: ( 4, 7, 1, 1 )
  // CHECK-NEXT: ( 3, 2, 1, 1 )
  //
  // CHECK-NEXT: ( 0, 2, 5, 7 )
  // CHECK-NEXT: ( 0, 1, 4, 6 )
  // CHECK-NEXT: ( 2, 5, 6, 7 )
  // CHECK-NEXT: ( 1, 3, 5, 7 )
  // CHECK-NEXT: ( 0, 3, 4, 5 )
  // CHECK-NEXT: ( 1, 4, 5, 6 )
  // CHECK-NEXT: ( 0, 2, 4, 6 )
  // CHECK-NEXT: ( 1, 3, 6, 7 )
  //
  // CHECK-NEXT: 21
  // CHECK-NEXT: 39
  // CHECK-NEXT: 73
  // CHECK-NEXT: 24
  // CHECK-NEXT: 20
  // CHECK-NEXT: 36
  // CHECK-NEXT: 37
  // CHECK-NEXT: 29
  //

  //
  // Free.
  //

  dealloc %AVAL : memref<8xvector<4xf32>>
  dealloc %AIDX : memref<8xvector<4xi32>>
  dealloc %X : memref<?xf32>
  dealloc %B : memref<?xf32>

  return
}
