// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Illustrates an 8x8 Sparse Matrix x Vector implemented with only operations
// of the vector dialect (and some std/scf). Essentially, this example performs
// the following multiplication:
//
//     0  1  2  3  4  5  6  7
//   +------------------------+
// 0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
// 1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
// 2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
// 3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
// 4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
// 5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
// 6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
// 7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
//   +------------------------+
//
// The sparse storage scheme used is an extended column scheme (also referred
// to as jagged diagonal), which is essentially a vector friendly variant of
// the general sparse row-wise scheme (also called compressed row storage),
// using fixed length vectors and no explicit pointer indexing into the
// value array to find the rows.
//
// The extended column storage for the matrix shown above is as follows.
//
//      VALUE           INDEX
//   +---------+     +---------+
// 0 | 1 2 1 1 |     | 0 2 5 7 |
// 1 | 1 8 3 1 |     | 0 1 4 6 |
// 2 | 1 2 6 2 |     | 2 5 6 7 |
// 3 | 3 1 1 1 |     | 1 3 5 7 |
// 4 | 5 1 1 1 |     | 0 3 4 5 |
// 5 | 3 2 1 2 |     | 1 4 5 6 |
// 6 | 4 7 1 1 |     | 0 2 4 6 |
// 7 | 3 2 1 1 |     | 1 3 6 7 |
//   +---------+     +---------+
//
// This example illustrates a DOT version for the operation. Another example
// in this directory illustrates an effective SAXPY version that operates on the
// transposed jagged diagonal storage to obtain higher vector lengths.

| 46 | +#contraction_accesses = [ |
| 47 | + affine_map<(i) -> (i)>, |
| 48 | + affine_map<(i) -> (i)>, |
| 49 | + affine_map<(i) -> ()> |
| 50 | +] |
| 51 | +#dot_trait = { |
| 52 | + indexing_maps = #contraction_accesses, |
| 53 | + iterator_types = ["reduction"] |
| 54 | +} |
| 56 | +func @spmv8x8(%AVAL: memref<8xvector<4xf32>>, |
| 57 | + %AIDX: memref<8xvector<4xi32>>, %X: memref<?xf32>, %B: memref<?xf32>) { |
| 58 | + %c0 = constant 0 : index |
| 59 | + %c1 = constant 1 : index |
| 60 | + %cn = constant 8 : index |
| 61 | + %f0 = constant 0.0 : f32 |
| 62 | + %mask = vector.constant_mask [4] : vector<4xi1> |
| 63 | + scf.for %i = %c0 to %cn step %c1 { |
| 64 | + %aval = load %AVAL[%i] : memref<8xvector<4xf32>> |
| 65 | + %aidx = load %AIDX[%i] : memref<8xvector<4xi32>> |
| 66 | + %0 = vector.gather %X, %aidx, %mask |
| 67 | + : (memref<?xf32>, vector<4xi32>, vector<4xi1>) -> vector<4xf32> |
| 68 | + %1 = vector.contract #dot_trait %aval, %0, %f0 : vector<4xf32>, vector<4xf32> into f32 |
| 69 | + store %1, %B[%i] : memref<?xf32> |
| 70 | + } |
| 71 | + return |
| 72 | +} |
| 74 | +func @entry() { |
| 75 | + %c0 = constant 0 : index |
| 76 | + %c1 = constant 1 : index |
| 77 | + %c2 = constant 2 : index |
| 78 | + %c3 = constant 3 : index |
| 79 | + %c4 = constant 4 : index |
| 80 | + %c5 = constant 5 : index |
| 81 | + %c6 = constant 6 : index |
| 82 | + %c7 = constant 7 : index |
| 83 | + %c8 = constant 8 : index |
| 84 | + |
| 85 | + %f0 = constant 0.0 : f32 |
| 86 | + %f1 = constant 1.0 : f32 |
| 87 | + %f2 = constant 2.0 : f32 |
| 88 | + %f3 = constant 3.0 : f32 |
| 89 | + %f4 = constant 4.0 : f32 |
| 90 | + %f5 = constant 5.0 : f32 |
| 91 | + %f6 = constant 6.0 : f32 |
| 92 | + %f7 = constant 7.0 : f32 |
| 93 | + %f8 = constant 8.0 : f32 |
| 94 | + |
| 95 | + %i0 = constant 0 : i32 |
| 96 | + %i1 = constant 1 : i32 |
| 97 | + %i2 = constant 2 : i32 |
| 98 | + %i3 = constant 3 : i32 |
| 99 | + %i4 = constant 4 : i32 |
| 100 | + %i5 = constant 5 : i32 |
| 101 | + %i6 = constant 6 : i32 |
| 102 | + %i7 = constant 7 : i32 |
| 103 | + |
| 104 | + // |
| 105 | + // Allocate. |
| 106 | + // |
| 107 | + |
| 108 | + %AVAL = alloc() {alignment = 64} : memref<8xvector<4xf32>> |
| 109 | + %AIDX = alloc() {alignment = 64} : memref<8xvector<4xi32>> |
| 110 | + %X = alloc(%c8) {alignment = 64} : memref<?xf32> |
| 111 | + %B = alloc(%c8) {alignment = 64} : memref<?xf32> |
| 112 | + |
| 113 | + // |
| 114 | + // Initialize. |
| 115 | + // |
| 116 | + |
| 117 | + %vf1 = vector.broadcast %f1 : f32 to vector<4xf32> |
| 118 | + |
| 119 | + %0 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> |
| 120 | + store %0, %AVAL[%c0] : memref<8xvector<4xf32>> |
| 121 | + |
| 122 | + %1 = vector.insert %f8, %vf1[1] : f32 into vector<4xf32> |
| 123 | + %2 = vector.insert %f3, %1[2] : f32 into vector<4xf32> |
| 124 | + store %2, %AVAL[%c1] : memref<8xvector<4xf32>> |
| 125 | + |
| 126 | + %3 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> |
| 127 | + %4 = vector.insert %f6, %3[2] : f32 into vector<4xf32> |
| 128 | + %5 = vector.insert %f2, %4[3] : f32 into vector<4xf32> |
| 129 | + store %5, %AVAL[%c2] : memref<8xvector<4xf32>> |
| 130 | + |
| 131 | + %6 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> |
| 132 | + store %6, %AVAL[%c3] : memref<8xvector<4xf32>> |
| 133 | + |
| 134 | + %7 = vector.insert %f5, %vf1[0] : f32 into vector<4xf32> |
| 135 | + store %7, %AVAL[%c4] : memref<8xvector<4xf32>> |
| 136 | + |
| 137 | + %8 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> |
| 138 | + %9 = vector.insert %f2, %8[1] : f32 into vector<4xf32> |
| 139 | + %10 = vector.insert %f2, %9[3] : f32 into vector<4xf32> |
| 140 | + store %10, %AVAL[%c5] : memref<8xvector<4xf32>> |
| 141 | + |
| 142 | + %11 = vector.insert %f4, %vf1[0] : f32 into vector<4xf32> |
| 143 | + %12 = vector.insert %f7, %11[1] : f32 into vector<4xf32> |
| 144 | + store %12, %AVAL[%c6] : memref<8xvector<4xf32>> |
| 145 | + |
| 146 | + %13 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> |
| 147 | + %14 = vector.insert %f2, %13[1] : f32 into vector<4xf32> |
| 148 | + store %14, %AVAL[%c7] : memref<8xvector<4xf32>> |
| 149 | + |
| 150 | + %vi0 = vector.broadcast %i0 : i32 to vector<4xi32> |
| 151 | + |
| 152 | + %20 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> |
| 153 | + %21 = vector.insert %i5, %20[2] : i32 into vector<4xi32> |
| 154 | + %22 = vector.insert %i7, %21[3] : i32 into vector<4xi32> |
| 155 | + store %22, %AIDX[%c0] : memref<8xvector<4xi32>> |
| 156 | + |
| 157 | + %23 = vector.insert %i1, %vi0[1] : i32 into vector<4xi32> |
| 158 | + %24 = vector.insert %i4, %23[2] : i32 into vector<4xi32> |
| 159 | + %25 = vector.insert %i6, %24[3] : i32 into vector<4xi32> |
| 160 | + store %25, %AIDX[%c1] : memref<8xvector<4xi32>> |
| 161 | + |
| 162 | + %26 = vector.insert %i2, %vi0[0] : i32 into vector<4xi32> |
| 163 | + %27 = vector.insert %i5, %26[1] : i32 into vector<4xi32> |
| 164 | + %28 = vector.insert %i6, %27[2] : i32 into vector<4xi32> |
| 165 | + %29 = vector.insert %i7, %28[3] : i32 into vector<4xi32> |
| 166 | + store %29, %AIDX[%c2] : memref<8xvector<4xi32>> |
| 167 | + |
| 168 | + %30 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> |
| 169 | + %31 = vector.insert %i3, %30[1] : i32 into vector<4xi32> |
| 170 | + %32 = vector.insert %i5, %31[2] : i32 into vector<4xi32> |
| 171 | + %33 = vector.insert %i7, %32[3] : i32 into vector<4xi32> |
| 172 | + store %33, %AIDX[%c3] : memref<8xvector<4xi32>> |
| 173 | + |
| 174 | + %34 = vector.insert %i3, %vi0[1] : i32 into vector<4xi32> |
| 175 | + %35 = vector.insert %i4, %34[2] : i32 into vector<4xi32> |
| 176 | + %36 = vector.insert %i5, %35[3] : i32 into vector<4xi32> |
| 177 | + store %36, %AIDX[%c4] : memref<8xvector<4xi32>> |
| 178 | + |
| 179 | + %37 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> |
| 180 | + %38 = vector.insert %i4, %37[1] : i32 into vector<4xi32> |
| 181 | + %39 = vector.insert %i5, %38[2] : i32 into vector<4xi32> |
| 182 | + %40 = vector.insert %i6, %39[3] : i32 into vector<4xi32> |
| 183 | + store %40, %AIDX[%c5] : memref<8xvector<4xi32>> |
| 184 | + |
| 185 | + %41 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> |
| 186 | + %42 = vector.insert %i4, %41[2] : i32 into vector<4xi32> |
| 187 | + %43 = vector.insert %i6, %42[3] : i32 into vector<4xi32> |
| 188 | + store %43, %AIDX[%c6] : memref<8xvector<4xi32>> |
| 189 | + |
| 190 | + %44 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> |
| 191 | + %45 = vector.insert %i3, %44[1] : i32 into vector<4xi32> |
| 192 | + %46 = vector.insert %i6, %45[2] : i32 into vector<4xi32> |
| 193 | + %47 = vector.insert %i7, %46[3] : i32 into vector<4xi32> |
| 194 | + store %47, %AIDX[%c7] : memref<8xvector<4xi32>> |
| 195 | + |
| 196 | + scf.for %i = %c0 to %c8 step %c1 { |
| 197 | + %ix = addi %i, %c1 : index |
| 198 | + %kx = index_cast %ix : index to i32 |
| 199 | + %fx = sitofp %kx : i32 to f32 |
| 200 | + store %fx, %X[%i] : memref<?xf32> |
| 201 | + store %f0, %B[%i] : memref<?xf32> |
| 202 | + } |
| 203 | + |
| 204 | + // |
| 205 | + // Multiply. |
| 206 | + // |
| 207 | + |
| 208 | + call @spmv8x8(%AVAL, %AIDX, %X, %B) : (memref<8xvector<4xf32>>, |
| 209 | + memref<8xvector<4xi32>>, |
| 210 | + memref<?xf32>, memref<?xf32>) -> () |
| 211 | + |
| 212 | + // |
| 213 | + // Print and verify. |
| 214 | + // |
| 215 | + |
| 216 | + scf.for %i = %c0 to %c8 step %c1 { |
| 217 | + %aval = load %AVAL[%i] : memref<8xvector<4xf32>> |
| 218 | + vector.print %aval : vector<4xf32> |
| 219 | + } |
| 220 | + |
| 221 | + scf.for %i = %c0 to %c8 step %c1 { |
| 222 | + %aidx = load %AIDX[%i] : memref<8xvector<4xi32>> |
| 223 | + vector.print %aidx : vector<4xi32> |
| 224 | + } |
| 225 | + |
| 226 | + scf.for %i = %c0 to %c8 step %c1 { |
| 227 | + %ldb = load %B[%i] : memref<?xf32> |
| 228 | + vector.print %ldb : f32 |
| 229 | + } |
| 230 | + |
| 231 | + // |
| 232 | + // CHECK: ( 1, 2, 1, 1 ) |
| 233 | + // CHECK-NEXT: ( 1, 8, 3, 1 ) |
| 234 | + // CHECK-NEXT: ( 1, 2, 6, 2 ) |
| 235 | + // CHECK-NEXT: ( 3, 1, 1, 1 ) |
| 236 | + // CHECK-NEXT: ( 5, 1, 1, 1 ) |
| 237 | + // CHECK-NEXT: ( 3, 2, 1, 2 ) |
| 238 | + // CHECK-NEXT: ( 4, 7, 1, 1 ) |
| 239 | + // CHECK-NEXT: ( 3, 2, 1, 1 ) |
| 240 | + // |
| 241 | + // CHECK-NEXT: ( 0, 2, 5, 7 ) |
| 242 | + // CHECK-NEXT: ( 0, 1, 4, 6 ) |
| 243 | + // CHECK-NEXT: ( 2, 5, 6, 7 ) |
| 244 | + // CHECK-NEXT: ( 1, 3, 5, 7 ) |
| 245 | + // CHECK-NEXT: ( 0, 3, 4, 5 ) |
| 246 | + // CHECK-NEXT: ( 1, 4, 5, 6 ) |
| 247 | + // CHECK-NEXT: ( 0, 2, 4, 6 ) |
| 248 | + // CHECK-NEXT: ( 1, 3, 6, 7 ) |
| 249 | + // |
| 250 | + // CHECK-NEXT: 21 |
| 251 | + // CHECK-NEXT: 39 |
| 252 | + // CHECK-NEXT: 73 |
| 253 | + // CHECK-NEXT: 24 |
| 254 | + // CHECK-NEXT: 20 |
| 255 | + // CHECK-NEXT: 36 |
| 256 | + // CHECK-NEXT: 37 |
| 257 | + // CHECK-NEXT: 29 |
| 258 | + // |
| 259 | + |
| 260 | + // |
| 261 | + // Free. |
| 262 | + // |
| 263 | + |
| 264 | + dealloc %AVAL : memref<8xvector<4xf32>> |
| 265 | + dealloc %AIDX : memref<8xvector<4xi32>> |
| 266 | + dealloc %X : memref<?xf32> |
| 267 | + dealloc %B : memref<?xf32> |
| 268 | + |
| 269 | + return |
| 270 | +} |