@@ -134,16 +134,169 @@ bb:
134
134
ret void
135
135
}
136
136
137
- ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm :
137
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_0 :
138
138
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
139
139
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
140
140
; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
141
141
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
142
142
; GCN: global_store_dwordx4
143
143
; GCN: global_store_dwordx4
144
- define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
144
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_0 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
145
145
bb:
+ ; The first MFMA takes an all-zero <4 x double> accumulator; the checks for this
+ ; function expect that operand to be printed as the literal 0. The second MFMA
+ ; accumulates onto %mai.1 with trailing operands 1, 2, 3, and %mai.2 is stored to %arg.
146
- %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > <double 0 .0 , double 0 .0 , double 0 .0 , double 0 .0 >, i32 0 , i32 0 , i32 0 )
146
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > zeroinitializer , i32 0 , i32 0 , i32 0 )
147
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
148
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
149
+ ret void
150
+ }
151
+
152
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_neg1:
153
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], -1{{$}}
154
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
155
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], -1{{$}}
156
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
157
+ ; GCN: global_store_dwordx4
158
+ ; GCN: global_store_dwordx4
159
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_neg1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
160
+ bb:
+ ; The accumulator is a splat of the double whose bit pattern is i64 -1 (all bits
+ ; set); the checks for this function expect it to be printed as the literal -1.
+ ; The second MFMA accumulates onto %mai.1, and %mai.2 is stored to %arg.
161
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 -1 to double )), i32 0 , i32 0 , i32 0 )
162
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
163
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
164
+ ret void
165
+ }
166
+
167
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_1:
168
+ ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0x3ff00000
169
+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
170
+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
171
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
172
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
173
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
174
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
175
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
176
+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
177
+
178
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
179
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
180
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
181
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
182
+ ; GCN: global_store_dwordx4
183
+ ; GCN: global_store_dwordx4
184
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
185
+ bb:
+ ; The accumulator is a splat of double 1.0. The checks for this function expect
+ ; the constant to be built in AGPRs (one dword written from a VGPR holding
+ ; 0x3ff00000, one dword written as 0, then copied) and passed as an a[lo:hi] range.
186
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double 1 .0 ), i32 0 , i32 0 , i32 0 )
187
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
188
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
189
+ ret void
190
+ }
191
+
192
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_neg1:
193
+ ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0xbff00000
194
+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
195
+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
196
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
197
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
198
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
199
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
200
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
201
+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
202
+
203
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
204
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
205
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
206
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
207
+ ; GCN: global_store_dwordx4
208
+ ; GCN: global_store_dwordx4
209
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
210
+ bb:
+ ; The accumulator is a splat of double -1.0. The checks for this function expect
+ ; the constant to be built in AGPRs (one dword written from a VGPR holding
+ ; 0xbff00000, one dword written as 0, then copied) and passed as an a[lo:hi] range.
211
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double -1 .0 ), i32 0 , i32 0 , i32 0 )
212
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
213
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
214
+ ret void
215
+ }
216
+
217
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64:
218
+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
219
+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 0
220
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
221
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
222
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
223
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
224
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
225
+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
226
+
227
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
228
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
229
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
230
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
231
+ ; GCN: global_store_dwordx4
232
+ ; GCN: global_store_dwordx4
233
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
234
+ bb:
+ ; The accumulator is a splat of the double whose bit pattern is i64 64. The
+ ; checks for this function expect one AGPR dword written as 64 and one as 0,
+ ; copied across the tuple, with the MFMA reading an a[lo:hi] range.
235
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 64 to double )), i32 0 , i32 0 , i32 0 )
236
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
237
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
238
+ ret void
239
+ }
240
+
241
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bits:
242
+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
243
+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 64
244
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
245
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
246
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
247
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
248
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
249
+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
250
+
251
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
252
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
253
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
254
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
255
+ ; GCN: global_store_dwordx4
256
+ ; GCN: global_store_dwordx4
257
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bits (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
258
+ bb:
+ ; The accumulator is a splat of the double whose bit pattern is
+ ; i64 274877906944 (64 << 32, i.e. 0x0000004000000000). The checks for this
+ ; function expect one AGPR dword written as 0 and one as 64, not a folded immediate.
259
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 274877906944 to double )), i32 0 , i32 0 , i32 0 )
260
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
261
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
262
+ ret void
263
+ }
264
+
265
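+ ; FIXME: This should not be foldable as an inline immediate. The 64-bit splat
+ ; element is 0x0000004000000040 (64 in both the high and low 32-bit halves),
+ ; which is not the 64-bit value 64 that the checks below currently expect.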
266
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low:
267
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 64{{$}}
268
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
269
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 64{{$}}
270
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
271
+ ; GCN: global_store_dwordx4
272
+ ; GCN: global_store_dwordx4
273
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
274
+ bb:
+ ; The accumulator is a splat of the double whose bit pattern is
+ ; i64 274877907008 ((64 << 32) | 64, i.e. 0x0000004000000040). The checks for
+ ; this function record the current output, where the operand is printed as 64.
275
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 274877907008 to double )), i32 0 , i32 0 , i32 0 )
276
+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
277
+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
278
+ ret void
279
+ }
280
+
281
+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low:
282
+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 1.0
283
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
284
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
285
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
286
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
287
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
288
+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
289
+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
290
+
291
+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
292
+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
293
+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
294
+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
295
+ ; GCN: global_store_dwordx4
296
+ ; GCN: global_store_dwordx4
297
+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
298
+ bb:
299
+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (<2 x float > splat (float 1 .0 ) to double )), i32 0 , i32 0 , i32 0 )
147
300
%mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
148
301
store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
149
302
ret void
0 commit comments