; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s
2
4
3
; Constant-index extract of element 1 from a <2 x i32> loaded from addrspace(1).
; The cost expectation below is checked under the shared GCN prefix.
; GCN: 'extractelement_v2i32'
; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32>
define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
  %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
  %elt = extractelement <2 x i32> %vec, i32 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
11
13
12
; Constant-index extract of element 1 from a <2 x float> loaded from addrspace(1).
; The cost expectation below is checked under the shared GCN prefix.
; GCN: 'extractelement_v2f32'
; GCN: estimated cost of 0 for {{.*}} extractelement <2 x float>
define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
  %elt = extractelement <2 x float> %vec, i32 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store float %elt, float addrspace(1)* %out
  ret void
}
20
22
21
; Constant-index extract of element 1 from a <3 x i32> loaded from addrspace(1).
; The cost expectation below is checked under the shared GCN prefix.
; GCN: 'extractelement_v3i32'
; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i32>
define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
  %elt = extractelement <3 x i32> %vec, i32 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
29
31
30
; Constant-index extract of element 1 from a <4 x i32> loaded from addrspace(1).
; The cost expectation below is checked under the shared GCN prefix.
; GCN: 'extractelement_v4i32'
; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i32>
define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
  %elt = extractelement <4 x i32> %vec, i32 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
38
40
39
- ; CHECK : 'extractelement_v8i32'
40
- ; CHECK : estimated cost of 0 for {{.*}} extractelement <8 x i32>
41
+ ; GCN : 'extractelement_v8i32'
42
+ ; GCN : estimated cost of 0 for {{.*}} extractelement <8 x i32>
41
43
define amdgpu_kernel void @extractelement_v8i32 (i32 addrspace (1 )* %out , <8 x i32 > addrspace (1 )* %vaddr ) {
42
44
%vec = load <8 x i32 >, <8 x i32 > addrspace (1 )* %vaddr
43
45
%elt = extractelement <8 x i32 > %vec , i32 1
@@ -46,65 +48,85 @@ define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32
46
48
}
47
49
48
50
; FIXME: Should be non-0
; NOTE(review): the check below already expects a cost of 2 for the dynamic
; index, so the FIXME above looks stale - confirm and remove it.
;
; Extract from a <8 x i32> using the run-time index %idx instead of a constant.
; GCN: 'extractelement_v8i32_dynindex'
; GCN: estimated cost of 2 for {{.*}} extractelement <8 x i32>
define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
  %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
  ; %idx is a kernel argument, so the lane is not known at compile time.
  %elt = extractelement <8 x i32> %vec, i32 %idx
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
57
59
58
; Constant-index extract of element 1 from a <2 x i64> loaded from addrspace(1).
; The index operand is i64 here. The cost is checked under the shared GCN prefix.
; GCN: 'extractelement_v2i64'
; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i64>
define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
  %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
  %elt = extractelement <2 x i64> %vec, i64 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i64 %elt, i64 addrspace(1)* %out
  ret void
}
66
68
67
; Constant-index extract of element 1 from a <3 x i64> loaded from addrspace(1).
; The index operand is i64 here. The cost is checked under the shared GCN prefix.
; GCN: 'extractelement_v3i64'
; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i64>
define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
  %elt = extractelement <3 x i64> %vec, i64 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i64 %elt, i64 addrspace(1)* %out
  ret void
}
75
77
76
; Constant-index extract of element 1 from a <4 x i64> loaded from addrspace(1).
; The index operand is i64 here. The cost is checked under the shared GCN prefix.
; GCN: 'extractelement_v4i64'
; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i64>
define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
  %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
  %elt = extractelement <4 x i64> %vec, i64 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i64 %elt, i64 addrspace(1)* %out
  ret void
}
84
86
85
; Constant-index extract of element 1 from a <8 x i64> loaded from addrspace(1).
; The index operand is i64 here. The cost is checked under the shared GCN prefix.
; GCN: 'extractelement_v8i64'
; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i64>
define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
  %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
  %elt = extractelement <8 x i64> %vec, i64 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i64 %elt, i64 addrspace(1)* %out
  ret void
}
93
95
94
; Constant-index extract of element 1 from a <4 x i8> loaded from addrspace(1).
; The index operand is i8 here. The expected cost is 1 (the superseded check
; expected 0), verified under the shared GCN prefix.
; GCN: 'extractelement_v4i8'
; GCN: estimated cost of 1 for {{.*}} extractelement <4 x i8>
define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
  %vec = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr
  %elt = extractelement <4 x i8> %vec, i8 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i8 %elt, i8 addrspace(1)* %out
  ret void
}
102
104
103
; Constant-index extract of element 0 from a <2 x i16> loaded from addrspace(1).
; The expected cost differs per run: the run without -mcpu (CI prefix) expects 1,
; while the fiji (VI prefix) and gfx900 (GFX9 prefix) runs expect 0.
; The CI check also matches the operands, so %vec must keep its name.
; GCN: 'extractelement_0_v2i16':
; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> %vec, i16 0
; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16>
; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16>
define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %elt = extractelement <2 x i16> %vec, i16 0
  store i16 %elt, i16 addrspace(1)* %out
  ret void
}
115
+
116
; Constant-index extract of element 1 from a <2 x i16> loaded from addrspace(1).
; The expected cost is 1 for every run, checked under the shared GCN prefix.
; GCN: 'extractelement_1_v2i16':
; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
define amdgpu_kernel void @extractelement_1_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %elt = extractelement <2 x i16> %vec, i16 1
  ; Store the extracted element so the extract has a visible use in the kernel.
  store i16 %elt, i16 addrspace(1)* %out
  ret void
}
124
+
125
; Extract from a <2 x i16> using the run-time index %idx instead of a constant.
; The expected cost is 1 for every run, checked under the shared GCN prefix.
; GCN: 'extractelement_var_v2i16'
; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
define amdgpu_kernel void @extractelement_var_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, i32 %idx) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  ; %idx is a kernel argument, so the lane is not known at compile time.
  %elt = extractelement <2 x i16> %vec, i32 %idx
  store i16 %elt, i16 addrspace(1)* %out
  ret void
}
0 commit comments