7
7
define amdgpu_kernel void @localize_constants (i1 %cond ) {
8
8
; GFX9-LABEL: localize_constants:
9
9
; GFX9: ; %bb.0: ; %entry
10
- ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
10
+ ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
11
+ ; GFX9-NEXT: s_mov_b32 s0, 1
11
12
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
12
- ; GFX9-NEXT: s_and_b32 s0, s0, 1
13
- ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
13
+ ; GFX9-NEXT: s_xor_b32 s1, s1, 1
14
+ ; GFX9-NEXT: s_and_b32 s1, s1, 1
15
+ ; GFX9-NEXT: s_cmp_lg_u32 s1, 0
14
16
; GFX9-NEXT: s_cbranch_scc0 BB0_2
15
- ; GFX9-NEXT: ; %bb.1: ; %bb0
16
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
17
- ; GFX9-NEXT: global_store_dword v[0:1], v0, off
18
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
17
+ ; GFX9-NEXT: ; %bb.1: ; %bb1
18
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
19
19
; GFX9-NEXT: global_store_dword v[0:1], v0, off
20
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
20
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
21
21
; GFX9-NEXT: global_store_dword v[0:1], v0, off
22
22
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
23
23
; GFX9-NEXT: global_store_dword v[0:1], v0, off
24
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
25
- ; GFX9-NEXT: global_store_dword v[0:1], v0, off
26
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
24
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
27
25
; GFX9-NEXT: global_store_dword v[0:1], v0, off
28
- ; GFX9-NEXT: s_endpgm
29
- ; GFX9-NEXT: BB0_2: ; %bb1
30
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
26
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
31
27
; GFX9-NEXT: global_store_dword v[0:1], v0, off
32
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
28
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
29
+ ; GFX9-NEXT: s_mov_b32 s0, 0
33
30
; GFX9-NEXT: global_store_dword v[0:1], v0, off
34
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
31
+ ; GFX9-NEXT: BB0_2: ; %Flow
32
+ ; GFX9-NEXT: s_and_b32 s0, s0, 1
33
+ ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
34
+ ; GFX9-NEXT: s_cbranch_scc0 BB0_4
35
+ ; GFX9-NEXT: ; %bb.3: ; %bb0
36
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
35
37
; GFX9-NEXT: global_store_dword v[0:1], v0, off
36
38
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
37
39
; GFX9-NEXT: global_store_dword v[0:1], v0, off
38
40
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
39
41
; GFX9-NEXT: global_store_dword v[0:1], v0, off
40
- ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
42
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
41
43
; GFX9-NEXT: global_store_dword v[0:1], v0, off
44
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
45
+ ; GFX9-NEXT: global_store_dword v[0:1], v0, off
46
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
47
+ ; GFX9-NEXT: global_store_dword v[0:1], v0, off
48
+ ; GFX9-NEXT: BB0_4: ; %bb2
42
49
; GFX9-NEXT: s_endpgm
43
50
entry:
44
51
br i1 %cond , label %bb0 , label %bb1
75
82
define amdgpu_kernel void @localize_globals (i1 %cond ) {
76
83
; GFX9-LABEL: localize_globals:
77
84
; GFX9: ; %bb.0: ; %entry
78
- ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
85
+ ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
86
+ ; GFX9-NEXT: s_mov_b32 s0, 1
87
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
88
+ ; GFX9-NEXT: s_xor_b32 s1, s1, 1
89
+ ; GFX9-NEXT: s_and_b32 s1, s1, 1
90
+ ; GFX9-NEXT: s_cmp_lg_u32 s1, 0
91
+ ; GFX9-NEXT: s_cbranch_scc0 BB1_2
92
+ ; GFX9-NEXT: ; %bb.1: ; %bb1
93
+ ; GFX9-NEXT: s_getpc_b64 s[2:3]
94
+ ; GFX9-NEXT: s_add_u32 s2, s2, gv2@gotpcrel32@lo+4
95
+ ; GFX9-NEXT: s_addc_u32 s3, s3, gv2@gotpcrel32@hi+4
96
+ ; GFX9-NEXT: s_getpc_b64 s[4:5]
97
+ ; GFX9-NEXT: s_add_u32 s4, s4, gv3@gotpcrel32@lo+4
98
+ ; GFX9-NEXT: s_addc_u32 s5, s5, gv3@gotpcrel32@hi+4
99
+ ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
100
+ ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
101
+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0
102
+ ; GFX9-NEXT: s_mov_b32 s0, 0
79
103
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
104
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s2
105
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s3
106
+ ; GFX9-NEXT: global_store_dword v[0:1], v2, off
107
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
108
+ ; GFX9-NEXT: v_mov_b32_e32 v2, 1
109
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s5
110
+ ; GFX9-NEXT: global_store_dword v[0:1], v2, off
111
+ ; GFX9-NEXT: BB1_2: ; %Flow
80
112
; GFX9-NEXT: s_and_b32 s0, s0, 1
81
113
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
82
- ; GFX9-NEXT: s_cbranch_scc0 BB1_2
83
- ; GFX9-NEXT: ; %bb.1 : ; %bb0
114
+ ; GFX9-NEXT: s_cbranch_scc0 BB1_4
115
+ ; GFX9-NEXT: ; %bb.3 : ; %bb0
84
116
; GFX9-NEXT: s_getpc_b64 s[0:1]
85
117
; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
86
118
; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+4
87
- ; GFX9-NEXT: v_mov_b32_e32 v2, 0
119
+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
88
120
; GFX9-NEXT: s_getpc_b64 s[2:3]
89
121
; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
90
122
; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+4
91
- ; GFX9-NEXT: s_branch BB1_3
92
- ; GFX9-NEXT: BB1_2: ; %bb1
93
- ; GFX9-NEXT: s_getpc_b64 s[0:1]
94
- ; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
95
- ; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+4
96
- ; GFX9-NEXT: v_mov_b32_e32 v2, 0
97
- ; GFX9-NEXT: s_getpc_b64 s[2:3]
98
- ; GFX9-NEXT: s_add_u32 s2, s2, gv3@gotpcrel32@lo+4
99
- ; GFX9-NEXT: s_addc_u32 s3, s3, gv3@gotpcrel32@hi+4
100
- ; GFX9-NEXT: BB1_3: ; %bb2
101
- ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
102
123
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
124
+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0
103
125
; GFX9-NEXT: v_mov_b32_e32 v3, 1
104
126
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
105
127
; GFX9-NEXT: v_mov_b32_e32 v0, s0
@@ -108,6 +130,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
108
130
; GFX9-NEXT: v_mov_b32_e32 v0, s2
109
131
; GFX9-NEXT: v_mov_b32_e32 v1, s3
110
132
; GFX9-NEXT: global_store_dword v[0:1], v3, off
133
+ ; GFX9-NEXT: BB1_4: ; %bb2
111
134
; GFX9-NEXT: s_endpgm
112
135
entry:
113
136
br i1 %cond , label %bb0 , label %bb1
0 commit comments