Skip to content

Commit c34d81a

Browse files
committed
Update tests.
Undo the changes in the previous commit. Now the amdgpu-no-flat-scratch-init attribute is added manually only to tests that are relevant to the attribute.
1 parent b225a48 commit c34d81a

File tree

84 files changed

+14124
-2824
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+14124
-2824
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,9 +628,7 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
628628

629629
if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
630630
(IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
631-
// The line below: If enableFlatScratch() is true, whether
632-
// no-flat-scratch-init is set is not important. If enableFlatScratch()
633-
// is false, FlatScratchInit cannot be true for graphics CC.
631+
// FlatScratchInit cannot be true for graphics CC.
634632
(ST.enableFlatScratch() ||
635633
(!IsNoFlatScratchInitSet && !AMDGPU::isGraphics(CC))) &&
636634
!ST.flatScratchIsArchitected()) {

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll

Lines changed: 341 additions & 29 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll

Lines changed: 361 additions & 31 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 43 additions & 36 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
99
; GCN: ; %bb.0:
1010
; GCN-NEXT: s_load_dwordx4 s[20:23], s[8:9], 0x0
1111
; GCN-NEXT: s_load_dwordx2 s[24:25], s[8:9], 0x10
12-
; GCN-NEXT: s_add_u32 s0, s0, s15
12+
; GCN-NEXT: s_add_u32 s0, s0, s17
1313
; GCN-NEXT: s_addc_u32 s1, s1, 0
1414
; GCN-NEXT: v_mov_b32_e32 v64, 0
1515
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -256,4 +256,4 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
256256
ret void
257257
}
258258

259-
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" "amdgpu-no-flat-scratch-init" }
259+
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }

llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(
1111
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1212
; CHECK-NEXT: v_mov_b32_e32 v0, 4
1313
; CHECK-NEXT: s_mov_b32 m0, -1
14+
; CHECK-NEXT: s_add_i32 s12, s12, s17
1415
; CHECK-NEXT: ds_read_b32 v2, v0
15-
; CHECK-NEXT: v_mov_b32_e32 v3, 9
16+
; CHECK-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
1617
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1718
; CHECK-NEXT: s_add_u32 s0, s0, 4
1819
; CHECK-NEXT: s_addc_u32 s1, s1, 0
1920
; CHECK-NEXT: v_mov_b32_e32 v0, s0
21+
; CHECK-NEXT: s_mov_b32 flat_scratch_lo, s13
2022
; CHECK-NEXT: v_mov_b32_e32 v1, s1
23+
; CHECK-NEXT: v_mov_b32_e32 v3, 9
2124
; CHECK-NEXT: flat_store_dword v[0:1], v2
2225
; CHECK-NEXT: v_mov_b32_e32 v0, 0x200
2326
; CHECK-NEXT: ds_write_b32 v0, v3
@@ -31,4 +34,4 @@ entry:
3134
ret void
3235
}
3336

34-
attributes #0 = { nounwind "amdgpu-no-flat-scratch-init" }
37+
attributes #0 = { nounwind }

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
33

4-
define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) #0 {
4+
define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) {
55
; GCN-LABEL: test_wave64:
66
; GCN: ; %bb.0: ; %entry
77
; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
88
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0xa
9+
; GCN-NEXT: s_add_i32 s12, s12, s17
10+
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
11+
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
912
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1013
; GCN-NEXT: s_cmp_eq_u32 s2, 0
1114
; GCN-NEXT: s_cselect_b32 s2, 1, 0
@@ -25,5 +28,3 @@ entry:
2528
}
2629

2730
declare i64 @llvm.amdgcn.if.break.i64(i1, i64)
28-
29-
attributes #0 = { "amdgpu-no-flat-scratch-init" }

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,14 @@ define double @v_trig_preop_f64_imm(double %a, i32 %b) {
3737
ret double %result
3838
}
3939

40-
define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) #1 {
40+
define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
4141
; CI-LABEL: s_trig_preop_f64:
4242
; CI: ; %bb.0:
4343
; CI-NEXT: s_load_dword s2, s[8:9], 0x2
4444
; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
45+
; CI-NEXT: s_add_i32 s12, s12, s17
46+
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
47+
; CI-NEXT: s_mov_b32 flat_scratch_lo, s13
4548
; CI-NEXT: s_waitcnt lgkmcnt(0)
4649
; CI-NEXT: v_mov_b32_e32 v0, s2
4750
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -59,6 +62,9 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) #1 {
5962
; VI: ; %bb.0:
6063
; VI-NEXT: s_load_dword s2, s[8:9], 0x8
6164
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
65+
; VI-NEXT: s_add_i32 s12, s12, s17
66+
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
67+
; VI-NEXT: s_mov_b32 flat_scratch_lo, s13
6268
; VI-NEXT: s_waitcnt lgkmcnt(0)
6369
; VI-NEXT: v_mov_b32_e32 v0, s2
6470
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -76,6 +82,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) #1 {
7682
; GFX9: ; %bb.0:
7783
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
7884
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
85+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
86+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
7987
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8088
; GFX9-NEXT: v_mov_b32_e32 v0, s2
8189
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -85,6 +93,10 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) #1 {
8593
;
8694
; GFX10-LABEL: s_trig_preop_f64:
8795
; GFX10: ; %bb.0:
96+
; GFX10-NEXT: s_add_u32 s12, s12, s17
97+
; GFX10-NEXT: s_addc_u32 s13, s13, 0
98+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
99+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
88100
; GFX10-NEXT: s_clause 0x1
89101
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
90102
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8
@@ -109,10 +121,13 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) #1 {
109121
ret void
110122
}
111123

112-
define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) #1 {
124+
define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
113125
; CI-LABEL: s_trig_preop_f64_imm:
114126
; CI: ; %bb.0:
115127
; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
128+
; CI-NEXT: s_add_i32 s12, s12, s17
129+
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
130+
; CI-NEXT: s_mov_b32 flat_scratch_lo, s13
116131
; CI-NEXT: s_waitcnt lgkmcnt(0)
117132
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
118133
; CI-NEXT: s_add_u32 s0, s0, 4
@@ -128,6 +143,9 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) #1 {
128143
; VI-LABEL: s_trig_preop_f64_imm:
129144
; VI: ; %bb.0:
130145
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
146+
; VI-NEXT: s_add_i32 s12, s12, s17
147+
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
148+
; VI-NEXT: s_mov_b32 flat_scratch_lo, s13
131149
; VI-NEXT: s_waitcnt lgkmcnt(0)
132150
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
133151
; VI-NEXT: s_add_u32 s0, s0, 4
@@ -143,6 +161,8 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) #1 {
143161
; GFX9-LABEL: s_trig_preop_f64_imm:
144162
; GFX9: ; %bb.0:
145163
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
164+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
165+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
146166
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
147167
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
148168
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
@@ -151,6 +171,10 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) #1 {
151171
;
152172
; GFX10-LABEL: s_trig_preop_f64_imm:
153173
; GFX10: ; %bb.0:
174+
; GFX10-NEXT: s_add_u32 s12, s12, s17
175+
; GFX10-NEXT: s_addc_u32 s13, s13, 0
176+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
177+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
154178
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
155179
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
156180
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
@@ -174,4 +198,3 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) #1 {
174198
declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
175199

176200
attributes #0 = { nounwind readnone speculatable }
177-
attributes #1 = { "amdgpu-no-flat-scratch-init" }

llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
44
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
55

6-
define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) #0 {
6+
define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) {
77
; GFX8-LABEL: sdivrem_i32:
88
; GFX8: ; %bb.0:
99
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
10+
; GFX8-NEXT: s_add_i32 s12, s12, s17
11+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
12+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
1013
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1114
; GFX8-NEXT: s_ashr_i32 s6, s5, 31
1215
; GFX8-NEXT: s_add_i32 s0, s5, s6
@@ -142,10 +145,13 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
142145
ret void
143146
}
144147

145-
define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) #0 {
148+
define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) {
146149
; GFX8-LABEL: sdivrem_i64:
147150
; GFX8: ; %bb.0:
148151
; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0
152+
; GFX8-NEXT: s_add_i32 s12, s12, s17
153+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
154+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
149155
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
150156
; GFX8-NEXT: s_ashr_i32 s2, s9, 31
151157
; GFX8-NEXT: s_ashr_i32 s12, s11, 31
@@ -613,10 +619,13 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
613619
ret void
614620
}
615621

616-
define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) #0 {
622+
define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) {
617623
; GFX8-LABEL: sdivrem_v2i32:
618624
; GFX8: ; %bb.0:
619625
; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0
626+
; GFX8-NEXT: s_add_i32 s12, s12, s17
627+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
628+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
620629
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
621630
; GFX8-NEXT: s_ashr_i32 s2, s10, 31
622631
; GFX8-NEXT: s_add_i32 s0, s10, s2
@@ -842,9 +851,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
842851
ret void
843852
}
844853

845-
define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) #0 {
854+
define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) {
846855
; GFX8-LABEL: sdivrem_v4i32:
847856
; GFX8: ; %bb.0:
857+
; GFX8-NEXT: s_add_i32 s12, s12, s17
858+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
859+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
848860
; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x10
849861
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
850862
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1268,9 +1280,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
12681280
ret void
12691281
}
12701282

1271-
define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) #0 {
1283+
define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) {
12721284
; GFX8-LABEL: sdivrem_v2i64:
12731285
; GFX8: ; %bb.0:
1286+
; GFX8-NEXT: s_add_i32 s12, s12, s17
1287+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
1288+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
12741289
; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0
12751290
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20
12761291
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2183,10 +2198,13 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
21832198
ret void
21842199
}
21852200

2186-
define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) #0 {
2201+
define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) {
21872202
; GFX8-LABEL: sdiv_i8:
21882203
; GFX8: ; %bb.0:
21892204
; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
2205+
; GFX8-NEXT: s_add_i32 s12, s12, s17
2206+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
2207+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
21902208
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
21912209
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x80008
21922210
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -2328,10 +2346,13 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
23282346
ret void
23292347
}
23302348

2331-
define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) #0 {
2349+
define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) {
23322350
; GFX8-LABEL: sdivrem_v2i8:
23332351
; GFX8: ; %bb.0:
23342352
; GFX8-NEXT: s_load_dword s2, s[8:9], 0x10
2353+
; GFX8-NEXT: s_add_i32 s12, s12, s17
2354+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
2355+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
23352356
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
23362357
; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010
23372358
; GFX8-NEXT: s_ashr_i32 s3, s0, 31
@@ -2592,10 +2613,13 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
25922613
ret void
25932614
}
25942615

2595-
define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) #0 {
2616+
define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) {
25962617
; GFX8-LABEL: sdiv_i16:
25972618
; GFX8: ; %bb.0:
25982619
; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
2620+
; GFX8-NEXT: s_add_i32 s12, s12, s17
2621+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
2622+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
25992623
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
26002624
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x100010
26012625
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -2737,10 +2761,13 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
27372761
ret void
27382762
}
27392763

2740-
define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) #0 {
2764+
define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) {
27412765
; GFX8-LABEL: sdivrem_v2i16:
27422766
; GFX8: ; %bb.0:
27432767
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x10
2768+
; GFX8-NEXT: s_add_i32 s12, s12, s17
2769+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
2770+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
27442771
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
27452772
; GFX8-NEXT: s_sext_i32_i16 s0, s3
27462773
; GFX8-NEXT: s_ashr_i32 s10, s0, 31
@@ -2998,10 +3025,13 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
29983025
ret void
29993026
}
30003027

3001-
define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) #0 {
3028+
define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) {
30023029
; GFX8-LABEL: sdivrem_i3:
30033030
; GFX8: ; %bb.0:
30043031
; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
3032+
; GFX8-NEXT: s_add_i32 s12, s12, s17
3033+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
3034+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
30053035
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
30063036
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x30008
30073037
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -3149,10 +3179,13 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
31493179
ret void
31503180
}
31513181

3152-
define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) #0 {
3182+
define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) {
31533183
; GFX8-LABEL: sdivrem_i27:
31543184
; GFX8: ; %bb.0:
31553185
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
3186+
; GFX8-NEXT: s_add_i32 s12, s12, s17
3187+
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13
3188+
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
31563189
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
31573190
; GFX8-NEXT: s_bfe_i32 s0, s5, 0x1b0000
31583191
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -3299,5 +3332,3 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
32993332
store i27 %rem, ptr addrspace(1) %out1
33003333
ret void
33013334
}
3302-
3303-
attributes #0 = { "amdgpu-no-flat-scratch-init" }

0 commit comments

Comments
 (0)