|
3 | 3 | ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
|
4 | 4 | ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
5 | 5 |
|
6 |
| -define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) #0 { |
| 6 | +define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { |
7 | 7 | ; GFX8-LABEL: sdivrem_i32:
|
8 | 8 | ; GFX8: ; %bb.0:
|
9 | 9 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
|
| 10 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 11 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 12 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
10 | 13 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
11 | 14 | ; GFX8-NEXT: s_ashr_i32 s6, s5, 31
|
12 | 15 | ; GFX8-NEXT: s_add_i32 s0, s5, s6
|
@@ -142,10 +145,13 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
|
142 | 145 | ret void
|
143 | 146 | }
|
144 | 147 |
|
145 |
| -define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) #0 { |
| 148 | +define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) { |
146 | 149 | ; GFX8-LABEL: sdivrem_i64:
|
147 | 150 | ; GFX8: ; %bb.0:
|
148 | 151 | ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0
|
| 152 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 153 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 154 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
149 | 155 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
150 | 156 | ; GFX8-NEXT: s_ashr_i32 s2, s9, 31
|
151 | 157 | ; GFX8-NEXT: s_ashr_i32 s12, s11, 31
|
@@ -613,10 +619,13 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
|
613 | 619 | ret void
|
614 | 620 | }
|
615 | 621 |
|
616 |
| -define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) #0 { |
| 622 | +define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { |
617 | 623 | ; GFX8-LABEL: sdivrem_v2i32:
|
618 | 624 | ; GFX8: ; %bb.0:
|
619 | 625 | ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0
|
| 626 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 627 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 628 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
620 | 629 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
621 | 630 | ; GFX8-NEXT: s_ashr_i32 s2, s10, 31
|
622 | 631 | ; GFX8-NEXT: s_add_i32 s0, s10, s2
|
@@ -842,9 +851,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
|
842 | 851 | ret void
|
843 | 852 | }
|
844 | 853 |
|
845 |
| -define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) #0 { |
| 854 | +define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { |
846 | 855 | ; GFX8-LABEL: sdivrem_v4i32:
|
847 | 856 | ; GFX8: ; %bb.0:
|
| 857 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 858 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 859 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
848 | 860 | ; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x10
|
849 | 861 | ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
|
850 | 862 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
@@ -1268,9 +1280,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
|
1268 | 1280 | ret void
|
1269 | 1281 | }
|
1270 | 1282 |
|
1271 |
| -define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) #0 { |
| 1283 | +define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { |
1272 | 1284 | ; GFX8-LABEL: sdivrem_v2i64:
|
1273 | 1285 | ; GFX8: ; %bb.0:
|
| 1286 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 1287 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 1288 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
1274 | 1289 | ; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0
|
1275 | 1290 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20
|
1276 | 1291 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
@@ -2183,10 +2198,13 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
|
2183 | 2198 | ret void
|
2184 | 2199 | }
|
2185 | 2200 |
|
2186 |
| -define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) #0 { |
| 2201 | +define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { |
2187 | 2202 | ; GFX8-LABEL: sdiv_i8:
|
2188 | 2203 | ; GFX8: ; %bb.0:
|
2189 | 2204 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
|
| 2205 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2206 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 2207 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
2190 | 2208 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
2191 | 2209 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x80008
|
2192 | 2210 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31
|
@@ -2328,10 +2346,13 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
|
2328 | 2346 | ret void
|
2329 | 2347 | }
|
2330 | 2348 |
|
2331 |
| -define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) #0 { |
| 2349 | +define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { |
2332 | 2350 | ; GFX8-LABEL: sdivrem_v2i8:
|
2333 | 2351 | ; GFX8: ; %bb.0:
|
2334 | 2352 | ; GFX8-NEXT: s_load_dword s2, s[8:9], 0x10
|
| 2353 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2354 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 2355 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
2335 | 2356 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
2336 | 2357 | ; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010
|
2337 | 2358 | ; GFX8-NEXT: s_ashr_i32 s3, s0, 31
|
@@ -2592,10 +2613,13 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
|
2592 | 2613 | ret void
|
2593 | 2614 | }
|
2594 | 2615 |
|
2595 |
| -define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) #0 { |
| 2616 | +define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { |
2596 | 2617 | ; GFX8-LABEL: sdiv_i16:
|
2597 | 2618 | ; GFX8: ; %bb.0:
|
2598 | 2619 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
|
| 2620 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2621 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 2622 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
2599 | 2623 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
2600 | 2624 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x100010
|
2601 | 2625 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31
|
@@ -2737,10 +2761,13 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
|
2737 | 2761 | ret void
|
2738 | 2762 | }
|
2739 | 2763 |
|
2740 |
| -define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) #0 { |
| 2764 | +define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { |
2741 | 2765 | ; GFX8-LABEL: sdivrem_v2i16:
|
2742 | 2766 | ; GFX8: ; %bb.0:
|
2743 | 2767 | ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x10
|
| 2768 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2769 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 2770 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
2744 | 2771 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
2745 | 2772 | ; GFX8-NEXT: s_sext_i32_i16 s0, s3
|
2746 | 2773 | ; GFX8-NEXT: s_ashr_i32 s10, s0, 31
|
@@ -2998,10 +3025,13 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
|
2998 | 3025 | ret void
|
2999 | 3026 | }
|
3000 | 3027 |
|
3001 |
| -define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) #0 { |
| 3028 | +define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { |
3002 | 3029 | ; GFX8-LABEL: sdivrem_i3:
|
3003 | 3030 | ; GFX8: ; %bb.0:
|
3004 | 3031 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10
|
| 3032 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 3033 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 3034 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
3005 | 3035 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
3006 | 3036 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x30008
|
3007 | 3037 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31
|
@@ -3149,10 +3179,13 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
|
3149 | 3179 | ret void
|
3150 | 3180 | }
|
3151 | 3181 |
|
3152 |
| -define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) #0 { |
| 3182 | +define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { |
3153 | 3183 | ; GFX8-LABEL: sdivrem_i27:
|
3154 | 3184 | ; GFX8: ; %bb.0:
|
3155 | 3185 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
|
| 3186 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 3187 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 3188 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
3156 | 3189 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
3157 | 3190 | ; GFX8-NEXT: s_bfe_i32 s0, s5, 0x1b0000
|
3158 | 3191 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31
|
@@ -3299,5 +3332,3 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
|
3299 | 3332 | store i27 %rem, ptr addrspace(1) %out1
|
3300 | 3333 | ret void
|
3301 | 3334 | }
|
3302 |
| - |
3303 |
| -attributes #0 = { "amdgpu-no-flat-scratch-init" } |
|
0 commit comments