Skip to content

Commit 39b2e35

Browse files
committed
[RISCV][test] Precommit tests showing codegen for unaligned load/store with zbkb
We have missed opportunities for selecting pack* instructions; these will be addressed in future patches.
1 parent e2a855d commit 39b2e35

File tree

1 file changed

+143
-25
lines changed

1 file changed

+143
-25
lines changed

llvm/test/CodeGen/RISCV/unaligned-load-store.ll

Lines changed: 143 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3-
; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s
3+
; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWBASE,RV32I %s
44
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
5-
; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s
5+
; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWBASE,RV64I %s
6+
; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
7+
; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWZBKB,RV32IZBKB %s
8+
; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
9+
; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWZBKB,RV64IZBKB %s
610
; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
711
; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s
812
; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
@@ -37,16 +41,35 @@ define i16 @load_i16(ptr %p) {
3741
}
3842

3943
define i24 @load_i24(ptr %p) {
40-
; SLOW-LABEL: load_i24:
41-
; SLOW: # %bb.0:
42-
; SLOW-NEXT: lbu a1, 1(a0)
43-
; SLOW-NEXT: lbu a2, 0(a0)
44-
; SLOW-NEXT: lbu a0, 2(a0)
45-
; SLOW-NEXT: slli a1, a1, 8
46-
; SLOW-NEXT: or a1, a1, a2
47-
; SLOW-NEXT: slli a0, a0, 16
48-
; SLOW-NEXT: or a0, a1, a0
49-
; SLOW-NEXT: ret
44+
; SLOWBASE-LABEL: load_i24:
45+
; SLOWBASE: # %bb.0:
46+
; SLOWBASE-NEXT: lbu a1, 1(a0)
47+
; SLOWBASE-NEXT: lbu a2, 0(a0)
48+
; SLOWBASE-NEXT: lbu a0, 2(a0)
49+
; SLOWBASE-NEXT: slli a1, a1, 8
50+
; SLOWBASE-NEXT: or a1, a1, a2
51+
; SLOWBASE-NEXT: slli a0, a0, 16
52+
; SLOWBASE-NEXT: or a0, a1, a0
53+
; SLOWBASE-NEXT: ret
54+
;
55+
; RV32IZBKB-LABEL: load_i24:
56+
; RV32IZBKB: # %bb.0:
57+
; RV32IZBKB-NEXT: lbu a1, 1(a0)
58+
; RV32IZBKB-NEXT: lbu a2, 0(a0)
59+
; RV32IZBKB-NEXT: lbu a0, 2(a0)
60+
; RV32IZBKB-NEXT: packh a1, a2, a1
61+
; RV32IZBKB-NEXT: pack a0, a1, a0
62+
; RV32IZBKB-NEXT: ret
63+
;
64+
; RV64IZBKB-LABEL: load_i24:
65+
; RV64IZBKB: # %bb.0:
66+
; RV64IZBKB-NEXT: lbu a1, 1(a0)
67+
; RV64IZBKB-NEXT: lbu a2, 0(a0)
68+
; RV64IZBKB-NEXT: lbu a0, 2(a0)
69+
; RV64IZBKB-NEXT: packh a1, a2, a1
70+
; RV64IZBKB-NEXT: slli a0, a0, 16
71+
; RV64IZBKB-NEXT: or a0, a1, a0
72+
; RV64IZBKB-NEXT: ret
5073
;
5174
; FAST-LABEL: load_i24:
5275
; FAST: # %bb.0:
@@ -60,19 +83,32 @@ define i24 @load_i24(ptr %p) {
6083
}
6184

6285
define i32 @load_i32(ptr %p) {
63-
; SLOW-LABEL: load_i32:
64-
; SLOW: # %bb.0:
65-
; SLOW-NEXT: lbu a1, 1(a0)
66-
; SLOW-NEXT: lbu a2, 0(a0)
67-
; SLOW-NEXT: lbu a3, 2(a0)
68-
; SLOW-NEXT: lbu a0, 3(a0)
69-
; SLOW-NEXT: slli a1, a1, 8
70-
; SLOW-NEXT: or a1, a1, a2
71-
; SLOW-NEXT: slli a3, a3, 16
72-
; SLOW-NEXT: slli a0, a0, 24
73-
; SLOW-NEXT: or a0, a0, a3
74-
; SLOW-NEXT: or a0, a0, a1
75-
; SLOW-NEXT: ret
86+
; SLOWBASE-LABEL: load_i32:
87+
; SLOWBASE: # %bb.0:
88+
; SLOWBASE-NEXT: lbu a1, 1(a0)
89+
; SLOWBASE-NEXT: lbu a2, 0(a0)
90+
; SLOWBASE-NEXT: lbu a3, 2(a0)
91+
; SLOWBASE-NEXT: lbu a0, 3(a0)
92+
; SLOWBASE-NEXT: slli a1, a1, 8
93+
; SLOWBASE-NEXT: or a1, a1, a2
94+
; SLOWBASE-NEXT: slli a3, a3, 16
95+
; SLOWBASE-NEXT: slli a0, a0, 24
96+
; SLOWBASE-NEXT: or a0, a0, a3
97+
; SLOWBASE-NEXT: or a0, a0, a1
98+
; SLOWBASE-NEXT: ret
99+
;
100+
; SLOWZBKB-LABEL: load_i32:
101+
; SLOWZBKB: # %bb.0:
102+
; SLOWZBKB-NEXT: lbu a1, 1(a0)
103+
; SLOWZBKB-NEXT: lbu a2, 0(a0)
104+
; SLOWZBKB-NEXT: lbu a3, 2(a0)
105+
; SLOWZBKB-NEXT: lbu a0, 3(a0)
106+
; SLOWZBKB-NEXT: packh a1, a2, a1
107+
; SLOWZBKB-NEXT: slli a3, a3, 16
108+
; SLOWZBKB-NEXT: slli a0, a0, 24
109+
; SLOWZBKB-NEXT: or a0, a0, a3
110+
; SLOWZBKB-NEXT: or a0, a0, a1
111+
; SLOWZBKB-NEXT: ret
76112
;
77113
; FAST-LABEL: load_i32:
78114
; FAST: # %bb.0:
@@ -134,6 +170,51 @@ define i64 @load_i64(ptr %p) {
134170
; RV64I-NEXT: or a0, a0, a1
135171
; RV64I-NEXT: ret
136172
;
173+
; RV32IZBKB-LABEL: load_i64:
174+
; RV32IZBKB: # %bb.0:
175+
; RV32IZBKB-NEXT: lbu a1, 1(a0)
176+
; RV32IZBKB-NEXT: lbu a2, 0(a0)
177+
; RV32IZBKB-NEXT: lbu a3, 2(a0)
178+
; RV32IZBKB-NEXT: lbu a4, 3(a0)
179+
; RV32IZBKB-NEXT: packh a1, a2, a1
180+
; RV32IZBKB-NEXT: slli a3, a3, 16
181+
; RV32IZBKB-NEXT: slli a4, a4, 24
182+
; RV32IZBKB-NEXT: or a3, a4, a3
183+
; RV32IZBKB-NEXT: lbu a2, 5(a0)
184+
; RV32IZBKB-NEXT: lbu a4, 4(a0)
185+
; RV32IZBKB-NEXT: lbu a5, 6(a0)
186+
; RV32IZBKB-NEXT: lbu a6, 7(a0)
187+
; RV32IZBKB-NEXT: or a0, a3, a1
188+
; RV32IZBKB-NEXT: packh a1, a4, a2
189+
; RV32IZBKB-NEXT: slli a5, a5, 16
190+
; RV32IZBKB-NEXT: slli a6, a6, 24
191+
; RV32IZBKB-NEXT: or a2, a6, a5
192+
; RV32IZBKB-NEXT: or a1, a2, a1
193+
; RV32IZBKB-NEXT: ret
194+
;
195+
; RV64IZBKB-LABEL: load_i64:
196+
; RV64IZBKB: # %bb.0:
197+
; RV64IZBKB-NEXT: lbu a1, 5(a0)
198+
; RV64IZBKB-NEXT: lbu a2, 4(a0)
199+
; RV64IZBKB-NEXT: lbu a3, 6(a0)
200+
; RV64IZBKB-NEXT: lbu a4, 7(a0)
201+
; RV64IZBKB-NEXT: packh a1, a2, a1
202+
; RV64IZBKB-NEXT: slli a3, a3, 16
203+
; RV64IZBKB-NEXT: slli a4, a4, 24
204+
; RV64IZBKB-NEXT: or a3, a4, a3
205+
; RV64IZBKB-NEXT: lbu a2, 1(a0)
206+
; RV64IZBKB-NEXT: lbu a4, 0(a0)
207+
; RV64IZBKB-NEXT: lbu a5, 2(a0)
208+
; RV64IZBKB-NEXT: lbu a0, 3(a0)
209+
; RV64IZBKB-NEXT: or a1, a3, a1
210+
; RV64IZBKB-NEXT: packh a2, a4, a2
211+
; RV64IZBKB-NEXT: slli a5, a5, 16
212+
; RV64IZBKB-NEXT: slli a0, a0, 24
213+
; RV64IZBKB-NEXT: or a0, a0, a5
214+
; RV64IZBKB-NEXT: or a0, a0, a2
215+
; RV64IZBKB-NEXT: pack a0, a0, a1
216+
; RV64IZBKB-NEXT: ret
217+
;
137218
; RV32I-FAST-LABEL: load_i64:
138219
; RV32I-FAST: # %bb.0:
139220
; RV32I-FAST-NEXT: lw a2, 0(a0)
@@ -252,6 +333,43 @@ define void @store_i64(ptr %p, i64 %v) {
252333
; RV64I-NEXT: sb a1, 1(a0)
253334
; RV64I-NEXT: ret
254335
;
336+
; RV32IZBKB-LABEL: store_i64:
337+
; RV32IZBKB: # %bb.0:
338+
; RV32IZBKB-NEXT: sb a2, 4(a0)
339+
; RV32IZBKB-NEXT: sb a1, 0(a0)
340+
; RV32IZBKB-NEXT: srli a3, a2, 24
341+
; RV32IZBKB-NEXT: sb a3, 7(a0)
342+
; RV32IZBKB-NEXT: srli a3, a2, 16
343+
; RV32IZBKB-NEXT: sb a3, 6(a0)
344+
; RV32IZBKB-NEXT: srli a2, a2, 8
345+
; RV32IZBKB-NEXT: sb a2, 5(a0)
346+
; RV32IZBKB-NEXT: srli a2, a1, 24
347+
; RV32IZBKB-NEXT: sb a2, 3(a0)
348+
; RV32IZBKB-NEXT: srli a2, a1, 16
349+
; RV32IZBKB-NEXT: sb a2, 2(a0)
350+
; RV32IZBKB-NEXT: srli a1, a1, 8
351+
; RV32IZBKB-NEXT: sb a1, 1(a0)
352+
; RV32IZBKB-NEXT: ret
353+
;
354+
; RV64IZBKB-LABEL: store_i64:
355+
; RV64IZBKB: # %bb.0:
356+
; RV64IZBKB-NEXT: sb a1, 0(a0)
357+
; RV64IZBKB-NEXT: srli a2, a1, 56
358+
; RV64IZBKB-NEXT: sb a2, 7(a0)
359+
; RV64IZBKB-NEXT: srli a2, a1, 48
360+
; RV64IZBKB-NEXT: sb a2, 6(a0)
361+
; RV64IZBKB-NEXT: srli a2, a1, 40
362+
; RV64IZBKB-NEXT: sb a2, 5(a0)
363+
; RV64IZBKB-NEXT: srli a2, a1, 32
364+
; RV64IZBKB-NEXT: sb a2, 4(a0)
365+
; RV64IZBKB-NEXT: srli a2, a1, 24
366+
; RV64IZBKB-NEXT: sb a2, 3(a0)
367+
; RV64IZBKB-NEXT: srli a2, a1, 16
368+
; RV64IZBKB-NEXT: sb a2, 2(a0)
369+
; RV64IZBKB-NEXT: srli a1, a1, 8
370+
; RV64IZBKB-NEXT: sb a1, 1(a0)
371+
; RV64IZBKB-NEXT: ret
372+
;
255373
; RV32I-FAST-LABEL: store_i64:
256374
; RV32I-FAST: # %bb.0:
257375
; RV32I-FAST-NEXT: sw a2, 4(a0)

0 commit comments

Comments
 (0)