Skip to content

Commit f35cb7b

Browse files
xen0nSixWeining
authored and committed
[LoongArch] Add codegen support for bswap
Differential Revision: https://reviews.llvm.org/D131352
1 parent e8c807f commit f35cb7b

File tree

4 files changed

+204
-0
lines changed

4 files changed

+204
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
6969
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
7070
}
7171

72+
// LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
73+
// the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
74+
// and i32 could still be byte-swapped relatively cheaply.
75+
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
76+
if (Subtarget.is64Bit()) {
77+
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
78+
}
79+
7280
static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
7381
ISD::SETUGT, ISD::SETUGE};
7482

@@ -131,6 +139,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
131139
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
132140
"Unexpected custom legalisation");
133141
return SDValue();
142+
case ISD::BSWAP:
143+
return SDValue();
134144
case ISD::ConstantPool:
135145
return lowerConstantPool(Op, DAG);
136146
case ISD::FP_TO_SINT:
@@ -418,6 +428,29 @@ void LoongArchTargetLowering::ReplaceNodeResults(
418428
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
419429
break;
420430
}
431+
case ISD::BSWAP: {
432+
SDValue Src = N->getOperand(0);
433+
EVT VT = N->getValueType(0);
434+
assert((VT == MVT::i16 || VT == MVT::i32) &&
435+
"Unexpected custom legalization");
436+
MVT GRLenVT = Subtarget.getGRLenVT();
437+
SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
438+
SDValue Tmp;
439+
switch (VT.getSizeInBits()) {
440+
default:
441+
llvm_unreachable("Unexpected operand width");
442+
case 16:
443+
Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
444+
break;
445+
case 32:
446+
// Only LA64 will get to here due to the size mismatch between VT and
447+
// GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.td.
448+
Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
449+
break;
450+
}
451+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
452+
break;
453+
}
421454
}
422455
}
423456

@@ -847,6 +880,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
847880
NODE_NAME_CASE(MOVGR2FR_W_LA64)
848881
NODE_NAME_CASE(MOVFR2GR_S_LA64)
849882
NODE_NAME_CASE(FTINT)
883+
NODE_NAME_CASE(REVB_2H)
884+
NODE_NAME_CASE(REVB_2W)
850885
}
851886
#undef NODE_NAME_CASE
852887
return nullptr;

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,9 @@ enum NodeType : unsigned {
4444
BSTRINS,
4545
BSTRPICK,
4646

47+
// Byte swapping operations
48+
REVB_2H,
49+
REVB_2W,
4750
};
4851
} // end namespace LoongArchISD
4952

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,8 @@ def loongarch_bstrins
5555
: SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
5656
def loongarch_bstrpick
5757
: SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
58+
def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
59+
def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
5860

5961
//===----------------------------------------------------------------------===//
6062
// Operand and SDNode transformation definitions.
@@ -816,6 +818,19 @@ def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
816818
(BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
817819
} // Predicates = [IsLA64]
818820

821+
/// Byte-swapping
822+
823+
def : Pat<(loongarch_revb_2h GPR:$rj), (REVB_2H GPR:$rj)>;
824+
825+
let Predicates = [IsLA32] in {
826+
def : Pat<(bswap GPR:$rj), (ROTRI_W (REVB_2H GPR:$rj), 16)>;
827+
} // Predicates = [IsLA32]
828+
829+
let Predicates = [IsLA64] in {
830+
def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>;
831+
def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>;
832+
} // Predicates = [IsLA64]
833+
819834
/// Loads
820835

821836
multiclass LdPat<PatFrag LoadOp, LAInst Inst, ValueType vt = GRLenVT> {

llvm/test/CodeGen/LoongArch/bswap.ll

Lines changed: 151 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,151 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=loongarch32 --verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s --check-prefix=LA32
4+
; RUN: llc -mtriple=loongarch64 --verify-machineinstrs < %s \
5+
; RUN: | FileCheck %s --check-prefix=LA64
6+
7+
declare i16 @llvm.bswap.i16(i16)
8+
declare i32 @llvm.bswap.i32(i32)
9+
declare i48 @llvm.bswap.i48(i48)
10+
declare i64 @llvm.bswap.i64(i64)
11+
declare i80 @llvm.bswap.i80(i80)
12+
declare i128 @llvm.bswap.i128(i128)
13+
14+
define i16 @test_bswap_i16(i16 %a) nounwind {
15+
; LA32-LABEL: test_bswap_i16:
16+
; LA32: # %bb.0:
17+
; LA32-NEXT: revb.2h $a0, $a0
18+
; LA32-NEXT: jirl $zero, $ra, 0
19+
;
20+
; LA64-LABEL: test_bswap_i16:
21+
; LA64: # %bb.0:
22+
; LA64-NEXT: revb.2h $a0, $a0
23+
; LA64-NEXT: jirl $zero, $ra, 0
24+
%tmp = call i16 @llvm.bswap.i16(i16 %a)
25+
ret i16 %tmp
26+
}
27+
28+
define i32 @test_bswap_i32(i32 %a) nounwind {
29+
; LA32-LABEL: test_bswap_i32:
30+
; LA32: # %bb.0:
31+
; LA32-NEXT: revb.2h $a0, $a0
32+
; LA32-NEXT: rotri.w $a0, $a0, 16
33+
; LA32-NEXT: jirl $zero, $ra, 0
34+
;
35+
; LA64-LABEL: test_bswap_i32:
36+
; LA64: # %bb.0:
37+
; LA64-NEXT: revb.2w $a0, $a0
38+
; LA64-NEXT: jirl $zero, $ra, 0
39+
%tmp = call i32 @llvm.bswap.i32(i32 %a)
40+
ret i32 %tmp
41+
}
42+
43+
define i64 @test_bswap_i64(i64 %a) nounwind {
44+
; LA32-LABEL: test_bswap_i64:
45+
; LA32: # %bb.0:
46+
; LA32-NEXT: revb.2h $a1, $a1
47+
; LA32-NEXT: rotri.w $a2, $a1, 16
48+
; LA32-NEXT: revb.2h $a0, $a0
49+
; LA32-NEXT: rotri.w $a1, $a0, 16
50+
; LA32-NEXT: move $a0, $a2
51+
; LA32-NEXT: jirl $zero, $ra, 0
52+
;
53+
; LA64-LABEL: test_bswap_i64:
54+
; LA64: # %bb.0:
55+
; LA64-NEXT: revb.d $a0, $a0
56+
; LA64-NEXT: jirl $zero, $ra, 0
57+
%tmp = call i64 @llvm.bswap.i64(i64 %a)
58+
ret i64 %tmp
59+
}
60+
61+
;; Bswap on non-native integer widths.
62+
63+
define i48 @test_bswap_i48(i48 %a) nounwind {
64+
; LA32-LABEL: test_bswap_i48:
65+
; LA32: # %bb.0:
66+
; LA32-NEXT: revb.2h $a1, $a1
67+
; LA32-NEXT: rotri.w $a1, $a1, 16
68+
; LA32-NEXT: srli.w $a1, $a1, 16
69+
; LA32-NEXT: revb.2h $a0, $a0
70+
; LA32-NEXT: rotri.w $a2, $a0, 16
71+
; LA32-NEXT: slli.w $a0, $a2, 16
72+
; LA32-NEXT: or $a0, $a1, $a0
73+
; LA32-NEXT: srli.w $a1, $a2, 16
74+
; LA32-NEXT: jirl $zero, $ra, 0
75+
;
76+
; LA64-LABEL: test_bswap_i48:
77+
; LA64: # %bb.0:
78+
; LA64-NEXT: revb.d $a0, $a0
79+
; LA64-NEXT: srli.d $a0, $a0, 16
80+
; LA64-NEXT: jirl $zero, $ra, 0
81+
%tmp = call i48 @llvm.bswap.i48(i48 %a)
82+
ret i48 %tmp
83+
}
84+
85+
define i80 @test_bswap_i80(i80 %a) nounwind {
86+
; LA32-LABEL: test_bswap_i80:
87+
; LA32: # %bb.0:
88+
; LA32-NEXT: ld.w $a2, $a1, 0
89+
; LA32-NEXT: revb.2h $a2, $a2
90+
; LA32-NEXT: rotri.w $a2, $a2, 16
91+
; LA32-NEXT: ld.w $a3, $a1, 4
92+
; LA32-NEXT: revb.2h $a3, $a3
93+
; LA32-NEXT: rotri.w $a3, $a3, 16
94+
; LA32-NEXT: srli.w $a4, $a3, 16
95+
; LA32-NEXT: slli.w $a5, $a2, 16
96+
; LA32-NEXT: or $a4, $a5, $a4
97+
; LA32-NEXT: srli.w $a2, $a2, 16
98+
; LA32-NEXT: st.h $a2, $a0, 8
99+
; LA32-NEXT: st.w $a4, $a0, 4
100+
; LA32-NEXT: slli.w $a2, $a3, 16
101+
; LA32-NEXT: ld.w $a1, $a1, 8
102+
; LA32-NEXT: revb.2h $a1, $a1
103+
; LA32-NEXT: rotri.w $a1, $a1, 16
104+
; LA32-NEXT: srli.w $a1, $a1, 16
105+
; LA32-NEXT: or $a1, $a1, $a2
106+
; LA32-NEXT: st.w $a1, $a0, 0
107+
; LA32-NEXT: jirl $zero, $ra, 0
108+
;
109+
; LA64-LABEL: test_bswap_i80:
110+
; LA64: # %bb.0:
111+
; LA64-NEXT: revb.d $a1, $a1
112+
; LA64-NEXT: srli.d $a1, $a1, 48
113+
; LA64-NEXT: revb.d $a2, $a0
114+
; LA64-NEXT: slli.d $a0, $a2, 16
115+
; LA64-NEXT: or $a0, $a1, $a0
116+
; LA64-NEXT: srli.d $a1, $a2, 48
117+
; LA64-NEXT: jirl $zero, $ra, 0
118+
%tmp = call i80 @llvm.bswap.i80(i80 %a)
119+
ret i80 %tmp
120+
}
121+
122+
define i128 @test_bswap_i128(i128 %a) nounwind {
123+
; LA32-LABEL: test_bswap_i128:
124+
; LA32: # %bb.0:
125+
; LA32-NEXT: ld.w $a2, $a1, 0
126+
; LA32-NEXT: revb.2h $a2, $a2
127+
; LA32-NEXT: rotri.w $a2, $a2, 16
128+
; LA32-NEXT: st.w $a2, $a0, 12
129+
; LA32-NEXT: ld.w $a2, $a1, 4
130+
; LA32-NEXT: revb.2h $a2, $a2
131+
; LA32-NEXT: rotri.w $a2, $a2, 16
132+
; LA32-NEXT: st.w $a2, $a0, 8
133+
; LA32-NEXT: ld.w $a2, $a1, 8
134+
; LA32-NEXT: revb.2h $a2, $a2
135+
; LA32-NEXT: rotri.w $a2, $a2, 16
136+
; LA32-NEXT: st.w $a2, $a0, 4
137+
; LA32-NEXT: ld.w $a1, $a1, 12
138+
; LA32-NEXT: revb.2h $a1, $a1
139+
; LA32-NEXT: rotri.w $a1, $a1, 16
140+
; LA32-NEXT: st.w $a1, $a0, 0
141+
; LA32-NEXT: jirl $zero, $ra, 0
142+
;
143+
; LA64-LABEL: test_bswap_i128:
144+
; LA64: # %bb.0:
145+
; LA64-NEXT: revb.d $a2, $a1
146+
; LA64-NEXT: revb.d $a1, $a0
147+
; LA64-NEXT: move $a0, $a2
148+
; LA64-NEXT: jirl $zero, $ra, 0
149+
%tmp = call i128 @llvm.bswap.i128(i128 %a)
150+
ret i128 %tmp
151+
}

0 commit comments

Comments
 (0)