Skip to content

Commit 529a932

Browse files
[AArch64] SME2 multi-vec unpack, ZIP, frint for two and four registers
This patch adds the assembly/disassembly for the following instructions:

- SUNPK: Unpack and sign-extend multi-vector elements.
- UUNPK: Unpack and zero-extend multi-vector elements.
- ZIP (four registers): Interleave elements from four vectors.
- ZIP (two registers): Interleave elements from two vectors.
- FRINTA: Multi-vector floating-point round to integral value, to nearest with ties away from zero.
- FRINTM: Multi-vector floating-point round to integral value, toward minus Infinity.
- FRINTN: Multi-vector floating-point round to integral value, to nearest with ties to even.
- FRINTP: Multi-vector floating-point round to integral value, toward plus Infinity.

The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09

Differential Revision: https://reviews.llvm.org/D136091
1 parent 5062116 commit 529a932

19 files changed

+1513
-77
lines changed

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,6 +1233,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<2>",
12331233
def ZZ_d_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'d'>"> {
12341234
let ParserMatchClass = ZPRVectorListMul<64, 2>;
12351235
}
1236+
1237+
def ZZ_q_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'q'>"> {
1238+
let ParserMatchClass = ZPRVectorListMul<128, 2>;
1239+
}
12361240
} // end let EncoderMethod/DecoderMethod
12371241

12381242
let EncoderMethod = "EncodeRegAsMultipleOf<4>",
@@ -1252,6 +1256,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<4>",
12521256
def ZZZZ_d_mul_r : RegisterOperand<ZPR4Mul4, "printTypedVectorList<0,'d'>"> {
12531257
let ParserMatchClass = ZPRVectorListMul<64, 4>;
12541258
}
1259+
1260+
def ZZZZ_q_mul_r : RegisterOperand<ZPR4Mul4, "printTypedVectorList<0,'q'>"> {
1261+
let ParserMatchClass = ZPRVectorListMul<128, 4>;
1262+
}
12551263
} // end let EncoderMethod/DecoderMethod
12561264

12571265
class ZPRExtendAsmOperand<string ShiftExtend, int RegWidth, int Scale,

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -351,29 +351,29 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11>;
351351
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11>;
352352
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11>;
353353

354-
def FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00, 0b00>;
355-
def FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b01, 0b00>;
356-
def BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10, 0b00>;
357-
def BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b11, 0b00>;
358-
359-
def SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00, 0b11>;
360-
def UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b01, 0b11>;
361-
def SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10, 0b11>;
354+
defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000>;
355+
defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001>;
356+
defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000>;
357+
defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001>;
358+
359+
defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110>;
360+
defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111>;
361+
defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110>;
362362
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000>;
363363
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001>;
364364
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100>;
365365
defm SQCVTN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtn", 0b010>;
366366
defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110>;
367367
defm UQCVTN_Z4Z : sme2_int_cvt_vg4_single<"uqcvtn", 0b011>;
368368

369-
def FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b010>;
370-
def FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b010>;
371-
def FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b011>;
372-
def FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b011>;
373-
def SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b100>;
374-
def SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b100>;
375-
def UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b101>;
376-
def UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b101>;
369+
defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>;
370+
defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>;
371+
defm FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b00011>;
372+
defm FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b0001010>;
373+
defm SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b00100>;
374+
defm SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b0010000>;
375+
defm UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b00101>;
376+
defm UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b0010010>;
377377

378378
defm SMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>;
379379
defm SMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>;
@@ -575,6 +575,27 @@ defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;
575575
defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">;
576576
defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
577577
defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;
578+
579+
defm SUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"sunpk", 0b0>;
580+
defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>;
581+
defm UUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"uunpk", 0b1>;
582+
defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>;
583+
584+
defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>;
585+
defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>;
586+
defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0110000>;
587+
defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b0110001>;
588+
defm ZIP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"zip", 0b0111000>;
589+
defm UZP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"uzp", 0b0111001>;
590+
591+
defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b11000>;
592+
defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b1100000>;
593+
defm FRINTM_2Z2Z: sme2_frint_vector_vg2_multi<"frintm", 0b10100>;
594+
defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b1010000>;
595+
defm FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b10000>;
596+
defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>;
597+
defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>;
598+
defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>;
578599
}
579600

580601
let Predicates = [HasSME2, HasSMEI16I64] in {

0 commit comments

Comments
 (0)