Skip to content

Commit b3e8008

Browse files
committed
True16 for v_pack_b32_f16 in MC
1 parent a1d71c3 commit b3e8008

13 files changed

+386
-140
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32
642642
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
643643
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
644644

645-
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
645+
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
646646

647647
let isReMaterializable = 1 in {
648648
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
@@ -1751,7 +1751,7 @@ defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
17511751
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
17521752
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
17531753
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
1754-
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
1754+
defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
17551755
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
17561756
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
17571757
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5207,11 +5207,11 @@ v_or_b16 v5, src_scc, vcc_lo
52075207
v_or_b16 v255, 0xfe0b, vcc_hi
52085208
// GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
52095209

5210-
v_pack_b32_f16 v5, v1, v2
5211-
// GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
5210+
v_pack_b32_f16 v5, v1.l, v2.l
5211+
// GFX11: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
52125212

5213-
v_pack_b32_f16 v5, v255, v255
5214-
// GFX11: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
5213+
v_pack_b32_f16 v5, v255.l, v255.l
5214+
// GFX11: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
52155215

52165216
v_pack_b32_f16 v5, s1, s2
52175217
// GFX11: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -5243,7 +5243,7 @@ v_pack_b32_f16 v5, null, exec_lo
52435243
v_pack_b32_f16 v5, -1, exec_hi
52445244
// GFX11: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00]
52455245

5246-
v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0]
5246+
v_pack_b32_f16 v5, 0.5, -m0
52475247
// GFX11: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40]
52485248

52495249
v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -5252,6 +5252,18 @@ v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
52525252
v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
52535253
// GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
52545254

5255+
v_pack_b32_f16 v5, v1.h, v2.l
5256+
// GFX11: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
5257+
5258+
v_pack_b32_f16 v5, v255.l, v255.h
5259+
// GFX11: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
5260+
5261+
v_pack_b32_f16 v5, -src_scc, |vcc_lo|
5262+
// GFX11: v_pack_b32_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x11,0xd7,0xfd,0xd4,0x00,0x20]
5263+
5264+
v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi|
5265+
// GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
5266+
52555267
v_perm_b32 v5, v1, v2, s3
52565268
// GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
52575269

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3674,47 +3674,53 @@ v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
36743674
v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
36753675
// GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
36763676

3677-
v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
3678-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
3677+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0]
3678+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
36793679

3680-
v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
3681-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
3680+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3]
3681+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
36823682

3683-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
3684-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
3683+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror
3684+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
36853685

3686-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
3687-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
3686+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror
3687+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
36883688

3689-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
3690-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
3689+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1
3690+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
36913691

3692-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
3693-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
3692+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15
3693+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
36943694

3695-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
3696-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
3695+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1
3696+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
36973697

3698-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
3699-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
3698+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15
3699+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
37003700

3701-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
3702-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
3701+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1
3702+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
37033703

3704-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
3705-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
3704+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15
3705+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
37063706

3707-
v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
3708-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
3707+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
3708+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
37093709

3710-
v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
3711-
// GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
3710+
v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
3711+
// GFX11: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
37123712

3713-
v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
3714-
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
3713+
v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
3714+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
37153715

3716-
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
3717-
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
3716+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
3717+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
3718+
3719+
v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
3720+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
3721+
3722+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
3723+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
37183724

37193725
v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
37203726
// GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4611,11 +4617,11 @@ v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank
46114617
v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
46124618
// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
46134619

4614-
v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
4615-
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
4620+
v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
4621+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
46164622

4617-
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
4618-
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
4623+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
4624+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
46194625

46204626
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
46214627
// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2359,17 +2359,23 @@ v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
23592359
v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
23602360
// GFX11: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
23612361

2362-
v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
2363-
// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
2362+
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
2363+
// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
23642364

2365-
v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
2366-
// GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
2365+
v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
2366+
// GFX11: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
23672367

2368-
v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
2369-
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
2368+
v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
2369+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
23702370

2371-
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
2372-
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
2371+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0]
2372+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
2373+
2374+
v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
2375+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
2376+
2377+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
2378+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
23732379

23742380
v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
23752381
// GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2968,11 +2974,11 @@ v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
29682974
v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
29692975
// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
29702976

2971-
v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
2972-
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
2977+
v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
2978+
// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
29732979

2974-
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
2975-
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
2980+
v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
2981+
// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
29762982

29772983
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4]
29782984
// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]

llvm/test/MC/AMDGPU/gfx12_asm_vop3.s

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5030,11 +5030,11 @@ v_or_b16 v5, src_scc, vcc_lo
50305030
v_or_b16 v255, 0xfe0b, vcc_hi
50315031
// GFX12: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
50325032

5033-
v_pack_b32_f16 v5, v1, v2
5034-
// GFX12: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
5033+
v_pack_b32_f16 v5, v1.l, v2.l
5034+
// GFX12: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
50355035

5036-
v_pack_b32_f16 v5, v255, v255
5037-
// GFX12: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
5036+
v_pack_b32_f16 v5, v255.l, v255.l
5037+
// GFX12: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
50385038

50395039
v_pack_b32_f16 v5, s1, s2
50405040
// GFX12: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -5066,7 +5066,7 @@ v_pack_b32_f16 v5, null, exec_lo
50665066
v_pack_b32_f16 v5, -1, exec_hi
50675067
// GFX12: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00]
50685068

5069-
v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0]
5069+
v_pack_b32_f16 v5, 0.5, -m0
50705070
// GFX12: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40]
50715071

50725072
v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -5075,6 +5075,12 @@ v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
50755075
v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
50765076
// GFX12: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
50775077

5078+
v_pack_b32_f16 v5, v1.h, v2.l
5079+
// GFX12: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
5080+
5081+
v_pack_b32_f16 v5, v255.l, v255.h
5082+
// GFX12: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
5083+
50785084
v_perm_b32 v5, v1, v2, s3
50795085
// GFX12: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
50805086

0 commit comments

Comments
 (0)