Skip to content

Commit 45fdb77

Browse files
committed
[MCA][X86] Cleanup znver4 instregex patterns for (V)PMOV extension/truncation instructions
Split extension/truncation patterns to simplify matching. Fix patterns to consistently match SSE/AVX1/AVX2 variants as well. Add some missing src/dst type variants - there should be no difference in scheduling; it's purely based on dst reg width. Confirmed with Agner/uops.info. Noticed while triaging #110308.
1 parent d471c85 commit 45fdb77

File tree

6 files changed

+90
-92
lines changed

6 files changed

+90
-92
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,10 +1646,8 @@ def Zn4MOVS: SchedWriteRes<[Zn4FPFMisc12]> {
16461646
let NumMicroOps = 1;
16471647
}
16481648
def : InstRW<[Zn4MOVS], (instregex
1649-
"(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z128?|Z256?)(rr|rrk|rrkz)",
1650-
"(V?)PMOV(SX|QD|UZ|ZX)(BD|BQ|BW?)(Y|Z128?)(rr|rrk|rrkz)",
1651-
"(V?)PMOV(SX|US|ZX)(DQ|WD|QW|WQ?)(Y|Z128?)(rr|rrk|rrkz)",
1652-
"VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?|Z256?)(rr|rrk|rrkz)"
1649+
"(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Y?|Z128?|Z256?)(rr|rrk|rrkz)",
1650+
"(V?)PMOV(S?|US?)(DB|DW|QB|QD|QW|WB)(Z128|Z256)(rr|rrk|rrkz)"
16531651
)>;
16541652

16551653
def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> {
@@ -1667,7 +1665,7 @@ def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> {
16671665
let NumMicroOps = 1;
16681666
}
16691667
def : InstRW<[Zn4MOVSrr], (instregex
1670-
"(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)Z(rr|rrk|rrkz)"
1668+
"(V?)PMOV(S?|US?)(DB|DW|QB|QD|QW|WB)Z(rr|rrk|rrkz)"
16711669
)>;
16721670

16731671

llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,29 +1506,29 @@ vzeroupper
15061506
# CHECK-NEXT: 1 1 0.25 vpminuw %xmm0, %xmm1, %xmm2
15071507
# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %xmm1, %xmm2
15081508
# CHECK-NEXT: 1 1 1.00 vpmovmskb %xmm0, %ecx
1509-
# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2
1509+
# CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm0, %xmm2
15101510
# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %xmm2
1511-
# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2
1511+
# CHECK-NEXT: 1 2 0.50 vpmovsxbq %xmm0, %xmm2
15121512
# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %xmm2
1513-
# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2
1513+
# CHECK-NEXT: 1 2 0.50 vpmovsxbw %xmm0, %xmm2
15141514
# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %xmm2
1515-
# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2
1515+
# CHECK-NEXT: 1 2 0.50 vpmovsxdq %xmm0, %xmm2
15161516
# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %xmm2
1517-
# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2
1517+
# CHECK-NEXT: 1 2 0.50 vpmovsxwd %xmm0, %xmm2
15181518
# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %xmm2
1519-
# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2
1519+
# CHECK-NEXT: 1 2 0.50 vpmovsxwq %xmm0, %xmm2
15201520
# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %xmm2
1521-
# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2
1521+
# CHECK-NEXT: 1 2 0.50 vpmovzxbd %xmm0, %xmm2
15221522
# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %xmm2
1523-
# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2
1523+
# CHECK-NEXT: 1 2 0.50 vpmovzxbq %xmm0, %xmm2
15241524
# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %xmm2
1525-
# CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2
1525+
# CHECK-NEXT: 1 2 0.50 vpmovzxbw %xmm0, %xmm2
15261526
# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %xmm2
1527-
# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2
1527+
# CHECK-NEXT: 1 2 0.50 vpmovzxdq %xmm0, %xmm2
15281528
# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %xmm2
1529-
# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2
1529+
# CHECK-NEXT: 1 2 0.50 vpmovzxwd %xmm0, %xmm2
15301530
# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %xmm2
1531-
# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2
1531+
# CHECK-NEXT: 1 2 0.50 vpmovzxwq %xmm0, %xmm2
15321532
# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %xmm2
15331533
# CHECK-NEXT: 1 3 0.50 vpmuldq %xmm0, %xmm1, %xmm2
15341534
# CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2

0 commit comments

Comments
 (0)