Skip to content

Commit 68d6fe5

Browse files
authored
[X86][CodeGen] Prefer KMOVkk_EVEX over KMOVkk when EGPR is supported (llvm#74048)
In the memory fold table, we have ``` {X86::KMOVDkk, X86::KMOVDkm, 0}, {X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0} ``` where `KMOVDkm_EVEX` can use EGPRs as base and index registers, while `KMOVDkm` can't. Hence, although `KMOVkk` does not have any GPR operands, we prefer to use `KMOVDkk_EVEX` to help register allocation: if the register-to-register move is later folded into its memory form, the resulting `KMOVDkm_EVEX` can still address memory through EGPRs. It will be compressed to `KMOVDkk` in the EVEX2VEX pass if memory folding does not happen.
1 parent 5e94080 commit 68d6fe5

File tree

3 files changed

+31
-10
lines changed

3 files changed

+31
-10
lines changed

llvm/lib/Target/X86/X86DomainReassignment.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -625,8 +625,10 @@ void X86DomainReassignment::initConverters() {
625625
createReplacerDstCOPY(X86::MOVZX64rm16,
626626
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
627627

628-
createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
629-
createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
628+
createReplacerDstCOPY(X86::MOVZX32rr16,
629+
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
630+
createReplacerDstCOPY(X86::MOVZX64rr16,
631+
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
630632

631633
if (STI->hasDQI()) {
632634
createReplacerDstCOPY(X86::MOVZX16rm8,
@@ -636,9 +638,12 @@ void X86DomainReassignment::initConverters() {
636638
createReplacerDstCOPY(X86::MOVZX64rm8,
637639
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
638640

639-
createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
640-
createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
641-
createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
641+
createReplacerDstCOPY(X86::MOVZX16rr8,
642+
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
643+
createReplacerDstCOPY(X86::MOVZX32rr8,
644+
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
645+
createReplacerDstCOPY(X86::MOVZX64rr8,
646+
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
642647
}
643648

644649
auto createReplacer = [&](unsigned From, unsigned To) {
@@ -647,7 +652,7 @@ void X86DomainReassignment::initConverters() {
647652

648653
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
649654
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
650-
createReplacer(X86::MOV16rr, X86::KMOVWkk);
655+
createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
651656
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
652657
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
653658
createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -662,8 +667,8 @@ void X86DomainReassignment::initConverters() {
662667
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
663668
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
664669

665-
createReplacer(X86::MOV32rr, X86::KMOVDkk);
666-
createReplacer(X86::MOV64rr, X86::KMOVQkk);
670+
createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
671+
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
667672

668673
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
669674
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -703,7 +708,7 @@ void X86DomainReassignment::initConverters() {
703708

704709
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
705710
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
706-
createReplacer(X86::MOV8rr, X86::KMOVBkk);
711+
createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
707712

708713
createReplacer(X86::NOT8r, X86::KNOTBrr);
709714

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4070,6 +4070,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
40704070
// First deal with the normal symmetric copies.
40714071
bool HasAVX = Subtarget.hasAVX();
40724072
bool HasVLX = Subtarget.hasVLX();
4073+
bool HasEGPR = Subtarget.hasEGPR();
40734074
unsigned Opc = 0;
40744075
if (X86::GR64RegClass.contains(DestReg, SrcReg))
40754076
Opc = X86::MOV64rr;
@@ -4124,7 +4125,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
41244125
// All KMASK RegClasses hold the same k registers, can be tested against
41254126
// anyone.
41264127
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
4127-
Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
4128+
Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4129+
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
41284130
if (!Opc)
41294131
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
41304132

llvm/test/CodeGen/X86/apx/kmov-kk.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
2+
3+
define <16 x i32> @kmovkk(ptr %base, <16 x i32> %ind, i16 %mask) {
4+
; EGPR: kmovq %k1, %k2 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0xd1]
5+
%broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
6+
%broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
7+
%gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
8+
%imask = bitcast i16 %mask to <16 x i1>
9+
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
10+
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
11+
%res = add <16 x i32> %gt1, %gt2
12+
ret <16 x i32> %res
13+
}
14+
declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)

0 commit comments

Comments
 (0)