Skip to content

Commit 5d089ac

Browse files
committed
[X86] Improve opmask handling for AVX10.1-256
Quadword opmask instructions are only supported on processors supporting vector lengths of 512 bits.
1 parent 68db7ae commit 5d089ac

File tree

10 files changed

+2614
-323
lines changed

10 files changed

+2614
-323
lines changed

llvm/lib/Target/X86/X86DomainReassignment.cpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -662,37 +662,30 @@ void X86DomainReassignment::initConverters() {
662662

663663
if (STI->hasBWI()) {
664664
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
665-
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
666-
667665
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
668-
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
669-
670666
createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
671-
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
672-
673667
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
674-
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
675-
676668
createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
677-
createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
678-
679669
createReplacer(X86::ADD32rr, X86::KADDDrr);
680-
createReplacer(X86::ADD64rr, X86::KADDQrr);
681-
682670
createReplacer(X86::NOT32r, X86::KNOTDrr);
683-
createReplacer(X86::NOT64r, X86::KNOTQrr);
684-
685671
createReplacer(X86::OR32rr, X86::KORDrr);
686-
createReplacer(X86::OR64rr, X86::KORQrr);
687-
688672
createReplacer(X86::AND32rr, X86::KANDDrr);
689-
createReplacer(X86::AND64rr, X86::KANDQrr);
690-
691673
createReplacer(X86::ANDN32rr, X86::KANDNDrr);
692-
createReplacer(X86::ANDN64rr, X86::KANDNQrr);
693-
694674
createReplacer(X86::XOR32rr, X86::KXORDrr);
695-
createReplacer(X86::XOR64rr, X86::KXORQrr);
675+
676+
if (STI->hasEVEX512()) {
677+
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
678+
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
679+
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
680+
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
681+
createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
682+
createReplacer(X86::ADD64rr, X86::KADDQrr);
683+
createReplacer(X86::NOT64r, X86::KNOTQrr);
684+
createReplacer(X86::OR64rr, X86::KORQrr);
685+
createReplacer(X86::AND64rr, X86::KANDQrr);
686+
createReplacer(X86::ANDN64rr, X86::KANDNQrr);
687+
createReplacer(X86::XOR64rr, X86::KXORQrr);
688+
}
696689

697690
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
698691
// to prove only Z flag is used.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2058,9 +2058,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20582058
// AVX512BW..
20592059
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
20602060
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
2061-
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
2061+
if (Subtarget.hasEVEX512())
2062+
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
20622063

20632064
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
2065+
if (VT == MVT::v64i1 && !Subtarget.hasEVEX512())
2066+
continue;
20642067
setOperationAction(ISD::VSELECT, VT, Expand);
20652068
setOperationAction(ISD::TRUNCATE, VT, Custom);
20662069
setOperationAction(ISD::SETCC, VT, Custom);

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3996,7 +3996,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
39963996
// anyone.
39973997
if (X86::VK16RegClass.contains(SrcReg)) {
39983998
if (X86::GR64RegClass.contains(DestReg)) {
3999-
assert(Subtarget.hasBWI());
3999+
assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
4000+
"KMOVQ requires BWI with EVEX512");
40004001
return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
40014002
}
40024003
if (X86::GR32RegClass.contains(DestReg))
@@ -4011,7 +4012,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
40114012
// anyone.
40124013
if (X86::VK16RegClass.contains(DestReg)) {
40134014
if (X86::GR64RegClass.contains(SrcReg)) {
4014-
assert(Subtarget.hasBWI());
4015+
assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
4016+
"KMOVQ requires BWI with EVEX512");
40154017
return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
40164018
}
40174019
if (X86::GR32RegClass.contains(SrcReg))
@@ -4125,8 +4127,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
41254127
// All KMASK RegClasses hold the same k registers, can be tested against
41264128
// any of them.
41274129
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
4128-
Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4129-
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
4130+
Opc = Subtarget.hasBWI() && Subtarget.hasEVEX512()
4131+
? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4132+
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
41304133
if (!Opc)
41314134
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
41324135

@@ -4247,7 +4250,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
42474250
if (X86::RFP64RegClass.hasSubClassEq(RC))
42484251
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
42494252
if (X86::VK64RegClass.hasSubClassEq(RC)) {
4250-
assert(STI.hasBWI() && "KMOVQ requires BWI");
4253+
assert(STI.hasBWI() && STI.hasEVEX512() &&
4254+
"KMOVQ requires BWI with EVEX512");
42514255
return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
42524256
: (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
42534257
}
@@ -10523,7 +10527,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
1052310527
return;
1052410528

1052510529
// KXOR is safe to use because it doesn't affect flags.
10526-
unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
10530+
unsigned Op = ST.hasBWI() && ST.hasEVEX512() ? X86::KXORQrr : X86::KXORWrr;
1052710531
BuildMI(MBB, Iter, DL, get(Op), Reg)
1052810532
.addReg(Reg, RegState::Undef)
1052910533
.addReg(Reg, RegState::Undef);

llvm/lib/Target/X86/X86Subtarget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
244244
// TODO: Currently we're always allowing widening on CPUs without VLX,
245245
// because for many cases we don't have a better option.
246246
bool canExtendTo512DQ() const {
247-
return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
247+
return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512) &&
248+
hasEVEX512();
248249
}
249250
bool canExtendTo512BW() const {
250251
return hasBWI() && canExtendTo512DQ();

0 commit comments

Comments
 (0)