
Commit f3d2a31

[X86][CodeGen] Cleanup code for EVEX2VEX pass, NFCI
1. Remove unused variables, e.g. the X86Subtarget object in performCustomAdjustments.
2. Define checkVEXInstPredicate directly instead of generating it, because the function is small and it is unlikely that more instructions will need the predicate check in the future.
3. Check that the tables are sorted only once for each function, not once per instruction.
4. Remove some blanks and clang-format the code.
1 parent a3ef858 commit f3d2a31
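
For orientation, the overall shape of the pass after this cleanup is roughly the following condensed sketch (distilled from the new code in the diff below; helper bodies elided, so it is not a compilable excerpt on its own):

// runOnMachineFunction after the cleanup: the subtarget is fetched once per
// function and passed down, instead of being cached in pass members.
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
  // The NDEBUG-only sorted-table assertion now also runs here, once per
  // function rather than once per instruction.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.hasAVX512())
    return false;
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      Changed |= CompressEvexToVexImpl(MI, ST); // now a static free function
  return Changed;
}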

File tree: 4 files changed, +96 -126 lines

llvm/lib/Target/X86/X86EvexToVex.cpp

Lines changed: 92 additions & 81 deletions
@@ -12,9 +12,10 @@
 /// are encoded using the EVEX prefix and if possible replaces them by their
 /// corresponding VEX encoding which is usually shorter by 2 bytes.
 /// EVEX instructions may be encoded via the VEX prefix when the AVX-512
-/// instruction has a corresponding AVX/AVX2 opcode, when vector length
-/// accessed by instruction is less than 512 bits and when it does not use
-// the xmm or the mask registers or xmm/ymm registers with indexes higher than 15.
+/// instruction has a corresponding AVX/AVX2 opcode, when vector length
+/// accessed by instruction is less than 512 bits and when it does not use
+// the xmm or the mask registers or xmm/ymm registers with indexes higher
+// than 15.
 /// The pass applies code reduction on the generated code for AVX-512 instrs.
 //
 //===----------------------------------------------------------------------===//
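
As a concrete illustration of the 2-byte saving mentioned in this header (worked out by hand from the VEX/EVEX prefix layouts, not taken from the patch): vaddps xmm1, xmm2, xmm3 encodes as 62 F1 6C 08 58 CB (6 bytes) with an EVEX prefix, but as C5 E8 58 CB (4 bytes) with the two-byte VEX prefix; the opcode, ModR/M byte and register operands are otherwise identical.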
@@ -39,16 +40,16 @@ using namespace llvm;
 
 // Including the generated EVEX2VEX tables.
 struct X86EvexToVexCompressTableEntry {
-  uint16_t EvexOpcode;
-  uint16_t VexOpcode;
+  uint16_t EvexOpc;
+  uint16_t VexOpc;
 
   bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
-    return EvexOpcode < RHS.EvexOpcode;
+    return EvexOpc < RHS.EvexOpc;
   }
 
   friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
                         unsigned Opc) {
-    return TE.EvexOpcode < Opc;
+    return TE.EvexOpc < Opc;
   }
 };
 #include "X86GenEVEX2VEXTables.inc"
@@ -61,16 +62,9 @@ struct X86EvexToVexCompressTableEntry {
 namespace {
 
 class EvexToVexInstPass : public MachineFunctionPass {
-
-  /// For EVEX instructions that can be encoded using VEX encoding, replace
-  /// them by the VEX encoding in order to reduce size.
-  bool CompressEvexToVexImpl(MachineInstr &MI) const;
-
 public:
   static char ID;
-
-  EvexToVexInstPass() : MachineFunctionPass(ID) { }
-
+  EvexToVexInstPass() : MachineFunctionPass(ID) {}
   StringRef getPassName() const override { return EVEX2VEX_DESC; }
 
   /// Loop over all of the basic blocks, replacing EVEX instructions
@@ -82,53 +76,23 @@ class EvexToVexInstPass : public MachineFunctionPass {
     return MachineFunctionProperties().set(
         MachineFunctionProperties::Property::NoVRegs);
   }
-
-private:
-  /// Machine instruction info used throughout the class.
-  const X86InstrInfo *TII = nullptr;
-
-  const X86Subtarget *ST = nullptr;
 };
 
 } // end anonymous namespace
 
 char EvexToVexInstPass::ID = 0;
 
-bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
-
-  ST = &MF.getSubtarget<X86Subtarget>();
-  if (!ST->hasAVX512())
-    return false;
-
-  bool Changed = false;
-
-  /// Go over all basic blocks in function and replace
-  /// EVEX encoded instrs by VEX encoding when possible.
-  for (MachineBasicBlock &MBB : MF) {
-
-    // Traverse the basic block.
-    for (MachineInstr &MI : MBB)
-      Changed |= CompressEvexToVexImpl(MI);
-  }
-
-  return Changed;
-}
-
 static bool usesExtendedRegister(const MachineInstr &MI) {
   auto isHiRegIdx = [](unsigned Reg) {
     // Check for XMM register with indexes between 16 - 31.
     if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
       return true;
-
     // Check for YMM register with indexes between 16 - 31.
     if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
       return true;
-
     // Check for GPR with indexes between 16 - 31.
     if (X86II::isApxExtendedReg(Reg))
       return true;
-
     return false;
   };
 
@@ -139,32 +103,67 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
       continue;
 
     Register Reg = MO.getReg();
-
-    assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
+    assert(!X86II::isZMMReg(Reg) &&
            "ZMM instructions should not be in the EVEX->VEX tables");
-
     if (isHiRegIdx(Reg))
       return true;
   }
 
   return false;
 }
 
+static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
+  switch (EvexOpc) {
+  default:
+    return true;
+  case X86::VCVTNEPS2BF16Z128rm:
+  case X86::VCVTNEPS2BF16Z128rr:
+  case X86::VCVTNEPS2BF16Z256rm:
+  case X86::VCVTNEPS2BF16Z256rr:
+    return ST.hasAVXNECONVERT();
+  case X86::VPDPBUSDSZ128m:
+  case X86::VPDPBUSDSZ128r:
+  case X86::VPDPBUSDSZ256m:
+  case X86::VPDPBUSDSZ256r:
+  case X86::VPDPBUSDZ128m:
+  case X86::VPDPBUSDZ128r:
+  case X86::VPDPBUSDZ256m:
+  case X86::VPDPBUSDZ256r:
+  case X86::VPDPWSSDSZ128m:
+  case X86::VPDPWSSDSZ128r:
+  case X86::VPDPWSSDSZ256m:
+  case X86::VPDPWSSDSZ256r:
+  case X86::VPDPWSSDZ128m:
+  case X86::VPDPWSSDZ128r:
+  case X86::VPDPWSSDZ256m:
+  case X86::VPDPWSSDZ256r:
+    return ST.hasAVXVNNI();
+  case X86::VPMADD52HUQZ128m:
+  case X86::VPMADD52HUQZ128r:
+  case X86::VPMADD52HUQZ256m:
+  case X86::VPMADD52HUQZ256r:
+  case X86::VPMADD52LUQZ128m:
+  case X86::VPMADD52LUQZ128r:
+  case X86::VPMADD52LUQZ256m:
+  case X86::VPMADD52LUQZ256r:
+    return ST.hasAVXIFMA();
+  }
+}
+
 // Do any custom cleanup needed to finalize the conversion.
-static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
-                                     const X86Subtarget *ST) {
-  (void)NewOpc;
+static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
+  (void)VexOpc;
   unsigned Opc = MI.getOpcode();
   switch (Opc) {
   case X86::VALIGNDZ128rri:
   case X86::VALIGNDZ128rmi:
   case X86::VALIGNQZ128rri:
   case X86::VALIGNQZ128rmi: {
-    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
+    assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
            "Unexpected new opcode!");
-    unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
-                      Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
-    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    unsigned Scale =
+        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     Imm.setImm(Imm.getImm() * Scale);
     break;
   }
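
A worked example of the immediate rescaling above (my own arithmetic, consistent with the code): VALIGNQ counts 64-bit elements while VPALIGNR counts bytes, so an EVEX immediate of 1 becomes 1 * 8 = 8 in the VEX form; for the dword variants the scale is 4.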
@@ -176,10 +175,10 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
   case X86::VSHUFI32X4Z256rri:
   case X86::VSHUFI64X2Z256rmi:
   case X86::VSHUFI64X2Z256rri: {
-    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
-            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+    assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
+            VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
            "Unexpected new opcode!");
-    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     int64_t ImmVal = Imm.getImm();
     // Set bit 5, move bit 1 to bit 4, copy bit 0.
     Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
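
A worked example of the bit remapping above (hand-computed from the setImm expression): for ImmVal = 3, the new immediate is 0x20 | ((3 & 2) << 3) | (3 & 1) = 0x20 | 0x10 | 0x1 = 0x31.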
@@ -212,18 +211,16 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
   return true;
 }
 
-
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
-bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
+static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
   // VEX format.
   // # of bytes: 0,2,3  1      1      0,1     0,1,2,4  0,1
   // [Prefixes]  [VEX]  OPCODE ModR/M [SIB] [DISP]  [IMM]
   //
   // EVEX format.
   //  # of bytes:    4    1      1      1      4 / 1         1
   //  [Prefixes]  EVEX   Opcode ModR/M [SIB] [Disp32] / [Disp8*N]  [Immediate]
-
   const MCInstrDesc &Desc = MI.getDesc();
 
   // Check for EVEX instructions only.
@@ -241,6 +238,29 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
   if (Desc.TSFlags & X86II::EVEX_L2)
     return false;
 
+  // Use the VEX.L bit to select the 128 or 256-bit table.
+  ArrayRef<X86EvexToVexCompressTableEntry> Table =
+      (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
+                                    : ArrayRef(X86EvexToVex128CompressTable);
+
+  unsigned EvexOpc = MI.getOpcode();
+  const auto *I = llvm::lower_bound(Table, EvexOpc);
+  if (I == Table.end() || I->EvexOpc != EvexOpc)
+    return false;
+
+  if (usesExtendedRegister(MI))
+    return false;
+  if (!checkVEXInstPredicate(EvexOpc, ST))
+    return false;
+  if (!performCustomAdjustments(MI, I->VexOpc))
+    return false;
+
+  MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
+  return true;
+}
+
+bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
 #ifndef NDEBUG
   // Make sure the tables are sorted.
   static std::atomic<bool> TableChecked(false);
@@ -252,30 +272,21 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
     TableChecked.store(true, std::memory_order_relaxed);
   }
 #endif
-
-  // Use the VEX.L bit to select the 128 or 256-bit table.
-  ArrayRef<X86EvexToVexCompressTableEntry> Table =
-      (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
-                                    : ArrayRef(X86EvexToVex128CompressTable);
-
-  const auto *I = llvm::lower_bound(Table, MI.getOpcode());
-  if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+  if (!ST.hasAVX512())
     return false;
 
-  unsigned NewOpc = I->VexOpcode;
-
-  if (usesExtendedRegister(MI))
-    return false;
-
-  if (!CheckVEXInstPredicate(MI, ST))
-    return false;
+  bool Changed = false;
 
-  if (!performCustomAdjustments(MI, NewOpc, ST))
-    return false;
+  /// Go over all basic blocks in function and replace
+  /// EVEX encoded instrs by VEX encoding when possible.
+  for (MachineBasicBlock &MBB : MF) {
+    // Traverse the basic block.
+    for (MachineInstr &MI : MBB)
+      Changed |= CompressEvexToVexImpl(MI, ST);
+  }
 
-  MI.setDesc(TII->get(NewOpc));
-  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
-  return true;
+  return Changed;
 }
 
 INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
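
The #ifndef NDEBUG block retained inside runOnMachineFunction above implements item 3 of the commit message: the sorted-table check now runs once per function instead of once per instruction. The same once-only idiom in isolation (a sketch; checkTablesOnce and the stand-in isSorted are placeholders for llvm::is_sorted over the two compress tables):

#include <atomic>
#include <cassert>

static bool isSorted() { return true; } // stand-in for llvm::is_sorted(Table)

static void checkTablesOnce() {
  static std::atomic<bool> TableChecked(false);
  // Relaxed ordering is enough: the check is idempotent and only guards an
  // assertion, so concurrent callers at worst repeat the verification.
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(isSorted() && "tables are not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
}

int main() { checkTablesOnce(); }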

llvm/lib/Target/X86/X86InstrFormats.td

Lines changed: 0 additions & 2 deletions
@@ -371,8 +371,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
   ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix;
   bits<2> explicitOpPrefixBits = explicitOpPrefix.Value;
-  // Force to check predicate before compress EVEX to VEX encoding.
-  bit checkVEXPredicate = 0;
   // TSFlags layout should be kept in sync with X86BaseInfo.h.
   let TSFlags{6-0} = FormBits;
   let TSFlags{8-7} = OpSizeBits;

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 2 additions & 4 deletions
@@ -7316,7 +7316,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
 // AVX_VNNI
 //===----------------------------------------------------------------------===//
 let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
-    explicitOpPrefix = ExplicitVEX, checkVEXPredicate = 1 in
+    explicitOpPrefix = ExplicitVEX in
 multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        bit IsCommutable> {
   let isCommutable = IsCommutable in
@@ -8142,8 +8142,7 @@ let isCommutable = 0 in {
 }
 
 // AVX-IFMA
-let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
-    checkVEXPredicate = 1 in
+let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst" in
 multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
   // NOTE: The SDNode have the multiply operands first with the add last.
   // This enables commuted load patterns to be autogenerated by tablegen.
@@ -8287,7 +8286,6 @@ let Predicates = [HasAVXNECONVERT] in {
                                           f256mem>, T8XD;
   defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
                                           f256mem>, T8PS;
-  let checkVEXPredicate = 1 in
   defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
 
   def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
