Skip to content

Commit bddab51

Browse files
[X86][MC,Driver] Support -msse2avx to encode SSE instruction with VEX prefix (#96860)
For GCC compatibility https://gcc.gnu.org/onlinedocs/gcc-14.1.0/gcc/x86-Options.html.
1 parent 25752a4 commit bddab51

File tree

12 files changed

+197
-0
lines changed

12 files changed

+197
-0
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ VALUE_CODEGENOPT(Name, Bits, Default)
3838
CODEGENOPT(DisableIntegratedAS, 1, 0) ///< -no-integrated-as
3939
CODEGENOPT(Crel, 1, 0) ///< -Wa,--crel
4040
CODEGENOPT(RelaxELFRelocations, 1, 1) ///< -Wa,-mrelax-relocations={yes,no}
41+
CODEGENOPT(SSE2AVX , 1, 0) ///< -msse2avx
4142
CODEGENOPT(AsmVerbose , 1, 0) ///< -dA, -fverbose-asm.
4243
CODEGENOPT(PreserveAsmComments, 1, 1) ///< -dA, -fno-preserve-as-comments.
4344
CODEGENOPT(AssumeSaneOperatorNew , 1, 1) ///< implicit __attribute__((malloc)) operator new

clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5179,6 +5179,13 @@ def mvx : Flag<["-"], "mvx">, Group<m_Group>;
51795179
def mno_vx : Flag<["-"], "mno-vx">, Group<m_Group>;
51805180
} // let Flags = [TargetSpecific]
51815181

5182+
// -msse2avx: ask the assembler to encode SSE instructions with a VEX prefix.
// Visible to the clang driver, cc1 and cc1as; marshalled into
// CodeGenOpts.SSE2AVX.
let Flags = [TargetSpecific] in {
5183+
def msse2avx : Flag<["-"], "msse2avx">, Group<m_Group>,
5184+
Visibility<[ClangOption, CC1Option, CC1AsOption]>,
5185+
HelpText<"Specify that the assembler should encode SSE instructions with VEX prefix">,
5186+
MarshallingInfoFlag<CodeGenOpts<"SSE2AVX">>;
5187+
} // let Flags = [TargetSpecific]
5188+
51825189
defm zvector : BoolFOption<"zvector",
51835190
LangOpts<"ZVector">, DefaultFalse,
51845191
PosFlag<SetTrue, [], [ClangOption, CC1Option],

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2545,6 +2545,13 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
25452545
switch (C.getDefaultToolChain().getArch()) {
25462546
default:
25472547
break;
2548+
case llvm::Triple::x86:
2549+
case llvm::Triple::x86_64:
2550+
if (Value == "-msse2avx") {
2551+
CmdArgs.push_back("-msse2avx");
2552+
continue;
2553+
}
2554+
break;
25482555
case llvm::Triple::wasm32:
25492556
case llvm::Triple::wasm64:
25502557
if (Value == "--no-type-check") {

clang/test/Driver/msse2avx.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Driver test for -msse2avx. On x86-64, passing it through -Xassembler (for
// both C and assembler input) must make "-msse2avx" show up in the -### output.
// RUN: %clang -### -c -march=x86-64 -Xassembler -msse2avx %s 2>&1 | FileCheck %s
2+
// RUN: %clang -### -c -march=x86-64 -x assembler -Xassembler -msse2avx %s 2>&1 | FileCheck %s
3+
4+
// CHECK: "-msse2avx"
5+
6+
// On a non-x86 target (aarch64 here) the driver must reject the option.
// RUN: not %clang -### -c --target=aarch64 -march=armv8a -msse2avx %s 2>&1 | FileCheck --check-prefix=ERR %s
7+
// ERR: error: unsupported option '-msse2avx' for target 'aarch64'

clang/tools/driver/cc1as_main.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ struct AssemblerInvocation {
9898
LLVM_PREFERRED_TYPE(bool)
9999
unsigned RelaxELFRelocations : 1;
100100
LLVM_PREFERRED_TYPE(bool)
101+
unsigned SSE2AVX : 1;
102+
LLVM_PREFERRED_TYPE(bool)
101103
unsigned Dwarf64 : 1;
102104
unsigned DwarfVersion;
103105
std::string DwarfDebugFlags;
@@ -197,6 +199,7 @@ struct AssemblerInvocation {
197199
ShowInst = 0;
198200
ShowEncoding = 0;
199201
RelaxAll = 0;
202+
SSE2AVX = 0;
200203
NoExecStack = 0;
201204
FatalWarnings = 0;
202205
NoWarn = 0;
@@ -288,6 +291,7 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
288291
}
289292

290293
Opts.RelaxELFRelocations = !Args.hasArg(OPT_mrelax_relocations_no);
294+
Opts.SSE2AVX = Args.hasArg(OPT_msse2avx);
291295
if (auto *DwarfFormatArg = Args.getLastArg(OPT_gdwarf64, OPT_gdwarf32))
292296
Opts.Dwarf64 = DwarfFormatArg->getOption().matches(OPT_gdwarf64);
293297
Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags);
@@ -437,6 +441,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
437441
MCOptions.MCSaveTempLabels = Opts.SaveTemporaryLabels;
438442
MCOptions.Crel = Opts.Crel;
439443
MCOptions.X86RelaxRelocations = Opts.RelaxELFRelocations;
444+
MCOptions.X86Sse2Avx = Opts.SSE2AVX;
440445
MCOptions.CompressDebugSections = Opts.CompressDebugSections;
441446
MCOptions.AsSecureLogFile = Opts.AsSecureLogFile;
442447

llvm/include/llvm/MC/MCTargetOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class MCTargetOptions {
6868
// ELF.
6969
bool X86RelaxRelocations = true;
7070

71+
bool X86Sse2Avx = false;
72+
7173
EmitDwarfUnwindType EmitDwarfUnwind;
7274

7375
int DwarfVersion = 0;

llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ bool getCrel();
5555

5656
bool getX86RelaxRelocations();
5757

58+
bool getX86Sse2Avx();
59+
5860
std::string getABIName();
5961

6062
std::string getAsSecureLogFile();

llvm/lib/MC/MCTargetOptionsCommandFlags.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ MCOPT(bool, NoTypeCheck)
4949
MCOPT(bool, SaveTempLabels)
5050
MCOPT(bool, Crel)
5151
MCOPT(bool, X86RelaxRelocations)
52+
MCOPT(bool, X86Sse2Avx)
5253
MCOPT(std::string, ABIName)
5354
MCOPT(std::string, AsSecureLogFile)
5455

@@ -140,6 +141,11 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
140141
cl::init(true));
141142
MCBINDOPT(X86RelaxRelocations);
142143

144+
static cl::opt<bool> X86Sse2Avx(
145+
"x86-sse2avx", cl::desc("Specify that the assembler should encode SSE "
146+
"instructions with VEX prefix"));
147+
MCBINDOPT(X86Sse2Avx);
148+
143149
static cl::opt<std::string> ABIName(
144150
"target-abi", cl::Hidden,
145151
cl::desc("The name of the ABI to be targeted from the backend."),
@@ -169,6 +175,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
169175
Options.MCSaveTempLabels = getSaveTempLabels();
170176
Options.Crel = getCrel();
171177
Options.X86RelaxRelocations = getX86RelaxRelocations();
178+
Options.X86Sse2Avx = getX86Sse2Avx();
172179
Options.EmitDwarfUnwind = getEmitDwarfUnwind();
173180
Options.EmitCompactUnwindNonCanonical = getEmitCompactUnwindNonCanonical();
174181
Options.AsSecureLogFile = getAsSecureLogFile();

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
5858

5959
namespace {
6060

61+
// Include the generated SSE-to-AVX opcode mapping table.
62+
#define GET_X86_SSE2AVX_TABLE
63+
#include "X86GenInstrMapping.inc"
64+
6165
static const char OpPrecedence[] = {
6266
0, // IC_OR
6367
1, // IC_XOR
@@ -3744,7 +3748,27 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
37443748
return false;
37453749
}
37463750

3751+
/// Rewrite a legacy SSE instruction in place into its VEX-encoded AVX
/// counterpart, using the TableGen-generated X86SSE2AVXTable.
///
/// \param Inst  Instruction to convert. On success its opcode is replaced and,
///              for the BLENDV family, one operand is appended.
/// \returns true if \p Inst was converted; false if its opcode has no entry in
///          the table, in which case \p Inst is left untouched.
static bool convertSSEToAVX(MCInst &Inst) {
  ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
  unsigned Opcode = Inst.getOpcode();
  // lower_bound is a binary search, so this relies on the generated table
  // being ordered by the SSE opcode.
  const auto I = llvm::lower_bound(Table, Opcode);
  if (I == Table.end() || I->OldOpc != Opcode)
    return false;

  Inst.setOpcode(I->NewOpc);
  // The SSE forms of BLENDVPD/BLENDVPS/PBLENDVB read their mask from an
  // implicit XMM0, whereas the AVX forms take the mask as an explicit trailing
  // register operand. Append XMM0 so the converted instruction keeps the same
  // semantics. Re-using operand 2 is not equivalent: it is the second source
  // register for the register forms and the memory base register for the
  // memory forms.
  // NOTE(review): test expectations recorded against the previous behaviour
  // (e.g. a pblendvb whose source register is not %xmm0) must be regenerated.
  if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
      X86::isPBLENDVB(Opcode))
    Inst.addOperand(MCOperand::createReg(X86::XMM0));

  return true;
}
3767+
37473768
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3769+
if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
3770+
return true;
3771+
37483772
if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
37493773
X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
37503774
return true;

llvm/test/MC/AsmParser/sse2avx-att.s

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Checks that llvm-mc with -x86-sse2avx prints legacy SSE instructions as their
# VEX-encoded AVX counterparts, while the instructions expected unchanged
# further down (extrq, insertq, pshufw) are left alone.
# The second invocation feeds the Intel-syntax rendering of this file back
# through llvm-mc with -x86-sse2avx; its output is not piped to FileCheck.
# RUN: llvm-mc -triple x86_64 -x86-sse2avx %s | FileCheck %s
2+
# RUN: llvm-mc -triple=x86_64 -output-asm-variant=1 %s | llvm-mc -triple=x86_64 -x86-asm-syntax=intel -x86-sse2avx
3+
.text
4+
# CHECK: vmovsd -352(%rbp), %xmm0
5+
movsd -352(%rbp), %xmm0 # xmm0 = mem[0],zero
6+
# CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[0]
7+
unpcklpd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0]
8+
# CHECK-NEXT: vmovapd %xmm0, -368(%rbp)
9+
movapd %xmm0, -368(%rbp)
10+
# CHECK-NEXT: vmovapd -368(%rbp), %xmm0
11+
movapd -368(%rbp), %xmm0
12+
# CHECK-NEXT: vmovsd -376(%rbp), %xmm1
13+
movsd -376(%rbp), %xmm1 # xmm1 = mem[0],zero
14+
# CHECK-NEXT: vmovsd -384(%rbp), %xmm0
15+
movsd -384(%rbp), %xmm0 # xmm0 = mem[0],zero
16+
# CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[0]
17+
unpcklpd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0]
18+
# CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
19+
addpd %xmm1, %xmm0
20+
# CHECK-NEXT: vmovapd %xmm0, -464(%rbp)
21+
movapd %xmm0, -464(%rbp)
22+
# CHECK-NEXT: vmovaps -304(%rbp), %xmm1
23+
movaps -304(%rbp), %xmm1
24+
# CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
25+
pandn %xmm1, %xmm0
26+
# CHECK-NEXT: vmovaps %xmm0, -480(%rbp)
27+
movaps %xmm0, -480(%rbp)
28+
# CHECK-NEXT: vmovss -220(%rbp), %xmm1
29+
movss -220(%rbp), %xmm1 # xmm1 = mem[0],zero,zero,zero
30+
# CHECK-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
31+
insertps $16, %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
32+
# CHECK-NEXT: vmovaps %xmm0, -496(%rbp)
33+
movaps %xmm0, -496(%rbp)
34+
# CHECK-NEXT: vmovss -256(%rbp), %xmm0
35+
movss -256(%rbp), %xmm0 # xmm0 = mem[0],zero,zero,zero
36+
# CHECK-NEXT: vmovaps -192(%rbp), %xmm0
37+
movaps -192(%rbp), %xmm0
38+
# CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
39+
divss %xmm1, %xmm0
40+
# CHECK-NEXT: vmovaps %xmm0, -192(%rbp)
41+
movaps %xmm0, -192(%rbp)
42+
# CHECK-NEXT: vmovd -128(%rbp), %xmm0
43+
movd -128(%rbp), %xmm0 # xmm0 = mem[0],zero,zero,zero
44+
# CHECK-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
45+
pinsrd $1, %edx, %xmm0
46+
# CHECK-NEXT: vmovaps %xmm0, -144(%rbp)
47+
movaps %xmm0, -144(%rbp)
48+
# CHECK-NEXT: vmovd -160(%rbp), %xmm0
49+
movd -160(%rbp), %xmm0 # xmm0 = mem[0],zero,zero,zero
50+
# CHECK-NEXT: vpblendw $170, %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
51+
pblendw $170, %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
52+
# CHECK-NEXT: vmovdqa %xmm0, -576(%rbp)
53+
movdqa %xmm0, -576(%rbp)
54+
# CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
55+
phsubw %xmm1, %xmm0
56+
# CHECK-NEXT: vmovdqa %xmm0, -592(%rbp)
57+
movdqa %xmm0, -592(%rbp)
58+
# CHECK-NEXT: vmovaps -496(%rbp), %xmm0
59+
movaps -496(%rbp), %xmm0
60+
# CHECK-NEXT: vroundps $8, %xmm0, %xmm0
61+
roundps $8, %xmm0, %xmm0
62+
# CHECK-NEXT: vmovaps %xmm0, -608(%rbp)
63+
movaps %xmm0, -608(%rbp)
64+
# CHECK-NEXT: vmovapd -432(%rbp), %xmm0
65+
movapd -432(%rbp), %xmm0
66+
# CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
67+
pxor %xmm1, %xmm0
68+
# CHECK-NEXT: vmovaps %xmm0, -640(%rbp)
69+
movaps %xmm0, -640(%rbp)
70+
# CHECK-NEXT: vmovapd -32(%rbp), %xmm0
71+
movapd -32(%rbp), %xmm0
72+
# CHECK-NEXT: vmovupd %xmm0, (%rax)
73+
movupd %xmm0, (%rax)
74+
# CHECK-NEXT: vmovsd -656(%rbp), %xmm0
75+
movsd -656(%rbp), %xmm0 # xmm0 = mem[0],zero
76+
# CHECK-NEXT: extrq $16, $8, %xmm0 # xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
77+
extrq $16, $8, %xmm0
78+
# CHECK-NEXT: insertq $16, $8, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
79+
insertq $16, $8, %xmm1, %xmm0
80+
# CHECK-NEXT: pshufw $1, %mm0, %mm2 # mm2 = mm0[1,0,0,0]
81+
pshufw $1, %mm0, %mm2
82+
# BLENDV family: the AVX forms carry one more operand than the SSE forms.
# NOTE(review): the SSE forms presumably take their mask from an implicit
# %xmm0, so the VEX mask (first AT&T operand) would be expected to be %xmm0.
# The vpblendvb expectation just below records %xmm2 there instead - confirm
# whether that is intended.
# CHECK-NEXT: vpblendvb %xmm2, %xmm2, %xmm1, %xmm1
83+
pblendvb %xmm0, %xmm2, %xmm1
84+
# CHECK-NEXT: vblendvps %xmm0, %xmm0, %xmm2, %xmm2
85+
blendvps %xmm0, %xmm0, %xmm2
86+
# CHECK-NEXT: vblendvpd %xmm0, %xmm0, %xmm2, %xmm2
87+
blendvpd %xmm0, %xmm0, %xmm2
88+
# CHECK-NEXT: vblendvpd %xmm0, %xmm0, %xmm2, %xmm2
89+
blendvpd %xmm0, %xmm2

llvm/utils/TableGen/X86InstrMappingEmitter.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class X86InstrMappingEmitter {
5656
raw_ostream &OS);
5757
void emitND2NonNDTable(ArrayRef<const CodeGenInstruction *> Insts,
5858
raw_ostream &OS);
59+
void emitSSE2AVXTable(ArrayRef<const CodeGenInstruction *> Insts,
60+
raw_ostream &OS);
5961

6062
// Prints the definition of class X86TableEntry.
6163
void printClassDef(raw_ostream &OS);
@@ -335,6 +337,38 @@ void X86InstrMappingEmitter::emitND2NonNDTable(
335337
printTable(Table, "X86ND2NonNDTable", "GET_X86_ND2NONND_TABLE", OS);
336338
}
337339

340+
/// Emit X86SSE2AVXTable, pairing each SSE instruction with its AVX
/// replacement.
///
/// The replacement comes from the ENTRY_SSE2AVX entries of
/// X86ManualInstrMapping.def when the instruction is listed there; otherwise
/// it is the record named "V" + <instruction name>, if such a record exists.
/// Instructions rejected by isInteresting(), or without a replacement, are
/// skipped.
void X86InstrMappingEmitter::emitSSE2AVXTable(
    ArrayRef<const CodeGenInstruction *> Insts, raw_ostream &OS) {

  // Hand-written overrides, keyed by the SSE record name.
  const std::map<StringRef, StringRef> ManualMap = {
#define ENTRY_SSE2AVX(OLD, NEW) {#OLD, #NEW},
#include "X86ManualInstrMapping.def"
  };

  std::vector<Entry> Table;
  for (const CodeGenInstruction *Inst : Insts) {
    const Record *Rec = Inst->TheDef;
    if (!isInteresting(Rec))
      continue;

    StringRef Name = Rec->getName();
    auto Manual = ManualMap.find(Name);
    if (Manual != ManualMap.end()) {
      // A manual entry must name an existing instruction record.
      auto *NewRec = Records.getDef(Manual->second);
      assert(NewRec && "Instruction not found!");
      Table.push_back(std::pair(Inst, &Target.getInstruction(NewRec)));
      continue;
    }

    // No manual entry: fall back to the "V"-prefixed record, if there is one.
    if (auto *AVXRec = Records.getDef(("V" + Name).str()))
      Table.push_back(std::pair(Inst, &Target.getInstruction(AVXRec)));
  }
  printTable(Table, "X86SSE2AVXTable", "GET_X86_SSE2AVX_TABLE", OS);
}
371+
338372
void X86InstrMappingEmitter::run(raw_ostream &OS) {
339373
emitSourceFileHeader("X86 instruction mapping", OS);
340374

@@ -344,6 +378,7 @@ void X86InstrMappingEmitter::run(raw_ostream &OS) {
344378
emitCompressEVEXTable(Insts, OS);
345379
emitNFTransformTable(Insts, OS);
346380
emitND2NonNDTable(Insts, OS);
381+
emitSSE2AVXTable(Insts, OS);
347382
}
348383
} // namespace
349384

llvm/utils/TableGen/X86ManualInstrMapping.def

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,14 @@ NOCOMP_ND(CFCMOV64rr_ND)
349349
ENTRY_ND(MOVBE32rr, BSWAP32r)
350350
ENTRY_ND(MOVBE64rr, BSWAP64r)
351351
#undef ENTRY_ND
352+
353+
// SSE -> AVX opcode pairs whose AVX record name is not simply "V" + the SSE
// record name. ENTRY_SSE2AVX(OLD, NEW) expands to nothing unless the including
// file defines it first.
#ifndef ENTRY_SSE2AVX
353+
#define ENTRY_SSE2AVX(OLD, NEW)
354+
#endif
355+
ENTRY_SSE2AVX(BLENDVPDrm0, VBLENDVPDrmr)
356+
ENTRY_SSE2AVX(BLENDVPDrr0, VBLENDVPDrrr)
357+
ENTRY_SSE2AVX(BLENDVPSrm0, VBLENDVPSrmr)
358+
ENTRY_SSE2AVX(BLENDVPSrr0, VBLENDVPSrrr)
359+
ENTRY_SSE2AVX(PBLENDVBrm0, VPBLENDVBrmr)
360+
ENTRY_SSE2AVX(PBLENDVBrr0, VPBLENDVBrrr)
361+
#undef ENTRY_SSE2AVX

0 commit comments

Comments
 (0)