Skip to content

Commit aa659f1

Browse files
committed
Add test and address review comments
1 parent fe617f4 commit aa659f1

File tree

2 files changed

+24
-23
lines changed

2 files changed

+24
-23
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,34 +2326,26 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
23262326
case X86::VBLENDPSrri:
23272327
// If we're optimizing for size, try to use MOVSD/MOVSS.
23282328
if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
2329-
unsigned Mask;
2330-
unsigned NewOpc;
2331-
switch (Opc) {
2332-
default:
2333-
llvm_unreachable("Unreachable!");
2334-
case X86::BLENDPDrri:
2335-
NewOpc = X86::MOVSDrr;
2336-
Mask = 0x03;
2337-
break;
2338-
case X86::BLENDPSrri:
2339-
NewOpc = X86::MOVSSrr;
2340-
Mask = 0x0F;
2341-
break;
2342-
case X86::VBLENDPDrri:
2343-
NewOpc = X86::VMOVSDrr;
2344-
Mask = 0x03;
2345-
break;
2346-
case X86::VBLENDPSrri:
2347-
NewOpc = X86::VMOVSSrr;
2348-
Mask = 0x0F;
2349-
break;
2350-
}
2329+
unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03: 0x0F;
23512330
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
2331+
#define FROM_TO(A, B) \
2332+
case X86::A: \
2333+
Opc = X86::B; \
2334+
break;
2335+
switch (Opc) {
2336+
default:
2337+
llvm_unreachable("Unreachable!");
2338+
FROM_TO(BLENDPDrri, MOVSDrr)
2339+
FROM_TO(BLENDPSrri, MOVSSrr)
2340+
FROM_TO(VBLENDPDrri, VMOVSDrr)
2341+
FROM_TO(VBLENDPSrri, VMOVSSrr)
2342+
}
23522343
WorkingMI = CloneIfNew(MI);
2353-
WorkingMI->setDesc(get(NewOpc));
2344+
WorkingMI->setDesc(get(Opc));
23542345
WorkingMI->removeOperand(3);
23552346
break;
23562347
}
2348+
#undef FROM_TO
23572349
}
23582350
[[fallthrough]];
23592351
case X86::PBLENDWrri:

llvm/test/CodeGen/X86/commute-blend-avx2.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,12 @@ define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, ptr %b) #0 {
8888
ret <4 x double> %2
8989
}
9090
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
91+
92+
; Blend of two <4 x float> values inside a function carrying the optsize
; attribute. The shuffle mask <0, 5, 2, 3> selects element 1 from %a and
; elements 0, 2 and 3 from %b; the CHECK lines expect the result to be emitted
; as a single vblendps followed by retq.
; NOTE(review): presumably this exercises the optsize MOVSS/MOVSD path in
; X86InstrInfo::commuteInstructionImpl - confirm against that change.
; NOTE(review): the name says "vblendpd", but the operands are <4 x float> and
; the expected instruction is vblendps - consider renaming; confirm with author.
define <4 x float> @commute_vblendpd_128_for_code_size(<4 x float> %a, <4 x float> %b) optsize {
93+
; CHECK-LABEL: commute_vblendpd_128_for_code_size:
94+
; CHECK: # %bb.0:
95+
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
96+
; CHECK-NEXT: retq
97+
%r = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
98+
ret <4 x float> %r
99+
}

0 commit comments

Comments
 (0)