Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit b6dac61

Browse files
committed
[X86][XOP] Added VPERMIL2PD/VPERMIL2PS shuffle mask comment decoding
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271809 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a90cde9 commit b6dac61

File tree

5 files changed

+198
-5
lines changed

5 files changed

+198
-5
lines changed

lib/Target/X86/X86MCInstLower.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,40 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
14221422
}
14231423
break;
14241424
}
1425+
1426+
case X86::VPERMIL2PDrm:
1427+
case X86::VPERMIL2PSrm:
1428+
case X86::VPERMIL2PDrmY:
1429+
case X86::VPERMIL2PSrmY: {
1430+
if (!OutStreamer->isVerboseAsm())
1431+
break;
1432+
assert(MI->getNumOperands() > 7 &&
1433+
"We should always have at least 7 operands!");
1434+
const MachineOperand &DstOp = MI->getOperand(0);
1435+
const MachineOperand &SrcOp1 = MI->getOperand(1);
1436+
const MachineOperand &SrcOp2 = MI->getOperand(2);
1437+
const MachineOperand &MaskOp = MI->getOperand(6);
1438+
const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1439+
1440+
if (!CtrlOp.isImm())
1441+
break;
1442+
1443+
unsigned ElSize;
1444+
switch (MI->getOpcode()) {
1445+
default: llvm_unreachable("Invalid opcode");
1446+
case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
1447+
case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
1448+
}
1449+
1450+
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1451+
SmallVector<int, 16> Mask;
1452+
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
1453+
if (!Mask.empty())
1454+
OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
1455+
}
1456+
break;
1457+
}
1458+
14251459
case X86::VPPERMrrm: {
14261460
if (!OutStreamer->isVerboseAsm())
14271461
break;

lib/Target/X86/X86ShuffleDecodeConstantPool.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,77 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
153153
// TODO: Handle funny-looking vectors too.
154154
}
155155

156+
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
157+
SmallVectorImpl<int> &ShuffleMask) {
158+
Type *MaskTy = C->getType();
159+
160+
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
161+
if (MaskTySize != 128 && MaskTySize != 256)
162+
return;
163+
164+
// Only support vector types.
165+
if (!MaskTy->isVectorTy())
166+
return;
167+
168+
// Make sure its an integer type.
169+
Type *VecEltTy = MaskTy->getVectorElementType();
170+
if (!VecEltTy->isIntegerTy())
171+
return;
172+
173+
// Support any element type from byte up to element size.
174+
// This is necessary primarily because 64-bit elements get split to 32-bit
175+
// in the constant pool on 32-bit target.
176+
unsigned EltTySize = VecEltTy->getIntegerBitWidth();
177+
if (EltTySize < 8 || EltTySize > ElSize)
178+
return;
179+
180+
unsigned NumElements = MaskTySize / ElSize;
181+
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
182+
"Unexpected number of vector elements.");
183+
ShuffleMask.reserve(NumElements);
184+
unsigned NumElementsPerLane = 128 / ElSize;
185+
unsigned Factor = ElSize / EltTySize;
186+
187+
for (unsigned i = 0; i < NumElements; ++i) {
188+
Constant *COp = C->getAggregateElement(i * Factor);
189+
if (!COp) {
190+
ShuffleMask.clear();
191+
return;
192+
} else if (isa<UndefValue>(COp)) {
193+
ShuffleMask.push_back(SM_SentinelUndef);
194+
continue;
195+
}
196+
197+
// VPERMIL2 Operation.
198+
// Bits[3] - Match Bit.
199+
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
200+
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
201+
uint64_t Selector = cast<ConstantInt>(COp)->getZExtValue();
202+
int MatchBit = (Selector >> 3) & 0x1;
203+
204+
// M2Z[0:1] MatchBit
205+
// 0Xb X Source selected by Selector index.
206+
// 10b 0 Source selected by Selector index.
207+
// 10b 1 Zero.
208+
// 11b 0 Zero.
209+
// 11b 1 Source selected by Selector index.
210+
if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
211+
ShuffleMask.push_back(SM_SentinelZero);
212+
continue;
213+
}
214+
215+
int Index = Selector & 0x3;
216+
Index >>= (ElSize == 64 ? 1 : 0);
217+
Index += (i / NumElementsPerLane) * NumElementsPerLane;
218+
219+
int Src = (Selector >> 2) & 0x1;
220+
Index += Src * NumElements;
221+
ShuffleMask.push_back(Index);
222+
}
223+
224+
// TODO: Handle funny-looking vectors too.
225+
}
226+
156227
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
157228
Type *MaskTy = C->getType();
158229
assert(MaskTy->getPrimitiveSizeInBits() == 128);

lib/Target/X86/X86ShuffleDecodeConstantPool.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
3232
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
3333
SmallVectorImpl<int> &ShuffleMask);
3434

35+
/// Decode a VPERMIL2P variable mask from an IR-level vector constant.
36+
void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
37+
SmallVectorImpl<int> &ShuffleMask);
38+
3539
/// Decode a VPPERM variable mask from an IR-level vector constant.
3640
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
3741

test/CodeGen/X86/vector-shuffle-combining-xop.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float>
6262
define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
6363
; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
6464
; CHECK: # BB#0:
65-
; CHECK-NEXT: vpermil2ps $2, {{.*}}(%rip), %xmm1, %xmm0, %xmm0
65+
; CHECK-NEXT: vpermil2ps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
6666
; CHECK-NEXT: retq
6767
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
6868
ret <4 x float> %res0

test/CodeGen/X86/xop-mask-comments.ll

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,94 @@ define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
9595
; VPERMIL2
9696
;
9797

98-
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
99-
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
98+
define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
99+
; X32-LABEL: vpermil2pd_21:
100+
; X32: # BB#0:
101+
; X32-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
102+
; X32-NEXT: retl
103+
;
104+
; X64-LABEL: vpermil2pd_21:
105+
; X64: # BB#0:
106+
; X64-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
107+
; X64-NEXT: retq
108+
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 4, i64 2>, i8 0)
109+
ret <2 x double> %1
110+
}
111+
112+
define <4 x double> @vpermil2pd256_0062(<4 x double> %a0, <4 x double> %a1) {
113+
; X32-LABEL: vpermil2pd256_0062:
114+
; X32: # BB#0:
115+
; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
116+
; X32-NEXT: retl
117+
;
118+
; X64-LABEL: vpermil2pd256_0062:
119+
; X64: # BB#0:
120+
; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
121+
; X64-NEXT: retq
122+
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 4, i64 0>, i8 0)
123+
ret <4 x double> %1
124+
}
125+
126+
define <4 x double> @vpermil2pd256_zz73(<4 x double> %a0, <4 x double> %a1) {
127+
; X32-LABEL: vpermil2pd256_zz73:
128+
; X32: # BB#0:
129+
; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
130+
; X32-NEXT: retl
131+
;
132+
; X64-LABEL: vpermil2pd256_zz73:
133+
; X64: # BB#0:
134+
; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
135+
; X64-NEXT: retq
136+
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 14, i64 10>, i8 3)
137+
ret <4 x double> %1
138+
}
139+
140+
define <4 x float> @vpermil2ps_0561(<4 x float> %a0, <4 x float> %a1) {
141+
; X32-LABEL: vpermil2ps_0561:
142+
; X32: # BB#0:
143+
; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
144+
; X32-NEXT: retl
145+
;
146+
; X64-LABEL: vpermil2ps_0561:
147+
; X64: # BB#0:
148+
; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
149+
; X64-NEXT: retq
150+
%1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 1>, i8 0)
151+
ret <4 x float> %1
152+
}
153+
154+
define <8 x float> @vpermil2ps256_098144FE(<8 x float> %a0, <8 x float> %a1) {
155+
; X32-LABEL: vpermil2ps256_098144FE:
156+
; X32: # BB#0:
157+
; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
158+
; X32-NEXT: retl
159+
;
160+
; X64-LABEL: vpermil2ps256_098144FE:
161+
; X64: # BB#0:
162+
; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
163+
; X64-NEXT: retq
164+
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 5, i32 4, i32 1, i32 0, i32 0, i32 7, i32 6>, i8 0)
165+
ret <8 x float> %1
166+
}
167+
168+
define <8 x float> @vpermil2ps256_0zz8BzzA(<8 x float> %a0, <8 x float> %a1) {
169+
; X32-LABEL: vpermil2ps256_0zz8BzzA:
170+
; X32: # BB#0:
171+
; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
172+
; X32-NEXT: retl
173+
;
174+
; X64-LABEL: vpermil2ps256_0zz8BzzA:
175+
; X64: # BB#0:
176+
; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
177+
; X64-NEXT: retq
178+
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 8, i32 4, i32 7, i32 8, i32 8, i32 6>, i8 2)
179+
ret <8 x float> %1
180+
}
181+
182+
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
183+
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
100184

101-
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
102-
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
185+
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
186+
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
103187

104188
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

0 commit comments

Comments
 (0)