Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 999f7b7

Browse files
committed
Merging r310552:
------------------------------------------------------------------------ r310552 | eladcohen | 2017-08-10 00:44:23 -0700 (Thu, 10 Aug 2017) | 19 lines [SelectionDAG] When scalarizing vselect, don't assert on a legal cond operand. When scalarizing the result of a vselect, the legalizer currently expects to already have scalarized the operands. While this is true for the true/false operands (which have the same type as the result), it is not the case for the condition operand. On X86 AVX512, v1i1 is legal - this causes operations such as '< N x type> vselect < N x i1> < N x type> < N x type>', where < N x type > is illegal, to hit an assertion during scalarization. The handling is similar to r205625. This also exposes the fact that (v1i1 extract_subvector) should be legal and selectable on AVX512 - we do this by custom lowering to vector_extract_elt. This still leaves us in some cases with redundant DAG nodes, which will be combined in a separate, soon-to-come patch. This fixes PR33349. Differential revision: https://reviews.llvm.org/D36511 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@310635 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 366b5fa commit 999f7b7

File tree

3 files changed

+123
-2
lines changed

3 files changed

+123
-2
lines changed

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
302302
}
303303

304304
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
305-
SDValue Cond = GetScalarizedVector(N->getOperand(0));
305+
SDValue Cond = N->getOperand(0);
306+
EVT OpVT = Cond.getValueType();
307+
SDLoc DL(N);
308+
// The vselect result and true/false operands need scalarizing, but it's
309+
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
310+
// See the similar logic in ScalarizeVecRes_VSETCC
311+
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
312+
Cond = GetScalarizedVector(Cond);
313+
} else {
314+
EVT VT = OpVT.getVectorElementType();
315+
Cond = DAG.getNode(
316+
ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
317+
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
318+
}
319+
306320
SDValue LHS = GetScalarizedVector(N->getOperand(1));
307321
TargetLowering::BooleanContent ScalarBool =
308322
TLI.getBooleanContents(false, false);

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1383,7 +1383,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13831383
// (result) is 256-bit but the source is 512-bit wide.
13841384
// 128-bit was made Custom under AVX1.
13851385
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1386-
MVT::v8f32, MVT::v4f64 })
1386+
MVT::v8f32, MVT::v4f64, MVT::v1i1 })
13871387
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
13881388
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
13891389
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
@@ -14570,6 +14570,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
1457014570
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
1457114571
MVT ResVT = Op.getSimpleValueType();
1457214572

14573+
// When v1i1 is legal, a scalarization of a vselect with a vXi1 Cond
14574+
// would result in: v1i1 = extract_subvector(vXi1, idx).
14575+
// Lower these into extract_vector_elt which is already selectable.
14576+
if (ResVT == MVT::v1i1) {
14577+
assert(Subtarget.hasAVX512() &&
14578+
"Boolean EXTRACT_SUBVECTOR requires AVX512");
14579+
14580+
MVT EltVT = ResVT.getVectorElementType();
14581+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14582+
MVT LegalVT =
14583+
(TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
14584+
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
14585+
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
14586+
}
14587+
1457314588
assert((In.getSimpleValueType().is256BitVector() ||
1457414589
In.getSimpleValueType().is512BitVector()) &&
1457514590
"Can only extract from 256-bit or 512-bit vectors");

test/CodeGen/X86/pr33349.ll

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
3+
; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
4+
5+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
9+
; KNL-LABEL: test:
10+
; KNL: # BB#0: # %bb
11+
; KNL-NEXT: vpextrb $0, %xmm0, %eax
12+
; KNL-NEXT: testb $1, %al
13+
; KNL-NEXT: fld1
14+
; KNL-NEXT: fldz
15+
; KNL-NEXT: fld %st(0)
16+
; KNL-NEXT: fcmovne %st(2), %st(0)
17+
; KNL-NEXT: vpextrb $4, %xmm0, %eax
18+
; KNL-NEXT: testb $1, %al
19+
; KNL-NEXT: fld %st(1)
20+
; KNL-NEXT: fcmovne %st(3), %st(0)
21+
; KNL-NEXT: vpextrb $8, %xmm0, %eax
22+
; KNL-NEXT: testb $1, %al
23+
; KNL-NEXT: fld %st(2)
24+
; KNL-NEXT: fcmovne %st(4), %st(0)
25+
; KNL-NEXT: vpextrb $12, %xmm0, %eax
26+
; KNL-NEXT: testb $1, %al
27+
; KNL-NEXT: fxch %st(3)
28+
; KNL-NEXT: fcmovne %st(4), %st(0)
29+
; KNL-NEXT: fstp %st(4)
30+
; KNL-NEXT: fxch %st(3)
31+
; KNL-NEXT: fstpt 30(%rdi)
32+
; KNL-NEXT: fxch %st(1)
33+
; KNL-NEXT: fstpt 20(%rdi)
34+
; KNL-NEXT: fxch %st(1)
35+
; KNL-NEXT: fstpt 10(%rdi)
36+
; KNL-NEXT: fstpt (%rdi)
37+
; KNL-NEXT: retq
38+
;
39+
; SKX-LABEL: test:
40+
; SKX: # BB#0: # %bb
41+
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
42+
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
43+
; SKX-NEXT: kshiftrw $2, %k0, %k1
44+
; SKX-NEXT: kshiftlw $15, %k1, %k2
45+
; SKX-NEXT: kshiftrw $15, %k2, %k2
46+
; SKX-NEXT: kshiftlw $15, %k2, %k2
47+
; SKX-NEXT: kshiftrw $15, %k2, %k2
48+
; SKX-NEXT: kmovd %k2, %eax
49+
; SKX-NEXT: testb $1, %al
50+
; SKX-NEXT: fld1
51+
; SKX-NEXT: fldz
52+
; SKX-NEXT: fld %st(0)
53+
; SKX-NEXT: fcmovne %st(2), %st(0)
54+
; SKX-NEXT: kshiftlw $14, %k1, %k1
55+
; SKX-NEXT: kshiftrw $15, %k1, %k1
56+
; SKX-NEXT: kshiftlw $15, %k1, %k1
57+
; SKX-NEXT: kshiftrw $15, %k1, %k1
58+
; SKX-NEXT: kmovd %k1, %eax
59+
; SKX-NEXT: testb $1, %al
60+
; SKX-NEXT: fld %st(1)
61+
; SKX-NEXT: fcmovne %st(3), %st(0)
62+
; SKX-NEXT: kshiftlw $15, %k0, %k1
63+
; SKX-NEXT: kshiftrw $15, %k1, %k1
64+
; SKX-NEXT: kshiftlw $15, %k1, %k1
65+
; SKX-NEXT: kshiftrw $15, %k1, %k1
66+
; SKX-NEXT: kmovd %k1, %eax
67+
; SKX-NEXT: testb $1, %al
68+
; SKX-NEXT: fld %st(2)
69+
; SKX-NEXT: fcmovne %st(4), %st(0)
70+
; SKX-NEXT: kshiftlw $14, %k0, %k0
71+
; SKX-NEXT: kshiftrw $15, %k0, %k0
72+
; SKX-NEXT: kshiftlw $15, %k0, %k0
73+
; SKX-NEXT: kshiftrw $15, %k0, %k0
74+
; SKX-NEXT: kmovd %k0, %eax
75+
; SKX-NEXT: testb $1, %al
76+
; SKX-NEXT: fxch %st(3)
77+
; SKX-NEXT: fcmovne %st(4), %st(0)
78+
; SKX-NEXT: fstp %st(4)
79+
; SKX-NEXT: fxch %st(3)
80+
; SKX-NEXT: fstpt 10(%rdi)
81+
; SKX-NEXT: fxch %st(1)
82+
; SKX-NEXT: fstpt (%rdi)
83+
; SKX-NEXT: fxch %st(1)
84+
; SKX-NEXT: fstpt 30(%rdi)
85+
; SKX-NEXT: fstpt 20(%rdi)
86+
; SKX-NEXT: retq
87+
bb:
88+
%tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
89+
store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
90+
ret void
91+
}
92+

0 commit comments

Comments
 (0)