Skip to content

Commit 3405237

Browse files
committed
[X86] Mask off upper bits of splat element in LowerBUILD_VECTORvXi1 when forming a SELECT.
The i1 scalar would have been type legalized to i8, but that doesn't guarantee anything about the upper bits. If we're going to use it as a condition, we need to make sure the upper bits are 0. I've special-cased ISD::SETCC conditions since those should guarantee zero upper bits. We could go further and use computeKnownBits, but we have no tests that would need that. Fixes PR43507. llvm-svn: 373246
1 parent 8216414 commit 3405237

File tree

3 files changed

+34
-6
lines changed

3 files changed

+34
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8459,10 +8459,20 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
84598459
}
84608460

84618461
// for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
8462-
if (IsSplat)
8463-
return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
8462+
if (IsSplat) {
8463+
// The build_vector allows the scalar element to be larger than the vector
8464+
// element type. We need to mask it to use as a condition unless we know
8465+
// the upper bits are zero.
8466+
// FIXME: Use computeKnownBits instead of checking specific opcode?
8467+
SDValue Cond = Op.getOperand(SplatIdx);
8468+
assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
8469+
if (Cond.getOpcode() != ISD::SETCC)
8470+
Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
8471+
DAG.getConstant(1, dl, MVT::i8));
8472+
return DAG.getSelect(dl, VT, Cond,
84648473
DAG.getConstant(1, dl, VT),
84658474
DAG.getConstant(0, dl, VT));
8475+
}
84668476

84678477
// insert elements one by one
84688478
SDValue DstVec;

llvm/test/CodeGen/X86/avx512-calling-conv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -729,12 +729,12 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
729729
; KNL-NEXT: korw %k2, %k0, %k0
730730
; KNL-NEXT: kandw %k1, %k0, %k0
731731
; KNL-NEXT: xorl %ecx, %ecx
732-
; KNL-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
732+
; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
733733
; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF
734734
; KNL-NEXT: movl $0, %esi
735735
; KNL-NEXT: cmovnel %edx, %esi
736736
; KNL-NEXT: kmovw %esi, %k1
737-
; KNL-NEXT: cmpb $0, {{[0-9]+}}(%rsp)
737+
; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
738738
; KNL-NEXT: cmovnel %edx, %ecx
739739
; KNL-NEXT: kmovw %ecx, %k2
740740
; KNL-NEXT: kandw %k1, %k2, %k1
@@ -1314,11 +1314,11 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
13141314
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
13151315
; KNL_X32-NEXT: korw %k2, %k1, %k1
13161316
; KNL_X32-NEXT: xorl %eax, %eax
1317-
; KNL_X32-NEXT: cmpb $0, {{[0-9]+}}(%esp)
1317+
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
13181318
; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF
13191319
; KNL_X32-NEXT: movl $0, %edx
13201320
; KNL_X32-NEXT: cmovnel %ecx, %edx
1321-
; KNL_X32-NEXT: cmpb $0, {{[0-9]+}}(%esp)
1321+
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
13221322
; KNL_X32-NEXT: cmovnel %ecx, %eax
13231323
; KNL_X32-NEXT: kandw %k0, %k1, %k0
13241324
; KNL_X32-NEXT: kmovw %edx, %k1

llvm/test/CodeGen/X86/pr43507.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s
3+
4+
define <8 x i1> @ham(i64 %arg) {
5+
; CHECK-LABEL: ham:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: xorl %eax, %eax
8+
; CHECK-NEXT: testb $1, %dil
9+
; CHECK-NEXT: movl $255, %ecx
10+
; CHECK-NEXT: cmovel %eax, %ecx
11+
; CHECK-NEXT: kmovd %ecx, %k0
12+
; CHECK-NEXT: vpmovm2w %k0, %xmm0
13+
; CHECK-NEXT: retq
14+
%tmp = trunc i64 %arg to i1
15+
%tmp1 = insertelement <8 x i1> undef, i1 %tmp, i32 0
16+
%tmp2 = shufflevector <8 x i1> %tmp1, <8 x i1> undef, <8 x i32> zeroinitializer
17+
ret <8 x i1> %tmp2
18+
}

0 commit comments

Comments
 (0)