
Commit 6984cfe

[X86] Ensure concat(blendi(),blendi()) -> vselect() uses legal select mask types
For 256-bit selections, we could be using sub-i8/vXi8 selection condition masks - extend these to i8 and then extract the lowest mask subvector.

Fixes #132844
1 parent: d46a699
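To make the fix concrete, here is a minimal standalone C++ sketch of the mask widening, assuming a hypothetical concat of two 128-bit BLENDIs into a 256-bit v4i64 select (illustrative code only, not the SelectionDAG combine shown in the diff below):

#include <cstdint>
#include <cstdio>

// Illustrative sketch (hypothetical element counts and blend immediates):
// concat(blendi(),blendi()) is folded to vselect(), and the combined
// condition mask must use a legal (>= i8) scalar mask type.
int main() {
  // Two v2i64 blends with immediates 0b10 and 0b01, concatenated into a
  // v4i64 select: NumElts = 4, NumOps = 2.
  const unsigned NumElts = 4, NumOps = 2;
  const uint64_t BlendImms[NumOps] = {0b10, 0b01};

  // Concatenate the per-op blend immediates, mirroring Mask.insertBits().
  uint64_t Mask = 0;
  for (unsigned I = 0; I != NumOps; ++I)
    Mask |= BlendImms[I] << (I * (NumElts / NumOps)); // Mask = 0b0110

  // Previously the combined mask could use a sub-i8 scalar type (here i4 for
  // a v4i1 condition). Round the width up to 8 bits so the constant is built
  // as i8 / v8i1, and only the low NumElts lanes feed the final vselect.
  const unsigned NumMaskBits = NumElts >= 8 ? NumElts : 8;

  printf("select mask 0x%llx built as i%u / v%ui1, low %u lanes extracted\n",
         (unsigned long long)Mask, NumMaskBits, NumMaskBits, NumElts);
  return 0;
}

In the combine itself (diff below), the same rounding drives MVT::getIntegerVT(NumMaskBits) and MVT::getVectorVT(MVT::i1, NumMaskBits), and extractSubVector then trims the widened mask back down to the NumElts lanes the vselect needs.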

File tree

2 files changed (+37, -2 lines)


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 2 deletions
@@ -58617,10 +58617,13 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       APInt Mask = getBLENDIBlendMask(Ops[0]).zext(NumElts);
       for (unsigned I = 1; I != NumOps; ++I)
         Mask.insertBits(getBLENDIBlendMask(Ops[I]), I * (NumElts / NumOps));
-      MVT MaskSVT = MVT::getIntegerVT(NumElts);
-      MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+      unsigned NumMaskBits = NumElts >= 8 ? NumElts : 8;
+      Mask = Mask.zextOrTrunc(NumMaskBits);
+      MVT MaskSVT = MVT::getIntegerVT(NumMaskBits);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, NumMaskBits);
       SDValue Sel =
           DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT));
+      Sel = extractSubVector(Sel, 0, DAG, DL, NumElts);
       Concat0 = Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0);
       Concat1 = Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1);
       return DAG.getSelect(DL, VT, Sel, Concat1, Concat0);

llvm/test/CodeGen/X86/pr132844.ll

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define { ptr, i8 } @PR132844(<4 x ptr> %0, <4 x ptr> %1) {
+; CHECK-LABEL: PR132844:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    movb $10, %al
+; CHECK-NEXT:    kmovd %eax, %k1
+; CHECK-NEXT:    vinserti64x2 $1, 16, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %3 = alloca [35 x ptr], i32 0, align 16
+  %4 = load <4 x ptr>, ptr null, align 8
+  %5 = getelementptr i8, ptr %3, i64 216
+  %6 = extractelement <4 x ptr> %4, i64 3
+  store ptr %6, ptr %5, align 8
+  %7 = getelementptr i8, ptr %3, i64 208
+  %8 = extractelement <4 x ptr> %0, i64 0
+  store ptr %8, ptr %7, align 8
+  %9 = getelementptr i8, ptr %3, i64 200
+  %10 = extractelement <4 x ptr> %0, i64 3
+  store ptr %10, ptr %9, align 8
+  %11 = getelementptr i8, ptr %3, i64 192
+  %12 = extractelement <4 x ptr> %1, i64 0
+  store ptr %12, ptr %11, align 8
+  ret { ptr, i8 } zeroinitializer
+}
