Skip to content

Commit 8ac6b41

Browse files
committed
[X86] Ensure VPERMV3 -> VPERMV fold comes from a double width vector
#96414 and #97206 didn't ensure that the two subvectors were being extracted from a source vector that is double the width of the destination. We can relax this restriction in a future patch, but fix the #97968 crash first. Fixes #97968.
1 parent 124b18b commit 8ac6b41

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41336,6 +41336,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4133641336
case X86ISD::VPERMV3: {
4133741337
// Combine VPERMV3 to widened VPERMV if the two source operands are split
4133841338
// from the same vector.
41339+
// TODO: Handle extraction from a wider source vector (e.g. v16i32 -> v4i32).
4133941340
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
4134041341
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
4134141342
MVT SVT = V1.getSimpleValueType();
@@ -41346,7 +41347,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4134641347
V1.getConstantOperandVal(1) == 0 &&
4134741348
V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4134841349
V2.getConstantOperandVal(1) == SVT.getVectorNumElements() &&
41349-
V1.getOperand(0) == V2.getOperand(0)) {
41350+
V1.getOperand(0) == V2.getOperand(0) &&
41351+
V1.getOperand(0).getValueSizeInBits() == NVT.getSizeInBits()) {
4135041352
SDValue Mask =
4135141353
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NVT, DAG.getUNDEF(NVT),
4135241354
N.getOperand(1), DAG.getIntPtrConstant(0, DL));

llvm/test/CodeGen/X86/pr97968.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
3+
4+
define <2 x i32> @PR97968(<16 x i32> %a0) {
5+
; CHECK-LABEL: PR97968:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,7,2,7]
8+
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
9+
; CHECK-NEXT: vpermi2d %xmm2, %xmm0, %xmm1
10+
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
11+
; CHECK-NEXT: vzeroupper
12+
; CHECK-NEXT: retq
13+
%sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14+
%sub1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
15+
%elt0 = extractelement <4 x i32> %sub0, i64 2
16+
%elt7 = extractelement <4 x i32> %sub1, i64 3
17+
%scl0 = insertelement <2 x i32> undef, i32 %elt0, i32 0
18+
%scl1 = insertelement <2 x i32> %scl0, i32 %elt7, i32 1
19+
ret <2 x i32> %scl1
20+
}

0 commit comments

Comments
 (0)