Skip to content

Commit 352b9e6

Browse files
committed
[X86] combineConcatVectorOps - extend X86ISD::VROTLI/VROTRI handling to support 256-bit types
Add an assertion that we aren't concatenating 128-bit X86ISD::VPERMI nodes, which are illegal.
1 parent 3f62718 commit 352b9e6

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58330,11 +58330,15 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5833058330
case X86ISD::VPERMI:
5833158331
case X86ISD::VROTLI:
5833258332
case X86ISD::VROTRI:
58333-
// TODO: 256-bit VROT?I handling
58334-
if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
58333+
if (!IsSplat &&
58334+
((VT.is256BitVector() && Subtarget.hasVLX()) ||
58335+
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
5833558336
llvm::all_of(Ops, [Op0](SDValue Op) {
5833658337
return Op0.getOperand(1) == Op.getOperand(1);
5833758338
})) {
58339+
assert(!(Opcode == X86ISD::VPERMI &&
58340+
Op0.getValueType().is128BitVector()) &&
58341+
"Illegal 128-bit X86ISD::VPERMI nodes");
5833858342
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0),
5833958343
Op0.getOperand(1));
5834058344
}

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
102102
define <8 x i32> @concat_vrotli_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
103103
; CHECK-LABEL: concat_vrotli_v4i32:
104104
; CHECK: # %bb.0:
105-
; CHECK-NEXT: vprold $3, %xmm0, %xmm0
106-
; CHECK-NEXT: vprold $3, %xmm1, %xmm1
105+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
107106
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
107+
; CHECK-NEXT: vprold $3, %ymm0, %ymm0
108108
; CHECK-NEXT: ret{{[l|q]}}
109109
%r0 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> splat (i32 3))
110110
%r1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a1, <4 x i32> %a1, <4 x i32> splat (i32 3))

0 commit comments

Comments
 (0)