Skip to content

Commit d81c2f1

Browse files
committed
[X86] canCreateUndefOrPoisonForTargetNode - X86ISD::VPERMV3 shuffles don't create undef/poison
The operands might contain undef/poison elements, but the shuffle node will not create any itself. Improves the test case from #109272.
1 parent fd8a4b0 commit d81c2f1

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43839,7 +43839,8 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4383943839

4384043840
switch (Op.getOpcode()) {
4384143841
case X86ISD::PSHUFD:
43842-
case X86ISD::VPERMILPI: {
43842+
case X86ISD::VPERMILPI:
43843+
case X86ISD::VPERMV3: {
4384343844
SmallVector<int, 8> Mask;
4384443845
SmallVector<SDValue, 2> Ops;
4384543846
if (getTargetShuffleMask(Op, true, Ops, Mask)) {
@@ -43883,6 +43884,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
4388343884
return false;
4388443885
case X86ISD::PSHUFD:
4388543886
case X86ISD::VPERMILPI:
43887+
case X86ISD::VPERMV3:
4388643888
case X86ISD::UNPCKH:
4388743889
case X86ISD::UNPCKL:
4388843890
return false;

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,9 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64
151151
define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) {
152152
; CHECK-LABEL: combine_vpermi2var_v64i8_with_mask:
153153
; CHECK: # %bb.0:
154-
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0
155154
; CHECK-NEXT: vpmovb2m %zmm1, %k0
156155
; CHECK-NEXT: knotq %k0, %k1
157-
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
156+
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
158157
; CHECK-NEXT: ret{{[l|q]}}
159158
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2)
160159
%cmp = icmp slt <64 x i8> %a1, zeroinitializer

0 commit comments

Comments
 (0)