[InstCombine][X86] Peek through bitcast+shuffle+bitcast sequence when folding BLENDV to SELECT

RKSimon · RKSimon · commit 6c1c97c5b674 · 2024-07-05T16:53:06.000+01:00
Mentioned on #96882
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2882,6 +2882,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
     }
 
+    Mask = InstCombiner::peekThroughBitcast(Mask);
+
     // Peek through a one-use shuffle - VectorCombine should have simplified
     // this for cases where we're splitting wider vectors to use blendv
     // intrinsics.
@@ -2895,13 +2897,12 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
           any_of(ShuffleMask,
                  [NumElts](int M) { return M < 0 || M >= NumElts; }))
         break;
-      Mask = MaskSrc;
+      Mask = InstCombiner::peekThroughBitcast(MaskSrc);
     }
 
     // Convert to a vector select if we can bypass casts and find a boolean
     // vector condition value.
     Value *BoolVec;
-    Mask = InstCombiner::peekThroughBitcast(Mask);
     if (match(Mask, m_SExt(m_Value(BoolVec))) &&
         BoolVec->getType()->isVectorTy() &&
         BoolVec->getType()->getScalarSizeInBits() == 1) {
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -285,17 +285,13 @@ define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y,
 define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
 ; CHECK-LABEL: @sel_v16i8_bitcast_shuffle_bitcast_cmp(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
 ; CHECK-NEXT:    [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
 ; CHECK-NEXT:    [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
-; CHECK-NEXT:    [[SEXT_LO:%.*]] = shufflevector <8 x i32> [[SEXT]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[A_LO_BC:%.*]] = bitcast <4 x i32> [[A_LO]] to <16 x i8>
-; CHECK-NEXT:    [[B_LO_BC:%.*]] = bitcast <4 x i32> [[B_LO]] to <16 x i8>
-; CHECK-NEXT:    [[SEXT_LO_BC:%.*]] = bitcast <4 x i32> [[SEXT_LO]] to <16 x i8>
-; CHECK-NEXT:    [[BLENDV:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO_BC]], <16 x i8> [[B_LO_BC]], <16 x i8> [[SEXT_LO_BC]])
-; CHECK-NEXT:    [[RES:%.*]] = bitcast <16 x i8> [[BLENDV]] to <4 x float>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
+; CHECK-NEXT:    [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %cmp = fcmp olt <8 x float> %a, %b