Skip to content

Commit d0cd550

Browse files
committed
patch widen-cast in isSingleScalar
1 parent 4a158f6 commit d0cd550

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1175,7 +1175,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
11751175
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
11761176
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
11771177
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
1178-
if (!RepR && !isa<VPWidenRecipe>(&R))
1178+
if (!RepR && !isa<VPWidenRecipe, VPWidenCastRecipe>(&R))
11791179
continue;
11801180
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
11811181
continue;

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,11 @@ inline bool isSingleScalar(const VPValue *VPV) {
7575
return PreservesUniformity(WidenR->getOpcode()) &&
7676
all_of(WidenR->operands(), isSingleScalar);
7777
}
78+
if (auto *CastR = dyn_cast<VPWidenCastRecipe>(VPV)) {
79+
return PreservesUniformity(CastR->getOpcode()) &&
80+
all_of(CastR->operands(), isSingleScalar);
81+
}
82+
7883
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
7984
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
8085
(PreservesUniformity(VPI->getOpcode()) &&

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -378,8 +378,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
378378
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
379379
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
380380
; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
381-
; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
382-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
381+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
382+
; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i8
383383
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
384384
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385385
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments (0)