Skip to content

Commit b6a1865

Browse files
committed
patch widen-cast in isSingleScalar
1 parent: 9468b17 · commit: b6a1865

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1189,15 +1189,15 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
11891189
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
11901190
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
11911191
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
1192-
if (!RepR && !isa<VPWidenRecipe>(&R))
1192+
if (!RepR && !isa<VPWidenRecipe, VPWidenCastRecipe>(&R))
11931193
continue;
11941194
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
11951195
continue;
11961196

11971197
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
1198-
Instruction *UI = RepOrWidenR->getUnderlyingInstr();
1199-
if (!UI)
1200-
continue;
1198+
Value *UV = RepOrWidenR->getUnderlyingValue();
1199+
if (!UV)
1200+
continue;
12011201

12021202
// Skip recipes that aren't single scalars or don't have only their
12031203
// scalar results used. In the latter case, we would introduce extra
@@ -1207,7 +1207,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
12071207
[RepOrWidenR](VPUser *U) {
12081208
return !U->usesScalars(RepOrWidenR);
12091209
}) ||
1210-
UI->getType() != TypeInfo.inferScalarType(RepOrWidenR))
1210+
UV->getType() != TypeInfo.inferScalarType(RepOrWidenR))
12111211
continue;
12121212

12131213
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -378,8 +378,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
378378
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
379379
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
380380
; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
381-
; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
382-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 0
381+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
382+
; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i8
383383
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
384384
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385385
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)