Skip to content

Commit f32401e

Browse files
committed
IsSingleScalar
1 parent 218fe3d commit f32401e

File tree

4 files changed

+17
-5
lines changed

4 files changed

+17
-5
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
11801180
if (Plan.hasScalarVFOnly())
11811181
return;
11821182

1183+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
11831184
// Try to narrow wide and replicating recipes to single scalar recipes,
11841185
// based on VPlan analysis. Only process blocks in the loop region for now,
11851186
// without traversing into nested regions, as recipes in replicate regions
@@ -1194,13 +1195,19 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
11941195
continue;
11951196

11961197
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
1198+
Instruction *UI = RepOrWidenR->getUnderlyingInstr();
1199+
if (!UI)
1200+
continue;
1201+
11971202
// Skip recipes that aren't single scalars or don't have only their
11981203
// scalar results used. In the latter case, we would introduce extra
11991204
// broadcasts.
12001205
if (!vputils::isSingleScalar(RepOrWidenR) ||
1201-
any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
1202-
return !U->usesScalars(RepOrWidenR);
1203-
}))
1206+
any_of(RepOrWidenR->users(),
1207+
[RepOrWidenR](VPUser *U) {
1208+
return !U->usesScalars(RepOrWidenR);
1209+
}) ||
1210+
UI->getType() != TypeInfo.inferScalarType(RepOrWidenR))
12041211
continue;
12051212

12061213
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ inline bool isSingleScalar(const VPValue *VPV) {
7575
return PreservesUniformity(WidenR->getOpcode()) &&
7676
all_of(WidenR->operands(), isSingleScalar);
7777
}
78+
if (auto *CastR = dyn_cast<VPWidenCastRecipe>(VPV)) {
79+
return PreservesUniformity(CastR->getOpcode()) &&
80+
all_of(CastR->operands(), isSingleScalar);
81+
}
82+
7883
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
7984
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
8085
(PreservesUniformity(VPI->getOpcode()) &&

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
379379
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
380380
; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
381381
; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
382-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
382+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 0
383383
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
384384
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385385
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ define void @minbw_cast(ptr %dst, i64 %n, i1 %bool1, i1 %bool2) {
2222
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2323
; CHECK: [[VECTOR_BODY]]:
2424
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP2]], i32 3
25+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP2]], i32 0
2626
; CHECK-NEXT: store i8 [[TMP3]], ptr [[DST]], align 1
2727
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2828
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)