Skip to content

Commit 593e25f

Browse files
committed
[Vectorize] Fix vectorization, scalarization and folding of llvm.is.fpclass
llvm.is.fpclass is different from other vectorizable intrinsics in that it is overloaded on an argument type, not on the return type.

Differential Revision: https://reviews.llvm.org/D148905
1 parent 3237497 commit 593e25f

File tree

9 files changed

+67
-23
lines changed

9 files changed

+67
-23
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,9 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
347347
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
348348
unsigned ScalarOpdIdx);
349349

350-
/// Identifies if the vector form of the intrinsic has a operand that has
351-
/// an overloaded type.
352-
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
350+
/// Identifies if the vector form of the intrinsic is overloaded on the type of
351+
/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
352+
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
353353

354354
/// Returns intrinsic ID for call.
355355
/// For the input call instruction it finds mapping intrinsic and returns

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
8686
case Intrinsic::pow:
8787
case Intrinsic::fma:
8888
case Intrinsic::fmuladd:
89+
case Intrinsic::is_fpclass:
8990
case Intrinsic::powi:
9091
case Intrinsic::canonicalize:
9192
case Intrinsic::fptosi_sat:
@@ -103,6 +104,7 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
103104
case Intrinsic::abs:
104105
case Intrinsic::ctlz:
105106
case Intrinsic::cttz:
107+
case Intrinsic::is_fpclass:
106108
case Intrinsic::powi:
107109
return (ScalarOpdIdx == 1);
108110
case Intrinsic::smul_fix:
@@ -116,15 +118,17 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
116118
}
117119

118120
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
119-
unsigned OpdIdx) {
121+
int OpdIdx) {
120122
switch (ID) {
121123
case Intrinsic::fptosi_sat:
122124
case Intrinsic::fptoui_sat:
125+
return OpdIdx == -1 || OpdIdx == 0;
126+
case Intrinsic::is_fpclass:
123127
return OpdIdx == 0;
124128
case Intrinsic::powi:
125-
return OpdIdx == 1;
129+
return OpdIdx == -1 || OpdIdx == 1;
126130
default:
127-
return false;
131+
return OpdIdx == -1;
128132
}
129133
}
130134

llvm/lib/Transforms/Scalar/Scalarizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,9 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
583583
Scattered.resize(NumArgs);
584584

585585
SmallVector<llvm::Type *, 3> Tys;
586-
Tys.push_back(VT->getScalarType());
586+
// Add return type if intrinsic is overloaded on it.
587+
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
588+
Tys.push_back(VT->getScalarType());
587589

588590
// Assumes that any vector type has the same number of elements as the return
589591
// vector type, which is true for all current intrinsics.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10335,8 +10335,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1033510335

1033610336
Value *ScalarArg = nullptr;
1033710337
std::vector<Value *> OpVecs;
10338-
SmallVector<Type *, 2> TysForDecl =
10339-
{FixedVectorType::get(CI->getType(), E->Scalars.size())};
10338+
SmallVector<Type *, 2> TysForDecl;
10339+
// Add return type if intrinsic is overloaded on it.
10340+
if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
10341+
TysForDecl.push_back(
10342+
FixedVectorType::get(CI->getType(), E->Scalars.size()));
1034010343
for (int j = 0, e = CI->arg_size(); j < e; ++j) {
1034110344
ValueList OpVL;
1034210345
// Some intrinsics have scalar arguments. This argument should not be

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,14 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
481481
State.setDebugLocFromInst(&CI);
482482

483483
for (unsigned Part = 0; Part < State.UF; ++Part) {
484-
SmallVector<Type *, 2> TysForDecl = {CI.getType()};
484+
SmallVector<Type *, 2> TysForDecl;
485+
// Add return type if intrinsic is overloaded on it.
486+
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
487+
TysForDecl.push_back(
488+
State.VF.isVector()
489+
? VectorType::get(CI.getType()->getScalarType(), State.VF)
490+
: CI.getType());
491+
}
485492
SmallVector<Value *, 4> Args;
486493
for (const auto &I : enumerate(operands())) {
487494
// Some intrinsics have a scalar argument - don't replace it with a
@@ -500,9 +507,6 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
500507
Function *VectorF;
501508
if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
502509
// Use vector version of the intrinsic.
503-
if (State.VF.isVector())
504-
TysForDecl[0] =
505-
VectorType::get(CI.getType()->getScalarType(), State.VF);
506510
Module *M = State.Builder.GetInsertBlock()->getModule();
507511
VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
508512
assert(VectorF && "Can't retrieve vector intrinsic.");
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt < %s -S -passes=instsimplify | FileCheck %s
3+
4+
define <2 x i1> @f() {
5+
; CHECK-LABEL: define <2 x i1> @f() {
6+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
7+
;
8+
%i = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>, i32 3)
9+
ret <2 x i1> %i
10+
}
11+
12+
declare <2 x i1> @llvm.is.fpclass.v2f16(<2 x half>, i32 immarg)

llvm/test/Transforms/LoopVectorize/is_fpclass.ll

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,36 @@
44
define void @d() {
55
; CHECK-LABEL: define void @d() {
66
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8+
; CHECK: vector.ph:
9+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
10+
; CHECK: vector.body:
11+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
12+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
13+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr @d, i64 [[TMP0]]
14+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> zeroinitializer, i32 0)
15+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> zeroinitializer
16+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
17+
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
18+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
19+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
20+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
21+
; CHECK: middle.block:
22+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
23+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
24+
; CHECK: scalar.ph:
25+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
726
; CHECK-NEXT: br label [[LOOP:%.*]]
827
; CHECK: loop:
9-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I7:%.*]], [[LOOP]] ]
28+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I7:%.*]], [[LOOP]] ]
1029
; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4
1130
; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]]
1231
; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float 0.000000e+00, i32 0)
1332
; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 0.000000e+00
1433
; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4
1534
; CHECK-NEXT: [[I7]] = add i64 [[I]], 1
1635
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 0
17-
; CHECK-NEXT: br i1 [[I8]], label [[EXIT:%.*]], label [[LOOP]]
36+
; CHECK-NEXT: br i1 [[I8]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
1837
; CHECK: exit:
1938
; CHECK-NEXT: ret void
2039
;

llvm/test/Transforms/SLPVectorizer/is_fpclass.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,8 @@
44
define <2 x i1> @scalarize_is_fpclass(<2 x float> %x) {
55
; CHECK-LABEL: define <2 x i1> @scalarize_is_fpclass
66
; CHECK-SAME: (<2 x float> [[X:%.*]]) {
7-
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i32 0
8-
; CHECK-NEXT: [[ISFPCLASS_I0:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X_I0]], i32 123)
9-
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
10-
; CHECK-NEXT: [[ISFPCLASS_I1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X_I1]], i32 123)
11-
; CHECK-NEXT: [[ISFPCLASS_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[ISFPCLASS_I0]], i32 0
12-
; CHECK-NEXT: [[ISFPCLASS:%.*]] = insertelement <2 x i1> [[ISFPCLASS_UPTO0]], i1 [[ISFPCLASS_I1]], i32 1
13-
; CHECK-NEXT: ret <2 x i1> [[ISFPCLASS]]
7+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> [[X]], i32 123)
8+
; CHECK-NEXT: ret <2 x i1> [[TMP1]]
149
;
1510
%x.i0 = extractelement <2 x float> %x, i32 0
1611
%isfpclass.i0 = call i1 @llvm.is.fpclass.f32(float %x.i0, i32 123)

llvm/test/Transforms/Scalarizer/intrinsics.ll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,12 @@ define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 {
212212

213213
define <2 x i1> @scalarize_is_fpclass(<2 x float> %x) #0 {
214214
; CHECK-LABEL: @scalarize_is_fpclass(
215-
; CHECK-NEXT: [[ISFPCLASS:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> [[X:%.*]], i32 123)
215+
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
216+
; CHECK-NEXT: [[ISFPCLASS_I0:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X_I0]], i32 123)
217+
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
218+
; CHECK-NEXT: [[ISFPCLASS_I1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X_I1]], i32 123)
219+
; CHECK-NEXT: [[ISFPCLASS_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[ISFPCLASS_I0]], i32 0
220+
; CHECK-NEXT: [[ISFPCLASS:%.*]] = insertelement <2 x i1> [[ISFPCLASS_UPTO0]], i1 [[ISFPCLASS_I1]], i32 1
216221
; CHECK-NEXT: ret <2 x i1> [[ISFPCLASS]]
217222
;
218223
%isfpclass = call <2 x i1> @llvm.is.fpclass(<2 x float> %x, i32 123)

0 commit comments

Comments
 (0)