Skip to content

Commit 6f79792

Browse files
[TLI] Pass replace-with-veclib works with Scalable Vectors.
The pass uses the Masked variant of TLI method when the Intrinsic operates on Scalable Vectors and it fails to find a non-Masked variant.
1 parent cace1ec commit 6f79792

File tree

4 files changed

+50
-47
lines changed

4 files changed

+50
-47
lines changed

llvm/lib/Analysis/VFABIDemangling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
126126
return ParseRet::None;
127127
}
128128

129-
/// The function looks for the following stringt at the beginning of
129+
/// The function looks for the following string at the beginning of
130130
/// the input string `ParseString`:
131131
///
132132
/// <token> <number>

llvm/lib/CodeGen/ReplaceWithVeclib.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
105105
// all vector operands have identical vector width.
106106
ElementCount VF = ElementCount::getFixed(0);
107107
SmallVector<Type *> ScalarTypes;
108+
bool MayBeMasked = false;
108109
for (auto Arg : enumerate(CI.args())) {
109110
auto *ArgType = Arg.value()->getType();
110111
// Vector calls to intrinsics can still have
@@ -121,17 +122,13 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
121122
return false;
122123
}
123124
ElementCount NumElements = VectorArgTy->getElementCount();
124-
if (NumElements.isScalable()) {
125-
// The current implementation does not support
126-
// scalable vectors.
127-
return false;
128-
}
129-
if (VF.isNonZero() && VF != NumElements) {
130-
// The different arguments differ in vector size.
125+
if (NumElements.isScalable())
126+
MayBeMasked = true;
127+
128+
// The different arguments differ in vector size.
129+
if (VF.isNonZero() && VF != NumElements)
131130
return false;
132-
} else {
133-
VF = NumElements;
134-
}
131+
VF = NumElements;
135132
ScalarTypes.push_back(VectorArgTy->getElementType());
136133
}
137134
}
@@ -152,11 +149,14 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
152149
return false;
153150
}
154151

152+
// Assume it has a mask when that is a possibility and has no mapping for
153+
// a Non-Masked variant.
154+
const bool IsMasked =
155+
MayBeMasked && !TLI.getVectorMappingInfo(ScalarName, VF, false);
155156
// Try to find the mapping for the scalar version of this intrinsic
156157
// and the exact vector width of the call operands in the
157158
// TargetLibraryInfo.
158-
StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
159-
159+
StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF, IsMasked);
160160
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
161161
<< ScalarName << "` and vector width " << VF << ".\n");
162162

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
1515
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
1616

1717
;.
18-
; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
18+
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x], section "llvm.metadata"
1919
;.
2020
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
2121
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -40,7 +40,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
4040
define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 {
4141
; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64
4242
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
43-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN]])
43+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]])
4444
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
4545
;
4646
%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -50,7 +50,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0
5050
define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
5151
; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32
5252
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
53-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN]])
53+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]])
5454
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
5555
;
5656
%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -85,7 +85,7 @@ define <4 x float> @llvm_sin_f32(<4 x float> %in) {
8585
define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
8686
; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
8787
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
88-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN]])
88+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]])
8989
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
9090
;
9191
%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -95,7 +95,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0
9595
define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
9696
; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
9797
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
98-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN]])
98+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]])
9999
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
100100
;
101101
%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
@@ -130,7 +130,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
130130
define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 {
131131
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64
132132
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
133-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN]])
133+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]])
134134
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
135135
;
136136
%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -140,7 +140,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0
140140
define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
141141
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32
142142
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
143-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN]])
143+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]])
144144
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
145145
;
146146
%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -175,7 +175,7 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
175175
define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
176176
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
177177
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
178-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN]])
178+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]])
179179
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
180180
;
181181
%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -185,7 +185,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0
185185
define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
186186
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
187187
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
188-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN]])
188+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]])
189189
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
190190
;
191191
%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -220,7 +220,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
220220
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
221221
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
222222
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
223-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
223+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]])
224224
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
225225
;
226226
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -230,7 +230,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #
230230
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
231231
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
232232
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
233-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
233+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]])
234234
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
235235
;
236236
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -265,7 +265,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
265265
define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 {
266266
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64
267267
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
268-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN]])
268+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]])
269269
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
270270
;
271271
%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -275,7 +275,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0
275275
define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
276276
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32
277277
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
278-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN]])
278+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]])
279279
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
280280
;
281281
%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -310,7 +310,7 @@ define <4 x float> @llvm_log2_f32(<4 x float> %in) {
310310
define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
311311
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
312312
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
313-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN]])
313+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]])
314314
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
315315
;
316316
%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -320,7 +320,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0
320320
define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
321321
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
322322
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
323-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN]])
323+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]])
324324
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
325325
;
326326
%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -355,7 +355,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
355355
define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
356356
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
357357
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
358-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN]])
358+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]])
359359
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
360360
;
361361
%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -365,7 +365,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #
365365
define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
366366
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
367367
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
368-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN]])
368+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]])
369369
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
370370
;
371371
%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -380,7 +380,7 @@ declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4
380380
;
381381
; There is a bug in the replace-with-veclib pass, and for intrinsics which take
382382
; more than one arguments, but has just one overloaded type, it incorrectly
383-
; reconstructs the scalar name, for pow specificlly it is searching for:
383+
; reconstructs the scalar name, for pow specifically it is searching for:
384384
; llvm.pow.f64.f64 and llvm.pow.f32.f32
385385
;
386386

0 commit comments

Comments
 (0)