Skip to content

Commit 88d325c

Browse files
[LV][LAA] Vectorize math lib calls with mem write-only attribute
Teach LAA to treat as safe specific math library calls that are known to have the memory write-only attribute set. This attribute is set on the calls by inferNonMandatoryLibFuncAttrs, in BuildLibCalls.cpp; the functions currently covered are modf/modff and frexp/frexpf. This happens only when the calls are found through TLI to have vectorized counterparts.
1 parent aa983fd commit 88d325c

File tree

3 files changed

+35
-15
lines changed

3 files changed

+35
-15
lines changed

clang/test/CodeGen/aarch64-veclib-function-calls-linear-ptrs.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ vectorize.
1717

1818
// CHECK-LABEL: define dso_local void @frexp_f64(
1919
// CHECK-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef writeonly [[OUT1:%.*]], ptr nocapture noundef writeonly [[OUT2:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
20-
// CHECK: [[CALL:%.*]] = tail call double @frexp(double noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR2:[0-9]+]]
20+
// CHECK: [[CALL:%.*]] = tail call double @frexp(double noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR5:[0-9]+]]
2121
//
2222
void frexp_f64(double *in, double *out1, int *out2, int N) {
2323
for (int i = 0; i < N; ++i)
@@ -26,30 +26,27 @@ void frexp_f64(double *in, double *out1, int *out2, int N) {
2626

2727
// CHECK-LABEL: define dso_local void @frexp_f32(
2828
// CHECK-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef writeonly [[OUT1:%.*]], ptr nocapture noundef writeonly [[OUT2:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
29-
// CHECK: [[CALL:%.*]] = tail call float @frexpf(float noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR2]]
29+
// CHECK: [[CALL:%.*]] = tail call float @frexpf(float noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR5]]
3030
//
3131
// Test input: each iteration calls frexpf with out2+i as the pointer argument
// and stores the returned value through *out1. The CHECK line above expects a
// scalar @frexpf call in the output.
// NOTE(review): the store targets *out1 on every iteration rather than
// out1[i] as in modf_f64 -- confirm this is intended.
void frexp_f32(float *in, float *out1, int *out2, int N) {
3232
for (int i = 0; i < N; ++i)
3333
*out1 = frexpf(in[i], out2+i);
3434
}
3535

36-
37-
// TODO: LAA must allow vectorization.
38-
3936
// CHECK-LABEL: define dso_local void @modf_f64(
4037
// CHECK-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef writeonly [[OUT1:%.*]], ptr nocapture noundef writeonly [[OUT2:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
41-
// CHECK: [[CALL:%.*]] = tail call double @modf(double noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR3:[0-9]+]]
38+
// CHECK: [[TMP11:%.*]] = tail call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP10:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
39+
// CHECK: [[CALL:%.*]] = tail call double @modf(double noundef [[TMP14:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR6:[0-9]+]]
4240
//
4341
// Test input: each iteration stores the value returned by modf into out1[i]
// and passes out2+i as the pointer argument. The CHECK lines above expect both
// a vector call (@armpl_svmodf_f64_x) and a scalar @modf call in the output.
void modf_f64(double *in, double *out1, double *out2, int N) {
4442
for (int i = 0; i < N; ++i)
4543
out1[i] = modf(in[i], out2+i);
4644
}
4745

48-
// TODO: LAA must allow vectorization.
49-
5046
// CHECK-LABEL: define dso_local void @modf_f32(
5147
// CHECK-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef writeonly [[OUT1:%.*]], ptr nocapture noundef writeonly [[OUT2:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
52-
// CHECK: [[CALL:%.*]] = tail call float @modff(float noundef [[TMP0:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR4:[0-9]+]]
48+
// CHECK: [[TMP11:%.*]] = tail call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP10:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
49+
// CHECK: [[CALL:%.*]] = tail call float @modff(float noundef [[TMP14:%.*]], ptr noundef [[ADD_PTR:%.*]]) #[[ATTR7:[0-9]+]]
5350
//
5451
void modf_f32(float *in, float *out1, float *out2, int N) {
5552
for (int i = 0; i < N; ++i)

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2381,6 +2381,20 @@ bool LoopAccessInfo::canAnalyzeLoop() {
23812381
return true;
23822382
}
23832383

2384+
/// Returns whether \p I is a known math library call that has memory write-only
2385+
/// attribute set.
2386+
static bool isMathLibCallMemWriteOnly(const TargetLibraryInfo *TLI,
2387+
const Instruction &I) {
2388+
auto *Call = dyn_cast<CallInst>(&I);
2389+
if (!Call)
2390+
return false;
2391+
2392+
LibFunc Func;
2393+
TLI->getLibFunc(*Call, Func);
2394+
return Func == LibFunc::LibFunc_modf || Func == LibFunc::LibFunc_modff ||
2395+
Func == LibFunc::LibFunc_frexp || Func == LibFunc::LibFunc_frexpf;
2396+
}
2397+
23842398
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
23852399
const TargetLibraryInfo *TLI,
23862400
DominatorTree *DT) {
@@ -2477,6 +2491,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
24772491

24782492
// Save 'store' instructions. Abort if other instructions write to memory.
24792493
if (I.mayWriteToMemory()) {
2494+
// We can safely handle math functions that have vectorized
2495+
// counterparts and have the memory write-only attribute set.
2496+
if (isMathLibCallMemWriteOnly(TLI, I))
2497+
continue;
2498+
24802499
auto *St = dyn_cast<StoreInst>(&I);
24812500
if (!St) {
24822501
recordAnalysis("CantVectorizeInstruction", St)

llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,26 +3011,28 @@ declare float @modff(float, ptr) #1
30113011
define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
30123012
; SLEEF-NEON-LABEL: define void @modf_f64
30133013
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3014-
; SLEEF-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
3014+
; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
30153015
;
30163016
; SLEEF-SVE-LABEL: define void @modf_f64
30173017
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
30183018
; SLEEF-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
30193019
;
30203020
; SLEEF-SVE-NOPRED-LABEL: define void @modf_f64
30213021
; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3022+
; SLEEF-SVE-NOPRED: [[TMP17:%.*]] = call <vscale x 2 x double> @_ZGVsNxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]])
30223023
; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]]
30233024
;
30243025
; ARMPL-NEON-LABEL: define void @modf_f64
30253026
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3026-
; ARMPL-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
3027+
; ARMPL-NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
30273028
;
30283029
; ARMPL-SVE-LABEL: define void @modf_f64
30293030
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3030-
; ARMPL-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
3031+
; ARMPL-SVE: [[TMP23:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
30313032
;
30323033
; ARMPL-SVE-NOPRED-LABEL: define void @modf_f64
30333034
; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3035+
; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
30343036
; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]]
30353037
;
30363038
entry:
@@ -3055,26 +3057,28 @@ for.cond.cleanup:
30553057
define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
30563058
; SLEEF-NEON-LABEL: define void @modf_f32
30573059
; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3058-
; SLEEF-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
3060+
; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
30593061
;
30603062
; SLEEF-SVE-LABEL: define void @modf_f32
30613063
; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
30623064
; SLEEF-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
30633065
;
30643066
; SLEEF-SVE-NOPRED-LABEL: define void @modf_f32
30653067
; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3068+
; SLEEF-SVE-NOPRED: [[TMP17:%.*]] = call <vscale x 4 x float> @_ZGVsNxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]])
30663069
; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR66:[0-9]+]]
30673070
;
30683071
; ARMPL-NEON-LABEL: define void @modf_f32
30693072
; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3070-
; ARMPL-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
3073+
; ARMPL-NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
30713074
;
30723075
; ARMPL-SVE-LABEL: define void @modf_f32
30733076
; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3074-
; ARMPL-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
3077+
; ARMPL-SVE: [[TMP23:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
30753078
;
30763079
; ARMPL-SVE-NOPRED-LABEL: define void @modf_f32
30773080
; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
3081+
; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
30783082
; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR66:[0-9]+]]
30793083
;
30803084
entry:

0 commit comments

Comments
 (0)