[FuncSpec] Consider literal constants of recursive functions #111162


Closed
wants to merge 1 commit

Conversation


@hazzlim hazzlim commented Oct 4, 2024

Enable specialization of literal constants by default for recursive functions.

@llvmbot

llvmbot commented Oct 4, 2024

@llvm/pr-subscribers-function-specialization

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

Changes

Enable specialization of literal constants by default for recursive functions.


Full diff: https://github.com/llvm/llvm-project/pull/111162.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h (+4-2)
  • (modified) llvm/lib/Transforms/IPO/FunctionSpecialization.cpp (+10-6)
  • (modified) llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll (+56-16)
  • (modified) llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll (+50-8)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index b001771951e0fe..329491f2a0fe97 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -311,9 +311,11 @@ class FunctionSpecializer {
   /// @param FuncSize Cost of specializing a function.
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
+  /// @param ConsiderLiterals Whether to specialize on literal constants
   /// @return True, if any potential specializations were found
   bool findSpecializations(Function *F, unsigned FuncSize,
-                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM,
+                           bool ConsiderLiterals);
 
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
@@ -328,7 +330,7 @@ class FunctionSpecializer {
 
   /// Determine if it is possible to specialise the function for constant values
   /// of the formal parameter \p A.
-  bool isArgumentInteresting(Argument *A);
+  bool isArgumentInteresting(Argument *A, bool ConsiderLiterals);
 
   /// Check if the value \p V  (an actual argument) is a constant or can only
   /// have a constant value. Return that constant.
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 548335d750e33d..db0f06ec8ac17d 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -663,7 +663,8 @@ bool FunctionSpecializer::run() {
     if (Inserted && Metrics.isRecursive)
       promoteConstantStackValues(&F);
 
-    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+    bool ConsiderLiterals = SpecializeLiteralConstant || Metrics.isRecursive;
+    if (!findSpecializations(&F, FuncSize, AllSpecs, SM, ConsiderLiterals)) {
       LLVM_DEBUG(
           dbgs() << "FnSpecialization: No possible specializations found for "
                  << F.getName() << "\n");
@@ -803,7 +804,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
 
 bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
-                                              SpecMap &SM) {
+                                              SpecMap &SM,
+                                              bool ConsiderLiterals) {
   // A mapping from a specialisation signature to the index of the respective
   // entry in the all specialisation array. Used to ensure uniqueness of
   // specialisations.
@@ -812,7 +814,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
   // Get a list of interesting arguments.
   SmallVector<Argument *> Args;
   for (Argument &Arg : F->args())
-    if (isArgumentInteresting(&Arg))
+    if (isArgumentInteresting(&Arg, ConsiderLiterals))
       Args.push_back(&Arg);
 
   if (Args.empty())
@@ -1032,14 +1034,16 @@ unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
 
 /// Determine if it is possible to specialise the function for constant values
 /// of the formal parameter \p A.
-bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
+bool FunctionSpecializer::isArgumentInteresting(Argument *A,
+                                                bool ConsiderLiterals) {
   // No point in specialization if the argument is unused.
   if (A->user_empty())
     return false;
 
   Type *Ty = A->getType();
-  if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
-      (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
+  if (!Ty->isPointerTy() &&
+      (!ConsiderLiterals ||
+       (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
     return false;
 
   // SCCP solver does not record an argument that will be constructed on
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
index ef830a0e9a4a9e..00319f1424d766 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -64,27 +64,27 @@ define i32 @main(ptr %0, i32 %1) {
 ; CHECK-LABEL: define i32 @main(
 ; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
 ; CHECK-NEXT:    call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void @func.specialized.1(ptr [[TMP0]], i32 0)
+; CHECK-NEXT:    call void @func.specialized.1(ptr [[TMP0]])
 ; CHECK-NEXT:    ret i32 0
 ;
 ;
 ; CHECK-LABEL: define internal void @func.specialized.1(
-; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
-; CHECK:       [[BB6]]:
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
+; CHECK:       [[BB5]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
 ; CHECK-NEXT:    call void @decrement(ptr [[TMP8]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; CHECK-NEXT:    call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
-; CHECK-NEXT:    br label %[[BB12]]
-; CHECK:       [[BB12]]:
+; CHECK-NEXT:    call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP10]])
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -108,6 +108,46 @@ define i32 @main(ptr %0, i32 %1) {
 ; CHECK-NEXT:    ret void
 ;
 ;
+; CHECK-LABEL: define internal void @func.specialized.3(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[BB12:.*]], label %[[BB6:.*]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP8]]
+; CHECK-NEXT:    call void @decrement(ptr [[TMP9]])
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; CHECK-NEXT:    call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP11]])
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    ret void
+;
+;
+; ONE-ITER-LABEL: define internal void @func(
+; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr nocapture [[TMP2:%.*]]) {
+; ONE-ITER-NEXT:    [[TMP4:%.*]] = alloca i32, align 4
+; ONE-ITER-NEXT:    store i32 [[TMP1]], ptr [[TMP4]], align 4
+; ONE-ITER-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1
+; ONE-ITER-NEXT:    br i1 [[TMP6]], label %[[BB13:.*]], label %[[BB7:.*]]
+; ONE-ITER:       [[BB7]]:
+; ONE-ITER-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; ONE-ITER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP9]]
+; ONE-ITER-NEXT:    call void [[TMP2]](ptr [[TMP10]])
+; ONE-ITER-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT:    [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1
+; ONE-ITER-NEXT:    call void @func(ptr [[TMP0]], i32 [[TMP12]], ptr [[TMP2]])
+; ONE-ITER-NEXT:    br label %[[BB13]]
+; ONE-ITER:       [[BB13]]:
+; ONE-ITER-NEXT:    ret void
+;
+;
 ; ONE-ITER-LABEL: define internal void @increment(
 ; ONE-ITER-SAME: ptr nocapture [[TMP0:%.*]]) {
 ; ONE-ITER-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -127,27 +167,27 @@ define i32 @main(ptr %0, i32 %1) {
 ; ONE-ITER-LABEL: define i32 @main(
 ; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
 ; ONE-ITER-NEXT:    call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
-; ONE-ITER-NEXT:    call void @func.specialized.1(ptr [[TMP0]], i32 0)
+; ONE-ITER-NEXT:    call void @func.specialized.1(ptr [[TMP0]])
 ; ONE-ITER-NEXT:    ret i32 0
 ;
 ;
 ; ONE-ITER-LABEL: define internal void @func.specialized.1(
-; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ONE-ITER-SAME: ptr [[TMP0:%.*]]) {
 ; ONE-ITER-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
-; ONE-ITER-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
+; ONE-ITER-NEXT:    store i32 0, ptr [[TMP2]], align 4
 ; ONE-ITER-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 ; ONE-ITER-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
-; ONE-ITER-NEXT:    br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
-; ONE-ITER:       [[BB6]]:
+; ONE-ITER-NEXT:    br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
+; ONE-ITER:       [[BB5]]:
 ; ONE-ITER-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
 ; ONE-ITER-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
 ; ONE-ITER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
 ; ONE-ITER-NEXT:    call void @decrement(ptr [[TMP8]])
 ; ONE-ITER-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
 ; ONE-ITER-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; ONE-ITER-NEXT:    call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
-; ONE-ITER-NEXT:    br label %[[BB12]]
-; ONE-ITER:       [[BB12]]:
+; ONE-ITER-NEXT:    call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
+; ONE-ITER-NEXT:    br label %[[BB11]]
+; ONE-ITER:       [[BB11]]:
 ; ONE-ITER-NEXT:    ret void
 ;
 ;
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index 930ed6627f7f1e..e7e2fffac5d6ec 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -51,7 +51,7 @@ entry:
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[FLAG]], label %[[PLUS:.*]], label %[[MINUS:.*]]
 ; CHECK:       [[PLUS]]:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
 ; CHECK-NEXT:    br label %[[MERGE:.*]]
 ; CHECK:       [[MINUS]]:
 ; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
@@ -79,10 +79,10 @@ entry:
 ; CHECK-LABEL: define internal i64 @compute.specialized.1(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[OP1:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
 ; CHECK-NEXT:    [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
-; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP1]], [[OP0]]
+; CHECK-NEXT:    [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
@@ -93,13 +93,13 @@ entry:
 ; CHECK-LABEL: define internal i64 @compute.specialized.2(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT:    [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
+; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
 ; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], 42
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], 2
 ; CHECK-NEXT:    ret i64 [[MUL]]
 ;
@@ -117,3 +117,45 @@ entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], 2
 ; CHECK-NEXT:    ret i64 [[MUL]]
 ;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT:    [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], 42
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT:    ret i64 [[MUL]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.5(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT:    ret i64 [[MUL]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.6(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OP0:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[OP2:%.*]] = call i64 @compute.specialized.6(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
+; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT:    ret i64 [[MUL]]
+;


hazzlim commented Oct 4, 2024

This change is intended to prevent regressions in code which would otherwise be negatively affected by #111163.

This change has negligible impact on CTMark compile times (Geomean 0.00%) for both LTO and non-LTO pipelines.

@hazzlim hazzlim requested a review from ChuanqiXu9 October 4, 2024 14:40

@labrinea labrinea left a comment


Can we remove promoteConstantStackValues()? Do you see any regressions doing so? As far as I remember that was a hack for exchange2_r, since Fortran passes arguments by reference.


hazzlim commented Oct 4, 2024

> Can we remove promoteConstantStackValues() ? Do you see any regressions doing so? As far as I remember that was a hack for exchange2_r, since fortran passes arguments by reference.

I think that provided both #111163 and this PR get in, we would be able to remove promoteConstantStackValues() without causing any regressions. So that would be the goal following these changes, but it does rely on Argument Promotion being run before IPSCCP. For this reason I was intending to put this up as a separate patch - however I could merge it into this change if that seems appropriate.

@david-arm

Hi @hazzlim, I do understand why you want to make this change, because it can lead to a significant gain in the bwaves benchmark (in combination with PR #111163). However, it feels like this patch is adding a fairly arbitrary heuristic, i.e. that only Fortran routines that are recursive are able to benefit from PR #111163.

If I understand correctly, PR #111163 will promote references to constants to actual constant arguments, which unlocks optimisations later in the pipeline, in particular IPSCCP. I thought the fundamental reason why we are careful about specialising on a constant literal is that we want to avoid a potential explosion in specialisations, which not only increases compile time but also binary size. This patch could also lead to an explosion in compile times for certain benchmarks if there are lots of different calls to the recursive function that pass in different literals. It just so happens that for bwaves this isn't a problem, if I understand correctly?

So I suppose what this patch implies is that if a function is recursive there will be at least two callers passing in that literal value, i.e. the original call and the recursive call. That makes me wonder if there is a better heuristic we can use that would benefit non-recursive Fortran functions too? For example, what if we decided to specialise functions with literal constants only if we can prove that it's worth it based on the number or percentage of callers using that same literal? Would such a heuristic also work for bwaves?

@david-arm

Or we could also apply a weight or hotness to calls, where recursive calls are essentially hotter? Just for the record, I'm not expecting you to solve all the problems at once in this initial patch, but we might be able to sketch out an initial algorithm as part of this work, even if it is only limited to recursive functions for now.

; ONE-ITER-NEXT: br label %[[BB13]]
; ONE-ITER: [[BB13]]:
; ONE-ITER-NEXT: ret void
;

Any new tests should be moved to a new file named accordingly based on what the test is trying to check. In this case something like recursive-literal-const-args.ll or similar.


Oh I've realised you're not adding tests but changing the codegen of existing ones. That doesn't seem right, I'd change the RUN lines of this file to prevent specialization on literal constants.

; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
; CHECK-NEXT: ret i64 [[MUL]]

@labrinea labrinea Oct 7, 2024


> Any new tests should be moved to a new file named accordingly based on what the test is trying to check. In this case something like recursive-literal-const-args.ll or similar.

> Oh I've realised you're not adding tests but changing the codegen of existing ones. That doesn't seem right, I'd change the RUN lines of this file to prevent specialization on literal constants.

Same here


hazzlim commented Oct 23, 2024

Closing this in favor of #113442

@hazzlim hazzlim closed this Oct 23, 2024