Skip to content

Commit b69b0f8

Browse files
Zain Jaffal authored and fhahn committed
[instcombine] Optimise for zero initialisation of product given fast flags are enabled
Currently, clang ignores the 0 initialisation in finite math. For example:

```
double f_prod = 0;
double arr[1000];
for (size_t i = 0; i < 1000; i++) {
  f_prod *= arr[i];
}
```

Clang will ignore that `f_prod` is set to zero and will still generate assembly to iterate over the loop.

Reviewed By: fhahn, spatel

Differential Revision: https://reviews.llvm.org/D131672

(Cherry picked from f61f99a)
1 parent 87a36f7 commit b69b0f8

File tree

3 files changed

+14
-11
lines changed

3 files changed

+14
-11
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6328,7 +6328,8 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
63286328
case Instruction::Sub:
63296329
case Instruction::And:
63306330
case Instruction::Or:
6331-
case Instruction::Mul: {
6331+
case Instruction::Mul:
6332+
case Instruction::FMul: {
63326333
Value *LL = LU->getOperand(0);
63336334
Value *LR = LU->getOperand(1);
63346335
// Find a recurrence.

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/ADT/APInt.h"
1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/Analysis/InstructionSimplify.h"
18+
#include "llvm/Analysis/ValueTracking.h"
1819
#include "llvm/IR/BasicBlock.h"
1920
#include "llvm/IR/Constant.h"
2021
#include "llvm/IR/Constants.h"
@@ -644,6 +645,15 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
644645
}
645646
}
646647

648+
// Simplify FMUL recurrences starting with 0.0 to 0.0 if nnan and nsz are set.
649+
// Given a phi node whose entry value is 0 and which is used in an fmul operation,
650+
// we can replace fmul with 0 safely and eliminate loop operation.
651+
PHINode *PN = nullptr;
652+
Value *Start = nullptr, *Step = nullptr;
653+
if (matchSimpleRecurrence(&I, PN, Start, Step) && I.hasNoNaNs() &&
654+
I.hasNoSignedZeros() && match(Start, m_Zero()))
655+
return replaceInstUsesWith(I, Start);
656+
647657
return nullptr;
648658
}
649659

llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,11 @@ define double @test_mul_fast_flags(ptr %arr_d) {
66
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
77
; CHECK: for.body:
88
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
9-
; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
10-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
11-
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
12-
; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
139
; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1
1410
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
1511
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
1612
; CHECK: end:
17-
; CHECK-NEXT: ret double [[MUL]]
13+
; CHECK-NEXT: ret double 0.000000e+00
1814
;
1915
entry:
2016
br label %for.body
@@ -40,15 +36,11 @@ define double @test_nsz_nnan_flags_enabled(ptr %arr_d) {
4036
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4137
; CHECK: for.body:
4238
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
43-
; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
44-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
45-
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
46-
; CHECK-NEXT: [[MUL]] = fmul nnan nsz double [[F_PROD_01]], [[TMP0]]
4739
; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1
4840
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
4941
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
5042
; CHECK: end:
51-
; CHECK-NEXT: ret double [[MUL]]
43+
; CHECK-NEXT: ret double 0.000000e+00
5244
;
5345
entry:
5446
br label %for.body

0 commit comments

Comments
 (0)