Skip to content

Commit 6640df9

Browse files
committed
ValueTracking: Remove CannotBeOrderedLessThanZero
Replace the last user of CannotBeOrderedLessThanZero with the new version, cannotBeOrderedLessThanZero. This makes assumes (llvm.assume) work in this case.
1 parent a1e80ac commit 6640df9

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,8 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
400400
/// pessimistic codegen that has to account for setting errno and can enable
401401
/// vectorization.
402402
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
403-
TargetLibraryInfo &TLI) {
403+
TargetLibraryInfo &TLI, AssumptionCache &AC,
404+
DominatorTree &DT) {
404405
// Match a call to sqrt mathlib function.
405406
auto *Call = dyn_cast<CallInst>(&I);
406407
if (!Call)
@@ -424,7 +425,8 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
424425
Value *Arg = Call->getArgOperand(0);
425426
if (TTI.haveFastSqrt(Ty) &&
426427
(Call->hasNoNaNs() ||
427-
CannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI))) {
428+
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
429+
&DT))) {
428430
IRBuilder<> Builder(&I);
429431
IRBuilderBase::FastMathFlagGuard Guard(Builder);
430432
Builder.setFastMathFlags(Call->getFastMathFlags());
@@ -918,7 +920,8 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
918920
/// occur frequently and/or have more than a constant-length pattern match.
919921
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
920922
TargetTransformInfo &TTI,
921-
TargetLibraryInfo &TLI, AliasAnalysis &AA) {
923+
TargetLibraryInfo &TLI, AliasAnalysis &AA,
924+
AssumptionCache &AC) {
922925
bool MadeChange = false;
923926
for (BasicBlock &BB : F) {
924927
// Ignore unreachable basic blocks.
@@ -943,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
943946
// NOTE: This function introduces erasing of the instruction `I`, so it
944947
// needs to be called at the end of this sequence, otherwise we may make
945948
// bugs.
946-
MadeChange |= foldSqrt(I, TTI, TLI);
949+
MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
947950
}
948951
}
949952

@@ -964,7 +967,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
964967
const DataLayout &DL = F.getParent()->getDataLayout();
965968
TruncInstCombine TIC(AC, TLI, DL, DT);
966969
MadeChange |= TIC.run(F);
967-
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
970+
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
968971
return MadeChange;
969972
}
970973

llvm/test/Transforms/AggressiveInstCombine/X86/sqrt.ll

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ declare float @sqrtf(float)
55
declare double @sqrt(double)
66
declare fp128 @sqrtl(fp128)
77
declare float @llvm.fabs.f32(float)
8+
declare void @llvm.assume(i1 noundef)
89

910
; "nnan" implies no setting of errno and the target can lower this to an
1011
; instruction, so transform to an intrinsic.
@@ -46,7 +47,7 @@ define fp128 @sqrt_call_nnan_f128(fp128 %x) {
4647

4748
define float @sqrt_call_nnan_f32_nobuiltin(float %x) {
4849
; CHECK-LABEL: @sqrt_call_nnan_f32_nobuiltin(
49-
; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]]
50+
; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) #[[ATTR2:[0-9]+]]
5051
; CHECK-NEXT: ret float [[SQRT]]
5152
;
5253
%sqrt = call nnan float @sqrtf(float %x) nobuiltin
@@ -74,3 +75,16 @@ define float @sqrt_call_f32_fabs(float %x) {
7475
%sqrt = call float @sqrtf(float %a)
7576
ret float %sqrt
7677
}
78+
79+
define float @sqrt_call_f32_assume_oge_n0(float %x) {
80+
; CHECK-LABEL: @sqrt_call_f32_assume_oge_n0(
81+
; CHECK-NEXT: [[IS_POS:%.*]] = fcmp oge float [[X:%.*]], -0.000000e+00
82+
; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POS]])
83+
; CHECK-NEXT: [[SQRT1:%.*]] = call float @llvm.sqrt.f32(float [[X]])
84+
; CHECK-NEXT: ret float [[SQRT1]]
85+
;
86+
%is.pos = fcmp oge float %x, -0.0
87+
call void @llvm.assume(i1 %is.pos)
88+
%sqrt = call float @sqrtf(float %x)
89+
ret float %sqrt
90+
}

0 commit comments

Comments
 (0)