Skip to content

Commit 5a36904

Browse files
committed
Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass"
This reverts commit 26bb22b.
1 parent 32d16b6 commit 5a36904

File tree

6 files changed

+246
-129
lines changed

6 files changed

+246
-129
lines changed

clang/test/Headers/__clang_hip_math.hip

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -231,26 +231,26 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) {
231231

232232
// CHECK-LABEL: @test_abs(
233233
// CHECK-NEXT: entry:
234-
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
235-
// CHECK-NEXT: ret i32 [[ABS_I]]
234+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
235+
// CHECK-NEXT: ret i32 [[TMP0]]
236236
//
237237
extern "C" __device__ int test_abs(int x) {
238238
return abs(x);
239239
}
240240

241241
// CHECK-LABEL: @test_labs(
242242
// CHECK-NEXT: entry:
243-
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
244-
// CHECK-NEXT: ret i64 [[ABS_I]]
243+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
244+
// CHECK-NEXT: ret i64 [[TMP0]]
245245
//
246246
extern "C" __device__ long test_labs(long x) {
247247
return labs(x);
248248
}
249249

250250
// CHECK-LABEL: @test_llabs(
251251
// CHECK-NEXT: entry:
252-
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
253-
// CHECK-NEXT: ret i64 [[ABS_I]]
252+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
253+
// CHECK-NEXT: ret i64 [[TMP0]]
254254
//
255255
extern "C" __device__ long long test_llabs(long x) {
256256
return llabs(x);
@@ -2557,33 +2557,65 @@ extern "C" __device__ double test_nan(const char *tag) {
25572557
return nan(tag);
25582558
}
25592559

2560-
// CHECK-LABEL: @test_nanf_emptystr(
2561-
// CHECK-NEXT: entry:
2562-
// CHECK-NEXT: ret float 0x7FF8000000000000
2560+
// DEFAULT-LABEL: @test_nanf_emptystr(
2561+
// DEFAULT-NEXT: entry:
2562+
// DEFAULT-NEXT: ret float 0x7FF8000000000000
2563+
//
2564+
// FINITEONLY-LABEL: @test_nanf_emptystr(
2565+
// FINITEONLY-NEXT: entry:
2566+
// FINITEONLY-NEXT: ret float poison
2567+
//
2568+
// APPROX-LABEL: @test_nanf_emptystr(
2569+
// APPROX-NEXT: entry:
2570+
// APPROX-NEXT: ret float 0x7FF8000000000000
25632571
//
25642572
extern "C" __device__ float test_nanf_emptystr() {
25652573
return nanf("");
25662574
}
25672575

2568-
// CHECK-LABEL: @test_nan_emptystr(
2569-
// CHECK-NEXT: entry:
2570-
// CHECK-NEXT: ret double 0x7FF8000000000000
2576+
// DEFAULT-LABEL: @test_nan_emptystr(
2577+
// DEFAULT-NEXT: entry:
2578+
// DEFAULT-NEXT: ret double 0x7FF8000000000000
2579+
//
2580+
// FINITEONLY-LABEL: @test_nan_emptystr(
2581+
// FINITEONLY-NEXT: entry:
2582+
// FINITEONLY-NEXT: ret double poison
2583+
//
2584+
// APPROX-LABEL: @test_nan_emptystr(
2585+
// APPROX-NEXT: entry:
2586+
// APPROX-NEXT: ret double 0x7FF8000000000000
25712587
//
25722588
extern "C" __device__ double test_nan_emptystr() {
25732589
return nan("");
25742590
}
25752591

2576-
// CHECK-LABEL: @test_nanf_fill(
2577-
// CHECK-NEXT: entry:
2578-
// CHECK-NEXT: ret float 0x7FF8000000000000
2592+
// DEFAULT-LABEL: @test_nanf_fill(
2593+
// DEFAULT-NEXT: entry:
2594+
// DEFAULT-NEXT: ret float 0x7FF8000000000000
2595+
//
2596+
// FINITEONLY-LABEL: @test_nanf_fill(
2597+
// FINITEONLY-NEXT: entry:
2598+
// FINITEONLY-NEXT: ret float poison
2599+
//
2600+
// APPROX-LABEL: @test_nanf_fill(
2601+
// APPROX-NEXT: entry:
2602+
// APPROX-NEXT: ret float 0x7FF8000000000000
25792603
//
25802604
extern "C" __device__ float test_nanf_fill() {
25812605
return nanf("0x456");
25822606
}
25832607

2584-
// CHECK-LABEL: @test_nan_fill(
2585-
// CHECK-NEXT: entry:
2586-
// CHECK-NEXT: ret double 0x7FF8000000000000
2608+
// DEFAULT-LABEL: @test_nan_fill(
2609+
// DEFAULT-NEXT: entry:
2610+
// DEFAULT-NEXT: ret double 0x7FF8000000000000
2611+
//
2612+
// FINITEONLY-LABEL: @test_nan_fill(
2613+
// FINITEONLY-NEXT: entry:
2614+
// FINITEONLY-NEXT: ret double poison
2615+
//
2616+
// APPROX-LABEL: @test_nan_fill(
2617+
// APPROX-NEXT: entry:
2618+
// APPROX-NEXT: ret double 0x7FF8000000000000
25872619
//
25882620
extern "C" __device__ double test_nan_fill() {
25892621
return nan("0x123");

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,10 @@ struct KnownFPClass {
243243
/// definitely set or false if the sign bit is definitely unset.
244244
std::optional<bool> SignBit;
245245

246+
/// Equality comparison: two KnownFPClass values match only when both the
/// class mask and the optional sign-bit knowledge are identical.
bool operator==(KnownFPClass Other) const {
  if (KnownFPClasses != Other.KnownFPClasses)
    return false;
  return SignBit == Other.SignBit;
}
249+
246250
/// Return true if it's known this can never be one of the mask entries.
247251
bool isKnownNever(FPClassTest Mask) const {
248252
return (KnownFPClasses & Mask) == fcNone;

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,15 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
548548
APInt &UndefElts, unsigned Depth = 0,
549549
bool AllowMultipleUsers = false) override;
550550

551+
/// Attempts to replace V with a simpler value based on the demanded
552+
/// floating-point classes. Returns null when no simplification was made.
553+
Value *SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask,
554+
KnownFPClass &Known, unsigned Depth,
555+
Instruction *CxtI);
556+
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op,
557+
FPClassTest DemandedMask, KnownFPClass &Known,
558+
unsigned Depth = 0);
559+
551560
/// Canonicalize the position of binops relative to shufflevector.
552561
Instruction *foldVectorBinop(BinaryOperator &Inst);
553562
Instruction *foldVectorSelect(SelectInst &Sel);

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,3 +1781,116 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
17811781

17821782
return MadeChange ? I : nullptr;
17831783
}
1784+
1785+
/// For floating-point classes that resolve to a single bit pattern, return that
1786+
/// value.
1787+
static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
1788+
switch (Mask) {
1789+
case fcPosZero:
1790+
return ConstantFP::getZero(Ty);
1791+
case fcNegZero:
1792+
return ConstantFP::getZero(Ty, true);
1793+
case fcPosInf:
1794+
return ConstantFP::getInfinity(Ty);
1795+
case fcNegInf:
1796+
return ConstantFP::getInfinity(Ty, true);
1797+
case fcNone:
1798+
return PoisonValue::get(Ty);
1799+
default:
1800+
return nullptr;
1801+
}
1802+
}
1803+
1804+
/// Try to simplify \p V given that its single user only observes the
/// floating-point classes in \p DemandedMask.
///
/// \param V            Value to simplify.
/// \param DemandedMask Classes of \p V that are observable.
/// \param Known        Asserted to equal a default-constructed KnownFPClass on
///                     entry; receives the class information computed for
///                     \p V.
/// \param Depth        Current recursion depth; asserted not to exceed
///                     MaxAnalysisRecursionDepth.
/// \param CxtI         Context instruction forwarded to computeKnownFPClass.
/// \returns a replacement for \p V, \p V itself when one of its operands was
/// rewritten in place, or nullptr when nothing was changed.
Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
    Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
    unsigned Depth, Instruction *CxtI) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  assert(Known == KnownFPClass() && "expected uninitialized state");

  Type *const ValTy = V->getType();

  // Nothing is demanded: any non-undef value can be replaced by poison.
  if (DemandedMask == fcNone) {
    if (isa<UndefValue>(V))
      return nullptr;
    return PoisonValue::get(ValTy);
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  const unsigned NextDepth = Depth + 1;

  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst) {
    // Handle constants and arguments
    Known = computeKnownFPClass(V, fcAllFlags, CxtI, NextDepth);
    Value *Folded =
        getFPClassConstant(ValTy, DemandedMask & Known.KnownFPClasses);
    if (Folded == V)
      return nullptr;
    return Folded;
  }

  // Operands are rewritten in place below, so only single-use instructions
  // are considered.
  if (!Inst->hasOneUse())
    return nullptr;

  // Fallback for instructions without dedicated handling: query the known
  // classes of the instruction directly.
  auto ComputeKnownDirectly = [&] {
    Known = computeKnownFPClass(Inst, ~DemandedMask, CxtI, NextDepth);
  };

  // TODO: Should account for nofpclass/FastMathFlags on current instruction
  switch (Inst->getOpcode()) {
  case Instruction::FNeg:
    // Demand the sign-flipped classes from the operand, then flip the result.
    if (SimplifyDemandedFPClass(Inst, 0, llvm::fneg(DemandedMask), Known,
                                NextDepth))
      return Inst;
    Known.fneg();
    break;

  case Instruction::Call: {
    const auto IID = cast<CallInst>(Inst)->getIntrinsicID();
    if (IID == Intrinsic::fabs) {
      if (SimplifyDemandedFPClass(Inst, 0, llvm::inverse_fabs(DemandedMask),
                                  Known, NextDepth))
        return Inst;
      Known.fabs();
    } else if (IID == Intrinsic::arithmetic_fence) {
      // The demanded classes are passed through to the operand unchanged.
      if (SimplifyDemandedFPClass(Inst, 0, DemandedMask, Known, NextDepth))
        return Inst;
    } else {
      ComputeKnownDirectly();
    }
    break;
  }

  case Instruction::Select: {
    KnownFPClass KnownOp1, KnownOp2;

    // Operand 2 is visited before operand 1; stop at the first change.
    if (SimplifyDemandedFPClass(Inst, 2, DemandedMask, KnownOp2, NextDepth))
      return Inst;
    if (SimplifyDemandedFPClass(Inst, 1, DemandedMask, KnownOp1, NextDepth))
      return Inst;

    // If one arm can never produce a demanded class, use the other arm.
    if (KnownOp1.isKnownNever(DemandedMask))
      return Inst->getOperand(2);
    if (KnownOp2.isKnownNever(DemandedMask))
      return Inst->getOperand(1);

    // TODO: Recognize clamping patterns
    Known = KnownOp1 | KnownOp2;
    break;
  }

  default:
    ComputeKnownDirectly();
    break;
  }

  // Fold to a constant when the demanded and known classes leave a single
  // possible bit pattern (or none at all).
  return getFPClassConstant(ValTy, DemandedMask & Known.KnownFPClasses);
}
1881+
1882+
/// Simplify operand \p OpNo of \p I based on the floating-point classes the
/// instruction demands from it, rewriting the use when a simpler value is
/// found.
///
/// \param I            Instruction whose operand is examined.
/// \param OpNo         Index of the operand to simplify.
/// \param DemandedMask Classes of the operand that are demanded.
/// \param Known        Passed through to SimplifyDemandedUseFPClass.
/// \param Depth        Recursion depth passed through unchanged.
/// \returns true if the operand use was replaced, false otherwise.
bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
                                               FPClassTest DemandedMask,
                                               KnownFPClass &Known,
                                               unsigned Depth) {
  Use &OperandUse = I->getOperandUse(OpNo);
  Value *Replacement = SimplifyDemandedUseFPClass(OperandUse.get(),
                                                  DemandedMask, Known, Depth, I);
  if (Replacement) {
    // Salvage debug info from the old operand before the use is redirected.
    if (auto *OldDef = dyn_cast<Instruction>(OperandUse))
      salvageDebugInfo(*OldDef);

    replaceUse(OperandUse, Replacement);
    return true;
  }

  return false;
}

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2732,8 +2732,22 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
27322732
}
27332733

27342734
/// Simplify the returned value using the enclosing function's return
/// nofpclass attribute: the classes named by the attribute are treated as not
/// demanded.
///
/// \returns a new return instruction carrying the simplified value, or
/// nullptr when there is no return value, its type is not nofpclass
/// compatible, the attribute names no classes, or no simplification was found.
Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
  Value *Returned = RI.getReturnValue();
  if (!Returned)
    return nullptr;
  if (!AttributeFuncs::isNoFPClassCompatibleType(Returned->getType()))
    return nullptr;

  const FPClassTest ExcludedClasses =
      RI.getFunction()->getAttributes().getRetNoFPClass();
  if (ExcludedClasses == fcNone)
    return nullptr;

  KnownFPClass ReturnedKnown;
  if (Value *NewRet = SimplifyDemandedUseFPClass(Returned, ~ExcludedClasses,
                                                 ReturnedKnown, 0, &RI))
    return ReturnInst::Create(RI.getContext(), NewRet);

  return nullptr;
}
27382752

27392753
// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!

0 commit comments

Comments
 (0)