Commit 457eb1a

Author: git apple-llvm automerger (committed)
Merge commit 'b080d0bae8dd' from llvm.org/main into experimental/cas/main
2 parents: 0b8384f + b080d0b

19 files changed: +131, -1085 lines

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 42 additions & 49 deletions
@@ -209,51 +209,9 @@ Optional<Value *>
 combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
                                      const Optional<Value *> &B, Type *Ty);
 
-/// Helper to represent an access offset and size, with logic to deal with
-/// uncertainty and check for overlapping accesses.
-struct OffsetAndSize : public std::pair<int64_t, int64_t> {
-  using BaseTy = std::pair<int64_t, int64_t>;
-  OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
-  OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
-  int64_t getOffset() const { return first; }
-  int64_t getSize() const { return second; }
-  static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
-
-  /// Return true if offset or size are unknown.
-  bool offsetOrSizeAreUnknown() const {
-    return getOffset() == OffsetAndSize::Unknown ||
-           getSize() == OffsetAndSize::Unknown;
-  }
-
-  /// Return true if offset and size are unknown, thus this is the default
-  /// unknown object.
-  bool offsetAndSizeAreUnknown() const {
-    return getOffset() == OffsetAndSize::Unknown &&
-           getSize() == OffsetAndSize::Unknown;
-  }
-
-  /// Return true if this offset and size pair might describe an address that
-  /// overlaps with \p OAS.
-  bool mayOverlap(const OffsetAndSize &OAS) const {
-    // Any unknown value and we are giving up -> overlap.
-    if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
-      return true;
-
-    // Check if one offset point is in the other interval [offset,
-    // offset+size].
-    return OAS.getOffset() + OAS.getSize() > getOffset() &&
-           OAS.getOffset() < getOffset() + getSize();
-  }
-
-  /// Constant used to represent unknown offset or sizes.
-  static constexpr int64_t Unknown = 1 << 31;
-};
-
 /// Return the initial value of \p Obj with type \p Ty if that is a constant.
 Constant *getInitialValueForObj(Value &Obj, Type &Ty,
-                                const TargetLibraryInfo *TLI,
-                                const DataLayout &DL,
-                                OffsetAndSize *OASPtr = nullptr);
+                                const TargetLibraryInfo *TLI);
 
 /// Collect all potential underlying objects of \p Ptr at position \p CtxI in
 /// \p Objects. Assumed information is used and dependences onto \p QueryingAA
@@ -5078,13 +5036,47 @@ struct AAPointerInfo : public AbstractAttribute {
   /// See AbstractAttribute::getIdAddr()
   const char *getIdAddr() const override { return &ID; }
 
+  /// Helper to represent an access offset and size, with logic to deal with
+  /// uncertainty and check for overlapping accesses.
+  struct OffsetAndSize : public std::pair<int64_t, int64_t> {
+    using BaseTy = std::pair<int64_t, int64_t>;
+    OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
+    OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
+    int64_t getOffset() const { return first; }
+    int64_t getSize() const { return second; }
+    static OffsetAndSize getUnknown() {
+      return OffsetAndSize(Unknown, Unknown);
+    }
+
+    /// Return true if offset or size are unknown.
+    bool offsetOrSizeAreUnknown() const {
+      return getOffset() == OffsetAndSize::Unknown ||
+             getSize() == OffsetAndSize::Unknown;
+    }
+
+    /// Return true if this offset and size pair might describe an address that
+    /// overlaps with \p OAS.
+    bool mayOverlap(const OffsetAndSize &OAS) const {
+      // Any unknown value and we are giving up -> overlap.
+      if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
+        return true;
+
+      // Check if one offset point is in the other interval [offset,
+      // offset+size].
+      return OAS.getOffset() + OAS.getSize() > getOffset() &&
+             OAS.getOffset() < getOffset() + getSize();
+    }
+
+    /// Constant used to represent unknown offset or sizes.
+    static constexpr int64_t Unknown = 1 << 31;
+  };
+
   /// Call \p CB on all accesses that might interfere with \p OAS and return
   /// true if all such accesses were known and the callback returned true for
   /// all of them, false otherwise. An access interferes with an offset-size
   /// pair if it might read or write that memory region.
   virtual bool forallInterferingAccesses(
-      AA::OffsetAndSize OAS,
-      function_ref<bool(const Access &, bool)> CB) const = 0;
+      OffsetAndSize OAS, function_ref<bool(const Access &, bool)> CB) const = 0;
 
   /// Call \p CB on all accesses that might interfere with \p I and
   /// return true if all such accesses were known and the callback returned true
@@ -5093,10 +5085,11 @@ struct AAPointerInfo : public AbstractAttribute {
   /// affect the load even if they on the surface look as if they would. The
   /// flag \p HasBeenWrittenTo will be set to true if we know that \p I does not
   /// read the intial value of the underlying memory.
-  virtual bool forallInterferingAccesses(
-      Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
-      function_ref<bool(const Access &, bool)> CB, bool &HasBeenWrittenTo,
-      AA::OffsetAndSize *OASPtr = nullptr) const = 0;
+  virtual bool
+  forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA,
+                            Instruction &I,
+                            function_ref<bool(const Access &, bool)> CB,
+                            bool &HasBeenWrittenTo) const = 0;
 
   /// This function should return true if the type of the \p AA is AAPointerInfo
   static bool classof(const AbstractAttribute *AA) {
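
For intuition, here is a minimal standalone sketch (not part of the patch) of the overlap rule that the relocated OffsetAndSize::mayOverlap encodes: two (offset, size) windows intersect unless one ends at or before the other starts, and any Unknown component conservatively counts as an overlap. The struct below simply restates the helper outside AAPointerInfo so it compiles on its own.

#include <cassert>
#include <cstdint>
#include <utility>

// Standalone restatement of AAPointerInfo::OffsetAndSize, for illustration only.
struct OffsetAndSize : std::pair<int64_t, int64_t> {
  using BaseTy = std::pair<int64_t, int64_t>;
  OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
  int64_t getOffset() const { return first; }
  int64_t getSize() const { return second; }
  static constexpr int64_t Unknown = 1 << 31;
  bool offsetOrSizeAreUnknown() const {
    return getOffset() == Unknown || getSize() == Unknown;
  }
  bool mayOverlap(const OffsetAndSize &OAS) const {
    // Give up and report an overlap as soon as anything is unknown.
    if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
      return true;
    // Half-open interval intersection test.
    return OAS.getOffset() + OAS.getSize() > getOffset() &&
           OAS.getOffset() < getOffset() + getSize();
  }
};

int main() {
  assert(OffsetAndSize(0, 8).mayOverlap({4, 8}));  // [0,8) and [4,12) intersect
  assert(!OffsetAndSize(0, 8).mayOverlap({8, 8})); // [0,8) and [8,16) only touch
  assert(OffsetAndSize(0, 8).mayOverlap(
      OffsetAndSize(OffsetAndSize::Unknown, 4)));  // unknown offset -> overlap
}

Because an unknown pair overlaps everything, handing OffsetAndSize::getUnknown() to forallInterferingAccesses visits every recorded access.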

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 5 additions & 5 deletions
@@ -547,32 +547,32 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
 
   bool funcRetrievesMultigridSyncArg(Attributor &A) {
     auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
-    AA::OffsetAndSize OAS(Pos, 8);
+    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
     return funcRetrievesImplicitKernelArg(A, OAS);
   }
 
   bool funcRetrievesHostcallPtr(Attributor &A) {
     auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
-    AA::OffsetAndSize OAS(Pos, 8);
+    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
     return funcRetrievesImplicitKernelArg(A, OAS);
   }
 
   bool funcRetrievesHeapPtr(Attributor &A) {
     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
       return false;
-    AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
+    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
     return funcRetrievesImplicitKernelArg(A, OAS);
   }
 
   bool funcRetrievesQueuePtr(Attributor &A) {
     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
       return false;
-    AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
+    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
     return funcRetrievesImplicitKernelArg(A, OAS);
   }
 
   bool funcRetrievesImplicitKernelArg(Attributor &A,
-                                      AA::OffsetAndSize OAS) {
+                                      AAPointerInfo::OffsetAndSize OAS) {
     // Check if this is a call to the implicitarg_ptr builtin and it
     // is used to retrieve the hostcall pointer. The implicit arg for
     // hostcall is not used only if every use of the implicitarg_ptr
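
For context, a standalone sketch (the slot offset below is made up, not the real AMDGPU implicit-argument layout) of the interval question each of these helpers hands to funcRetrievesImplicitKernelArg: does any access through the implicitarg_ptr touch the 8-byte slot of interest?

#include <cstdint>
#include <cstdio>

// Does the access [AccOff, AccOff+AccSize) touch the 8-byte slot at SlotOff?
// Same interval test as AAPointerInfo::OffsetAndSize::mayOverlap, specialized
// to a fixed 8-byte pointer slot.
static bool touchesSlot(int64_t SlotOff, int64_t AccOff, int64_t AccSize) {
  return AccOff + AccSize > SlotOff && AccOff < SlotOff + 8;
}

int main() {
  const int64_t HostcallSlot = 24; // illustrative offset only
  std::printf("%d\n", touchesSlot(HostcallSlot, 16, 8)); // 0: ends at the slot
  std::printf("%d\n", touchesSlot(HostcallSlot, 24, 8)); // 1: exactly the slot
  std::printf("%d\n", touchesSlot(HostcallSlot, 28, 4)); // 1: partial overlap
  return 0;
}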

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 0 additions & 166 deletions
@@ -473,168 +473,6 @@ foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
   return false;
 }
 
-// Check if this array of constants represents a cttz table.
-// Iterate over the elements from \p Table by trying to find/match all
-// the numbers from 0 to \p InputBits that should represent cttz results.
-static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
-                        uint64_t Shift, uint64_t InputBits) {
-  unsigned Length = Table.getNumElements();
-  if (Length < InputBits || Length > InputBits * 2)
-    return false;
-
-  APInt Mask = APInt::getBitsSetFrom(InputBits, Shift);
-  unsigned Matched = 0;
-
-  for (unsigned i = 0; i < Length; i++) {
-    uint64_t Element = Table.getElementAsInteger(i);
-    if (Element >= InputBits)
-      continue;
-
-    // Check if \p Element matches a concrete answer. It could fail for some
-    // elements that are never accessed, so we keep iterating over each element
-    // from the table. The number of matched elements should be equal to the
-    // number of potential right answers which is \p InputBits actually.
-    if ((((Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
-      Matched++;
-  }
-
-  return Matched == InputBits;
-}
-
-// Try to recognize table-based ctz implementation.
-// E.g., an example in C (for more cases please see the llvm/tests):
-// int f(unsigned x) {
-//    static const char table[32] =
-//      {0, 1, 28, 2, 29, 14, 24, 3, 30,
-//       22, 20, 15, 25, 17, 4, 8, 31, 27,
-//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
-//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
-// }
-// this can be lowered to `cttz` instruction.
-// There is also a special case when the element is 0.
-//
-// Here are some examples or LLVM IR for a 64-bit target:
-//
-// CASE 1:
-// %sub = sub i32 0, %x
-// %and = and i32 %sub, %x
-// %mul = mul i32 %and, 125613361
-// %shr = lshr i32 %mul, 27
-// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
-// i64 %idxprom %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
-//
-// CASE 2:
-// %sub = sub i32 0, %x
-// %and = and i32 %sub, %x
-// %mul = mul i32 %and, 72416175
-// %shr = lshr i32 %mul, 26
-// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table, i64
-// 0, i64 %idxprom %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
-//
-// CASE 3:
-// %sub = sub i32 0, %x
-// %and = and i32 %sub, %x
-// %mul = mul i32 %and, 81224991
-// %shr = lshr i32 %mul, 27
-// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table, i64
-// 0, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
-//
-// CASE 4:
-// %sub = sub i64 0, %x
-// %and = and i64 %sub, %x
-// %mul = mul i64 %and, 283881067100198605
-// %shr = lshr i64 %mul, 58
-// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0, i64
-// %shr %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
-//
-// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
-static bool tryToRecognizeTableBasedCttz(Instruction &I) {
-  LoadInst *LI = dyn_cast<LoadInst>(&I);
-  if (!LI)
-    return false;
-
-  Type *AccessType = LI->getType();
-  if (!AccessType->isIntegerTy())
-    return false;
-
-  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
-  if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2)
-    return false;
-
-  if (!GEP->getSourceElementType()->isArrayTy())
-    return false;
-
-  uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
-  if (ArraySize != 32 && ArraySize != 64)
-    return false;
-
-  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
-  if (!GVTable || !GVTable->hasInitializer())
-    return false;
-
-  ConstantDataArray *ConstData =
-      dyn_cast<ConstantDataArray>(GVTable->getInitializer());
-  if (!ConstData)
-    return false;
-
-  if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
-    return false;
-
-  Value *Idx2 = std::next(GEP->idx_begin())->get();
-  Value *X1;
-  uint64_t MulConst, ShiftConst;
-  // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
-  // probably fail for other (e.g. 32-bit) targets.
-  if (!match(Idx2, m_ZExtOrSelf(
-                       m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
-                                    m_ConstantInt(MulConst)),
-                              m_ConstantInt(ShiftConst)))))
-    return false;
-
-  unsigned InputBits = X1->getType()->getScalarSizeInBits();
-  if (InputBits != 32 && InputBits != 64)
-    return false;
-
-  // Shift should extract top 5..7 bits.
-  if (InputBits - Log2_32(InputBits) != ShiftConst &&
-      InputBits - Log2_32(InputBits) - 1 != ShiftConst)
-    return false;
-
-  if (!isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
-    return false;
-
-  auto ZeroTableElem = ConstData->getElementAsInteger(0);
-  bool DefinedForZero = ZeroTableElem == InputBits;
-
-  IRBuilder<> B(LI);
-  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
-  Type *XType = X1->getType();
-  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
-  Value *ZExtOrTrunc = nullptr;
-
-  if (DefinedForZero) {
-    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
-  } else {
-    // If the value in elem 0 isn't the same as InputBits, we still want to
-    // produce the value from the table.
-    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
-    auto Select =
-        B.CreateSelect(Cmp, ConstantInt::get(XType, ZeroTableElem), Cttz);
-
-    // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
-    // it should be handled as: `cttz(x) & (typeSize - 1)`.
-
-    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
-  }
-
-  LI->replaceAllUsesWith(ZExtOrTrunc);
-
-  return true;
-}
-
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
@@ -657,10 +495,6 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
     MadeChange |= foldGuardedFunnelShift(I, DT);
     MadeChange |= tryToRecognizePopCount(I);
    MadeChange |= tryToFPToSat(I, TTI);
-    MadeChange |= tryToRecognizeTableBasedCttz(I);
-    // NOTE: This function introduces erasing of the instruction `I`, so it
-    // needs to be called at the end of this sequence, otherwise we may make
-    // bugs.
     MadeChange |= foldSqrt(I, TTI, TLI);
   }
 }
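
The deleted fold targeted the classic de Bruijn lookup shown in the removed comment. As a reference point only (not from the patch, and compiler-specific in its use of __builtin_ctz), here is a minimal sketch of that 32-bit pattern plus a check that the table really encodes cttz for every single-bit input:

#include <cstdint>
#include <cstdio>

// The 32-entry de Bruijn table from the removed comment; multiplying the
// isolated lowest set bit (x & -x) by 0x077CB531 and keeping the top 5 bits
// yields a distinct index for each bit position.
static const char Table[32] = {0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20,
                               15, 25, 17, 4,  8,  31, 27, 13, 23, 21, 19,
                               16, 7,  26, 12, 18, 6,  11, 5,  10, 9};

static int tableCttz(uint32_t X) {
  return Table[((uint32_t)((X & -X) * 0x077CB531U)) >> 27];
}

int main() {
  for (unsigned B = 0; B < 32; ++B)
    if (tableCttz(1u << B) != (int)__builtin_ctz(1u << B))
      return 1;
  std::puts("table matches cttz for all 32 single-bit inputs");
  return 0;
}

Note that x == 0 maps to Table[0] == 0 rather than the bit width, which is the special case the removed recognizer handled with a select against zero.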
