Skip to content

Commit ad479dd

Browse files
committed
Revert "[SeparateConstOffsetFromGEP] Decompose constant xor operand if possible (#135788)"
This reverts commit 13ccce2. The tests are on non-canonical IR, and adds an extra unrelated pre-processing step to the pass. I'm assuming this is a workaround for the known-bits recursion depth limit in instcombine.
1 parent 1bf4702 commit ad479dd

File tree

2 files changed

+0
-397
lines changed

2 files changed

+0
-397
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 0 additions & 193 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,6 @@
174174
#include "llvm/IR/Function.h"
175175
#include "llvm/IR/GetElementPtrTypeIterator.h"
176176
#include "llvm/IR/IRBuilder.h"
177-
#include "llvm/IR/InstIterator.h"
178177
#include "llvm/IR/InstrTypes.h"
179178
#include "llvm/IR/Instruction.h"
180179
#include "llvm/IR/Instructions.h"
@@ -191,7 +190,6 @@
191190
#include "llvm/Support/ErrorHandling.h"
192191
#include "llvm/Support/raw_ostream.h"
193192
#include "llvm/Transforms/Scalar.h"
194-
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
195193
#include "llvm/Transforms/Utils/Local.h"
196194
#include <cassert>
197195
#include <cstdint>
@@ -200,8 +198,6 @@
200198
using namespace llvm;
201199
using namespace llvm::PatternMatch;
202200

203-
#define DEBUG_TYPE "separate-offset-gep"
204-
205201
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
206202
"disable-separate-const-offset-from-gep", cl::init(false),
207203
cl::desc("Do not separate the constant offset from a GEP instruction"),
@@ -492,42 +488,6 @@ class SeparateConstOffsetFromGEP {
492488
DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
493489
};
494490

495-
/// A helper class that aims to convert xor operations into or operations when
496-
/// their operands are disjoint and the result is used in a GEP's index. This
497-
/// can then enable further GEP optimizations by effectively turning BaseVal |
498-
/// Const into BaseVal + Const when they are disjoint, which
499-
/// SeparateConstOffsetFromGEP can then process. This is a common pattern that
500-
/// sets up a grid of memory accesses across a wave where each thread acesses
501-
/// data at various offsets.
502-
class XorToOrDisjointTransformer {
503-
public:
504-
XorToOrDisjointTransformer(Function &F, DominatorTree &DT,
505-
const DataLayout &DL)
506-
: F(F), DT(DT), DL(DL) {}
507-
508-
bool run();
509-
510-
private:
511-
Function &F;
512-
DominatorTree &DT;
513-
const DataLayout &DL;
514-
/// Maps a common operand to all Xor instructions
515-
using XorOpList = SmallVector<std::pair<BinaryOperator *, APInt>, 8>;
516-
using XorBaseValInst = DenseMap<Instruction *, XorOpList>;
517-
XorBaseValInst XorGroups;
518-
519-
/// Checks if the given value has at least one GetElementPtr user
520-
static bool hasGEPUser(const Value *V);
521-
522-
/// Helper function to check if BaseXor dominates all XORs in the group
523-
bool dominatesAllXors(BinaryOperator *BaseXor, const XorOpList &XorsInGroup);
524-
525-
/// Processes a group of XOR instructions that share the same non-constant
526-
/// base operand. Returns true if this group's processing modified the
527-
/// function.
528-
bool processXorGroup(Instruction *OriginalBaseInst, XorOpList &XorsInGroup);
529-
};
530-
531491
} // end anonymous namespace
532492

533493
char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
@@ -1263,154 +1223,6 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
12631223
return true;
12641224
}
12651225

1266-
// Helper function to check if an instruction has at least one GEP user
1267-
bool XorToOrDisjointTransformer::hasGEPUser(const Value *V) {
1268-
return llvm::any_of(V->users(), [](const User *U) {
1269-
return isa<llvm::GetElementPtrInst>(U);
1270-
});
1271-
}
1272-
1273-
bool XorToOrDisjointTransformer::dominatesAllXors(
1274-
BinaryOperator *BaseXor, const XorOpList &XorsInGroup) {
1275-
return llvm::all_of(XorsInGroup, [&](const auto &XorEntry) {
1276-
BinaryOperator *XorInst = XorEntry.first;
1277-
// Do not evaluate the BaseXor, otherwise we end up cloning it.
1278-
return XorInst == BaseXor || DT.dominates(BaseXor, XorInst);
1279-
});
1280-
}
1281-
1282-
bool XorToOrDisjointTransformer::processXorGroup(Instruction *OriginalBaseInst,
1283-
XorOpList &XorsInGroup) {
1284-
bool Changed = false;
1285-
if (XorsInGroup.size() <= 1)
1286-
return false;
1287-
1288-
// Sort XorsInGroup by the constant offset value in increasing order.
1289-
llvm::sort(XorsInGroup, [](const auto &A, const auto &B) {
1290-
return A.second.slt(B.second);
1291-
});
1292-
1293-
// Dominance check
1294-
// The "base" XOR for dominance purposes is the one with the smallest
1295-
// constant.
1296-
BinaryOperator *XorWithSmallConst = XorsInGroup[0].first;
1297-
1298-
if (!dominatesAllXors(XorWithSmallConst, XorsInGroup)) {
1299-
LLVM_DEBUG(dbgs() << DEBUG_TYPE
1300-
<< ": Cloning and inserting XOR with smallest constant ("
1301-
<< *XorWithSmallConst
1302-
<< ") as it does not dominate all other XORs"
1303-
<< " in function " << F.getName() << "\n");
1304-
1305-
BinaryOperator *ClonedXor =
1306-
cast<BinaryOperator>(XorWithSmallConst->clone());
1307-
ClonedXor->setName(XorWithSmallConst->getName() + ".dom_clone");
1308-
ClonedXor->insertAfter(OriginalBaseInst);
1309-
LLVM_DEBUG(dbgs() << " Cloned Inst: " << *ClonedXor << "\n");
1310-
Changed = true;
1311-
XorWithSmallConst = ClonedXor;
1312-
}
1313-
1314-
SmallVector<Instruction *, 8> InstructionsToErase;
1315-
const APInt SmallestConst =
1316-
cast<ConstantInt>(XorWithSmallConst->getOperand(1))->getValue();
1317-
1318-
// Main transformation loop: Iterate over the original XORs in the sorted
1319-
// group.
1320-
for (const auto &XorEntry : XorsInGroup) {
1321-
BinaryOperator *XorInst = XorEntry.first; // Original XOR instruction
1322-
const APInt ConstOffsetVal = XorEntry.second;
1323-
1324-
// Do not process the one with smallest constant as it is the base.
1325-
if (XorInst == XorWithSmallConst)
1326-
continue;
1327-
1328-
// Disjointness Check 1
1329-
APInt NewConstVal = ConstOffsetVal - SmallestConst;
1330-
if ((NewConstVal & SmallestConst) != 0) {
1331-
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Cannot transform XOR in function "
1332-
<< F.getName() << ":\n"
1333-
<< " New Const: " << NewConstVal
1334-
<< " Smallest Const: " << SmallestConst
1335-
<< " are not disjoint \n");
1336-
continue;
1337-
}
1338-
1339-
// Disjointness Check 2
1340-
if (MaskedValueIsZero(XorWithSmallConst, NewConstVal, SimplifyQuery(DL),
1341-
0)) {
1342-
LLVM_DEBUG(dbgs() << DEBUG_TYPE
1343-
<< ": Transforming XOR to OR (disjoint) in function "
1344-
<< F.getName() << ":\n"
1345-
<< " Xor: " << *XorInst << "\n"
1346-
<< " Base Val: " << *XorWithSmallConst << "\n"
1347-
<< " New Const: " << NewConstVal << "\n");
1348-
1349-
auto *NewOrInst = BinaryOperator::CreateDisjointOr(
1350-
XorWithSmallConst,
1351-
ConstantInt::get(OriginalBaseInst->getType(), NewConstVal),
1352-
XorInst->getName() + ".or_disjoint", XorInst->getIterator());
1353-
1354-
NewOrInst->copyMetadata(*XorInst);
1355-
XorInst->replaceAllUsesWith(NewOrInst);
1356-
LLVM_DEBUG(dbgs() << " New Inst: " << *NewOrInst << "\n");
1357-
InstructionsToErase.push_back(XorInst); // Mark original XOR for deletion
1358-
1359-
Changed = true;
1360-
} else {
1361-
LLVM_DEBUG(
1362-
dbgs() << DEBUG_TYPE
1363-
<< ": Cannot transform XOR (not proven disjoint) in function "
1364-
<< F.getName() << ":\n"
1365-
<< " Xor: " << *XorInst << "\n"
1366-
<< " Base Val: " << *XorWithSmallConst << "\n"
1367-
<< " New Const: " << NewConstVal << "\n");
1368-
}
1369-
}
1370-
1371-
for (Instruction *I : InstructionsToErase)
1372-
I->eraseFromParent();
1373-
1374-
return Changed;
1375-
}
1376-
1377-
// Try to transform XOR(A, B+C) in to XOR(A,C) + B where XOR(A,C) becomes
1378-
// the base for memory operations. This transformation is true under the
1379-
// following conditions
1380-
// Check 1 - B and C are disjoint.
1381-
// Check 2 - XOR(A,C) and B are disjoint.
1382-
//
1383-
// This transformation is beneficial particularly for GEPs because:
1384-
// 1. OR operations often map better to addressing modes than XOR
1385-
// 2. Disjoint OR operations preserve the semantics of the original XOR
1386-
// 3. This can enable further optimizations in the GEP offset folding pipeline
1387-
bool XorToOrDisjointTransformer::run() {
1388-
bool Changed = false;
1389-
1390-
// Collect all candidate XORs
1391-
for (Instruction &I : instructions(F)) {
1392-
Instruction *Op0 = nullptr;
1393-
ConstantInt *C1 = nullptr;
1394-
BinaryOperator *MatchedXorOp = nullptr;
1395-
1396-
// Attempt to match the instruction 'I' as XOR operation.
1397-
if (match(&I, m_CombineAnd(m_Xor(m_Instruction(Op0), m_ConstantInt(C1)),
1398-
m_BinOp(MatchedXorOp))) &&
1399-
hasGEPUser(MatchedXorOp))
1400-
XorGroups[Op0].emplace_back(MatchedXorOp, C1->getValue());
1401-
}
1402-
1403-
if (XorGroups.empty())
1404-
return false;
1405-
1406-
// Process each group of XORs
1407-
for (auto &[OriginalBaseInst, XorsInGroup] : XorGroups)
1408-
if (processXorGroup(OriginalBaseInst, XorsInGroup))
1409-
Changed = true;
1410-
1411-
return Changed;
1412-
}
1413-
14141226
bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
14151227
if (skipFunction(F))
14161228
return false;
@@ -1430,11 +1242,6 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
14301242

14311243
DL = &F.getDataLayout();
14321244
bool Changed = false;
1433-
1434-
// Decompose xor in to "or disjoint" if possible.
1435-
XorToOrDisjointTransformer XorTransformer(F, *DT, *DL);
1436-
Changed |= XorTransformer.run();
1437-
14381245
for (BasicBlock &B : F) {
14391246
if (!DT->isReachableFromEntry(&B))
14401247
continue;

0 commit comments

Comments
 (0)