@@ -324,6 +324,10 @@ class Vectorizer {
324
324
Instruction *ChainElem, Instruction *ChainBegin,
325
325
const DenseMap<Instruction *, APInt /* OffsetFromLeader*/ > &ChainOffsets);
326
326
327
+ // / Merge equivalence classes that differ only in scalar bitwidth: accesses in
328
+ // / one class are rewritten through casts to match the other class's bitwidth.
329
+ void insertCastsToMergeClasses (EquivalenceClassMap &EQClasses);
330
+
327
331
// / Merges the equivalence classes if they have underlying objects that differ
328
332
// / by one level of indirection (i.e., one is a getelementptr and the other is
329
333
// / the base pointer in that getelementptr).
@@ -1310,6 +1314,82 @@ std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1310
1314
return std::nullopt;
1311
1315
}
1312
1316
1317
+ void Vectorizer::insertCastsToMergeClasses (EquivalenceClassMap &EQClasses) {
1318
+ if (EQClasses.size () < 2 )
1319
+ return ;
1320
+
1321
+ // Loop over all equivalence classes and try to merge them. Keep track of
1322
+ // classes that are merged into others.
1323
+ DenseSet<EqClassKey> ClassesToErase;
1324
+ for (auto EC1 : EQClasses) {
1325
+ for (auto EC2 : EQClasses) {
1326
+ if (ClassesToErase.contains (EC2.first ) || EC1 <= EC2)
1327
+ continue ;
1328
+
1329
+ auto [Ptr1, AS1, TySize1, IsLoad1] = EC1.first ;
1330
+ auto [Ptr2, AS2, TySize2, IsLoad2] = EC2.first ;
1331
+
1332
+ // Attempt to merge EC2 into EC1. Skip if the pointers, address spaces or
1333
+ // whether the leader instruction is a load/store are different. Also skip
1334
+ // if the scalar bitwidth of the first equivalence class is smaller than
1335
+ // the second one to avoid reconsidering the same equivalence class pair.
1336
+ if (Ptr1 != Ptr2 || AS1 != AS2 || IsLoad1 != IsLoad2 || TySize1 < TySize2)
1337
+ continue ;
1338
+
1339
+ // Ensure all instructions in EC2 can be bitcasted into NewTy.
1340
+ // / TODO: NewTyBits is needed as stuctured binded variables cannot be
1341
+ // / captured by a lambda until C++20.
1342
+ auto NewTyBits = std::get<2 >(EC1.first );
1343
+ if (any_of (EC2.second , [&](Instruction *I) {
1344
+ return DL.getTypeSizeInBits (getLoadStoreType (I)) != NewTyBits;
1345
+ }))
1346
+ continue ;
1347
+
1348
+ // Create a new type for the equivalence class.
1349
+ // / TODO: NewTy should be an FP type for an all-FP equivalence class.
1350
+ auto *NewTy = Type::getIntNTy (EC2.second [0 ]->getContext (), NewTyBits);
1351
+ for (auto *Inst : EC2.second ) {
1352
+ auto *Ptr = getLoadStorePointerOperand (Inst);
1353
+ auto *OrigTy = Inst->getType ();
1354
+ if (OrigTy == NewTy)
1355
+ continue ;
1356
+ if (auto *LI = dyn_cast<LoadInst>(Inst)) {
1357
+ Builder.SetInsertPoint (LI->getIterator ());
1358
+ auto *NewLoad = Builder.CreateLoad (NewTy, Ptr);
1359
+ auto *Cast = Builder.CreateBitOrPointerCast (
1360
+ NewLoad, OrigTy, NewLoad->getName () + " .cast" );
1361
+ LI->replaceAllUsesWith (Cast);
1362
+ LI->eraseFromParent ();
1363
+ EQClasses[EC1.first ].emplace_back (NewLoad);
1364
+ } else {
1365
+ auto *SI = cast<StoreInst>(Inst);
1366
+ Builder.SetInsertPoint (SI->getIterator ());
1367
+ auto *Cast = Builder.CreateBitOrPointerCast (
1368
+ SI->getValueOperand (), NewTy,
1369
+ SI->getValueOperand ()->getName () + " .cast" );
1370
+ auto *NewStore = Builder.CreateStore (
1371
+ Cast, getLoadStorePointerOperand (SI), SI->isVolatile ());
1372
+ SI->eraseFromParent ();
1373
+ EQClasses[EC1.first ].emplace_back (NewStore);
1374
+ }
1375
+ }
1376
+
1377
+ // Sort the instructions in the equivalence class by their order in the
1378
+ // basic block. This is important to ensure that the instructions are
1379
+ // vectorized in the correct order.
1380
+ std::sort (EQClasses[EC1.first ].begin (), EQClasses[EC1.first ].end (),
1381
+ [](Instruction *A, Instruction *B) {
1382
+ return A && B && A->comesBefore (B);
1383
+ });
1384
+ ClassesToErase.insert (EC2.first );
1385
+ }
1386
+ }
1387
+
1388
+ // Erase the equivalence classes that were merged into others.
1389
+ for (auto Key : ClassesToErase)
1390
+ EQClasses.erase (Key);
1391
+ }
1392
+
1313
1393
void Vectorizer::mergeEquivalenceClasses (EquivalenceClassMap &EQClasses) const {
1314
1394
if (EQClasses.size () < 2 ) // There is nothing to merge.
1315
1395
return ;
@@ -1495,7 +1575,7 @@ Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
1495
1575
/* IsLoad=*/ LI != nullptr }]
1496
1576
.emplace_back (&I);
1497
1577
}
1498
-
1578
+ insertCastsToMergeClasses (Ret);
1499
1579
mergeEquivalenceClasses (Ret);
1500
1580
return Ret;
1501
1581
}
0 commit comments