@@ -324,6 +324,11 @@ class Vectorizer {
324
324
Instruction *ChainElem, Instruction *ChainBegin,
325
325
const DenseMap<Instruction *, APInt /* OffsetFromLeader*/ > &ChainOffsets);
326
326
327
+ // / Merges the equivalence classes if they have underlying objects that differ
328
+ // / by one level of indirection (i.e., one is a getelementptr and the other is
329
+ // / the base pointer in that getelementptr).
330
+ void mergeEquivalenceClasses (EquivalenceClassMap &EQClasses) const ;
331
+
327
332
// / Collects loads and stores grouped by "equivalence class", where:
328
333
// / - all elements in an eq class are a load or all are a store,
329
334
// / - they all load/store the same element size (it's OK to have e.g. i8 and
@@ -1305,6 +1310,128 @@ std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1305
1310
return std::nullopt;
1306
1311
}
1307
1312
1313
+ void Vectorizer::mergeEquivalenceClasses (EquivalenceClassMap &EQClasses) const {
1314
+ if (EQClasses.size () < 2 ) // There is nothing to merge.
1315
+ return ;
1316
+
1317
+ // The reduced key has all elements of the ECClassKey except the underlying
1318
+ // object. Check that EqClassKey has 4 elements and define the reduced key.
1319
+ static_assert (std::tuple_size_v<EqClassKey> == 4 ,
1320
+ " EqClassKey has changed - EqClassReducedKey needs changes too" );
1321
+ using EqClassReducedKey =
1322
+ std::tuple<std::tuple_element_t <1 , EqClassKey> /* AddrSpace */ ,
1323
+ std::tuple_element_t <2 , EqClassKey> /* Element size */ ,
1324
+ std::tuple_element_t <3 , EqClassKey> /* IsLoad; */ >;
1325
+ using ECReducedKeyToUnderlyingObjectMap =
1326
+ MapVector<EqClassReducedKey,
1327
+ SmallPtrSet<std::tuple_element_t <0 , EqClassKey>, 4 >>;
1328
+
1329
+ // Form a map from the reduced key (without the underlying object) to the
1330
+ // underlying objects: 1 reduced key to many underlying objects, to form
1331
+ // groups of potentially merge-able equivalence classes.
1332
+ ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1333
+ bool FoundPotentiallyOptimizableEC = false ;
1334
+ for (const auto &EC : EQClasses) {
1335
+ const auto &Key = EC.first ;
1336
+ EqClassReducedKey RedKey{std::get<1 >(Key), std::get<2 >(Key),
1337
+ std::get<3 >(Key)};
1338
+ RedKeyToUOMap[RedKey].insert (std::get<0 >(Key));
1339
+ if (RedKeyToUOMap[RedKey].size () > 1 )
1340
+ FoundPotentiallyOptimizableEC = true ;
1341
+ }
1342
+ if (!FoundPotentiallyOptimizableEC)
1343
+ return ;
1344
+
1345
+ LLVM_DEBUG ({
1346
+ dbgs () << " LSV: mergeEquivalenceClasses: before merging:\n " ;
1347
+ for (const auto &EC : EQClasses) {
1348
+ dbgs () << " Key: ([" << std::get<0 >(EC.first )
1349
+ << " ]: " << *std::get<0 >(EC.first ) << " , " << std::get<1 >(EC.first )
1350
+ << " , " << std::get<2 >(EC.first ) << " , "
1351
+ << static_cast <int >(std::get<3 >(EC.first )) << " )\n " ;
1352
+ for (const auto &Inst : EC.second )
1353
+ dbgs () << " \t Inst:\t " << *Inst << " \n " ;
1354
+ }
1355
+ });
1356
+ LLVM_DEBUG ({
1357
+ dbgs () << " LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n " ;
1358
+ for (const auto &RedKeyToUO : RedKeyToUOMap) {
1359
+ dbgs () << " Reduced key: (" << std::get<0 >(RedKeyToUO.first ) << " , "
1360
+ << std::get<1 >(RedKeyToUO.first ) << " , "
1361
+ << static_cast <int >(std::get<2 >(RedKeyToUO.first )) << " ) --> "
1362
+ << RedKeyToUO.second .size () << " underlying objects:\n " ;
1363
+ for (auto UObject : RedKeyToUO.second )
1364
+ dbgs () << " [" << UObject << " ]: " << *UObject << " \n " ;
1365
+ }
1366
+ });
1367
+
1368
+ using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1369
+
1370
+ // Compute the ultimate targets for a set of underlying objects.
1371
+ auto GetUltimateTargets =
1372
+ [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1373
+ UObjectToUObjectMap IndirectionMap;
1374
+ for (const auto *UObject : UObjects) {
1375
+ const unsigned MaxLookupDepth = 1 ; // look for 1-level indirections only
1376
+ const auto *UltimateTarget =
1377
+ llvm::getUnderlyingObject (UObject, MaxLookupDepth);
1378
+ if (UltimateTarget != UObject)
1379
+ IndirectionMap[UObject] = UltimateTarget;
1380
+ }
1381
+ UObjectToUObjectMap UltimateTargetsMap;
1382
+ for (const auto *UObject : UObjects) {
1383
+ auto Target = UObject;
1384
+ auto It = IndirectionMap.find (Target);
1385
+ for (; It != IndirectionMap.end (); It = IndirectionMap.find (Target))
1386
+ Target = It->second ;
1387
+ UltimateTargetsMap[UObject] = Target;
1388
+ }
1389
+ return UltimateTargetsMap;
1390
+ };
1391
+
1392
+ // For each item in RedKeyToUOMap, if it has more than one underlying object,
1393
+ // try to merge the equivalence classes.
1394
+ for (auto &RedKeyToUO : RedKeyToUOMap) {
1395
+ auto UObjects = RedKeyToUO.second ;
1396
+ if (UObjects.size () < 2 )
1397
+ continue ;
1398
+ const auto RedKey = RedKeyToUO.first ;
1399
+ auto UTMap = GetUltimateTargets (UObjects);
1400
+ for (const auto &UT : UTMap) {
1401
+ const Value *UObject = UT.first ;
1402
+ const Value *UltimateTarget = UT.second ;
1403
+ if (UObject == UltimateTarget)
1404
+ continue ;
1405
+
1406
+ EqClassKey KeyFrom{UObject, std::get<0 >(RedKey), std::get<1 >(RedKey),
1407
+ std::get<2 >(RedKey)};
1408
+ EqClassKey KeyTo{UltimateTarget, std::get<0 >(RedKey), std::get<1 >(RedKey),
1409
+ std::get<2 >(RedKey)};
1410
+ auto VecFrom = EQClasses[KeyFrom];
1411
+ auto VecTo = EQClasses[KeyTo];
1412
+ SmallVector<Instruction *, 8 > MergedVec;
1413
+ std::merge (VecFrom.begin (), VecFrom.end (), VecTo.begin (), VecTo.end (),
1414
+ std::back_inserter (MergedVec),
1415
+ [](Instruction *A, Instruction *B) {
1416
+ return A && B && A->comesBefore (B);
1417
+ });
1418
+ EQClasses[KeyTo] = std::move (MergedVec);
1419
+ EQClasses.erase (KeyFrom);
1420
+ }
1421
+ }
1422
+ LLVM_DEBUG ({
1423
+ dbgs () << " LSV: mergeEquivalenceClasses: after merging:\n " ;
1424
+ for (const auto &EC : EQClasses) {
1425
+ dbgs () << " Key: ([" << std::get<0 >(EC.first )
1426
+ << " ]: " << *std::get<0 >(EC.first ) << " , " << std::get<1 >(EC.first )
1427
+ << " , " << std::get<2 >(EC.first ) << " , "
1428
+ << static_cast <int >(std::get<3 >(EC.first )) << " )\n " ;
1429
+ for (const auto &Inst : EC.second )
1430
+ dbgs () << " \t Inst:\t " << *Inst << " \n " ;
1431
+ }
1432
+ });
1433
+ }
1434
+
1308
1435
EquivalenceClassMap
1309
1436
Vectorizer::collectEquivalenceClasses (BasicBlock::iterator Begin,
1310
1437
BasicBlock::iterator End) {
@@ -1377,6 +1504,7 @@ Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
1377
1504
.emplace_back (&I);
1378
1505
}
1379
1506
1507
+ mergeEquivalenceClasses (Ret);
1380
1508
return Ret;
1381
1509
}
1382
1510
0 commit comments