@@ -2451,6 +2451,179 @@ void CustomSafeOptPass::visitExtractElementInst(ExtractElementInst& I)
2451
2451
dp4WithIdentityMatrix (I);
2452
2452
}
2453
2453
2454
+ // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2455
+ // This pass removes dead local memory loads and stores. If we remove all such loads and stores, we also
2456
+ // remove all local memory fences together with barriers that follow.
2457
+ //
2458
// Pass registration boilerplate for TrivialLocalMemoryOpsElimination.
// NOTE(review): these macros presumably generate initializeTrivialLocalMemoryOpsEliminationPass(),
// which the constructor calls - confirm against the IGC_INITIALIZE_PASS_* definitions.
+ IGC_INITIALIZE_PASS_BEGIN (TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2459
+ IGC_INITIALIZE_PASS_END(TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2460
+
2461
// Static pass identifier; the constructor hands it to the FunctionPass base class.
+ char TrivialLocalMemoryOpsElimination::ID = 0;
2462
+
2463
+ TrivialLocalMemoryOpsElimination::TrivialLocalMemoryOpsElimination () : FunctionPass(ID)
2464
+ {
2465
+ initializeTrivialLocalMemoryOpsEliminationPass (*PassRegistry::getPassRegistry ());
2466
+ }
2467
+
2468
+ bool TrivialLocalMemoryOpsElimination::runOnFunction (Function& F)
2469
+ {
2470
+ bool change = false ;
2471
+
2472
+ IGCMD::MetaDataUtils* pMdUtil = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils ();
2473
+ if (!isEntryFunc (pMdUtil, &F))
2474
+ {
2475
+ // Skip if it is non-entry function. For example, a subroutine
2476
+ // foo ( local int* p) { ...... store v, p; ......}
2477
+ // in which no localMemoptimization will be performed.
2478
+ return change;
2479
+ }
2480
+
2481
+ visit (F);
2482
+ if (!abortPass && (m_LocalLoadsToRemove.empty () ^ m_LocalStoresToRemove.empty ()))
2483
+ {
2484
+ for (StoreInst* Inst : m_LocalStoresToRemove)
2485
+ {
2486
+ Inst->eraseFromParent ();
2487
+ change = true ;
2488
+ }
2489
+
2490
+ for (LoadInst* Inst : m_LocalLoadsToRemove)
2491
+ {
2492
+ if (Inst->use_empty ())
2493
+ {
2494
+ Inst->eraseFromParent ();
2495
+ change = true ;
2496
+ }
2497
+ }
2498
+
2499
+ for (CallInst* Inst : m_LocalFencesBariersToRemove)
2500
+ {
2501
+ Inst->eraseFromParent ();
2502
+ change = true ;
2503
+ }
2504
+ }
2505
+ m_LocalStoresToRemove.clear ();
2506
+ m_LocalLoadsToRemove.clear ();
2507
+ m_LocalFencesBariersToRemove.clear ();
2508
+
2509
+ return change;
2510
+ }
2511
+
2512
+ /*
2513
+ OCL instruction barrier(CLK_LOCAL_MEM_FENCE); is translated into two instructions
2514
+ call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2515
+ call void @llvm.genx.GenISA.threadgroupbarrier()
2516
+
2517
+ if we remove call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2518
+ we must remove next instruction if it is call void @llvm.genx.GenISA.threadgroupbarrier()
2519
+ */
2520
+ void TrivialLocalMemoryOpsElimination::findNextThreadGroupBarrierInst (Instruction& I)
2521
+ {
2522
+ auto nextInst = I.getNextNonDebugInstruction ();
2523
+ if (isa<GenIntrinsicInst>(nextInst))
2524
+ {
2525
+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(nextInst);
2526
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_threadgroupbarrier)
2527
+ {
2528
+ m_LocalFencesBariersToRemove.push_back (dyn_cast<CallInst>(nextInst));
2529
+ }
2530
+ }
2531
+ }
2532
+
2533
+ void TrivialLocalMemoryOpsElimination::visitLoadInst (LoadInst& I)
2534
+ {
2535
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2536
+ {
2537
+ m_LocalLoadsToRemove.push_back (&I);
2538
+ }
2539
+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2540
+ {
2541
+ abortPass = true ;
2542
+ }
2543
+ }
2544
+
2545
+ void TrivialLocalMemoryOpsElimination::visitStoreInst (StoreInst& I)
2546
+ {
2547
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2548
+ {
2549
+ if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand ()->stripPointerCasts ()))
2550
+ {
2551
+ // Device sanitizer instrumentation pass inserts a new local memory
2552
+ // variable and inserts store to the variable in a kernel. The
2553
+ // variable is loaded later in no-inline functions. For this case,
2554
+ // do not eliminate the store.
2555
+ if (GV->getName ().startswith (" __Asan" ))
2556
+ {
2557
+ return ;
2558
+ }
2559
+ }
2560
+ m_LocalStoresToRemove.push_back (&I);
2561
+ }
2562
+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2563
+ {
2564
+ abortPass = true ;
2565
+ }
2566
+ }
2567
+
2568
+ bool TrivialLocalMemoryOpsElimination::isLocalBarrier (CallInst& I)
2569
+ {
2570
+ // check arguments in call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) if match to
2571
+ // (i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) it is local barrier
2572
+ std::vector<bool > argumentsOfMemoryBarrier;
2573
+
2574
+ for (auto arg = I.arg_begin (); arg != I.arg_end (); ++arg)
2575
+ {
2576
+ ConstantInt* ci = dyn_cast<ConstantInt>(arg);
2577
+ if (ci) {
2578
+ argumentsOfMemoryBarrier.push_back (ci->getValue ().getBoolValue ());
2579
+ }
2580
+ else {
2581
+ // argument is not a constant, so we can't tell.
2582
+ return false ;
2583
+ }
2584
+ }
2585
+
2586
+ return argumentsOfMemoryBarrier == m_argumentsOfLocalMemoryBarrier;
2587
+ }
2588
+
2589
+ // If any call instruction uses a pointer to local (or generic) memory, abort pass execution
2590
+ void TrivialLocalMemoryOpsElimination::anyCallInstUseLocalMemory (CallInst& I)
2591
+ {
2592
+ Function* fn = I.getCalledFunction ();
2593
+
2594
+ if (fn != NULL )
2595
+ {
2596
+ for (auto arg = fn->arg_begin (); arg != fn->arg_end (); ++arg)
2597
+ {
2598
+ if (arg->getType ()->isPointerTy ())
2599
+ {
2600
+ if (arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_LOCAL || arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_GENERIC) abortPass = true ;
2601
+ }
2602
+ }
2603
+ }
2604
+ }
2605
+
2606
+ void TrivialLocalMemoryOpsElimination::visitCallInst (CallInst& I)
2607
+ {
2608
+ // detect only: llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2609
+ // (note: the first and last arguments are true)
2610
+ // and add them with immediately following barriers to m_LocalFencesBariersToRemove
2611
+ anyCallInstUseLocalMemory (I);
2612
+
2613
+ if (isa<GenIntrinsicInst>(I))
2614
+ {
2615
+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(&I);
2616
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_memoryfence)
2617
+ {
2618
+ if (isLocalBarrier (I))
2619
+ {
2620
+ m_LocalFencesBariersToRemove.push_back (&I);
2621
+ findNextThreadGroupBarrierInst (I);
2622
+ }
2623
+ }
2624
+ }
2625
+ }
2626
+
2454
2627
// //////////////////////////////////////////////////////////////////////////////
2455
2628
IGC_INITIALIZE_PASS_BEGIN (TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
2456
2629
IGC_INITIALIZE_PASS_END(TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
0 commit comments