@@ -2450,6 +2450,179 @@ void CustomSafeOptPass::visitExtractElementInst(ExtractElementInst& I)
2450
2450
dp4WithIdentityMatrix (I);
2451
2451
}
2452
2452
2453
+ // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2454
+ // This pass removes dead local memory loads and stores. If we remove all such loads and stores, we also
2455
+ // remove all local memory fences together with barriers that follow.
2456
+ //
2457
// Pass registration for TrivialLocalMemoryOpsElimination. No pass dependencies are listed
// between the BEGIN/END macros.
// NOTE(review): runOnFunction() queries MetaDataUtilsWrapper through getAnalysis<>(); confirm
// that this dependency is declared elsewhere (e.g. in getAnalysisUsage), since it is not listed here.
+ IGC_INITIALIZE_PASS_BEGIN (TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2458
+ IGC_INITIALIZE_PASS_END(TrivialLocalMemoryOpsElimination, " TrivialLocalMemoryOpsElimination" , " TrivialLocalMemoryOpsElimination" , false , false )
2459
+
2460
// Static pass identifier; the constructor hands it to the FunctionPass base class.
+ char TrivialLocalMemoryOpsElimination::ID = 0;
2461
+
2462
+ TrivialLocalMemoryOpsElimination::TrivialLocalMemoryOpsElimination () : FunctionPass(ID)
2463
+ {
2464
+ initializeTrivialLocalMemoryOpsEliminationPass (*PassRegistry::getPassRegistry ());
2465
+ }
2466
+
2467
+ bool TrivialLocalMemoryOpsElimination::runOnFunction (Function& F)
2468
+ {
2469
+ bool change = false ;
2470
+
2471
+ IGCMD::MetaDataUtils* pMdUtil = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils ();
2472
+ if (!isEntryFunc (pMdUtil, &F))
2473
+ {
2474
+ // Skip if it is non-entry function. For example, a subroutine
2475
+ // foo ( local int* p) { ...... store v, p; ......}
2476
+ // in which no localMemoptimization will be performed.
2477
+ return change;
2478
+ }
2479
+
2480
+ visit (F);
2481
+ if (!abortPass && (m_LocalLoadsToRemove.empty () ^ m_LocalStoresToRemove.empty ()))
2482
+ {
2483
+ for (StoreInst* Inst : m_LocalStoresToRemove)
2484
+ {
2485
+ Inst->eraseFromParent ();
2486
+ change = true ;
2487
+ }
2488
+
2489
+ for (LoadInst* Inst : m_LocalLoadsToRemove)
2490
+ {
2491
+ if (Inst->use_empty ())
2492
+ {
2493
+ Inst->eraseFromParent ();
2494
+ change = true ;
2495
+ }
2496
+ }
2497
+
2498
+ for (CallInst* Inst : m_LocalFencesBariersToRemove)
2499
+ {
2500
+ Inst->eraseFromParent ();
2501
+ change = true ;
2502
+ }
2503
+ }
2504
+ m_LocalStoresToRemove.clear ();
2505
+ m_LocalLoadsToRemove.clear ();
2506
+ m_LocalFencesBariersToRemove.clear ();
2507
+
2508
+ return change;
2509
+ }
2510
+
2511
+ /*
2512
+ OCL instruction barrier(CLK_LOCAL_MEM_FENCE); is translate to two instructions
2513
+ call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2514
+ call void @llvm.genx.GenISA.threadgroupbarrier()
2515
+
2516
+ if we remove call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2517
+ we must remove next instruction if it is call void @llvm.genx.GenISA.threadgroupbarrier()
2518
+ */
2519
+ void TrivialLocalMemoryOpsElimination::findNextThreadGroupBarrierInst (Instruction& I)
2520
+ {
2521
+ auto nextInst = I.getNextNonDebugInstruction ();
2522
+ if (isa<GenIntrinsicInst>(nextInst))
2523
+ {
2524
+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(nextInst);
2525
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_threadgroupbarrier)
2526
+ {
2527
+ m_LocalFencesBariersToRemove.push_back (dyn_cast<CallInst>(nextInst));
2528
+ }
2529
+ }
2530
+ }
2531
+
2532
+ void TrivialLocalMemoryOpsElimination::visitLoadInst (LoadInst& I)
2533
+ {
2534
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2535
+ {
2536
+ m_LocalLoadsToRemove.push_back (&I);
2537
+ }
2538
+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2539
+ {
2540
+ abortPass = true ;
2541
+ }
2542
+ }
2543
+
2544
+ void TrivialLocalMemoryOpsElimination::visitStoreInst (StoreInst& I)
2545
+ {
2546
+ if (I.getPointerAddressSpace () == ADDRESS_SPACE_LOCAL)
2547
+ {
2548
+ if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand ()->stripPointerCasts ()))
2549
+ {
2550
+ // Device sanitizer instrumentation pass inserts a new local memory
2551
+ // variable and inserts store to the variable in a kernel. The
2552
+ // variable is loaded later in no-inline functions. For this case,
2553
+ // do not eliminate the store.
2554
+ if (GV->getName ().startswith (" __Asan" ))
2555
+ {
2556
+ return ;
2557
+ }
2558
+ }
2559
+ m_LocalStoresToRemove.push_back (&I);
2560
+ }
2561
+ else if (I.getPointerAddressSpace () == ADDRESS_SPACE_GENERIC)
2562
+ {
2563
+ abortPass = true ;
2564
+ }
2565
+ }
2566
+
2567
+ bool TrivialLocalMemoryOpsElimination::isLocalBarrier (CallInst& I)
2568
+ {
2569
+ // check arguments in call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) if match to
2570
+ // (i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true) it is local barrier
2571
+ std::vector<bool > argumentsOfMemoryBarrier;
2572
+
2573
+ for (auto arg = I.arg_begin (); arg != I.arg_end (); ++arg)
2574
+ {
2575
+ ConstantInt* ci = dyn_cast<ConstantInt>(arg);
2576
+ if (ci) {
2577
+ argumentsOfMemoryBarrier.push_back (ci->getValue ().getBoolValue ());
2578
+ }
2579
+ else {
2580
+ // argument is not a constant, so we can't tell.
2581
+ return false ;
2582
+ }
2583
+ }
2584
+
2585
+ return argumentsOfMemoryBarrier == m_argumentsOfLocalMemoryBarrier;
2586
+ }
2587
+
2588
+ // If any call instruction use pointer to local memory abort pass execution
2589
+ void TrivialLocalMemoryOpsElimination::anyCallInstUseLocalMemory (CallInst& I)
2590
+ {
2591
+ Function* fn = I.getCalledFunction ();
2592
+
2593
+ if (fn != NULL )
2594
+ {
2595
+ for (auto arg = fn->arg_begin (); arg != fn->arg_end (); ++arg)
2596
+ {
2597
+ if (arg->getType ()->isPointerTy ())
2598
+ {
2599
+ if (arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_LOCAL || arg->getType ()->getPointerAddressSpace () == ADDRESS_SPACE_GENERIC) abortPass = true ;
2600
+ }
2601
+ }
2602
+ }
2603
+ }
2604
+
2605
+ void TrivialLocalMemoryOpsElimination::visitCallInst (CallInst& I)
2606
+ {
2607
+ // detect only: llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
2608
+ // (note: the first and last arguments are true)
2609
+ // and add them with immediately following barriers to m_LocalFencesBariersToRemove
2610
+ anyCallInstUseLocalMemory (I);
2611
+
2612
+ if (isa<GenIntrinsicInst>(I))
2613
+ {
2614
+ GenIntrinsicInst* II = cast<GenIntrinsicInst>(&I);
2615
+ if (II->getIntrinsicID () == GenISAIntrinsic::GenISA_memoryfence)
2616
+ {
2617
+ if (isLocalBarrier (I))
2618
+ {
2619
+ m_LocalFencesBariersToRemove.push_back (&I);
2620
+ findNextThreadGroupBarrierInst (I);
2621
+ }
2622
+ }
2623
+ }
2624
+ }
2625
+
2453
2626
// //////////////////////////////////////////////////////////////////////////////
2454
2627
IGC_INITIALIZE_PASS_BEGIN (TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
2455
2628
IGC_INITIALIZE_PASS_END(TrivialUnnecessaryTGMFenceElimination, " TrivialUnnecessaryTGMFenceElimination" , " TrivialUnnecessaryTGMFenceElimination" , false , false )
0 commit comments