@@ -106,20 +106,6 @@ void ConstantCoalescing::ProcessFunction(Function* function)
106
106
std::vector<BufChunk*> indcb_owloads;
107
107
std::vector<BufChunk*> indcb_gathers;
108
108
109
- for (df_iterator<DomTreeNode*> dom_it = df_begin (dom_tree.getRootNode ()),
110
- dom_end = df_end (dom_tree.getRootNode ()); dom_it != dom_end; ++dom_it)
111
- {
112
- BasicBlock* cur_blk = dom_it->getBlock ();
113
- FindAllDirectCB (cur_blk, dircb_owloads);
114
-
115
- while (!dircb_owloads.empty ())
116
- {
117
- BufChunk* top_chunk = dircb_owloads.back ();
118
- dircb_owloads.pop_back ();
119
- delete top_chunk;
120
- }
121
- }
122
-
123
109
for (df_iterator<DomTreeNode*> dom_it = df_begin (dom_tree.getRootNode ()),
124
110
dom_end = df_end (dom_tree.getRootNode ()); dom_it != dom_end; ++dom_it)
125
111
{
@@ -618,144 +604,6 @@ bool sortFunction(BufChunk* buf1, BufChunk* buf2)
618
604
(buf1->addrSpace == buf2->addrSpace && buf1->chunkStart < buf2->chunkStart ));
619
605
}
620
606
621
- // find all direct CB, check which ones should be merged together, and move the smallest indexed one to the first used location.
622
- void ConstantCoalescing::FindAllDirectCB (
623
- BasicBlock* blk,
624
- std::vector<BufChunk*>& dircb_owloads)
625
- {
626
- int loadOrder = 0 ;
627
- // get work-item analysis, need to update uniformness information
628
- for (BasicBlock::iterator BBI = blk->begin (), BBE = blk->end ();
629
- BBI != BBE; ++BBI)
630
- {
631
- // skip dead instructions
632
- if (BBI->use_empty ())
633
- {
634
- continue ;
635
- }
636
- // bindless case
637
- if (LdRawIntrinsic * ldRaw = dyn_cast<LdRawIntrinsic>(BBI))
638
- {
639
- continue ;
640
- }
641
- LoadInst* inst = dyn_cast<LoadInst>(BBI);
642
- // skip load on struct or array type
643
- if (!inst || inst->getType ()->isAggregateType () ||
644
- inst->getPointerAddressSpace () == ADDRESS_SPACE_CONSTANT)
645
- {
646
- continue ;
647
- }
648
-
649
- const alignment_t alignment = GetAlignment (inst);
650
- Type* loadType = inst->getType ();
651
- Type* elemType = loadType->getScalarType ();
652
- // right now, only work on load with dword element-type
653
- if (elemType->getPrimitiveSizeInBits () != SIZE_DWORD * 8 || loadType->isVectorTy () || alignment == 0 )
654
- {
655
- continue ;
656
- }
657
-
658
- // skip stateless path
659
- uint bufId = 0 ;
660
- Value* elt_ptrv = nullptr ;
661
- BufferType bufType = BUFFER_TYPE_UNKNOWN;
662
- bool is_cbload = IsReadOnlyLoadDirectCB (inst, bufId, elt_ptrv, bufType);
663
- if (is_cbload)
664
- {
665
- uint addrSpace = inst->getPointerAddressSpace ();
666
- uint maxEltPlus = 1 ;
667
- const uint scalarSizeInBytes = inst->getType ()->getScalarSizeInBits () / 8 ;
668
- uint offsetInBytes = 0 ;
669
-
670
- if (isa<ConstantPointerNull>(elt_ptrv))
671
- {
672
- }
673
- else if (isa<IntToPtrInst>(elt_ptrv))
674
- {
675
- Value* elt_idxv = cast<Instruction>(elt_ptrv)->getOperand (0 );
676
- ConstantInt* offsetConstant = dyn_cast<ConstantInt>(elt_idxv);
677
- if (offsetConstant)
678
- { // direct access
679
- offsetInBytes = (uint)offsetConstant->getZExtValue ();
680
- if ((int32_t )offsetInBytes < 0 )
681
- {
682
- continue ;
683
- }
684
- }
685
- else
686
- {
687
- continue ;
688
- }
689
- }
690
- const uint eltid = offsetInBytes / scalarSizeInBytes;
691
-
692
- BufChunk* cov_chunk = nullptr ;
693
-
694
- cov_chunk = new BufChunk ();
695
- cov_chunk->bufIdxV = nullptr ;
696
- cov_chunk->addrSpace = addrSpace;
697
- cov_chunk->baseIdxV = nullptr ;
698
- cov_chunk->elementSize = scalarSizeInBytes;
699
- cov_chunk->chunkStart = eltid;
700
- cov_chunk->chunkSize = maxEltPlus;
701
- cov_chunk->chunkIO = inst;
702
- cov_chunk->loadOrder = loadOrder++;
703
- // const uint chunkAlignment = std::max<uint>(alignment, 4);
704
- // cov_chunk->chunkIO = CreateChunkLoad(inst, cov_chunk, eltid, chunkAlignment);
705
- dircb_owloads.push_back (cov_chunk);
706
- } // end of if gfx cbload handling
707
- } // loop over inst in block
708
-
709
- if (dircb_owloads.size () <= 1 )
710
- {
711
- return ;
712
- }
713
-
714
- std::sort (dircb_owloads.begin (), dircb_owloads.end (), sortFunction);
715
- std::vector<BufChunk*>::iterator iter = dircb_owloads.begin ();
716
- BufChunk* firstBufInChunk = *iter;
717
- uint firstCBLoadEle = (*iter)->loadOrder ;
718
- llvm::Instruction* firstCBLoad = (*iter)->chunkIO ;
719
- llvm::Instruction* MoveToLocation = (*iter)->chunkIO ;
720
- iter++;
721
- while (iter != dircb_owloads.end ())
722
- {
723
- const uint scalarSizeInBytes = (*iter)->elementSize ;
724
- const uint eltid = (*iter)->chunkStart ;
725
- uint chunkSize = (*iter)->chunkStart + scalarSizeInBytes - firstBufInChunk->chunkStart ;
726
- static_assert (MAX_VECTOR_NUM_ELEMENTS >= SIZE_OWORD, " Code below may need an update" );
727
- if (firstBufInChunk->addrSpace == (*iter)->addrSpace &&
728
- profitableChunkSize (chunkSize, scalarSizeInBytes) &&
729
- ((scalarSizeInBytes * eltid) % 4 ) == 0 )
730
- {
731
- if ((*iter)->loadOrder < firstCBLoadEle)
732
- {
733
- firstCBLoadEle = (*iter)->loadOrder ;
734
- MoveToLocation = (*iter)->chunkIO ;
735
- }
736
- iter++;
737
- continue ;
738
- }
739
- else
740
- {
741
- // move the CB with the smallest index to before the 1st used CB with the same to-be-merged CBs.
742
- if (firstCBLoad != MoveToLocation)
743
- {
744
- if (Instruction * temp = dyn_cast<Instruction>(firstCBLoad->getOperand (0 )))
745
- {
746
- firstCBLoad->moveBefore (MoveToLocation);
747
- temp->moveBefore (firstCBLoad);
748
- }
749
- }
750
- firstCBLoadEle = (*iter)->loadOrder ;
751
- firstBufInChunk = *iter;
752
- MoveToLocation = (*iter)->chunkIO ;
753
- firstCBLoad = (*iter)->chunkIO ;
754
- iter++;
755
- }
756
- }
757
- }
758
-
759
607
bool ConstantCoalescing::profitableChunkSize (
760
608
uint32_t ub, uint32_t lb, uint32_t eltSizeInBytes)
761
609
{
0 commit comments