@@ -39,6 +39,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39
39
#include " common/LLVMWarningsPop.hpp"
40
40
#include " Probe/Assertion.h"
41
41
42
+ #include < utility> // std::pair, std::make_pair
43
+
42
44
using namespace llvm ;
43
45
using namespace IGC ;
44
46
@@ -323,8 +325,7 @@ namespace
323
325
// VP_SPLIT_SIZE is at least 8 bytes (largest element size) and
324
326
// must be power of 2.
325
327
VP_SPLIT_SIZE = 32 , // 32 bytes (must power of 2)
326
- VP_RAW_SPLIT_SIZE = 16 ,
327
- VP_MAX_VECTOR_SIZE = 128 // max vector length
328
+ VP_RAW_SPLIT_SIZE = 16
328
329
};
329
330
330
331
static char ID; // Pass identification, replacement for typeid
@@ -352,7 +353,7 @@ namespace
352
353
private:
353
354
354
355
void getOrGenScalarValues (
355
- Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst);
356
+ Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst);
356
357
void replaceAllVectorUsesWithScalars (Instruction* VI,
357
358
ValVector& SVals);
358
359
@@ -373,9 +374,7 @@ namespace
373
374
Type* ETy,
374
375
uint32_t NElts,
375
376
uint32_t SplitSize,
376
- Type** SVTypes,
377
- uint32_t * SVCounts,
378
- uint32_t & Len);
377
+ SmallVector<std::pair<Type*, uint32_t >, 8 >& SplitInfo);
379
378
380
379
private:
381
380
const DataLayout* m_DL;
@@ -519,78 +518,53 @@ void VectorPreProcess::replaceAllVectorUsesWithScalars(Instruction* VI, ValVecto
519
518
}
520
519
}
521
520
522
-
523
521
void VectorPreProcess::createSplitVectorTypes (
524
522
Type* ETy,
525
523
uint32_t NElts,
526
524
uint32_t SplitSize,
527
- Type** SVTypes,
528
- uint32_t * SVCounts,
529
- uint32_t & Len)
525
+ SmallVector<std::pair<Type*, uint32_t >, 8 >& SplitInfo)
530
526
{
531
527
uint32_t ebytes = (unsigned int )ETy->getPrimitiveSizeInBits () / 8 ;
532
528
if (ETy->isPointerTy ())
533
529
{
534
530
ebytes = m_DL->getPointerTypeSize (ETy);
535
531
}
536
532
533
+ // todo: generalize splitting for cases whose element size is bigger than splitsize!
537
534
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
538
535
{
539
536
if (ebytes > SplitSize)
540
537
{
541
- SVCounts[ 0 ] = NElts * ebytes / SplitSize;
542
- SVTypes[ 0 ] = IntegerType::get (ETy->getContext (), SplitSize * 8 );
543
- Len = 1 ;
538
+ uint32_t M = NElts * ebytes / SplitSize;
539
+ Type* Ty = IntegerType::get (ETy->getContext (), SplitSize * 8 );
540
+ SplitInfo. push_back ( std::make_pair (Ty, M)) ;
544
541
return ;
545
542
}
546
543
}
547
544
548
- IGC_ASSERT ((SplitSize % ebytes) == 0 &&
549
- " Internal Error: Wrong split size!" );
550
-
551
- // the number of elements of a new vector
552
- uint32_t E = SplitSize / ebytes;
553
- // number of vectors
554
- uint32_t N = NElts / E;
555
- // remaining number of elements.
556
- uint32_t R = NElts % E;
557
-
558
- int j = 0 ;
559
- if (N > 0 )
560
- {
561
- SVCounts[0 ] = N;
562
- SVTypes[0 ] = VectorType::get (ETy, E);
563
- ++j;
564
- }
545
+ // Both SplitSize and ebytes shall be a power of 2
546
+ IGC_ASSERT ((SplitSize % ebytes) == 0 && " Internal Error: Wrong split size!" );
565
547
566
- // Sub-vectors are
567
- // 1. ebytes >=4, the remaing is a single sub-vector; or
568
- // 2. ebytes < 4, the remaining is splitted into
569
- // one sub-vector of multiple 4xebytes, and
570
- // the remaining vector of 3|2|1 elements.
571
- //
572
- // Note that we keep vector 3 here so that we may convert
573
- // vector3 to vector4 later when special-handling vector3.
574
- if (ebytes < 4 && R > 0 )
548
+ uint32_t E = SplitSize / ebytes; // split size in elements
549
+ uint32_t N = NElts; // the number of elements to be split
550
+ while (N > 4 )
575
551
{
576
- N = R / 4 ;
577
- R = R % 4 ;
578
- if (N > 0 )
552
+ uint32_t M = N / E; // the number of subvectors for split size E
553
+ if (M > 0 )
579
554
{
580
- SVCounts[j] = 1 ;
581
- SVTypes[j] = VectorType::get (ETy, 4 * N);
582
- ++j;
555
+ SplitInfo.push_back (std::make_pair (VectorType::get (ETy, E), M));
583
556
}
557
+ // The remaining elts are ones to be split for next iteration.
558
+ N = N % E;
559
+ E = E / 2 ; // next split size
584
560
}
585
561
586
- // remaining sub-vector
587
- if (R > 0 )
562
+ if (N > 0 )
588
563
{
589
- SVCounts[j] = 1 ;
590
- SVTypes[j] = (R == 1 ) ? ETy : VectorType::get (ETy, R );
591
- ++j ;
564
+ // A vector of 1|2|3|4 elements. No further splitting!
565
+ Type* Ty = (N == 1 ) ? ETy : VectorType::get (ETy, N );
566
+ SplitInfo. push_back ( std::make_pair (Ty, 1 )) ;
592
567
}
593
- Len = j;
594
568
}
595
569
596
570
bool VectorPreProcess::splitStore (
@@ -602,14 +576,10 @@ bool VectorPreProcess::splitStore(
602
576
Type* ETy = VTy->getElementType ();
603
577
uint32_t nelts = int_cast<uint32_t >(VTy->getNumElements ());
604
578
605
- IGC_ASSERT (nelts <= VP_MAX_VECTOR_SIZE && " Vector length is too big!" );
606
-
607
- Type* tys[6 ];
608
- uint32_t tycnts[6 ];
609
- uint32_t len;
610
- // Generate splitted loads and save them in the map
579
+ // splitInfo: Keep track of all pairs of (sub-vec type, #sub-vec).
580
+ SmallVector<std::pair<Type*, uint32_t >, 8 > splitInfo;
611
581
bool isStoreInst = isa<StoreInst>(SI);
612
-
582
+ uint32_t splitSize = isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE;
613
583
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
614
584
{
615
585
// byte and word-aligned stores can only store a dword at a time.
@@ -623,16 +593,16 @@ bool VectorPreProcess::splitStore(
623
593
m_CGCtx->m_DriverInfo .splitUnalignedVectors () ||
624
594
!WI.isUniform (ASI.getInst ()))
625
595
&& ASI.getAlignment () < 4 ;
626
- const uint32_t splitSize = needsDWordSplit ? 4 : (isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE);
627
- createSplitVectorTypes (ETy, nelts, splitSize, tys, tycnts, len);
628
- }
629
- else
630
- {
631
- createSplitVectorTypes (ETy, nelts, isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE, tys, tycnts, len);
596
+ if (needsDWordSplit)
597
+ {
598
+ splitSize = 4 ;
599
+ }
632
600
}
601
+ createSplitVectorTypes (ETy, nelts, splitSize, splitInfo);
633
602
634
603
// return if no split
635
- if (len == 1 && tycnts[0 ] == 1 )
604
+ uint32_t len = splitInfo.size ();
605
+ if (len == 1 && splitInfo[0 ].second == 1 )
636
606
{
637
607
return false ;
638
608
}
@@ -642,19 +612,20 @@ bool VectorPreProcess::splitStore(
642
612
{
643
613
// Need to create splitted values.
644
614
Instruction* insertBeforeInst = nullptr ;
645
- Value* scalars[VP_MAX_VECTOR_SIZE] ;
615
+ ValVector scalars (nelts, nullptr ) ;
646
616
getOrGenScalarValues (*SI->getParent ()->getParent (),
647
617
StoredVal, scalars, insertBeforeInst);
648
618
insertBeforeInst = insertBeforeInst ? insertBeforeInst : SI;
649
619
IRBuilder<> aBuilder (insertBeforeInst);
650
620
621
+ Type* Ty1 = splitInfo[0 ].first ;
651
622
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
652
623
{
653
- if (ETy->getPrimitiveSizeInBits () > tys[ 0 ] ->getScalarSizeInBits ())
624
+ if (ETy->getPrimitiveSizeInBits () > Ty1 ->getScalarSizeInBits ())
654
625
{
655
626
std::vector<Value*> splitScalars;
656
- const uint32_t vectorSize = (unsigned int )ETy->getPrimitiveSizeInBits () / tys[ 0 ] ->getScalarSizeInBits ();
657
- Type* splitType = llvm::VectorType::get (tys[ 0 ] , vectorSize);
627
+ const uint32_t vectorSize = (unsigned int )ETy->getPrimitiveSizeInBits () / Ty1 ->getScalarSizeInBits ();
628
+ Type* splitType = llvm::VectorType::get (Ty1 , vectorSize);
658
629
for (uint32_t i = 0 ; i < nelts; i++)
659
630
{
660
631
Value* splitInst = aBuilder.CreateBitCast (scalars[i], splitType);
@@ -663,7 +634,7 @@ bool VectorPreProcess::splitStore(
663
634
splitScalars.push_back (aBuilder.CreateExtractElement (splitInst, j));
664
635
}
665
636
}
666
- IGC_ASSERT (splitScalars.size () < VP_MAX_VECTOR_SIZE );
637
+ scalars. resize (splitScalars.size ());
667
638
for (uint32_t i = 0 ; i < splitScalars.size (); i++)
668
639
{
669
640
scalars[i] = splitScalars[i];
@@ -674,8 +645,10 @@ bool VectorPreProcess::splitStore(
674
645
// Now generate svals
675
646
for (uint32_t i = 0 , Idx = 0 ; i < len; ++i)
676
647
{
677
- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
678
- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
648
+ Type* Ty1 = splitInfo[i].first ;
649
+ uint32_t len1 = splitInfo[i].second ;
650
+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
651
+ for (uint32_t j = 0 ; j < len1; ++j)
679
652
{
680
653
Value* subVec;
681
654
if (!VTy1)
@@ -685,7 +658,7 @@ bool VectorPreProcess::splitStore(
685
658
}
686
659
else
687
660
{
688
- subVec = UndefValue::get (tys[i] );
661
+ subVec = UndefValue::get (Ty1 );
689
662
uint32_t n1 = int_cast<uint32_t >(VTy1->getNumElements ());
690
663
for (uint32_t k = 0 ; k < n1; ++k)
691
664
{
@@ -709,10 +682,12 @@ bool VectorPreProcess::splitStore(
709
682
710
683
for (uint32_t i = 0 , subIdx = 0 ; i < len; ++i)
711
684
{
712
- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
713
- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
685
+ Type* Ty1 = splitInfo[i].first ;
686
+ uint32_t len1 = splitInfo[i].second ;
687
+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
688
+ for (uint32_t j = 0 ; j < len1; ++j)
714
689
{
715
- uint32_t vAlign = (uint32_t )MinAlign (Align, eOffset * EBytes);
690
+ uint32_t vAlign = (uint32_t )MinAlign (Align, ( uint32_t ) eOffset * EBytes);
716
691
Value* offsetAddr = ASI.CreateConstScalarGEP (svals[subIdx]->getType (), Addr, eOffset);
717
692
Instruction* newST = ASI.Create (svals[subIdx], offsetAddr, vAlign, IsVolatile);
718
693
eOffset += (VTy1 ? int_cast<uint32_t >(VTy1->getNumElements ()) : 1 );
@@ -757,10 +732,12 @@ bool VectorPreProcess::splitLoad(
757
732
Type* ETy = VTy->getElementType ();
758
733
uint32_t nelts = int_cast<uint32_t >(VTy->getNumElements ());
759
734
760
- Type* tys[6 ];
761
- uint32_t tycnts[6 ];
762
- uint32_t len;
763
- // Generate splitted loads and save them in the map
735
+ // Split a vector type into multiple sub-types:
736
+ // 'len0' number of sub-vectors of type 'vecTy0'
737
+ // 'len1' number of sub-vectors of type 'vecTy1'
738
+ // ...
739
+ // SplitInfo : all pairs, each of which is (sub-vector's type, #sub-vectors).
740
+ SmallVector< std::pair<Type*, uint32_t >, 8 > splitInfo;
764
741
uint32_t splitSize = isLdRaw ? VP_RAW_SPLIT_SIZE : VP_SPLIT_SIZE;
765
742
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
766
743
{
@@ -774,11 +751,11 @@ bool VectorPreProcess::splitLoad(
774
751
if ((isLdRaw || !WI.isUniform (ALI.getInst ())) && ALI.getAlignment () < 4 )
775
752
splitSize = 4 ;
776
753
}
777
-
778
- createSplitVectorTypes (ETy, nelts, splitSize, tys, tycnts, len);
754
+ createSplitVectorTypes (ETy, nelts, splitSize, splitInfo);
779
755
780
756
// return if no split
781
- if (len == 1 && tycnts[0 ] == 1 )
757
+ uint32_t len = splitInfo.size ();
758
+ if (len == 1 && splitInfo[0 ].second == 1 )
782
759
{
783
760
return false ;
784
761
}
@@ -795,12 +772,14 @@ bool VectorPreProcess::splitLoad(
795
772
796
773
for (uint32_t i = 0 ; i < len; ++i)
797
774
{
798
- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
799
- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
775
+ Type* Ty1 = splitInfo[i].first ;
776
+ uint32_t len1 = splitInfo[i].second ;
777
+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
778
+ for (uint32_t j = 0 ; j < len1; ++j)
800
779
{
801
780
uint32_t vAlign = (uint32_t )MinAlign (Align, eOffset * EBytes);
802
- Value* offsetAddr = ALI.CreateConstScalarGEP (tys[i] , Addr, eOffset);
803
- Instruction* I = ALI.Create (tys[i] , offsetAddr, vAlign, IsVolatile);
781
+ Value* offsetAddr = ALI.CreateConstScalarGEP (Ty1 , Addr, eOffset);
782
+ Instruction* I = ALI.Create (Ty1 , offsetAddr, vAlign, IsVolatile);
804
783
eOffset += (VTy1 ? int_cast<uint32_t >(VTy1->getNumElements ()) : 1 );
805
784
806
785
svals.push_back (I);
@@ -1081,11 +1060,11 @@ bool VectorPreProcess::splitVector3LoadStore(Instruction* Inst)
1081
1060
}
1082
1061
1083
1062
// availBeforeInst:
1084
- // Used to indicate that all scalar values of VecVal are available right
1085
- // before the instruction pointed to availBeforeInst.
1086
- // If availBeforeInst is null, it means all scalar values are constants.
1063
+ // Indicate that all scalar values of VecVal are available right before
1064
+ // instruction 'availBeforeInst'. If availBeforeInst is null, it means
1065
+ // all scalar values are constants.
1087
1066
void VectorPreProcess::getOrGenScalarValues (
1088
- Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst)
1067
+ Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst)
1089
1068
{
1090
1069
availBeforeInst = nullptr ;
1091
1070
0 commit comments