Skip to content

Commit 394c73c

Browse files
jgu222 authored and Zuul committed
Make sure the vectors coming into the emit pass have a power-of-2 vector size,
except for some vec3 cases. This makes emitPass less complicated. Change-Id: I8c8ffb6b301f2b0f5d60684d578ac56befb76ad4
1 parent 5040387 commit 394c73c

File tree

1 file changed

+70
-91
lines changed

1 file changed

+70
-91
lines changed

IGC/Compiler/CISACodeGen/VectorPreProcess.cpp

Lines changed: 70 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3939
#include "common/LLVMWarningsPop.hpp"
4040
#include "Probe/Assertion.h"
4141

42+
#include <utility> // std::pair, std::make_pair
43+
4244
using namespace llvm;
4345
using namespace IGC;
4446

@@ -323,8 +325,7 @@ namespace
323325
// VP_SPLIT_SIZE is at least 8 bytes (largest element size) and
324326
// must be power of 2.
325327
VP_SPLIT_SIZE = 32, // 32 bytes (must be a power of 2)
326-
VP_RAW_SPLIT_SIZE = 16,
327-
VP_MAX_VECTOR_SIZE = 128 // max vector length
328+
VP_RAW_SPLIT_SIZE = 16
328329
};
329330

330331
static char ID; // Pass identification, replacement for typeid
@@ -352,7 +353,7 @@ namespace
352353
private:
353354

354355
void getOrGenScalarValues(
355-
Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst);
356+
Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst);
356357
void replaceAllVectorUsesWithScalars(Instruction* VI,
357358
ValVector& SVals);
358359

@@ -373,9 +374,7 @@ namespace
373374
Type* ETy,
374375
uint32_t NElts,
375376
uint32_t SplitSize,
376-
Type** SVTypes,
377-
uint32_t* SVCounts,
378-
uint32_t& Len);
377+
SmallVector<std::pair<Type*, uint32_t>, 8>& SplitInfo);
379378

380379
private:
381380
const DataLayout* m_DL;
@@ -519,78 +518,53 @@ void VectorPreProcess::replaceAllVectorUsesWithScalars(Instruction* VI, ValVecto
519518
}
520519
}
521520

522-
523521
void VectorPreProcess::createSplitVectorTypes(
524522
Type* ETy,
525523
uint32_t NElts,
526524
uint32_t SplitSize,
527-
Type** SVTypes,
528-
uint32_t* SVCounts,
529-
uint32_t& Len)
525+
SmallVector<std::pair<Type*, uint32_t>, 8>& SplitInfo)
530526
{
531527
uint32_t ebytes = (unsigned int)ETy->getPrimitiveSizeInBits() / 8;
532528
if (ETy->isPointerTy())
533529
{
534530
ebytes = m_DL->getPointerTypeSize(ETy);
535531
}
536532

533+
// TODO: generalize splitting for cases where the element size is bigger than SplitSize.
537534
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
538535
{
539536
if (ebytes > SplitSize)
540537
{
541-
SVCounts[0] = NElts * ebytes / SplitSize;
542-
SVTypes[0] = IntegerType::get(ETy->getContext(), SplitSize * 8);
543-
Len = 1;
538+
uint32_t M = NElts * ebytes / SplitSize;
539+
Type* Ty = IntegerType::get(ETy->getContext(), SplitSize * 8);
540+
SplitInfo.push_back(std::make_pair(Ty, M));
544541
return;
545542
}
546543
}
547544

548-
IGC_ASSERT((SplitSize % ebytes) == 0 &&
549-
"Internal Error: Wrong split size!");
550-
551-
// the number of elements of a new vector
552-
uint32_t E = SplitSize / ebytes;
553-
// number of vectors
554-
uint32_t N = NElts / E;
555-
// remaining number of elements.
556-
uint32_t R = NElts % E;
557-
558-
int j = 0;
559-
if (N > 0)
560-
{
561-
SVCounts[0] = N;
562-
SVTypes[0] = VectorType::get(ETy, E);
563-
++j;
564-
}
545+
// Both SplitSize and ebytes must be powers of 2.
546+
IGC_ASSERT((SplitSize % ebytes) == 0 && "Internal Error: Wrong split size!");
565547

566-
// Sub-vectors are
567-
// 1. ebytes >=4, the remaing is a single sub-vector; or
568-
// 2. ebytes < 4, the remaining is splitted into
569-
// one sub-vector of multiple 4xebytes, and
570-
// the remaining vector of 3|2|1 elements.
571-
//
572-
// Note that we keep vector 3 here so that we may convert
573-
// vector3 to vector4 later when special-handling vector3.
574-
if (ebytes < 4 && R > 0)
548+
uint32_t E = SplitSize / ebytes; // split size in elements
549+
uint32_t N = NElts; // the number of elements to be split
550+
while (N > 4)
575551
{
576-
N = R / 4;
577-
R = R % 4;
578-
if (N > 0)
552+
uint32_t M = N / E; // the number of subvectors for split size E
553+
if (M > 0)
579554
{
580-
SVCounts[j] = 1;
581-
SVTypes[j] = VectorType::get(ETy, 4 * N);
582-
++j;
555+
SplitInfo.push_back(std::make_pair(VectorType::get(ETy, E), M));
583556
}
557+
// The remaining elements are the ones to be split in the next iteration.
558+
N = N % E;
559+
E = E / 2; // next split size
584560
}
585561

586-
// remaining sub-vector
587-
if (R > 0)
562+
if (N > 0)
588563
{
589-
SVCounts[j] = 1;
590-
SVTypes[j] = (R == 1) ? ETy : VectorType::get(ETy, R);
591-
++j;
564+
// A vector of 1|2|3|4 elements. No further splitting!
565+
Type* Ty = (N == 1) ? ETy : VectorType::get(ETy, N);
566+
SplitInfo.push_back(std::make_pair(Ty, 1));
592567
}
593-
Len = j;
594568
}
595569

596570
bool VectorPreProcess::splitStore(
@@ -602,14 +576,10 @@ bool VectorPreProcess::splitStore(
602576
Type* ETy = VTy->getElementType();
603577
uint32_t nelts = int_cast<uint32_t>(VTy->getNumElements());
604578

605-
IGC_ASSERT(nelts <= VP_MAX_VECTOR_SIZE && "Vector length is too big!");
606-
607-
Type* tys[6];
608-
uint32_t tycnts[6];
609-
uint32_t len;
610-
// Generate splitted loads and save them in the map
579+
// splitInfo: Keep track of all pairs of (sub-vec type, #sub-vec).
580+
SmallVector<std::pair<Type*, uint32_t>, 8> splitInfo;
611581
bool isStoreInst = isa<StoreInst>(SI);
612-
582+
uint32_t splitSize = isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE;
613583
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
614584
{
615585
// byte and word-aligned stores can only store a dword at a time.
@@ -623,16 +593,16 @@ bool VectorPreProcess::splitStore(
623593
m_CGCtx->m_DriverInfo.splitUnalignedVectors() ||
624594
!WI.isUniform(ASI.getInst()))
625595
&& ASI.getAlignment() < 4;
626-
const uint32_t splitSize = needsDWordSplit ? 4 : (isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE);
627-
createSplitVectorTypes(ETy, nelts, splitSize, tys, tycnts, len);
628-
}
629-
else
630-
{
631-
createSplitVectorTypes(ETy, nelts, isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE, tys, tycnts, len);
596+
if (needsDWordSplit)
597+
{
598+
splitSize = 4;
599+
}
632600
}
601+
createSplitVectorTypes(ETy, nelts, splitSize, splitInfo);
633602

634603
// return if no split
635-
if (len == 1 && tycnts[0] == 1)
604+
uint32_t len = splitInfo.size();
605+
if (len == 1 && splitInfo[0].second == 1)
636606
{
637607
return false;
638608
}
@@ -642,19 +612,20 @@ bool VectorPreProcess::splitStore(
642612
{
643613
// Need to create the split values.
644614
Instruction* insertBeforeInst = nullptr;
645-
Value* scalars[VP_MAX_VECTOR_SIZE];
615+
ValVector scalars(nelts, nullptr);
646616
getOrGenScalarValues(*SI->getParent()->getParent(),
647617
StoredVal, scalars, insertBeforeInst);
648618
insertBeforeInst = insertBeforeInst ? insertBeforeInst : SI;
649619
IRBuilder<> aBuilder(insertBeforeInst);
650620

621+
Type* Ty1 = splitInfo[0].first;
651622
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
652623
{
653-
if (ETy->getPrimitiveSizeInBits() > tys[0]->getScalarSizeInBits())
624+
if (ETy->getPrimitiveSizeInBits() > Ty1->getScalarSizeInBits())
654625
{
655626
std::vector<Value*> splitScalars;
656-
const uint32_t vectorSize = (unsigned int)ETy->getPrimitiveSizeInBits() / tys[0]->getScalarSizeInBits();
657-
Type* splitType = llvm::VectorType::get(tys[0], vectorSize);
627+
const uint32_t vectorSize = (unsigned int)ETy->getPrimitiveSizeInBits() / Ty1->getScalarSizeInBits();
628+
Type* splitType = llvm::VectorType::get(Ty1, vectorSize);
658629
for (uint32_t i = 0; i < nelts; i++)
659630
{
660631
Value* splitInst = aBuilder.CreateBitCast(scalars[i], splitType);
@@ -663,7 +634,7 @@ bool VectorPreProcess::splitStore(
663634
splitScalars.push_back(aBuilder.CreateExtractElement(splitInst, j));
664635
}
665636
}
666-
IGC_ASSERT(splitScalars.size() < VP_MAX_VECTOR_SIZE);
637+
scalars.resize(splitScalars.size());
667638
for (uint32_t i = 0; i < splitScalars.size(); i++)
668639
{
669640
scalars[i] = splitScalars[i];
@@ -674,8 +645,10 @@ bool VectorPreProcess::splitStore(
674645
// Now generate svals
675646
for (uint32_t i = 0, Idx = 0; i < len; ++i)
676647
{
677-
VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
678-
for (uint32_t j = 0; j < tycnts[i]; ++j)
648+
Type* Ty1 = splitInfo[i].first;
649+
uint32_t len1 = splitInfo[i].second;
650+
VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
651+
for (uint32_t j = 0; j < len1; ++j)
679652
{
680653
Value* subVec;
681654
if (!VTy1)
@@ -685,7 +658,7 @@ bool VectorPreProcess::splitStore(
685658
}
686659
else
687660
{
688-
subVec = UndefValue::get(tys[i]);
661+
subVec = UndefValue::get(Ty1);
689662
uint32_t n1 = int_cast<uint32_t>(VTy1->getNumElements());
690663
for (uint32_t k = 0; k < n1; ++k)
691664
{
@@ -709,10 +682,12 @@ bool VectorPreProcess::splitStore(
709682

710683
for (uint32_t i = 0, subIdx = 0; i < len; ++i)
711684
{
712-
VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
713-
for (uint32_t j = 0; j < tycnts[i]; ++j)
685+
Type* Ty1 = splitInfo[i].first;
686+
uint32_t len1 = splitInfo[i].second;
687+
VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
688+
for (uint32_t j = 0; j < len1; ++j)
714689
{
715-
uint32_t vAlign = (uint32_t)MinAlign(Align, eOffset * EBytes);
690+
uint32_t vAlign = (uint32_t)MinAlign(Align, (uint32_t)eOffset * EBytes);
716691
Value* offsetAddr = ASI.CreateConstScalarGEP(svals[subIdx]->getType(), Addr, eOffset);
717692
Instruction* newST = ASI.Create(svals[subIdx], offsetAddr, vAlign, IsVolatile);
718693
eOffset += (VTy1 ? int_cast<uint32_t>(VTy1->getNumElements()) : 1);
@@ -757,10 +732,12 @@ bool VectorPreProcess::splitLoad(
757732
Type* ETy = VTy->getElementType();
758733
uint32_t nelts = int_cast<uint32_t>(VTy->getNumElements());
759734

760-
Type* tys[6];
761-
uint32_t tycnts[6];
762-
uint32_t len;
763-
// Generate splitted loads and save them in the map
735+
// Split a vector type into multiple sub-types:
736+
// 'len0' number of sub-vectors of type 'vecTy0'
737+
// 'len1' number of sub-vectors of type 'vecTy1'
738+
// ...
739+
// SplitInfo : all pairs, each of which is (sub-vector's type, #sub-vectors).
740+
SmallVector< std::pair<Type*, uint32_t>, 8 > splitInfo;
764741
uint32_t splitSize = isLdRaw ? VP_RAW_SPLIT_SIZE : VP_SPLIT_SIZE;
765742
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
766743
{
@@ -774,11 +751,11 @@ bool VectorPreProcess::splitLoad(
774751
if ((isLdRaw || !WI.isUniform(ALI.getInst())) && ALI.getAlignment() < 4)
775752
splitSize = 4;
776753
}
777-
778-
createSplitVectorTypes(ETy, nelts, splitSize, tys, tycnts, len);
754+
createSplitVectorTypes(ETy, nelts, splitSize, splitInfo);
779755

780756
// return if no split
781-
if (len == 1 && tycnts[0] == 1)
757+
uint32_t len = splitInfo.size();
758+
if (len == 1 && splitInfo[0].second == 1)
782759
{
783760
return false;
784761
}
@@ -795,12 +772,14 @@ bool VectorPreProcess::splitLoad(
795772

796773
for (uint32_t i = 0; i < len; ++i)
797774
{
798-
VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
799-
for (uint32_t j = 0; j < tycnts[i]; ++j)
775+
Type* Ty1 = splitInfo[i].first;
776+
uint32_t len1 = splitInfo[i].second;
777+
VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
778+
for (uint32_t j = 0; j < len1; ++j)
800779
{
801780
uint32_t vAlign = (uint32_t)MinAlign(Align, eOffset * EBytes);
802-
Value* offsetAddr = ALI.CreateConstScalarGEP(tys[i], Addr, eOffset);
803-
Instruction* I = ALI.Create(tys[i], offsetAddr, vAlign, IsVolatile);
781+
Value* offsetAddr = ALI.CreateConstScalarGEP(Ty1, Addr, eOffset);
782+
Instruction* I = ALI.Create(Ty1, offsetAddr, vAlign, IsVolatile);
804783
eOffset += (VTy1 ? int_cast<uint32_t>(VTy1->getNumElements()) : 1);
805784

806785
svals.push_back(I);
@@ -1081,11 +1060,11 @@ bool VectorPreProcess::splitVector3LoadStore(Instruction* Inst)
10811060
}
10821061

10831062
// availBeforeInst:
1084-
// Used to indicate that all scalar values of VecVal are available right
1085-
// before the instruction pointed to availBeforeInst.
1086-
// If availBeforeInst is null, it means all scalar values are constants.
1063+
// Indicate that all scalar values of VecVal are available right before
1064+
// instruction 'availBeforeInst'. If availBeforeInst is null, it means
1065+
// all scalar values are constants.
10871066
void VectorPreProcess::getOrGenScalarValues(
1088-
Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst)
1067+
Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst)
10891068
{
10901069
availBeforeInst = nullptr;
10911070

0 commit comments

Comments
 (0)