@@ -15824,48 +15824,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15824
15824
return false;
15825
15825
}
15826
15826
15827
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15828
- bool IsLittleEndian) {
15829
- Value *Op = ZExt->getOperand(0);
15830
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15831
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15832
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15827
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15828
+ unsigned NumElts, bool IsLittleEndian,
15829
+ SmallVectorImpl<int> &Mask) {
15833
15830
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15834
15831
return false;
15835
15832
15836
- assert(DstWidth % SrcWidth == 0 &&
15837
- "TBL lowering is not supported for a ZExt instruction with this "
15838
- "source & destination element type.");
15839
- unsigned ZExtFactor = DstWidth / SrcWidth;
15833
+ if (DstWidth % SrcWidth != 0)
15834
+ return false;
15835
+
15836
+ unsigned Factor = DstWidth / SrcWidth;
15837
+ unsigned MaskLen = NumElts * Factor;
15838
+
15839
+ Mask.clear();
15840
+ Mask.resize(MaskLen, NumElts);
15841
+
15842
+ unsigned SrcIndex = 0;
15843
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15844
+ Mask[I] = SrcIndex++;
15845
+
15846
+ if (!IsLittleEndian)
15847
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15848
+
15849
+ return true;
15850
+ }
15851
+
15852
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15853
+ FixedVectorType *ZExtTy,
15854
+ FixedVectorType *DstTy,
15855
+ bool IsLittleEndian) {
15856
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15840
15857
unsigned NumElts = SrcTy->getNumElements();
15841
- IRBuilder<> Builder(ZExt);
15858
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15859
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15860
+
15842
15861
SmallVector<int> Mask;
15843
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15844
- // vector to replace the original ZExt. This can later be lowered to a set of
15845
- // tbl instructions.
15846
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15847
- if (IsLittleEndian) {
15848
- if (i % ZExtFactor == 0)
15849
- Mask.push_back(i / ZExtFactor);
15850
- else
15851
- Mask.push_back(NumElts);
15852
- } else {
15853
- if ((i + 1) % ZExtFactor == 0)
15854
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15855
- else
15856
- Mask.push_back(NumElts);
15857
- }
15858
- }
15862
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15863
+ return nullptr;
15859
15864
15860
15865
auto *FirstEltZero = Builder.CreateInsertElement(
15861
15866
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15862
15867
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15863
15868
Result = Builder.CreateBitCast(Result, DstTy);
15864
- if (DstTy != ZExt->getType())
15865
- Result = Builder.CreateZExt(Result, ZExt->getType());
15866
- ZExt->replaceAllUsesWith(Result);
15867
- ZExt->eraseFromParent();
15868
- return true;
15869
+ if (DstTy != ZExtTy)
15870
+ Result = Builder.CreateZExt(Result, ZExtTy);
15871
+ return Result;
15869
15872
}
15870
15873
15871
15874
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -16030,21 +16033,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
16030
16033
16031
16034
DstTy = TruncDstType;
16032
16035
}
16033
-
16034
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
16036
+ IRBuilder<> Builder(ZExt);
16037
+ Value *Result = createTblShuffleForZExt(
16038
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16039
+ DstTy, Subtarget->isLittleEndian());
16040
+ if (!Result)
16041
+ return false;
16042
+ ZExt->replaceAllUsesWith(Result);
16043
+ ZExt->eraseFromParent();
16044
+ return true;
16035
16045
}
16036
16046
16037
16047
auto *UIToFP = dyn_cast<UIToFPInst>(I);
16038
16048
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16039
16049
DstTy->getElementType()->isFloatTy()) {
16040
16050
IRBuilder<> Builder(I);
16041
- auto *ZExt = cast<ZExtInst>(
16042
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
16051
+ Value *ZExt = createTblShuffleForZExt(
16052
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16053
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16054
+ if (!ZExt)
16055
+ return false;
16043
16056
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
16044
16057
I->replaceAllUsesWith(UI);
16045
16058
I->eraseFromParent();
16046
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
16047
- Subtarget->isLittleEndian());
16059
+ return true;
16048
16060
}
16049
16061
16050
16062
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments