@@ -15719,48 +15719,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15719
15719
return false;
15720
15720
}
15721
15721
15722
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15723
- bool IsLittleEndian) {
15724
- Value *Op = ZExt->getOperand(0);
15725
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15726
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15727
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15722
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15723
+ unsigned NumElts, bool IsLittleEndian,
15724
+ SmallVectorImpl<int> &Mask) {
15728
15725
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15729
15726
return false;
15730
15727
15731
- assert(DstWidth % SrcWidth == 0 &&
15732
- "TBL lowering is not supported for a ZExt instruction with this "
15733
- "source & destination element type.");
15734
- unsigned ZExtFactor = DstWidth / SrcWidth;
15728
+ if (DstWidth % SrcWidth != 0)
15729
+ return false;
15730
+
15731
+ unsigned Factor = DstWidth / SrcWidth;
15732
+ unsigned MaskLen = NumElts * Factor;
15733
+
15734
+ Mask.clear();
15735
+ Mask.resize(MaskLen, NumElts);
15736
+
15737
+ unsigned SrcIndex = 0;
15738
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15739
+ Mask[I] = SrcIndex++;
15740
+
15741
+ if (!IsLittleEndian)
15742
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15743
+
15744
+ return true;
15745
+ }
15746
+
15747
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15748
+ FixedVectorType *ZExtTy,
15749
+ FixedVectorType *DstTy,
15750
+ bool IsLittleEndian) {
15751
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15735
15752
unsigned NumElts = SrcTy->getNumElements();
15736
- IRBuilder<> Builder(ZExt);
15753
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15754
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15755
+
15737
15756
SmallVector<int> Mask;
15738
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15739
- // vector to replace the original ZExt. This can later be lowered to a set of
15740
- // tbl instructions.
15741
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15742
- if (IsLittleEndian) {
15743
- if (i % ZExtFactor == 0)
15744
- Mask.push_back(i / ZExtFactor);
15745
- else
15746
- Mask.push_back(NumElts);
15747
- } else {
15748
- if ((i + 1) % ZExtFactor == 0)
15749
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15750
- else
15751
- Mask.push_back(NumElts);
15752
- }
15753
- }
15757
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15758
+ return nullptr;
15754
15759
15755
15760
auto *FirstEltZero = Builder.CreateInsertElement(
15756
15761
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15757
15762
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15758
15763
Result = Builder.CreateBitCast(Result, DstTy);
15759
- if (DstTy != ZExt->getType())
15760
- Result = Builder.CreateZExt(Result, ZExt->getType());
15761
- ZExt->replaceAllUsesWith(Result);
15762
- ZExt->eraseFromParent();
15763
- return true;
15764
+ if (DstTy != ZExtTy)
15765
+ Result = Builder.CreateZExt(Result, ZExtTy);
15766
+ return Result;
15764
15767
}
15765
15768
15766
15769
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -15925,21 +15928,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
15925
15928
15926
15929
DstTy = TruncDstType;
15927
15930
}
15928
-
15929
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
15931
+ IRBuilder<> Builder(ZExt);
15932
+ Value *Result = createTblShuffleForZExt(
15933
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
15934
+ DstTy, Subtarget->isLittleEndian());
15935
+ if (!Result)
15936
+ return false;
15937
+ ZExt->replaceAllUsesWith(Result);
15938
+ ZExt->eraseFromParent();
15939
+ return true;
15930
15940
}
15931
15941
15932
15942
auto *UIToFP = dyn_cast<UIToFPInst>(I);
15933
15943
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
15934
15944
DstTy->getElementType()->isFloatTy()) {
15935
15945
IRBuilder<> Builder(I);
15936
- auto *ZExt = cast<ZExtInst>(
15937
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
15946
+ Value *ZExt = createTblShuffleForZExt(
15947
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
15948
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
15949
+ if (!ZExt)
15950
+ return false;
15938
15951
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
15939
15952
I->replaceAllUsesWith(UI);
15940
15953
I->eraseFromParent();
15941
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
15942
- Subtarget->isLittleEndian());
15954
+ return true;
15943
15955
}
15944
15956
15945
15957
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments