Skip to content

Commit 97e04d4

Browse files
committed
[X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): canonicalize to integer type
This way we don't have to duplicate the i32/f32 and i64/f64 entries; that duplication had already been missed for a few tuples.
1 parent 129f466 commit 97e04d4

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4686,6 +4686,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
46864686

46874687
unsigned VF = VecTy->getNumElements() / Factor;
46884688
Type *ScalarTy = VecTy->getElementType();
4689+
// Deduplicate entries, model floats/pointers as appropriately-sized integers.
4690+
if (!ScalarTy->isIntegerTy())
4691+
ScalarTy =
4692+
Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
46894693

46904694
// Get the cost of all the memory operations.
46914695
InstructionCost MemOpCosts = getMemoryOpCost(
@@ -4699,35 +4703,34 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
46994703
CostKind);
47004704

47014705
// TODO: Complete for other data-types and strides.
4702-
// Each combination of Stride, ElementTy and VF results in a different
4706+
// Each combination of Stride, element bit width and VF results in a different
47034707
// sequence; The cost tables are therefore accessed with:
4704-
// Factor (stride) and VectorType=VFxElemType.
4708+
// Factor (stride) and VectorType=VFxiN.
47054709
// The Cost accounts only for the shuffle sequence;
47064710
// The cost of the loads/stores is accounted for separately.
47074711
//
47084712
static const CostTblEntry AVX2InterleavedLoadTbl[] = {
47094713
{ 2, MVT::v4i64, 6 }, //(load 8i64 and) deinterleave into 2 x 4i64
4710-
{ 2, MVT::v4f64, 6 }, //(load 8f64 and) deinterleave into 2 x 4f64
47114714

47124715
{ 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8
47134716
{ 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8
47144717
{ 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8
47154718
{ 3, MVT::v16i8, 11}, //(load 48i8 and) deinterleave into 3 x 16i8
47164719
{ 3, MVT::v32i8, 13}, //(load 96i8 and) deinterleave into 3 x 32i8
4717-
{ 3, MVT::v8f32, 17 }, //(load 24f32 and)deinterleave into 3 x 8f32
4720+
4721+
{ 3, MVT::v8i32, 17 }, //(load 24i32 and)deinterleave into 3 x 8i32
47184722

47194723
{ 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8
47204724
{ 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8
47214725
{ 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8
47224726
{ 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8
47234727
{ 4, MVT::v32i8, 80 }, //(load 128i8 and) deinterleave into 4 x 32i8
47244728

4725-
{ 8, MVT::v8f32, 40 } //(load 64f32 and)deinterleave into 8 x 8f32
4729+
{ 8, MVT::v8i32, 40 } //(load 64i32 and)deinterleave into 8 x 8i32
47264730
};
47274731

47284732
static const CostTblEntry AVX2InterleavedStoreTbl[] = {
47294733
{ 2, MVT::v4i64, 6 }, //interleave into 2 x 4i64 into 8i64 (and store)
4730-
{ 2, MVT::v4f64, 6 }, //interleave into 2 x 4f64 into 8f64 (and store)
47314734

47324735
{ 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store)
47334736
{ 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store)

0 commit comments

Comments
 (0)