@@ -4686,6 +4686,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
4686
4686
4687
4687
unsigned VF = VecTy->getNumElements () / Factor;
4688
4688
Type *ScalarTy = VecTy->getElementType ();
4689
+ // Deduplicate cost-table entries by modeling floats/pointers as appropriately-sized integers.
4690
+ if (!ScalarTy->isIntegerTy ())
4691
+ ScalarTy =
4692
+ Type::getIntNTy (ScalarTy->getContext (), DL.getTypeSizeInBits (ScalarTy));
4689
4693
4690
4694
// Get the cost of all the memory operations.
4691
4695
InstructionCost MemOpCosts = getMemoryOpCost (
@@ -4699,35 +4703,34 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
4699
4703
CostKind);
4700
4704
4701
4705
// TODO: Complete for other data-types and strides.
4702
- // Each combination of Stride, ElementTy and VF results in a different
4706
+ // Each combination of Stride, element bit width and VF results in a different
4703
4707
// sequence; the cost tables are therefore accessed with:
4704
- // Factor (stride) and VectorType=VFxElemType .
4708
+ // Factor (stride) and VectorType=VFxiN.
4705
4709
// The Cost accounts only for the shuffle sequence;
4706
4710
// The cost of the loads/stores is accounted for separately.
4707
4711
//
4708
4712
static const CostTblEntry AVX2InterleavedLoadTbl[] = {
4709
4713
{ 2 , MVT::v4i64, 6 }, // (load 8i64 and) deinterleave into 2 x 4i64
4710
- { 2 , MVT::v4f64, 6 }, // (load 8f64 and) deinterleave into 2 x 4f64
4711
4714
4712
4715
{ 3 , MVT::v2i8, 10 }, // (load 6i8 and) deinterleave into 3 x 2i8
4713
4716
{ 3 , MVT::v4i8, 4 }, // (load 12i8 and) deinterleave into 3 x 4i8
4714
4717
{ 3 , MVT::v8i8, 9 }, // (load 24i8 and) deinterleave into 3 x 8i8
4715
4718
{ 3 , MVT::v16i8, 11 }, // (load 48i8 and) deinterleave into 3 x 16i8
4716
4719
{ 3 , MVT::v32i8, 13 }, // (load 96i8 and) deinterleave into 3 x 32i8
4717
- { 3 , MVT::v8f32, 17 }, // (load 24f32 and)deinterleave into 3 x 8f32
4720
+
4721
+ { 3 , MVT::v8i32, 17 }, // (load 24i32 and)deinterleave into 3 x 8i32
4718
4722
4719
4723
{ 4 , MVT::v2i8, 12 }, // (load 8i8 and) deinterleave into 4 x 2i8
4720
4724
{ 4 , MVT::v4i8, 4 }, // (load 16i8 and) deinterleave into 4 x 4i8
4721
4725
{ 4 , MVT::v8i8, 20 }, // (load 32i8 and) deinterleave into 4 x 8i8
4722
4726
{ 4 , MVT::v16i8, 39 }, // (load 64i8 and) deinterleave into 4 x 16i8
4723
4727
{ 4 , MVT::v32i8, 80 }, // (load 128i8 and) deinterleave into 4 x 32i8
4724
4728
4725
- { 8 , MVT::v8f32 , 40 } // (load 64f32 and)deinterleave into 8 x 8f32
4729
+ { 8 , MVT::v8i32 , 40 } // (load 64i32 and)deinterleave into 8 x 8i32
4726
4730
};
4727
4731
4728
4732
static const CostTblEntry AVX2InterleavedStoreTbl[] = {
4729
4733
{ 2 , MVT::v4i64, 6 }, // interleave into 2 x 4i64 into 8i64 (and store)
4730
- { 2 , MVT::v4f64, 6 }, // interleave into 2 x 4f64 into 8f64 (and store)
4731
4734
4732
4735
{ 3 , MVT::v2i8, 7 }, // interleave 3 x 2i8 into 6i8 (and store)
4733
4736
{ 3 , MVT::v4i8, 8 }, // interleave 3 x 4i8 into 12i8 (and store)
0 commit comments