@@ -1563,6 +1563,67 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1563
1563
if (Intrinsic::isTargetIntrinsic (IID))
1564
1564
return TargetTransformInfo::TCC_Basic;
1565
1565
1566
+ // VP Intrinsics should have the same cost as their non-vp counterpart.
1567
+ // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1568
+ // counterpart when the vector length argument is smaller than the maximum
1569
+ // vector length.
1570
+ // TODO: Support other kinds of VPIntrinsics
1571
+ if (VPIntrinsic::isVPIntrinsic (ICA.getID ())) {
1572
+ std::optional<unsigned > FOp =
1573
+ VPIntrinsic::getFunctionalOpcodeForVP (ICA.getID ());
1574
+ if (FOp) {
1575
+ if (ICA.getID () == Intrinsic::vp_load) {
1576
+ Align Alignment;
1577
+ if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1578
+ Alignment = VPI->getPointerAlignment ().valueOrOne ();
1579
+ unsigned AS = 0 ;
1580
+ if (ICA.getArgTypes ().size () > 1 )
1581
+ if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes ()[0 ]))
1582
+ AS = PtrTy->getAddressSpace ();
1583
+ return thisT ()->getMemoryOpCost (*FOp, ICA.getReturnType (), Alignment,
1584
+ AS, CostKind);
1585
+ }
1586
+ if (ICA.getID () == Intrinsic::vp_store) {
1587
+ Align Alignment;
1588
+ if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1589
+ Alignment = VPI->getPointerAlignment ().valueOrOne ();
1590
+ unsigned AS = 0 ;
1591
+ if (ICA.getArgTypes ().size () >= 2 )
1592
+ if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes ()[1 ]))
1593
+ AS = PtrTy->getAddressSpace ();
1594
+ return thisT ()->getMemoryOpCost (*FOp, ICA.getArgTypes ()[0 ], Alignment,
1595
+ AS, CostKind);
1596
+ }
1597
+ if (VPBinOpIntrinsic::isVPBinOp (ICA.getID ())) {
1598
+ return thisT ()->getArithmeticInstrCost (*FOp, ICA.getReturnType (),
1599
+ CostKind);
1600
+ }
1601
+ }
1602
+
1603
+ std::optional<Intrinsic::ID> FID =
1604
+ VPIntrinsic::getFunctionalIntrinsicIDForVP (ICA.getID ());
1605
+ if (FID) {
1606
+ // Non-vp version will have same arg types except mask and vector
1607
+ // length.
1608
+ assert (ICA.getArgTypes ().size () >= 2 &&
1609
+ " Expected VPIntrinsic to have Mask and Vector Length args and "
1610
+ " types" );
1611
+ ArrayRef<Type *> NewTys = ArrayRef (ICA.getArgTypes ()).drop_back (2 );
1612
+
1613
+ // VPReduction intrinsics have a start value argument that their non-vp
1614
+ // counterparts do not have, except for the fadd and fmul non-vp
1615
+ // counterpart.
1616
+ if (VPReductionIntrinsic::isVPReduction (ICA.getID ()) &&
1617
+ *FID != Intrinsic::vector_reduce_fadd &&
1618
+ *FID != Intrinsic::vector_reduce_fmul)
1619
+ NewTys = NewTys.drop_front ();
1620
+
1621
+ IntrinsicCostAttributes NewICA (*FID, ICA.getReturnType (), NewTys,
1622
+ ICA.getFlags ());
1623
+ return thisT ()->getIntrinsicInstrCost (NewICA, CostKind);
1624
+ }
1625
+ }
1626
+
1566
1627
if (ICA.isTypeBasedOnly ())
1567
1628
return getTypeBasedIntrinsicInstrCost (ICA, CostKind);
1568
1629
@@ -1823,68 +1884,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1823
1884
}
1824
1885
}
1825
1886
1826
- // VP Intrinsics should have the same cost as their non-vp counterpart.
1827
- // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1828
- // counterpart when the vector length argument is smaller than the maximum
1829
- // vector length.
1830
- // TODO: Support other kinds of VPIntrinsics
1831
- if (VPIntrinsic::isVPIntrinsic (ICA.getID ())) {
1832
- std::optional<unsigned > FOp =
1833
- VPIntrinsic::getFunctionalOpcodeForVP (ICA.getID ());
1834
- if (FOp) {
1835
- if (ICA.getID () == Intrinsic::vp_load) {
1836
- Align Alignment;
1837
- if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1838
- Alignment = VPI->getPointerAlignment ().valueOrOne ();
1839
- unsigned AS = 0 ;
1840
- if (ICA.getArgs ().size () > 1 )
1841
- if (auto *PtrTy =
1842
- dyn_cast<PointerType>(ICA.getArgs ()[0 ]->getType ()))
1843
- AS = PtrTy->getAddressSpace ();
1844
- return thisT ()->getMemoryOpCost (*FOp, ICA.getReturnType (), Alignment,
1845
- AS, CostKind);
1846
- }
1847
- if (ICA.getID () == Intrinsic::vp_store) {
1848
- Align Alignment;
1849
- if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst ()))
1850
- Alignment = VPI->getPointerAlignment ().valueOrOne ();
1851
- unsigned AS = 0 ;
1852
- if (ICA.getArgs ().size () >= 2 )
1853
- if (auto *PtrTy =
1854
- dyn_cast<PointerType>(ICA.getArgs ()[1 ]->getType ()))
1855
- AS = PtrTy->getAddressSpace ();
1856
- return thisT ()->getMemoryOpCost (*FOp, Args[0 ]->getType (), Alignment,
1857
- AS, CostKind);
1858
- }
1859
- if (VPBinOpIntrinsic::isVPBinOp (ICA.getID ())) {
1860
- return thisT ()->getArithmeticInstrCost (*FOp, ICA.getReturnType (),
1861
- CostKind);
1862
- }
1863
- }
1864
-
1865
- std::optional<Intrinsic::ID> FID =
1866
- VPIntrinsic::getFunctionalIntrinsicIDForVP (ICA.getID ());
1867
- if (FID) {
1868
- // Non-vp version will have same Args/Tys except mask and vector length.
1869
- assert (ICA.getArgs ().size () >= 2 && ICA.getArgTypes ().size () >= 2 &&
1870
- " Expected VPIntrinsic to have Mask and Vector Length args and "
1871
- " types" );
1872
- ArrayRef<Type *> NewTys = ArrayRef (ICA.getArgTypes ()).drop_back (2 );
1873
-
1874
- // VPReduction intrinsics have a start value argument that their non-vp
1875
- // counterparts do not have, except for the fadd and fmul non-vp
1876
- // counterpart.
1877
- if (VPReductionIntrinsic::isVPReduction (ICA.getID ()) &&
1878
- *FID != Intrinsic::vector_reduce_fadd &&
1879
- *FID != Intrinsic::vector_reduce_fmul)
1880
- NewTys = NewTys.drop_front ();
1881
-
1882
- IntrinsicCostAttributes NewICA (*FID, ICA.getReturnType (), NewTys,
1883
- ICA.getFlags ());
1884
- return thisT ()->getIntrinsicInstrCost (NewICA, CostKind);
1885
- }
1886
- }
1887
-
1888
1887
// Assume that we need to scalarize this intrinsic.
1889
1888
// Compute the scalarization overhead based on Args for a vector
1890
1889
// intrinsic.
0 commit comments