@@ -2005,18 +2005,29 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
2005
2005
}
2006
2006
2007
2007
// When the vector needs to split into multiple register groups and the index
2008
- // exceeds single vector register group, we need to extract the element via
2009
- // stack.
2010
- if (Opcode == Instruction::ExtractElement && LT.first > 1 &&
2011
- ((Index == -1U ) || (Index > LT.second .getVectorMinNumElements () &&
2008
+ // exceeds single vector register group, we need to insert/extract the element
2009
+ // via stack.
2010
+ if (LT.first > 1 &&
2011
+ ((Index == -1U ) || (Index >= LT.second .getVectorMinNumElements () &&
2012
2012
LT.second .isScalableVector ()))) {
2013
2013
Type *ScalarType = Val->getScalarType ();
2014
2014
Align VecAlign = DL.getPrefTypeAlign (Val);
2015
2015
Align SclAlign = DL.getPrefTypeAlign (ScalarType);
2016
+
2016
2017
// Store all split vectors into stack and load the target element.
2017
- return LT.first *
2018
- getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 , CostKind) +
2019
- getMemoryOpCost (Instruction::Load, ScalarType, SclAlign, 0 ,
2018
+ if (Opcode == Instruction::ExtractElement)
2019
+ return LT.first * getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 ,
2020
+ CostKind) +
2021
+ getMemoryOpCost (Instruction::Load, ScalarType, SclAlign, 0 ,
2022
+ CostKind);
2023
+
2024
+ // Store all split vectors into stack and store the target element and load
2025
+ // vectors back.
2026
+ return LT.first * (getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 ,
2027
+ CostKind) +
2028
+ getMemoryOpCost (Instruction::Load, Val, VecAlign, 0 ,
2029
+ CostKind)) +
2030
+ getMemoryOpCost (Instruction::Store, ScalarType, SclAlign, 0 ,
2020
2031
CostKind);
2021
2032
}
2022
2033
0 commit comments