@@ -75,8 +75,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
   // here, so that constant hoisting will ignore this constant.
   if (BitSize == 0)
     return TTI::TCC_Free;
-  // No cost model for operations on integers larger than 64 bit implemented yet.
-  if (BitSize > 64)
+  // No cost model for operations on integers larger than 128 bit implemented yet.
+  if ((!ST->hasVector() && BitSize > 64) || BitSize > 128)
     return TTI::TCC_Free;
 
   if (Imm == 0)
@@ -96,7 +96,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
     return 2 * TTI::TCC_Basic;
   }
 
-  return 4 * TTI::TCC_Basic;
+  // i128 immediates loads from Constant Pool
+  return 2 * TTI::TCC_Basic;
 }
 
 InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
@@ -479,21 +480,27 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
       return LIBCALL_COST;
 
     // Give discount for some combined logical operations if supported.
-    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+    if (Args.size() == 2) {
       if (Opcode == Instruction::Xor) {
         for (const Value *A : Args) {
           if (const Instruction *I = dyn_cast<Instruction>(A))
             if (I->hasOneUse() &&
-                (I->getOpcode() == Instruction::And ||
-                 I->getOpcode() == Instruction::Or ||
+                (I->getOpcode() == Instruction::Or ||
+                 I->getOpcode() == Instruction::And ||
                  I->getOpcode() == Instruction::Xor))
-              return 0;
+              if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
+                  (isInt128InVR(Ty) &&
+                   (I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
+                return 0;
         }
       }
-      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+      else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
         for (const Value *A : Args) {
           if (const Instruction *I = dyn_cast<Instruction>(A))
-            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+            if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) &&
+                ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
+                 (isInt128InVR(Ty) &&
+                  (Opcode == Instruction::And || ST->hasVectorEnhancements1()))))
               return 0;
         }
       }
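
Note: the added conditions in this hunk and the ones below call an isInt128InVR() helper that is not shown in this diff. As a rough sketch only, assuming the helper simply reports whether a type is a scalar i128 that the subtarget keeps in a vector register, it could look like this:

// Hypothetical sketch - the real helper is defined elsewhere in
// SystemZTargetTransformInfo; assumed here to test for a scalar i128 type
// on a subtarget that has vector registers.
bool SystemZTTIImpl::isInt128InVR(Type *Ty) {
  return Ty->isIntegerTy(128) && ST->hasVector();
}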
@@ -774,29 +781,63 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     assert (!Dst->isVectorTy());
 
     if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+      if (Src->isIntegerTy(128))
+        return LIBCALL_COST;
       if (SrcScalarBits >= 32 ||
          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
        return 1;
       return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
     }
 
-    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
-        Src->isIntegerTy(1)) {
-      if (ST->hasLoadStoreOnCond2())
-        return 2; // li 0; loc 1
-
-      // This should be extension of a compare i1 result, which is done with
-      // ipm and a varying sequence of instructions.
-      unsigned Cost = 0;
-      if (Opcode == Instruction::SExt)
-        Cost = (DstScalarBits < 64 ? 3 : 4);
-      if (Opcode == Instruction::ZExt)
-        Cost = 3;
-      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
-      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
-        // If operands of an fp-type was compared, this costs +1.
-        Cost++;
-      return Cost;
+    if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) &&
+        Dst->isIntegerTy(128))
+      return LIBCALL_COST;
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
+      if (Src->isIntegerTy(1)) {
+        if (DstScalarBits == 128)
+          return 5 /*branch seq.*/;
+
+        if (ST->hasLoadStoreOnCond2())
+          return 2; // li 0; loc 1
+
+        // This should be extension of a compare i1 result, which is done with
+        // ipm and a varying sequence of instructions.
+        unsigned Cost = 0;
+        if (Opcode == Instruction::SExt)
+          Cost = (DstScalarBits < 64 ? 3 : 4);
+        if (Opcode == Instruction::ZExt)
+          Cost = 3;
+        Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+        if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+          // If operands of an fp-type was compared, this costs +1.
+          Cost++;
+        return Cost;
+      }
+      else if (isInt128InVR(Dst)) {
+        // Extensions from GPR to i128 (in VR) typically costs two instructions,
+        // but a zero-extending load would be just one extra instruction.
+        if (Opcode == Instruction::ZExt && I != nullptr)
+          if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+            if (Ld->hasOneUse())
+              return 1;
+        return 2;
+      }
+    }
+
+    if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) {
+      if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+        if (Ld->hasOneUse())
+          return 0;  // Will be converted to GPR load.
+      bool OnlyTruncatingStores = true;
+      for (const User *U : I->users())
+        if (!isa<StoreInst>(U)) {
+          OnlyTruncatingStores = false;
+          break;
+        }
+      if (OnlyTruncatingStores)
+        return 0;
+      return 2; // Vector element extraction.
     }
   }
   else if (ST->hasVector()) {
@@ -930,7 +971,7 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       // A loaded value compared with 0 with multiple users becomes Load and
       // Test. The load is then not foldable, so return 0 cost for the ICmp.
       unsigned ScalarBits = ValTy->getScalarSizeInBits();
-      if (I != nullptr && ScalarBits >= 32)
+      if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64))
         if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
           if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
             if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
@@ -943,8 +984,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       return Cost;
     }
     case Instruction::Select:
-      if (ValTy->isFloatingPointTy())
-        return 4; // No load on condition for FP - costs a conditional jump.
+      if (ValTy->isFloatingPointTy() || isInt128InVR(ValTy))
+        return 4; // No LOC for FP / i128 - costs a conditional jump.
       return 1; // Load On Condition / Select Register.
     }
   }
@@ -1157,6 +1198,10 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                   CostKind);
 
+  // FP128 is a legal type but kept in a register pair on older CPUs.
+  if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())
+    return 2;
+
   unsigned NumOps =
     (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
 
@@ -1177,10 +1222,6 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     }
   }
 
-  if (Src->getScalarSizeInBits() == 128)
-    // 128 bit scalars are held in a pair of two 64 bit registers.
-    NumOps *= 2;
-
   return NumOps;
 }
 