@@ -375,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
     { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
     { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
-    { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
+    { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
+    // AVX-512 foldable instructions
+    { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
   };

   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -581,6 +583,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
     { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
     { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
+
+    // AVX-512 foldable instructions
+    { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
+    { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+    { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
+    { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
+    { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
+    { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
   };

   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1180,12 +1190,35 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::PEXT64rr, X86::PEXT64rm, 0 },

     // AVX-512 foldable instructions
+    { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+    { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
+    { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
+    { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
+    { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
+    { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
+    { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
+    { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
+    { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
+    { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
+    { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+    { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+    { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+    { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
     { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
     { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
     { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
     { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
     { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
     { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
+    { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
+    { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+    { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+    { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
+    { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+    { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
+    { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
+    { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
+    { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
   };

   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -4010,6 +4043,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
   case X86::Int_VCVTSD2SSrr:
   case X86::VCVTSS2SDrr:
   case X86::Int_VCVTSS2SDrr:
+  case X86::VCVTSD2SSZrr:
+  case X86::VCVTSS2SDZrr:
   case X86::VRCPSSr:
   case X86::VROUNDSDr:
   case X86::VROUNDSDr_Int:
@@ -5064,6 +5099,15 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
   case X86::VSQRTSSm:
   case X86::VSQRTSSm_Int:
   case X86::VSQRTSSr:
+
+  case X86::VGATHERQPSZrm:
+  case X86::VGATHERQPDZrm:
+  case X86::VGATHERDPDZrm:
+  case X86::VGATHERDPSZrm:
+  case X86::VPGATHERQDZrm:
+  case X86::VPGATHERQQZrm:
+  case X86::VPGATHERDDZrm:
+  case X86::VPGATHERDQZrm:
     return true;
   }
 }
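
Background note (not part of the patch): each row in these tables maps a register-operand opcode to its memory-operand form, and the flags field encodes fold constraints such as TB_FOLDED_STORE (the fold produces a store) or TB_ALIGN_64 (the memory operand must be 64-byte aligned, as for the aligned ZMM moves added above). Below is a minimal, self-contained C++ sketch of that lookup idea only; the names (MemFoldEntry, FoldTable, lookupMemFold, MinAlign, SK_*) are invented for illustration and the flag encoding is simplified, whereas the real lookup lives in X86InstrInfo's RegOp2MemOpTable* maps and foldMemoryOperandImpl().

    #include <cstdint>
    #include <unordered_map>

    // Simplified flag encoding for this sketch only (LLVM packs TB_ALIGN_*
    // into bit fields rather than storing a byte count).
    enum : uint16_t {
      SK_FOLDED_LOAD  = 1 << 0,  // fold replaces a register read with a load
      SK_FOLDED_STORE = 1 << 1,  // fold replaces a register write with a store
    };

    struct MemFoldEntry {
      unsigned MemOpcode;  // memory-operand form, e.g. the *Zrm / *Zmr opcode
      uint16_t Flags;      // SK_FOLDED_LOAD and/or SK_FOLDED_STORE
      unsigned MinAlign;   // required alignment of the memory operand in bytes (0 = any)
    };

    // Hypothetical table built from { RegOp, MemOp, Flags } rows like those added here.
    using FoldTable = std::unordered_map<unsigned, MemFoldEntry>;

    // Returns the memory-form opcode if RegOpcode can be folded against a slot
    // with the given alignment, or -1 if no fold is possible.
    inline int lookupMemFold(const FoldTable &Tbl, unsigned RegOpcode,
                             unsigned SlotAlignInBytes) {
      auto It = Tbl.find(RegOpcode);
      if (It == Tbl.end())
        return -1;                                   // no entry: cannot fold
      if (It->second.MinAlign && SlotAlignInBytes < It->second.MinAlign)
        return -1;                                   // e.g. an aligned ZMM move needs 64 bytes
      return static_cast<int>(It->second.MemOpcode);
    }

In the patch above, the unaligned AVX-512 moves (VMOVDQU32rr/VMOVDQU64rr) carry flag 0, i.e. no alignment requirement, while the aligned moves (VMOVDQA32rr/VMOVDQA64rr) require TB_ALIGN_64 and so fold only when the memory operand is known to be 64-byte aligned.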