@@ -754,6 +754,10 @@ def TuningUseGLMDivSqrtCosts
754
754
def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
755
755
"Target has branch hint feature">;
756
756
757
+ def TuningAvoidMFENCE
758
+ : SubtargetFeature<"avoid-mfence", "AvoidMFence", "true",
759
+ "Avoid MFENCE for fence seq_cst, and instead use lock or">;
760
+
757
761
//===----------------------------------------------------------------------===//
758
762
// X86 CPU Families
759
763
// TODO: Remove these - use general tuning features to determine codegen.
@@ -882,7 +886,8 @@ def ProcessorFeatures {
882
886
list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
883
887
TuningSlowDivide64,
884
888
TuningInsertVZEROUPPER,
885
- TuningNoDomainDelayMov];
889
+ TuningNoDomainDelayMov,
890
+ TuningAvoidMFENCE];
886
891
887
892
// Westmere
888
893
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -903,7 +908,8 @@ def ProcessorFeatures {
903
908
TuningFast15ByteNOP,
904
909
TuningPOPCNTFalseDeps,
905
910
TuningInsertVZEROUPPER,
906
- TuningNoDomainDelayMov];
911
+ TuningNoDomainDelayMov,
912
+ TuningAvoidMFENCE];
907
913
list<SubtargetFeature> SNBFeatures =
908
914
!listconcat(WSMFeatures, SNBAdditionalFeatures);
909
915
@@ -969,7 +975,8 @@ def ProcessorFeatures {
969
975
TuningAllowLight256Bit,
970
976
TuningNoDomainDelayMov,
971
977
TuningNoDomainDelayShuffle,
972
- TuningNoDomainDelayBlend];
978
+ TuningNoDomainDelayBlend,
979
+ TuningAvoidMFENCE];
973
980
list<SubtargetFeature> SKLFeatures =
974
981
!listconcat(BDWFeatures, SKLAdditionalFeatures);
975
982
@@ -1004,7 +1011,8 @@ def ProcessorFeatures {
1004
1011
TuningNoDomainDelayMov,
1005
1012
TuningNoDomainDelayShuffle,
1006
1013
TuningNoDomainDelayBlend,
1007
- TuningFastImmVectorShift];
1014
+ TuningFastImmVectorShift,
1015
+ TuningAvoidMFENCE];
1008
1016
list<SubtargetFeature> SKXFeatures =
1009
1017
!listconcat(BDWFeatures, SKXAdditionalFeatures);
1010
1018
@@ -1047,7 +1055,8 @@ def ProcessorFeatures {
1047
1055
TuningNoDomainDelayMov,
1048
1056
TuningNoDomainDelayShuffle,
1049
1057
TuningNoDomainDelayBlend,
1050
- TuningFastImmVectorShift];
1058
+ TuningFastImmVectorShift,
1059
+ TuningAvoidMFENCE];
1051
1060
list<SubtargetFeature> CNLFeatures =
1052
1061
!listconcat(SKLFeatures, CNLAdditionalFeatures);
1053
1062
@@ -1076,7 +1085,8 @@ def ProcessorFeatures {
1076
1085
TuningNoDomainDelayMov,
1077
1086
TuningNoDomainDelayShuffle,
1078
1087
TuningNoDomainDelayBlend,
1079
- TuningFastImmVectorShift];
1088
+ TuningFastImmVectorShift,
1089
+ TuningAvoidMFENCE];
1080
1090
list<SubtargetFeature> ICLFeatures =
1081
1091
!listconcat(CNLFeatures, ICLAdditionalFeatures);
1082
1092
@@ -1222,7 +1232,8 @@ def ProcessorFeatures {
1222
1232
// Tremont
1223
1233
list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1224
1234
FeatureGFNI];
1225
- list<SubtargetFeature> TRMTuning = GLPTuning;
1235
+ list<SubtargetFeature> TRMAdditionalTuning = [TuningAvoidMFENCE];
1236
+ list<SubtargetFeature> TRMTuning = !listconcat(GLPTuning, TRMAdditionalTuning);
1226
1237
list<SubtargetFeature> TRMFeatures =
1227
1238
!listconcat(GLPFeatures, TRMAdditionalFeatures);
1228
1239
@@ -1429,7 +1440,8 @@ def ProcessorFeatures {
1429
1440
TuningFastScalarShiftMasks,
1430
1441
TuningBranchFusion,
1431
1442
TuningSBBDepBreaking,
1432
- TuningInsertVZEROUPPER];
1443
+ TuningInsertVZEROUPPER,
1444
+ TuningAvoidMFENCE];
1433
1445
1434
1446
// PileDriver
1435
1447
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
@@ -1509,7 +1521,8 @@ def ProcessorFeatures {
1509
1521
TuningSlowSHLD,
1510
1522
TuningSBBDepBreaking,
1511
1523
TuningInsertVZEROUPPER,
1512
- TuningAllowLight256Bit];
1524
+ TuningAllowLight256Bit,
1525
+ TuningAvoidMFENCE];
1513
1526
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1514
1527
FeatureRDPID,
1515
1528
FeatureRDPRU,
@@ -1664,7 +1677,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
1664
1677
],
1665
1678
[
1666
1679
TuningSlowUAMem16,
1667
- TuningInsertVZEROUPPER
1680
+ TuningInsertVZEROUPPER,
1681
+ TuningAvoidMFENCE
1668
1682
]>;
1669
1683
1670
1684
// Intel Core 2 Solo/Duo.
@@ -1684,7 +1698,8 @@ def : ProcModel<P, SandyBridgeModel, [
1684
1698
[
1685
1699
TuningMacroFusion,
1686
1700
TuningSlowUAMem16,
1687
- TuningInsertVZEROUPPER
1701
+ TuningInsertVZEROUPPER,
1702
+ TuningAvoidMFENCE
1688
1703
]>;
1689
1704
}
1690
1705
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
@@ -1703,7 +1718,8 @@ def : ProcModel<P, SandyBridgeModel, [
1703
1718
[
1704
1719
TuningMacroFusion,
1705
1720
TuningSlowUAMem16,
1706
- TuningInsertVZEROUPPER
1721
+ TuningInsertVZEROUPPER,
1722
+ TuningAvoidMFENCE
1707
1723
]>;
1708
1724
}
1709
1725
0 commit comments