@@ -772,6 +772,10 @@ def TuningUseGLMDivSqrtCosts
772
772
def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
773
773
"Target has branch hint feature">; // NOTE(review): args are presumably feature name, subtarget field, value, description - confirm against the SubtargetFeature class.
774
774
775
+ def TuningAvoidMFENCE // Tuning feature introduced by this change; the CPU tuning lists later in this file opt in by listing it.
776
+ : SubtargetFeature<"avoid-mfence", "AvoidMFence", "true",
777
+ "Avoid MFENCE for fence seq_cst, and instead use lock or">; // Per the description string: seq_cst fences should be lowered to a `lock or` rather than MFENCE.
778
+
775
779
//===----------------------------------------------------------------------===//
776
780
// X86 CPU Families
777
781
// TODO: Remove these - use general tuning features to determine codegen.
@@ -833,7 +837,8 @@ def ProcessorFeatures {
833
837
TuningSlow3OpsLEA,
834
838
TuningSlowDivide64,
835
839
TuningSlowIncDec,
836
- TuningInsertVZEROUPPER
840
+ TuningInsertVZEROUPPER,
841
+ TuningAvoidMFENCE
837
842
];
838
843
839
844
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
@@ -849,7 +854,8 @@ def ProcessorFeatures {
849
854
TuningFastSHLDRotate,
850
855
TuningFast15ByteNOP,
851
856
TuningPOPCNTFalseDeps,
852
- TuningInsertVZEROUPPER
857
+ TuningInsertVZEROUPPER,
858
+ TuningAvoidMFENCE
853
859
];
854
860
855
861
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
@@ -868,7 +874,8 @@ def ProcessorFeatures {
868
874
TuningPOPCNTFalseDeps,
869
875
TuningLZCNTFalseDeps,
870
876
TuningInsertVZEROUPPER,
871
- TuningAllowLight256Bit
877
+ TuningAllowLight256Bit,
878
+ TuningAvoidMFENCE
872
879
];
873
880
874
881
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
@@ -892,15 +899,17 @@ def ProcessorFeatures {
892
899
TuningFastGather,
893
900
TuningPOPCNTFalseDeps,
894
901
TuningInsertVZEROUPPER,
895
- TuningAllowLight256Bit
902
+ TuningAllowLight256Bit,
903
+ TuningAvoidMFENCE
896
904
];
897
905
898
906
// Nehalem
899
907
list<SubtargetFeature> NHMFeatures = X86_64V2Features; // Nehalem's feature list is exactly the X86_64V2Features list.
900
908
list<SubtargetFeature> NHMTuning = [TuningMacroFusion, // Nehalem tuning flags, listed explicitly rather than derived from another list.
901
909
TuningSlowDivide64,
902
910
TuningInsertVZEROUPPER,
903
- TuningNoDomainDelayMov];
911
+ TuningNoDomainDelayMov,
912
+ TuningAvoidMFENCE]; // Newly appended: Nehalem opts in to the avoid-mfence tuning.
904
913
905
914
// Westmere
906
915
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -921,7 +930,8 @@ def ProcessorFeatures {
921
930
TuningFast15ByteNOP,
922
931
TuningPOPCNTFalseDeps,
923
932
TuningInsertVZEROUPPER,
924
- TuningNoDomainDelayMov];
933
+ TuningNoDomainDelayMov,
934
+ TuningAvoidMFENCE];
925
935
list<SubtargetFeature> SNBFeatures =
926
936
!listconcat(WSMFeatures, SNBAdditionalFeatures);
927
937
@@ -987,7 +997,8 @@ def ProcessorFeatures {
987
997
TuningAllowLight256Bit,
988
998
TuningNoDomainDelayMov,
989
999
TuningNoDomainDelayShuffle,
990
- TuningNoDomainDelayBlend];
1000
+ TuningNoDomainDelayBlend,
1001
+ TuningAvoidMFENCE];
991
1002
list<SubtargetFeature> SKLFeatures =
992
1003
!listconcat(BDWFeatures, SKLAdditionalFeatures);
993
1004
@@ -1022,7 +1033,8 @@ def ProcessorFeatures {
1022
1033
TuningNoDomainDelayMov,
1023
1034
TuningNoDomainDelayShuffle,
1024
1035
TuningNoDomainDelayBlend,
1025
- TuningFastImmVectorShift];
1036
+ TuningFastImmVectorShift,
1037
+ TuningAvoidMFENCE];
1026
1038
list<SubtargetFeature> SKXFeatures =
1027
1039
!listconcat(BDWFeatures, SKXAdditionalFeatures);
1028
1040
@@ -1065,7 +1077,8 @@ def ProcessorFeatures {
1065
1077
TuningNoDomainDelayMov,
1066
1078
TuningNoDomainDelayShuffle,
1067
1079
TuningNoDomainDelayBlend,
1068
- TuningFastImmVectorShift];
1080
+ TuningFastImmVectorShift,
1081
+ TuningAvoidMFENCE];
1069
1082
list<SubtargetFeature> CNLFeatures =
1070
1083
!listconcat(SKLFeatures, CNLAdditionalFeatures);
1071
1084
@@ -1094,7 +1107,8 @@ def ProcessorFeatures {
1094
1107
TuningNoDomainDelayMov,
1095
1108
TuningNoDomainDelayShuffle,
1096
1109
TuningNoDomainDelayBlend,
1097
- TuningFastImmVectorShift];
1110
+ TuningFastImmVectorShift,
1111
+ TuningAvoidMFENCE];
1098
1112
list<SubtargetFeature> ICLFeatures =
1099
1113
!listconcat(CNLFeatures, ICLAdditionalFeatures);
1100
1114
@@ -1268,7 +1282,8 @@ def ProcessorFeatures {
1268
1282
// Tremont
1269
1283
list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, // Features Tremont adds on top of GLPFeatures (combined in TRMFeatures below).
1270
1284
FeatureGFNI];
1271
- list<SubtargetFeature> TRMTuning = GLPTuning;
1285
+ list<SubtargetFeature> TRMAdditionalTuning = [TuningAvoidMFENCE]; // Tremont-only tuning additions; kept separate so GLPTuning itself is not modified.
1286
+ list<SubtargetFeature> TRMTuning = !listconcat(GLPTuning, TRMAdditionalTuning); // Previously an alias of GLPTuning; now GLPTuning followed by the Tremont additions.
1272
1287
list<SubtargetFeature> TRMFeatures =
1273
1288
!listconcat(GLPFeatures, TRMAdditionalFeatures); // GLPFeatures followed by the Tremont additions.
1274
1289
@@ -1446,7 +1461,8 @@ def ProcessorFeatures {
1446
1461
TuningFastImm16,
1447
1462
TuningSBBDepBreaking,
1448
1463
TuningSlowDivide64,
1449
- TuningSlowSHLD];
1464
+ TuningSlowSHLD,
1465
+ TuningAvoidMFENCE];
1450
1466
list<SubtargetFeature> BtVer2Features =
1451
1467
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1452
1468
@@ -1475,7 +1491,8 @@ def ProcessorFeatures {
1475
1491
TuningFastScalarShiftMasks,
1476
1492
TuningBranchFusion,
1477
1493
TuningSBBDepBreaking,
1478
- TuningInsertVZEROUPPER];
1494
+ TuningInsertVZEROUPPER,
1495
+ TuningAvoidMFENCE];
1479
1496
1480
1497
// PileDriver
1481
1498
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
@@ -1555,7 +1572,8 @@ def ProcessorFeatures {
1555
1572
TuningSlowSHLD,
1556
1573
TuningSBBDepBreaking,
1557
1574
TuningInsertVZEROUPPER,
1558
- TuningAllowLight256Bit];
1575
+ TuningAllowLight256Bit,
1576
+ TuningAvoidMFENCE];
1559
1577
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1560
1578
FeatureRDPID,
1561
1579
FeatureRDPRU,
@@ -1740,7 +1758,8 @@ def : ProcModel<P, SandyBridgeModel, [
1740
1758
[
1741
1759
TuningMacroFusion,
1742
1760
TuningSlowUAMem16,
1743
- TuningInsertVZEROUPPER
1761
+ TuningInsertVZEROUPPER,
1762
+ TuningAvoidMFENCE
1744
1763
]>;
1745
1764
}
1746
1765
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
@@ -1759,7 +1778,8 @@ def : ProcModel<P, SandyBridgeModel, [
1759
1778
[
1760
1779
TuningMacroFusion,
1761
1780
TuningSlowUAMem16,
1762
- TuningInsertVZEROUPPER
1781
+ TuningInsertVZEROUPPER,
1782
+ TuningAvoidMFENCE
1763
1783
]>;
1764
1784
}
1765
1785
0 commit comments