14
14
#include <linux/cleanup.h>
15
15
#include <linux/edac.h>
16
16
#include <linux/limits.h>
17
+ #include <linux/xarray.h>
17
18
#include <cxl/features.h>
18
19
#include <cxl.h>
19
20
#include <cxlmem.h>
20
21
#include "core.h"
22
+ #include "trace.h"
21
23
22
24
#define CXL_NR_EDAC_DEV_FEATURES 2
23
25
@@ -862,10 +864,285 @@ static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
862
864
return cxl_internal_send_cmd (cxl_mbox , & mbox_cmd );
863
865
}
864
866
867
+ /*
868
+ * Support for checking whether the attributes of a requested memory
869
+ * repair operation match an error record reported during the current boot.
870
+ */
871
+
872
+ struct cxl_mem_err_rec {
873
+ struct xarray rec_gen_media ;
874
+ struct xarray rec_dram ;
875
+ };
876
+
877
/*
 * Kind of repair operation a set of repair attributes describes.
 * CXL_REPAIR_MAX is not a repair type; it is the number of types above it.
 */
enum cxl_mem_repair_type {
	CXL_PPR = 0,
	CXL_CACHELINE_SPARING,
	CXL_ROW_SPARING,
	CXL_BANK_SPARING,
	CXL_RANK_SPARING,
	CXL_REPAIR_MAX,
};
885
+
886
+ /**
887
+ * struct cxl_mem_repair_attrbs - CXL memory repair attributes
888
+ * @dpa: DPA of memory to repair
889
+ * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
890
+ * @row: row of memory to repair
891
+ * @column: column of memory to repair
892
+ * @channel: channel of memory to repair
893
+ * @sub_channel: sub channel of memory to repair
894
+ * @rank: rank of memory to repair
895
+ * @bank_group: bank group of memory to repair
896
+ * @bank: bank of memory to repair
897
+ * @repair_type: repair type. For eg. PPR, memory sparing etc.
898
+ */
899
+ struct cxl_mem_repair_attrbs {
900
+ u64 dpa ;
901
+ u32 nibble_mask ;
902
+ u32 row ;
903
+ u16 column ;
904
+ u8 channel ;
905
+ u8 sub_channel ;
906
+ u8 rank ;
907
+ u8 bank_group ;
908
+ u8 bank ;
909
+ enum cxl_mem_repair_type repair_type ;
910
+ };
911
+
912
+ static struct cxl_event_gen_media *
913
+ cxl_find_rec_gen_media (struct cxl_memdev * cxlmd ,
914
+ struct cxl_mem_repair_attrbs * attrbs )
915
+ {
916
+ struct cxl_mem_err_rec * array_rec = cxlmd -> err_rec_array ;
917
+ struct cxl_event_gen_media * rec ;
918
+
919
+ if (!array_rec )
920
+ return NULL ;
921
+
922
+ rec = xa_load (& array_rec -> rec_gen_media , attrbs -> dpa );
923
+ if (!rec )
924
+ return NULL ;
925
+
926
+ if (attrbs -> repair_type == CXL_PPR )
927
+ return rec ;
928
+
929
+ return NULL ;
930
+ }
931
+
932
+ static struct cxl_event_dram *
933
+ cxl_find_rec_dram (struct cxl_memdev * cxlmd ,
934
+ struct cxl_mem_repair_attrbs * attrbs )
935
+ {
936
+ struct cxl_mem_err_rec * array_rec = cxlmd -> err_rec_array ;
937
+ struct cxl_event_dram * rec ;
938
+ u16 validity_flags ;
939
+
940
+ if (!array_rec )
941
+ return NULL ;
942
+
943
+ rec = xa_load (& array_rec -> rec_dram , attrbs -> dpa );
944
+ if (!rec )
945
+ return NULL ;
946
+
947
+ validity_flags = get_unaligned_le16 (rec -> media_hdr .validity_flags );
948
+ if (!(validity_flags & CXL_DER_VALID_CHANNEL ) ||
949
+ !(validity_flags & CXL_DER_VALID_RANK ))
950
+ return NULL ;
951
+
952
+ switch (attrbs -> repair_type ) {
953
+ case CXL_PPR :
954
+ if (!(validity_flags & CXL_DER_VALID_NIBBLE ) ||
955
+ get_unaligned_le24 (rec -> nibble_mask ) == attrbs -> nibble_mask )
956
+ return rec ;
957
+ break ;
958
+ case CXL_CACHELINE_SPARING :
959
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP ) ||
960
+ !(validity_flags & CXL_DER_VALID_BANK ) ||
961
+ !(validity_flags & CXL_DER_VALID_ROW ) ||
962
+ !(validity_flags & CXL_DER_VALID_COLUMN ))
963
+ return NULL ;
964
+
965
+ if (rec -> media_hdr .channel == attrbs -> channel &&
966
+ rec -> media_hdr .rank == attrbs -> rank &&
967
+ rec -> bank_group == attrbs -> bank_group &&
968
+ rec -> bank == attrbs -> bank &&
969
+ get_unaligned_le24 (rec -> row ) == attrbs -> row &&
970
+ get_unaligned_le16 (rec -> column ) == attrbs -> column &&
971
+ (!(validity_flags & CXL_DER_VALID_NIBBLE ) ||
972
+ get_unaligned_le24 (rec -> nibble_mask ) ==
973
+ attrbs -> nibble_mask ) &&
974
+ (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL ) ||
975
+ rec -> sub_channel == attrbs -> sub_channel ))
976
+ return rec ;
977
+ break ;
978
+ case CXL_ROW_SPARING :
979
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP ) ||
980
+ !(validity_flags & CXL_DER_VALID_BANK ) ||
981
+ !(validity_flags & CXL_DER_VALID_ROW ))
982
+ return NULL ;
983
+
984
+ if (rec -> media_hdr .channel == attrbs -> channel &&
985
+ rec -> media_hdr .rank == attrbs -> rank &&
986
+ rec -> bank_group == attrbs -> bank_group &&
987
+ rec -> bank == attrbs -> bank &&
988
+ get_unaligned_le24 (rec -> row ) == attrbs -> row &&
989
+ (!(validity_flags & CXL_DER_VALID_NIBBLE ) ||
990
+ get_unaligned_le24 (rec -> nibble_mask ) ==
991
+ attrbs -> nibble_mask ))
992
+ return rec ;
993
+ break ;
994
+ case CXL_BANK_SPARING :
995
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP ) ||
996
+ !(validity_flags & CXL_DER_VALID_BANK ))
997
+ return NULL ;
998
+
999
+ if (rec -> media_hdr .channel == attrbs -> channel &&
1000
+ rec -> media_hdr .rank == attrbs -> rank &&
1001
+ rec -> bank_group == attrbs -> bank_group &&
1002
+ rec -> bank == attrbs -> bank &&
1003
+ (!(validity_flags & CXL_DER_VALID_NIBBLE ) ||
1004
+ get_unaligned_le24 (rec -> nibble_mask ) ==
1005
+ attrbs -> nibble_mask ))
1006
+ return rec ;
1007
+ break ;
1008
+ case CXL_RANK_SPARING :
1009
+ if (rec -> media_hdr .channel == attrbs -> channel &&
1010
+ rec -> media_hdr .rank == attrbs -> rank &&
1011
+ (!(validity_flags & CXL_DER_VALID_NIBBLE ) ||
1012
+ get_unaligned_le24 (rec -> nibble_mask ) ==
1013
+ attrbs -> nibble_mask ))
1014
+ return rec ;
1015
+ break ;
1016
+ default :
1017
+ return NULL ;
1018
+ }
1019
+
1020
+ return NULL ;
1021
+ }
1022
+
1023
+ #define CXL_MAX_STORAGE_DAYS 10
1024
+ #define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
1025
+
1026
+ static void cxl_del_expired_gmedia_recs (struct xarray * rec_xarray ,
1027
+ struct cxl_event_gen_media * cur_rec )
1028
+ {
1029
+ u64 cur_ts = le64_to_cpu (cur_rec -> media_hdr .hdr .timestamp );
1030
+ struct cxl_event_gen_media * rec ;
1031
+ unsigned long index ;
1032
+ u64 delta_ts_secs ;
1033
+
1034
+ xa_for_each (rec_xarray , index , rec ) {
1035
+ delta_ts_secs = (cur_ts -
1036
+ le64_to_cpu (rec -> media_hdr .hdr .timestamp )) / 1000000000ULL ;
1037
+ if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS ) {
1038
+ xa_erase (rec_xarray , index );
1039
+ kfree (rec );
1040
+ }
1041
+ }
1042
+ }
1043
+
1044
+ static void cxl_del_expired_dram_recs (struct xarray * rec_xarray ,
1045
+ struct cxl_event_dram * cur_rec )
1046
+ {
1047
+ u64 cur_ts = le64_to_cpu (cur_rec -> media_hdr .hdr .timestamp );
1048
+ struct cxl_event_dram * rec ;
1049
+ unsigned long index ;
1050
+ u64 delta_secs ;
1051
+
1052
+ xa_for_each (rec_xarray , index , rec ) {
1053
+ delta_secs = (cur_ts -
1054
+ le64_to_cpu (rec -> media_hdr .hdr .timestamp )) / 1000000000ULL ;
1055
+ if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS ) {
1056
+ xa_erase (rec_xarray , index );
1057
+ kfree (rec );
1058
+ }
1059
+ }
1060
+ }
1061
+
1062
+ #define CXL_MAX_REC_STORAGE_COUNT 200
1063
+
1064
+ static void cxl_del_overflow_old_recs (struct xarray * rec_xarray )
1065
+ {
1066
+ void * err_rec ;
1067
+ unsigned long index , count = 0 ;
1068
+
1069
+ xa_for_each (rec_xarray , index , err_rec )
1070
+ count ++ ;
1071
+
1072
+ if (count <= CXL_MAX_REC_STORAGE_COUNT )
1073
+ return ;
1074
+
1075
+ count -= CXL_MAX_REC_STORAGE_COUNT ;
1076
+ xa_for_each (rec_xarray , index , err_rec ) {
1077
+ xa_erase (rec_xarray , index );
1078
+ kfree (err_rec );
1079
+ count -- ;
1080
+ if (!count )
1081
+ break ;
1082
+ }
1083
+ }
1084
+
1085
+ int cxl_store_rec_gen_media (struct cxl_memdev * cxlmd , union cxl_event * evt )
1086
+ {
1087
+ struct cxl_mem_err_rec * array_rec = cxlmd -> err_rec_array ;
1088
+ struct cxl_event_gen_media * rec ;
1089
+ void * old_rec ;
1090
+
1091
+ if (!IS_ENABLED (CONFIG_CXL_EDAC_MEM_REPAIR ) || !array_rec )
1092
+ return 0 ;
1093
+
1094
+ rec = kmemdup (& evt -> gen_media , sizeof (* rec ), GFP_KERNEL );
1095
+ if (!rec )
1096
+ return - ENOMEM ;
1097
+
1098
+ old_rec = xa_store (& array_rec -> rec_gen_media ,
1099
+ le64_to_cpu (rec -> media_hdr .phys_addr ), rec ,
1100
+ GFP_KERNEL );
1101
+ if (xa_is_err (old_rec ))
1102
+ return xa_err (old_rec );
1103
+
1104
+ kfree (old_rec );
1105
+
1106
+ cxl_del_expired_gmedia_recs (& array_rec -> rec_gen_media , rec );
1107
+ cxl_del_overflow_old_recs (& array_rec -> rec_gen_media );
1108
+
1109
+ return 0 ;
1110
+ }
1111
+ EXPORT_SYMBOL_NS_GPL (cxl_store_rec_gen_media , "CXL" );
1112
+
1113
+ int cxl_store_rec_dram (struct cxl_memdev * cxlmd , union cxl_event * evt )
1114
+ {
1115
+ struct cxl_mem_err_rec * array_rec = cxlmd -> err_rec_array ;
1116
+ struct cxl_event_dram * rec ;
1117
+ void * old_rec ;
1118
+
1119
+ if (!IS_ENABLED (CONFIG_CXL_EDAC_MEM_REPAIR ) || !array_rec )
1120
+ return 0 ;
1121
+
1122
+ rec = kmemdup (& evt -> dram , sizeof (* rec ), GFP_KERNEL );
1123
+ if (!rec )
1124
+ return - ENOMEM ;
1125
+
1126
+ old_rec = xa_store (& array_rec -> rec_dram ,
1127
+ le64_to_cpu (rec -> media_hdr .phys_addr ), rec ,
1128
+ GFP_KERNEL );
1129
+ if (xa_is_err (old_rec ))
1130
+ return xa_err (old_rec );
1131
+
1132
+ kfree (old_rec );
1133
+
1134
+ cxl_del_expired_dram_recs (& array_rec -> rec_dram , rec );
1135
+ cxl_del_overflow_old_recs (& array_rec -> rec_dram );
1136
+
1137
+ return 0 ;
1138
+ }
1139
+ EXPORT_SYMBOL_NS_GPL (cxl_store_rec_dram , "CXL" );
1140
+
865
1141
int devm_cxl_memdev_edac_register (struct cxl_memdev * cxlmd )
866
1142
{
867
1143
struct edac_dev_feature ras_features [CXL_NR_EDAC_DEV_FEATURES ];
868
1144
int num_ras_features = 0 ;
1145
+ u8 repair_inst = 0 ;
869
1146
int rc ;
870
1147
871
1148
if (IS_ENABLED (CONFIG_CXL_EDAC_SCRUB )) {
@@ -886,6 +1163,20 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
886
1163
num_ras_features ++ ;
887
1164
}
888
1165
1166
+ if (IS_ENABLED (CONFIG_CXL_EDAC_MEM_REPAIR )) {
1167
+ if (repair_inst ) {
1168
+ struct cxl_mem_err_rec * array_rec =
1169
+ devm_kzalloc (& cxlmd -> dev , sizeof (* array_rec ),
1170
+ GFP_KERNEL );
1171
+ if (!array_rec )
1172
+ return - ENOMEM ;
1173
+
1174
+ xa_init (& array_rec -> rec_gen_media );
1175
+ xa_init (& array_rec -> rec_dram );
1176
+ cxlmd -> err_rec_array = array_rec ;
1177
+ }
1178
+ }
1179
+
889
1180
if (!num_ras_features )
890
1181
return - EINVAL ;
891
1182
@@ -923,3 +1214,23 @@ int devm_cxl_region_edac_register(struct cxl_region *cxlr)
923
1214
num_ras_features , ras_features );
924
1215
}
925
1216
EXPORT_SYMBOL_NS_GPL (devm_cxl_region_edac_register , "CXL" );
1217
+
1218
+ void devm_cxl_memdev_edac_release (struct cxl_memdev * cxlmd )
1219
+ {
1220
+ struct cxl_mem_err_rec * array_rec = cxlmd -> err_rec_array ;
1221
+ struct cxl_event_gen_media * rec_gen_media ;
1222
+ struct cxl_event_dram * rec_dram ;
1223
+ unsigned long index ;
1224
+
1225
+ if (!IS_ENABLED (CONFIG_CXL_EDAC_MEM_REPAIR ) || !array_rec )
1226
+ return ;
1227
+
1228
+ xa_for_each (& array_rec -> rec_dram , index , rec_dram )
1229
+ kfree (rec_dram );
1230
+ xa_destroy (& array_rec -> rec_dram );
1231
+
1232
+ xa_for_each (& array_rec -> rec_gen_media , index , rec_gen_media )
1233
+ kfree (rec_gen_media );
1234
+ xa_destroy (& array_rec -> rec_gen_media );
1235
+ }
1236
+ EXPORT_SYMBOL_NS_GPL (devm_cxl_memdev_edac_release , "CXL" );
0 commit comments