@@ -1426,12 +1426,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
1426
1426
return cs_mode ;
1427
1427
}
1428
1428
1429
+ static int __addr_mask_to_cs_size (u32 addr_mask_orig , unsigned int cs_mode ,
1430
+ int csrow_nr , int dimm )
1431
+ {
1432
+ u32 msb , weight , num_zero_bits ;
1433
+ u32 addr_mask_deinterleaved ;
1434
+ int size = 0 ;
1435
+
1436
+ /*
1437
+ * The number of zero bits in the mask is equal to the number of bits
1438
+ * in a full mask minus the number of bits in the current mask.
1439
+ *
1440
+ * The MSB is the number of bits in the full mask because BIT[0] is
1441
+ * always 0.
1442
+ *
1443
+ * In the special 3 Rank interleaving case, a single bit is flipped
1444
+ * without swapping with the most significant bit. This can be handled
1445
+ * by keeping the MSB where it is and ignoring the single zero bit.
1446
+ */
1447
+ msb = fls (addr_mask_orig ) - 1 ;
1448
+ weight = hweight_long (addr_mask_orig );
1449
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE );
1450
+
1451
+ /* Take the number of zero bits off from the top of the mask. */
1452
+ addr_mask_deinterleaved = GENMASK_ULL (msb - num_zero_bits , 1 );
1453
+
1454
+ edac_dbg (1 , "CS%d DIMM%d AddrMasks:\n" , csrow_nr , dimm );
1455
+ edac_dbg (1 , " Original AddrMask: 0x%x\n" , addr_mask_orig );
1456
+ edac_dbg (1 , " Deinterleaved AddrMask: 0x%x\n" , addr_mask_deinterleaved );
1457
+
1458
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1459
+ size = (addr_mask_deinterleaved >> 2 ) + 1 ;
1460
+
1461
+ /* Return size in MBs. */
1462
+ return size >> 10 ;
1463
+ }
1464
+
1429
1465
static int umc_addr_mask_to_cs_size (struct amd64_pvt * pvt , u8 umc ,
1430
1466
unsigned int cs_mode , int csrow_nr )
1431
1467
{
1432
- u32 addr_mask_orig , addr_mask_deinterleaved ;
1433
- u32 msb , weight , num_zero_bits ;
1434
1468
int cs_mask_nr = csrow_nr ;
1469
+ u32 addr_mask_orig ;
1435
1470
int dimm , size = 0 ;
1436
1471
1437
1472
/* No Chip Selects are enabled. */
@@ -1475,33 +1510,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
1475
1510
else
1476
1511
addr_mask_orig = pvt -> csels [umc ].csmasks [cs_mask_nr ];
1477
1512
1478
- /*
1479
- * The number of zero bits in the mask is equal to the number of bits
1480
- * in a full mask minus the number of bits in the current mask.
1481
- *
1482
- * The MSB is the number of bits in the full mask because BIT[0] is
1483
- * always 0.
1484
- *
1485
- * In the special 3 Rank interleaving case, a single bit is flipped
1486
- * without swapping with the most significant bit. This can be handled
1487
- * by keeping the MSB where it is and ignoring the single zero bit.
1488
- */
1489
- msb = fls (addr_mask_orig ) - 1 ;
1490
- weight = hweight_long (addr_mask_orig );
1491
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE );
1492
-
1493
- /* Take the number of zero bits off from the top of the mask. */
1494
- addr_mask_deinterleaved = GENMASK_ULL (msb - num_zero_bits , 1 );
1495
-
1496
- edac_dbg (1 , "CS%d DIMM%d AddrMasks:\n" , csrow_nr , dimm );
1497
- edac_dbg (1 , " Original AddrMask: 0x%x\n" , addr_mask_orig );
1498
- edac_dbg (1 , " Deinterleaved AddrMask: 0x%x\n" , addr_mask_deinterleaved );
1499
-
1500
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1501
- size = (addr_mask_deinterleaved >> 2 ) + 1 ;
1502
-
1503
- /* Return size in MBs. */
1504
- return size >> 10 ;
1513
+ return __addr_mask_to_cs_size (addr_mask_orig , cs_mode , csrow_nr , dimm );
1505
1514
}
1506
1515
1507
1516
static void umc_debug_display_dimm_sizes (struct amd64_pvt * pvt , u8 ctrl )
@@ -3675,6 +3684,221 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
3675
3684
return 0 ;
3676
3685
}
3677
3686
3687
+ /*
3688
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
3689
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
3690
+ * longer works as a channel number.
3691
+ *
3692
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
3693
+ * However, the IDs are split such that two UMC values go to one UMC, and
3694
+ * the channel numbers are split in two groups of four.
3695
+ *
3696
+ * Refer to comment on gpu_get_umc_base().
3697
+ *
3698
+ * For example,
3699
+ * UMC0 CH[3:0] = 0x0005[3:0]000
3700
+ * UMC0 CH[7:4] = 0x0015[3:0]000
3701
+ * UMC1 CH[3:0] = 0x0025[3:0]000
3702
+ * UMC1 CH[7:4] = 0x0035[3:0]000
3703
+ */
3704
+ static void gpu_get_err_info (struct mce * m , struct err_info * err )
3705
+ {
3706
+ u8 ch = (m -> ipid & GENMASK (31 , 0 )) >> 20 ;
3707
+ u8 phy = ((m -> ipid >> 12 ) & 0xf );
3708
+
3709
+ err -> channel = ch % 2 ? phy + 4 : phy ;
3710
+ err -> csrow = phy ;
3711
+ }
3712
+
3713
+ static int gpu_addr_mask_to_cs_size (struct amd64_pvt * pvt , u8 umc ,
3714
+ unsigned int cs_mode , int csrow_nr )
3715
+ {
3716
+ u32 addr_mask_orig = pvt -> csels [umc ].csmasks [csrow_nr ];
3717
+
3718
+ return __addr_mask_to_cs_size (addr_mask_orig , cs_mode , csrow_nr , csrow_nr >> 1 );
3719
+ }
3720
+
3721
+ static void gpu_debug_display_dimm_sizes (struct amd64_pvt * pvt , u8 ctrl )
3722
+ {
3723
+ int size , cs_mode , cs = 0 ;
3724
+
3725
+ edac_printk (KERN_DEBUG , EDAC_MC , "UMC%d chip selects:\n" , ctrl );
3726
+
3727
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY ;
3728
+
3729
+ for_each_chip_select (cs , ctrl , pvt ) {
3730
+ size = gpu_addr_mask_to_cs_size (pvt , ctrl , cs_mode , cs );
3731
+ amd64_info (EDAC_MC ": %d: %5dMB\n" , cs , size );
3732
+ }
3733
+ }
3734
+
3735
+ static void gpu_dump_misc_regs (struct amd64_pvt * pvt )
3736
+ {
3737
+ struct amd64_umc * umc ;
3738
+ u32 i ;
3739
+
3740
+ for_each_umc (i ) {
3741
+ umc = & pvt -> umc [i ];
3742
+
3743
+ edac_dbg (1 , "UMC%d UMC cfg: 0x%x\n" , i , umc -> umc_cfg );
3744
+ edac_dbg (1 , "UMC%d SDP ctrl: 0x%x\n" , i , umc -> sdp_ctrl );
3745
+ edac_dbg (1 , "UMC%d ECC ctrl: 0x%x\n" , i , umc -> ecc_ctrl );
3746
+ edac_dbg (1 , "UMC%d All HBMs support ECC: yes\n" , i );
3747
+
3748
+ gpu_debug_display_dimm_sizes (pvt , i );
3749
+ }
3750
+ }
3751
+
3752
/* Number of EDAC pages covered by one GPU chip select on channel @dct. */
static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
{
	u32 nr_pages;
	int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;

	/* Size comes back in MBs; convert MBs to pages. */
	nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr);
	nr_pages <<= 20 - PAGE_SHIFT;

	edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);

	return nr_pages;
}
3765
+
3766
/*
 * Populate the dimm_info for every enabled chip select.
 *
 * Note the swapped layer layout used on these nodes: the UMC indexes
 * csrows[] and the chip select indexes channels[].
 */
static void gpu_init_csrows(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	struct dimm_info *dimm;
	u8 umc, cs;

	for_each_umc(umc) {
		for_each_chip_select(cs, umc, pvt) {
			if (!csrow_enabled(cs, umc, pvt))
				continue;

			dimm = mci->csrows[umc]->channels[cs]->dimm;

			edac_dbg(1, "MC node: %d, csrow: %d\n",
				 pvt->mc_node_id, cs);

			dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
			dimm->edac_mode = EDAC_SECDED;
			dimm->mtype = MEM_HBM2;
			dimm->dtype = DEV_X16;
			dimm->grain = 64;
		}
	}
}
3790
+
3791
/* Set controller-wide attributes and build the csrow table for a GPU node. */
static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	/* HBM2 with SECDED is the only memory configuration reported here. */
	mci->mtype_cap = MEM_FLAG_HBM2;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;

	mci->edac_cap = EDAC_FLAG_EC;
	mci->mod_name = EDAC_MOD_STR;
	mci->ctl_name = pvt->ctl_name;
	mci->dev_name = pci_name(pvt->F3);
	mci->ctl_page_to_phys = NULL;

	gpu_init_csrows(mci);
}
3806
+
3807
/* ECC is enabled by default on GPU nodes; no enable bit is checked. */
static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
{
	return true;
}
3812
+
3813
+ static inline u32 gpu_get_umc_base (u8 umc , u8 channel )
3814
+ {
3815
+ /*
3816
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
3817
+ * channel numbering. On GPUs, there are eight channels per UMC,
3818
+ * so the channel numbering is different from UMC numbering.
3819
+ *
3820
+ * On CPU nodes channels are selected in 6th nibble
3821
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
3822
+ *
3823
+ * On GPU nodes channels are selected in 3rd nibble
3824
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
3825
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
3826
+ */
3827
+ umc *= 2 ;
3828
+
3829
+ if (channel >= 4 )
3830
+ umc ++ ;
3831
+
3832
+ return 0x50000 + (umc << 20 ) + ((channel % 4 ) << 12 );
3833
+ }
3834
+
3835
/* Cache the per-UMC control registers of this node via SMN. */
static void gpu_read_mc_regs(struct amd64_pvt *pvt)
{
	u8 nid = pvt->mc_node_id;
	struct amd64_umc *umc;
	u32 i, umc_base;

	/* Read registers from each UMC */
	for_each_umc(i) {
		/* Per-UMC registers are read at the channel-0 base address. */
		umc_base = gpu_get_umc_base(i, 0);
		umc = &pvt->umc[i];

		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
	}
}
3851
+
3852
+ static void gpu_read_base_mask (struct amd64_pvt * pvt )
3853
+ {
3854
+ u32 base_reg , mask_reg ;
3855
+ u32 * base , * mask ;
3856
+ int umc , cs ;
3857
+
3858
+ for_each_umc (umc ) {
3859
+ for_each_chip_select (cs , umc , pvt ) {
3860
+ base_reg = gpu_get_umc_base (umc , cs ) + UMCCH_BASE_ADDR ;
3861
+ base = & pvt -> csels [umc ].csbases [cs ];
3862
+
3863
+ if (!amd_smn_read (pvt -> mc_node_id , base_reg , base )) {
3864
+ edac_dbg (0 , " DCSB%d[%d]=0x%08x reg: 0x%x\n" ,
3865
+ umc , cs , * base , base_reg );
3866
+ }
3867
+
3868
+ mask_reg = gpu_get_umc_base (umc , cs ) + UMCCH_ADDR_MASK ;
3869
+ mask = & pvt -> csels [umc ].csmasks [cs ];
3870
+
3871
+ if (!amd_smn_read (pvt -> mc_node_id , mask_reg , mask )) {
3872
+ edac_dbg (0 , " DCSM%d[%d]=0x%08x reg: 0x%x\n" ,
3873
+ umc , cs , * mask , mask_reg );
3874
+ }
3875
+ }
3876
+ }
3877
+ }
3878
+
3879
/* Each GPU UMC exposes eight chip select base and eight mask registers. */
static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
{
	int umc;

	for_each_umc(umc) {
		pvt->csels[umc].b_cnt = 8;
		pvt->csels[umc].m_cnt = 8;
	}
}
3888
+
3889
/*
 * Allocate per-UMC state and read all hardware registers needed before
 * the MCI is registered. Returns 0 on success or -ENOMEM.
 */
static int gpu_hw_info_get(struct amd64_pvt *pvt)
{
	pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
	if (!pvt->umc)
		return -ENOMEM;

	gpu_prep_chip_selects(pvt);
	gpu_read_base_mask(pvt);
	gpu_read_mc_regs(pvt);

	return 0;
}
3901
+
3678
3902
static void hw_info_put (struct amd64_pvt * pvt )
3679
3903
{
3680
3904
pci_dev_put (pvt -> F1 );
@@ -3690,6 +3914,14 @@ static struct low_ops umc_ops = {
3690
3914
.get_err_info = umc_get_err_info ,
3691
3915
};
3692
3916
3917
/* Hardware ops used for heterogeneous (GPU) nodes. */
static struct low_ops gpu_ops = {
	.hw_info_get = gpu_hw_info_get,
	.ecc_enabled = gpu_ecc_enabled,
	.setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
	.dump_misc_regs = gpu_dump_misc_regs,
	.get_err_info = gpu_get_err_info,
};
3924
+
3693
3925
/* Use Family 16h versions for defaults and adjust as needed below. */
3694
3926
static struct low_ops dct_ops = {
3695
3927
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow ,
@@ -3813,6 +4045,16 @@ static int per_family_init(struct amd64_pvt *pvt)
3813
4045
case 0x20 ... 0x2f :
3814
4046
pvt -> ctl_name = "F19h_M20h" ;
3815
4047
break ;
4048
+ case 0x30 ... 0x3f :
4049
+ if (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) {
4050
+ pvt -> ctl_name = "MI200" ;
4051
+ pvt -> max_mcs = 4 ;
4052
+ pvt -> ops = & gpu_ops ;
4053
+ } else {
4054
+ pvt -> ctl_name = "F19h_M30h" ;
4055
+ pvt -> max_mcs = 8 ;
4056
+ }
4057
+ break ;
3816
4058
case 0x50 ... 0x5f :
3817
4059
pvt -> ctl_name = "F19h_M50h" ;
3818
4060
break ;
@@ -3846,11 +4088,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
3846
4088
struct edac_mc_layer layers [2 ];
3847
4089
int ret = - ENOMEM ;
3848
4090
4091
+ /*
4092
+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
4093
+ * be swapped to fit into the layers.
4094
+ */
3849
4095
layers [0 ].type = EDAC_MC_LAYER_CHIP_SELECT ;
3850
- layers [0 ].size = pvt -> csels [0 ].b_cnt ;
4096
+ layers [0 ].size = (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) ?
4097
+ pvt -> max_mcs : pvt -> csels [0 ].b_cnt ;
3851
4098
layers [0 ].is_virt_csrow = true;
3852
4099
layers [1 ].type = EDAC_MC_LAYER_CHANNEL ;
3853
- layers [1 ].size = pvt -> max_mcs ;
4100
+ layers [1 ].size = (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) ?
4101
+ pvt -> csels [0 ].b_cnt : pvt -> max_mcs ;
3854
4102
layers [1 ].is_virt_csrow = false;
3855
4103
3856
4104
mci = edac_mc_alloc (pvt -> mc_node_id , ARRAY_SIZE (layers ), layers , 0 );
0 commit comments