Skip to content

Commit 83bf240

Browse files
shubhraamdbp3tk0v
authored and committed
EDAC/versal: Make the bit position of injected errors configurable
Currently, the bit positions to inject correctable and uncorrectable errors are hardcoded. To make that configurable, add separate debugfs entries to set the bit positions for injecting CE and UE errors. Allow a single-bit error for CE injection and a two-bit error for UE injection. [ bp: Massage. ] Signed-off-by: Shubhrajyoti Datta <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent b57c1a1 commit 83bf240

File tree

1 file changed

+161
-32
lines changed

1 file changed

+161
-32
lines changed

drivers/edac/versal_edac.c

Lines changed: 161 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,11 @@
4242

4343
#define ECCW0_FLIP_CTRL 0x109C
4444
#define ECCW0_FLIP0_OFFSET 0x10A0
45+
#define ECCW0_FLIP0_BITS 31
46+
#define ECCW0_FLIP1_OFFSET 0x10A4
4547
#define ECCW1_FLIP_CTRL 0x10AC
4648
#define ECCW1_FLIP0_OFFSET 0x10B0
49+
#define ECCW1_FLIP1_OFFSET 0x10B4
4750
#define ECCR0_CERR_STAT_OFFSET 0x10BC
4851
#define ECCR0_CE_ADDR_LO_OFFSET 0x10C0
4952
#define ECCR0_CE_ADDR_HI_OFFSET 0x10C4
@@ -116,9 +119,6 @@
116119
#define XDDR_BUS_WIDTH_32 1
117120
#define XDDR_BUS_WIDTH_16 2
118121

119-
#define ECC_CEPOISON_MASK 0x1
120-
#define ECC_UEPOISON_MASK 0x3
121-
122122
#define XDDR_MAX_ROW_CNT 18
123123
#define XDDR_MAX_COL_CNT 10
124124
#define XDDR_MAX_RANK_CNT 2
@@ -133,6 +133,7 @@
133133
* https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register
134134
*/
135135
#define PCSR_UNLOCK_VAL 0xF9E8D7C6
136+
#define PCSR_LOCK_VAL 1
136137
#define XDDR_ERR_TYPE_CE 0
137138
#define XDDR_ERR_TYPE_UE 1
138139

@@ -142,6 +143,7 @@
142143
#define XILINX_DRAM_SIZE_12G 3
143144
#define XILINX_DRAM_SIZE_16G 4
144145
#define XILINX_DRAM_SIZE_32G 5
146+
#define NUM_UE_BITPOS 2
145147

146148
/**
147149
* struct ecc_error_info - ECC error log information.
@@ -479,7 +481,7 @@ static void err_callback(const u32 *payload, void *data)
479481
writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET);
480482

481483
/* Lock the PCSR registers */
482-
writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
484+
writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
483485
edac_dbg(3, "Total error count CE %d UE %d\n",
484486
priv->ce_cnt, priv->ue_cnt);
485487
}
@@ -650,7 +652,7 @@ static void enable_intr(struct edac_priv *priv)
650652
writel(XDDR_IRQ_UE_MASK,
651653
priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET);
652654
/* Lock the PCSR registers */
653-
writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
655+
writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
654656
}
655657

656658
static void disable_intr(struct edac_priv *priv)
@@ -663,7 +665,7 @@ static void disable_intr(struct edac_priv *priv)
663665
priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET);
664666

665667
/* Lock the PCSR registers */
666-
writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
668+
writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
667669
}
668670

669671
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
@@ -734,56 +736,174 @@ static void poison_setup(struct edac_priv *priv)
734736
writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET);
735737
}
736738

737-
static ssize_t xddr_inject_data_poison_store(struct mem_ctl_info *mci,
738-
const char __user *data)
739+
/*
 * Program the ECC FLIP registers so that exactly one bit, @ce_bitpos, is
 * corrupted in both ECC words.
 *
 * Positions below ECCW0_FLIP0_BITS are set in the FLIP0 registers; all
 * other positions are rebased by ECCW0_FLIP0_BITS and set in the FLIP1
 * registers. The registers that do not carry the bit are written with 0.
 *
 * No range check is done here; the caller is expected to pass a position
 * that fits the flip registers.
 */
static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos)
{
	struct edac_priv *priv = mci->pvt_info;
	u32 flip0_mask = 0, flip1_mask = 0;

	if (ce_bitpos >= ECCW0_FLIP0_BITS)
		flip1_mask = BIT(ce_bitpos - ECCW0_FLIP0_BITS);
	else
		flip0_mask = BIT(ce_bitpos);

	/* Same mask for ECC word 0 and ECC word 1, FLIP0 first, then FLIP1. */
	writel(flip0_mask, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
	writel(flip0_mask, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
	writel(flip1_mask, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
	writel(flip1_mask, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
}
762762

763-
static ssize_t inject_data_poison_store(struct file *file, const char __user *data,
764-
size_t count, loff_t *ppos)
763+
/*
764+
* To inject a correctable error, the following steps are needed:
765+
*
766+
* - Write the correctable error bit position value:
767+
* echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ce
768+
*
769+
* poison_setup() derives the row, column, bank, group and rank and
770+
* writes to the ADEC registers based on the address given by the user.
771+
*
772+
* The ADEC12 and ADEC13 are mask registers; write 0 to make sure default
773+
* configuration is there and no addresses are masked.
774+
*
775+
* The row, column, bank, group and rank registers are written to the
776+
* match ADEC bit to generate errors at the particular address. ADEC14
777+
* and ADEC15 have the match bits.
778+
*
779+
* xddr_inject_data_ce_store() updates the ECC FLIP registers with the
780+
* bits to be corrupted based on the bit position given by the user.
781+
*
782+
* Upon doing a read to the address the errors are injected.
783+
*/
784+
static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
785+
size_t count, loff_t *ppos)
765786
{
766787
struct device *dev = file->private_data;
767788
struct mem_ctl_info *mci = to_mci(dev);
768789
struct edac_priv *priv = mci->pvt_info;
790+
u8 ce_bitpos;
791+
int ret;
792+
793+
ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
794+
if (ret)
795+
return ret;
769796

770797
/* Unlock the PCSR registers */
771798
writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
772799
writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
773800

774801
poison_setup(priv);
775802

803+
xddr_inject_data_ce_store(mci, ce_bitpos);
804+
ret = count;
805+
776806
/* Lock the PCSR registers */
777-
writel(1, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
807+
writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
808+
writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
809+
810+
return ret;
811+
}
812+
813+
static const struct file_operations xddr_inject_ce_fops = {
814+
.open = simple_open,
815+
.write = inject_data_ce_store,
816+
.llseek = generic_file_llseek,
817+
};
818+
819+
/*
 * Program the ECC FLIP registers for an uncorrectable error injection.
 *
 * @val0 is the mask of bits to flip in the FLIP0 registers and @val1 the
 * mask for the FLIP1 registers, as built by inject_data_ue_store(). Both
 * ECC words get the same masks, mirroring xddr_inject_data_ce_store().
 *
 * All four registers are written so that no stale mask from an earlier
 * injection is left behind in any of them.
 *
 * NOTE(review): the previous code wrote @val0 to ECCW0_FLIP1, wrote
 * ECCW1_FLIP1 twice and never touched ECCW1_FLIP0, which looks like a
 * copy-paste slip; confirm the mapping against the register reference.
 */
static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
{
	struct edac_priv *priv = mci->pvt_info;

	writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
	writel(val0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
	writel(val1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
	writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
}
828+
829+
/*
830+
* To inject an uncorrectable error, the following steps are needed:
831+
* echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ue
832+
*
833+
* poison_setup() derives the row, column, bank, group and rank and
834+
* writes to the ADEC registers based on the address given by the user.
835+
*
836+
* The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
837+
* addresses are masked. The row, column, bank, group and rank registers
838+
* are written to the match ADEC bit to generate errors at the
839+
* particular address. ADEC14 and ADEC15 have the match bits.
840+
*
841+
* xddr_inject_data_ue_store() updates the ECC FLIP registers with the
842+
* bits to be corrupted based on the bit position given by the user. For
843+
* uncorrectable errors
844+
* 2 bit errors are injected.
845+
*
846+
* Upon doing a read to the address the errors are injected.
847+
*/
848+
static ssize_t inject_data_ue_store(struct file *file, const char __user *data,
849+
size_t count, loff_t *ppos)
850+
{
851+
struct device *dev = file->private_data;
852+
struct mem_ctl_info *mci = to_mci(dev);
853+
struct edac_priv *priv = mci->pvt_info;
854+
char buf[6], *pbuf, *token[2];
855+
u32 val0 = 0, val1 = 0;
856+
u8 len, ue0, ue1;
857+
int i, ret;
858+
859+
len = min_t(size_t, count, sizeof(buf));
860+
if (copy_from_user(buf, data, len))
861+
return -EFAULT;
862+
863+
buf[len] = '\0';
864+
pbuf = &buf[0];
865+
for (i = 0; i < NUM_UE_BITPOS; i++)
866+
token[i] = strsep(&pbuf, ",");
867+
868+
ret = kstrtou8(token[0], 0, &ue0);
869+
if (ret)
870+
return ret;
871+
872+
ret = kstrtou8(token[1], 0, &ue1);
873+
if (ret)
874+
return ret;
875+
876+
if (ue0 < ECCW0_FLIP0_BITS) {
877+
val0 = BIT(ue0);
878+
} else {
879+
ue0 = ue0 - ECCW0_FLIP0_BITS;
880+
val1 = BIT(ue0);
881+
}
778882

779-
xddr_inject_data_poison_store(mci, data);
883+
if (ue1 < ECCW0_FLIP0_BITS) {
884+
val0 |= BIT(ue1);
885+
} else {
886+
ue1 = ue1 - ECCW0_FLIP0_BITS;
887+
val1 |= BIT(ue1);
888+
}
780889

890+
/* Unlock the PCSR registers */
891+
writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
892+
writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
893+
894+
poison_setup(priv);
895+
896+
xddr_inject_data_ue_store(mci, val0, val1);
897+
898+
/* Lock the PCSR registers */
899+
writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
900+
writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
781901
return count;
782902
}
783903

784-
static const struct file_operations xddr_inject_enable_fops = {
904+
static const struct file_operations xddr_inject_ue_fops = {
785905
.open = simple_open,
786-
.write = inject_data_poison_store,
906+
.write = inject_data_ue_store,
787907
.llseek = generic_file_llseek,
788908
};
789909

@@ -795,8 +915,17 @@ static void create_debugfs_attributes(struct mem_ctl_info *mci)
795915
if (!priv->debugfs)
796916
return;
797917

798-
edac_debugfs_create_file("inject_error", 0200, priv->debugfs,
799-
&mci->dev, &xddr_inject_enable_fops);
918+
if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs,
919+
&mci->dev, &xddr_inject_ce_fops)) {
920+
debugfs_remove_recursive(priv->debugfs);
921+
return;
922+
}
923+
924+
if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs,
925+
&mci->dev, &xddr_inject_ue_fops)) {
926+
debugfs_remove_recursive(priv->debugfs);
927+
return;
928+
}
800929
debugfs_create_x64("address", 0600, priv->debugfs,
801930
&priv->err_inject_addr);
802931
mci->debugfs = priv->debugfs;

0 commit comments

Comments
 (0)