Skip to content

Commit 2a81160

Browse files
Peter Newman authored and bp3tk0v committed
x86/resctrl: Fix event counts regression in reused RMIDs
When creating a new monitoring group, the RMID allocated for it may have been used by a group which was previously removed. In this case, the hardware counters will have non-zero values which should be deducted from what is reported in the new group's counts.

resctrl_arch_reset_rmid() initializes the prev_msr value for counters to 0, causing the initial count to be charged to the new group. Resurrect __rmid_read() and use it to initialize prev_msr correctly.

Unlike before, __rmid_read() checks for error bits in the MSR read so that callers don't need to.

Fixes: 1d81d15 ("x86/resctrl: Move mbm_overflow_count() into resctrl_arch_rmid_read()")
Signed-off-by: Peter Newman <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Reinette Chatre <[email protected]>
Tested-by: Babu Moger <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
1 parent fe1f071 commit 2a81160

File tree

1 file changed

+33
-16
lines changed

1 file changed

+33
-16
lines changed

arch/x86/kernel/cpu/resctrl/monitor.c

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
146146
return entry;
147147
}
148148

149+
static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
150+
{
151+
u64 msr_val;
152+
153+
/*
154+
* As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
155+
* with a valid event code for supported resource type and the bits
156+
* IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
157+
* IA32_QM_CTR.data (bits 61:0) reports the monitored data.
158+
* IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
159+
* are error bits.
160+
*/
161+
wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
162+
rdmsrl(MSR_IA32_QM_CTR, msr_val);
163+
164+
if (msr_val & RMID_VAL_ERROR)
165+
return -EIO;
166+
if (msr_val & RMID_VAL_UNAVAIL)
167+
return -EINVAL;
168+
169+
*val = msr_val;
170+
return 0;
171+
}
172+
149173
static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
150174
u32 rmid,
151175
enum resctrl_event_id eventid)
@@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
172196
struct arch_mbm_state *am;
173197

174198
am = get_arch_mbm_state(hw_dom, rmid, eventid);
175-
if (am)
199+
if (am) {
176200
memset(am, 0, sizeof(*am));
201+
202+
/* Record any initial, non-zero count value. */
203+
__rmid_read(rmid, eventid, &am->prev_msr);
204+
}
177205
}
178206

179207
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
191219
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
192220
struct arch_mbm_state *am;
193221
u64 msr_val, chunks;
222+
int ret;
194223

195224
if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
196225
return -EINVAL;
197226

198-
/*
199-
* As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
200-
* with a valid event code for supported resource type and the bits
201-
* IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
202-
* IA32_QM_CTR.data (bits 61:0) reports the monitored data.
203-
* IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
204-
* are error bits.
205-
*/
206-
wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
207-
rdmsrl(MSR_IA32_QM_CTR, msr_val);
208-
209-
if (msr_val & RMID_VAL_ERROR)
210-
return -EIO;
211-
if (msr_val & RMID_VAL_UNAVAIL)
212-
return -EINVAL;
227+
ret = __rmid_read(rmid, eventid, &msr_val);
228+
if (ret)
229+
return ret;
213230

214231
am = get_arch_mbm_state(hw_dom, rmid, eventid);
215232
if (am) {

0 commit comments

Comments
 (0)