Skip to content

Commit 3431a94

Browse files
committed
Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 AVX512 status update from Ingo Molnar: "This adds a new ABI that the main scheduler probably doesn't want to deal with but HPC job schedulers might want to use: the AVX512_elapsed_ms field in the new /proc/<pid>/arch_status task status file, which allows the user-space job scheduler to cluster such tasks, to avoid turbo frequency drops" * 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: Documentation/filesystems/proc.txt: Add arch_status file x86/process: Add AVX-512 usage elapsed time to /proc/pid/arch_status proc: Add /proc/<pid>/arch_status
2 parents 5b7a209 + 711486f commit 3431a94

File tree

6 files changed

+107
-0
lines changed

6 files changed

+107
-0
lines changed

Documentation/filesystems/proc.txt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Table of Contents
4545
3.9 /proc/<pid>/map_files - Information about memory mapped files
4646
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
4747
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
48+
3.12 /proc/<pid>/arch_status - Task architecture specific status
4849

4950
4 Configuring procfs
5051
4.1 Mount options
@@ -1948,6 +1949,45 @@ patched. If the patch is being enabled, then the task has already been
19481949
patched. If the patch is being disabled, then the task hasn't been
19491950
unpatched yet.
19501951

1952+
3.12 /proc/<pid>/arch_status - task architecture specific status
1953+
-------------------------------------------------------------------
1954+
When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
1955+
architecture specific status of the task.
1956+
1957+
Example
1958+
-------
1959+
$ cat /proc/6753/arch_status
1960+
AVX512_elapsed_ms: 8
1961+
1962+
Description
1963+
-----------
1964+
1965+
x86 specific entries:
1966+
---------------------
1967+
AVX512_elapsed_ms:
1968+
------------------
1969+
If AVX512 is supported on the machine, this entry shows the milliseconds
1970+
elapsed since the last time AVX512 usage was recorded. The recording
1971+
happens on a best effort basis when a task is scheduled out. This means
1972+
that the value depends on two factors:
1973+
1974+
1) The time which the task spent on the CPU without being scheduled
1975+
out. With CPU isolation and a single runnable task this can take
1976+
several seconds.
1977+
1978+
2) The time since the task was scheduled out last. Depending on the
1979+
reason for being scheduled out (time slice exhausted, syscall ...)
1980+
this can be an arbitrarily long time.
1981+
1982+
As a consequence the value cannot be considered precise and authoritative
1983+
information. The application which uses this information has to be aware
1984+
of the overall scenario on the system in order to determine whether a
1985+
task is a real AVX512 user or not. Precise information can be obtained
1986+
with performance counters.
1987+
1988+
A special value of '-1' indicates that no AVX512 usage was recorded, so
1989+
the task is unlikely to be an AVX512 user. Depending on the workload and
1990+
the scheduling scenario, it can still be a false negative as described above.
19511991

19521992
------------------------------------------------------------------------------
19531993
Configuring procfs

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ config X86
220220
select USER_STACKTRACE_SUPPORT
221221
select VIRT_TO_BUS
222222
select X86_FEATURE_NAMES if PROC_FS
223+
select PROC_PID_ARCH_STATUS if PROC_FS
223224

224225
config INSTRUCTION_DECODER
225226
def_bool y

arch/x86/kernel/fpu/xstate.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include <linux/cpu.h>
99
#include <linux/mman.h>
1010
#include <linux/pkeys.h>
11+
#include <linux/seq_file.h>
12+
#include <linux/proc_fs.h>
1113

1214
#include <asm/fpu/api.h>
1315
#include <asm/fpu/internal.h>
@@ -1231,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
12311233

12321234
return 0;
12331235
}
1236+
1237+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
1238+
/*
1239+
* Report the time elapsed, in milliseconds, since the last recorded AVX512
1240+
* use in the task.
*
* Writes a single line "AVX512_elapsed_ms:\t<value>\n" to @m, where <value>
* is derived from task->thread.fpu.avx512_timestamp and the current jiffies.
* A value of -1 is written when the timestamp is zero, i.e. when no AVX512
* usage has been recorded for @task.
1241+
*/
1242+
static void avx512_status(struct seq_file *m, struct task_struct *task)
1243+
{
/* Take one snapshot of the timestamp; every later step uses this copy. */
1244+
unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1245+
long delta;
1246+
1247+
if (!timestamp) {
1248+
/*
1249+
* A zero timestamp means no AVX512 usage was recorded: report -1.
1250+
*/
1251+
delta = -1;
1252+
} else {
1253+
delta = (long)(jiffies - timestamp);
1254+
/*
1255+
* The unsigned difference is reinterpreted as signed above; a
* negative result means it exceeded LONG_MAX, so cap it to LONG_MAX.
1256+
*/
1257+
if (delta < 0)
1258+
delta = LONG_MAX;
/*
 * Convert from jiffies to milliseconds.
 * NOTE(review): jiffies_to_msecs() is declared outside this view;
 * confirm its return type is wide enough that a large delta is not
 * truncated here.
 */
1259+
delta = jiffies_to_msecs(delta);
1260+
}
1261+
1262+
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1263+
seq_putc(m, '\n');
1264+
}
1265+
1266+
/*
1267+
* Report architecture specific information about @task into @m.
*
* @ns and @pid are not used by this implementation. Always returns 0,
* including when nothing is written because the AVX512F feature check
* below fails.
1268+
*/
1269+
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1270+
struct pid *pid, struct task_struct *task)
1271+
{
1272+
/*
1273+
* Report AVX512 state only if cpu_feature_enabled() says AVX512F is
* available; otherwise this function emits no output.
1274+
*/
1275+
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1276+
avx512_status(m, task);
1277+
1278+
return 0;
1279+
}
1280+
#endif /* CONFIG_PROC_PID_ARCH_STATUS */

fs/proc/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,7 @@ config PROC_CHILDREN
9898

9999
Say Y if you are running any user-space software which takes benefit from
100100
this interface. For example, rkt is such a piece of software.
101+
102+
config PROC_PID_ARCH_STATUS
103+
def_bool n
104+
depends on PROC_FS

fs/proc/base.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
30613061
#ifdef CONFIG_STACKLEAK_METRICS
30623062
ONE("stack_depth", S_IRUGO, proc_stack_depth),
30633063
#endif
3064+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
3065+
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3066+
#endif
30643067
};
30653068

30663069
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3448,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
34483451
#ifdef CONFIG_LIVEPATCH
34493452
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
34503453
#endif
3454+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
3455+
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3456+
#endif
34513457
};
34523458

34533459
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)

include/linux/proc_fs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
7575
void *data);
7676
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
7777

78+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
79+
/*
80+
* An architecture that selects CONFIG_PROC_PID_ARCH_STATUS must
81+
* provide a definition of proc_pid_arch_status().
82+
*/
83+
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
84+
struct pid *pid, struct task_struct *task);
85+
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
86+
7887
#else /* CONFIG_PROC_FS */
7988

8089
static inline void proc_root_init(void)

0 commit comments

Comments
 (0)