Skip to content

Commit c760174

Browse files
captain5050 authored and namhyung committed
perf cpumap: Reduce cpu size from int to int16_t
Fewer than 32k logical CPUs are currently supported by perf. A cpumap is indexed by an integer (see perf_cpu_map__cpu), yielding a perf_cpu that wraps a 4-byte int for the logical CPU - the wrapping is done deliberately to avoid confusing a logical CPU with an index into a cpumap. Using a 4-byte int within the perf_cpu is larger than required, so this patch reduces it to the 2-byte int16_t. For a cpumap containing 16 entries this will reduce the array size from 64 to 32 bytes. For very large servers with lots of logical CPUs the size savings will be greater.

Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Namhyung Kim <[email protected]>
1 parent 2337b72 commit c760174

File tree

4 files changed

+54
-27
lines changed

4 files changed

+54
-27
lines changed

tools/lib/perf/cpumap.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
185185
while (isdigit(*cpu_list)) {
186186
p = NULL;
187187
start_cpu = strtoul(cpu_list, &p, 0);
188-
if (start_cpu >= INT_MAX
188+
if (start_cpu >= INT16_MAX
189189
|| (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
190190
goto invalid;
191191

@@ -194,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
194194
p = NULL;
195195
end_cpu = strtoul(cpu_list, &p, 0);
196196

197-
if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
197+
if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
198198
goto invalid;
199199

200200
if (end_cpu < start_cpu)
@@ -209,7 +209,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
209209
for (; start_cpu <= end_cpu; start_cpu++) {
210210
/* check for duplicates */
211211
for (i = 0; i < nr_cpus; i++)
212-
if (tmp_cpus[i].cpu == (int)start_cpu)
212+
if (tmp_cpus[i].cpu == (int16_t)start_cpu)
213213
goto invalid;
214214

215215
if (nr_cpus == max_entries) {
@@ -219,7 +219,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
219219
goto invalid;
220220
tmp_cpus = tmp;
221221
}
222-
tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
222+
tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu;
223223
}
224224
if (*p)
225225
++p;

tools/lib/perf/include/perf/cpumap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
#include <perf/core.h>
66
#include <stdbool.h>
7+
#include <stdint.h>
78

89
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
910
struct perf_cpu {
10-
int cpu;
11+
int16_t cpu;
1112
};
1213

1314
struct perf_cache {

tools/perf/util/cpumap.c

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
6767
struct perf_cpu_map *map;
6868

6969
map = perf_cpu_map__empty_new(data->cpus_data.nr);
70-
if (map) {
71-
unsigned i;
72-
73-
for (i = 0; i < data->cpus_data.nr; i++) {
74-
/*
75-
* Special treatment for -1, which is not real cpu number,
76-
* and we need to use (int) -1 to initialize map[i],
77-
* otherwise it would become 65535.
78-
*/
79-
if (data->cpus_data.cpu[i] == (u16) -1)
80-
RC_CHK_ACCESS(map)->map[i].cpu = -1;
81-
else
82-
RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
70+
if (!map)
71+
return NULL;
72+
73+
for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
74+
/*
75+
* Special treatment for -1, which is not real cpu number,
76+
* and we need to use (int) -1 to initialize map[i],
77+
* otherwise it would become 65535.
78+
*/
79+
if (data->cpus_data.cpu[i] == (u16) -1) {
80+
RC_CHK_ACCESS(map)->map[i].cpu = -1;
81+
} else if (data->cpus_data.cpu[i] < INT16_MAX) {
82+
RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
83+
} else {
84+
pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
85+
perf_cpu_map__put(map);
86+
return NULL;
8387
}
8488
}
8589

@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
106110
int cpu;
107111

108112
perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
109-
for_each_set_bit(cpu, local_copy, 64)
110-
RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
113+
for_each_set_bit(cpu, local_copy, 64) {
114+
if (cpu + cpus_per_i < INT16_MAX) {
115+
RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
116+
} else {
117+
pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
118+
perf_cpu_map__put(map);
119+
return NULL;
120+
}
121+
}
111122
}
112123
return map;
113124

@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
127138
RC_CHK_ACCESS(map)->map[i++].cpu = -1;
128139

129140
for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
130-
i++, cpu++)
131-
RC_CHK_ACCESS(map)->map[i].cpu = cpu;
141+
i++, cpu++) {
142+
if (cpu < INT16_MAX) {
143+
RC_CHK_ACCESS(map)->map[i].cpu = cpu;
144+
} else {
145+
pr_err("Invalid cpumap entry %d\n", cpu);
146+
perf_cpu_map__put(map);
147+
return NULL;
148+
}
149+
}
132150

133151
return map;
134152
}
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
427445
{
428446
const char *mnt;
429447
char path[PATH_MAX];
430-
int ret = -1;
448+
int max, ret = -1;
431449

432450
/* set up default */
433451
max_cpu_num.cpu = 4096;
@@ -444,19 +462,27 @@ static void set_max_cpu_num(void)
444462
goto out;
445463
}
446464

447-
ret = get_max_num(path, &max_cpu_num.cpu);
465+
ret = get_max_num(path, &max);
448466
if (ret)
449467
goto out;
450468

469+
max_cpu_num.cpu = max;
470+
451471
/* get the highest present cpu number for a sparse allocation */
452472
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
453473
if (ret >= PATH_MAX) {
454474
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
455475
goto out;
456476
}
457477

458-
ret = get_max_num(path, &max_present_cpu_num.cpu);
478+
ret = get_max_num(path, &max);
459479

480+
if (!ret && max > INT16_MAX) {
481+
pr_err("Read out of bounds max cpus of %d\n", max);
482+
ret = -1;
483+
}
484+
if (!ret)
485+
max_present_cpu_num.cpu = (int16_t)max;
460486
out:
461487
if (ret)
462488
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
606632
#define COMMA first ? "" : ","
607633

608634
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
609-
struct perf_cpu cpu = { .cpu = INT_MAX };
635+
struct perf_cpu cpu = { .cpu = INT16_MAX };
610636
bool last = i == perf_cpu_map__nr(map);
611637

612638
if (!last)

tools/perf/util/env.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
543543

544544
for (i = 0; i < env->nr_numa_nodes; i++) {
545545
nn = &env->numa_nodes[i];
546-
nr = max(nr, perf_cpu_map__max(nn->map).cpu);
546+
nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
547547
}
548548

549549
nr++;

0 commit comments

Comments
 (0)