Skip to content

Commit 26bdace

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling fixes from Thomas Gleixner:

 - fix 'perf test Session topology' segfault on s390 (Thomas Richter)
 - fix NULL return handling in bpf__prepare_load() (YueHaibing)
 - fix indexing on Coresight ETM packet queue decoder (Mathieu Poirier)
 - fix perf.data format description of NRCPUS header (Arnaldo Carvalho de Melo)
 - update perf.data documentation section on cpu topology
 - handle uncore event aliases in small groups properly (Kan Liang)
 - add missing perf_sample.addr into python sample dictionary (Leo Yan)

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix perf.data format description of NRCPUS header
  perf script python: Add addr into perf sample dict
  perf data: Update documentation section on cpu topology
  perf cs-etm: Fix indexing for decoder packet queue
  perf bpf: Fix NULL return handling in bpf__prepare_load()
  perf test: "Session topology" dumps core on s390
  perf parse-events: Handle uncore event aliases in small groups properly
2 parents 918fe1b + 6497bbc commit 26bdace

File tree

9 files changed

+185
-21
lines changed

9 files changed

+185
-21
lines changed

tools/perf/Documentation/perf.data-file-format.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m)
111111
A structure defining the number of CPUs.
112112

113113
struct nr_cpus {
114-
uint32_t nr_cpus_online;
115114
uint32_t nr_cpus_available; /* CPUs not yet onlined */
115+
uint32_t nr_cpus_online;
116116
};
117117

118118
HEADER_CPUDESC = 8,
@@ -153,10 +153,18 @@ struct {
153153
HEADER_CPU_TOPOLOGY = 13,
154154

155155
String lists defining the core and CPU threads topology.
156+
The string lists are followed by a variable length array
157+
which contains core_id and socket_id of each cpu.
158+
The number of entries can be determined by the size of the
159+
section minus the sizes of both string lists.
156160

157161
struct {
158162
struct perf_header_string_list cores; /* Variable length */
159163
struct perf_header_string_list threads; /* Variable length */
164+
struct {
165+
uint32_t core_id;
166+
uint32_t socket_id;
167+
} cpus[nr]; /* Variable length records */
160168
};
161169

162170
Example:

tools/perf/tests/topology.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
7070
session = perf_session__new(&data, false, NULL);
7171
TEST_ASSERT_VAL("can't get session", session);
7272

73+
/* On platforms with large numbers of CPUs process_cpu_topology()
74+
* might issue an error while reading the perf.data file section
75+
* HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
76+
* cpu is a NULL pointer.
77+
* Example: On s390
78+
* CPU 0 is on core_id 0 and physical_package_id 6
79+
* CPU 1 is on core_id 1 and physical_package_id 3
80+
*
81+
* Core_id and physical_package_id are platform and architecture
82+
* dependent and might have higher numbers than the CPU id.
83+
* This actually depends on the configuration.
84+
*
85+
* In this case process_cpu_topology() prints error message:
86+
* "socket_id number is too big. You may need to upgrade the
87+
* perf tool."
88+
*
89+
* This is the reason why this test might be skipped.
90+
*/
91+
if (!session->header.env.cpu)
92+
return TEST_SKIP;
93+
7394
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
7495
if (!cpu_map__has(map, i))
7596
continue;
@@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
95116
{
96117
char path[PATH_MAX];
97118
struct cpu_map *map;
98-
int ret = -1;
119+
int ret = TEST_FAIL;
99120

100121
TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
101122

@@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
110131
goto free_path;
111132
}
112133

113-
if (check_cpu_topology(path, map))
114-
goto free_map;
115-
ret = 0;
116-
117-
free_map:
134+
ret = check_cpu_topology(path, map);
118135
cpu_map__put(map);
136+
119137
free_path:
120138
unlink(path);
121139
return ret;

tools/perf/util/bpf-loader.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
6666
}
6767

6868
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
69-
if (IS_ERR(obj)) {
69+
if (IS_ERR_OR_NULL(obj)) {
7070
pr_debug("bpf: failed to load buffer\n");
7171
return ERR_PTR(-EINVAL);
7272
}
@@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
102102
pr_debug("bpf: successfull builtin compilation\n");
103103
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
104104

105-
if (!IS_ERR(obj) && llvm_param.dump_obj)
105+
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
106106
llvm__dump_obj(filename, obj_buf, obj_buf_sz);
107107

108108
free(obj_buf);
109109
} else
110110
obj = bpf_object__open(filename);
111111

112-
if (IS_ERR(obj)) {
112+
if (IS_ERR_OR_NULL(obj)) {
113113
pr_debug("bpf: failed to load %s\n", filename);
114114
return obj;
115115
}

tools/perf/util/cs-etm-decoder/cs-etm-decoder.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
9696
/* Nothing to do, might as well just return */
9797
if (decoder->packet_count == 0)
9898
return 0;
99+
/*
100+
* The queueing process in function cs_etm_decoder__buffer_packet()
101+
* increments the tail *before* using it. This is somewhat counter
102+
* intuitive but it has the advantage of centralizing tail management
103+
* at a single location. Because of that we need to follow the same
104+
* heuristic with the head, i.e. we increment it before using its
105+
* value. Otherwise the first element of the packet queue is not
106+
* used.
107+
*/
108+
decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
99109

100110
*packet = decoder->packet_buffer[decoder->head];
101111

102-
decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
103-
104112
decoder->packet_count--;
105113

106114
return 1;

tools/perf/util/evsel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ struct perf_evsel {
127127
bool precise_max;
128128
bool ignore_missing_thread;
129129
bool forced_leader;
130+
bool use_uncore_alias;
130131
/* parse modifier helper */
131132
int exclude_GH;
132133
int nr_members;

tools/perf/util/parse-events.c

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,13 +1219,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
12191219

12201220
int parse_events_add_pmu(struct parse_events_state *parse_state,
12211221
struct list_head *list, char *name,
1222-
struct list_head *head_config, bool auto_merge_stats)
1222+
struct list_head *head_config,
1223+
bool auto_merge_stats,
1224+
bool use_alias)
12231225
{
12241226
struct perf_event_attr attr;
12251227
struct perf_pmu_info info;
12261228
struct perf_pmu *pmu;
12271229
struct perf_evsel *evsel;
12281230
struct parse_events_error *err = parse_state->error;
1231+
bool use_uncore_alias;
12291232
LIST_HEAD(config_terms);
12301233

12311234
pmu = perf_pmu__find(name);
@@ -1244,11 +1247,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
12441247
memset(&attr, 0, sizeof(attr));
12451248
}
12461249

1250+
use_uncore_alias = (pmu->is_uncore && use_alias);
1251+
12471252
if (!head_config) {
12481253
attr.type = pmu->type;
12491254
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
12501255
if (evsel) {
12511256
evsel->pmu_name = name;
1257+
evsel->use_uncore_alias = use_uncore_alias;
12521258
return 0;
12531259
} else {
12541260
return -ENOMEM;
@@ -1282,6 +1288,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
12821288
evsel->metric_expr = info.metric_expr;
12831289
evsel->metric_name = info.metric_name;
12841290
evsel->pmu_name = name;
1291+
evsel->use_uncore_alias = use_uncore_alias;
12851292
}
12861293

12871294
return evsel ? 0 : -ENOMEM;
@@ -1317,7 +1324,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
13171324
list_add_tail(&term->list, head);
13181325

13191326
if (!parse_events_add_pmu(parse_state, list,
1320-
pmu->name, head, true)) {
1327+
pmu->name, head,
1328+
true, true)) {
13211329
pr_debug("%s -> %s/%s/\n", str,
13221330
pmu->name, alias->str);
13231331
ok++;
@@ -1339,7 +1347,120 @@ int parse_events__modifier_group(struct list_head *list,
13391347
return parse_events__modifier_event(list, event_mod, true);
13401348
}
13411349

1342-
void parse_events__set_leader(char *name, struct list_head *list)
1350+
/*
1351+
* Check if the two uncore PMUs are from the same uncore block
1352+
* The format of the uncore PMU name is uncore_#blockname_#pmuidx
1353+
*/
1354+
static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
1355+
{
1356+
char *end_a, *end_b;
1357+
1358+
end_a = strrchr(pmu_name_a, '_');
1359+
end_b = strrchr(pmu_name_b, '_');
1360+
1361+
if (!end_a || !end_b)
1362+
return false;
1363+
1364+
if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
1365+
return false;
1366+
1367+
return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
1368+
}
1369+
1370+
static int
1371+
parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
1372+
struct parse_events_state *parse_state)
1373+
{
1374+
struct perf_evsel *evsel, *leader;
1375+
uintptr_t *leaders;
1376+
bool is_leader = true;
1377+
int i, nr_pmu = 0, total_members, ret = 0;
1378+
1379+
leader = list_first_entry(list, struct perf_evsel, node);
1380+
evsel = list_last_entry(list, struct perf_evsel, node);
1381+
total_members = evsel->idx - leader->idx + 1;
1382+
1383+
leaders = calloc(total_members, sizeof(uintptr_t));
1384+
if (WARN_ON(!leaders))
1385+
return 0;
1386+
1387+
/*
1388+
* Going through the whole group and doing sanity check.
1389+
* All members must use alias, and be from the same uncore block.
1390+
* Also, storing the leader events in an array.
1391+
*/
1392+
__evlist__for_each_entry(list, evsel) {
1393+
1394+
/* Only split the uncore group which members use alias */
1395+
if (!evsel->use_uncore_alias)
1396+
goto out;
1397+
1398+
/* The events must be from the same uncore block */
1399+
if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
1400+
goto out;
1401+
1402+
if (!is_leader)
1403+
continue;
1404+
/*
1405+
* If the event's PMU name starts to repeat, it must be a new
1406+
* event. That can be used to distinguish the leader from
1407+
* other members, even they have the same event name.
1408+
*/
1409+
if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
1410+
is_leader = false;
1411+
continue;
1412+
}
1413+
/* The name is always alias name */
1414+
WARN_ON(strcmp(leader->name, evsel->name));
1415+
1416+
/* Store the leader event for each PMU */
1417+
leaders[nr_pmu++] = (uintptr_t) evsel;
1418+
}
1419+
1420+
/* only one event alias */
1421+
if (nr_pmu == total_members) {
1422+
parse_state->nr_groups--;
1423+
goto handled;
1424+
}
1425+
1426+
/*
1427+
* An uncore event alias is a joint name which means the same event
1428+
* runs on all PMUs of a block.
1429+
* Perf doesn't support mixed events from different PMUs in the same
1430+
* group. The big group has to be split into multiple small groups
1431+
* which only include the events from the same PMU.
1432+
*
1433+
* Here the uncore event aliases must be from the same uncore block.
1434+
* The number of PMUs must be same for each alias. The number of new
1435+
* small groups equals to the number of PMUs.
1436+
* Setting the leader event for corresponding members in each group.
1437+
*/
1438+
i = 0;
1439+
__evlist__for_each_entry(list, evsel) {
1440+
if (i >= nr_pmu)
1441+
i = 0;
1442+
evsel->leader = (struct perf_evsel *) leaders[i++];
1443+
}
1444+
1445+
/* The number of members and group name are same for each group */
1446+
for (i = 0; i < nr_pmu; i++) {
1447+
evsel = (struct perf_evsel *) leaders[i];
1448+
evsel->nr_members = total_members / nr_pmu;
1449+
evsel->group_name = name ? strdup(name) : NULL;
1450+
}
1451+
1452+
/* Take the new small groups into account */
1453+
parse_state->nr_groups += nr_pmu - 1;
1454+
1455+
handled:
1456+
ret = 1;
1457+
out:
1458+
free(leaders);
1459+
return ret;
1460+
}
1461+
1462+
void parse_events__set_leader(char *name, struct list_head *list,
1463+
struct parse_events_state *parse_state)
13431464
{
13441465
struct perf_evsel *leader;
13451466

@@ -1348,6 +1469,9 @@ void parse_events__set_leader(char *name, struct list_head *list)
13481469
return;
13491470
}
13501471

1472+
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
1473+
return;
1474+
13511475
__perf_evlist__set_leader(list);
13521476
leader = list_entry(list->next, struct perf_evsel, node);
13531477
leader->group_name = name ? strdup(name) : NULL;

tools/perf/util/parse-events.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
167167
void *ptr, char *type, u64 len);
168168
int parse_events_add_pmu(struct parse_events_state *parse_state,
169169
struct list_head *list, char *name,
170-
struct list_head *head_config, bool auto_merge_stats);
170+
struct list_head *head_config,
171+
bool auto_merge_stats,
172+
bool use_alias);
171173

172174
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
173175
char *str,
@@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old,
178180

179181
enum perf_pmu_event_symbol_type
180182
perf_pmu__parse_check(const char *name);
181-
void parse_events__set_leader(char *name, struct list_head *list);
183+
void parse_events__set_leader(char *name, struct list_head *list,
184+
struct parse_events_state *parse_state);
182185
void parse_events_update_lists(struct list_head *list_event,
183186
struct list_head *list_all);
184187
void parse_events_evlist_error(struct parse_events_state *parse_state,

tools/perf/util/parse-events.y

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ PE_NAME '{' events '}'
161161
struct list_head *list = $3;
162162

163163
inc_group_count(list, _parse_state);
164-
parse_events__set_leader($1, list);
164+
parse_events__set_leader($1, list, _parse_state);
165165
$$ = list;
166166
}
167167
|
@@ -170,7 +170,7 @@ PE_NAME '{' events '}'
170170
struct list_head *list = $2;
171171

172172
inc_group_count(list, _parse_state);
173-
parse_events__set_leader(NULL, list);
173+
parse_events__set_leader(NULL, list, _parse_state);
174174
$$ = list;
175175
}
176176

@@ -232,7 +232,7 @@ PE_NAME opt_event_config
232232
YYABORT;
233233

234234
ALLOC_LIST(list);
235-
if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
235+
if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
236236
struct perf_pmu *pmu = NULL;
237237
int ok = 0;
238238
char *pattern;
@@ -251,7 +251,7 @@ PE_NAME opt_event_config
251251
free(pattern);
252252
YYABORT;
253253
}
254-
if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
254+
if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
255255
ok++;
256256
parse_events_terms__delete(terms);
257257
}

tools/perf/util/scripting-engines/trace-event-python.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
531531
PyLong_FromUnsignedLongLong(sample->period));
532532
pydict_set_item_string_decref(dict_sample, "phys_addr",
533533
PyLong_FromUnsignedLongLong(sample->phys_addr));
534+
pydict_set_item_string_decref(dict_sample, "addr",
535+
PyLong_FromUnsignedLongLong(sample->addr));
534536
set_sample_read_in_dict(dict_sample, sample, evsel);
535537
pydict_set_item_string_decref(dict, "sample", dict_sample);
536538

0 commit comments

Comments
 (0)