@@ -1941,6 +1941,11 @@ static void perf_put_aux_event(struct perf_event *event)
 	}
 }
 
+static bool perf_need_aux_event(struct perf_event *event)
+{
+	return !!event->attr.aux_output || !!event->attr.aux_sample_size;
+}
+
 static int perf_get_aux_event(struct perf_event *event,
 			      struct perf_event *group_leader)
 {
@@ -1953,7 +1958,17 @@ static int perf_get_aux_event(struct perf_event *event,
 	if (!group_leader)
 		return 0;
 
-	if (!perf_aux_output_match(event, group_leader))
+	/*
+	 * aux_output and aux_sample_size are mutually exclusive.
+	 */
+	if (event->attr.aux_output && event->attr.aux_sample_size)
+		return 0;
+
+	if (event->attr.aux_output &&
+	    !perf_aux_output_match(event, group_leader))
+		return 0;
+
+	if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
 		return 0;
 
 	if (!atomic_long_inc_not_zero(&group_leader->refcount))
@@ -6222,6 +6237,122 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
 	}
 }
 
+static unsigned long perf_prepare_sample_aux(struct perf_event *event,
+					     struct perf_sample_data *data,
+					     size_t size)
+{
+	struct perf_event *sampler = event->aux_event;
+	struct ring_buffer *rb;
+
+	data->aux_size = 0;
+
+	if (!sampler)
+		goto out;
+
+	if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
+		goto out;
+
+	if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
+		goto out;
+
+	rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+	if (!rb)
+		goto out;
+
+	/*
+	 * If this is an NMI hit inside sampling code, don't take
+	 * the sample. See also perf_aux_sample_output().
+	 */
+	if (READ_ONCE(rb->aux_in_sampling)) {
+		data->aux_size = 0;
+	} else {
+		size = min_t(size_t, size, perf_aux_size(rb));
+		data->aux_size = ALIGN(size, sizeof(u64));
+	}
+	ring_buffer_put(rb);
+
+out:
+	return data->aux_size;
+}
+
+long perf_pmu_snapshot_aux(struct ring_buffer *rb,
+			   struct perf_event *event,
+			   struct perf_output_handle *handle,
+			   unsigned long size)
+{
+	unsigned long flags;
+	long ret;
+
+	/*
+	 * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler
+	 * paths. If we start calling them in NMI context, they may race with
+	 * the IRQ ones, that is, for example, re-starting an event that's just
+	 * been stopped, which is why we're using a separate callback that
+	 * doesn't change the event state.
+	 *
+	 * IRQs need to be disabled to prevent IPIs from racing with us.
+	 */
+	local_irq_save(flags);
+	/*
+	 * Guard against NMI hits inside the critical section;
+	 * see also perf_prepare_sample_aux().
+	 */
+	WRITE_ONCE(rb->aux_in_sampling, 1);
+	barrier();
+
+	ret = event->pmu->snapshot_aux(event, handle, size);
+
+	barrier();
+	WRITE_ONCE(rb->aux_in_sampling, 0);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static void perf_aux_sample_output(struct perf_event *event,
+				   struct perf_output_handle *handle,
+				   struct perf_sample_data *data)
+{
+	struct perf_event *sampler = event->aux_event;
+	unsigned long pad;
+	struct ring_buffer *rb;
+	long size;
+
+	if (WARN_ON_ONCE(!sampler || !data->aux_size))
+		return;
+
+	rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+	if (!rb)
+		return;
+
+	size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
+
+	/*
+	 * An error here means that perf_output_copy() failed (returned a
+	 * non-zero surplus that it didn't copy), which in its current
+	 * enlightened implementation is not possible. If that changes, we'd
+	 * like to know.
+	 */
+	if (WARN_ON_ONCE(size < 0))
+		goto out_put;
+
+	/*
+	 * The pad comes from ALIGN()ing data->aux_size up to u64 in
+	 * perf_prepare_sample_aux(), so should not be more than that.
+	 */
+	pad = data->aux_size - size;
+	if (WARN_ON_ONCE(pad >= sizeof(u64)))
+		pad = 8;
+
+	if (pad) {
+		u64 zero = 0;
+		perf_output_copy(handle, &zero, pad);
+	}
+
+out_put:
+	ring_buffer_put(rb);
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
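
The heavy lifting above is delegated to the PMU's new ->snapshot_aux() callback, which has to copy already-collected AUX data into the output handle without starting or stopping the event, since it may run in NMI context. Below is a minimal sketch of what such a callback might look like in a hypothetical driver; struct my_pmu_buf, my_pmu_get_buf() and the flat-buffer layout are made-up driver internals, and only perf_output_copy() and the calling convention (return the number of bytes written, at most @size) come from the patch.

/* made-up driver state: a linear AUX buffer with a write cursor */
struct my_pmu_buf {
	void		*data;
	unsigned long	head;
};

static long my_pmu_snapshot_aux(struct perf_event *event,
				struct perf_output_handle *handle,
				unsigned long size)
{
	struct my_pmu_buf *buf = my_pmu_get_buf(event);	/* hypothetical helper */
	unsigned long from, len;

	if (!buf)
		return 0;

	/* grab the most recent @size bytes; no wrap handling in this sketch */
	from = buf->head > size ? buf->head - size : 0;
	len = buf->head - from;

	/* perf_output_copy() returns the number of bytes it did NOT copy */
	return len - perf_output_copy(handle, buf->data + from, len);
}
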
@@ -6541,6 +6672,13 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		perf_output_put(handle, data->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_AUX) {
+		perf_output_put(handle, data->aux_size);
+
+		if (data->aux_size)
+			perf_aux_sample_output(event, handle, data);
+	}
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -6729,6 +6867,35 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		data->phys_addr = perf_virt_to_phys(data->addr);
+
+	if (sample_type & PERF_SAMPLE_AUX) {
+		u64 size;
+
+		header->size += sizeof(u64); /* size */
+
+		/*
+		 * Given the 16bit nature of header::size, an AUX sample can
+		 * easily overflow it, what with all the preceding sample bits.
+		 * Make sure this doesn't happen by using up to U16_MAX bytes
+		 * per sample in total (rounded down to 8 byte boundary).
+		 */
+		size = min_t(size_t, U16_MAX - header->size,
+			     event->attr.aux_sample_size);
+		size = rounddown(size, 8);
+		size = perf_prepare_sample_aux(event, data, size);
+
+		WARN_ON_ONCE(size + header->size > U16_MAX);
+		header->size += size;
+	}
+	/*
+	 * If you're adding more sample types here, you likely need to do
+	 * something about the overflowing header::size, like repurpose the
+	 * lowest 3 bits of size, which should be always zero at the moment.
+	 * This raises a more important question, do we really need 512k sized
+	 * samples and why, so good argumentation is in order for whatever you
+	 * do here next.
+	 */
+	WARN_ON_ONCE(header->size & 7);
 }
 
 static __always_inline int
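
Taken together with the perf_output_sample() hunk above, the PERF_SAMPLE_AUX portion of a sample record is a u64 size followed by that many bytes of AUX data, zero-padded up to an 8-byte boundary. The following consumer-side view is illustrative only (not part of the patch) and assumes the caller has already located where this part starts within the record; parse_aux_part() and its parameters are hypothetical.

#include <linux/types.h>

/* PERF_SAMPLE_AUX part of a sample record, as laid out by the patch */
struct aux_sample_part {
	__u64	size;		/* data->aux_size, already a multiple of 8 */
	char	data[];		/* AUX snapshot, zero-padded to size bytes */
};

/* returns a pointer just past the AUX part of the sample */
static const void *parse_aux_part(const void *p, const void **aux_data,
				  __u64 *aux_size)
{
	const struct aux_sample_part *part = p;

	*aux_data = part->data;
	*aux_size = part->size;

	return part->data + part->size;
}
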
@@ -10727,7 +10894,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
 	attr->size = size;
 
-	if (attr->__reserved_1 || attr->__reserved_2)
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -11277,7 +11444,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
-	if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
+	if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader))
 		goto err_locked;
 
 	/*
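
For completeness, here is a hedged userspace sketch (not part of the patch) of how the new sampling mode might be exercised. It assumes the matching uapi changes that add PERF_SAMPLE_AUX and perf_event_attr::aux_sample_size, and an already-open AUX-capable group leader fd (e.g. an Intel PT event) on the same task and CPU.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* group_leader_fd is assumed to be an AUX-producing event, e.g. Intel PT */
static int open_aux_sampled_event(int group_leader_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_AUX;
	attr.aux_sample_size = 4096;	/* bytes of AUX data attached per sample */
	attr.disabled = 1;

	/* same task/CPU as the leader (pid 0, any CPU assumed here) */
	return syscall(__NR_perf_event_open, &attr, 0, -1, group_leader_fd, 0);
}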