|
| 1 | +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ |
| 2 | +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ |
| 3 | +#ifndef __USDT_BPF_H__ |
| 4 | +#define __USDT_BPF_H__ |
| 5 | + |
| 6 | +#include <linux/errno.h> |
| 7 | +#include <bpf/bpf_helpers.h> |
| 8 | +#include <bpf/bpf_tracing.h> |
| 9 | +#include <bpf/bpf_core_read.h> |
| 10 | + |
| 11 | +/* Below types and maps are internal implementation details of libbpf's USDT |
| 12 | + * support and are subject to change. Also, bpf_usdt_xxx() API helpers should |
| 13 | + * be considered an unstable API as well and might be adjusted based on user |
| 14 | + * feedback from using libbpf's USDT support in production. |
| 15 | + */ |
| 16 | + |
/* Capacity of the internal map that keeps track of USDT argument
 * specifications. Define BPF_USDT_MAX_SPEC_CNT before including this header
 * to override the default, which might be necessary if there are a lot of
 * USDT attachments.
 */
#if !defined(BPF_USDT_MAX_SPEC_CNT)
#define BPF_USDT_MAX_SPEC_CNT 256
#endif

/* Capacity of the internal map that translates an IP (memory address) into
 * a USDT argument specification. It can be overridden the same way through
 * BPF_USDT_MAX_IP_CNT.
 * Note, if kernel supports BPF cookies, this map is not used and could be
 * resized all the way to 1 to save a bit of memory.
 */
#if !defined(BPF_USDT_MAX_IP_CNT)
#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
#endif

/* Support for BPF cookie is detected from the BPF side using BPF CO-RE. This
 * is the only dependency on CO-RE, so if it's undesirable, user can define
 * BPF_USDT_HAS_BPF_COOKIE up front to specify whether BPF cookie is
 * supported or not.
 */
#if !defined(BPF_USDT_HAS_BPF_COOKIE)
#define BPF_USDT_HAS_BPF_COOKIE \
	bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
#endif
| 41 | + |
/* Argument location cases handled by bpf_usdt_arg(). */
enum __bpf_usdt_arg_type {
	/* argument is a constant recorded directly in the arg spec */
	BPF_USDT_ARG_CONST = 0,
	/* argument is the content of a register */
	BPF_USDT_ARG_REG = 1,
	/* argument is in memory addressed by a register plus an offset */
	BPF_USDT_ARG_REG_DEREF = 2,
};
| 47 | + |
| 48 | +struct __bpf_usdt_arg_spec { |
| 49 | + /* u64 scalar interpreted depending on arg_type, see below */ |
| 50 | + __u64 val_off; |
| 51 | + /* arg location case, see bpf_udst_arg() for details */ |
| 52 | + enum __bpf_usdt_arg_type arg_type; |
| 53 | + /* offset of referenced register within struct pt_regs */ |
| 54 | + short reg_off; |
| 55 | + /* whether arg should be interpreted as signed value */ |
| 56 | + bool arg_signed; |
| 57 | + /* number of bits that need to be cleared and, optionally, |
| 58 | + * sign-extended to cast arguments that are 1, 2, or 4 bytes |
| 59 | + * long into final 8-byte u64/s64 value returned to user |
| 60 | + */ |
| 61 | + char arg_bitshift; |
| 62 | +}; |
| 63 | + |
| 64 | +/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ |
| 65 | +#define BPF_USDT_MAX_ARG_CNT 12 |
| 66 | +struct __bpf_usdt_spec { |
| 67 | + struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; |
| 68 | + __u64 usdt_cookie; |
| 69 | + short arg_cnt; |
| 70 | +}; |
| 71 | + |
| 72 | +struct { |
| 73 | + __uint(type, BPF_MAP_TYPE_ARRAY); |
| 74 | + __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); |
| 75 | + __type(key, int); |
| 76 | + __type(value, struct __bpf_usdt_spec); |
| 77 | +} __bpf_usdt_specs SEC(".maps") __weak; |
| 78 | + |
| 79 | +struct { |
| 80 | + __uint(type, BPF_MAP_TYPE_HASH); |
| 81 | + __uint(max_entries, BPF_USDT_MAX_IP_CNT); |
| 82 | + __type(key, long); |
| 83 | + __type(value, __u32); |
| 84 | +} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; |
| 85 | + |
/* Local stand-in for enum bpf_func_id, referenced by the default
 * BPF_USDT_HAS_BPF_COOKIE check, so that we don't rely on user's BPF code
 * to have the latest definition of bpf_func_id.
 */
enum bpf_func_id___usdt {
	/* the numeric value doesn't matter */
	BPF_FUNC_get_attach_cookie___usdt = 0xBAD,
};
| 90 | + |
| 91 | +static __always_inline |
| 92 | +int __bpf_usdt_spec_id(struct pt_regs *ctx) |
| 93 | +{ |
| 94 | + if (!BPF_USDT_HAS_BPF_COOKIE) { |
| 95 | + long ip = PT_REGS_IP(ctx); |
| 96 | + int *spec_id_ptr; |
| 97 | + |
| 98 | + spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); |
| 99 | + return spec_id_ptr ? *spec_id_ptr : -ESRCH; |
| 100 | + } |
| 101 | + |
| 102 | + return bpf_get_attach_cookie(ctx); |
| 103 | +} |
| 104 | + |
| 105 | +/* Return number of USDT arguments defined for currently traced USDT. */ |
| 106 | +static inline __noinline |
| 107 | +int bpf_usdt_arg_cnt(struct pt_regs *ctx) |
| 108 | +{ |
| 109 | + struct __bpf_usdt_spec *spec; |
| 110 | + int spec_id; |
| 111 | + |
| 112 | + spec_id = __bpf_usdt_spec_id(ctx); |
| 113 | + if (spec_id < 0) |
| 114 | + return -ESRCH; |
| 115 | + |
| 116 | + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); |
| 117 | + if (!spec) |
| 118 | + return -ESRCH; |
| 119 | + |
| 120 | + return spec->arg_cnt; |
| 121 | +} |
| 122 | + |
| 123 | +/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. |
| 124 | + * Returns 0 on success; negative error, otherwise. |
| 125 | + * On error *res is guaranteed to be set to zero. |
| 126 | + */ |
| 127 | +static inline __noinline |
| 128 | +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) |
| 129 | +{ |
| 130 | + struct __bpf_usdt_spec *spec; |
| 131 | + struct __bpf_usdt_arg_spec *arg_spec; |
| 132 | + unsigned long val; |
| 133 | + int err, spec_id; |
| 134 | + |
| 135 | + *res = 0; |
| 136 | + |
| 137 | + spec_id = __bpf_usdt_spec_id(ctx); |
| 138 | + if (spec_id < 0) |
| 139 | + return -ESRCH; |
| 140 | + |
| 141 | + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); |
| 142 | + if (!spec) |
| 143 | + return -ESRCH; |
| 144 | + |
| 145 | + if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) |
| 146 | + return -ENOENT; |
| 147 | + |
| 148 | + arg_spec = &spec->args[arg_num]; |
| 149 | + switch (arg_spec->arg_type) { |
| 150 | + case BPF_USDT_ARG_CONST: |
| 151 | + /* Arg is just a constant ("-4@$-9" in USDT arg spec). |
| 152 | + * value is recorded in arg_spec->val_off directly. |
| 153 | + */ |
| 154 | + val = arg_spec->val_off; |
| 155 | + break; |
| 156 | + case BPF_USDT_ARG_REG: |
| 157 | + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), |
| 158 | + * so we read the contents of that register directly from |
| 159 | + * struct pt_regs. To keep things simple user-space parts |
| 160 | + * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off. |
| 161 | + */ |
| 162 | + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); |
| 163 | + if (err) |
| 164 | + return err; |
| 165 | + break; |
| 166 | + case BPF_USDT_ARG_REG_DEREF: |
| 167 | + /* Arg is in memory addressed by register, plus some offset |
| 168 | + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is |
| 169 | + * identified lik with BPF_USDT_ARG_REG case, and the offset |
| 170 | + * is in arg_spec->val_off. We first fetch register contents |
| 171 | + * from pt_regs, then do another user-space probe read to |
| 172 | + * fetch argument value itself. |
| 173 | + */ |
| 174 | + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); |
| 175 | + if (err) |
| 176 | + return err; |
| 177 | + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); |
| 178 | + if (err) |
| 179 | + return err; |
| 180 | + break; |
| 181 | + default: |
| 182 | + return -EINVAL; |
| 183 | + } |
| 184 | + |
| 185 | + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing |
| 186 | + * necessary upper arg_bitshift bits, with sign extension if argument |
| 187 | + * is signed |
| 188 | + */ |
| 189 | + val <<= arg_spec->arg_bitshift; |
| 190 | + if (arg_spec->arg_signed) |
| 191 | + val = ((long)val) >> arg_spec->arg_bitshift; |
| 192 | + else |
| 193 | + val = val >> arg_spec->arg_bitshift; |
| 194 | + *res = val; |
| 195 | + return 0; |
| 196 | +} |
| 197 | + |
| 198 | +/* Retrieve user-specified cookie value provided during attach as |
| 199 | + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie |
| 200 | + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself |
| 201 | + * utilizaing BPF cookies internally, so user can't use BPF cookie directly |
| 202 | + * for USDT programs and has to use bpf_usdt_cookie() API instead. |
| 203 | + */ |
| 204 | +static inline __noinline |
| 205 | +long bpf_usdt_cookie(struct pt_regs *ctx) |
| 206 | +{ |
| 207 | + struct __bpf_usdt_spec *spec; |
| 208 | + int spec_id; |
| 209 | + |
| 210 | + spec_id = __bpf_usdt_spec_id(ctx); |
| 211 | + if (spec_id < 0) |
| 212 | + return 0; |
| 213 | + |
| 214 | + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); |
| 215 | + if (!spec) |
| 216 | + return 0; |
| 217 | + |
| 218 | + return spec->usdt_cookie; |
| 219 | +} |
| 220 | + |
/* We rely on ___bpf_apply() and ___bpf_narg() macros already defined in
 * bpf_tracing.h.
 * ___bpf_usdt_argsN() expands to 'ctx' followed by the values of the first N
 * USDT arguments; each value is fetched with bpf_usdt_arg() and cast to
 * void *. ___bpf_usdt_arg_val(n) is the shared fetch-and-cast step.
 */
#define ___bpf_usdt_arg_val(n) ({ long _x; bpf_usdt_arg(ctx, n, &_x); (void *)_x; })
#define ___bpf_usdt_args0() ctx
#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ___bpf_usdt_arg_val(0)
#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ___bpf_usdt_arg_val(1)
#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ___bpf_usdt_arg_val(2)
#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ___bpf_usdt_arg_val(3)
#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ___bpf_usdt_arg_val(4)
#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ___bpf_usdt_arg_val(5)
#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ___bpf_usdt_arg_val(6)
#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ___bpf_usdt_arg_val(7)
#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ___bpf_usdt_arg_val(8)
#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ___bpf_usdt_arg_val(9)
#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ___bpf_usdt_arg_val(10)
#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ___bpf_usdt_arg_val(11)
#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
| 236 | + |
/*
 * BPF_USDT plays the same role for USDT handlers as BPF_PROG does for
 * tp_btf/fentry/fexit BPF programs and BPF_KPROBE does for kprobes.
 * The macro declares the program entry point 'name', which takes only
 * struct pt_regs *, and forwards to an always-inlined ____name() whose
 * body is supplied by the user right after the macro invocation.
 * The original struct pt_regs * context is preserved as the 'ctx' argument,
 * followed by the declared arguments fetched through bpf_usdt_arg().
 */
#define BPF_USDT(name, args...)						    \
name(struct pt_regs *ctx);						    \
static __attribute__((always_inline)) typeof(name(0))			    \
____##name(struct pt_regs *ctx, ##args);				    \
typeof(name(0)) name(struct pt_regs *ctx)				    \
{									    \
	_Pragma("GCC diagnostic push")					    \
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
	return ____##name(___bpf_usdt_args(args));			    \
	_Pragma("GCC diagnostic pop")					    \
}									    \
static __attribute__((always_inline)) typeof(name(0))			    \
____##name(struct pt_regs *ctx, ##args)
| 255 | + |
| 256 | +#endif /* __USDT_BPF_H__ */ |
0 commit comments