Skip to content

Commit 0181ce3

Browse files
Don Hiatt, dledford
authored and committed
IB/hfi1: Add receive fault injection feature
Add fault injection capability:
- Drop packets unconditionally (fault_by_packet)
- Drop packets based on opcode (fault_by_opcode)

This feature reacts to the global FAULT_INJECTION config flag.

The faulting traces have been added:
- misc/fault_opcode
- misc/fault_packet

See 'Documentation/fault-injection/fault-injection.txt' for details.

Examples:
- Dropping packets by opcode:
  /sys/kernel/debug/hfi1/hfi1_X/fault_opcode
  # Enable fault
  echo Y > fault_by_opcode
  # Set probability of dropping (0-100%)
  # echo 25 > probability
  # Set opcode
  echo 0x64 > opcode
  # Number of times to fault
  echo 3 > times
  # An optional mask allows you to fault
  # a range of opcodes
  echo 0xf0 > mask
  /sys/kernel/debug/hfi1/hfi1_X/fault_stats contains a value in parentheses to indicate number of each opcode dropped.

- Dropping packets unconditionally:
  /sys/kernel/debug/hfi1/hfi1_X/fault_packet
  # Enable fault
  echo Y > fault_by_packet
  /sys/kernel/debug/hfi1/hfi1_X/fault_packet/fault_stats contains the number of packets dropped.

Reviewed-by: Dennis Dalessandro <[email protected]>
Signed-off-by: Mike Marciniszyn <[email protected]>
Signed-off-by: Don Hiatt <[email protected]>
Signed-off-by: Dennis Dalessandro <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
1 parent f7b4263 commit 0181ce3

File tree

6 files changed

+336
-3
lines changed

6 files changed

+336
-3
lines changed

drivers/infiniband/hw/hfi1/debugfs.c

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,12 @@
5151
#include <linux/export.h>
5252
#include <linux/module.h>
5353
#include <linux/string.h>
54+
#include <linux/types.h>
55+
#include <linux/ratelimit.h>
56+
#include <linux/fault-inject.h>
5457

5558
#include "hfi.h"
59+
#include "trace.h"
5660
#include "debugfs.h"
5761
#include "device.h"
5862
#include "qp.h"
@@ -1063,6 +1067,217 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
10631067
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
10641068
DEBUGFS_FILE_OPS(sdma_cpu_list);
10651069

1070+
#ifdef CONFIG_FAULT_INJECTION
1071+
/*
 * seq_file ->start callback for the fault_stats file.
 *
 * The iteration cursor is the position pointer itself; iteration is over
 * as soon as *pos reaches the number of entries in the per-context opcode
 * statistics table.
 */
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
	/* never dereferenced: only used to size the stats[] array */
	struct hfi1_opcode_stats_perctx *opstats;

	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}
1079+
1080+
/*
 * seq_file ->next callback for the fault_stats file.
 *
 * Advances *pos by one and hands the position pointer back as the cursor,
 * or NULL once the end of the opcode statistics table has been reached.
 */
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	/* never dereferenced: only used to size the stats[] array */
	struct hfi1_opcode_stats_perctx *opstats;

	*pos += 1;
	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}
1089+
1090+
/*
 * seq_file ->stop callback for the fault_stats file.
 *
 * ->start/->next only hand out the position pointer, so there is nothing
 * to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
1093+
1094+
static int _fault_stats_seq_show(struct seq_file *s, void *v)
1095+
{
1096+
loff_t *spos = v;
1097+
loff_t i = *spos, j;
1098+
u64 n_packets = 0, n_bytes = 0;
1099+
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
1100+
struct hfi1_devdata *dd = dd_from_dev(ibd);
1101+
1102+
for (j = 0; j < dd->first_user_ctxt; j++) {
1103+
if (!dd->rcd[j])
1104+
continue;
1105+
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
1106+
n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
1107+
}
1108+
if (!n_packets && !n_bytes)
1109+
return SEQ_SKIP;
1110+
if (!ibd->fault_opcode->n_rxfaults[i] &&
1111+
!ibd->fault_opcode->n_txfaults[i])
1112+
return SEQ_SKIP;
1113+
seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
1114+
(unsigned long long)n_packets,
1115+
(unsigned long long)n_bytes,
1116+
(unsigned long long)ibd->fault_opcode->n_rxfaults[i],
1117+
(unsigned long long)ibd->fault_opcode->n_txfaults[i]);
1118+
return 0;
1119+
}
1120+
1121+
/*
 * Instantiate the debugfs plumbing for the "fault_stats" file.
 * NOTE(review): the DEBUGFS_* macros are defined outside this view; they
 * presumably tie the _fault_stats_seq_{start,next,stop,show} callbacks
 * into seq_operations / open / file_operations objects -- confirm against
 * the macro definitions.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
1122+
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
1123+
DEBUGFS_FILE_OPS(fault_stats);
1124+
1125+
static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
1126+
{
1127+
debugfs_remove_recursive(ibd->fault_opcode->dir);
1128+
kfree(ibd->fault_opcode);
1129+
ibd->fault_opcode = NULL;
1130+
}
1131+
1132+
static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
1133+
{
1134+
struct dentry *parent = ibd->hfi1_ibdev_dbg;
1135+
1136+
ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
1137+
if (!ibd->fault_opcode)
1138+
return -ENOMEM;
1139+
1140+
ibd->fault_opcode->attr.interval = 1;
1141+
ibd->fault_opcode->attr.require_end = ULONG_MAX;
1142+
ibd->fault_opcode->attr.stacktrace_depth = 32;
1143+
ibd->fault_opcode->attr.dname = NULL;
1144+
ibd->fault_opcode->attr.verbose = 0;
1145+
ibd->fault_opcode->fault_by_opcode = false;
1146+
ibd->fault_opcode->opcode = 0;
1147+
ibd->fault_opcode->mask = 0xff;
1148+
1149+
ibd->fault_opcode->dir =
1150+
fault_create_debugfs_attr("fault_opcode",
1151+
parent,
1152+
&ibd->fault_opcode->attr);
1153+
if (IS_ERR(ibd->fault_opcode->dir)) {
1154+
kfree(ibd->fault_opcode);
1155+
return -ENOENT;
1156+
}
1157+
1158+
DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
1159+
if (!debugfs_create_bool("fault_by_opcode", 0600,
1160+
ibd->fault_opcode->dir,
1161+
&ibd->fault_opcode->fault_by_opcode))
1162+
goto fail;
1163+
if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
1164+
&ibd->fault_opcode->opcode))
1165+
goto fail;
1166+
if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
1167+
&ibd->fault_opcode->mask))
1168+
goto fail;
1169+
1170+
return 0;
1171+
fail:
1172+
fault_exit_opcode_debugfs(ibd);
1173+
return -ENOMEM;
1174+
}
1175+
1176+
static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
1177+
{
1178+
debugfs_remove_recursive(ibd->fault_packet->dir);
1179+
kfree(ibd->fault_packet);
1180+
ibd->fault_packet = NULL;
1181+
}
1182+
1183+
static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
1184+
{
1185+
struct dentry *parent = ibd->hfi1_ibdev_dbg;
1186+
1187+
ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
1188+
if (!ibd->fault_packet)
1189+
return -ENOMEM;
1190+
1191+
ibd->fault_packet->attr.interval = 1;
1192+
ibd->fault_packet->attr.require_end = ULONG_MAX;
1193+
ibd->fault_packet->attr.stacktrace_depth = 32;
1194+
ibd->fault_packet->attr.dname = NULL;
1195+
ibd->fault_packet->attr.verbose = 0;
1196+
ibd->fault_packet->fault_by_packet = false;
1197+
1198+
ibd->fault_packet->dir =
1199+
fault_create_debugfs_attr("fault_packet",
1200+
parent,
1201+
&ibd->fault_opcode->attr);
1202+
if (IS_ERR(ibd->fault_packet->dir)) {
1203+
kfree(ibd->fault_packet);
1204+
return -ENOENT;
1205+
}
1206+
1207+
if (!debugfs_create_bool("fault_by_packet", 0600,
1208+
ibd->fault_packet->dir,
1209+
&ibd->fault_packet->fault_by_packet))
1210+
goto fail;
1211+
if (!debugfs_create_u64("fault_stats", 0400,
1212+
ibd->fault_packet->dir,
1213+
&ibd->fault_packet->n_faults))
1214+
goto fail;
1215+
1216+
return 0;
1217+
fail:
1218+
fault_exit_packet_debugfs(ibd);
1219+
return -ENOMEM;
1220+
}
1221+
1222+
/*
 * Release both fault injection facilities of @ibd: first the opcode-based
 * one, then the unconditional packet one.
 */
static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
{
	fault_exit_opcode_debugfs(ibd);
	fault_exit_packet_debugfs(ibd);
}
1227+
1228+
/*
 * Set up both fault injection facilities of @ibd.
 *
 * The opcode facility is created first; if the packet facility then fails
 * to come up, the opcode one is torn down again so that nothing is left
 * half initialised.
 *
 * Return: 0 on success, otherwise the error code of the step that failed.
 */
static int fault_init_debugfs(struct hfi1_ibdev *ibd)
{
	int err;

	err = fault_init_opcode_debugfs(ibd);
	if (err)
		return err;

	err = fault_init_packet_debugfs(ibd);
	if (!err)
		return 0;

	/* undo the first step */
	fault_exit_opcode_debugfs(ibd);
	return err;
}
1242+
1243+
/*
 * hfi1_dbg_fault_opcode - decide whether to inject a fault for an opcode
 * @qp: queue pair the packet belongs to; used to find the device and
 *      passed to the fault trace
 * @opcode: opcode of the packet
 * @rx: true to account a hit as a receive fault, false as a transmit fault
 *
 * A fault is only considered when opcode faulting is enabled and
 * (@opcode & mask) equals the configured opcode.  should_fail() then makes
 * the final decision; on a hit the fault is traced and the per-opcode
 * rx/tx counter is bumped.
 *
 * Return: true when should_fail() selected this packet, false otherwise.
 */
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);

	if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
		return false;
	/*
	 * @opcode indexes the fault counters below.  The mask comparison only
	 * looks at the low bits, so a value beyond the tables could still
	 * match; reject it instead of writing out of bounds.
	 */
	if (opcode >= ARRAY_SIZE(ibd->fault_opcode->n_rxfaults))
		return false;
	if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
		return false;
	if (!should_fail(&ibd->fault_opcode->attr, 1))
		return false;

	trace_hfi1_fault_opcode(qp, opcode);
	if (rx)
		ibd->fault_opcode->n_rxfaults[opcode]++;
	else
		ibd->fault_opcode->n_txfaults[opcode]++;
	return true;
}
1262+
1263+
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
1264+
{
1265+
struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
1266+
struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
1267+
bool ret = false;
1268+
1269+
if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
1270+
return false;
1271+
1272+
ret = should_fail(&ibd->fault_packet->attr, 1);
1273+
if (ret) {
1274+
++ibd->fault_packet->n_faults;
1275+
trace_hfi1_fault_packet(packet);
1276+
}
1277+
return ret;
1278+
}
1279+
#endif
1280+
10661281
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
10671282
{
10681283
char name[sizeof("port0counters") + 1];
@@ -1112,12 +1327,19 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
11121327
!port_cntr_ops[i].ops.write ?
11131328
S_IRUGO : S_IRUGO | S_IWUSR);
11141329
}
1330+
1331+
#ifdef CONFIG_FAULT_INJECTION
1332+
fault_init_debugfs(ibd);
1333+
#endif
11151334
}
11161335

11171336
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
11181337
{
11191338
if (!hfi1_dbg_root)
11201339
goto out;
1340+
#ifdef CONFIG_FAULT_INJECTION
1341+
fault_exit_debugfs(ibd);
1342+
#endif
11211343
debugfs_remove(ibd->hfi1_ibdev_link);
11221344
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
11231345
out:

drivers/infiniband/hw/hfi1/debugfs.h

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,68 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
5353
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
5454
void hfi1_dbg_init(void);
5555
void hfi1_dbg_exit(void);
56+
57+
#ifdef CONFIG_FAULT_INJECTION
58+
#include <linux/fault-inject.h>
59+
struct fault_opcode {
60+
struct fault_attr attr;
61+
struct dentry *dir;
62+
bool fault_by_opcode;
63+
u64 n_rxfaults[256];
64+
u64 n_txfaults[256];
65+
u8 opcode;
66+
u8 mask;
67+
};
68+
69+
struct fault_packet {
70+
struct fault_attr attr;
71+
struct dentry *dir;
72+
bool fault_by_packet;
73+
u64 n_faults;
74+
};
75+
76+
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
77+
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
78+
#else
79+
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
80+
{
81+
return false;
82+
}
83+
84+
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
85+
u32 opcode, bool rx)
86+
{
87+
return false;
88+
}
89+
#endif
90+
5691
#else
5792
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
5893
{
5994
}
6095

61-
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
96+
static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
97+
{
98+
}
99+
100+
static inline void hfi1_dbg_init(void)
62101
{
63102
}
64103

65-
void hfi1_dbg_init(void)
104+
static inline void hfi1_dbg_exit(void)
66105
{
67106
}
68107

69-
void hfi1_dbg_exit(void)
108+
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
70109
{
110+
return false;
71111
}
72112

113+
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
114+
u32 opcode, bool rx)
115+
{
116+
return false;
117+
}
73118
#endif
74119

75120
#endif /* _HFI1_DEBUGFS_H */

drivers/infiniband/hw/hfi1/driver.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "trace.h"
6060
#include "qp.h"
6161
#include "sdma.h"
62+
#include "debugfs.h"
6263

6364
#undef pr_fmt
6465
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
13541355
*/
13551356
int process_receive_ib(struct hfi1_packet *packet)
13561357
{
1358+
if (unlikely(hfi1_dbg_fault_packet(packet)))
1359+
return RHF_RCV_CONTINUE;
1360+
13571361
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
13581362
packet->rcd->ctxt,
13591363
rhf_err_flags(packet->rhf),
@@ -1409,6 +1413,8 @@ int process_receive_error(struct hfi1_packet *packet)
14091413

14101414
int kdeth_process_expected(struct hfi1_packet *packet)
14111415
{
1416+
if (unlikely(hfi1_dbg_fault_packet(packet)))
1417+
return RHF_RCV_CONTINUE;
14121418
if (unlikely(rhf_err_flags(packet->rhf)))
14131419
handle_eflags(packet);
14141420

@@ -1421,6 +1427,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
14211427
{
14221428
if (unlikely(rhf_err_flags(packet->rhf)))
14231429
handle_eflags(packet);
1430+
if (unlikely(hfi1_dbg_fault_packet(packet)))
1431+
return RHF_RCV_CONTINUE;
14241432

14251433
dd_dev_err(packet->rcd->dd,
14261434
"Unhandled eager packet received. Dropping.\n");

0 commit comments

Comments
 (0)