Skip to content

Commit 67fb744

Browse files
SantoshShilimkarLinuxMinion
authored and committed
RDS: Add interface for receive MSG latency trace
Socket option to tap receive path latency. SO_RDS: SO_RDS_MSG_RXPATH_LATENCY with parameter, struct rds_rx_trace_so { u8 rx_traces; u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; } CMSG: RDS_CMSG_RXPATH_LATENCY(recvmsg) Returns rds message latencies in various stages of receive path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are defined in enum rds_message_rxpath_latency. More points can be added in future. CMSG format: struct rds_cmsg_rx_trace { u8 rx_traces; u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; } Receive MSG trace points: RDS message Receive Path Latency points enum rds_message_rxpath_latency { RDS_MSG_RX_HDR_TO_DGRAM_START = 0, RDS_MSG_RX_DGRAM_REASSEMBLE, RDS_MSG_RX_DGRAM_DELIVERED, RDS_MSG_RX_DGRAM_TRACE_MAX } Tested-by: Namrata Jampani <[email protected]> Reviewed-by: Ajaykumar Hotchandani <[email protected]> Reviewed-by: Sowmini Varadhan <[email protected]> Orabug: 22630180 Signed-off-by: Santosh Shilimkar <[email protected]>
1 parent ae712c8 commit 67fb744

File tree

6 files changed

+109
-3
lines changed

6 files changed

+109
-3
lines changed

include/uapi/linux/rds.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@
7575
#define RDS_TRANS_COUNT 3
7676
#define RDS_TRANS_NONE (~0)
7777

78+
/* Socket option to tap receive path latency
79+
* SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
80+
* Format used struct rds_rx_trace_so
81+
*/
82+
#define SO_RDS_MSG_RXPATH_LATENCY 10
83+
7884
/*
7985
* ioctl commands for SOL_RDS
8086
*/
@@ -86,6 +92,25 @@
8692

8793
typedef u_int8_t rds_tos_t;
8894

95+
/* RDS message Receive Path Latency points */
96+
enum rds_message_rxpath_latency {
97+
/*
 * Each trace point j names the interval between two consecutive receive
 * timestamps kept per incoming message: rds_cmsg_recv() reports it as
 * i_rx_lat_trace[j + 1] - i_rx_lat_trace[j], where the timestamp slots are
 * RDS_MSG_RX_HDR (0), RDS_MSG_RX_START (1), RDS_MSG_RX_END (2) and
 * RDS_MSG_RX_CMSG (3).
 */
/* RDS_MSG_RX_HDR stamp -> RDS_MSG_RX_START stamp (header seen, h_len parsed) */
RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
98+
/* RDS_MSG_RX_START stamp -> RDS_MSG_RX_END stamp (message queued on rs_recv_queue) */
RDS_MSG_RX_DGRAM_REASSEMBLE,
99+
/* RDS_MSG_RX_END stamp -> RDS_MSG_RX_CMSG stamp (taken in rds_cmsg_recv()) */
RDS_MSG_RX_DGRAM_DELIVERED,
100+
/* Number of trace points; used to size the rx_trace_pos[] / rx_trace[] arrays */
RDS_MSG_RX_DGRAM_TRACE_MAX
101+
};
102+
103+
/*
 * Argument of the SO_RDS_MSG_RXPATH_LATENCY socket option: selects which
 * receive-path trace points a socket wants reported.
 */
struct rds_rx_trace_so {
104+
/* Number of entries of rx_trace_pos[] that rds_recv_track_latency() consumes */
u8 rx_traces;
105+
/* Requested trace points, as enum rds_message_rxpath_latency values */
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
106+
};
107+
108+
/*
 * Payload of the RDS_CMSG_RXPATH_LATENCY control message built by
 * rds_cmsg_recv() when the socket has receive-path tracing enabled.
 */
struct rds_cmsg_rx_trace {
109+
/* Copied from the socket's rs_rx_traces (number of trace points requested) */
u8 rx_traces;
110+
/*
 * Trace point ids (enum rds_message_rxpath_latency) being reported.
 * NOTE(review): rds_cmsg_recv() in this change stores each entry at the
 * index equal to its trace point id, not at the request position - confirm
 * that this is the intended layout.
 */
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
111+
/* Latency per trace point: difference of two local_clock() timestamps */
u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
112+
};
113+
89114
/*
90115
* Control message types for SOL_RDS.
91116
*
@@ -104,6 +129,12 @@ typedef u_int8_t rds_tos_t;
104129
* the same as for the GET_MR setsockopt.
105130
* RDS_CMSG_RDMA_SEND_STATUS (recvmsg)
106131
* Returns the status of a completed RDMA/async send operation.
132+
* RDS_CMSG_RXPATH_LATENCY(recvmsg)
133+
* Returns rds message latencies in various stages of receive
134+
* path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY
135+
* socket option. Legitimate points are defined in
136+
* enum rds_message_rxpath_latency. More points can be added in
137+
* future. CMSG format is struct rds_cmsg_rx_trace.
107138
*/
108139
#define RDS_CMSG_RDMA_ARGS 1
109140
#define RDS_CMSG_RDMA_DEST 2
@@ -115,6 +146,7 @@ typedef u_int8_t rds_tos_t;
115146
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
116147
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
117148
#define RDS_CMSG_ASYNC_SEND 10
149+
#define RDS_CMSG_RXPATH_LATENCY 11
118150

119151
#define RDS_INFO_FIRST 10000
120152
#define RDS_INFO_COUNTERS 10000

net/rds/af_rds.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,31 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
383383
return 0;
384384
}
385385

386+
/*
 * rds_recv_track_latency - handle the SO_RDS_MSG_RXPATH_LATENCY socket option.
 * @rs:     RDS socket whose receive-path trace selection is being set
 * @optval: user pointer to a struct rds_rx_trace_so
 * @optlen: length supplied by the caller; must equal sizeof(struct rds_rx_trace_so)
 *
 * Copies the requested trace points into rs->rs_rx_trace[] and records how
 * many were requested in rs->rs_rx_traces.
 *
 * Returns 0 on success. Returns -EFAULT when the length is wrong, the user
 * copy fails, the requested count exceeds RDS_MSG_RX_DGRAM_TRACE_MAX, or a
 * requested trace point is not a valid enum rds_message_rxpath_latency value.
 * When a trace point is rejected, tracing is left disabled on the socket
 * (rs_rx_traces == 0).
 */
static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
				  int optlen)
{
	struct rds_rx_trace_so trace;
	int i;

	if (optlen != sizeof(struct rds_rx_trace_so))
		return -EFAULT;

	if (copy_from_user(&trace, optval, sizeof(trace)))
		return -EFAULT;

	/*
	 * rx_traces comes straight from user space and is used as the loop
	 * bound over two arrays of RDS_MSG_RX_DGRAM_TRACE_MAX entries; reject
	 * anything larger before touching either of them.
	 */
	if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
		return -EFAULT;

	rs->rs_rx_traces = trace.rx_traces;
	for (i = 0; i < rs->rs_rx_traces; i++) {
		/*
		 * Valid trace points are 0 .. RDS_MSG_RX_DGRAM_TRACE_MAX - 1.
		 * The value is later used as an array index on the receive
		 * path, so RDS_MSG_RX_DGRAM_TRACE_MAX itself must be refused.
		 */
		if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
			rs->rs_rx_traces = 0;
			return -EFAULT;
		}
		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
	}

	return 0;
}
409+
410+
386411
static int rds_setsockopt(struct socket *sock, int level, int optname,
387412
char __user *optval, unsigned int optlen)
388413
{
@@ -426,6 +451,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
426451
ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
427452
release_sock(sock->sk);
428453
break;
454+
case SO_RDS_MSG_RXPATH_LATENCY:
455+
ret = rds_recv_track_latency(rs, optval, optlen);
456+
break;
429457
default:
430458
ret = -ENOPROTOOPT;
431459
}
@@ -576,6 +604,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
576604
rs->rs_tos = 0;
577605
rs->rs_conn = 0;
578606
rs->rs_netfilter_enabled = 0;
607+
rs->rs_rx_traces = 0;
579608

580609
if (rs->rs_bound_addr)
581610
printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);

net/rds/ib_recv.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,8 +1102,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
11021102
ic->i_ibinc = ibinc;
11031103

11041104
hdr = &ibinc->ii_inc.i_hdr;
1105+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
1106+
local_clock();
11051107
memcpy(hdr, ihdr, sizeof(*hdr));
11061108
ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
1109+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
1110+
local_clock();
11071111

11081112
rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
11091113
ic->i_recv_data_rem, hdr->h_flags);

net/rds/rds.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,11 @@ struct rds_ext_header_rdma_bytes {
270270
};
271271

272272
#define __RDS_EXTHDR_MAX 16 /* for now */
273+
#define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1)
274+
#define RDS_MSG_RX_HDR 0
275+
#define RDS_MSG_RX_START 1
276+
#define RDS_MSG_RX_END 2
277+
#define RDS_MSG_RX_CMSG 3
273278

274279
struct rds_incoming {
275280
atomic_t i_refcount;
@@ -285,6 +290,7 @@ struct rds_incoming {
285290

286291
rds_rdma_cookie_t i_rdma_cookie;
287292
struct timeval i_rx_tstamp;
293+
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
288294
};
289295

290296
struct rds_mr {
@@ -595,6 +601,10 @@ struct rds_sock {
595601
int rs_netfilter_enabled;
596602

597603
u8 rs_tos;
604+
605+
/* Socket receive path trace points*/
606+
u8 rs_rx_traces;
607+
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
598608
};
599609

600610
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)

net/rds/recv.c

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ rds_recv_ok(struct sock *sk, struct sk_buff *skb)
6868
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
6969
__be32 saddr)
7070
{
71+
int i;
72+
7173
atomic_set(&inc->i_refcount, 1);
7274
INIT_LIST_HEAD(&inc->i_item);
7375
inc->i_conn = conn;
@@ -77,6 +79,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
7779
inc->i_skb = NULL;
7880
inc->i_rx_tstamp.tv_sec = 0;
7981
inc->i_rx_tstamp.tv_usec = 0;
82+
83+
for (i = 0; i < RDS_RX_MAX_TRACES; i++)
84+
inc->i_rx_lat_trace[i] = 0;
8085
}
8186
EXPORT_SYMBOL_GPL(rds_inc_init);
8287

@@ -554,6 +559,7 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr,
554559
do_gettimeofday(&inc->i_rx_tstamp);
555560
rds_inc_addref(inc);
556561
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
562+
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
557563
__rds_wake_sk_sleep(sk);
558564
}
559565
} else {
@@ -726,7 +732,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
726732
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
727733
sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
728734
if (ret)
729-
return ret;
735+
goto out;
730736
}
731737

732738
if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -735,10 +741,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
735741
sizeof(struct timeval),
736742
&inc->i_rx_tstamp);
737743
if (ret)
738-
return ret;
744+
goto out;
739745
}
740746

741-
return 0;
747+
if (rs->rs_rx_traces) {
748+
struct rds_cmsg_rx_trace t;
749+
int i, j;
750+
751+
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
752+
t.rx_traces = rs->rs_rx_traces;
753+
for (i = 0; i < rs->rs_rx_traces; i++) {
754+
j = rs->rs_rx_trace[i];
755+
t.rx_trace_pos[j] = j;
756+
t.rx_trace[j] = inc->i_rx_lat_trace[j + 1] -
757+
inc->i_rx_lat_trace[j];
758+
}
759+
760+
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
761+
sizeof(t), &t);
762+
if (ret)
763+
goto out;
764+
}
765+
766+
out:
767+
return ret;
742768
}
743769

744770
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,

net/rds/tcp_recv.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
178178
tc->t_tinc = tinc;
179179
rdsdebug("alloced tinc %p\n", tinc);
180180
rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
181+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
182+
local_clock();
183+
181184
/*
182185
* XXX * we might be able to use the __ variants when
183186
* we've already serialized at a higher level.
@@ -202,6 +205,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
202205
/* could be 0 for a 0 len message */
203206
tc->t_tinc_data_rem =
204207
be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
208+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
209+
local_clock();
205210
}
206211
}
207212

0 commit comments

Comments
 (0)