Skip to content

Commit 6246db8

Browse files
SantoshShilimkargerd-rausch
authored andcommitted
RDS: Add interface for receive MSG latency trace
Socket option to tap receive path latency. SO_RDS: SO_RDS_MSG_RXPATH_LATENCY with parameter, struct rds_rx_trace_so { u8 rx_traces; u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; } CMSG: RDS_CMSG_RXPATH_LATENCY(recvmsg) Returns rds message latencies in various stages of receive path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are defined in enum rds_message_rxpath_latency. More points can be added in future. CMSG format: struct rds_cmsg_rx_trace { u8 rx_traces; u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; } Receive MSG trace points: RDS message Receive Path Latency points enum rds_message_rxpath_latency { RDS_MSG_RX_HDR_TO_DGRAM_START = 0, RDS_MSG_RX_DGRAM_REASSEMBLE, RDS_MSG_RX_DGRAM_DELIVERED, RDS_MSG_RX_DGRAM_TRACE_MAX } Orabug: 22630180 Signed-off-by: Santosh Shilimkar <[email protected]> Reviewed-by: Ajaykumar Hotchandani <[email protected]> Reviewed-by: Sowmini Varadhan <[email protected]> Tested-by: Namrata Jampani <[email protected]> Orabug: 27364391 (cherry picked from commit 67fb744) cherry-pick-repo=linux-uek.git Signed-off-by: Gerd Rausch <[email protected]> Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent ef72b08 commit 6246db8

File tree

6 files changed

+109
-3
lines changed

6 files changed

+109
-3
lines changed

include/uapi/linux/rds.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@
7575
#define RDS_TRANS_COUNT 3
7676
#define RDS_TRANS_NONE (~0)
7777

78+
/* Socket option to tap receive path latency
79+
* SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
80+
* Format used struct rds_rx_trace_so
81+
*/
82+
#define SO_RDS_MSG_RXPATH_LATENCY 10
83+
7884
/*
7985
* ioctl commands for SOL_RDS
8086
*/
@@ -86,6 +92,25 @@
8692

8793
typedef u_int8_t rds_tos_t;
8894

95+
/* RDS message Receive Path Latency points */
96+
enum rds_message_rxpath_latency {
97+
RDS_MSG_RX_HDR_TO_DGRAM_START = 0, /* header timestamp -> datagram-start timestamp */
98+
RDS_MSG_RX_DGRAM_REASSEMBLE, /* datagram start -> message queued on the socket receive queue */
99+
RDS_MSG_RX_DGRAM_DELIVERED, /* queued on the socket -> control message built in the recvmsg path */
100+
RDS_MSG_RX_DGRAM_TRACE_MAX /* number of trace points; sizes the rx_trace_pos[]/rx_trace[] arrays */
101+
};
102+
103+
/*
 * Argument of the SO_RDS_MSG_RXPATH_LATENCY socket option: selects which
 * receive-path trace points are reported for a socket.
 */
struct rds_rx_trace_so {
104+
u8 rx_traces; /* number of entries used in rx_trace_pos[]; 0 leaves tracing off */
105+
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; /* requested points, values from enum rds_message_rxpath_latency */
106+
};
107+
108+
/*
 * Payload of the RDS_CMSG_RXPATH_LATENCY control message returned by recvmsg.
 * NOTE(review): the visible rds_cmsg_recv hunk stores each result in the slot
 * indexed by the trace point id rather than by request order - confirm the
 * intended layout before relying on it.
 */
struct rds_cmsg_rx_trace {
109+
u8 rx_traces; /* number of trace points enabled on the socket */
110+
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; /* trace point ids (enum rds_message_rxpath_latency) */
111+
u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; /* latency of each reported point, in nanoseconds */
112+
};
113+
89114
/*
90115
* Control message types for SOL_RDS.
91116
*
@@ -104,6 +129,12 @@ typedef u_int8_t rds_tos_t;
104129
* the same as for the GET_MR setsockopt.
105130
* RDS_CMSG_RDMA_SEND_STATUS (recvmsg)
106131
* Returns the status of a completed RDMA/async send operation.
132+
* RDS_CMSG_RXPATH_LATENCY(recvmsg)
133+
* Returns rds message latencies in various stages of receive
134+
* path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY
135+
* socket option. Legitimate points are defined in
136+
* enum rds_message_rxpath_latency. More points can be added in
137+
* future. CMSG format is struct rds_cmsg_rx_trace.
107138
*/
108139
#define RDS_CMSG_RDMA_ARGS 1
109140
#define RDS_CMSG_RDMA_DEST 2
@@ -115,6 +146,7 @@ typedef u_int8_t rds_tos_t;
115146
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
116147
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
117148
#define RDS_CMSG_ASYNC_SEND 10
149+
#define RDS_CMSG_RXPATH_LATENCY 11
118150

119151
#define RDS_INFO_FIRST 10000
120152
#define RDS_INFO_COUNTERS 10000

net/rds/af_rds.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,31 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
383383
return 0;
384384
}
385385

386+
/*
 * rds_recv_track_latency() - handle the SO_RDS_MSG_RXPATH_LATENCY option.
 * @rs:     RDS socket whose receive-path trace selection is being set
 * @optval: user pointer to a struct rds_rx_trace_so
 * @optlen: size of the user buffer; must be sizeof(struct rds_rx_trace_so)
 *
 * Copies the requested trace points from userspace into the socket so the
 * receive path can report their latencies via RDS_CMSG_RXPATH_LATENCY.
 *
 * Returns 0 on success, or -EFAULT if the length is wrong, the copy from
 * userspace fails, the trace count exceeds RDS_MSG_RX_DGRAM_TRACE_MAX, or
 * any requested position is not a valid trace point. A bad position also
 * disables tracing on the socket (rs_rx_traces is reset to 0).
 */
static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
				  int optlen)
{
	struct rds_rx_trace_so trace;
	int i;

	if (optlen != sizeof(struct rds_rx_trace_so))
		return -EFAULT;

	if (copy_from_user(&trace, optval, sizeof(trace)))
		return -EFAULT;

	/*
	 * rx_traces is user controlled and bounds the loop below; without this
	 * check it could index past trace.rx_trace_pos[] and rs->rs_rx_trace[],
	 * both of which hold RDS_MSG_RX_DGRAM_TRACE_MAX entries.
	 */
	if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
		return -EFAULT;

	rs->rs_rx_traces = trace.rx_traces;
	for (i = 0; i < rs->rs_rx_traces; i++) {
		/*
		 * Valid points are 0 .. RDS_MSG_RX_DGRAM_TRACE_MAX - 1. The
		 * receive path uses the position as an array index (and
		 * position + 1 for the end timestamp), so the MAX value itself
		 * must be rejected as well.
		 */
		if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
			rs->rs_rx_traces = 0;
			return -EFAULT;
		}
		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
	}

	return 0;
}
409+
410+
386411
static int rds_setsockopt(struct socket *sock, int level, int optname,
387412
char __user *optval, unsigned int optlen)
388413
{
@@ -426,6 +451,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
426451
ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
427452
release_sock(sock->sk);
428453
break;
454+
case SO_RDS_MSG_RXPATH_LATENCY:
455+
ret = rds_recv_track_latency(rs, optval, optlen);
456+
break;
429457
default:
430458
ret = -ENOPROTOOPT;
431459
}
@@ -576,6 +604,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
576604
rs->rs_tos = 0;
577605
rs->rs_conn = 0;
578606
rs->rs_netfilter_enabled = 0;
607+
rs->rs_rx_traces = 0;
579608

580609
if (rs->rs_bound_addr)
581610
printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);

net/rds/ib_recv.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,8 +1097,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
10971097
ic->i_ibinc = ibinc;
10981098

10991099
hdr = &ibinc->ii_inc.i_hdr;
1100+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
1101+
local_clock();
11001102
memcpy(hdr, ihdr, sizeof(*hdr));
11011103
ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
1104+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
1105+
local_clock();
11021106

11031107
rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
11041108
ic->i_recv_data_rem, hdr->h_flags);

net/rds/rds.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,11 @@ struct rds_ext_header_rdma_bytes {
271271
};
272272

273273
#define __RDS_EXTHDR_MAX 16 /* for now */
274+
#define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1)
275+
#define RDS_MSG_RX_HDR 0
276+
#define RDS_MSG_RX_START 1
277+
#define RDS_MSG_RX_END 2
278+
#define RDS_MSG_RX_CMSG 3
274279

275280
struct rds_incoming {
276281
atomic_t i_refcount;
@@ -286,6 +291,7 @@ struct rds_incoming {
286291

287292
rds_rdma_cookie_t i_rdma_cookie;
288293
struct timeval i_rx_tstamp;
294+
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
289295
};
290296

291297
struct rds_mr {
@@ -596,6 +602,10 @@ struct rds_sock {
596602
int rs_netfilter_enabled;
597603

598604
u8 rs_tos;
605+
606+
/* Socket receive path trace points*/
607+
u8 rs_rx_traces;
608+
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
599609
};
600610

601611
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)

net/rds/recv.c

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ rds_recv_ok(struct net *net, struct sock *sk, struct sk_buff *skb)
6868
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
6969
__be32 saddr)
7070
{
71+
int i;
72+
7173
atomic_set(&inc->i_refcount, 1);
7274
INIT_LIST_HEAD(&inc->i_item);
7375
inc->i_conn = conn;
@@ -77,6 +79,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
7779
inc->i_skb = NULL;
7880
inc->i_rx_tstamp.tv_sec = 0;
7981
inc->i_rx_tstamp.tv_usec = 0;
82+
83+
for (i = 0; i < RDS_RX_MAX_TRACES; i++)
84+
inc->i_rx_lat_trace[i] = 0;
8085
}
8186
EXPORT_SYMBOL_GPL(rds_inc_init);
8287

@@ -558,6 +563,7 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr,
558563
do_gettimeofday(&inc->i_rx_tstamp);
559564
rds_inc_addref(inc);
560565
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
566+
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
561567
__rds_wake_sk_sleep(sk);
562568
}
563569
} else {
@@ -730,7 +736,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
730736
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
731737
sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
732738
if (ret)
733-
return ret;
739+
goto out;
734740
}
735741

736742
if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -739,10 +745,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
739745
sizeof(struct timeval),
740746
&inc->i_rx_tstamp);
741747
if (ret)
742-
return ret;
748+
goto out;
743749
}
744750

745-
return 0;
751+
if (rs->rs_rx_traces) {
752+
struct rds_cmsg_rx_trace t;
753+
int i, j;
754+
755+
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
756+
t.rx_traces = rs->rs_rx_traces;
757+
for (i = 0; i < rs->rs_rx_traces; i++) {
758+
j = rs->rs_rx_trace[i];
759+
t.rx_trace_pos[j] = j;
760+
t.rx_trace[j] = inc->i_rx_lat_trace[j + 1] -
761+
inc->i_rx_lat_trace[j];
762+
}
763+
764+
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
765+
sizeof(t), &t);
766+
if (ret)
767+
goto out;
768+
}
769+
770+
out:
771+
return ret;
746772
}
747773

748774
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,

net/rds/tcp_recv.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
180180
tc->t_tinc = tinc;
181181
rdsdebug("alloced tinc %p\n", tinc);
182182
rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
183+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
184+
local_clock();
185+
183186
/*
184187
* XXX * we might be able to use the __ variants when
185188
* we've already serialized at a higher level.
@@ -204,6 +207,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
204207
/* could be 0 for a 0 len message */
205208
tc->t_tinc_data_rem =
206209
be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
210+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
211+
local_clock();
207212
}
208213
}
209214

0 commit comments

Comments
 (0)