Blob Blame History Raw
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 7 May 2018 15:27:11 -0400
Subject: svcrdma: Trace key RPC/RDMA protocol events
Patch-mainline: v4.18-rc1
Git-commit: 98895edbe377e990e61817d00ab029c7b8b99f21
References: bsc#1103992 FATE#326009

This includes:
  * Transport accept and tear-down
  * Decisions about using Write and Reply chunks
  * Each RDMA segment that is handled
  * Whenever an RDMA_ERR is sent

As a clean-up, I've standardized the order of the includes, and
removed some now redundant dprintk call sites.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 include/trace/events/rpcrdma.h           |  262 ++++++++++++++++++++++++++++++-
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  |   36 +---
 net/sunrpc/xprtrdma/svc_rdma_rw.c        |   23 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |   19 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   19 +-
 5 files changed, 311 insertions(+), 48 deletions(-)

--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2017 Oracle.  All rights reserved.
+ * Copyright (c) 2017, 2018 Oracle.  All rights reserved.
+ *
+ * Trace point definitions for the "rpcrdma" subsystem.
  */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM rpcrdma
@@ -885,6 +887,264 @@ TRACE_EVENT(xprtrdma_cb_setup,
 DEFINE_CB_EVENT(xprtrdma_cb_call);
 DEFINE_CB_EVENT(xprtrdma_cb_reply);
 
+/**
+ ** Server-side RPC/RDMA events
+ **/
+
+DECLARE_EVENT_CLASS(svcrdma_xprt_event,
+	TP_PROTO(
+		const struct svc_xprt *xprt
+	),
+
+	TP_ARGS(xprt),
+
+	TP_STRUCT__entry(
+		__field(const void *, xprt)
+		__string(addr, xprt->xpt_remotebuf)
+	),
+
+	TP_fast_assign(
+		__entry->xprt = xprt;
+		__assign_str(addr, xprt->xpt_remotebuf);
+	),
+
+	TP_printk("xprt=%p addr=%s",
+		__entry->xprt, __get_str(addr)
+	)
+);
+
+#define DEFINE_XPRT_EVENT(name)						\
+		DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name,	\
+				TP_PROTO(				\
+					const struct svc_xprt *xprt	\
+				),					\
+				TP_ARGS(xprt))
+
+DEFINE_XPRT_EVENT(accept);
+DEFINE_XPRT_EVENT(fail);
+DEFINE_XPRT_EVENT(free);
+
+TRACE_DEFINE_ENUM(RDMA_MSG);
+TRACE_DEFINE_ENUM(RDMA_NOMSG);
+TRACE_DEFINE_ENUM(RDMA_MSGP);
+TRACE_DEFINE_ENUM(RDMA_DONE);
+TRACE_DEFINE_ENUM(RDMA_ERROR);
+
+#define show_rpcrdma_proc(x)						\
+		__print_symbolic(x,					\
+				{ RDMA_MSG, "RDMA_MSG" },		\
+				{ RDMA_NOMSG, "RDMA_NOMSG" },		\
+				{ RDMA_MSGP, "RDMA_MSGP" },		\
+				{ RDMA_DONE, "RDMA_DONE" },		\
+				{ RDMA_ERROR, "RDMA_ERROR" })
+
+TRACE_EVENT(svcrdma_decode_rqst,
+	TP_PROTO(
+		__be32 *p,
+		unsigned int hdrlen
+	),
+
+	TP_ARGS(p, hdrlen),
+
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(u32, vers)
+		__field(u32, proc)
+		__field(u32, credits)
+		__field(unsigned int, hdrlen)
+	),
+
+	TP_fast_assign(
+		__entry->xid = be32_to_cpup(p++);
+		__entry->vers = be32_to_cpup(p++);
+		__entry->credits = be32_to_cpup(p++);
+		__entry->proc = be32_to_cpup(p);
+		__entry->hdrlen = hdrlen;
+	),
+
+	TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u",
+		__entry->xid, __entry->vers, __entry->credits,
+		show_rpcrdma_proc(__entry->proc), __entry->hdrlen)
+);
+
+TRACE_EVENT(svcrdma_decode_short,
+	TP_PROTO(
+		unsigned int hdrlen
+	),
+
+	TP_ARGS(hdrlen),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, hdrlen)
+	),
+
+	TP_fast_assign(
+		__entry->hdrlen = hdrlen;
+	),
+
+	TP_printk("hdrlen=%u", __entry->hdrlen)
+);
+
+DECLARE_EVENT_CLASS(svcrdma_badreq_event,
+	TP_PROTO(
+		__be32 *p
+	),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(u32, vers)
+		__field(u32, proc)
+		__field(u32, credits)
+	),
+
+	TP_fast_assign(
+		__entry->xid = be32_to_cpup(p++);
+		__entry->vers = be32_to_cpup(p++);
+		__entry->credits = be32_to_cpup(p++);
+		__entry->proc = be32_to_cpup(p);
+	),
+
+	TP_printk("xid=0x%08x vers=%u credits=%u proc=%u",
+		__entry->xid, __entry->vers, __entry->credits, __entry->proc)
+);
+
+#define DEFINE_BADREQ_EVENT(name)					\
+		DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\
+				TP_PROTO(				\
+					__be32 *p			\
+				),					\
+				TP_ARGS(p))
+
+DEFINE_BADREQ_EVENT(badvers);
+DEFINE_BADREQ_EVENT(drop);
+DEFINE_BADREQ_EVENT(badproc);
+DEFINE_BADREQ_EVENT(parse);
+
+DECLARE_EVENT_CLASS(svcrdma_segment_event,
+	TP_PROTO(
+		u32 handle,
+		u32 length,
+		u64 offset
+	),
+
+	TP_ARGS(handle, length, offset),
+
+	TP_STRUCT__entry(
+		__field(u32, handle)
+		__field(u32, length)
+		__field(u64, offset)
+	),
+
+	TP_fast_assign(
+		__entry->handle = handle;
+		__entry->length = length;
+		__entry->offset = offset;
+	),
+
+	TP_printk("%u@0x%016llx:0x%08x",
+		__entry->length, (unsigned long long)__entry->offset,
+		__entry->handle
+	)
+);
+
+#define DEFINE_SEGMENT_EVENT(name)					\
+		DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
+				TP_PROTO(				\
+					u32 handle,			\
+					u32 length,			\
+					u64 offset			\
+				),					\
+				TP_ARGS(handle, length, offset))
+
+DEFINE_SEGMENT_EVENT(rseg);
+DEFINE_SEGMENT_EVENT(wseg);
+
+DECLARE_EVENT_CLASS(svcrdma_chunk_event,
+	TP_PROTO(
+		u32 length
+	),
+
+	TP_ARGS(length),
+
+	TP_STRUCT__entry(
+		__field(u32, length)
+	),
+
+	TP_fast_assign(
+		__entry->length = length;
+	),
+
+	TP_printk("length=%u",
+		__entry->length
+	)
+);
+
+#define DEFINE_CHUNK_EVENT(name)					\
+		DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\
+				TP_PROTO(				\
+					u32 length			\
+				),					\
+				TP_ARGS(length))
+
+DEFINE_CHUNK_EVENT(pzr);
+DEFINE_CHUNK_EVENT(write);
+DEFINE_CHUNK_EVENT(reply);
+
+TRACE_EVENT(svcrdma_encode_read,
+	TP_PROTO(
+		u32 length,
+		u32 position
+	),
+
+	TP_ARGS(length, position),
+
+	TP_STRUCT__entry(
+		__field(u32, length)
+		__field(u32, position)
+	),
+
+	TP_fast_assign(
+		__entry->length = length;
+		__entry->position = position;
+	),
+
+	TP_printk("length=%u position=%u",
+		__entry->length, __entry->position
+	)
+);
+
+DECLARE_EVENT_CLASS(svcrdma_error_event,
+	TP_PROTO(
+		__be32 xid
+	),
+
+	TP_ARGS(xid),
+
+	TP_STRUCT__entry(
+		__field(u32, xid)
+	),
+
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(xid);
+	),
+
+	TP_printk("xid=0x%08x",
+		__entry->xid
+	)
+);
+
+#define DEFINE_ERROR_EVENT(name)					\
+		DEFINE_EVENT(svcrdma_error_event, svcrdma_err_##name,	\
+				TP_PROTO(				\
+					__be32 xid			\
+				),					\
+				TP_ARGS(xid))
+
+DEFINE_ERROR_EVENT(vers);
+DEFINE_ERROR_EVENT(chunk);
+
 #endif /* _TRACE_RPCRDMA_H */
 
 #include <trace/define_trace.h>
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -93,17 +93,19 @@
  * (see rdma_read_complete() below).
  */
 
+#include <linux/spinlock.h>
 #include <asm/unaligned.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 
-#include <linux/spinlock.h>
-
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 /*
@@ -295,7 +297,6 @@ static int svc_rdma_xdr_decode_req(struc
 {
 	__be32 *p, *end, *rdma_argp;
 	unsigned int hdr_len;
-	char *proc;
 
 	/* Verify that there's enough bytes for header + something */
 	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
@@ -307,10 +308,8 @@ static int svc_rdma_xdr_decode_req(struc
 
 	switch (*(rdma_argp + 3)) {
 	case rdma_msg:
-		proc = "RDMA_MSG";
 		break;
 	case rdma_nomsg:
-		proc = "RDMA_NOMSG";
 		break;
 
 	case rdma_done:
@@ -340,30 +339,27 @@ static int svc_rdma_xdr_decode_req(struc
 	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
 	rq_arg->head[0].iov_len -= hdr_len;
 	rq_arg->len -= hdr_len;
-	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
-		proc, be32_to_cpup(rdma_argp), hdr_len);
+	trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
 	return hdr_len;
 
 out_short:
-	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+	trace_svcrdma_decode_short(rq_arg->len);
 	return -EINVAL;
 
 out_version:
-	dprintk("svcrdma: bad xprt version: %u\n",
-		be32_to_cpup(rdma_argp + 1));
+	trace_svcrdma_decode_badvers(rdma_argp);
 	return -EPROTONOSUPPORT;
 
 out_drop:
-	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+	trace_svcrdma_decode_drop(rdma_argp);
 	return 0;
 
 out_proc:
-	dprintk("svcrdma: bad rdma procedure (%u)\n",
-		be32_to_cpup(rdma_argp + 3));
+	trace_svcrdma_decode_badproc(rdma_argp);
 	return -EINVAL;
 
 out_inval:
-	dprintk("svcrdma: failed to parse transport header\n");
+	trace_svcrdma_decode_parse(rdma_argp);
 	return -EINVAL;
 }
 
@@ -412,12 +408,16 @@ static void svc_rdma_send_error(struct s
 	*p++ = *(rdma_argp + 1);
 	*p++ = xprt->sc_fc_credits;
 	*p++ = rdma_error;
-	if (status == -EPROTONOSUPPORT) {
+	switch (status) {
+	case -EPROTONOSUPPORT:
 		*p++ = err_vers;
 		*p++ = rpcrdma_version;
 		*p++ = rpcrdma_version;
-	} else {
+		trace_svcrdma_err_vers(*rdma_argp);
+		break;
+	default:
 		*p++ = err_chunk;
+		trace_svcrdma_err_chunk(*rdma_argp);
 	}
 	length = (unsigned long)p - (unsigned long)err_msgp;
 
@@ -532,8 +532,6 @@ int svc_rdma_recvfrom(struct svc_rqst *r
 	}
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
 
-	dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
-		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
 
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
@@ -559,8 +557,6 @@ int svc_rdma_recvfrom(struct svc_rqst *r
 
 complete:
 	svc_rdma_put_context(ctxt, 0);
-	dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
-		rdma_xprt, rqstp, rqstp->rq_arg.len);
 	rqstp->rq_prot = IPPROTO_MAX;
 	svc_xprt_copy_addrs(rqstp, xprt);
 	return rqstp->rq_arg.len;
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -4,11 +4,14 @@
  * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
  */
 
+#include <rdma/rw.h>
+
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 #include <linux/sunrpc/debug.h>
 
-#include <rdma/rw.h>
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -436,6 +439,7 @@ svc_rdma_build_writes(struct svc_rdma_wr
 		if (ret < 0)
 			goto out_initerr;
 
+		trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset);
 		list_add(&ctxt->rw_list, &cc->cc_rwctxts);
 		cc->cc_sqecount += ret;
 		if (write_len == seg_length - info->wi_seg_off) {
@@ -525,6 +529,8 @@ int svc_rdma_send_write_chunk(struct svc
 	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
 	if (ret < 0)
 		goto out_err;
+
+	trace_svcrdma_encode_write(xdr->page_len);
 	return xdr->page_len;
 
 out_err:
@@ -581,6 +587,8 @@ int svc_rdma_send_reply_chunk(struct svc
 	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
 	if (ret < 0)
 		goto out_err;
+
+	trace_svcrdma_encode_reply(consumed);
 	return consumed;
 
 out_err:
@@ -605,9 +613,6 @@ static int svc_rdma_build_read_segment(s
 		goto out_noctx;
 	ctxt->rw_nents = sge_no;
 
-	dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
-		len, offset, rkey, sge_no);
-
 	sg = ctxt->rw_sg_table.sgl;
 	for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
 		seg_len = min_t(unsigned int, len,
@@ -685,6 +690,7 @@ static int svc_rdma_build_read_chunk(str
 		if (ret < 0)
 			break;
 
+		trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset);
 		info->ri_chunklen += rs_length;
 	}
 
@@ -705,9 +711,6 @@ static int svc_rdma_build_normal_read_ch
 	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
 	int ret;
 
-	dprintk("svcrdma: Reading Read chunk at position %u\n",
-		info->ri_position);
-
 	info->ri_pageno = head->hdr_count;
 	info->ri_pageoff = 0;
 
@@ -715,6 +718,8 @@ static int svc_rdma_build_normal_read_ch
 	if (ret < 0)
 		goto out;
 
+	trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position);
+
 	/* Split the Receive buffer between the head and tail
 	 * buffers at Read chunk's position. XDR roundup of the
 	 * chunk is not included in either the pagelist or in
@@ -763,8 +768,6 @@ static int svc_rdma_build_pz_read_chunk(
 	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
 	int ret;
 
-	dprintk("svcrdma: Reading Position Zero Read chunk\n");
-
 	info->ri_pageno = head->hdr_count - 1;
 	info->ri_pageoff = offset_in_page(head->byte_len);
 
@@ -772,6 +775,8 @@ static int svc_rdma_build_pz_read_chunk(
 	if (ret < 0)
 		goto out;
 
+	trace_svcrdma_encode_pzr(info->ri_chunklen);
+
 	head->arg.len += info->ri_chunklen;
 	head->arg.buflen += info->ri_chunklen;
 
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -99,14 +99,19 @@
  * where two different Write segments send portions of the same page.
  */
 
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
 #include <asm/unaligned.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 static u32 xdr_padsize(u32 len)
@@ -524,12 +529,6 @@ static int svc_rdma_send_reply_msg(struc
 	u32 inv_rkey;
 	int ret;
 
-	dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
-		(rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
-		rqstp->rq_res.head[0].iov_len,
-		rqstp->rq_res.page_len,
-		rqstp->rq_res.tail[0].iov_len);
-
 	ctxt = svc_rdma_get_context(rdma);
 
 	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
@@ -580,6 +579,7 @@ static int svc_rdma_send_error_msg(struc
 	/* Replace the original transport header with an
 	 * RDMA_ERROR response. XID etc are preserved.
 	 */
+	trace_svcrdma_err_chunk(*rdma_resp);
 	p = rdma_resp + 3;
 	*p++ = rdma_error;
 	*p   = err_chunk;
@@ -635,9 +635,6 @@ int svc_rdma_sendto(struct svc_rqst *rqs
 	rdma_argp = page_address(rqstp->rq_pages[0]);
 	svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
 
-	dprintk("svcrdma: preparing response for XID 0x%08x\n",
-		be32_to_cpup(rdma_argp));
-
 	/* Create the RDMA response header. xprt->xpt_mutex,
 	 * acquired in svc_send(), serializes RPC replies. The
 	 * code path below that inserts the credit grant value
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -41,21 +41,25 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
-#include <linux/sunrpc/svc_xprt.h>
-#include <linux/sunrpc/addr.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/export.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/rw.h>
+
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_xprt.h>
 #include <linux/sunrpc/svc_rdma.h>
-#include <linux/export.h>
+
 #include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -861,10 +865,12 @@ static struct svc_xprt *svc_rdma_accept(
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", conn_param.initiator_depth);
 
+	trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
 	return &newxprt->sc_xprt;
 
  errout:
 	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
+	trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
 	/* Take a reference in case the DTO handler runs */
 	svc_xprt_get(&newxprt->sc_xprt);
 	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -895,7 +901,6 @@ static void svc_rdma_detach(struct svc_x
 {
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	dprintk("svc: svc_rdma_detach(%p)\n", xprt);
 
 	/* Disconnect and flush posted WQE */
 	rdma_disconnect(rdma->sc_cm_id);
@@ -907,7 +912,7 @@ static void __svc_rdma_free(struct work_
 		container_of(work, struct svcxprt_rdma, sc_work);
 	struct svc_xprt *xprt = &rdma->sc_xprt;
 
-	dprintk("svcrdma: %s(%p)\n", __func__, rdma);
+	trace_svcrdma_xprt_free(xprt);
 
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_drain_qp(rdma->sc_qp);