Blob Blame History Raw
From: Sebastian Sanchez <sebastian.sanchez@intel.com>
Date: Fri, 26 May 2017 05:35:18 -0700
Subject: IB/hfi1: Optimize cachelines for user SDMA request structure
Patch-mainline: v4.14-rc1
Git-commit: e3304b7cc4f14d365f46d6847a35563ae8b017f7
References: bsc#1060463 FATE#323043

The current user SDMA request structure layout has holes.
The cachelines can be reduced to improve cacheline trading.
Separate fields in the following categories: mostly read,
writable and shared with interrupt.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/hfi1/user_sdma.c |  106 ++++++++++++++++++---------------
 1 file changed, 58 insertions(+), 48 deletions(-)

--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -117,6 +117,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size o
 
 #define AHG_KDETH_INTR_SHIFT 12
 #define AHG_KDETH_SH_SHIFT   13
+#define AHG_KDETH_ARRAY_SIZE  9
 
 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
 #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -204,25 +205,42 @@ struct evict_data {
 };
 
 struct user_sdma_request {
-	struct sdma_req_info info;
-	struct hfi1_user_sdma_pkt_q *pq;
-	struct hfi1_user_sdma_comp_q *cq;
 	/* This is the original header from user space */
 	struct hfi1_pkt_header hdr;
+
+	/* Read mostly fields */
+	struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+	struct hfi1_user_sdma_comp_q *cq;
 	/*
 	 * Pointer to the SDMA engine for this request.
 	 * Since different request could be on different VLs,
 	 * each request will need it's own engine pointer.
 	 */
 	struct sdma_engine *sde;
-	s8 ahg_idx;
-	u32 ahg[9];
+	struct sdma_req_info info;
+	/* TID array values copied from the tid_iov vector */
+	u32 *tids;
+	/* total length of the data in the request */
+	u32 data_len;
+	/* number of elements copied to the tids array */
+	u16 n_tids;
 	/*
-	 * KDETH.Offset (Eager) field
-	 * We need to remember the initial value so the headers
-	 * can be updated properly.
+	 * We copy the iovs for this request (based on
+	 * info.iovcnt). These are only the data vectors
 	 */
-	u32 koffset;
+	u8 data_iovs;
+	s8 ahg_idx;
+
+	/* Writeable fields shared with interrupt */
+	u64 seqcomp ____cacheline_aligned_in_smp;
+	u64 seqsubmitted;
+	unsigned long flags;
+	/* status of the last txreq completed */
+	int status;
+
+	/* Send side fields */
+	struct list_head txps ____cacheline_aligned_in_smp;
+	u64 seqnum;
 	/*
 	 * KDETH.OFFSET (TID) field
 	 * The offset can cover multiple packets, depending on the
@@ -230,29 +248,19 @@ struct user_sdma_request {
 	 */
 	u32 tidoffset;
 	/*
-	 * We copy the iovs for this request (based on
-	 * info.iovcnt). These are only the data vectors
+	 * KDETH.Offset (Eager) field
+	 * We need to remember the initial value so the headers
+	 * can be updated properly.
 	 */
-	unsigned data_iovs;
-	/* total length of the data in the request */
-	u32 data_len;
+	u32 koffset;
+	u32 sent;
+	/* TID index copied from the tid_iov vector */
+	u16 tididx;
 	/* progress index moving along the iovs array */
-	unsigned iov_idx;
+	u8 iov_idx;
+
 	struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
-	/* number of elements copied to the tids array */
-	u16 n_tids;
-	/* TID array values copied from the tid_iov vector */
-	u32 *tids;
-	u16 tididx;
-	u32 sent;
-	u64 seqnum;
-	u64 seqcomp;
-	u64 seqsubmitted;
-	struct list_head txps;
-	unsigned long flags;
-	/* status of the last txreq completed */
-	int status;
-};
+} ____cacheline_aligned_in_smp;
 
 /*
  * A single txreq could span up to 3 physical pages when the MTU
@@ -1034,11 +1042,6 @@ static int user_sdma_send_pkts(struct us
 							       datalen);
 				if (changes < 0)
 					goto free_tx;
-				sdma_txinit_ahg(&tx->txreq,
-						SDMA_TXREQ_F_USE_AHG,
-						datalen, req->ahg_idx, changes,
-						req->ahg, sizeof(req->hdr),
-						user_sdma_txreq_cb);
 			}
 		} else {
 			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -1442,21 +1445,22 @@ done:
 }
 
 static int set_txreq_header_ahg(struct user_sdma_request *req,
-				struct user_sdma_txreq *tx, u32 len)
+				struct user_sdma_txreq *tx, u32 datalen)
 {
+	u32 ahg[AHG_KDETH_ARRAY_SIZE];
 	int diff = 0;
 	u8 omfactor; /* KDETH.OM */
 	struct hfi1_user_sdma_pkt_q *pq = req->pq;
 	struct hfi1_pkt_header *hdr = &req->hdr;
 	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
-	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
 
 	if (PBC2LRH(pbclen) != lrhlen) {
 		/* PBC.PbcLengthDWs */
-		AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
+		AHG_HEADER_SET(ahg, diff, 0, 0, 12,
 			       cpu_to_le16(LRH2PBC(lrhlen)));
 		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
-		AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
+		AHG_HEADER_SET(ahg, diff, 3, 0, 16,
 			       cpu_to_be16(lrhlen >> 2));
 	}
 
@@ -1468,13 +1472,12 @@ static int set_txreq_header_ahg(struct u
 		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
 	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
 		val32 |= 1UL << 31;
-	AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
-	AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+	AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
+	AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
 	/* KDETH.Offset */
-	AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
+	AHG_HEADER_SET(ahg, diff, 15, 0, 16,
 		       cpu_to_le16(req->koffset & 0xffff));
-	AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
-		       cpu_to_le16(req->koffset >> 16));
+	AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
 	if (req_opcode(req->info.ctrl) == EXPECTED) {
 		__le16 val;
 
@@ -1492,9 +1495,8 @@ static int set_txreq_header_ahg(struct u
 			 * we have to check again.
 			 */
 			if (++req->tididx > req->n_tids - 1 ||
-			    !req->tids[req->tididx]) {
+			    !req->tids[req->tididx])
 				return -EINVAL;
-			}
 			tidval = req->tids[req->tididx];
 		}
 		omfactor = ((EXP_TID_GET(tidval, LEN) *
@@ -1502,7 +1504,7 @@ static int set_txreq_header_ahg(struct u
 				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
 				 KDETH_OM_SMALL_SHIFT;
 		/* KDETH.OM and KDETH.OFFSET (TID) */
-		AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
+		AHG_HEADER_SET(ahg, diff, 7, 0, 16,
 			       ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
 				((req->tidoffset >> omfactor)
 				 & 0x7fff)));
@@ -1522,12 +1524,20 @@ static int set_txreq_header_ahg(struct u
 					     AHG_KDETH_INTR_SHIFT));
 		}
 
-		AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+		AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
 	}
+	if (diff < 0)
+		return diff;
 
 	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
 					req->info.comp_idx, req->sde->this_idx,
-					req->ahg_idx, req->ahg, diff, tidval);
+					req->ahg_idx, ahg, diff, tidval);
+	sdma_txinit_ahg(&tx->txreq,
+			SDMA_TXREQ_F_USE_AHG,
+			datalen, req->ahg_idx, diff,
+			ahg, sizeof(req->hdr),
+			user_sdma_txreq_cb);
+
 	return diff;
 }