From aa489bba1604c95dab3caccca46fb07b71316151 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 29 Jul 2015 04:23:39 -0500
Subject: [PATCH] rbd: add support for scatterlist obj_request_type
References: fate#318836
Patch-mainline: Not yet, SES2 clustered LIO/RBD

This adds support for a scatterlist rbd obj_request_type (OBJ_REQUEST_SG),
so LIO can pass its scatterlists directly down to rbd.
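
A rough caller-side sketch of the new type (illustrative only, not part
of this patch): assuming the existing rbd.c helpers
rbd_img_request_create(), rbd_img_request_submit() and
rbd_img_request_put() keep their current signatures, a hypothetical user
inside rbd.c would hand its scatterlist to rbd_img_request_fill() as the
data descriptor. The actual LIO-side plumbing is handled separately; the
helper below is a sketch with error handling trimmed.

	/* Illustrative only -- not added by this patch. */
	static int rbd_sg_write_sketch(struct rbd_device *rbd_dev,
				       struct scatterlist *sgl,
				       u64 offset, u64 length,
				       struct ceph_snap_context *snapc)
	{
		struct rbd_img_request *img_request;
		int ret;

		img_request = rbd_img_request_create(rbd_dev, offset, length,
						     OBJ_OP_WRITE, snapc);
		if (!img_request)
			return -ENOMEM;

		/* New in this patch: pass the sg list as the data descriptor. */
		ret = rbd_img_request_fill(img_request, OBJ_REQUEST_SG, sgl);
		if (!ret)
			ret = rbd_img_request_submit(img_request);
		if (ret)
			rbd_img_request_put(img_request);

		return ret;
	}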

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: David Disseldorp <ddiss@suse.de>
---
 drivers/block/rbd.c |  105 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 11 deletions(-)

--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/scatterlist.h>
 
 #include "rbd_types.h"
 
@@ -210,7 +211,7 @@ struct rbd_obj_request;
 typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
 
 enum obj_request_type {
-	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES, OBJ_REQUEST_SG,
 };
 
 enum obj_operation_type {
@@ -266,6 +267,10 @@ struct rbd_obj_request {
 			struct page	**pages;
 			u32		page_count;
 		};
+		struct {
+			struct scatterlist	*sg;
+			unsigned int		init_sg_offset;
+		};
 	};
 	struct page		**copyup_pages;
 	u32			copyup_page_count;
@@ -297,16 +302,22 @@ struct rbd_img_request {
 		u64			snap_id;	/* for reads */
 		struct ceph_snap_context *snapc;	/* for writes */
 	};
-	union {
-		struct request		*rq;		/* block request */
-		struct rbd_obj_request	*obj_request;	/* obj req initiator */
-	};
+
+	struct request		*rq;		/* block request */
+	struct rbd_obj_request	*obj_request;	/* obj req initiator */
+
 	struct page		**copyup_pages;
 	u32			copyup_page_count;
 	spinlock_t		completion_lock;/* protects next_completion */
 	u32			next_completion;
 	rbd_img_callback_t	callback;
+	/*
+	 * xferred is the number of bytes successfully transferred.
+	 * completed is the number of bytes accounted for, including both
+	 * failed and successfully transferred bytes.
+	 */
 	u64			xferred;/* aggregate bytes transferred */
+	u64			completed;
 	int			result;	/* first nonzero obj_request result */
 
 	u32			obj_request_count;
@@ -1272,6 +1283,34 @@ static void zero_bio_chain(struct bio *c
 	}
 }
 
+static void zero_sg(struct scatterlist *sgl, u64 start, u64 length)
+{
+	struct scatterlist *sg = sgl;
+	u64 end = start + length;
+	u64 pos = 0;
+
+	while (pos < end && sg) {
+		if (pos + sg->length > start) {
+			int sg_offset = max_t(int, start - pos, 0);
+			unsigned int length = min_t(unsigned int,
+						    sg->length - sg_offset,
+						    end - pos);
+			void *kaddr;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			kaddr = kmap_atomic(sg_page(sg));
+			memset(kaddr + sg_offset + sg->offset, 0, length);
+			flush_dcache_page(sg_page(sg));
+			kunmap_atomic(kaddr);
+			local_irq_restore(flags);
+		}
+
+		pos += sg->length;
+		sg = sg_next(sg);
+	}
+}
+
 /*
  * similar to zero_bio_chain(), zeros data defined by a page array,
  * starting at the given byte offset from the start of the array and
@@ -1546,6 +1585,7 @@ static bool obj_request_type_valid(enum
 	case OBJ_REQUEST_NODATA:
 	case OBJ_REQUEST_BIO:
 	case OBJ_REQUEST_PAGES:
+	case OBJ_REQUEST_SG:
 		return true;
 	default:
 		return false;
@@ -1729,14 +1769,18 @@ rbd_img_obj_request_read_callback(struct
 	if (obj_request->result == -ENOENT) {
 		if (obj_request->type == OBJ_REQUEST_BIO)
 			zero_bio_chain(obj_request->bio_list, 0);
-		else
+		else if (obj_request->type == OBJ_REQUEST_PAGES)
 			zero_pages(obj_request->pages, 0, length);
+		else if (obj_request->type == OBJ_REQUEST_SG)
+			zero_sg(obj_request->sg, 0, length);
 		obj_request->result = 0;
 	} else if (xferred < length && !obj_request->result) {
 		if (obj_request->type == OBJ_REQUEST_BIO)
 			zero_bio_chain(obj_request->bio_list, xferred);
-		else
+		else if (obj_request->type == OBJ_REQUEST_PAGES)
 			zero_pages(obj_request->pages, xferred, length);
+		else if (obj_request->type == OBJ_REQUEST_SG)
+			zero_sg(obj_request->sg, xferred, length);
 	}
 	obj_request->xferred = length;
 	obj_request_done_set(obj_request);
@@ -2080,6 +2124,7 @@ static void rbd_obj_request_destroy(stru
 	rbd_assert(obj_request_type_valid(obj_request->type));
 	switch (obj_request->type) {
 	case OBJ_REQUEST_NODATA:
+	case OBJ_REQUEST_SG:
 		break;		/* Nothing to do */
 	case OBJ_REQUEST_BIO:
 		if (obj_request->bio_list)
@@ -2181,6 +2226,7 @@ static struct rbd_img_request *rbd_img_r
 	img_request->offset = offset;
 	img_request->length = length;
 	img_request->flags = 0;
+	img_request->completed = 0;
 	if (op_type == OBJ_OP_DISCARD) {
 		img_request_discard_set(img_request);
 		img_request->snapc = snapc;
@@ -2306,6 +2352,7 @@ static bool rbd_img_obj_end_request(stru
 		 */
 		xferred = obj_request->length;
 	}
+	img_request->completed += xferred;
 
 	/* Image object requests don't own their page array */
 
@@ -2317,12 +2364,15 @@ static bool rbd_img_obj_end_request(stru
 	if (img_request_child_test(img_request)) {
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
-	} else {
-		rbd_assert(img_request->rq != NULL);
-
+	} else if (img_request->rq) {
 		more = blk_update_request(img_request->rq, result, xferred);
 		if (!more)
 			__blk_mq_end_request(img_request->rq, result);
+	} else {
+		if (img_request->completed < img_request->length)
+			more = true;
+		else
+			more = false;
 	}
 
 	return more;
@@ -2427,6 +2477,10 @@ static void rbd_img_obj_request_fill(str
 		osd_req_op_extent_osd_data_pages(osd_request, num_ops,
 					obj_request->pages, length,
 					offset & ~PAGE_MASK, false, false);
+	else if (obj_request->type == OBJ_REQUEST_SG)
+		osd_req_op_extent_osd_data_sg(osd_request, num_ops,
+					obj_request->sg,
+					obj_request->init_sg_offset, length);
 
 	/* Discards are also writes */
 	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
@@ -2452,7 +2506,9 @@ static int rbd_img_request_fill(struct r
 	struct rbd_obj_request *next_obj_request;
 	struct bio *bio_list = NULL;
 	unsigned int bio_offset = 0;
+	unsigned int sg_offset = 0;
 	struct page **pages = NULL;
+	struct scatterlist *sgl = NULL;
 	enum obj_operation_type op_type;
 	u64 img_offset;
 	u64 resid;
@@ -2471,6 +2527,8 @@ static int rbd_img_request_fill(struct r
 			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
 	} else if (type == OBJ_REQUEST_PAGES) {
 		pages = data_desc;
+	} else if (type == OBJ_REQUEST_SG) {
+		sgl = data_desc;
 	}
 
 	while (resid) {
@@ -2518,6 +2576,27 @@ static int rbd_img_request_fill(struct r
 			if ((offset + length) & ~PAGE_MASK)
 				page_count--;	/* more on last page */
 			pages += page_count;
+		} else if (type == OBJ_REQUEST_SG) {
+			u64 sg_length = 0;
+
+			obj_request->init_sg_offset = sg_offset;
+			obj_request->sg = sgl;
+			do {
+				sg_length += (sgl->length - sg_offset);
+				sg_offset = 0;
+				if (sg_length > length) {
+					sg_offset = sgl->length -
+							(sg_length - length);
+					break;
+				}
+				/*
+				 * For WRITE_SAME we have a single sg that
+				 * is written possibly multiple times over
+				 * img_request->length bytes.
+				 */
+				if (sg_next(sgl))
+					sgl = sg_next(sgl);
+			} while (true);
 		}
 
 		osd_req = rbd_osd_req_create(rbd_dev, op_type,
@@ -3073,9 +3152,13 @@ static void rbd_img_parent_read(struct r
 	if (obj_request->type == OBJ_REQUEST_BIO)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
 						obj_request->bio_list);
-	else
+	else if (obj_request->type == OBJ_REQUEST_PAGES)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
 						obj_request->pages);
+	else
+		result = rbd_img_request_fill(img_request, OBJ_REQUEST_SG,
+						obj_request->sg);
+
 	if (result)
 		goto out_err;