From aa489bba1604c95dab3caccca46fb07b71316151 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 29 Jul 2015 04:23:39 -0500
Subject: [PATCH] rbd: add support for scatterlist obj_request_type
References: fate#318836
Patch-mainline: Not yet, SES2 clustered LIO/RBD
This adds support for a scatterlist rbd obj_request_type, so LIO
can pass down its sg to rbd.
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Luis Henriques <lhenriques@suse.com>
[luis: rebased on top of a1fbb5e7bbb5 ("rbd: start enums at 1 instead of 0")]
---
drivers/block/rbd.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 94 insertions(+), 10 deletions(-)
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -45,6 +45,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
+#include <linux/scatterlist.h>
#include "rbd_types.h"
@@ -211,6 +212,7 @@ enum obj_request_type {
OBJ_REQUEST_NODATA = 1,
OBJ_REQUEST_BIO, /* pointer into provided bio (list) */
OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */
+ OBJ_REQUEST_SG,
};
enum obj_operation_type {
@@ -266,6 +268,10 @@ struct rbd_obj_request {
struct ceph_bvec_iter bvec_pos;
u32 bvec_count;
};
+ struct {
+ struct scatterlist *sg;
+ unsigned int init_sg_offset;
+ };
};
struct bio_vec *copyup_bvecs;
u32 copyup_bvec_count;
@@ -296,14 +302,20 @@ struct rbd_img_request {
u64 snap_id; /* for reads */
struct ceph_snap_context *snapc; /* for writes */
};
- union {
- struct request *rq; /* block request */
- struct rbd_obj_request *obj_request; /* obj req initiator */
- };
+
+ struct request *rq; /* block request */
+ struct rbd_obj_request *obj_request; /* obj req initiator */
+
spinlock_t completion_lock;/* protects next_completion */
u32 next_completion;
rbd_img_callback_t callback;
+ /*
+ * xferred is the bytes that have successfully been transferred.
+ * completed is the bytes that have been accounted for and includes
+ * both failed and successfully transferred bytes.
+ */
u64 xferred;/* aggregate bytes transferred */
+ u64 completed;
int result; /* first nonzero obj_request result */
u32 obj_request_count;
@@ -1273,6 +1285,34 @@ static void zero_bvecs(struct ceph_bvec_
}));
}
+static void zero_sg(struct scatterlist *sgl, u64 start, u64 length)
+{
+ struct scatterlist *sg = sgl;
+ u64 end = start + length;
+ u64 pos = 0;
+
+ while (pos < end && sg) {
+ if (pos + sg->length > start) {
+ int sg_offset = max_t(int, start - pos, 0);
+ unsigned int length = min_t(unsigned int,
+ sg->length - sg_offset,
+ end - pos);
+ void *kaddr;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kaddr = kmap_atomic(sg_page(sg));
+ memset(kaddr + sg_offset + sg->offset, 0, length);
+ flush_dcache_page(sg_page(sg));
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
+ }
+
+ pos += sg->length;
+ sg = sg_next(sg);
+ }
+}
+
/*
* The default/initial value for all object request flags is 0. For
* each flag, once its value is set to 1 it is never reset to 0
@@ -1431,6 +1471,7 @@ static bool obj_request_type_valid(enum
case OBJ_REQUEST_NODATA:
case OBJ_REQUEST_BIO:
case OBJ_REQUEST_BVECS:
+ case OBJ_REQUEST_SG:
return true;
default:
return false;
@@ -1579,16 +1620,20 @@ rbd_img_obj_request_read_callback(struct
if (obj_request->result == -ENOENT) {
if (obj_request->type == OBJ_REQUEST_BIO)
zero_bios(&obj_request->bio_pos, 0, length);
- else
+ else if (obj_request->type == OBJ_REQUEST_BVECS)
zero_bvecs(&obj_request->bvec_pos, 0, length);
+ else if (obj_request->type == OBJ_REQUEST_SG)
+ zero_sg(obj_request->sg, 0, length);
obj_request->result = 0;
} else if (xferred < length && !obj_request->result) {
if (obj_request->type == OBJ_REQUEST_BIO)
zero_bios(&obj_request->bio_pos, xferred,
length - xferred);
- else
+ else if (obj_request->type == OBJ_REQUEST_BVECS)
zero_bvecs(&obj_request->bvec_pos, xferred,
length - xferred);
+ else if (obj_request->type == OBJ_REQUEST_SG)
+ zero_sg(obj_request->sg, xferred, length);
}
obj_request->xferred = length;
obj_request_done_set(obj_request);
@@ -1899,6 +1944,7 @@ static void rbd_obj_request_destroy(stru
case OBJ_REQUEST_NODATA:
case OBJ_REQUEST_BIO:
case OBJ_REQUEST_BVECS:
+ case OBJ_REQUEST_SG:
break; /* Nothing to do */
default:
rbd_assert(0);
@@ -1997,6 +2043,7 @@ static struct rbd_img_request *rbd_img_r
img_request->rbd_dev = rbd_dev;
img_request->offset = offset;
img_request->length = length;
+ img_request->completed = 0;
if (op_type == OBJ_OP_DISCARD) {
img_request_discard_set(img_request);
img_request->snapc = snapc;
@@ -2119,18 +2166,22 @@ static bool rbd_img_obj_end_request(stru
*/
xferred = obj_request->length;
}
+ img_request->completed += xferred;
if (img_request_child_test(img_request)) {
rbd_assert(img_request->obj_request != NULL);
more = obj_request->which < img_request->obj_request_count - 1;
- } else {
+ } else if (img_request->rq) {
blk_status_t status = errno_to_blk_status(result);
- rbd_assert(img_request->rq != NULL);
-
more = blk_update_request(img_request->rq, status, xferred);
if (!more)
__blk_mq_end_request(img_request->rq, status);
+ } else {
+ if (img_request->completed < img_request->length)
+ more = true;
+ else
+ more = false;
}
return more;
@@ -2234,6 +2285,10 @@ static void rbd_img_obj_request_fill(str
else if (obj_request->type == OBJ_REQUEST_BVECS)
osd_req_op_extent_osd_data_bvec_pos(osd_request, num_ops,
&obj_request->bvec_pos);
+ else if (obj_request->type == OBJ_REQUEST_SG)
+ osd_req_op_extent_osd_data_sg(osd_request, num_ops,
+ obj_request->sg,
+ obj_request->init_sg_offset, length);
/* Discards are also writes */
if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
@@ -2259,7 +2314,9 @@ static int rbd_img_request_fill(struct r
struct rbd_obj_request *next_obj_request;
struct ceph_bio_iter bio_it;
struct ceph_bvec_iter bvec_it;
+ struct scatterlist *sgl = NULL;
enum obj_operation_type op_type;
+ unsigned int sg_offset = 0;
u64 img_offset;
u64 resid;
@@ -2277,6 +2334,8 @@ static int rbd_img_request_fill(struct r
bio_it.iter.bi_sector << SECTOR_SHIFT);
} else if (type == OBJ_REQUEST_BVECS) {
bvec_it = *(struct ceph_bvec_iter *)data_desc;
+ } else if (type == OBJ_REQUEST_SG) {
+ sgl = data_desc;
}
while (resid) {
@@ -2306,6 +2365,27 @@ static int rbd_img_request_fill(struct r
obj_request->bvec_pos = bvec_it;
ceph_bvec_iter_shorten(&obj_request->bvec_pos, length);
ceph_bvec_iter_advance(&bvec_it, length);
+ } else if (type == OBJ_REQUEST_SG) {
+ u64 sg_length = 0;
+
+ obj_request->init_sg_offset = sg_offset;
+ obj_request->sg = sgl;
+ do {
+ sg_length += (sgl->length - sg_offset);
+ sg_offset = 0;
+ if (sg_length > length) {
+ sg_offset = sgl->length -
+ (sg_length - length);
+ break;
+ }
+ /*
+ * For WRITE_SAME we have a single sg that
+ * is written possibly multiple times over
+ * img_request->length bytes.
+ */
+ if (sg_next(sgl))
+ sgl = sg_next(sgl);
+ } while (true);
}
osd_req = rbd_osd_req_create(rbd_dev, op_type,
@@ -2821,9 +2901,13 @@ static void rbd_img_parent_read(struct r
if (obj_request->type == OBJ_REQUEST_BIO)
result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
&obj_request->bio_pos);
- else
+ else if (obj_request->type == OBJ_REQUEST_BVECS)
result = rbd_img_request_fill(img_request, OBJ_REQUEST_BVECS,
&obj_request->bvec_pos);
+ else
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_SG,
+ obj_request->sg);
+
if (result)
goto out_err;