From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 23 May 2018 00:34:39 -0400
Subject: [PATCH] nvmet: add simple file backed ns support
Git-commit: d5eff33ee6f80864d889d64fd3f6ea7b78dd1b24
Patch-mainline: v4.18-rc1
References: bsc#1104967,FATE#325924

This patch adds simple file backed namespace support for the NVMeOF target.

The new file io-cmd-file.c handles I/O commands when the namespace is file
backed.  We also introduce a mempool-based slow path that uses sync I/Os for
file backed namespaces, to ensure forward progress under memory reclaim.
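
For reference, a condensed sketch of that slow path, distilled from
nvmet_file_execute_rw() in io-cmd-file.c below (illustrative only, not the
exact hunk):

	/*
	 * Normally the bio_vec array is kmalloc'ed (or the inline array is
	 * used).  If that allocation fails under memory pressure, fall back
	 * to the preallocated per-namespace mempool and, for requests larger
	 * than one mempool object, issue the I/O as bounded synchronous
	 * chunks of at most NVMET_MAX_MPOOL_BVEC segments each.
	 */
	req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), GFP_KERNEL);
	req->f.mpool_alloc = false;
	if (unlikely(!req->f.bvec)) {
		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
		req->f.mpool_alloc = true;
		if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
			is_sync = true;		/* split into sync chunks */
	}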

The old block device based implementation is moved to io-cmd-bdev.c and now
uses a "nvmet_bdev_" symbol prefix.  The enable/disable calls are also moved
into the respective files.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
[hch: updated changelog, fixed double req->ns lookup in bdev case]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 drivers/nvme/target/Makefile      |   4 +-
 drivers/nvme/target/admin-cmd.c   |   8 +
 drivers/nvme/target/core.c        |  50 ++++---
 drivers/nvme/target/io-cmd-bdev.c | 241 ++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd-file.c | 300 ++++++++++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd.c      | 228 -----------------------------
 drivers/nvme/target/nvmet.h       |  28 +++-
 7 files changed, 610 insertions(+), 249 deletions(-)
 create mode 100644 drivers/nvme/target/io-cmd-bdev.c
 create mode 100644 drivers/nvme/target/io-cmd-file.c
 delete mode 100644 drivers/nvme/target/io-cmd.c

diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 488250189c99..8118c93391c6 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA)		+= nvmet-rdma.o
 obj-$(CONFIG_NVME_TARGET_FC)		+= nvmet-fc.o
 obj-$(CONFIG_NVME_TARGET_FCLOOP)	+= nvme-fcloop.o
 
-nvmet-y		+= core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
-			discovery.o
+nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
+			discovery.o io-cmd-file.o io-cmd-bdev.o
 nvme-loop-y	+= loop.o
 nvmet-rdma-y	+= rdma.o
 nvmet-fc-y	+= fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index d1afcfd89aa3..b2ba95b2eef7 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -45,6 +45,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
 		return NVME_SC_INVALID_NS;
 	}
 
+	/* we don't have the right data for file backed ns */
+	if (!ns->bdev)
+		goto out;
+
 	host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
 	data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
 	host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
@@ -54,6 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
 	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
 	put_unaligned_le64(host_writes, &slog->host_writes[0]);
 	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+out:
 	nvmet_put_namespace(ns);
 
 	return NVME_SC_SUCCESS;
@@ -71,6 +76,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+		/* we don't have the right data for file backed ns */
+		if (!ns->bdev)
+			continue;
 		host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
 		data_units_read +=
 			part_stat_read(ns->bdev->bd_part, sectors[READ]);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6d8eaf3f89c5..800aaf96ddcd 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -271,6 +271,12 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
 	percpu_ref_put(&ns->ref);
 }
 
+static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
+{
+	nvmet_bdev_ns_disable(ns);
+	nvmet_file_ns_disable(ns);
+}
+
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
 	struct nvmet_subsys *subsys = ns->subsys;
@@ -281,23 +287,16 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	if (ns->enabled)
 		goto out_unlock;
 
-	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
-			NULL);
-	if (IS_ERR(ns->bdev)) {
-		pr_err("failed to open block device %s: (%ld)\n",
-		       ns->device_path, PTR_ERR(ns->bdev));
-		ret = PTR_ERR(ns->bdev);
-		ns->bdev = NULL;
+	ret = nvmet_bdev_ns_enable(ns);
+	if (ret)
+		ret = nvmet_file_ns_enable(ns);
+	if (ret)
 		goto out_unlock;
-	}
-
-	ns->size = i_size_read(ns->bdev->bd_inode);
-	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
 
 	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
 				0, GFP_KERNEL);
 	if (ret)
-		goto out_blkdev_put;
+		goto out_dev_put;
 
 	if (ns->nsid > subsys->max_nsid)
 		subsys->max_nsid = ns->nsid;
@@ -328,9 +327,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 out_unlock:
 	mutex_unlock(&subsys->lock);
 	return ret;
-out_blkdev_put:
-	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
-	ns->bdev = NULL;
+out_dev_put:
+	nvmet_ns_dev_disable(ns);
 	goto out_unlock;
 }
 
@@ -366,8 +364,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 
-	if (ns->bdev)
-		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+	nvmet_ns_dev_disable(ns);
 out_unlock:
 	mutex_unlock(&subsys->lock);
 }
@@ -499,6 +496,25 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
+static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+	u16 ret;
+
+	ret = nvmet_check_ctrl_status(req, cmd);
+	if (unlikely(ret))
+		return ret;
+
+	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+	if (unlikely(!req->ns))
+		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+	if (req->ns->file)
+		return nvmet_file_parse_io_cmd(req);
+	else
+		return nvmet_bdev_parse_io_cmd(req);
+}
+
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
 {
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
new file mode 100644
index 000000000000..e0b0f7df70c2
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -0,0 +1,241 @@
+/*
+ * NVMe I/O command implementation.
+ * Copyright (c) 2015-2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
+{
+	int ret;
+
+	ns->bdev = blkdev_get_by_path(ns->device_path,
+			FMODE_READ | FMODE_WRITE, NULL);
+	if (IS_ERR(ns->bdev)) {
+		ret = PTR_ERR(ns->bdev);
+		if (ret != -ENOTBLK) {
+			pr_err("failed to open block device %s: (%ld)\n",
+					ns->device_path, PTR_ERR(ns->bdev));
+		}
+		ns->bdev = NULL;
+		return ret;
+	}
+	ns->size = i_size_read(ns->bdev->bd_inode);
+	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+	return 0;
+}
+
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+	if (ns->bdev) {
+		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+		ns->bdev = NULL;
+	}
+}
+
+static void nvmet_bio_done(struct bio *bio)
+{
+	struct nvmet_req *req = bio->bi_private;
+
+	nvmet_req_complete(req,
+		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+
+	if (bio != &req->b.inline_bio)
+		bio_put(bio);
+}
+
+static void nvmet_bdev_execute_rw(struct nvmet_req *req)
+{
+	int sg_cnt = req->sg_cnt;
+	struct bio *bio = &req->b.inline_bio;
+	struct scatterlist *sg;
+	sector_t sector;
+	blk_qc_t cookie;
+	int op, op_flags = 0, i;
+
+	if (!req->sg_cnt) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (req->cmd->rw.opcode == nvme_cmd_write) {
+		op = REQ_OP_WRITE;
+		op_flags = REQ_SYNC | REQ_IDLE;
+		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+			op_flags |= REQ_FUA;
+	} else {
+		op = REQ_OP_READ;
+	}
+
+	sector = le64_to_cpu(req->cmd->rw.slba);
+	sector <<= (req->ns->blksize_shift - 9);
+
+	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	bio_set_dev(bio, req->ns->bdev);
+	bio->bi_iter.bi_sector = sector;
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+	bio_set_op_attrs(bio, op, op_flags);
+
+	for_each_sg(req->sg, sg, req->sg_cnt, i) {
+		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+				!= sg->length) {
+			struct bio *prev = bio;
+
+			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+			bio_set_dev(bio, req->ns->bdev);
+			bio->bi_iter.bi_sector = sector;
+			bio_set_op_attrs(bio, op, op_flags);
+
+			bio_chain(bio, prev);
+			submit_bio(prev);
+		}
+
+		sector += sg->length >> 9;
+		sg_cnt--;
+	}
+
+	cookie = submit_bio(bio);
+
+	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+}
+
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+{
+	struct bio *bio = &req->b.inline_bio;
+
+	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	bio_set_dev(bio, req->ns->bdev);
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+	submit_bio(bio);
+}
+
+static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
+		struct nvme_dsm_range *range, struct bio **bio)
+{
+	int ret;
+
+	ret = __blkdev_issue_discard(ns->bdev,
+			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
+			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
+			GFP_KERNEL, 0, bio);
+	if (ret && ret != -EOPNOTSUPP)
+		return NVME_SC_INTERNAL | NVME_SC_DNR;
+	return 0;
+}
+
+static void nvmet_bdev_execute_discard(struct nvmet_req *req)
+{
+	struct nvme_dsm_range range;
+	struct bio *bio = NULL;
+	int i;
+	u16 status;
+
+	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+				sizeof(range));
+		if (status)
+			break;
+
+		status = nvmet_bdev_discard_range(req->ns, &range, &bio);
+		if (status)
+			break;
+	}
+
+	if (bio) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+		if (status) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
+		} else {
+			submit_bio(bio);
+		}
+	} else {
+		nvmet_req_complete(req, status);
+	}
+}
+
+static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
+{
+	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+	case NVME_DSMGMT_AD:
+		nvmet_bdev_execute_discard(req);
+		return;
+	case NVME_DSMGMT_IDR:
+	case NVME_DSMGMT_IDW:
+	default:
+		/* Not supported yet */
+		nvmet_req_complete(req, 0);
+		return;
+	}
+}
+
+static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
+{
+	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+	struct bio *bio = NULL;
+	u16 status = NVME_SC_SUCCESS;
+	sector_t sector;
+	sector_t nr_sector;
+
+	sector = le64_to_cpu(write_zeroes->slba) <<
+		(req->ns->blksize_shift - 9);
+	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+		(req->ns->blksize_shift - 9));
+
+	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
+				GFP_KERNEL, &bio, 0))
+		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+
+	if (bio) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+		submit_bio(bio);
+	} else {
+		nvmet_req_complete(req, status);
+	}
+}
+
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+		req->execute = nvmet_bdev_execute_rw;
+		req->data_len = nvmet_rw_len(req);
+		return 0;
+	case nvme_cmd_flush:
+		req->execute = nvmet_bdev_execute_flush;
+		req->data_len = 0;
+		return 0;
+	case nvme_cmd_dsm:
+		req->execute = nvmet_bdev_execute_dsm;
+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+			sizeof(struct nvme_dsm_range);
+		return 0;
+	case nvme_cmd_write_zeroes:
+		req->execute = nvmet_bdev_execute_write_zeroes;
+		return 0;
+	default:
+		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+		       req->sq->qid);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
new file mode 100644
index 000000000000..ca1ccf867856
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target File I/O commands implementation.
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/file.h>
+#include "nvmet.h"
+
+#define NVMET_MAX_MPOOL_BVEC		16
+#define NVMET_MIN_MPOOL_OBJ		16
+
+void nvmet_file_ns_disable(struct nvmet_ns *ns)
+{
+	if (ns->file) {
+		mempool_destroy(ns->bvec_pool);
+		ns->bvec_pool = NULL;
+		kmem_cache_destroy(ns->bvec_cache);
+		ns->bvec_cache = NULL;
+		fput(ns->file);
+		ns->file = NULL;
+	}
+}
+
+int nvmet_file_ns_enable(struct nvmet_ns *ns)
+{
+	int ret;
+	struct kstat stat;
+
+	ns->file = filp_open(ns->device_path,
+			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+	if (IS_ERR(ns->file)) {
+		pr_err("failed to open file %s: (%ld)\n",
+				ns->device_path, PTR_ERR(ns->file));
+		return PTR_ERR(ns->file);
+	}
+
+	ret = vfs_getattr(&ns->file->f_path,
+			&stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+	if (ret)
+		goto err;
+
+	ns->size = stat.size;
+	ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+
+	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+			0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ns->bvec_cache)
+		goto err;
+
+	ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+			mempool_free_slab, ns->bvec_cache);
+
+	if (!ns->bvec_pool)
+		goto err;
+
+	return ret;
+err:
+	ns->size = 0;
+	ns->blksize_shift = 0;
+	nvmet_file_ns_disable(ns);
+	return ret;
+}
+
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+{
+	bv->bv_page = sg_page_iter_page(iter);
+	bv->bv_offset = iter->sg->offset;
+	bv->bv_len = PAGE_SIZE - iter->sg->offset;
+}
+
+static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
+		unsigned long nr_segs, size_t count)
+{
+	struct kiocb *iocb = &req->f.iocb;
+	ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
+	struct iov_iter iter;
+	int ki_flags = 0, rw;
+	ssize_t ret;
+
+	if (req->cmd->rw.opcode == nvme_cmd_write) {
+		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+			ki_flags = IOCB_DSYNC;
+		call_iter = req->ns->file->f_op->write_iter;
+		rw = WRITE;
+	} else {
+		call_iter = req->ns->file->f_op->read_iter;
+		rw = READ;
+	}
+
+	iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+
+	iocb->ki_pos = pos;
+	iocb->ki_filp = req->ns->file;
+	iocb->ki_flags = IOCB_DIRECT | ki_flags;
+
+	ret = call_iter(iocb, &iter);
+
+	if (ret != -EIOCBQUEUED && iocb->ki_complete)
+		iocb->ki_complete(iocb, ret, 0);
+
+	return ret;
+}
+
+static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+{
+	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+
+	if (req->f.bvec != req->inline_bvec) {
+		if (likely(req->f.mpool_alloc == false))
+			kfree(req->f.bvec);
+		else
+			mempool_free(req->f.bvec, req->ns->bvec_pool);
+	}
+
+	nvmet_req_complete(req, ret != req->data_len ?
+			NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+	ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+	struct sg_page_iter sg_pg_iter;
+	unsigned long bv_cnt = 0;
+	bool is_sync = false;
+	size_t len = 0, total_len = 0;
+	ssize_t ret = 0;
+	loff_t pos;
+
+	if (!req->sg_cnt || !nr_bvec) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+				GFP_KERNEL);
+	else
+		req->f.bvec = req->inline_bvec;
+
+	req->f.mpool_alloc = false;
+	if (unlikely(!req->f.bvec)) {
+		/* fallback under memory pressure */
+		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+		req->f.mpool_alloc = true;
+		if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
+			is_sync = true;
+	}
+
+	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+
+	memset(&req->f.iocb, 0, sizeof(struct kiocb));
+	for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
+		nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+		len += req->f.bvec[bv_cnt].bv_len;
+		total_len += req->f.bvec[bv_cnt].bv_len;
+		bv_cnt++;
+
+		WARN_ON_ONCE((nr_bvec - 1) < 0);
+
+		if (unlikely(is_sync) &&
+		    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+			if (ret < 0)
+				goto out;
+			pos += len;
+			bv_cnt = 0;
+			len = 0;
+		}
+		nr_bvec--;
+	}
+
+	if (WARN_ON_ONCE(total_len != req->data_len))
+		ret = -EIO;
+out:
+	if (unlikely(is_sync || ret)) {
+		nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
+		return;
+	}
+	req->f.iocb.ki_complete = nvmet_file_io_done;
+	nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+}
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	int ret;
+
+	ret = vfs_fsync(req->ns->file, 1);
+
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_flush(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_flush_work);
+	schedule_work(&req->f.work);
+}
+
+static void nvmet_file_execute_discard(struct nvmet_req *req)
+{
+	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+	struct nvme_dsm_range range;
+	loff_t offset;
+	loff_t len;
+	int i, ret;
+
+	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+		if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+					sizeof(range)))
+			break;
+		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+		len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+		ret = vfs_fallocate(req->ns->file, mode, offset, len);
+		if (ret)
+			break;
+	}
+
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_dsm_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+	case NVME_DSMGMT_AD:
+		nvmet_file_execute_discard(req);
+		return;
+	case NVME_DSMGMT_IDR:
+	case NVME_DSMGMT_IDW:
+	default:
+		/* Not supported yet */
+		nvmet_req_complete(req, 0);
+		return;
+	}
+}
+
+static void nvmet_file_execute_dsm(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+	schedule_work(&req->f.work);
+}
+
+static void nvmet_file_write_zeroes_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+	loff_t offset;
+	loff_t len;
+	int ret;
+
+	offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
+	len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+			req->ns->blksize_shift);
+
+	ret = vfs_fallocate(req->ns->file, mode, offset, len);
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+	schedule_work(&req->f.work);
+}
+
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+		req->execute = nvmet_file_execute_rw;
+		req->data_len = nvmet_rw_len(req);
+		return 0;
+	case nvme_cmd_flush:
+		req->execute = nvmet_file_execute_flush;
+		req->data_len = 0;
+		return 0;
+	case nvme_cmd_dsm:
+		req->execute = nvmet_file_execute_dsm;
+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+			sizeof(struct nvme_dsm_range);
+		return 0;
+	case nvme_cmd_write_zeroes:
+		req->execute = nvmet_file_execute_write_zeroes;
+		req->data_len = 0;
+		return 0;
+	default:
+		pr_err("unhandled cmd for file ns %d on qid %d\n",
+				cmd->common.opcode, req->sq->qid);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
deleted file mode 100644
index e5eb2db4d20f..000000000000
--- a/drivers/nvme/target/io-cmd.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * NVMe I/O command implementation.
- * Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/blkdev.h>
-#include <linux/module.h>
-#include "nvmet.h"
-
-static void nvmet_bio_done(struct bio *bio)
-{
-	struct nvmet_req *req = bio->bi_private;
-
-	nvmet_req_complete(req,
-		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
-
-	if (bio != &req->inline_bio)
-		bio_put(bio);
-}
-
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
-{
-	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
-			req->ns->blksize_shift;
-}
-
-static void nvmet_execute_rw(struct nvmet_req *req)
-{
-	int sg_cnt = req->sg_cnt;
-	struct bio *bio = &req->inline_bio;
-	struct scatterlist *sg;
-	sector_t sector;
-	blk_qc_t cookie;
-	int op, op_flags = 0, i;
-
-	if (!req->sg_cnt) {
-		nvmet_req_complete(req, 0);
-		return;
-	}
-
-	if (req->cmd->rw.opcode == nvme_cmd_write) {
-		op = REQ_OP_WRITE;
-		op_flags = REQ_SYNC | REQ_IDLE;
-		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
-			op_flags |= REQ_FUA;
-	} else {
-		op = REQ_OP_READ;
-	}
-
-	sector = le64_to_cpu(req->cmd->rw.slba);
-	sector <<= (req->ns->blksize_shift - 9);
-
-	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-	bio_set_dev(bio, req->ns->bdev);
-	bio->bi_iter.bi_sector = sector;
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
-	bio_set_op_attrs(bio, op, op_flags);
-
-	for_each_sg(req->sg, sg, req->sg_cnt, i) {
-		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
-				!= sg->length) {
-			struct bio *prev = bio;
-
-			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
-			bio_set_dev(bio, req->ns->bdev);
-			bio->bi_iter.bi_sector = sector;
-			bio_set_op_attrs(bio, op, op_flags);
-
-			bio_chain(bio, prev);
-			submit_bio(prev);
-		}
-
-		sector += sg->length >> 9;
-		sg_cnt--;
-	}
-
-	cookie = submit_bio(bio);
-
-	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
-}
-
-static void nvmet_execute_flush(struct nvmet_req *req)
-{
-	struct bio *bio = &req->inline_bio;
-
-	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-	bio_set_dev(bio, req->ns->bdev);
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-	submit_bio(bio);
-}
-
-static u16 nvmet_discard_range(struct nvmet_ns *ns,
-		struct nvme_dsm_range *range, struct bio **bio)
-{
-	int ret;
-
-	ret = __blkdev_issue_discard(ns->bdev,
-			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
-			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-			GFP_KERNEL, 0, bio);
-	if (ret && ret != -EOPNOTSUPP)
-		return NVME_SC_INTERNAL | NVME_SC_DNR;
-	return 0;
-}
-
-static void nvmet_execute_discard(struct nvmet_req *req)
-{
-	struct nvme_dsm_range range;
-	struct bio *bio = NULL;
-	int i;
-	u16 status;
-
-	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
-		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
-				sizeof(range));
-		if (status)
-			break;
-
-		status = nvmet_discard_range(req->ns, &range, &bio);
-		if (status)
-			break;
-	}
-
-	if (bio) {
-		bio->bi_private = req;
-		bio->bi_end_io = nvmet_bio_done;
-		if (status) {
-			bio->bi_status = BLK_STS_IOERR;
-			bio_endio(bio);
-		} else {
-			submit_bio(bio);
-		}
-	} else {
-		nvmet_req_complete(req, status);
-	}
-}
-
-static void nvmet_execute_dsm(struct nvmet_req *req)
-{
-	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
-	case NVME_DSMGMT_AD:
-		nvmet_execute_discard(req);
-		return;
-	case NVME_DSMGMT_IDR:
-	case NVME_DSMGMT_IDW:
-	default:
-		/* Not supported yet */
-		nvmet_req_complete(req, 0);
-		return;
-	}
-}
-
-static void nvmet_execute_write_zeroes(struct nvmet_req *req)
-{
-	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
-	struct bio *bio = NULL;
-	u16 status = NVME_SC_SUCCESS;
-	sector_t sector;
-	sector_t nr_sector;
-
-	sector = le64_to_cpu(write_zeroes->slba) <<
-		(req->ns->blksize_shift - 9);
-	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
-		(req->ns->blksize_shift - 9));
-
-	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
-				GFP_KERNEL, &bio, 0))
-		status = NVME_SC_INTERNAL | NVME_SC_DNR;
-
-	if (bio) {
-		bio->bi_private = req;
-		bio->bi_end_io = nvmet_bio_done;
-		submit_bio(bio);
-	} else {
-		nvmet_req_complete(req, status);
-	}
-}
-
-u16 nvmet_parse_io_cmd(struct nvmet_req *req)
-{
-	struct nvme_command *cmd = req->cmd;
-	u16 ret;
-
-	ret = nvmet_check_ctrl_status(req, cmd);
-	if (unlikely(ret))
-		return ret;
-
-	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
-	if (unlikely(!req->ns))
-		return NVME_SC_INVALID_NS | NVME_SC_DNR;
-
-	switch (cmd->common.opcode) {
-	case nvme_cmd_read:
-	case nvme_cmd_write:
-		req->execute = nvmet_execute_rw;
-		req->data_len = nvmet_rw_len(req);
-		return 0;
-	case nvme_cmd_flush:
-		req->execute = nvmet_execute_flush;
-		req->data_len = 0;
-		return 0;
-	case nvme_cmd_dsm:
-		req->execute = nvmet_execute_dsm;
-		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
-			sizeof(struct nvme_dsm_range);
-		return 0;
-	case nvme_cmd_write_zeroes:
-		req->execute = nvmet_execute_write_zeroes;
-		return 0;
-	default:
-		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-		       req->sq->qid);
-		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-	}
-}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84ab21f8..2d09afcfe505 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -43,6 +43,7 @@ struct nvmet_ns {
 	struct list_head	dev_link;
 	struct percpu_ref	ref;
 	struct block_device	*bdev;
+	struct file		*file;
 	u32			nsid;
 	u32			blksize_shift;
 	loff_t			size;
@@ -57,6 +58,8 @@ struct nvmet_ns {
 	struct config_group	group;
 
 	struct completion	disable_done;
+	mempool_t		*bvec_pool;
+	struct kmem_cache	*bvec_cache;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -222,8 +225,18 @@ struct nvmet_req {
 	struct nvmet_cq		*cq;
 	struct nvmet_ns		*ns;
 	struct scatterlist	*sg;
-	struct bio		inline_bio;
 	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
+	union {
+		struct {
+			struct bio      inline_bio;
+		} b;
+		struct {
+			bool			mpool_alloc;
+			struct kiocb            iocb;
+			struct bio_vec          *bvec;
+			struct work_struct      work;
+		} f;
+	};
 	int			sg_cnt;
 	/* data length as parsed from the command: */
 	size_t			data_len;
@@ -263,7 +276,8 @@ struct nvmet_async_event {
 };
 
 u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
-u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -338,4 +352,14 @@ extern struct rw_semaphore nvmet_config_sem;
 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
 		const char *hostnqn);
 
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
+int nvmet_file_ns_enable(struct nvmet_ns *ns);
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
+void nvmet_file_ns_disable(struct nvmet_ns *ns);
+
+static inline u32 nvmet_rw_len(struct nvmet_req *req)
+{
+	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
+			req->ns->blksize_shift;
+}
 #endif /* _NVMET_H */
-- 
2.16.4