Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 7 Feb 2017 01:57:29 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r313370 - in stable/11: sys/cam/ctl usr.sbin/ctladm
Message-ID:  <201702070157.v171vTtb099791@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Tue Feb  7 01:57:29 2017
New Revision: 313370
URL: https://svnweb.freebsd.org/changeset/base/313370

Log:
  MFC r312694: Make CTL ramdisk backend a real RAM disk.
  
  If the "capacity" LU option is set, the ramdisk backend now implements a
  featured thin-provisioned disk, storing data in malloc(9)-allocated memory
  blocks of pblocksize bytes (default PAGE_SIZE or 4KB).  Additionally, ~0.2%
  of the LU size is used for an indirection tree (a bigger pblocksize reduces
  the overhead).  The backend supports all unmap and anchor operations.  If
  the configured capacity is exceeded, proper error conditions are reported.
  
  If the "capacity" LU option is not set, the backend operates mostly the same
  as before without allocating real storage: writes go to nowhere, reads
  return zeroes, reporting that all LBAs are unmapped.
  
  This backend is still mostly oriented toward testing and benchmarking (it is
  still a volatile RAM disk), but now it should allow running real FS tests,
  not only a simple dumb dd.

Modified:
  stable/11/sys/cam/ctl/ctl_backend_ramdisk.c
  stable/11/usr.sbin/ctladm/ctladm.8
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cam/ctl/ctl_backend_ramdisk.c
==============================================================================
--- stable/11/sys/cam/ctl/ctl_backend_ramdisk.c	Tue Feb  7 01:56:26 2017	(r313369)
+++ stable/11/sys/cam/ctl/ctl_backend_ramdisk.c	Tue Feb  7 01:57:29 2017	(r313370)
@@ -1,7 +1,7 @@
 /*-
  * Copyright (c) 2003, 2008 Silicon Graphics International Corp.
  * Copyright (c) 2012 The FreeBSD Foundation
- * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
+ * Copyright (c) 2014-2017 Alexander Motin <mav@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed by Edward Tomasz Napierala
@@ -35,7 +35,7 @@
  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_ramdisk.c#3 $
  */
 /*
- * CAM Target Layer backend for a "fake" ramdisk.
+ * CAM Target Layer black hole and RAM disk backend.
  *
  * Author: Ken Merry <ken@FreeBSD.org>
  */
@@ -48,9 +48,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/condvar.h>
 #include <sys/types.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
+#include <sys/sx.h>
 #include <sys/taskqueue.h>
 #include <sys/time.h>
 #include <sys/queue.h>
@@ -71,6 +73,29 @@ __FBSDID("$FreeBSD$");
 #include <cam/ctl/ctl_private.h>
 #include <cam/ctl/ctl_error.h>
 
+#define PRIV(io)	\
+    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
+#define ARGS(io)	\
+    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
+
+#define	PPP	(PAGE_SIZE / sizeof(uint8_t **))
+#ifdef __LP64__
+#define	PPPS	(PAGE_SHIFT - 3)
+#else
+#define	PPPS	(PAGE_SHIFT - 2)
+#endif
+#define	SGPP	(PAGE_SIZE / sizeof(struct ctl_sg_entry))
+
+#define	P_UNMAPPED	NULL			/* Page is unmapped. */
+#define	P_ANCHORED	((void *)(uintptr_t)1)	/* Page is anchored. */
+
+typedef enum {
+	GP_READ,	/* Return data page or zero page. */
+	GP_WRITE,	/* Return data page, try allocate if none. */
+	GP_ANCHOR,	/* Return data page, try anchor if none. */
+	GP_OTHER,	/* Return what present, do not allocate/anchor. */
+} getpage_op_t;
+
 typedef enum {
 	CTL_BE_RAMDISK_LUN_UNCONFIGURED	= 0x01,
 	CTL_BE_RAMDISK_LUN_CONFIG_ERR	= 0x02,
@@ -79,28 +104,29 @@ typedef enum {
 
 struct ctl_be_ramdisk_lun {
 	struct ctl_lun_create_params params;
-	char lunname[32];
-	uint64_t size_bytes;
-	uint64_t size_blocks;
+	char			lunname[32];
+	int			indir;
+	uint8_t			**pages;
+	uint8_t			*zero_page;
+	struct sx		page_lock;
+	u_int			pblocksize;
+	u_int			pblockmul;
+	uint64_t		size_bytes;
+	uint64_t		size_blocks;
+	uint64_t		cap_bytes;
+	uint64_t		cap_used;
 	struct ctl_be_ramdisk_softc *softc;
 	ctl_be_ramdisk_lun_flags flags;
 	STAILQ_ENTRY(ctl_be_ramdisk_lun) links;
-	struct ctl_be_lun cbe_lun;
-	struct taskqueue *io_taskqueue;
-	struct task io_task;
+	struct ctl_be_lun	cbe_lun;
+	struct taskqueue	*io_taskqueue;
+	struct task		io_task;
 	STAILQ_HEAD(, ctl_io_hdr) cont_queue;
-	struct mtx_padalign queue_lock;
+	struct mtx_padalign	queue_lock;
 };
 
 struct ctl_be_ramdisk_softc {
 	struct mtx lock;
-	int rd_size;
-#ifdef CTL_RAMDISK_PAGES
-	uint8_t **ramdisk_pages;
-	int num_pages;
-#else
-	uint8_t *ramdisk_buffer;
-#endif
 	int num_luns;
 	STAILQ_HEAD(, ctl_be_ramdisk_lun) lun_list;
 };
@@ -111,8 +137,13 @@ extern struct ctl_softc *control_softc;
 static int ctl_backend_ramdisk_init(void);
 static int ctl_backend_ramdisk_shutdown(void);
 static int ctl_backend_ramdisk_move_done(union ctl_io *io);
+static void ctl_backend_ramdisk_compare(union ctl_io *io);
+static void ctl_backend_ramdisk_rw(union ctl_io *io);
 static int ctl_backend_ramdisk_submit(union ctl_io *io);
-static void ctl_backend_ramdisk_continue(union ctl_io *io);
+static void ctl_backend_ramdisk_worker(void *context, int pending);
+static int ctl_backend_ramdisk_config_read(union ctl_io *io);
+static int ctl_backend_ramdisk_config_write(union ctl_io *io);
+static uint64_t ctl_backend_ramdisk_lun_attr(void *be_lun, const char *attrname);
 static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd,
 				     caddr_t addr, int flag, struct thread *td);
 static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
@@ -121,12 +152,9 @@ static int ctl_backend_ramdisk_create(st
 				      struct ctl_lun_req *req);
 static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
 				  struct ctl_lun_req *req);
-static void ctl_backend_ramdisk_worker(void *context, int pending);
 static void ctl_backend_ramdisk_lun_shutdown(void *be_lun);
 static void ctl_backend_ramdisk_lun_config_status(void *be_lun,
 						  ctl_lun_config_status status);
-static int ctl_backend_ramdisk_config_write(union ctl_io *io);
-static int ctl_backend_ramdisk_config_read(union ctl_io *io);
 
 static struct ctl_backend_driver ctl_be_ramdisk_driver = 
 {
@@ -138,36 +166,21 @@ static struct ctl_backend_driver ctl_be_
 	.data_move_done = ctl_backend_ramdisk_move_done,
 	.config_read = ctl_backend_ramdisk_config_read,
 	.config_write = ctl_backend_ramdisk_config_write,
-	.ioctl = ctl_backend_ramdisk_ioctl
+	.ioctl = ctl_backend_ramdisk_ioctl,
+	.lun_attr = ctl_backend_ramdisk_lun_attr,
 };
 
 MALLOC_DEFINE(M_RAMDISK, "ramdisk", "Memory used for CTL RAMdisk");
 CTL_BACKEND_DECLARE(cbr, ctl_be_ramdisk_driver);
 
-int
+static int
 ctl_backend_ramdisk_init(void)
 {
 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
-#ifdef CTL_RAMDISK_PAGES
-	int i;
-#endif
 
 	memset(softc, 0, sizeof(*softc));
 	mtx_init(&softc->lock, "ctlramdisk", NULL, MTX_DEF);
 	STAILQ_INIT(&softc->lun_list);
-	softc->rd_size = 1024 * 1024;
-#ifdef CTL_RAMDISK_PAGES
-	softc->num_pages = softc->rd_size / PAGE_SIZE;
-	softc->ramdisk_pages = (uint8_t **)malloc(sizeof(uint8_t *) *
-						  softc->num_pages, M_RAMDISK,
-						  M_WAITOK);
-	for (i = 0; i < softc->num_pages; i++)
-		softc->ramdisk_pages[i] = malloc(PAGE_SIZE, M_RAMDISK,M_WAITOK);
-#else
-	softc->ramdisk_buffer = (uint8_t *)malloc(softc->rd_size, M_RAMDISK,
-						  M_WAITOK);
-#endif
-
 	return (0);
 }
 
@@ -176,9 +189,6 @@ ctl_backend_ramdisk_shutdown(void)
 {
 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
 	struct ctl_be_ramdisk_lun *lun, *next_lun;
-#ifdef CTL_RAMDISK_PAGES
-	int i;
-#endif
 
 	mtx_lock(&softc->lock);
 	STAILQ_FOREACH_SAFE(lun, &softc->lun_list, links, next_lun) {
@@ -193,30 +203,210 @@ ctl_backend_ramdisk_shutdown(void)
 		mtx_lock(&softc->lock);
 	}
 	mtx_unlock(&softc->lock);
-
-#ifdef CTL_RAMDISK_PAGES
-	for (i = 0; i < softc->num_pages; i++)
-		free(softc->ramdisk_pages[i], M_RAMDISK);
-	free(softc->ramdisk_pages, M_RAMDISK);
-#else
-	free(softc->ramdisk_buffer, M_RAMDISK);
-#endif
 	mtx_destroy(&softc->lock);
 	return (0);
 }
 
+static uint8_t *
+ctl_backend_ramdisk_getpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn,
+    getpage_op_t op)
+{
+	uint8_t **p, ***pp;
+	off_t i;
+	int s;
+
+	if (be_lun->cap_bytes == 0) {
+		switch (op) {
+		case GP_READ:
+			return (be_lun->zero_page);
+		case GP_WRITE:
+			return ((uint8_t *)be_lun->pages);
+		case GP_ANCHOR:
+			return (P_ANCHORED);
+		default:
+			return (P_UNMAPPED);
+		}
+	}
+	if (op == GP_WRITE || op == GP_ANCHOR) {
+		sx_xlock(&be_lun->page_lock);
+		pp = &be_lun->pages;
+		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
+			if (*pp == NULL) {
+				*pp = malloc(PAGE_SIZE, M_RAMDISK,
+				    M_WAITOK|M_ZERO);
+			}
+			i = pn >> s;
+			pp = (uint8_t ***)&(*pp)[i];
+			pn -= i << s;
+		}
+		if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
+			if (op == GP_WRITE) {
+				*pp = malloc(be_lun->pblocksize, M_RAMDISK,
+				    M_WAITOK|M_ZERO);
+			} else
+				*pp = P_ANCHORED;
+			be_lun->cap_used += be_lun->pblocksize;
+		} else if (*pp == P_ANCHORED && op == GP_WRITE) {
+			*pp = malloc(be_lun->pblocksize, M_RAMDISK,
+			    M_WAITOK|M_ZERO);
+		}
+		sx_xunlock(&be_lun->page_lock);
+		return ((uint8_t *)*pp);
+	} else {
+		sx_slock(&be_lun->page_lock);
+		p = be_lun->pages;
+		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
+			if (p == NULL)
+				break;
+			i = pn >> s;
+			p = (uint8_t **)p[i];
+			pn -= i << s;
+		}
+		sx_sunlock(&be_lun->page_lock);
+		if ((p == P_UNMAPPED || p == P_ANCHORED) && op == GP_READ)
+			return (be_lun->zero_page);
+		return ((uint8_t *)p);
+	}
+};
+
+static void
+ctl_backend_ramdisk_unmappage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
+{
+	uint8_t ***pp;
+	off_t i;
+	int s;
+
+	if (be_lun->cap_bytes == 0)
+		return;
+	sx_xlock(&be_lun->page_lock);
+	pp = &be_lun->pages;
+	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
+		if (*pp == NULL)
+			goto noindir;
+		i = pn >> s;
+		pp = (uint8_t ***)&(*pp)[i];
+		pn -= i << s;
+	}
+	if (*pp == P_ANCHORED) {
+		be_lun->cap_used -= be_lun->pblocksize;
+		*pp = P_UNMAPPED;
+	} else if (*pp != P_UNMAPPED) {
+		free(*pp, M_RAMDISK);
+		be_lun->cap_used -= be_lun->pblocksize;
+		*pp = P_UNMAPPED;
+	}
+noindir:
+	sx_xunlock(&be_lun->page_lock);
+};
+
+static void
+ctl_backend_ramdisk_anchorpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
+{
+	uint8_t ***pp;
+	off_t i;
+	int s;
+
+	if (be_lun->cap_bytes == 0)
+		return;
+	sx_xlock(&be_lun->page_lock);
+	pp = &be_lun->pages;
+	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
+		if (*pp == NULL)
+			goto noindir;
+		i = pn >> s;
+		pp = (uint8_t ***)&(*pp)[i];
+		pn -= i << s;
+	}
+	if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
+		be_lun->cap_used += be_lun->pblocksize;
+		*pp = P_ANCHORED;
+	} else if (*pp != P_ANCHORED) {
+		free(*pp, M_RAMDISK);
+		*pp = P_ANCHORED;
+	}
+noindir:
+	sx_xunlock(&be_lun->page_lock);
+};
+
+static void
+ctl_backend_ramdisk_freeallpages(uint8_t **p, int indir)
+{
+	int i;
+
+	if (p == NULL)
+		return;
+	if (indir == 0) {
+		free(p, M_RAMDISK);
+		return;
+	}
+	for (i = 0; i < PPP; i++) {
+		if (p[i] == NULL)
+			continue;
+		ctl_backend_ramdisk_freeallpages((uint8_t **)p[i], indir - 1);
+	}
+	free(p, M_RAMDISK);
+};
+
+static size_t
+cmp(uint8_t *a, uint8_t *b, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		if (a[i] != b[i])
+			break;
+	}
+	return (i);
+}
+
+static int
+ctl_backend_ramdisk_cmp(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
+	uint8_t *page;
+	uint8_t info[8];
+	uint64_t lba;
+	u_int lbaoff, lbas, res, off;
+
+	lbas = io->scsiio.kern_data_len / cbe_lun->blocksize;
+	lba = ARGS(io)->lba + PRIV(io)->len - lbas;
+	off = 0;
+	for (; lbas > 0; lbas--, lba++) {
+		page = ctl_backend_ramdisk_getpage(be_lun,
+		    lba >> cbe_lun->pblockexp, GP_READ);
+		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
+		page += lbaoff * cbe_lun->blocksize;
+		res = cmp(io->scsiio.kern_data_ptr + off, page,
+		    cbe_lun->blocksize);
+		off += res;
+		if (res < cbe_lun->blocksize)
+			break;
+	}
+	if (lbas > 0) {
+		off += io->scsiio.kern_rel_offset - io->scsiio.kern_data_len;
+		scsi_u64to8b(off, info);
+		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
+		    /*sense_key*/ SSD_KEY_MISCOMPARE,
+		    /*asc*/ 0x1D, /*ascq*/ 0x00,
+		    /*type*/ SSD_ELEM_INFO,
+		    /*size*/ sizeof(info), /*data*/ &info,
+		    /*type*/ SSD_ELEM_NONE);
+		return (1);
+	}
+	return (0);
+}
+
 static int
 ctl_backend_ramdisk_move_done(union ctl_io *io)
 {
-	struct ctl_be_lun *cbe_lun;
-	struct ctl_be_ramdisk_lun *be_lun;
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
 #ifdef CTL_TIME_IO
 	struct bintime cur_bt;
 #endif
 
 	CTL_DEBUG_PRINT(("ctl_backend_ramdisk_move_done\n"));
-	cbe_lun = CTL_BACKEND_LUN(io);
-	be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun->be_lun;
 #ifdef CTL_TIME_IO
 	getbinuptime(&cur_bt);
 	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
@@ -240,7 +430,12 @@ ctl_backend_ramdisk_move_done(union ctl_
 		ctl_set_invalid_field_ciu(&io->scsiio);
 	} else if ((io->io_hdr.port_status == 0) &&
 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
-		if (io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer > 0) {
+		if (ARGS(io)->flags & CTL_LLF_COMPARE) {
+			/* We have data block ready for comparison. */
+			if (ctl_backend_ramdisk_cmp(io))
+				goto done;
+		}
+		if (ARGS(io)->len > PRIV(io)->len) {
 			mtx_lock(&be_lun->queue_lock);
 			STAILQ_INSERT_TAIL(&be_lun->cont_queue,
 			    &io->io_hdr, links);
@@ -251,75 +446,109 @@ ctl_backend_ramdisk_move_done(union ctl_
 		}
 		ctl_set_success(&io->scsiio);
 	}
+done:
 	ctl_data_submit_done(io);
 	return(0);
 }
 
-static int
-ctl_backend_ramdisk_submit(union ctl_io *io)
+static void
+ctl_backend_ramdisk_compare(union ctl_io *io)
 {
-	struct ctl_be_lun *cbe_lun;
-	struct ctl_lba_len_flags *lbalen;
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	u_int lbas, len;
 
-	cbe_lun = CTL_BACKEND_LUN(io);
-	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
-	if (lbalen->flags & CTL_LLF_VERIFY) {
-		ctl_set_success(&io->scsiio);
-		ctl_data_submit_done(io);
-		return (CTL_RETVAL_COMPLETE);
-	}
-	io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer =
-	    lbalen->len * cbe_lun->blocksize;
-	ctl_backend_ramdisk_continue(io);
-	return (CTL_RETVAL_COMPLETE);
+	lbas = ARGS(io)->len - PRIV(io)->len;
+	lbas = MIN(lbas, 131072 / cbe_lun->blocksize);
+	len = lbas * cbe_lun->blocksize;
+
+	io->scsiio.be_move_done = ctl_backend_ramdisk_move_done;
+	io->scsiio.kern_data_ptr = malloc(len, M_RAMDISK, M_WAITOK);
+	io->scsiio.kern_data_len = len;
+	io->scsiio.kern_sg_entries = 0;
+	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+	PRIV(io)->len += lbas;
+#ifdef CTL_TIME_IO
+	getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
+	ctl_datamove(io);
 }
 
 static void
-ctl_backend_ramdisk_continue(union ctl_io *io)
+ctl_backend_ramdisk_rw(union ctl_io *io)
 {
-	struct ctl_be_ramdisk_softc *softc;
-	int len, len_filled, sg_filled;
-#ifdef CTL_RAMDISK_PAGES
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
 	struct ctl_sg_entry *sg_entries;
-	int i;
-#endif
-
-	softc = &rd_softc;
-	len = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer;
-#ifdef CTL_RAMDISK_PAGES
-	sg_filled = min(btoc(len), softc->num_pages);
-	if (sg_filled > 1) {
+	uint8_t *page;
+	uint64_t lba;
+	u_int i, len, lbaoff, lbas, sgs, off;
+	getpage_op_t op;
+
+	lba = ARGS(io)->lba + PRIV(io)->len;
+	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
+	lbas = ARGS(io)->len - PRIV(io)->len;
+	lbas = MIN(lbas, (SGPP << cbe_lun->pblockexp) - lbaoff);
+	sgs = (lbas + lbaoff + be_lun->pblockmul - 1) >> cbe_lun->pblockexp;
+	off = lbaoff * cbe_lun->blocksize;
+	op = (ARGS(io)->flags & CTL_LLF_WRITE) ? GP_WRITE : GP_READ;
+	if (sgs > 1) {
 		io->scsiio.kern_data_ptr = malloc(sizeof(struct ctl_sg_entry) *
-						  sg_filled, M_RAMDISK,
-						  M_WAITOK);
+		    sgs, M_RAMDISK, M_WAITOK);
 		sg_entries = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
-		for (i = 0, len_filled = 0; i < sg_filled; i++) {
-			sg_entries[i].addr = softc->ramdisk_pages[i];
-			sg_entries[i].len = MIN(PAGE_SIZE, len - len_filled);
-			len_filled += sg_entries[i].len;
+		len = lbas * cbe_lun->blocksize;
+		for (i = 0; i < sgs; i++) {
+			page = ctl_backend_ramdisk_getpage(be_lun,
+			    (lba >> cbe_lun->pblockexp) + i, op);
+			if (page == P_UNMAPPED || page == P_ANCHORED) {
+				free(io->scsiio.kern_data_ptr, M_RAMDISK);
+nospc:
+				ctl_set_space_alloc_fail(&io->scsiio);
+				ctl_data_submit_done(io);
+				return;
+			}
+			sg_entries[i].addr = page + off;
+			sg_entries[i].len = MIN(len, be_lun->pblocksize - off);
+			len -= sg_entries[i].len;
+			off = 0;
 		}
 	} else {
-		sg_filled = 0;
-		len_filled = len;
-		io->scsiio.kern_data_ptr = softc->ramdisk_pages[0];
+		page = ctl_backend_ramdisk_getpage(be_lun,
+		    lba >> cbe_lun->pblockexp, op);
+		if (page == P_UNMAPPED || page == P_ANCHORED)
+			goto nospc;
+		sgs = 0;
+		io->scsiio.kern_data_ptr = page + off;
 	}
-#else
-	sg_filled = 0;
-	len_filled = min(len, softc->rd_size);
-	io->scsiio.kern_data_ptr = softc->ramdisk_buffer;
-#endif /* CTL_RAMDISK_PAGES */
 
 	io->scsiio.be_move_done = ctl_backend_ramdisk_move_done;
-	io->scsiio.kern_data_len = len_filled;
-	io->scsiio.kern_sg_entries = sg_filled;
+	io->scsiio.kern_data_len = lbas * cbe_lun->blocksize;
+	io->scsiio.kern_sg_entries = sgs;
 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
-	io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer -= len_filled;
+	PRIV(io)->len += lbas;
 #ifdef CTL_TIME_IO
 	getbinuptime(&io->io_hdr.dma_start_bt);
 #endif
 	ctl_datamove(io);
 }
 
+static int
+ctl_backend_ramdisk_submit(union ctl_io *io)
+{
+	struct ctl_lba_len_flags *lbalen = ARGS(io);
+
+	if (lbalen->flags & CTL_LLF_VERIFY) {
+		ctl_set_success(&io->scsiio);
+		ctl_data_submit_done(io);
+		return (CTL_RETVAL_COMPLETE);
+	}
+	PRIV(io)->len = 0;
+	if (lbalen->flags & CTL_LLF_COMPARE)
+		ctl_backend_ramdisk_compare(io);
+	else
+		ctl_backend_ramdisk_rw(io);
+	return (CTL_RETVAL_COMPLETE);
+}
+
 static void
 ctl_backend_ramdisk_worker(void *context, int pending)
 {
@@ -327,7 +556,6 @@ ctl_backend_ramdisk_worker(void *context
 	union ctl_io *io;
 
 	be_lun = (struct ctl_be_ramdisk_lun *)context;
-
 	mtx_lock(&be_lun->queue_lock);
 	for (;;) {
 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->cont_queue);
@@ -335,7 +563,10 @@ ctl_backend_ramdisk_worker(void *context
 			STAILQ_REMOVE(&be_lun->cont_queue, &io->io_hdr,
 				      ctl_io_hdr, links);
 			mtx_unlock(&be_lun->queue_lock);
-			ctl_backend_ramdisk_continue(io);
+			if (ARGS(io)->flags & CTL_LLF_COMPARE)
+				ctl_backend_ramdisk_compare(io);
+			else
+				ctl_backend_ramdisk_rw(io);
 			mtx_lock(&be_lun->queue_lock);
 			continue;
 		}
@@ -350,6 +581,259 @@ ctl_backend_ramdisk_worker(void *context
 }
 
 static int
+ctl_backend_ramdisk_gls(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
+	struct scsi_get_lba_status_data *data;
+	uint8_t *page;
+	u_int lbaoff;
+
+	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
+	scsi_u64to8b(ARGS(io)->lba, data->descr[0].addr);
+	lbaoff = ARGS(io)->lba & ~(UINT_MAX << cbe_lun->pblockexp);
+	scsi_ulto4b(be_lun->pblockmul - lbaoff, data->descr[0].length);
+	page = ctl_backend_ramdisk_getpage(be_lun,
+	    ARGS(io)->lba >> cbe_lun->pblockexp, GP_OTHER);
+	if (page == P_UNMAPPED)
+		data->descr[0].status = 1;
+	else if (page == P_ANCHORED)
+		data->descr[0].status = 2;
+	else
+		data->descr[0].status = 0;
+	ctl_config_read_done(io);
+	return (CTL_RETVAL_COMPLETE);
+}
+
+static int
+ctl_backend_ramdisk_config_read(union ctl_io *io)
+{
+	int retval = 0;
+
+	switch (io->scsiio.cdb[0]) {
+	case SERVICE_ACTION_IN:
+		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
+			retval = ctl_backend_ramdisk_gls(io);
+			break;
+		}
+		ctl_set_invalid_field(&io->scsiio,
+				      /*sks_valid*/ 1,
+				      /*command*/ 1,
+				      /*field*/ 1,
+				      /*bit_valid*/ 1,
+				      /*bit*/ 4);
+		ctl_config_read_done(io);
+		retval = CTL_RETVAL_COMPLETE;
+		break;
+	default:
+		ctl_set_invalid_opcode(&io->scsiio);
+		ctl_config_read_done(io);
+		retval = CTL_RETVAL_COMPLETE;
+		break;
+	}
+	return (retval);
+}
+
+static void
+ctl_backend_ramdisk_delete(struct ctl_be_lun *cbe_lun, off_t lba, off_t len,
+    int anchor)
+{
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
+	uint8_t *page;
+	uint64_t p, lp;
+	u_int lbaoff;
+	getpage_op_t op = anchor ? GP_ANCHOR : GP_OTHER;
+
+	/* Partially zero first partial page. */
+	p = lba >> cbe_lun->pblockexp;
+	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
+	if (lbaoff != 0) {
+		page = ctl_backend_ramdisk_getpage(be_lun, p, op);
+		if (page != P_UNMAPPED && page != P_ANCHORED) {
+			memset(page + lbaoff * cbe_lun->blocksize, 0,
+			    min(len, be_lun->pblockmul - lbaoff) *
+			    cbe_lun->blocksize);
+		}
+		p++;
+	}
+
+	/* Partially zero last partial page. */
+	lp = (lba + len) >> cbe_lun->pblockexp;
+	lbaoff = (lba + len) & ~(UINT_MAX << cbe_lun->pblockexp);
+	if (p <= lp && lbaoff != 0) {
+		page = ctl_backend_ramdisk_getpage(be_lun, lp, op);
+		if (page != P_UNMAPPED && page != P_ANCHORED)
+			memset(page, 0, lbaoff * cbe_lun->blocksize);
+	}
+
+	/* Delete remaining full pages. */
+	if (anchor) {
+		for (; p < lp; p++)
+			ctl_backend_ramdisk_anchorpage(be_lun, p);
+	} else {
+		for (; p < lp; p++)
+			ctl_backend_ramdisk_unmappage(be_lun, p);
+	}
+}
+
+static void
+ctl_backend_ramdisk_ws(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = cbe_lun->be_lun;
+	struct ctl_lba_len_flags *lbalen = ARGS(io);
+	uint8_t *page;
+	uint64_t lba;
+	u_int lbaoff, lbas;
+
+	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB)) {
+		ctl_set_invalid_field(&io->scsiio,
+				      /*sks_valid*/ 1,
+				      /*command*/ 1,
+				      /*field*/ 1,
+				      /*bit_valid*/ 0,
+				      /*bit*/ 0);
+		ctl_config_write_done(io);
+		return;
+	}
+	if (lbalen->flags & SWS_UNMAP) {
+		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
+		    (lbalen->flags & SWS_ANCHOR) != 0);
+		ctl_set_success(&io->scsiio);
+		ctl_config_write_done(io);
+		return;
+	}
+
+	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
+		page = ctl_backend_ramdisk_getpage(be_lun,
+		    lba >> cbe_lun->pblockexp, GP_WRITE);
+		if (page == P_UNMAPPED || page == P_ANCHORED) {
+			ctl_set_space_alloc_fail(&io->scsiio);
+			ctl_data_submit_done(io);
+			return;
+		}
+		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
+		page += lbaoff * cbe_lun->blocksize;
+		if (lbalen->flags & SWS_NDOB) {
+			memset(page, 0, cbe_lun->blocksize);
+		} else {
+			memcpy(page, io->scsiio.kern_data_ptr,
+			    cbe_lun->blocksize);
+		}
+		if (lbalen->flags & SWS_LBDATA)
+			scsi_ulto4b(lba, page);
+	}
+	ctl_set_success(&io->scsiio);
+	ctl_config_write_done(io);
+}
+
+static void
+ctl_backend_ramdisk_unmap(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_ptr_len_flags *ptrlen = (struct ctl_ptr_len_flags *)ARGS(io);
+	struct scsi_unmap_desc *buf, *end;
+
+	if ((ptrlen->flags & ~SU_ANCHOR) != 0) {
+		ctl_set_invalid_field(&io->scsiio,
+				      /*sks_valid*/ 0,
+				      /*command*/ 0,
+				      /*field*/ 0,
+				      /*bit_valid*/ 0,
+				      /*bit*/ 0);
+		ctl_config_write_done(io);
+		return;
+	}
+
+	buf = (struct scsi_unmap_desc *)ptrlen->ptr;
+	end = buf + ptrlen->len / sizeof(*buf);
+	for (; buf < end; buf++) {
+		ctl_backend_ramdisk_delete(cbe_lun,
+		    scsi_8btou64(buf->lba), scsi_4btoul(buf->length),
+		    (ptrlen->flags & SU_ANCHOR) != 0);
+	}
+
+	ctl_set_success(&io->scsiio);
+	ctl_config_write_done(io);
+}
+
+static int
+ctl_backend_ramdisk_config_write(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	int retval = 0;
+
+	switch (io->scsiio.cdb[0]) {
+	case SYNCHRONIZE_CACHE:
+	case SYNCHRONIZE_CACHE_16:
+		/* We have no cache to flush. */
+		ctl_set_success(&io->scsiio);
+		ctl_config_write_done(io);
+		break;
+	case START_STOP_UNIT: {
+		struct scsi_start_stop_unit *cdb;
+
+		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
+		if ((cdb->how & SSS_PC_MASK) != 0) {
+			ctl_set_success(&io->scsiio);
+			ctl_config_write_done(io);
+			break;
+		}
+		if (cdb->how & SSS_START) {
+			if (cdb->how & SSS_LOEJ)
+				ctl_lun_has_media(cbe_lun);
+			ctl_start_lun(cbe_lun);
+		} else {
+			ctl_stop_lun(cbe_lun);
+			if (cdb->how & SSS_LOEJ)
+				ctl_lun_ejected(cbe_lun);
+		}
+		ctl_set_success(&io->scsiio);
+		ctl_config_write_done(io);
+		break;
+	}
+	case PREVENT_ALLOW:
+		ctl_set_success(&io->scsiio);
+		ctl_config_write_done(io);
+		break;
+	case WRITE_SAME_10:
+	case WRITE_SAME_16:
+		ctl_backend_ramdisk_ws(io);
+		break;
+	case UNMAP:
+		ctl_backend_ramdisk_unmap(io);
+		break;
+	default:
+		ctl_set_invalid_opcode(&io->scsiio);
+		ctl_config_write_done(io);
+		retval = CTL_RETVAL_COMPLETE;
+		break;
+	}
+
+	return (retval);
+}
+
+static uint64_t
+ctl_backend_ramdisk_lun_attr(void *arg, const char *attrname)
+{
+	struct ctl_be_ramdisk_lun *be_lun = arg;
+	uint64_t		val;
+
+	val = UINT64_MAX;
+	if (be_lun->cap_bytes == 0)
+		return (val);
+	sx_slock(&be_lun->page_lock);
+	if (strcmp(attrname, "blocksused") == 0) {
+		val = be_lun->cap_used / be_lun->cbe_lun.blocksize;
+	} else if (strcmp(attrname, "blocksavail") == 0) {
+		val = (be_lun->cap_bytes - be_lun->cap_used) /
+		    be_lun->cbe_lun.blocksize;
+	}
+	sx_sunlock(&be_lun->page_lock);
+	return (val);
+}
+
+static int
 ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
 			  int flag, struct thread *td)
 {
@@ -466,6 +950,9 @@ ctl_backend_ramdisk_rm(struct ctl_be_ram
 		taskqueue_drain_all(be_lun->io_taskqueue);
 		taskqueue_free(be_lun->io_taskqueue);
 		ctl_free_opts(&be_lun->cbe_lun.options);
+		free(be_lun->zero_page, M_RAMDISK);
+		ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
+		sx_destroy(&be_lun->page_lock);
 		mtx_destroy(&be_lun->queue_lock);
 		free(be_lun, M_RAMDISK);
 	}
@@ -487,6 +974,7 @@ ctl_backend_ramdisk_create(struct ctl_be
 	struct ctl_lun_create_params *params;
 	char *value;
 	char tmpstr[32];
+	uint64_t t;
 	int retval;
 
 	retval = 0;
@@ -513,6 +1001,19 @@ ctl_backend_ramdisk_create(struct ctl_be
 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 
+	be_lun->pblocksize = PAGE_SIZE;
+	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
+	if (value != NULL) {
+		ctl_expand_number(value, &t);
+		be_lun->pblocksize = t;
+	}
+	if (be_lun->pblocksize < 512 || be_lun->pblocksize > 131072) {
+		snprintf(req->error_str, sizeof(req->error_str),
+			 "%s: unsupported pblocksize %u", __func__,
+			 be_lun->pblocksize);
+		goto bailout_error;
+	}
+
 	if (cbe_lun->lun_type == T_DIRECT ||
 	    cbe_lun->lun_type == T_CDROM) {
 		if (params->blocksize_bytes != 0)
@@ -521,6 +1022,14 @@ ctl_backend_ramdisk_create(struct ctl_be
 			cbe_lun->blocksize = 2048;
 		else
 			cbe_lun->blocksize = 512;
+		be_lun->pblockmul = be_lun->pblocksize / cbe_lun->blocksize;
+		if (be_lun->pblockmul < 1 || !powerof2(be_lun->pblockmul)) {
+			snprintf(req->error_str, sizeof(req->error_str),
+				 "%s: pblocksize %u not exp2 of blocksize %u",
+				 __func__,
+				 be_lun->pblocksize, cbe_lun->blocksize);
+			goto bailout_error;
+		}
 		if (params->lun_size_bytes < cbe_lun->blocksize) {
 			snprintf(req->error_str, sizeof(req->error_str),
 				 "%s: LUN size %ju < blocksize %u", __func__,
@@ -529,9 +1038,25 @@ ctl_backend_ramdisk_create(struct ctl_be
 		}
 		be_lun->size_blocks = params->lun_size_bytes / cbe_lun->blocksize;
 		be_lun->size_bytes = be_lun->size_blocks * cbe_lun->blocksize;
+		be_lun->indir = 0;
+		t = be_lun->size_bytes / be_lun->pblocksize;
+		while (t > 1) {
+			t /= PPP;
+			be_lun->indir++;
+		}
 		cbe_lun->maxlba = be_lun->size_blocks - 1;
-		cbe_lun->atomicblock = UINT32_MAX;
-		cbe_lun->opttxferlen = softc->rd_size / cbe_lun->blocksize;
+		cbe_lun->pblockexp = fls(be_lun->pblockmul) - 1;
+		cbe_lun->pblockoff = 0;
+		cbe_lun->ublockexp = cbe_lun->pblockexp;
+		cbe_lun->ublockoff = 0;
+		cbe_lun->atomicblock = be_lun->pblocksize;
+		cbe_lun->opttxferlen = SGPP * be_lun->pblocksize;
+		value = ctl_get_opt(&cbe_lun->options, "capacity");
+		if (value != NULL)
+			ctl_expand_number(value, &be_lun->cap_bytes);
+	} else {
+		be_lun->pblockmul = 1;
+		cbe_lun->pblockexp = 0;
 	}
 
 	/* Tell the user the blocksize we ended up using */
@@ -539,7 +1064,7 @@ ctl_backend_ramdisk_create(struct ctl_be
 	params->lun_size_bytes = be_lun->size_bytes;
 
 	value = ctl_get_opt(&cbe_lun->options, "unmap");
-	if (value != NULL && strcmp(value, "on") == 0)
+	if (value == NULL || strcmp(value, "off") != 0)
 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
 	value = ctl_get_opt(&cbe_lun->options, "readonly");
 	if (value != NULL) {
@@ -594,6 +1119,11 @@ ctl_backend_ramdisk_create(struct ctl_be
 	}
 
 	STAILQ_INIT(&be_lun->cont_queue);
+	sx_init(&be_lun->page_lock, "cram page lock");
+	if (be_lun->cap_bytes == 0)
+		be_lun->pages = malloc(be_lun->pblocksize, M_RAMDISK, M_WAITOK);
+	be_lun->zero_page = malloc(be_lun->pblocksize, M_RAMDISK,
+	    M_WAITOK|M_ZERO);
 	mtx_init(&be_lun->queue_lock, "cram queue lock", NULL, MTX_DEF);
 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_backend_ramdisk_worker,
 	    be_lun);
@@ -668,10 +1198,12 @@ ctl_backend_ramdisk_create(struct ctl_be
 bailout_error:
 	req->status = CTL_LUN_ERROR;
 	if (be_lun != NULL) {
-		if (be_lun->io_taskqueue != NULL) {
+		if (be_lun->io_taskqueue != NULL)
 			taskqueue_free(be_lun->io_taskqueue);
-		}
 		ctl_free_opts(&cbe_lun->options);
+		free(be_lun->zero_page, M_RAMDISK);
+		ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
+		sx_destroy(&be_lun->page_lock);
 		mtx_destroy(&be_lun->queue_lock);
 		free(be_lun, M_RAMDISK);
 	}
@@ -827,103 +1359,3 @@ ctl_backend_ramdisk_lun_config_status(vo
 	}
 	mtx_unlock(&softc->lock);
 }
-
-static int
-ctl_backend_ramdisk_config_write(union ctl_io *io)
-{
-	struct ctl_be_lun *cbe_lun;
-	int retval;
-
-	cbe_lun = CTL_BACKEND_LUN(io);
-	retval = 0;
-	switch (io->scsiio.cdb[0]) {
-	case SYNCHRONIZE_CACHE:
-	case SYNCHRONIZE_CACHE_16:
-		/*
-		 * The upper level CTL code will filter out any CDBs with
-		 * the immediate bit set and return the proper error.  It
-		 * will also not allow a sync cache command to go to a LUN
-		 * that is powered down.
-		 *
-		 * We don't really need to worry about what LBA range the
-		 * user asked to be synced out.  When they issue a sync
-		 * cache command, we'll sync out the whole thing.
-		 *
-		 * This is obviously just a stubbed out implementation.
-		 * The real implementation will be in the RAIDCore/CTL

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201702070157.v171vTtb099791>