Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 18 Jan 2021 16:25:51 GMT
From:      Ed Maste <emaste@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 2d194dc21989 - stable/12 - xen: allow limiting the amount of duplicated pending xenstore watches
Message-ID:  <202101181625.10IGPpmC094715@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/12 has been updated by emaste:

URL: https://cgit.FreeBSD.org/src/commit/?id=2d194dc219892049dd03564c4083080cac1aa688

commit 2d194dc219892049dd03564c4083080cac1aa688
Author:     Roger Pau Monné <royger@FreeBSD.org>
AuthorDate: 2020-11-25 11:34:38 +0000
Commit:     Ed Maste <emaste@FreeBSD.org>
CommitDate: 2021-01-18 16:25:31 +0000

    xen: allow limiting the amount of duplicated pending xenstore watches
    
    Xenstore watches received are queued in a list and processed in a
    deferred thread. Such queuing was done without any checking, so a
    guest could potentially trigger a resource starvation against the
    FreeBSD kernel if such kernel is watching any user-controlled xenstore
    path.
    
    Allowing limiting the amount of pending events a watch can accumulate
    to prevent a remote guest from triggering this resource starvation
    issue.
    
    For the PV device backends and frontends this limitation is only
    applied to the other end /state node, which is limited to 1 pending
    event, the rest of the watched paths can still have unlimited pending
    watches because they are either local or controlled by a privileged
    domain.
    
    The xenstore user-space device gets special treatment as it's not
    possible for the kernel to know whether the paths being watched by
    user-space processes are controlled by a guest domain. For this reason
    watches set by the xenstore user-space device are limited to 1000
    pending events. Note this can be modified using the
    max_pending_watch_events sysctl of the device.
    
    This is XSA-349.
    
    Sponsored by:   Citrix Systems R&D
    MFC after:      3 days
    
    (cherry picked from commit 4e4e43dc9e1afc863670a031cc5cc75eb5e668d6)
---
 sys/dev/xen/balloon/balloon.c       |  3 ++-
 sys/dev/xen/blkback/blkback.c       |  6 ++++++
 sys/dev/xen/control/control.c       |  6 ++++++
 sys/dev/xen/xenstore/xenstore.c     | 14 +++++++++++---
 sys/dev/xen/xenstore/xenstore_dev.c | 15 +++++++++++++++
 sys/xen/xenbus/xenbusb.c            | 17 +++++++++++++++++
 sys/xen/xenstore/xenstorevar.h      |  9 +++++++++
 7 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
index b832bbaf313a..bc7b0983f8d7 100644
--- a/sys/dev/xen/balloon/balloon.c
+++ b/sys/dev/xen/balloon/balloon.c
@@ -310,7 +310,8 @@ set_new_target(unsigned long target)
 
 static struct xs_watch target_watch =
 {
-	.node = "memory/target"
+	.node = "memory/target",
+	.max_pending = 1,
 };
 
 /* React to a change in the target key */
diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c
index ffbb4a25262f..d935ba965b00 100644
--- a/sys/dev/xen/blkback/blkback.c
+++ b/sys/dev/xen/blkback/blkback.c
@@ -3768,6 +3768,12 @@ xbb_attach(device_t dev)
 	xbb->hotplug_watch.callback = xbb_attach_disk;
 	KASSERT(xbb->hotplug_watch.node == NULL, ("watch node already setup"));
 	xbb->hotplug_watch.node = strdup(sbuf_data(watch_path), M_XENBLOCKBACK);
+	/*
+	 * We don't care about the path updated, just about the value changes
+	 * on that single node, hence there's no need to queue more that one
+	 * event.
+	 */
+	xbb->hotplug_watch.max_pending = 1;
 	sbuf_delete(watch_path);
 	error = xs_register_watch(&xbb->hotplug_watch);
 	if (error != 0) {
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index 495083e24656..a21ccecd7373 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -435,6 +435,12 @@ xctrl_attach(device_t dev)
 	xctrl->xctrl_watch.node = "control/shutdown";
 	xctrl->xctrl_watch.callback = xctrl_on_watch_event;
 	xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl;
+	/*
+	 * We don't care about the path updated, just about the value changes
+	 * on that single node, hence there's no need to queue more that one
+	 * event.
+	 */
+	xctrl->xctrl_watch.max_pending = 1;
 	xs_register_watch(&xctrl->xctrl_watch);
 
 	if (xen_pv_domain())
diff --git a/sys/dev/xen/xenstore/xenstore.c b/sys/dev/xen/xenstore/xenstore.c
index f8da1c59f915..31d470eebbf6 100644
--- a/sys/dev/xen/xenstore/xenstore.c
+++ b/sys/dev/xen/xenstore/xenstore.c
@@ -656,12 +656,17 @@ xs_process_msg(enum xsd_sockmsg_type *type)
 		mtx_lock(&xs.registered_watches_lock);
 		msg->u.watch.handle = find_watch(
 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
-		if (msg->u.watch.handle != NULL) {
-			mtx_lock(&xs.watch_events_lock);
+		mtx_lock(&xs.watch_events_lock);
+		if (msg->u.watch.handle != NULL &&
+		    (!msg->u.watch.handle->max_pending ||
+		    msg->u.watch.handle->pending <
+		    msg->u.watch.handle->max_pending)) {
+			msg->u.watch.handle->pending++;
 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
 			wakeup(&xs.watch_events);
 			mtx_unlock(&xs.watch_events_lock);
 		} else {
+			mtx_unlock(&xs.watch_events_lock);
 			free(msg->u.watch.vec, M_XENSTORE);
 			free(msg, M_XENSTORE);
 		}
@@ -983,8 +988,10 @@ xenwatch_thread(void *unused)
 
 		mtx_lock(&xs.watch_events_lock);
 		msg = TAILQ_FIRST(&xs.watch_events);
-		if (msg)
+		if (msg) {
 			TAILQ_REMOVE(&xs.watch_events, msg, list);
+			msg->u.watch.handle->pending--;
+		}
 		mtx_unlock(&xs.watch_events_lock);
 
 		if (msg != NULL) {
@@ -1578,6 +1585,7 @@ xs_register_watch(struct xs_watch *watch)
 	char token[sizeof(watch) * 2 + 1];
 	int error;
 
+	watch->pending = 0;
 	sprintf(token, "%lX", (long)watch);
 
 	mtx_lock(&xs.registered_watches_lock);
diff --git a/sys/dev/xen/xenstore/xenstore_dev.c b/sys/dev/xen/xenstore/xenstore_dev.c
index 2cd9f136ea8e..261e97657969 100644
--- a/sys/dev/xen/xenstore/xenstore_dev.c
+++ b/sys/dev/xen/xenstore/xenstore_dev.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/conf.h>
 #include <sys/module.h>
 #include <sys/selinfo.h>
+#include <sys/sysctl.h>
 #include <sys/poll.h>
 
 #include <xen/xen-os.h>
@@ -53,6 +54,8 @@ __FBSDID("$FreeBSD$");
 #include <xen/xenstore/xenstorevar.h>
 #include <xen/xenstore/xenstore_internal.h>
 
+static unsigned int max_pending_watches = 1000;
+
 struct xs_dev_transaction {
 	LIST_ENTRY(xs_dev_transaction) list;
 	struct xs_transaction handle;
@@ -335,6 +338,7 @@ xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
 		watch->watch.node = strdup(wpath, M_XENSTORE);
 		watch->watch.callback = xs_dev_watch_cb;
 		watch->watch.callback_data = (uintptr_t)watch;
+		watch->watch.max_pending = max_pending_watches;
 		watch->token = strdup(wtoken, M_XENSTORE);
 		watch->user = u;
 
@@ -511,6 +515,17 @@ static int
 xs_dev_attach(device_t dev)
 {
 	struct cdev *xs_cdev;
+	struct sysctl_ctx_list *sysctl_ctx;
+	struct sysctl_oid *sysctl_tree;
+
+	sysctl_ctx = device_get_sysctl_ctx(dev);
+	sysctl_tree = device_get_sysctl_tree(dev);
+	if (sysctl_ctx == NULL || sysctl_tree == NULL)
+	    return (EINVAL);
+
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	    "max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0,
+	    "maximum amount of pending watch events to be delivered");
 
 	xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
 	    UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
diff --git a/sys/xen/xenbus/xenbusb.c b/sys/xen/xenbus/xenbusb.c
index 8b755e2a62c3..a7b99d46f419 100644
--- a/sys/xen/xenbus/xenbusb.c
+++ b/sys/xen/xenbus/xenbusb.c
@@ -702,10 +702,21 @@ xenbusb_add_device(device_t dev, const char *type, const char *id)
 		ivars->xd_otherend_watch.node = statepath;
 		ivars->xd_otherend_watch.callback = xenbusb_otherend_watch_cb;
 		ivars->xd_otherend_watch.callback_data = (uintptr_t)ivars;
+		/*
+		 * Other end state node watch, limit to one pending event
+		 * to prevent frontends from queuing too many events that
+		 * could cause resource starvation.
+		 */
+		ivars->xd_otherend_watch.max_pending = 1;
 
 		ivars->xd_local_watch.node = ivars->xd_node;
 		ivars->xd_local_watch.callback = xenbusb_local_watch_cb;
 		ivars->xd_local_watch.callback_data = (uintptr_t)ivars;
+		/*
+		 * Watch our local path, only writable by us or a privileged
+		 * domain, no need to limit.
+		 */
+		ivars->xd_local_watch.max_pending = 0;
 
 		mtx_lock(&xbs->xbs_lock);
 		xbs->xbs_connecting_children++;
@@ -764,6 +775,12 @@ xenbusb_attach(device_t dev, char *bus_node, u_int id_components)
 	xbs->xbs_device_watch.node = bus_node;
 	xbs->xbs_device_watch.callback = xenbusb_devices_changed;
 	xbs->xbs_device_watch.callback_data = (uintptr_t)xbs;
+	/*
+	 * Allow for unlimited pending watches, as those are local paths
+	 * either controlled by the guest or only writable by privileged
+	 * domains.
+	 */
+	xbs->xbs_device_watch.max_pending = 0;
 
 	TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev);
 
diff --git a/sys/xen/xenstore/xenstorevar.h b/sys/xen/xenstore/xenstorevar.h
index 8c89e174acf2..b6e699c1fed9 100644
--- a/sys/xen/xenstore/xenstorevar.h
+++ b/sys/xen/xenstore/xenstorevar.h
@@ -70,6 +70,15 @@ struct xs_watch
 
 	/* Callback client data untouched by the XenStore watch mechanism. */
 	uintptr_t callback_data;
+
+	/* Maximum number of pending watch events to be delivered. */
+	unsigned int max_pending;
+
+	/*
+	 * Private counter used by xenstore to keep track of the pending
+	 * watches. Protected by xs.watch_events_lock.
+	 */
+	unsigned int pending;
 };
 LIST_HEAD(xs_watch_list, xs_watch);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101181625.10IGPpmC094715>