Date:      Fri, 18 May 2018 01:52:51 +0000 (UTC)
From:      Matt Macy <mmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r333775 - in head/sys: kern net sys tests/epoch
Message-ID:  <201805180152.w4I1qpxs013915@repo.freebsd.org>

Author: mmacy
Date: Fri May 18 01:52:51 2018
New Revision: 333775
URL: https://svnweb.freebsd.org/changeset/base/333775

Log:
  epoch: add non-preemptible "critical" variant
  
  adds:
  - epoch_enter_critical() - can be called inside a different epoch;
    starts a section that must not acquire any MTX_DEF mutexes or do
    anything else that might sleep.
  - epoch_exit_critical() - corresponding exit call
  - epoch_wait_critical() - wait variant for which any threads in a
    section are guaranteed to be running, so the waiter can spin
    instead of context switching.
  - global_epoch_critical - an epoch instance that is safe to pass to
    epoch_wait_critical (see the usage sketch below)
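  
  A minimal, hypothetical usage sketch (not part of this commit: the
  struct foo list, foo_lookup()/foo_remove(), and the M_TEMP malloc type
  are illustrative assumptions, and writer-side serialization is
  omitted for brevity):
  
      #include <sys/param.h>
      #include <sys/proc.h>
      #include <sys/queue.h>
      #include <sys/malloc.h>
      #include <sys/epoch.h>
      
      struct foo {
              int              f_value;
              SLIST_ENTRY(foo) f_entry;
      };
      static SLIST_HEAD(, foo) foo_head = SLIST_HEAD_INITIALIZER(foo_head);
      
      /*
       * Reader side: a non-preemptible section.  Nothing in here may
       * acquire an MTX_DEF mutex or otherwise sleep; lockless reads
       * (and spin locks) only.
       */
      static int
      foo_lookup(int value)
      {
              struct foo *f;
              int found = 0;
      
              epoch_enter_critical(global_epoch_critical);
              SLIST_FOREACH(f, &foo_head, f_entry) {
                      if (f->f_value == value) {
                              found = 1;
                              break;
                      }
              }
              epoch_exit_critical(global_epoch_critical);
              return (found);
      }
      
      /*
       * Writer side: unlink the element, then spin until every thread
       * has left its critical section before freeing.  The spin wait is
       * bounded because readers cannot be preempted inside a section.
       */
      static void
      foo_remove(struct foo *f)
      {
              SLIST_REMOVE(&foo_head, f, foo, f_entry);
              epoch_wait_critical(global_epoch_critical);
              free(f, M_TEMP);
      }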
  
  Requested by:   markj
  Approved by:	sbruno

Modified:
  head/sys/kern/subr_epoch.c
  head/sys/net/if.c
  head/sys/sys/epoch.h
  head/sys/tests/epoch/epoch_test.c

Modified: head/sys/kern/subr_epoch.c
==============================================================================
--- head/sys/kern/subr_epoch.c	Thu May 17 23:59:56 2018	(r333774)
+++ head/sys/kern/subr_epoch.c	Fri May 18 01:52:51 2018	(r333775)
@@ -83,6 +83,12 @@ SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nconte
 static counter_u64_t switch_count;
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
 				   &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
+static counter_u64_t epoch_call_count;
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
+				   &epoch_call_count, "# of times a callback was deferred");
+static counter_u64_t epoch_call_task_count;
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
+				   &epoch_call_task_count, "# of times a callback task was run");
 
 TAILQ_HEAD(threadlist, thread);
 
@@ -103,9 +109,8 @@ struct epoch_pcpu_state {
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
 	struct epoch_pcpu_state *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
-	counter_u64_t e_frees;
-	uint64_t e_free_last;
 	int e_idx;
+	int e_flags;
 	struct epoch_pcpu_state *e_pcpu[0];
 };
 
@@ -119,7 +124,7 @@ static __read_mostly int domoffsets[MAXMEMDOM];
 static __read_mostly int inited;
 static __read_mostly int epoch_count;
 __read_mostly epoch_t global_epoch;
-static __read_mostly epoch_t private_epoch;
+__read_mostly epoch_t global_epoch_critical;
 
 static void epoch_call_task(void *context __unused);
 
@@ -161,8 +166,8 @@ epoch_init(void *arg __unused)
 		taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task");
 	}
 	inited = 1;
-	global_epoch = epoch_alloc();
-	private_epoch = epoch_alloc();
+	global_epoch = epoch_alloc(0);
+	global_epoch_critical = epoch_alloc(EPOCH_CRITICAL);
 }
 SYSINIT(epoch, SI_SUB_TASKQ + 1, SI_ORDER_FIRST, epoch_init, NULL);
 
@@ -206,7 +211,7 @@ epoch_init_legacy(epoch_t epoch)
 }
 
 epoch_t
-epoch_alloc(void)
+epoch_alloc(int flags)
 {
 	epoch_t epoch;
 
@@ -215,12 +220,12 @@ epoch_alloc(void)
 	epoch = malloc(sizeof(struct epoch) + mp_ncpus*sizeof(void*),
 				   M_EPOCH, M_ZERO|M_WAITOK);
 	ck_epoch_init(&epoch->e_epoch);
-	epoch->e_frees = counter_u64_alloc(M_WAITOK);
 	if (usedomains)
 		epoch_init_numa(epoch);
 	else
 		epoch_init_legacy(epoch);
 	MPASS(epoch_count < MAX_EPOCHS-2);
+	epoch->e_flags = flags;
 	epoch->e_idx = epoch_count;
 	allepochs[epoch_count++] = epoch;
 	return (epoch);
@@ -240,11 +245,7 @@ epoch_free(epoch_t epoch)
 	}
 #endif
 	allepochs[epoch->e_idx] = NULL;
-	epoch_wait(private_epoch);
-	/*
-	 * Execute any lingering callbacks
-	 */
-	counter_u64_free(epoch->e_frees);
+	epoch_wait_critical(global_epoch_critical);
 	if (usedomains)
 		for (domain = 0; domain < vm_ndomains; domain++)
 			free_domain(epoch->e_pcpu_dom[domain], M_EPOCH);
@@ -289,14 +290,21 @@ epoch_enter_internal(epoch_t epoch, struct thread *td)
 }
 
 
-static void
-epoch_enter_private(ck_epoch_section_t *section)
+void
+epoch_enter_critical(epoch_t epoch)
 {
-	struct epoch_pcpu_state *eps;
+	ck_epoch_record_t *record;
+	ck_epoch_section_t *section;
+	struct thread *td;
 
-	MPASS(curthread->td_critnest);
-	eps = private_epoch->e_pcpu[curcpu];
-	ck_epoch_begin(&eps->eps_record.er_record, section);
+	section = NULL;
+	td = curthread;
+	critical_enter();
+	if (__predict_true(td->td_epochnest++ == 0))
+		section = (ck_epoch_section_t*)&td->td_epoch_section;
+
+	record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
+	ck_epoch_begin(record, section);
 }
 
 void
@@ -321,14 +329,21 @@ epoch_exit_internal(epoch_t epoch, struct thread *td)
 	critical_exit();
 }
 
-static void
-epoch_exit_private(ck_epoch_section_t *section)
+void
+epoch_exit_critical(epoch_t epoch)
 {
-	struct epoch_pcpu_state *eps;
+	ck_epoch_record_t *record;
+	ck_epoch_section_t *section;
+	struct thread *td;
 
-	MPASS(curthread->td_critnest);
-	eps = private_epoch->e_pcpu[curcpu];
-	ck_epoch_end(&eps->eps_record.er_record, section);
+	section = NULL;
+	td = curthread;
+	MPASS(td->td_critnest);
+	if (__predict_true(td->td_epochnest-- == 1))
+		section = (ck_epoch_section_t*)&td->td_epoch_section;
+	record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
+	ck_epoch_end(record, section);
+	critical_exit();
 }
 
 /*
@@ -517,7 +532,24 @@ epoch_wait(epoch_t epoch)
 			("%d residual locks held", td->td_locks - locks));
 }
 
+static void
+epoch_block_handler_critical(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused,
+					void *arg __unused)
+{
+	cpu_spinwait();
+}
+
 void
+epoch_wait_critical(epoch_t epoch)
+{
+
+	MPASS(epoch->e_flags & EPOCH_CRITICAL);
+	critical_enter();
+	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_critical, NULL);
+	critical_exit();
+}
+
+void
 epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t))
 {
 	struct epoch_pcpu_state *eps;
@@ -530,8 +562,6 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*
 	if (__predict_false(epoch == NULL))
 		goto boottime;
 
-	counter_u64_add(epoch->e_frees, 1);
-
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
 	eps = epoch->e_pcpu[curcpu];
@@ -542,20 +572,18 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*
 	callback(ctx);
 }
 
-
 static void
 epoch_call_task(void *arg __unused)
 {
 	ck_stack_entry_t *cursor, *head, *next;
 	ck_epoch_record_t *record;
-	ck_epoch_section_t section;
 	epoch_t epoch;
 	ck_stack_t cb_stack;
 	int i, npending, total;
 
 	ck_stack_init(&cb_stack);
 	critical_enter();
-	epoch_enter_private(&section);
+	epoch_enter_critical(global_epoch_critical);
 	for (total = i = 0; i < epoch_count; i++) {
 		if (__predict_false((epoch = allepochs[i]) == NULL))
 			continue;
@@ -565,9 +593,12 @@ epoch_call_task(void *arg __unused)
 		ck_epoch_poll_deferred(record, &cb_stack);
 		total += npending - record->n_pending;
 	}
-	epoch_exit_private(&section);
+	epoch_exit_critical(global_epoch_critical);
 	*DPCPU_PTR(epoch_cb_count) -= total;
 	critical_exit();
+
+	counter_u64_add(epoch_call_count, total);
+	counter_u64_add(epoch_call_task_count, 1);
 
 	head = ck_stack_batch_pop_npsc(&cb_stack);
 	for (cursor = head; cursor != NULL; cursor = next) {

Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c	Thu May 17 23:59:56 2018	(r333774)
+++ head/sys/net/if.c	Fri May 18 01:52:51 2018	(r333775)
@@ -903,7 +903,7 @@ if_attachdomain(void *dummy)
 {
 	struct ifnet *ifp;
 
-	net_epoch = epoch_alloc();
+	net_epoch = epoch_alloc(0);
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
 		if_attachdomain1(ifp);
 }

Modified: head/sys/sys/epoch.h
==============================================================================
--- head/sys/sys/epoch.h	Thu May 17 23:59:56 2018	(r333774)
+++ head/sys/sys/epoch.h	Fri May 18 01:52:51 2018	(r333775)
@@ -35,7 +35,10 @@
 struct epoch;
 typedef struct epoch *epoch_t;
 
+#define EPOCH_CRITICAL 0x1
+
 extern epoch_t global_epoch;
+extern epoch_t global_epoch_critical;
 DPCPU_DECLARE(int, epoch_cb_count);
 DPCPU_DECLARE(struct grouptask, epoch_cb_task);
 
@@ -45,11 +48,14 @@ struct epoch_context {
 
 typedef struct epoch_context *epoch_context_t;
 
-epoch_t epoch_alloc(void);
+epoch_t epoch_alloc(int flags);
 void epoch_free(epoch_t epoch);
+void epoch_enter_critical(epoch_t epoch);
 void epoch_enter_internal(epoch_t epoch, struct thread *td);
+void epoch_exit_critical(epoch_t epoch);
 void epoch_exit_internal(epoch_t epoch, struct thread *td);
 void epoch_wait(epoch_t epoch);
+void epoch_wait_critical(epoch_t epoch);
 void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t));
 int in_epoch(void);
 

Modified: head/sys/tests/epoch/epoch_test.c
==============================================================================
--- head/sys/tests/epoch/epoch_test.c	Thu May 17 23:59:56 2018	(r333774)
+++ head/sys/tests/epoch/epoch_test.c	Fri May 18 01:52:51 2018	(r333775)
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/types.h>
+#include <sys/proc.h>
 #include <sys/counter.h>
 #include <sys/epoch.h>
 #include <sys/gtaskqueue.h>
@@ -138,7 +139,7 @@ test_modinit(void)
 	int i, error, pri_range, pri_off;
 
 	pri_range = PRI_MIN_TIMESHARE - PRI_MIN_REALTIME;
-	test_epoch = epoch_alloc();
+	test_epoch = epoch_alloc(0);
 	for (i = 0; i < mp_ncpus*2; i++) {
 		etilist[i].threadid = i;
 		error = kthread_add(testloop, &etilist[i], NULL, &testthreads[i],


