Date:      Sun, 11 Jan 2015 20:27:15 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r277023 - head/sys/x86/iommu
Message-ID:  <201501112027.t0BKRF0A028255@svn.freebsd.org>

Author: kib
Date: Sun Jan 11 20:27:15 2015
New Revision: 277023
URL: https://svnweb.freebsd.org/changeset/base/277023

Log:
  Right now, for non-coherent DMARs, the page table update code flushes
  the cache for the whole page containing a modified pte.  Moreover,
  only the last page in a series of consecutive pages is flushed, so
  flushes are missed whenever the affected mappings are larger than 2MB.
  
  Avoid the excessive flushing, and perform the previously missed but
  necessary flushing, by splitting invalidation from unmapping.  For
  now, flush exactly the range of the changed pte.  This is still
  somewhat bigger than necessary, since a pte is 8 bytes while a cache
  flush line is at least 32 bytes.
  
  The originator of the issue reports that after the change,
  'dmar_bus_dmamap_unload went from 13,288 cycles down to
  3,257. dmar_bus_dmamap_load_buffer went from 9,686 cycles down to
  3,517.  and I am now able to get line 1GbE speed with Netperf TCP
  (even with 1K message size).'
  
  Diagnosed and tested by:	Nadav Amit <nadav.amit@gmail.com>
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week
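
A distilled sketch of the caller-side pattern after this change
(condensed from the hunks below; the surrounding variables such as
pte, m, pflags, and sf are illustrative context, not verbatim source):

	/* Store the updated entry, then flush exactly that entry. */
	dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(m) | pflags);
	dmar_flush_pte_to_ram(ctx->dmar, pte);	/* no-op on coherent DMARs */

	/* Unmapping no longer implies any cache maintenance. */
	dmar_unmap_pgtbl(sf);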

Modified:
  head/sys/x86/iommu/intel_ctx.c
  head/sys/x86/iommu/intel_dmar.h
  head/sys/x86/iommu/intel_idpgtbl.c
  head/sys/x86/iommu/intel_utils.c

Modified: head/sys/x86/iommu/intel_ctx.c
==============================================================================
--- head/sys/x86/iommu/intel_ctx.c	Sun Jan 11 20:22:12 2015	(r277022)
+++ head/sys/x86/iommu/intel_ctx.c	Sun Jan 11 20:27:15 2015	(r277023)
@@ -97,7 +97,8 @@ dmar_ensure_ctx_page(struct dmar_unit *d
 	re += bus;
 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
 	    VM_PAGE_TO_PHYS(ctxm)));
-	dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+	dmar_flush_root_to_ram(dmar, re);
+	dmar_unmap_pgtbl(sf);
 	TD_PINNED_ASSERT;
 }
 
@@ -158,6 +159,7 @@ ctx_id_entry_init(struct dmar_ctx *ctx, 
 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
 		    DMAR_CTX1_P);
 	}
+	dmar_flush_ctx_to_ram(unit, ctxp);
 }
 
 static int
@@ -364,7 +366,7 @@ dmar_get_ctx(struct dmar_unit *dmar, dev
 			ctx->domain = alloc_unrl(dmar->domids);
 			if (ctx->domain == -1) {
 				DMAR_UNLOCK(dmar);
-				dmar_unmap_pgtbl(sf, true);
+				dmar_unmap_pgtbl(sf);
 				dmar_ctx_dtr(ctx, true, true);
 				TD_PINNED_ASSERT;
 				return (NULL);
@@ -389,7 +391,7 @@ dmar_get_ctx(struct dmar_unit *dmar, dev
 		} else {
 			dmar_ctx_dtr(ctx1, true, true);
 		}
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+		dmar_unmap_pgtbl(sf);
 	}
 	ctx->refs++;
 	if ((ctx->flags & DMAR_CTX_RMRR) != 0)
@@ -480,7 +482,7 @@ dmar_free_ctx_locked(struct dmar_unit *d
 	if (ctx->refs > 1) {
 		ctx->refs--;
 		DMAR_UNLOCK(dmar);
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+		dmar_unmap_pgtbl(sf);
 		TD_PINNED_ASSERT;
 		return;
 	}
@@ -496,6 +498,7 @@ dmar_free_ctx_locked(struct dmar_unit *d
 	 */
 	dmar_pte_clear(&ctxp->ctx1);
 	ctxp->ctx2 = 0;
+	dmar_flush_ctx_to_ram(dmar, ctxp);
 	dmar_inv_ctx_glob(dmar);
 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
 		if (dmar->qi_enabled)
@@ -513,7 +516,7 @@ dmar_free_ctx_locked(struct dmar_unit *d
 	taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
 	KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
 	    ("unfinished unloads %p", ctx));
-	dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+	dmar_unmap_pgtbl(sf);
 	free_unr(dmar->domids, ctx->domain);
 	dmar_ctx_dtr(ctx, true, true);
 	TD_PINNED_ASSERT;

Modified: head/sys/x86/iommu/intel_dmar.h
==============================================================================
--- head/sys/x86/iommu/intel_dmar.h	Sun Jan 11 20:22:12 2015	(r277022)
+++ head/sys/x86/iommu/intel_dmar.h	Sun Jan 11 20:27:15 2015	(r277023)
@@ -228,11 +228,14 @@ struct vm_page *dmar_pgalloc(vm_object_t
 void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
 void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
     struct sf_buf **sf);
-void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent);
+void dmar_unmap_pgtbl(struct sf_buf *sf);
 int dmar_load_root_entry_ptr(struct dmar_unit *unit);
 int dmar_inv_ctx_glob(struct dmar_unit *unit);
 int dmar_inv_iotlb_glob(struct dmar_unit *unit);
 int dmar_flush_write_bufs(struct dmar_unit *unit);
+void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst);
+void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst);
+void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst);
 int dmar_enable_translation(struct dmar_unit *unit);
 int dmar_disable_translation(struct dmar_unit *unit);
 bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);

Modified: head/sys/x86/iommu/intel_idpgtbl.c
==============================================================================
--- head/sys/x86/iommu/intel_idpgtbl.c	Sun Jan 11 20:22:12 2015	(r277022)
+++ head/sys/x86/iommu/intel_idpgtbl.c	Sun Jan 11 20:27:15 2015	(r277023)
@@ -146,7 +146,7 @@ ctx_idmap_nextlvl(struct idpgtbl *tbl, i
 		}
 	}
 	/* ctx_get_idmap_pgtbl flushes CPU cache if needed. */
-	dmar_unmap_pgtbl(sf, true);
+	dmar_unmap_pgtbl(sf);
 	VM_OBJECT_WLOCK(tbl->pgtbl_obj);
 }
 
@@ -361,7 +361,7 @@ ctx_pgtbl_map_pte(struct dmar_ctx *ctx, 
 		pte = (dmar_pte_t *)sf_buf_kva(*sf);
 	} else {
 		if (*sf != NULL)
-			dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+			dmar_unmap_pgtbl(*sf);
 		*idxp = idx;
 retry:
 		pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf);
@@ -397,9 +397,10 @@ retry:
 			}
 			dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
 			    VM_PAGE_TO_PHYS(m));
+			dmar_flush_pte_to_ram(ctx->dmar, ptep);
 			sf_buf_page(sfp)->wire_count += 1;
 			m->wire_count--;
-			dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar));
+			dmar_unmap_pgtbl(sfp);
 			/* Only executed once. */
 			goto retry;
 		}
@@ -467,20 +468,19 @@ ctx_map_buf_locked(struct dmar_ctx *ctx,
 		if (pte == NULL) {
 			KASSERT((flags & DMAR_PGF_WAITOK) == 0,
 			    ("failed waitable pte alloc %p", ctx));
-			if (sf != NULL) {
-				dmar_unmap_pgtbl(sf,
-				    DMAR_IS_COHERENT(ctx->dmar));
-			}
+			if (sf != NULL)
+				dmar_unmap_pgtbl(sf);
 			ctx_unmap_buf_locked(ctx, base1, base - base1, flags);
 			TD_PINNED_ASSERT;
 			return (ENOMEM);
 		}
 		dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
 		    (superpage ? DMAR_PTE_SP : 0));
+		dmar_flush_pte_to_ram(ctx->dmar, pte);
 		sf_buf_page(sf)->wire_count += 1;
 	}
 	if (sf != NULL)
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(sf);
 	TD_PINNED_ASSERT;
 	return (0);
 }
@@ -567,9 +567,10 @@ ctx_unmap_clear_pte(struct dmar_ctx *ctx
 	vm_page_t m;
 
 	dmar_pte_clear(&pte->pte);
+	dmar_flush_pte_to_ram(ctx->dmar, pte);
 	m = sf_buf_page(*sf);
 	if (free_sf) {
-		dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(*sf);
 		*sf = NULL;
 	}
 	m->wire_count--;
@@ -651,7 +652,7 @@ ctx_unmap_buf_locked(struct dmar_ctx *ct
 		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
 	}
 	if (sf != NULL)
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(sf);
 	/*
 	 * See 11.1 Write Buffer Flushing for an explanation why RWBF
 	 * can be ignored there.

Modified: head/sys/x86/iommu/intel_utils.c
==============================================================================
--- head/sys/x86/iommu/intel_utils.c	Sun Jan 11 20:22:12 2015	(r277022)
+++ head/sys/x86/iommu/intel_utils.c	Sun Jan 11 20:27:15 2015	(r277023)
@@ -354,20 +354,46 @@ dmar_map_pgtbl(vm_object_t obj, vm_pinde
 }
 
 void
-dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
+dmar_unmap_pgtbl(struct sf_buf *sf)
 {
-	vm_page_t m;
 
-	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
+}
+
+static void
+dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
+{
 
+	if (DMAR_IS_COHERENT(unit))
+		return;
 	/*
 	 * If DMAR does not snoop paging structures accesses, flush
 	 * CPU cache to memory.
 	 */
-	if (!coherent)
-		pmap_invalidate_cache_pages(&m, 1);
+	pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
+	    TRUE);
+}
+
+void
+dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
+}
+
+void
+dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
+}
+
+void
+dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
 }
 
 /*

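A hypothetical illustration of the granularity note in the log (not
part of the commit): although dmar_flush_transl_to_ram() is asked to
flush only sizeof(*dst) == 8 bytes, cache flushing operates on whole
lines, so at least one full line around the entry is written back.

	#include <stdint.h>

	/*
	 * Hypothetical sketch: compute the cache-line-aligned span that
	 * a flush of one 8-byte entry at 'dst' actually covers, assuming
	 * a 64-byte line (the log only promises "at least 32 bytes").
	 */
	static void
	flushed_span(const void *dst, uintptr_t *startp, uintptr_t *endp)
	{
		const uintptr_t line = 64;	/* assumed line size */

		*startp = (uintptr_t)dst & ~(line - 1);
		*endp = ((uintptr_t)dst + 8 + line - 1) & ~(line - 1);
	}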

