Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 19 Jul 2019 18:39:25 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r350147 - head/sys/dev/nvme
Message-ID:  <201907191839.x6JIdPQF076598@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: imp
Date: Fri Jul 19 18:39:24 2019
New Revision: 350147
URL: https://svnweb.freebsd.org/changeset/base/350147

Log:
  Keep track of the number of commands that exhaust their retry limit.
  
  While we print failure messages on the console, sometimes logs are lost or
  overwhelmed. Keeping a count of how many times we've failed retriable commands
  helps get a magnitude of the problem.

Modified:
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c
  head/sys/dev/nvme/nvme_sysctl.c

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Fri Jul 19 18:39:18 2019	(r350146)
+++ head/sys/dev/nvme/nvme_private.h	Fri Jul 19 18:39:24 2019	(r350147)
@@ -191,6 +191,7 @@ struct nvme_qpair {
 	int64_t			num_cmds;
 	int64_t			num_intr_handler_calls;
 	int64_t			num_retries;
+	int64_t			num_failures;
 
 	struct nvme_command	*cmd;
 	struct nvme_completion	*cpl;

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c	Fri Jul 19 18:39:18 2019	(r350146)
+++ head/sys/dev/nvme/nvme_qpair.c	Fri Jul 19 18:39:24 2019	(r350147)
@@ -387,14 +387,16 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, 
     struct nvme_completion *cpl, error_print_t print_on_error)
 {
 	struct nvme_request	*req;
-	boolean_t		retry, error;
+	boolean_t		retry, error, retriable;
 
 	req = tr->req;
 	error = nvme_completion_is_error(cpl);
-	retry = error && nvme_completion_is_retry(cpl) &&
-	   req->retries < nvme_retry_count;
+	retriable = nvme_completion_is_retry(cpl);
+	retry = error && retriable && req->retries < nvme_retry_count;
 	if (retry)
 		qpair->num_retries++;
+	if (error && req->retries >= nvme_retry_count && retriable)
+		qpair->num_failures++;
 
 	if (error && (print_on_error == ERROR_PRINT_ALL ||
 		(!retry && print_on_error == ERROR_PRINT_NO_RETRY))) {
@@ -687,6 +689,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
 	qpair->num_retries = 0;
+	qpair->num_failures = 0;
 	qpair->cmd = (struct nvme_command *)queuemem;
 	qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz);
 	prpmem = (uint8_t *)(queuemem + cmdsz + cplsz);

Modified: head/sys/dev/nvme/nvme_sysctl.c
==============================================================================
--- head/sys/dev/nvme/nvme_sysctl.c	Fri Jul 19 18:39:18 2019	(r350146)
+++ head/sys/dev/nvme/nvme_sysctl.c	Fri Jul 19 18:39:24 2019	(r350147)
@@ -167,6 +167,7 @@ nvme_qpair_reset_stats(struct nvme_qpair *qpair)
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
 	qpair->num_retries = 0;
+	qpair->num_failures = 0;
 }
 
 static int
@@ -215,6 +216,21 @@ nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
 }
 
 static int
+nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
+{
+	struct nvme_controller 	*ctrlr = arg1;
+	int64_t			num_failures = 0;
+	int			i;
+
+	num_failures = ctrlr->adminq.num_failures;
+
+	for (i = 0; i < ctrlr->num_io_queues; i++)
+		num_failures += ctrlr->ioq[i].num_failures;
+
+	return (sysctl_handle_64(oidp, &num_failures, 0, req));
+}
+
+static int
 nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct nvme_controller 	*ctrlr = arg1;
@@ -267,6 +283,9 @@ nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
 	    "coalescing)");
 	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
 	    CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
+	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
+	    CTLFLAG_RD, &qpair->num_failures,
+	    "Number of commands ending in failure after all retries");
 
 	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
 	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0,
@@ -322,6 +341,11 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *c
 	    "num_retries", CTLTYPE_S64 | CTLFLAG_RD,
 	    ctrlr, 0, nvme_sysctl_num_retries, "IU",
 	    "Number of commands retried");
+
+	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
+	    "num_failures", CTLTYPE_S64 | CTLFLAG_RD,
+	    ctrlr, 0, nvme_sysctl_num_failures, "IU",
+	    "Number of commands ending in failure after all retries");
 
 	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
 	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201907191839.x6JIdPQF076598>