Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 10 May 2021 15:56:16 GMT
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: e9959506d2cc - stable/12 - nfsd: fix the slot sequence# when a callback fails
Message-ID:  <202105101556.14AFuGN9099146@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/12 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=e9959506d2cc2fd728be8cef5babbcd019c4fcd2

commit e9959506d2cc2fd728be8cef5babbcd019c4fcd2
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-04-26 23:24:10 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-05-10 15:53:15 +0000

    nfsd: fix the slot sequence# when a callback fails
    
    Commit 4281bfec3628 patched the server so that the
    callback session slot would be free'd for reuse when
    a callback attempt fails.
    However, this can often result in the sequence# for
    the session slot to be advanced such that the client
    end will reply NFSERR_SEQMISORDERED.
    
    To avoid the NFSERR_SEQMISORDERED client reply,
    this patch negates the sequence# advance for the
    case where the callback has failed.
    The common case is a failed back channel, where
    the callback cannot be sent to the client, and
    not advancing the sequence# is correct for this
    case.  For the uncommon case where the client's
    reply to the callback is lost, not advancing the
    sequence# will indicate to the client that the
    next callback is a retry and not a new callback.
    But, since the FreeBSD server always sets "csa_cachethis"
    false in the callback sequence operation, a retry
    and a new callback should be handled the same way
    by the client, so this should not matter.
    
    Until you have this patch in your NFSv4.1/4.2 server,
    you should consider avoiding the use of delegations.
    Even with this patch, interoperation with the
    Linux NFSv4.1/4.2 client in kernel versions prior
    to 5.3 can result in frequent 15second delays if
    delegations are enabled.  This occurs because, for
    kernels prior to 5.3, the Linux client does a TCP
    reconnect every time it sees multiple concurrent
    callbacks and then it takes 15seconds to recover
    the back channel after doing so.
    
    (cherry picked from commit 87597731488105dd1ab921a95e39bb62e1abe668)
---
 sys/fs/nfs/nfs_commonkrpc.c      |  4 ++--
 sys/fs/nfs/nfs_commonsubs.c      |  4 +++-
 sys/fs/nfs/nfs_var.h             |  2 +-
 sys/fs/nfsserver/nfs_nfsdstate.c | 23 ++++++++++++++++++++---
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c
index 78b6d0ed6985..449b3ad5e4a7 100644
--- a/sys/fs/nfs/nfs_commonkrpc.c
+++ b/sys/fs/nfs/nfs_commonkrpc.c
@@ -866,7 +866,7 @@ tryagain:
 			sep->nfsess_slotseq[nd->nd_slotid] += 10;
 			mtx_unlock(&sep->nfsess_mtx);
 			/* And free the slot. */
-			nfsv4_freeslot(sep, nd->nd_slotid);
+			nfsv4_freeslot(sep, nd->nd_slotid, false);
 		}
 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
 		error = ENXIO;
@@ -1103,7 +1103,7 @@ tryagain:
 		if ((nd->nd_flag & ND_NFSV4) != 0) {
 			/* Free the slot, as required. */
 			if (freeslot != -1)
-				nfsv4_freeslot(sep, freeslot);
+				nfsv4_freeslot(sep, freeslot, false);
 			/*
 			 * If this op is Putfh, throw its results away.
 			 */
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index 39cf9e9fc819..390d6e91535f 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -4751,7 +4751,7 @@ nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep,
  * Free a session slot.
  */
 void
-nfsv4_freeslot(struct nfsclsession *sep, int slot)
+nfsv4_freeslot(struct nfsclsession *sep, int slot, bool resetseq)
 {
 	uint64_t bitval;
 
@@ -4759,6 +4759,8 @@ nfsv4_freeslot(struct nfsclsession *sep, int slot)
 	if (slot > 0)
 		bitval <<= slot;
 	mtx_lock(&sep->nfsess_mtx);
+	if (resetseq)
+		sep->nfsess_slotseq[slot]--;
 	if ((bitval & sep->nfsess_slots) == 0)
 		printf("freeing free slot!!\n");
 	sep->nfsess_slots &= ~bitval;
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 9c8942e27132..bee66d15b016 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -345,7 +345,7 @@ void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *,
     struct nfsclsession *, int);
 int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *,
     int *, uint32_t *, uint8_t *);
-void nfsv4_freeslot(struct nfsclsession *, int);
+void nfsv4_freeslot(struct nfsclsession *, int, bool);
 struct ucred *nfsrv_getgrpscred(struct ucred *);
 struct nfsdevice *nfsv4_findmirror(struct nfsmount *);
 
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index a093d5db9a18..9171891478c1 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -4562,7 +4562,8 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 		if ((clp->lc_flags & LCL_NFSV41) != 0) {
 			error = ECONNREFUSED;
 			if (procnum != NFSV4PROC_CBNULL)
-				nfsv4_freeslot(&sep->sess_cbsess, slotpos);
+				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
+				    true);
 			nfsrv_freesession(sep, NULL);
 		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 			error = newnfs_connect(NULL, &clp->lc_req, cred,
@@ -4594,8 +4595,24 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 				error = ECONNREFUSED;
 			}
 			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
-			if (error != 0 && procnum != NFSV4PROC_CBNULL)
-				nfsv4_freeslot(&sep->sess_cbsess, slotpos);
+			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
+				/*
+				 * It is likely that the callback was never
+				 * processed by the client and, as such,
+				 * the sequence# for the session slot needs
+				 * to be backed up by one to avoid a
+				 * NFSERR_SEQMISORDERED error reply.
+				 * For the unlikely case where the callback
+				 * was processed by the client, this will
+				 * make the next callback on the slot
+				 * appear to be a retry.
+				 * Since callbacks never specify that the
+				 * reply be cached, this "apparent retry"
+				 * should not be a problem.
+				 */
+				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
+				    true);
+			}
 			nfsrv_freesession(sep, NULL);
 		} else
 			error = newnfs_request(nd, NULL, clp, &clp->lc_req,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202105101556.14AFuGN9099146>