Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 9 Feb 2001 16:46:11 -0800 (PST)
From:      Luigi Rizzo <rizzo@aciri.org>
To:        jlemon@flugsvamp.com (Jonathan Lemon)
Cc:        rizzo@aciri.org, net@freebsd.org
Subject:   Re: [patch] fast sbappend*, please try...
Message-ID:  <200102100046.f1A0kBB12081@iguana.aciri.org>
In-Reply-To: <200102100013.f1A0DNp76359@prism.flugsvamp.com> from Jonathan Lemon at "Feb 9, 2001  6:13:23 pm"

next in thread | previous in thread | raw e-mail | index | archive | help
Thanks for the feedback...

there is another (performance) problem in my code, actually:
with TCP, or non-datagram sockets, the socket buffer usually
contains a single record, so that sbappend would still have to
scan the chain m->m_next . An additional tail pointer would
be needed to avoid that overhead as well in the generic case:

    sb_mb------>[  ]---[  ]---[  ]
		 |
		[  ]---[  ]---[  ]---[  ]
		 |
		[  ]---[  ]
		 |
    sb_mb_tail->[  ]---[  ]---[  ]---[  ]<--sb_mb_end


>     - I dislike having sb_mb_tail only being valid if sb_mb is NULL;
>       to me, this is non-obvious, and I feel it would be cleaner to
>       always make it valid

(actually, in my code sb_mb_tail is only valid if sb_mb is non_null,
and to me this seems reasonable, as the append code needs to
differentiate an empty queue anyways).  I see your point, but the
end result is probably not that different, with my approach
sbappendmbuf would just be

	if (sb->sb_mb == NULL)
		sb->sb_mb = m ;
	else
		sb->sb_mb_tail->m_nextpkt = m;
	sb->sb_mb_tail = m ;

instead of

	struct mbuf *m = sb->sb_mb_tail;
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	sb->sb_mb_tail = m0;

so basically you just change the variable to test.

>     - sbgettail() is misnamed, it should reflect the function that 
>       it is being used for in most cases (appending the new mbuf to 
>       an existing chain)

yes, the fact is that i first implemented as a 'get' function
and only at a later time realized that append was the most common
usage.

>     - I don't think that the fastscan sysctl is appropriate; either 

that is part of the debugging cruft, and was useful in this stage
to assert the difference in performance with and without the tail
pointer.

>     - Also, I believe that there may be a problem with the original
>       patch, in that the sb_mb_tail is not updated in sbdrop(), in 

yes, i noticed it and already fixed in a similar way.
Would you like to commit your patch to -current ?

	cheers
	luigi


>       train.
> 
> In that spirit, I offer an amended patch below.
> --
> Jonathan
> 
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.68.2.11
> diff -u -r1.68.2.11 uipc_socket.c
> --- kern/uipc_socket.c	2000/12/22 10:25:21	1.68.2.11
> +++ kern/uipc_socket.c	2001/02/09 23:57:14
> @@ -778,6 +778,11 @@
>  		goto restart;
>  	}
>  dontblock:
> +	/* 
> +	 * On entry here, m points to the first record on the socket buffer.
> +	 * While we process the initial mbufs containing address and control
> +	 * info we save a copy of m->m_nextpkt into nextrecord.
> +	 */
>  	if (uio->uio_procp)
>  		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
>  	nextrecord = m->m_nextpkt;
> @@ -821,6 +826,18 @@
>  			controlp = &(*controlp)->m_next;
>  		}
>  	}
> +	/*
> +	 * if nextrecord == NULL (this is a single chain) then m_tail
> +	 * may not be valid here if m was changed earlier.
> +	 */
> +	if (nextrecord == NULL && (flags & MSG_PEEK) == 0)
> +		so->so_rcv.sb_mb_tail = m;
> +
> +	/*
> +	 * If m is non-null, we have some data to read. From now on, make
> +	 * sure to keep sb_mb_tail consistent when working on the last
> +	 * packet on the chain (nextrecord==NULL) and we change m->m_nextpkt.
> +	 */
>  	if (m) {
>  		if ((flags & MSG_PEEK) == 0)
>  			m->m_nextpkt = nextrecord;
> @@ -881,6 +898,8 @@
>  				}
>  				if (m)
>  					m->m_nextpkt = nextrecord;
> +				if (nextrecord == NULL)
> +					so->so_rcv.sb_mb_tail = m;
>  			}
>  		} else {
>  			if (flags & MSG_PEEK)
> @@ -937,8 +956,11 @@
>  			(void) sbdroprecord(&so->so_rcv);
>  	}
>  	if ((flags & MSG_PEEK) == 0) {
> -		if (m == 0)
> +		if (m == 0) {
>  			so->so_rcv.sb_mb = nextrecord;
> +			if (nextrecord == NULL || nextrecord->m_nextpkt == NULL)
> +				so->so_rcv.sb_mb_tail = nextrecord;
> +		}
>  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
>  			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
>  	}
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.55.2.8
> diff -u -r1.55.2.8 uipc_socket2.c
> --- kern/uipc_socket2.c	2001/02/04 14:49:45	1.55.2.8
> +++ kern/uipc_socket2.c	2001/02/09 23:59:16
> @@ -63,6 +65,9 @@
>  
>  static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
>  
> +static int 	sbtailchk(struct sockbuf *sb);
> +static __inline	void 	sbappendmbuf(struct sockbuf *sb, struct mbuf *m0);
> +
>  /*
>   * Procedures to manipulate state flags of socket
>   * and do appropriate wakeups.  Normal sequence from the
> @@ -477,6 +482,29 @@
>   * or sbdroprecord() when the data is acknowledged by the peer.
>   */
>  
> +static int
> +sbtailchk(struct sockbuf *sb)
> +{
> +	struct mbuf *m = sb->sb_mb;
> +
> +	while (m && m->m_nextpkt)
> +		m = m->m_nextpkt;
> +	return (m == sb->sb_mb_tail);
> +}
> +
> +static __inline void
> +sbappendmbuf(struct sockbuf *sb, struct mbuf *m0)
> +{
> +	struct mbuf *m = sb->sb_mb_tail;
> +
> +	KASSERT(sbtailchk(sb), ("sbappendmbuf: bad sb_mb_tail"));
> +	if (m)
> +		m->m_nextpkt = m0;
> +	else
> +		sb->sb_mb = m0;
> +	sb->sb_mb_tail = m0;
> +}
> +
>  /*
>   * Append mbuf chain m to the last record in the
>   * socket buffer sb.  The additional space associated
> @@ -492,10 +520,9 @@
>  
>  	if (m == 0)
>  		return;
> -	n = sb->sb_mb;
> +	KASSERT(sbtailchk(sb), ("sbappend: bad sb_mb_tail"));
> +	n = sb->sb_mb_tail;
>  	if (n) {
> -		while (n->m_nextpkt)
> -			n = n->m_nextpkt;
>  		do {
>  			if (n->m_flags & M_EOR) {
>  				sbappendrecord(sb, m); /* XXXXXX!!!! */
> @@ -545,19 +572,12 @@
>  
>  	if (m0 == 0)
>  		return;
> -	m = sb->sb_mb;
> -	if (m)
> -		while (m->m_nextpkt)
> -			m = m->m_nextpkt;
>  	/*
>  	 * Put the first mbuf on the queue.
>  	 * Note this permits zero length records.
>  	 */
>  	sballoc(sb, m0);
> -	if (m)
> -		m->m_nextpkt = m0;
> -	else
> -		sb->sb_mb = m0;
> +	sbappendmbuf(sb, m0);
>  	m = m0->m_next;
>  	m0->m_next = 0;
>  	if (m && (m0->m_flags & M_EOR)) {
> @@ -603,6 +623,8 @@
>  	 */
>  	sballoc(sb, m0);
>  	m0->m_nextpkt = *mp;
> +	if (*mp == NULL) 		/* m0 is actually the new tail */
> +		sb->sb_mb_tail = m0;
>  	*mp = m0;
>  	m = m0->m_next;
>  	m0->m_next = 0;
> @@ -653,13 +675,7 @@
>  	m->m_next = control;
>  	for (n = m; n; n = n->m_next)
>  		sballoc(sb, n);
> -	n = sb->sb_mb;
> -	if (n) {
> -		while (n->m_nextpkt)
> -			n = n->m_nextpkt;
> -		n->m_nextpkt = m;
> -	} else
> -		sb->sb_mb = m;
> +	sbappendmbuf(sb, m);
>  	return (1);
>  }
>  
> @@ -686,13 +702,7 @@
>  	n->m_next = m0;			/* concatenate data to control */
>  	for (m = control; m; m = m->m_next)
>  		sballoc(sb, m);
> -	n = sb->sb_mb;
> -	if (n) {
> -		while (n->m_nextpkt)
> -			n = n->m_nextpkt;
> -		n->m_nextpkt = control;
> -	} else
> -		sb->sb_mb = control;
> +	sbappendmbuf(sb, control);
>  	return (1);
>  }
>  
> @@ -733,7 +743,7 @@
>  		if (n)
>  			n->m_next = m;
>  		else
> -			sb->sb_mb = m;
> +			sb->sb_mb = sb->sb_mb_tail = m;
>  		sballoc(sb, m);
>  		n = m;
>  		m->m_flags &= ~M_EOR;
> @@ -813,6 +823,9 @@
>  		m->m_nextpkt = next;
>  	} else
>  		sb->sb_mb = next;
> +	m = sb->sb_mb;
> +	if (m == NULL || m->m_nextpkt == NULL)
> +		sb->sb_mb_tail = m;
>  }
>  
>  /*
> @@ -833,6 +846,9 @@
>  			MFREE(m, mn);
>  			m = mn;
>  		} while (m);
> +		m = sb->sb_mb;
> +		if (m == NULL || m->m_nextpkt == NULL)
> +			sb->sb_mb_tail = m;
>  	}
>  }
>  
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /ncvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.46.2.4
> diff -u -r1.46.2.4 socketvar.h
> --- sys/socketvar.h	2000/11/26 02:30:08	1.46.2.4
> +++ sys/socketvar.h	2001/02/08 17:55:46
> @@ -93,6 +93,7 @@
>  		u_long	sb_mbmax;	/* max chars of mbufs to use */
>  		long	sb_lowat;	/* low water mark */
>  		struct	mbuf *sb_mb;	/* the mbuf chain */
> +		struct	mbuf *sb_mb_tail; /* last pkt in chain */
>  		struct	selinfo sb_sel;	/* process selecting read/write */
>  		short	sb_flags;	/* flags, see below */
>  		short	sb_timeo;	/* timeout for read/write */
> 



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200102100046.f1A0kBB12081>