Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 17 Nov 2010 18:35:41 +0200
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Mike Tancsa <mike@sentex.net>
Cc:        stable@freebsd.org
Subject:   Re: Call for testers: FPU changes
Message-ID:  <20101117163541.GR2392@deviant.kiev.zoral.com.ua>
In-Reply-To: <4CE333EF.10406@sentex.net>
References:  <20101115211350.GE2392@deviant.kiev.zoral.com.ua> <4CE1FDBA.9030403@sentex.net> <20101116094330.GH2392@deviant.kiev.zoral.com.ua> <4CE300DE.8010304@sentex.net> <20101116221926.GN2392@deviant.kiev.zoral.com.ua> <4CE333EF.10406@sentex.net>

next in thread | previous in thread | raw e-mail | index | archive | help

--QHdPxs9xTQFJbl9m
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Tue, Nov 16, 2010 at 08:46:23PM -0500, Mike Tancsa wrote:
> On 11/16/2010 5:19 PM, Kostik Belousov wrote:
> > Would your conclusion be that the patch seems to increase the throughput
> > of the aesni(4) ?
> >=20
> > I think that on small-sized blocks, when using aesni(4), the dominating
> > factor is the copying/copyout of the data to/from the kernel address
> > space. Still would be interesting to compare the full output
> > of "openssl speed" on aesni(4) with and without the patch I posted.
>=20
> Hi,
> 	There does seem to be some improvement on large blocks.  But there are
> some freakishly fast times. On other sizes, there is no difference in
> speed it would seem
>=20
> I did 20 runs. Updated stats at http://www.tancsa.com/fpu.html

Thank you. Indeed, I think that the test units are too small, so
random system events can cause the variation. Nonetheless, the patch
seems to help, so I committed it.

Meanwhile, a similar change may be beneficial for padlock(4) too.
If you are going to test it, please note that, most likely, the openssl
padlock engine does not use padlock(4), though I do not know for sure.

diff --git a/sys/crypto/via/padlock.c b/sys/crypto/via/padlock.c
index 77e059b..ba63093 100644
--- a/sys/crypto/via/padlock.c
+++ b/sys/crypto/via/padlock.c
@@ -170,7 +170,7 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct=
 cryptoini *cri)
 	struct padlock_session *ses =3D NULL;
 	struct cryptoini *encini, *macini;
 	struct thread *td;
-	int error;
+	int error, saved_ctx;
=20
 	if (sidp =3D=3D NULL || cri =3D=3D NULL)
 		return (EINVAL);
@@ -238,10 +238,18 @@ padlock_newsession(device_t dev, uint32_t *sidp, stru=
ct cryptoini *cri)
=20
 	if (macini !=3D NULL) {
 		td =3D curthread;
-		error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL);
+		if (!is_fpu_kern_thread(0)) {
+			error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx,
+			    FPU_KERN_NORMAL);
+			saved_ctx =3D 1;
+		} else {
+			error =3D 0;
+			saved_ctx =3D 0;
+		}
 		if (error =3D=3D 0) {
 			error =3D padlock_hash_setup(ses, macini);
-			fpu_kern_leave(td, &ses->ses_fpu_ctx);
+			if (saved_ctx)
+				fpu_kern_leave(td, &ses->ses_fpu_ctx);
 		}
 		if (error !=3D 0) {
 			padlock_freesession_one(sc, ses, 0);
diff --git a/sys/crypto/via/padlock_cipher.c b/sys/crypto/via/padlock_ciphe=
r.c
index 0ae26c8..1456ddf 100644
--- a/sys/crypto/via/padlock_cipher.c
+++ b/sys/crypto/via/padlock_cipher.c
@@ -205,7 +205,7 @@ padlock_cipher_process(struct padlock_session *ses, str=
uct cryptodesc *enccrd,
 	struct thread *td;
 	u_char *buf, *abuf;
 	uint32_t *key;
-	int allocated, error;
+	int allocated, error, saved_ctx;
=20
 	buf =3D padlock_cipher_alloc(enccrd, crp, &allocated);
 	if (buf =3D=3D NULL)
@@ -250,14 +250,21 @@ padlock_cipher_process(struct padlock_session *ses, s=
truct cryptodesc *enccrd,
 	}
=20
 	td =3D curthread;
-	error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL);
+	if (!is_fpu_kern_thread(0)) {
+		error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL);
+		saved_ctx =3D 1;
+	} else {
+		error =3D 0;
+		saved_ctx =3D 0;
+	}
 	if (error !=3D 0)
 		goto out;
=20
 	padlock_cbc(abuf, abuf, enccrd->crd_len / AES_BLOCK_LEN, key, cw,
 	    ses->ses_iv);
=20
-	fpu_kern_leave(td, &ses->ses_fpu_ctx);
+	if (saved_ctx)
+		fpu_kern_leave(td, &ses->ses_fpu_ctx);
=20
 	if (allocated) {
 		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c
index 58c58b2..0fe182b 100644
--- a/sys/crypto/via/padlock_hash.c
+++ b/sys/crypto/via/padlock_hash.c
@@ -366,17 +366,24 @@ padlock_hash_process(struct padlock_session *ses, str=
uct cryptodesc *maccrd,
     struct cryptop *crp)
 {
 	struct thread *td;
-	int error;
+	int error, saved_ctx;
=20
 	td =3D curthread;
-	error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL);
+	if (!is_fpu_kern_thread(0)) {
+		error =3D fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL);
+		saved_ctx =3D 1;
+	} else {
+		error =3D 0;
+		saved_ctx =3D 0;
+	}
 	if (error !=3D 0)
 		return (error);
 	if ((maccrd->crd_flags & CRD_F_KEY_EXPLICIT) !=3D 0)
 		padlock_hash_key_setup(ses, maccrd->crd_key, maccrd->crd_klen);
=20
 	error =3D padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags);
-	fpu_kern_leave(td, &ses->ses_fpu_ctx);
+	if (saved_ctx)
+		fpu_kern_leave(td, &ses->ses_fpu_ctx);
 	return (error);
 }
=20

--QHdPxs9xTQFJbl9m
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (FreeBSD)

iEYEARECAAYFAkzkBFwACgkQC3+MBN1Mb4glLgCg84M8O4YhICTfLgjBQpS5rKsK
X60AoL1rJFCn2zjHDZpihSLXdsQTU3tA
=7tWt
-----END PGP SIGNATURE-----

--QHdPxs9xTQFJbl9m--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20101117163541.GR2392>