Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 7 Aug 2003 16:08:01 +0200
From:      Simon Barner <barner@in.tum.de>
To:        "mitrohin a.s." <swp@uni-altai.ru>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: [patch] Re: getfsent(3) and spaces in fstab
Message-ID:  <20030807140801.GD1475@zi025.glhnet.mhn.de>
In-Reply-To: <20030805020736.GA1805@bspu.secna.ru>
References:  <1059607242.64020.5.camel@mjtdev1.dand06.au.bytecraft.au.com> <3F285560.2090607@acm.org> <1059608748.64020.10.camel@mjtdev1.dand06.au.bytecraft.au.com> <002201c356fa$4a66a700$1200a8c0@gsicomp.on.ca> <20030731134343.GB1323@zi025.glhnet.mhn.de> <1059693358.64020.31.camel@mjtdev1.dand06.au.bytecraft.au.com> <20030801153142.GA487@zi025.glhnet.mhn.de> <3F2B75E2.FBC18052@mindspring.com> <20030804003331.GA408@zi025.glhnet.mhn.de> <20030805020736.GA1805@bspu.secna.ru>

next in thread | previous in thread | raw e-mail | index | archive | help

--m51xatjYGsM+13rf
Content-Type: multipart/mixed; boundary="O5XBE6gyVG5Rl6Rj"
Content-Disposition: inline


--O5XBE6gyVG5Rl6Rj
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

> imho - expensive algorithm... i want to see anything more simple...=20
> like "gtok()" instead "es_strsep() + remove_escapes()"?

I have adapted my patch to use your neat gtok() function, but I came to
the conclusion that a two-pass algorithm is necessary:

The first pass detects whether a line from fstab is the old or the new
style format (old style lines may only have unescaped white spaces
before a trailing #-comment).

Then, the second pass extracts the information.

I admit this is rather complicated, but I don't know how to handle two
sets of delimiters (":\n" and " \n\r\t") with only one pass. Using gtok()
to detect the style of a line is not an option IMO, since it would convert
escape sequences.

Now, the following lines can be processed:

1) old style:
<file system>:<mount point>:<mount type>:<dump>:<passno>([' ','\t']*#<comme=
nt>)*

2) new style:
format as described in fstab(5), plus an optional #-comment at the end
of the line

3) empty lines, white-space-only lines, and arbitrarily many white spaces
followed by a comment

In both the old and the new style lines, white spaces can be written as
escape sequences or in double quotes.

Could somebody please review my patch? If there are no objections (but
I am sure there are some more details that can be improved), I will
write a PR.

Regards,
 Simon

--O5XBE6gyVG5Rl6Rj
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="patch-fstab.c"
Content-Transfer-Encoding: quoted-printable

--- fstab.c.orig	Fri Aug  1 17:18:00 2003
+++ fstab.c	Thu Aug  7 15:46:39 2003
@@ -84,6 +84,60 @@
 	_fs_fstab.fs_spec =3D buf;
 }
=20
+/*
+ * Extracts the next token from the string *s. Leading characters from
+ * *delim are skipped; the token ends at the first unquoted, unescaped
+ * delimiter. A '\' escapes the next character, so delimiters, double
+ * quotes, '#' and '\' itself can occur in a token; \t, \r, \n and \a
+ * are converted to control characters. Delimiters inside double quotes
+ * are literal, and the quotes themselves are dropped. An unescaped '#'
+ * at the start of a token begins a comment; use '\#' for a literal '#'.
+ *
+ * The token is unescaped in place, terminated with '\0' and returned.
+ * NULL is returned, and *s is left unchanged, if s or *s is NULL or if
+ * only delimiters or a comment remain. Otherwise *s is advanced past the
+ * delimiter that ended the token, or to the end of the string.
+ */
+char *gtok(char **s, char const *delim)
+{
+	int quoted, escaped;
+	static char const esc_set[] =3D {  't',  'r',  'n',  'a', 0 };
+	static char const esc_rep[] =3D { '\t', '\r', '\n', '\a', 0 };
+	char *tok, *r, *w, *p;
+
+	if (!s || !*s || !*(tok =3D *s + strspn(*s, delim)) || *tok =3D=3D '#')
+		return NULL;
+
+	for (quoted =3D escaped =3D 0, r =3D w =3D tok; *r; r++) {
+		if (!escaped) {
+			if (*r =3D=3D '\\') {
+				escaped =3D 1;
+				continue;
+			}
+			if (*r =3D=3D '\"') {
+				quoted ^=3D -1;	/* toggles 0 <-> -1 */
+				continue;
+			}
+			if (!quoted) {
+				if (strchr(delim, *r)) {
+					r++;	/* step over the delimiter */
+					break;
+				}
+			}
+		} else {
+			escaped =3D 0;
+			if ((p =3D strchr(esc_set, *r)) !=3D NULL) {
+				*w++ =3D esc_rep[p - esc_set];
+				continue;
+			}
+		}
+		*w++ =3D *r;	/* copy the character unchanged */
+	}
+	*w =3D 0;		/* terminate the unescaped token */
+	*s =3D r;
+	return tok;
+}
+
 static int
 fstabscan()
 {
@@ -91,21 +145,73 @@
 #define	MAXLINELENGTH	1024
 	static char line[MAXLINELENGTH];
 	char subline[MAXLINELENGTH];
-	int typexx;
+	int typexx, escaped=3D0, quoted=3D0, ws_sep=3D0;
=20
 	for (;;) {
=20
 		if (!(p =3D fgets(line, sizeof(line), _fs_fp)))
 			return(0);
-/* OLD_STYLE_FSTAB */
 		++LineNo;
-		if (*line =3D=3D '#' || *line =3D=3D '\n')
-			continue;
-		if (!strpbrk(p, " \t")) {
-			_fs_fstab.fs_spec =3D strsep(&p, ":\n");
-			_fs_fstab.fs_file =3D strsep(&p, ":\n");
+	=09
+		/* Detect whether line is in old or new fstab style */
+		for (cp=3Dp; *cp !=3D '\n'; ++cp) {
+			if (*cp =3D=3D '\\') {
+			    escaped =3D (escaped ? 0 : 1);
+			    continue;
+			}
+			if (!escaped) {
+			    /* Quotes */
+			    if (*cp =3D=3D '\"') {
+			    	quoted =3D (quoted ? 0 : 1);
+				continue;
+			    }
+			    if (quoted)
+			    	continue;
+			    /* new white separator found */
+			    if (cp > p && strspn (cp, " \n\r\t") &&
+ 				!strspn(cp-1, " \t"))
+				++ws_sep;
+			   =20
+			    /* #-comment found */
+			    if (*cp =3D=3D '#') {
+			    	*cp =3D '\0';
+				/* ignore white spaces in front of a comment */
+				if (cp > p && strspn(cp-1, " \t") &&=20
+				    ws_sep > 0)
+				    ws_sep--;
+				    break;
+			    }
+			} else
+			    escaped =3D 0;
+		}
+		/* open quotes and unfinished escape-sequences are bad */
+		if (quoted || escaped)
+		    goto bad;
+		/* ignore trailing white spaces */
+	        if (*(cp + strspn (cp, " \t")) =3D=3D '\n' && ws_sep > 0)
+		    --ws_sep;
+		  =20
+		/* No white space separators found =3D> OLD_STYLE_FSTAB */
+		if (ws_sep =3D=3D 0) {
+			/*
+			 * line consists only of white spaces
+			 * (evtl. + #-comment)
+			 */
+			if (strspn (p, " \t"))
+				continue;
+			/*
+			 * Now read the different values (gtok will convert
+			 * escape seq.). Format is:
+			 *  <fs_spec>:<fs_file>:<fs_type>:<freq>:<passno>
+			 * ':' itself can be encodes as '\:'
+			 */
+			if (!(_fs_fstab.fs_spec =3D gtok(&p, ":\n\r")))
+				continue;
+			if (!(_fs_fstab.fs_file =3D gtok(&p, ":\n\r"))) {
+				goto bad;
+			}
 			fixfsfile();
-			_fs_fstab.fs_type =3D strsep(&p, ":\n");
+			_fs_fstab.fs_type =3D gtok(&p, ":\n\r");
 			if (_fs_fstab.fs_type) {
 				if (!strcmp(_fs_fstab.fs_type, FSTAB_XX))
 					continue;
@@ -113,46 +219,43 @@
 				_fs_fstab.fs_vfstype =3D
 				    strcmp(_fs_fstab.fs_type, FSTAB_SW) ?
 				    "ufs" : "swap";
-				if ((cp =3D strsep(&p, ":\n")) !=3D NULL) {
+				if ((cp =3D gtok(&p, ":\n\r")) !=3D NULL) {
 					_fs_fstab.fs_freq =3D atoi(cp);
-					if ((cp =3D strsep(&p, ":\n")) !=3D NULL) {
+					if ((cp =3D gtok(&p, " \n\r\t")) !=3D NULL) {
 						_fs_fstab.fs_passno =3D atoi(cp);
+						if (gtok (&p, " \n\r\t"))
+						    goto bad;
+						   =20
 						return(1);
 					}
 				}
 			}
 			goto bad;
 		}
-/* OLD_STYLE_FSTAB */
-		while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-			;
-		_fs_fstab.fs_spec =3D cp;
-		if (!_fs_fstab.fs_spec || *_fs_fstab.fs_spec =3D=3D '#')
+	=09
+		/* At least one white space sep. found =3D> NEW_STYLE_FSTAB */
+		if (!(_fs_fstab.fs_spec =3D gtok(&p, " \n\r\t")))
 			continue;
-		while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-			;
-		_fs_fstab.fs_file =3D cp;
+		if (!(_fs_fstab.fs_file =3D gtok(&p, " \n\r\t")))
+			goto bad;
 		fixfsfile();
-		while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-			;
-		_fs_fstab.fs_vfstype =3D cp;
-		while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-			;
-		_fs_fstab.fs_mntops =3D cp;
-		if (_fs_fstab.fs_mntops =3D=3D NULL)
+		if (!(_fs_fstab.fs_vfstype =3D gtok(&p, " \n\r\t")))
+			goto bad;
+		if (!(_fs_fstab.fs_mntops =3D gtok(&p, " \n\r\t")))
 			goto bad;
 		_fs_fstab.fs_freq =3D 0;
 		_fs_fstab.fs_passno =3D 0;
-		while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-			;
+		cp =3D gtok(&p, " \n\r\t");
 		if (cp !=3D NULL) {
 			_fs_fstab.fs_freq =3D atoi(cp);
-			while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0')
-				;
+			cp =3D gtok(&p, " \n\r\t");
 			if (cp !=3D NULL)
 				_fs_fstab.fs_passno =3D atoi(cp);
 		}
 		strcpy(subline, _fs_fstab.fs_mntops);
+		if (gtok (&p, " \n\r\t"))
+		    goto bad;
+
 		p =3D subline;
 		for (typexx =3D 0, cp =3D strsep(&p, ","); cp;
 		     cp =3D strsep(&p, ",")) {

--O5XBE6gyVG5Rl6Rj--

--m51xatjYGsM+13rf
Content-Type: application/pgp-signature; name="signature.asc"
Content-Description: Digital signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.2 (FreeBSD)

iD8DBQE/Ml1BCkn+/eutqCoRAtkbAKD4zdFKhcmU+oBM4O3piYxAINenrgCgzAFP
xf9B3L1OcG/V6TXSY6Dy5HA=
=Mr8w
-----END PGP SIGNATURE-----

--m51xatjYGsM+13rf--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20030807140801.GD1475>