Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 29 Dec 2005 19:33:38 -0500
From:      Martin Cracauer <cracauer@cons.org>
To:        freebsd-current@freebsd.org
Subject:   fetch extension - use local filename from content-disposition header
Message-ID:  <20051229193328.A13367@cons.org>

next in thread | raw e-mail | index | archive | help

--QKdGvSO+nmPlgiQ/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I'm a bit rusty, so please point me to style mistakes in the appended
diff. 

The following diff implements a "-O" option to fetch(1), which, when
set, will make fetch use a local filename supplied by the server in a
Content-Disposition header.

The most common case for this is when files are uploaded to a web
server by users and handled there as "attachments".  The filename in
the URL will look like "http://foo.bar.com/attachment.php?attid=42",
which is useless as a local filename.

However, popular web software like the vBulletin forum system and
Bugzilla internally stores the original filename (e.g. "mysystem.jpg",
the name the file had when the uploader submitted it) and provides it
to the client in a Content-Disposition header.

If you visit such an attachment in Mozilla, you will see that using
the "save" function will default to the original filename.

This extension to fetch implements the same thing.

You can test it here:
http://www.cons.org/tmp/content-disposition.cgi

Open it in a browser and choose "save to disk"; it will default to
"foo.txt" instead of "content-disposition.cgi".  The same happens if you
use the new fetch with -O.  Or test on any attachment on a modern
version of vBulletin.

If you use Bugzilla somewhere, use this fetch to get an attachment
with "-O" and you'll be thankful that it got the original filename,
e.g. "reproduce-bug.query".

Martin
-- 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Martin Cracauer <cracauer@cons.org>   http://www.cons.org/cracauer/
FreeBSD - where you want to go, today.      http://www.freebsd.org/

--QKdGvSO+nmPlgiQ/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="freebsd-fetch-O.diff"

Index: usr.bin/fetch/fetch.1
===================================================================
RCS file: /home/CVS-FreeBSD/src/usr.bin/fetch/fetch.1,v
retrieving revision 1.66
diff -u -r1.66 fetch.1
--- usr.bin/fetch/fetch.1	13 Feb 2005 22:25:21 -0000	1.66
+++ usr.bin/fetch/fetch.1	30 Dec 2005 00:11:38 -0000
@@ -148,6 +148,11 @@
 .Ar file
 argument is a directory, fetched file(s) will be placed within the
 directory, with name(s) selected as in the default behaviour.
+.It Fl O
+For the output filename, use the name supplied by the server in
+a Content-Disposition header.  If no such header was sent, or if
+it was not parsable, behave as if this option was not set
+(fall through to -o or derive from URL).
 .It Fl P
 .It Fl p
 Use passive FTP.
Index: usr.bin/fetch/fetch.c
===================================================================
RCS file: /home/CVS-FreeBSD/src/usr.bin/fetch/fetch.c,v
retrieving revision 1.75
diff -u -r1.75 fetch.c
--- usr.bin/fetch/fetch.c	18 Nov 2004 12:01:30 -0000	1.75
+++ usr.bin/fetch/fetch.c	30 Dec 2005 00:11:38 -0000
@@ -68,6 +68,7 @@
 int	 o_directory;	/*        output file is a directory */
 char	*o_filename;	/*        name of output file */
 int	 o_stdout;	/*        output file is stdout */
+int	 O_flag;	/*    -O: filename from content-disposition header */
 int	 once_flag;	/*    -1: stop at first successful file */
 int	 p_flag;	/* -[Pp]: use passive FTP */
 int	 R_flag;	/*    -R: don't delete partially transferred files */
@@ -402,6 +403,38 @@
 		goto success;
 	}
 
+	/* start the transfer */
+	if (timeout)
+		alarm(timeout);
+	us.content_disposition = NULL;
+	f = fetchXGet(url, &us, flags); /* .... cracauer - pass content-disposition back in &us */
+	if (timeout)
+		alarm(0);
+	if (sigalrm || sigint)
+		goto signal;
+	if (f == NULL) {
+		warnx("%s: %s", URL, fetchLastErrString);
+		goto failure;
+	}
+	if (sigint)
+		goto signal;
+
+	/*
+	 * If the user asked us to use the filename from a Content-Disposition
+	 * header, and if the server provided such a header, use it.
+	 *
+	 * Note that this header, if set, is malloc memory that is our 
+	 * responsibility to free.  This is the case no matter whether
+	 * O_flag is set or not.
+	 */
+	if (O_flag && us.content_disposition) {
+		if (v_level > 0)
+			fprintf(stderr, 
+			    "Using server-supplied filename '%s'\n",
+			    us.content_disposition);
+		path = us.content_disposition;
+	}
+
 	/*
 	 * If the -r flag was specified, we have to compare the local
 	 * and remote files, so we should really do a fetchStat()
@@ -434,21 +467,6 @@
 		}
 	}
 
-	/* start the transfer */
-	if (timeout)
-		alarm(timeout);
-	f = fetchXGet(url, &us, flags);
-	if (timeout)
-		alarm(0);
-	if (sigalrm || sigint)
-		goto signal;
-	if (f == NULL) {
-		warnx("%s: %s", URL, fetchLastErrString);
-		goto failure;
-	}
-	if (sigint)
-		goto signal;
-
 	/* check that size is as expected */
 	if (S_size) {
 		if (us.size == -1) {
@@ -698,6 +716,8 @@
 		fetchFreeURL(url);
 	if (tmppath != NULL)
 		free(tmppath);
+	if (us.content_disposition != NULL)
+		free(us.content_disposition);
 	return (r);
 }
 
@@ -724,7 +744,7 @@
 	int c, e, r;
 
 	while ((c = getopt(argc, argv,
-	    "146AaB:bc:dFf:Hh:lMmN:nPpo:qRrS:sT:tUvw:")) != -1)
+	    "146AaB:bc:dFf:Hh:lMmN:nPpo:OqRrS:sT:tUvw:")) != -1)
 		switch (c) {
 		case '1':
 			once_flag = 1;
@@ -776,6 +796,9 @@
 			o_flag = 1;
 			o_filename = optarg;
 			break;
+		case 'O':
+			O_flag = 1;
+			break;
 		case 'M':
 		case 'm':
 			if (r_flag)
Index: lib/libfetch/fetch.h
===================================================================
RCS file: /home/CVS-FreeBSD/src/lib/libfetch/fetch.h,v
retrieving revision 1.26
diff -u -r1.26 fetch.h
--- lib/libfetch/fetch.h	21 Sep 2004 18:35:20 -0000	1.26
+++ lib/libfetch/fetch.h	30 Dec 2005 00:11:38 -0000
@@ -52,6 +52,7 @@
 	off_t		 size;
 	time_t		 atime;
 	time_t		 mtime;
+	char             *content_disposition;
 };
 
 struct url_ent {
Index: lib/libfetch/http.c
===================================================================
RCS file: /home/CVS-FreeBSD/src/lib/libfetch/http.c,v
retrieving revision 1.77
diff -u -r1.77 http.c
--- lib/libfetch/http.c	24 Aug 2005 12:28:05 -0000	1.77
+++ lib/libfetch/http.c	30 Dec 2005 00:11:38 -0000
@@ -334,6 +334,7 @@
 	hdr_error = -1,
 	hdr_end = 0,
 	hdr_unknown = 1,
+	hdr_content_disposition,
 	hdr_content_length,
 	hdr_content_range,
 	hdr_last_modified,
@@ -347,6 +348,7 @@
 	hdr_t		 num;
 	const char	*name;
 } hdr_names[] = {
+	{ hdr_content_disposition,	"Content-Disposition" },
 	{ hdr_content_length,		"Content-Length" },
 	{ hdr_content_range,		"Content-Range" },
 	{ hdr_last_modified,		"Last-Modified" },
@@ -549,6 +551,30 @@
 	return (0);
 }
 
+/*
+ * Parse a Content-Disposition header
+ */
+static char *
+_http_parse_content_disposition(const char *p)
+{
+	char *s, *s2;
+	const char *looking_for = "filename=\"";
+
+	if ((s = strstr(p, looking_for))) {
+		s = strdup(s + strlen(looking_for));
+		if ((s2 = strchr(s, '"'))) {
+			*s2 = '\0';
+			return s;
+		} else {
+			free(s);
+			return NULL;
+		}
+			
+	} else {
+		return NULL;
+	}
+}
+
 
 /*****************************************************************************
  * Helper functions for authorization
@@ -991,6 +1017,10 @@
 			case hdr_error:
 				_http_seterr(HTTP_PROTOCOL_ERROR);
 				goto ouch;
+			case hdr_content_disposition:
+				us->content_disposition = 
+					_http_parse_content_disposition(p);
+				break;
 			case hdr_content_length:
 				_http_parse_length(p, &clength);
 				break;

--QKdGvSO+nmPlgiQ/--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20051229193328.A13367>