From owner-freebsd-bugs@FreeBSD.ORG Thu May 31 02:00:11 2007 Return-Path: X-Original-To: freebsd-bugs@hub.freebsd.org Delivered-To: freebsd-bugs@hub.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id C253F16A421 for ; Thu, 31 May 2007 02:00:11 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (freefall.freebsd.org [69.147.83.40]) by mx1.freebsd.org (Postfix) with ESMTP id E710813C457 for ; Thu, 31 May 2007 02:00:09 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (gnats@localhost [127.0.0.1]) by freefall.freebsd.org (8.13.4/8.13.4) with ESMTP id l4V209Ex065967 for ; Thu, 31 May 2007 02:00:09 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.13.4/8.13.4/Submit) id l4V209qn065966; Thu, 31 May 2007 02:00:09 GMT (envelope-from gnats) Resent-Date: Thu, 31 May 2007 02:00:09 GMT Resent-Message-Id: <200705310200.l4V209qn065966@freefall.freebsd.org> Resent-From: FreeBSD-gnats-submit@FreeBSD.org (GNATS Filer) Resent-To: freebsd-bugs@FreeBSD.org Resent-Reply-To: FreeBSD-gnats-submit@FreeBSD.org, Jan Schaumann Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 4D14016A421 for ; Thu, 31 May 2007 01:53:09 +0000 (UTC) (envelope-from nobody@FreeBSD.org) Received: from www.freebsd.org (www.freebsd.org [69.147.83.33]) by mx1.freebsd.org (Postfix) with ESMTP id 3D9A913C455 for ; Thu, 31 May 2007 01:53:09 +0000 (UTC) (envelope-from nobody@FreeBSD.org) Received: from www.freebsd.org (localhost [127.0.0.1]) by www.freebsd.org (8.13.1/8.13.1) with ESMTP id l4V1r8q5006694 for ; Thu, 31 May 2007 01:53:09 GMT (envelope-from nobody@www.freebsd.org) Received: (from nobody@localhost) by www.freebsd.org (8.13.1/8.13.1/Submit) id l4V1r8EU006693; Thu, 31 May 2007 01:53:08 GMT (envelope-from nobody) Message-Id: <200705310153.l4V1r8EU006693@www.freebsd.org> 
Date: Thu, 31 May 2007 01:53:08 GMT From: Jan Schaumann To: freebsd-gnats-submit@FreeBSD.org X-Send-Pr-Version: www-3.0 Cc: Subject: misc/113175: add "-n count" option to split(1) X-BeenThere: freebsd-bugs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Bug reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 31 May 2007 02:00:11 -0000 >Number: 113175 >Category: misc >Synopsis: add "-n count" option to split(1) >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Thu May 31 02:00:09 GMT 2007 >Closed-Date: >Last-Modified: >Originator: Jan Schaumann >Release: None >Organization: >Environment: >Description: I added a new option to split(1) in NetBSD to allow splitting of a file into N chunks rather than having to do the math oneself and then specify the '-b' option. See rev 1.22 of NetBSD's split.c (rev 1.15 of split.1). Attached is a completely untested patch that might work for your version of split. Hmm, second try. Attaching the diff didn't work. Something about wrong encoding type. See diff in "Fix". >How-To-Repeat: >Fix: --- split.c.orig 2007-05-30 18:35:37.000000000 -0700 +++ split.c 2007-05-30 18:45:56.000000000 -0700 @@ -64,6 +64,7 @@ #define DEFLINE 1000 /* Default num lines per file. */ off_t bytecnt; /* Byte count to split on. */ +off_t chunks = 0; /* Chunks count to split into. */ long numlines; /* Line count to split on. */ int file_open; /* If a file open. */ int ifd = -1, ofd = -1; /* Input/output file descriptors. 
*/ @@ -76,6 +77,7 @@ void newfile(void); void split1(void); void split2(void); +void split3(void); static void usage(void); int @@ -88,7 +90,7 @@ setlocale(LC_ALL, ""); - while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1) + while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1) switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -138,6 +140,13 @@ errx(EX_USAGE, "%s: illegal line count", optarg); break; + case 'n': /* Chunks. */ + if (!isdigit((unsigned char)optarg[0]) || + (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || + *ep != '\0') + errx(EX_USAGE, "%s: illegal number of chunks.", optarg); + break; + case 'p': /* pattern matching. */ if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) errx(EX_USAGE, "%s: illegal regexp", optarg); @@ -164,12 +173,15 @@ if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) errx(EX_USAGE, "suffix is too long"); - if (pflag && (numlines != 0 || bytecnt != 0)) + if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) usage(); if (numlines == 0) numlines = DEFLINE; - else if (bytecnt != 0) + else if (bytecnt != 0 || chunks != 0) + usage(); + + if (bytecnt && chunks) usage(); if (ifd == -1) /* Stdin by default. 
*/ @@ -178,6 +190,9 @@ if (bytecnt) { split1(); exit (0); + } else if (chunks) { + split3(); + exit (0); } split2(); if (pflag) @@ -195,6 +210,9 @@ off_t bcnt; char *C; ssize_t dist, len; + int nfiles; + + nfiles = 0; for (bcnt = 0;;) switch ((len = read(ifd, bfr, MAXBSIZE))) { @@ -204,8 +222,11 @@ err(EX_IOERR, "read"); /* NOTREACHED */ default: - if (!file_open) + if (!file_open) { + if (!chunks || (nfiles < chunks)) { newfile(); + nfiles++; + } if (bcnt + len >= bytecnt) { dist = bytecnt - bcnt; if (write(ofd, bfr, dist) != dist) @@ -213,13 +234,19 @@ len -= dist; for (C = bfr + dist; len >= bytecnt; len -= bytecnt, C += bytecnt) { + if (!chunks || (nfiles < chunks)) { newfile(); + nfiles++; + } if (write(ofd, C, bytecnt) != bytecnt) err(EX_IOERR, "write"); } if (len != 0) { + if (!chunks || (nfiles < chunks)) { newfile(); + nfiles++; + } if (write(ofd, C, len) != len) err(EX_IOERR, "write"); } else @@ -285,6 +312,31 @@ exit(0); } + /* + * split3 -- + * Split the input into specified number of chunks + */ +static void +split3() +{ + struct stat sb; + + if (fstat(ifd, &sb) == -1) { + err(1, "stat"); + /* NOTREACHED */ + } + + if (chunks > sb.st_size) { + errx(1, "can't split into more than %d files", + (int)sb.st_size); + /* NOTREACHED */ + } + + bytecnt = sb.st_size/chunks; + split1(); +} + + /* * newfile -- * Open a new output file. 
@@ -338,6 +390,7 @@ (void)fprintf(stderr, "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" " split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" +" split -n chunk_count [-a suffix_length] [file [prefix]]\n" " split -p pattern [-a suffix_length] [file [prefix]]\n"); exit(EX_USAGE); } --- split.1.orig 2007-05-30 18:36:08.000000000 -0700 +++ split.1 2007-05-30 18:47:14.000000000 -0700 @@ -32,7 +32,7 @@ .\" @(#)split.1 8.3 (Berkeley) 4/16/94 .\" $FreeBSD: /repoman/r/ncvs/src/usr.bin/split/split.1,v 1.21 2006/09/29 15:20:47 ru Exp $ .\" -.Dd August 10, 2006 +.Dd May 30, 2007 .Dt SPLIT 1 .Os .Sh NAME @@ -53,6 +53,10 @@ .Op Fl a Ar suffix_length .Op Ar file Op Ar prefix .Nm +.Fl n Ar chunk_count +.Op Fl a Ar suffix_length +.Op Ar file Op Ar prefix +.Nm .Fl p Ar pattern .Op Fl a Ar suffix_length .Op Ar file Op Ar prefix @@ -112,6 +116,10 @@ Create smaller files .Ar line_count lines in length. +.It Fl n Ar chunk_count +Split file into +.Ar chunk_count +smaller files. .It Fl p Ar pattern The file is split whenever an input line matches .Ar pattern , >Release-Note: >Audit-Trail: >Unformatted: