450 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			450 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* gzjoin -- command to join gzip files into one gzip file
 | |
| 
 | |
|   Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
 | |
|   version 1.2, 14 Aug 2012
 | |
| 
 | |
|   This software is provided 'as-is', without any express or implied
 | |
|   warranty.  In no event will the author be held liable for any damages
 | |
|   arising from the use of this software.
 | |
| 
 | |
|   Permission is granted to anyone to use this software for any purpose,
 | |
|   including commercial applications, and to alter it and redistribute it
 | |
|   freely, subject to the following restrictions:
 | |
| 
 | |
|   1. The origin of this software must not be misrepresented; you must not
 | |
|      claim that you wrote the original software. If you use this software
 | |
|      in a product, an acknowledgment in the product documentation would be
 | |
|      appreciated but is not required.
 | |
|   2. Altered source versions must be plainly marked as such, and must not be
 | |
|      misrepresented as being the original software.
 | |
|   3. This notice may not be removed or altered from any source distribution.
 | |
| 
 | |
|   Mark Adler    madler@alumni.caltech.edu
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Change history:
 | |
|  *
 | |
|  * 1.0  11 Dec 2004     - First version
 | |
|  * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 | |
|  * 1.2  14 Aug 2012     - Clean up for z_const usage
 | |
|  */
 | |
| 
 | |
| /*
 | |
|    gzjoin takes one or more gzip files on the command line and writes out a
 | |
|    single gzip file that will uncompress to the concatenation of the
 | |
|    uncompressed data from the individual gzip files.  gzjoin does this without
 | |
|    having to recompress any of the data and without having to calculate a new
 | |
|    crc32 for the concatenated uncompressed data.  gzjoin does however have to
 | |
|    decompress all of the input data in order to find the bits in the compressed
 | |
|    data that need to be modified to concatenate the streams.
 | |
| 
 | |
|    gzjoin does not do an integrity check on the input gzip files other than
 | |
|    checking the gzip header and decompressing the compressed data.  They are
 | |
|    otherwise assumed to be complete and correct.
 | |
| 
 | |
|    Each joint between gzip files removes at least 18 bytes of previous trailer
 | |
|    and subsequent header, and inserts an average of about three bytes to the
 | |
|    compressed data in order to connect the streams.  The output gzip file
 | |
|    has a minimal ten-byte gzip header with no file name or modification time.
 | |
| 
 | |
|    This program was written to illustrate the use of the Z_BLOCK option of
 | |
|    inflate() and the crc32_combine() function.  gzjoin will not compile with
 | |
|    versions of zlib earlier than 1.2.3.
 | |
|  */
 | |
| 
 | |
| #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
 | |
| #include <stdlib.h>     /* exit(), malloc(), free() */
 | |
| #include <fcntl.h>      /* open() */
 | |
| #include <unistd.h>     /* close(), read(), lseek() */
 | |
| #include "zlib.h"
 | |
|     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
 | |
| 
 | |
| #define local static
 | |
| 
 | |
/* Report a fatal error on stderr and terminate the program with exit status 1.
   why1 and why2 are printed back to back after the "gzjoin error: " prefix and
   before ", output incomplete".  The function never returns to its caller; it
   is declared to return int (and ends with a return statement) only so that a
   call can be used as an operand in an expression.  The parameters are
   const-qualified since they are only read and are usually string literals. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;               /* not reached */
}
 | |
| 
 | |
| /* -- simple buffered file input with access to the buffer -- */
 | |
| 
 | |
| #define CHUNK 32768         /* must be a power of two and fit in unsigned */
 | |
| 
 | |
/* bin -- state kept for one buffered input file */
typedef struct {
    char *name;             /* file name, used in error messages */
    int fd;                 /* descriptor the data is read from, or -1 */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* position of the next unread byte */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
 | |
| 
 | |
| /* close a buffered file and free allocated memory */
 | |
| local void bclose(bin *in)
 | |
| {
 | |
|     if (in != NULL) {
 | |
|         if (in->fd != -1)
 | |
|             close(in->fd);
 | |
|         if (in->buf != NULL)
 | |
|             free(in->buf);
 | |
|         free(in);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* open a buffered file for input, return a pointer to type bin, or NULL on
 | |
|    failure */
 | |
| local bin *bopen(char *name)
 | |
| {
 | |
|     bin *in;
 | |
| 
 | |
|     in = malloc(sizeof(bin));
 | |
|     if (in == NULL)
 | |
|         return NULL;
 | |
|     in->buf = malloc(CHUNK);
 | |
|     in->fd = open(name, O_RDONLY, 0);
 | |
|     if (in->buf == NULL || in->fd == -1) {
 | |
|         bclose(in);
 | |
|         return NULL;
 | |
|     }
 | |
|     in->left = 0;
 | |
|     in->next = in->buf;
 | |
|     in->name = name;
 | |
|     return in;
 | |
| }
 | |
| 
 | |
| /* load buffer from file, return -1 on read error, 0 or 1 on success, with
 | |
|    1 indicating that end-of-file was reached */
 | |
| local int bload(bin *in)
 | |
| {
 | |
|     long len;
 | |
| 
 | |
|     if (in == NULL)
 | |
|         return -1;
 | |
|     if (in->left != 0)
 | |
|         return 0;
 | |
|     in->next = in->buf;
 | |
|     do {
 | |
|         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
 | |
|         if (len < 0)
 | |
|             return -1;
 | |
|         in->left += (unsigned)len;
 | |
|     } while (len != 0 && in->left < CHUNK);
 | |
|     return len == 0 ? 1 : 0;
 | |
| }
 | |
| 
 | |
/* get a byte from the file, bail if end of file -- the buffer is reloaded
   with bload() when it is empty.  The result has type int.  The argument is
   evaluated several times, so it must not have side effects; it is
   parenthesized in the expansion so that any pointer expression can be
   passed, not just a plain identifier. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
 | |
| 
 | |
| /* get a four-byte little-endian unsigned integer from file */
 | |
| local unsigned long bget4(bin *in)
 | |
| {
 | |
|     unsigned long val;
 | |
| 
 | |
|     val = bget(in);
 | |
|     val += (unsigned long)(bget(in)) << 8;
 | |
|     val += (unsigned long)(bget(in)) << 16;
 | |
|     val += (unsigned long)(bget(in)) << 24;
 | |
|     return val;
 | |
| }
 | |
| 
 | |
| /* skip bytes in file */
 | |
| local void bskip(bin *in, unsigned skip)
 | |
| {
 | |
|     /* check pointer */
 | |
|     if (in == NULL)
 | |
|         return;
 | |
| 
 | |
|     /* easy case -- skip bytes in buffer */
 | |
|     if (skip <= in->left) {
 | |
|         in->left -= skip;
 | |
|         in->next += skip;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     /* skip what's in buffer, discard buffer contents */
 | |
|     skip -= in->left;
 | |
|     in->left = 0;
 | |
| 
 | |
|     /* seek past multiples of CHUNK bytes */
 | |
|     if (skip > CHUNK) {
 | |
|         unsigned left;
 | |
| 
 | |
|         left = skip & (CHUNK - 1);
 | |
|         if (left == 0) {
 | |
|             /* exact number of chunks: seek all the way minus one byte to check
 | |
|                for end-of-file with a read */
 | |
|             lseek(in->fd, skip - 1, SEEK_CUR);
 | |
|             if (read(in->fd, in->buf, 1) != 1)
 | |
|                 bail("unexpected end of file on ", in->name);
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         /* skip the integral chunks, update skip with remainder */
 | |
|         lseek(in->fd, skip - left, SEEK_CUR);
 | |
|         skip = left;
 | |
|     }
 | |
| 
 | |
|     /* read more input and skip remainder */
 | |
|     bload(in);
 | |
|     if (skip > in->left)
 | |
|         bail("unexpected end of file on ", in->name);
 | |
|     in->left -= skip;
 | |
|     in->next += skip;
 | |
| }
 | |
| 
 | |
| /* -- end of buffered input functions -- */
 | |
| 
 | |
| /* skip the gzip header from file in */
 | |
| local void gzhead(bin *in)
 | |
| {
 | |
|     int flags;
 | |
| 
 | |
|     /* verify gzip magic header and compression method */
 | |
|     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
 | |
|         bail(in->name, " is not a valid gzip file");
 | |
| 
 | |
|     /* get and verify flags */
 | |
|     flags = bget(in);
 | |
|     if ((flags & 0xe0) != 0)
 | |
|         bail("unknown reserved bits set in ", in->name);
 | |
| 
 | |
|     /* skip modification time, extra flags, and os */
 | |
|     bskip(in, 6);
 | |
| 
 | |
|     /* skip extra field if present */
 | |
|     if (flags & 4) {
 | |
|         unsigned len;
 | |
| 
 | |
|         len = bget(in);
 | |
|         len += (unsigned)(bget(in)) << 8;
 | |
|         bskip(in, len);
 | |
|     }
 | |
| 
 | |
|     /* skip file name if present */
 | |
|     if (flags & 8)
 | |
|         while (bget(in) != 0)
 | |
|             ;
 | |
| 
 | |
|     /* skip comment if present */
 | |
|     if (flags & 16)
 | |
|         while (bget(in) != 0)
 | |
|             ;
 | |
| 
 | |
|     /* skip header crc if present */
 | |
|     if (flags & 2)
 | |
|         bskip(in, 2);
 | |
| }
 | |
| 
 | |
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first.  The results of the putc() calls are not checked. */
local void put4(unsigned long val, FILE *out)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        putc((val >> shift) & 0xff, out);
}
 | |
| 
 | |
| /* Load up zlib stream from buffered input, bail if end of file */
 | |
| local void zpull(z_streamp strm, bin *in)
 | |
| {
 | |
|     if (in->left == 0)
 | |
|         bload(in);
 | |
|     if (in->left == 0)
 | |
|         bail("unexpected end of file on ", in->name);
 | |
|     strm->avail_in = in->left;
 | |
|     strm->next_in = in->next;
 | |
| }
 | |
| 
 | |
| /* Write header for gzip file to out and initialize trailer. */
 | |
| local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
 | |
| {
 | |
|     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
 | |
|     *crc = crc32(0L, Z_NULL, 0);
 | |
|     *tot = 0;
 | |
| }
 | |
| 
 | |
| /* Copy the compressed data from name, zeroing the last block bit of the last
 | |
|    block if clr is true, and adding empty blocks as needed to get to a byte
 | |
|    boundary.  If clr is false, then the last block becomes the last block of
 | |
|    the output, and the gzip trailer is written.  crc and tot maintain the
 | |
|    crc and length (modulo 2^32) of the output for the trailer.  The resulting
 | |
|    gzip file is written to out.  gzinit() must be called before the first call
 | |
|    of gzcopy() to write the gzip header and to initialize crc and tot.
 | |
| 
 | |
|    The input is run through inflate() only to locate the block boundaries and
 | |
|    to count the uncompressed length; the inflated bytes land in a scratch
 | |
|    buffer and are never written.  What is written to out is the compressed
 | |
|    input itself, patched in the input buffer wherever a last-block bit has to
 | |
|    be cleared.  Every failure is reported through bail(), which exits the
 | |
|    program, so this function only returns on success.  The results of the
 | |
|    fwrite() and putc() calls are not checked here. */
 | |
| local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
 | |
|                   FILE *out)
 | |
| {
 | |
|     int ret;                /* return value from zlib functions */
 | |
|     int pos;                /* unused-bit count, then mask of "last block" bit */
 | |
|     int last;               /* true if processing the last block */
 | |
|     bin *in;                /* buffered input file */
 | |
|     unsigned char *start;   /* start of not yet written compressed data */
 | |
|     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
 | |
|     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
 | |
|     z_stream strm;          /* zlib inflate stream */
 | |
| 
 | |
|     /* open gzip file and skip header */
 | |
|     in = bopen(name);
 | |
|     if (in == NULL)
 | |
|         bail("could not open ", name);
 | |
|     gzhead(in);
 | |
| 
 | |
|     /* allocate buffer for uncompressed data and initialize raw inflate
 | |
|        stream */
 | |
|     junk = malloc(CHUNK);   /* checked below, together with ret */
 | |
|     strm.zalloc = Z_NULL;
 | |
|     strm.zfree = Z_NULL;
 | |
|     strm.opaque = Z_NULL;
 | |
|     strm.avail_in = 0;
 | |
|     strm.next_in = Z_NULL;
 | |
|     ret = inflateInit2(&strm, -15);
 | |
|     if (junk == NULL || ret != Z_OK)
 | |
|         bail("out of memory", "");
 | |
| 
 | |
|     /* inflate and copy compressed data, clear last-block bit if requested */
 | |
|     len = 0;
 | |
|     zpull(&strm, in);
 | |
|     start = in->next;
 | |
|     last = start[0] & 1;    /* last-block bit of the first block is bit 0 */
 | |
|     if (last && clr)
 | |
|         start[0] &= ~1;
 | |
|     strm.avail_out = 0;     /* keeps the refill test false on the first pass */
 | |
|     for (;;) {
 | |
|         /* if input used and output done, write used input and get more */
 | |
|         if (strm.avail_in == 0 && strm.avail_out != 0) {
 | |
|             fwrite(start, 1, strm.next_in - start, out);
 | |
|             start = in->buf;
 | |
|             in->left = 0;   /* buffer is used up: makes zpull() reload it */
 | |
|             zpull(&strm, in);
 | |
|         }
 | |
| 
 | |
|         /* decompress -- return early when end-of-block reached */
 | |
|         strm.avail_out = CHUNK;
 | |
|         strm.next_out = junk;
 | |
|         ret = inflate(&strm, Z_BLOCK);
 | |
|         switch (ret) {      /* bail() exits, so the cases need no break */
 | |
|         case Z_MEM_ERROR:
 | |
|             bail("out of memory", "");
 | |
|         case Z_DATA_ERROR:
 | |
|             bail("invalid compressed data in ", in->name);
 | |
|         }                   /* any other return value just continues below */
 | |
| 
 | |
|         /* update length of uncompressed data */
 | |
|         len += CHUNK - strm.avail_out;
 | |
| 
 | |
|         /* check for block boundary (only get this when block copied out) */
 | |
|         if (strm.data_type & 128) {
 | |
|             /* if that was the last block, then done */
 | |
|             if (last)
 | |
|                 break;
 | |
| 
 | |
|             /* number of unused bits in last byte */
 | |
|             pos = strm.data_type & 7;
 | |
| 
 | |
|             /* find the next last-block bit */
 | |
|             if (pos != 0) {
 | |
|                 /* next last-block bit is in last used byte */
 | |
|                 pos = 0x100 >> pos;     /* pos is now the mask of that bit */
 | |
|                 last = strm.next_in[-1] & pos;
 | |
|                 if (last && clr)        /* same byte, written via in->buf */
 | |
|                     in->buf[strm.next_in - in->buf - 1] &= ~pos;
 | |
|             }
 | |
|             else {
 | |
|                 /* next last-block bit is in next unused byte */
 | |
|                 if (strm.avail_in == 0) {
 | |
|                     /* don't have that byte yet -- get it */
 | |
|                     fwrite(start, 1, strm.next_in - start, out);
 | |
|                     start = in->buf;
 | |
|                     in->left = 0;
 | |
|                     zpull(&strm, in);
 | |
|                 }
 | |
|                 last = strm.next_in[0] & 1;
 | |
|                 if (last && clr)        /* same byte, written via in->buf */
 | |
|                     in->buf[strm.next_in - in->buf] &= ~1;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* update buffer with unused input */
 | |
|     in->left = strm.avail_in;
 | |
|     in->next = in->buf + (strm.next_in - in->buf);
 | |
| 
 | |
|     /* copy used input, write empty blocks to get to byte boundary */
 | |
|     pos = strm.data_type & 7;           /* unused bits in last used byte */
 | |
|     fwrite(start, 1, in->next - start - 1, out);    /* all but that byte */
 | |
|     last = in->next[-1];                /* last now holds the byte itself */
 | |
|     if (pos == 0 || !clr)
 | |
|         /* already at byte boundary, or last file: write last byte */
 | |
|         putc(last, out);
 | |
|     else {
 | |
|         /* append empty blocks to last byte */
 | |
|         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
 | |
|         if (pos & 1) {
 | |
|             /* odd -- append an empty stored block */
 | |
|             putc(last, out);
 | |
|             if (pos == 1)
 | |
|                 putc(0, out);               /* two more bits in block header */
 | |
|             fwrite("\0\0\xff\xff", 1, 4, out);
 | |
|         }
 | |
|         else {
 | |
|             /* even -- append 1, 2, or 3 empty fixed blocks */
 | |
|             switch (pos) {                  /* cases fall through on purpose */
 | |
|             case 6:
 | |
|                 putc(last | 8, out);
 | |
|                 last = 0;
 | |
|             case 4:
 | |
|                 putc(last | 0x20, out);
 | |
|                 last = 0;
 | |
|             case 2:
 | |
|                 putc(last | 0x80, out);
 | |
|                 putc(0, out);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* update crc and tot -- bget4() reads the four bytes that follow the
 | |
|        compressed data, which is where the gzip trailer keeps the crc of this
 | |
|        file's uncompressed data; the length that follows it in the trailer is
 | |
|        not read, the inflated byte count len is used instead */
 | |
|     *crc = crc32_combine(*crc, bget4(in), len);
 | |
|     *tot += (unsigned long)len;
 | |
| 
 | |
|     /* clean up */
 | |
|     inflateEnd(&strm);
 | |
|     free(junk);
 | |
|     bclose(in);
 | |
| 
 | |
|     /* write trailer if this is the last gzip file */
 | |
|     if (!clr) {
 | |
|         put4(*crc, out);
 | |
|         put4(*tot, out);
 | |
|     }
 | |
| }
 | |
| 
 | |
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no file arguments a usage message is
   printed on stderr and 0 is returned.  Returns 0 when the joined file was
   written completely.  Any error, including a failure to write to stdout,
   ends the program with status 1 through bail(). */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments (argc is negative here if the program was
       started without even a command name) */
    if (argc < 1) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- the remaining
       argument count doubles as gzcopy()'s clr flag, so it is zero (write
       the trailer) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes are checked, so push out whatever is
       still buffered and look at the stream's error indicator once here */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "standard output");

    /* done */
    return 0;
}
 | 
