| /* gzjoin -- command to join gzip files into one gzip file | 
 |  | 
 |   Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved | 
 |   version 1.2, 14 Aug 2012 | 
 |  | 
 |   This software is provided 'as-is', without any express or implied | 
 |   warranty.  In no event will the author be held liable for any damages | 
 |   arising from the use of this software. | 
 |  | 
 |   Permission is granted to anyone to use this software for any purpose, | 
 |   including commercial applications, and to alter it and redistribute it | 
 |   freely, subject to the following restrictions: | 
 |  | 
 |   1. The origin of this software must not be misrepresented; you must not | 
 |      claim that you wrote the original software. If you use this software | 
 |      in a product, an acknowledgment in the product documentation would be | 
 |      appreciated but is not required. | 
 |   2. Altered source versions must be plainly marked as such, and must not be | 
 |      misrepresented as being the original software. | 
 |   3. This notice may not be removed or altered from any source distribution. | 
 |  | 
 |   Mark Adler    madler@alumni.caltech.edu | 
 |  */ | 
 |  | 
 | /* | 
 |  * Change history: | 
 |  * | 
 |  * 1.0  11 Dec 2004     - First version | 
 |  * 1.1  12 Jun 2005     - Changed ssize_t to long for portability | 
 |  * 1.2  14 Aug 2012     - Clean up for z_const usage | 
 |  */ | 
 |  | 
 | /* | 
 |    gzjoin takes one or more gzip files on the command line and writes out a | 
 |    single gzip file that will uncompress to the concatenation of the | 
 |    uncompressed data from the individual gzip files.  gzjoin does this without | 
 |    having to recompress any of the data and without having to calculate a new | 
 |    crc32 for the concatenated uncompressed data.  gzjoin does however have to | 
 |    decompress all of the input data in order to find the bits in the compressed | 
 |    data that need to be modified to concatenate the streams. | 
 |  | 
 |    gzjoin does not do an integrity check on the input gzip files other than | 
 |    checking the gzip header and decompressing the compressed data.  They are | 
 |    otherwise assumed to be complete and correct. | 
 |  | 
 |    Each joint between gzip files removes at least 18 bytes of previous trailer | 
 |    and subsequent header, and inserts an average of about three bytes to the | 
 |    compressed data in order to connect the streams.  The output gzip file | 
 |    has a minimal ten-byte gzip header with no file name or modification time. | 
 |  | 
 |    This program was written to illustrate the use of the Z_BLOCK option of | 
 |    inflate() and the crc32_combine() function.  gzjoin will not compile with | 
 |    versions of zlib earlier than 1.2.3. | 
 |  */ | 
 |  | 
 | #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */ | 
 | #include <stdlib.h>     /* exit(), malloc(), free() */ | 
 | #include <fcntl.h>      /* open() */ | 
 | #include <unistd.h>     /* close(), read(), lseek() */ | 
 | #include "zlib.h" | 
 |     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ | 
 |  | 
 | #define local static | 
 |  | 
 | /* exit with an error (return a value to allow use in an expression) */ | 
 | local int bail(char *why1, char *why2) | 
 | { | 
 |     fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); | 
 |     exit(1); | 
 |     return 0; | 
 | } | 
 |  | 
 | /* -- simple buffered file input with access to the buffer -- */ | 
 |  | 
 | #define CHUNK 32768         /* must be a power of two and fit in unsigned */ | 
 |  | 
 | /* bin buffered input file type */ | 
 | typedef struct { | 
 |     char *name;             /* name of file for error messages */ | 
 |     int fd;                 /* file descriptor */ | 
 |     unsigned left;          /* bytes remaining at next */ | 
 |     unsigned char *next;    /* next byte to read */ | 
 |     unsigned char *buf;     /* allocated buffer of length CHUNK */ | 
 | } bin; | 
 |  | 
 | /* close a buffered file and free allocated memory */ | 
 | local void bclose(bin *in) | 
 | { | 
 |     if (in != NULL) { | 
 |         if (in->fd != -1) | 
 |             close(in->fd); | 
 |         if (in->buf != NULL) | 
 |             free(in->buf); | 
 |         free(in); | 
 |     } | 
 | } | 
 |  | 
 | /* open a buffered file for input, return a pointer to type bin, or NULL on | 
 |    failure */ | 
 | local bin *bopen(char *name) | 
 | { | 
 |     bin *in; | 
 |  | 
 |     in = malloc(sizeof(bin)); | 
 |     if (in == NULL) | 
 |         return NULL; | 
 |     in->buf = malloc(CHUNK); | 
 |     in->fd = open(name, O_RDONLY, 0); | 
 |     if (in->buf == NULL || in->fd == -1) { | 
 |         bclose(in); | 
 |         return NULL; | 
 |     } | 
 |     in->left = 0; | 
 |     in->next = in->buf; | 
 |     in->name = name; | 
 |     return in; | 
 | } | 
 |  | 
 | /* load buffer from file, return -1 on read error, 0 or 1 on success, with | 
 |    1 indicating that end-of-file was reached */ | 
 | local int bload(bin *in) | 
 | { | 
 |     long len; | 
 |  | 
 |     if (in == NULL) | 
 |         return -1; | 
 |     if (in->left != 0) | 
 |         return 0; | 
 |     in->next = in->buf; | 
 |     do { | 
 |         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); | 
 |         if (len < 0) | 
 |             return -1; | 
 |         in->left += (unsigned)len; | 
 |     } while (len != 0 && in->left < CHUNK); | 
 |     return len == 0 ? 1 : 0; | 
 | } | 
 |  | 
 | /* get a byte from the file, bail if end of file */ | 
 | #define bget(in) (in->left ? 0 : bload(in), \ | 
 |                   in->left ? (in->left--, *(in->next)++) : \ | 
 |                     bail("unexpected end of file on ", in->name)) | 
 |  | 
 | /* get a four-byte little-endian unsigned integer from file */ | 
 | local unsigned long bget4(bin *in) | 
 | { | 
 |     unsigned long val; | 
 |  | 
 |     val = bget(in); | 
 |     val += (unsigned long)(bget(in)) << 8; | 
 |     val += (unsigned long)(bget(in)) << 16; | 
 |     val += (unsigned long)(bget(in)) << 24; | 
 |     return val; | 
 | } | 
 |  | 
 | /* skip bytes in file */ | 
 | local void bskip(bin *in, unsigned skip) | 
 | { | 
 |     /* check pointer */ | 
 |     if (in == NULL) | 
 |         return; | 
 |  | 
 |     /* easy case -- skip bytes in buffer */ | 
 |     if (skip <= in->left) { | 
 |         in->left -= skip; | 
 |         in->next += skip; | 
 |         return; | 
 |     } | 
 |  | 
 |     /* skip what's in buffer, discard buffer contents */ | 
 |     skip -= in->left; | 
 |     in->left = 0; | 
 |  | 
 |     /* seek past multiples of CHUNK bytes */ | 
 |     if (skip > CHUNK) { | 
 |         unsigned left; | 
 |  | 
 |         left = skip & (CHUNK - 1); | 
 |         if (left == 0) { | 
 |             /* exact number of chunks: seek all the way minus one byte to check | 
 |                for end-of-file with a read */ | 
 |             lseek(in->fd, skip - 1, SEEK_CUR); | 
 |             if (read(in->fd, in->buf, 1) != 1) | 
 |                 bail("unexpected end of file on ", in->name); | 
 |             return; | 
 |         } | 
 |  | 
 |         /* skip the integral chunks, update skip with remainder */ | 
 |         lseek(in->fd, skip - left, SEEK_CUR); | 
 |         skip = left; | 
 |     } | 
 |  | 
 |     /* read more input and skip remainder */ | 
 |     bload(in); | 
 |     if (skip > in->left) | 
 |         bail("unexpected end of file on ", in->name); | 
 |     in->left -= skip; | 
 |     in->next += skip; | 
 | } | 
 |  | 
 | /* -- end of buffered input functions -- */ | 
 |  | 
 | /* skip the gzip header from file in */ | 
 | local void gzhead(bin *in) | 
 | { | 
 |     int flags; | 
 |  | 
 |     /* verify gzip magic header and compression method */ | 
 |     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) | 
 |         bail(in->name, " is not a valid gzip file"); | 
 |  | 
 |     /* get and verify flags */ | 
 |     flags = bget(in); | 
 |     if ((flags & 0xe0) != 0) | 
 |         bail("unknown reserved bits set in ", in->name); | 
 |  | 
 |     /* skip modification time, extra flags, and os */ | 
 |     bskip(in, 6); | 
 |  | 
 |     /* skip extra field if present */ | 
 |     if (flags & 4) { | 
 |         unsigned len; | 
 |  | 
 |         len = bget(in); | 
 |         len += (unsigned)(bget(in)) << 8; | 
 |         bskip(in, len); | 
 |     } | 
 |  | 
 |     /* skip file name if present */ | 
 |     if (flags & 8) | 
 |         while (bget(in) != 0) | 
 |             ; | 
 |  | 
 |     /* skip comment if present */ | 
 |     if (flags & 16) | 
 |         while (bget(in) != 0) | 
 |             ; | 
 |  | 
 |     /* skip header crc if present */ | 
 |     if (flags & 2) | 
 |         bskip(in, 2); | 
 | } | 
 |  | 
 | /* write a four-byte little-endian unsigned integer to out */ | 
 | local void put4(unsigned long val, FILE *out) | 
 | { | 
 |     putc(val & 0xff, out); | 
 |     putc((val >> 8) & 0xff, out); | 
 |     putc((val >> 16) & 0xff, out); | 
 |     putc((val >> 24) & 0xff, out); | 
 | } | 
 |  | 
 | /* Load up zlib stream from buffered input, bail if end of file */ | 
 | local void zpull(z_streamp strm, bin *in) | 
 | { | 
 |     if (in->left == 0) | 
 |         bload(in); | 
 |     if (in->left == 0) | 
 |         bail("unexpected end of file on ", in->name); | 
 |     strm->avail_in = in->left; | 
 |     strm->next_in = in->next; | 
 | } | 
 |  | 
 | /* Write header for gzip file to out and initialize trailer. */ | 
 | local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) | 
 | { | 
 |     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); | 
 |     *crc = crc32(0L, Z_NULL, 0); | 
 |     *tot = 0; | 
 | } | 
 |  | 
 | /* Copy the compressed data from name, zeroing the last block bit of the last | 
 |    block if clr is true, and adding empty blocks as needed to get to a byte | 
 |    boundary.  If clr is false, then the last block becomes the last block of | 
 |    the output, and the gzip trailer is written.  crc and tot maintains the | 
 |    crc and length (modulo 2^32) of the output for the trailer.  The resulting | 
 |    gzip file is written to out.  gzinit() must be called before the first call | 
 |    of gzcopy() to write the gzip header and to initialize crc and tot. */ | 
 | local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, | 
 |                   FILE *out) | 
 | { | 
 |     int ret;                /* return value from zlib functions */ | 
 |     int pos;                /* where the "last block" bit is in byte */ | 
 |     int last;               /* true if processing the last block */ | 
 |     bin *in;                /* buffered input file */ | 
 |     unsigned char *start;   /* start of compressed data in buffer */ | 
 |     unsigned char *junk;    /* buffer for uncompressed data -- discarded */ | 
 |     z_off_t len;            /* length of uncompressed data (support > 4 GB) */ | 
 |     z_stream strm;          /* zlib inflate stream */ | 
 |  | 
 |     /* open gzip file and skip header */ | 
 |     in = bopen(name); | 
 |     if (in == NULL) | 
 |         bail("could not open ", name); | 
 |     gzhead(in); | 
 |  | 
 |     /* allocate buffer for uncompressed data and initialize raw inflate | 
 |        stream */ | 
 |     junk = malloc(CHUNK); | 
 |     strm.zalloc = Z_NULL; | 
 |     strm.zfree = Z_NULL; | 
 |     strm.opaque = Z_NULL; | 
 |     strm.avail_in = 0; | 
 |     strm.next_in = Z_NULL; | 
 |     ret = inflateInit2(&strm, -15); | 
 |     if (junk == NULL || ret != Z_OK) | 
 |         bail("out of memory", ""); | 
 |  | 
 |     /* inflate and copy compressed data, clear last-block bit if requested */ | 
 |     len = 0; | 
 |     zpull(&strm, in); | 
 |     start = in->next; | 
 |     last = start[0] & 1; | 
 |     if (last && clr) | 
 |         start[0] &= ~1; | 
 |     strm.avail_out = 0; | 
 |     for (;;) { | 
 |         /* if input used and output done, write used input and get more */ | 
 |         if (strm.avail_in == 0 && strm.avail_out != 0) { | 
 |             fwrite(start, 1, strm.next_in - start, out); | 
 |             start = in->buf; | 
 |             in->left = 0; | 
 |             zpull(&strm, in); | 
 |         } | 
 |  | 
 |         /* decompress -- return early when end-of-block reached */ | 
 |         strm.avail_out = CHUNK; | 
 |         strm.next_out = junk; | 
 |         ret = inflate(&strm, Z_BLOCK); | 
 |         switch (ret) { | 
 |         case Z_MEM_ERROR: | 
 |             bail("out of memory", ""); | 
 |         case Z_DATA_ERROR: | 
 |             bail("invalid compressed data in ", in->name); | 
 |         } | 
 |  | 
 |         /* update length of uncompressed data */ | 
 |         len += CHUNK - strm.avail_out; | 
 |  | 
 |         /* check for block boundary (only get this when block copied out) */ | 
 |         if (strm.data_type & 128) { | 
 |             /* if that was the last block, then done */ | 
 |             if (last) | 
 |                 break; | 
 |  | 
 |             /* number of unused bits in last byte */ | 
 |             pos = strm.data_type & 7; | 
 |  | 
 |             /* find the next last-block bit */ | 
 |             if (pos != 0) { | 
 |                 /* next last-block bit is in last used byte */ | 
 |                 pos = 0x100 >> pos; | 
 |                 last = strm.next_in[-1] & pos; | 
 |                 if (last && clr) | 
 |                     in->buf[strm.next_in - in->buf - 1] &= ~pos; | 
 |             } | 
 |             else { | 
 |                 /* next last-block bit is in next unused byte */ | 
 |                 if (strm.avail_in == 0) { | 
 |                     /* don't have that byte yet -- get it */ | 
 |                     fwrite(start, 1, strm.next_in - start, out); | 
 |                     start = in->buf; | 
 |                     in->left = 0; | 
 |                     zpull(&strm, in); | 
 |                 } | 
 |                 last = strm.next_in[0] & 1; | 
 |                 if (last && clr) | 
 |                     in->buf[strm.next_in - in->buf] &= ~1; | 
 |             } | 
 |         } | 
 |     } | 
 |  | 
 |     /* update buffer with unused input */ | 
 |     in->left = strm.avail_in; | 
 |     in->next = in->buf + (strm.next_in - in->buf); | 
 |  | 
 |     /* copy used input, write empty blocks to get to byte boundary */ | 
 |     pos = strm.data_type & 7; | 
 |     fwrite(start, 1, in->next - start - 1, out); | 
 |     last = in->next[-1]; | 
 |     if (pos == 0 || !clr) | 
 |         /* already at byte boundary, or last file: write last byte */ | 
 |         putc(last, out); | 
 |     else { | 
 |         /* append empty blocks to last byte */ | 
 |         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */ | 
 |         if (pos & 1) { | 
 |             /* odd -- append an empty stored block */ | 
 |             putc(last, out); | 
 |             if (pos == 1) | 
 |                 putc(0, out);               /* two more bits in block header */ | 
 |             fwrite("\0\0\xff\xff", 1, 4, out); | 
 |         } | 
 |         else { | 
 |             /* even -- append 1, 2, or 3 empty fixed blocks */ | 
 |             switch (pos) { | 
 |             case 6: | 
 |                 putc(last | 8, out); | 
 |                 last = 0; | 
 |             case 4: | 
 |                 putc(last | 0x20, out); | 
 |                 last = 0; | 
 |             case 2: | 
 |                 putc(last | 0x80, out); | 
 |                 putc(0, out); | 
 |             } | 
 |         } | 
 |     } | 
 |  | 
 |     /* update crc and tot */ | 
 |     *crc = crc32_combine(*crc, bget4(in), len); | 
 |     *tot += (unsigned long)len; | 
 |  | 
 |     /* clean up */ | 
 |     inflateEnd(&strm); | 
 |     free(junk); | 
 |     bclose(in); | 
 |  | 
 |     /* write trailer if this is the last gzip file */ | 
 |     if (!clr) { | 
 |         put4(*crc, out); | 
 |         put4(*tot, out); | 
 |     } | 
 | } | 
 |  | 
 | /* join the gzip files on the command line, write result to stdout */ | 
 | int main(int argc, char **argv) | 
 | { | 
 |     unsigned long crc, tot;     /* running crc and total uncompressed length */ | 
 |  | 
 |     /* skip command name */ | 
 |     argc--; | 
 |     argv++; | 
 |  | 
 |     /* show usage if no arguments */ | 
 |     if (argc == 0) { | 
 |         fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", | 
 |               stderr); | 
 |         return 0; | 
 |     } | 
 |  | 
 |     /* join gzip files on command line and write to stdout */ | 
 |     gzinit(&crc, &tot, stdout); | 
 |     while (argc--) | 
 |         gzcopy(*argv++, argc, &crc, &tot, stdout); | 
 |  | 
 |     /* done */ | 
 |     return 0; | 
 | } |