|  | /* gzjoin -- command to join gzip files into one gzip file | 
|  |  | 
|  | Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved | 
|  | version 1.2, 14 Aug 2012 | 
|  |  | 
|  | This software is provided 'as-is', without any express or implied | 
|  | warranty.  In no event will the author be held liable for any damages | 
|  | arising from the use of this software. | 
|  |  | 
|  | Permission is granted to anyone to use this software for any purpose, | 
|  | including commercial applications, and to alter it and redistribute it | 
|  | freely, subject to the following restrictions: | 
|  |  | 
|  | 1. The origin of this software must not be misrepresented; you must not | 
|  | claim that you wrote the original software. If you use this software | 
|  | in a product, an acknowledgment in the product documentation would be | 
|  | appreciated but is not required. | 
|  | 2. Altered source versions must be plainly marked as such, and must not be | 
|  | misrepresented as being the original software. | 
|  | 3. This notice may not be removed or altered from any source distribution. | 
|  |  | 
|  | Mark Adler    madler@alumni.caltech.edu | 
|  | */ | 
|  |  | 
/*
 * Change history:
 *
 * 1.0  11 Dec 2004     - First version
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 * 1.2  14 Aug 2012     - Clean up for z_const usage
 */
|  |  | 
/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes to the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */
|  |  | 
|  | #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */ | 
|  | #include <stdlib.h>     /* exit(), malloc(), free() */ | 
|  | #include <fcntl.h>      /* open() */ | 
|  | #include <unistd.h>     /* close(), read(), lseek() */ | 
|  | #include "zlib.h" | 
|  | /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ | 
|  |  | 
|  | #define local static | 
|  |  | 
/* Report a fatal error and terminate the program.

   Writes "gzjoin error: " followed by why1 and why2 (concatenated) and
   ", output incomplete" to stderr, then calls exit(1).  This function never
   returns to its caller; it is declared to return int, and ends with an
   unreachable return statement, only so that a call can appear as an operand
   in an expression (the bget() macro relies on this).

   Both strings are only read, so they are const-qualified; callers may pass
   string literals or writable buffers alike. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;       /* not reached */
}
|  |  | 
|  | /* -- simple buffered file input with access to the buffer -- */ | 
|  |  | 
/* Size in bytes of each input buffer.  Must be a power of two (bskip()
   masks with CHUNK - 1) and must fit in an unsigned. */
#define CHUNK 32768

/* bin -- state of one buffered input file */
typedef struct {
    /* name of file, used in error messages */
    char *name;

    /* file descriptor */
    int fd;

    /* number of bytes remaining at next */
    unsigned left;

    /* next byte to read */
    unsigned char *next;

    /* allocated buffer of length CHUNK */
    unsigned char *buf;
} bin;
|  |  | 
|  | /* close a buffered file and free allocated memory */ | 
|  | local void bclose(bin *in) | 
|  | { | 
|  | if (in != NULL) { | 
|  | if (in->fd != -1) | 
|  | close(in->fd); | 
|  | if (in->buf != NULL) | 
|  | free(in->buf); | 
|  | free(in); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* open a buffered file for input, return a pointer to type bin, or NULL on | 
|  | failure */ | 
|  | local bin *bopen(char *name) | 
|  | { | 
|  | bin *in; | 
|  |  | 
|  | in = malloc(sizeof(bin)); | 
|  | if (in == NULL) | 
|  | return NULL; | 
|  | in->buf = malloc(CHUNK); | 
|  | in->fd = open(name, O_RDONLY, 0); | 
|  | if (in->buf == NULL || in->fd == -1) { | 
|  | bclose(in); | 
|  | return NULL; | 
|  | } | 
|  | in->left = 0; | 
|  | in->next = in->buf; | 
|  | in->name = name; | 
|  | return in; | 
|  | } | 
|  |  | 
|  | /* load buffer from file, return -1 on read error, 0 or 1 on success, with | 
|  | 1 indicating that end-of-file was reached */ | 
|  | local int bload(bin *in) | 
|  | { | 
|  | long len; | 
|  |  | 
|  | if (in == NULL) | 
|  | return -1; | 
|  | if (in->left != 0) | 
|  | return 0; | 
|  | in->next = in->buf; | 
|  | do { | 
|  | len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); | 
|  | if (len < 0) | 
|  | return -1; | 
|  | in->left += (unsigned)len; | 
|  | } while (len != 0 && in->left < CHUNK); | 
|  | return len == 0 ? 1 : 0; | 
|  | } | 
|  |  | 
/* Get the next byte from the buffered file in, as an int in 0..255.  The
   buffer is reloaded with bload() when it is empty; if it is still empty
   after that, bail() is called and the program exits with an "unexpected end
   of file" message.

   The argument is parenthesized at every use so that any pointer expression
   expands correctly.  It is still evaluated more than once, so it must not
   have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
|  |  | 
|  | /* get a four-byte little-endian unsigned integer from file */ | 
|  | local unsigned long bget4(bin *in) | 
|  | { | 
|  | unsigned long val; | 
|  |  | 
|  | val = bget(in); | 
|  | val += (unsigned long)(bget(in)) << 8; | 
|  | val += (unsigned long)(bget(in)) << 16; | 
|  | val += (unsigned long)(bget(in)) << 24; | 
|  | return val; | 
|  | } | 
|  |  | 
|  | /* skip bytes in file */ | 
|  | local void bskip(bin *in, unsigned skip) | 
|  | { | 
|  | /* check pointer */ | 
|  | if (in == NULL) | 
|  | return; | 
|  |  | 
|  | /* easy case -- skip bytes in buffer */ | 
|  | if (skip <= in->left) { | 
|  | in->left -= skip; | 
|  | in->next += skip; | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* skip what's in buffer, discard buffer contents */ | 
|  | skip -= in->left; | 
|  | in->left = 0; | 
|  |  | 
|  | /* seek past multiples of CHUNK bytes */ | 
|  | if (skip > CHUNK) { | 
|  | unsigned left; | 
|  |  | 
|  | left = skip & (CHUNK - 1); | 
|  | if (left == 0) { | 
|  | /* exact number of chunks: seek all the way minus one byte to check | 
|  | for end-of-file with a read */ | 
|  | lseek(in->fd, skip - 1, SEEK_CUR); | 
|  | if (read(in->fd, in->buf, 1) != 1) | 
|  | bail("unexpected end of file on ", in->name); | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* skip the integral chunks, update skip with remainder */ | 
|  | lseek(in->fd, skip - left, SEEK_CUR); | 
|  | skip = left; | 
|  | } | 
|  |  | 
|  | /* read more input and skip remainder */ | 
|  | bload(in); | 
|  | if (skip > in->left) | 
|  | bail("unexpected end of file on ", in->name); | 
|  | in->left -= skip; | 
|  | in->next += skip; | 
|  | } | 
|  |  | 
|  | /* -- end of buffered input functions -- */ | 
|  |  | 
|  | /* skip the gzip header from file in */ | 
|  | local void gzhead(bin *in) | 
|  | { | 
|  | int flags; | 
|  |  | 
|  | /* verify gzip magic header and compression method */ | 
|  | if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) | 
|  | bail(in->name, " is not a valid gzip file"); | 
|  |  | 
|  | /* get and verify flags */ | 
|  | flags = bget(in); | 
|  | if ((flags & 0xe0) != 0) | 
|  | bail("unknown reserved bits set in ", in->name); | 
|  |  | 
|  | /* skip modification time, extra flags, and os */ | 
|  | bskip(in, 6); | 
|  |  | 
|  | /* skip extra field if present */ | 
|  | if (flags & 4) { | 
|  | unsigned len; | 
|  |  | 
|  | len = bget(in); | 
|  | len += (unsigned)(bget(in)) << 8; | 
|  | bskip(in, len); | 
|  | } | 
|  |  | 
|  | /* skip file name if present */ | 
|  | if (flags & 8) | 
|  | while (bget(in) != 0) | 
|  | ; | 
|  |  | 
|  | /* skip comment if present */ | 
|  | if (flags & 16) | 
|  | while (bget(in) != 0) | 
|  | ; | 
|  |  | 
|  | /* skip header crc if present */ | 
|  | if (flags & 2) | 
|  | bskip(in, 2); | 
|  | } | 
|  |  | 
/* Write the low 32 bits of val to out as four bytes, least significant
   byte first.  Any higher bits of val are ignored. */
local void put4(unsigned long val, FILE *out)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        putc((int)((val >> shift) & 0xff), out);
}
|  |  | 
|  | /* Load up zlib stream from buffered input, bail if end of file */ | 
|  | local void zpull(z_streamp strm, bin *in) | 
|  | { | 
|  | if (in->left == 0) | 
|  | bload(in); | 
|  | if (in->left == 0) | 
|  | bail("unexpected end of file on ", in->name); | 
|  | strm->avail_in = in->left; | 
|  | strm->next_in = in->next; | 
|  | } | 
|  |  | 
|  | /* Write header for gzip file to out and initialize trailer. */ | 
|  | local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) | 
|  | { | 
|  | fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); | 
|  | *crc = crc32(0L, Z_NULL, 0); | 
|  | *tot = 0; | 
|  | } | 
|  |  | 
|  | /* Copy the compressed data from name, zeroing the last block bit of the last | 
|  | block if clr is true, and adding empty blocks as needed to get to a byte | 
|  | boundary.  If clr is false, then the last block becomes the last block of | 
|  | the output, and the gzip trailer is written.  crc and tot maintains the | 
|  | crc and length (modulo 2^32) of the output for the trailer.  The resulting | 
|  | gzip file is written to out.  gzinit() must be called before the first call | 
|  | of gzcopy() to write the gzip header and to initialize crc and tot. */ | 
|  | local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, | 
|  | FILE *out) | 
|  | { | 
|  | int ret;                /* return value from zlib functions */ | 
|  | int pos;                /* where the "last block" bit is in byte */ | 
|  | int last;               /* true if processing the last block */ | 
|  | bin *in;                /* buffered input file */ | 
|  | unsigned char *start;   /* start of compressed data in buffer */ | 
|  | unsigned char *junk;    /* buffer for uncompressed data -- discarded */ | 
|  | z_off_t len;            /* length of uncompressed data (support > 4 GB) */ | 
|  | z_stream strm;          /* zlib inflate stream */ | 
|  |  | 
|  | /* open gzip file and skip header */ | 
|  | in = bopen(name); | 
|  | if (in == NULL) | 
|  | bail("could not open ", name); | 
|  | gzhead(in); | 
|  |  | 
|  | /* allocate buffer for uncompressed data and initialize raw inflate | 
|  | stream */ | 
|  | junk = malloc(CHUNK); | 
|  | strm.zalloc = Z_NULL; | 
|  | strm.zfree = Z_NULL; | 
|  | strm.opaque = Z_NULL; | 
|  | strm.avail_in = 0; | 
|  | strm.next_in = Z_NULL; | 
|  | ret = inflateInit2(&strm, -15); | 
|  | if (junk == NULL || ret != Z_OK) | 
|  | bail("out of memory", ""); | 
|  |  | 
|  | /* inflate and copy compressed data, clear last-block bit if requested */ | 
|  | len = 0; | 
|  | zpull(&strm, in); | 
|  | start = in->next; | 
|  | last = start[0] & 1; | 
|  | if (last && clr) | 
|  | start[0] &= ~1; | 
|  | strm.avail_out = 0; | 
|  | for (;;) { | 
|  | /* if input used and output done, write used input and get more */ | 
|  | if (strm.avail_in == 0 && strm.avail_out != 0) { | 
|  | fwrite(start, 1, strm.next_in - start, out); | 
|  | start = in->buf; | 
|  | in->left = 0; | 
|  | zpull(&strm, in); | 
|  | } | 
|  |  | 
|  | /* decompress -- return early when end-of-block reached */ | 
|  | strm.avail_out = CHUNK; | 
|  | strm.next_out = junk; | 
|  | ret = inflate(&strm, Z_BLOCK); | 
|  | switch (ret) { | 
|  | case Z_MEM_ERROR: | 
|  | bail("out of memory", ""); | 
|  | case Z_DATA_ERROR: | 
|  | bail("invalid compressed data in ", in->name); | 
|  | } | 
|  |  | 
|  | /* update length of uncompressed data */ | 
|  | len += CHUNK - strm.avail_out; | 
|  |  | 
|  | /* check for block boundary (only get this when block copied out) */ | 
|  | if (strm.data_type & 128) { | 
|  | /* if that was the last block, then done */ | 
|  | if (last) | 
|  | break; | 
|  |  | 
|  | /* number of unused bits in last byte */ | 
|  | pos = strm.data_type & 7; | 
|  |  | 
|  | /* find the next last-block bit */ | 
|  | if (pos != 0) { | 
|  | /* next last-block bit is in last used byte */ | 
|  | pos = 0x100 >> pos; | 
|  | last = strm.next_in[-1] & pos; | 
|  | if (last && clr) | 
|  | in->buf[strm.next_in - in->buf - 1] &= ~pos; | 
|  | } | 
|  | else { | 
|  | /* next last-block bit is in next unused byte */ | 
|  | if (strm.avail_in == 0) { | 
|  | /* don't have that byte yet -- get it */ | 
|  | fwrite(start, 1, strm.next_in - start, out); | 
|  | start = in->buf; | 
|  | in->left = 0; | 
|  | zpull(&strm, in); | 
|  | } | 
|  | last = strm.next_in[0] & 1; | 
|  | if (last && clr) | 
|  | in->buf[strm.next_in - in->buf] &= ~1; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* update buffer with unused input */ | 
|  | in->left = strm.avail_in; | 
|  | in->next = in->buf + (strm.next_in - in->buf); | 
|  |  | 
|  | /* copy used input, write empty blocks to get to byte boundary */ | 
|  | pos = strm.data_type & 7; | 
|  | fwrite(start, 1, in->next - start - 1, out); | 
|  | last = in->next[-1]; | 
|  | if (pos == 0 || !clr) | 
|  | /* already at byte boundary, or last file: write last byte */ | 
|  | putc(last, out); | 
|  | else { | 
|  | /* append empty blocks to last byte */ | 
|  | last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */ | 
|  | if (pos & 1) { | 
|  | /* odd -- append an empty stored block */ | 
|  | putc(last, out); | 
|  | if (pos == 1) | 
|  | putc(0, out);               /* two more bits in block header */ | 
|  | fwrite("\0\0\xff\xff", 1, 4, out); | 
|  | } | 
|  | else { | 
|  | /* even -- append 1, 2, or 3 empty fixed blocks */ | 
|  | switch (pos) { | 
|  | case 6: | 
|  | putc(last | 8, out); | 
|  | last = 0; | 
|  | case 4: | 
|  | putc(last | 0x20, out); | 
|  | last = 0; | 
|  | case 2: | 
|  | putc(last | 0x80, out); | 
|  | putc(0, out); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* update crc and tot */ | 
|  | *crc = crc32_combine(*crc, bget4(in), len); | 
|  | *tot += (unsigned long)len; | 
|  |  | 
|  | /* clean up */ | 
|  | inflateEnd(&strm); | 
|  | free(junk); | 
|  | bclose(in); | 
|  |  | 
|  | /* write trailer if this is the last gzip file */ | 
|  | if (!clr) { | 
|  | put4(*crc, out); | 
|  | put4(*tot, out); | 
|  | } | 
|  | } | 
|  |  | 
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.

   With no file arguments a usage message is written to stderr and 0 is
   returned.  Otherwise returns 0 once all the output has been written and
   flushed successfully.  Any error, including a failure to write stdout,
   ends the program through bail() with exit status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* show usage if no arguments (argc can even be zero when the program is
       exec'ed with an empty argument vector) */
    if (argc < 2) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* skip command name */
    argc--;
    argv++;

    /* join gzip files on command line and write to stdout; argc has already
       been decremented when gzcopy() is called, so it is the number of files
       still to come and is zero (clr false) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* push out whatever is still buffered and make sure every write made it,
       so that a full disk or closed pipe is not reported as success */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}