zlib 1.2.2.2
This commit is contained in:
29
examples/README.examples
Normal file
29
examples/README.examples
Normal file
@@ -0,0 +1,29 @@
|
||||
This directory contains examples of the use of zlib.
|
||||
|
||||
fitblk.c
|
||||
compress just enough input to nearly fill a requested output size
|
||||
- zlib isn't designed to do this, but fitblk does it anyway
|
||||
|
||||
gzappend.c
|
||||
append to a gzip file
|
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate()
|
||||
- illustrates the use of deflatePrime() to start at any bit
|
||||
|
||||
gzjoin.c
|
||||
join gzip files without recalculating the crc or recompressing
|
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate()
|
||||
- illustrates the use of crc32_combine()
|
||||
|
||||
gzlog.c
|
||||
gzlog.h
|
||||
efficiently maintain a message log file in gzip format
|
||||
- illustrates use of raw deflate and Z_SYNC_FLUSH
|
||||
- illustrates use of gzip header extra field
|
||||
|
||||
zlib_how.html
|
||||
painfully comprehensive description of zpipe.c (see below)
|
||||
- describes in excruciating detail the use of deflate() and inflate()
|
||||
|
||||
zpipe.c
|
||||
reads and writes zlib streams from stdin to stdout
|
||||
- illustrates the proper use of deflate() and inflate()
|
||||
235
examples/fitblk.c
Normal file
235
examples/fitblk.c
Normal file
@@ -0,0 +1,235 @@
|
||||
/* fitblk.c: example of fitting compressed output to a specified size
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.1 25 November 2004 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 24 Nov 2004 First version
|
||||
1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
|
||||
Use fixed-size, stack-allocated raw buffers
|
||||
Simplify code moving compression to subroutines
|
||||
Use assert() for internal errors
|
||||
Add detailed description of approach
|
||||
*/
|
||||
|
||||
/* Approach to just fitting a requested compressed size:
|
||||
|
||||
fitblk performs three compression passes on a portion of the input
|
||||
data in order to determine how much of that input will compress to
|
||||
nearly the requested output block size. The first pass generates
|
||||
enough deflate blocks to produce output to fill the requested
|
||||
output size plus a specified excess amount (see the EXCESS define
|
||||
below). The last deflate block may go quite a bit past that, but
|
||||
is discarded. The second pass decompresses and recompresses just
|
||||
the compressed data that fit in the requested plus excess sized
|
||||
buffer. The deflate process is terminated after that amount of
|
||||
input, which is less than the amount consumed on the first pass.
|
||||
The last deflate block of the result will be of a comparable size
|
||||
to the final product, so that the header for that deflate block and
|
||||
the compression ratio for that block will be about the same as in
|
||||
the final product. The third compression pass decompresses the
|
||||
result of the second step, but only the compressed data up to the
|
||||
requested size minus an amount to allow the compressed stream to
|
||||
complete (see the MARGIN define below). That will result in a
|
||||
final compressed stream whose length is less than or equal to the
|
||||
requested size. Assuming sufficient input and a requested size
|
||||
greater than a few hundred bytes, the shortfall will typically be
|
||||
less than ten bytes.
|
||||
|
||||
If the input is short enough that the first compression completes
|
||||
before filling the requested output size, then that compressed
|
||||
stream is returned with no recompression.
|
||||
|
||||
EXCESS is chosen to be just greater than the shortfall seen in a
|
||||
two pass approach similar to the above. That shortfall is due to
|
||||
the last deflate block compressing more efficiently with a smaller
|
||||
header on the second pass. EXCESS is set to be large enough so
|
||||
that there is enough uncompressed data for the second pass to fill
|
||||
out the requested size, and small enough so that the final deflate
|
||||
block of the second pass will be close in size to the final deflate
|
||||
block of the third and final pass. MARGIN is chosen to be just
|
||||
large enough to assure that the final compression has enough room
|
||||
to complete in all cases.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
|
||||
/* report a fatal problem on stderr, prefixed with the program name, and
   terminate the program with exit status 1 -- never returns */
local void quit(char *why)
{
    fprintf(stderr, "fitblk abort: %s\n", why);
    exit(1);
}
|
||||
|
||||
#define RAWLEN 4096 /* intermediate uncompressed buffer size */
|
||||
|
||||
/* compress from file to def until provided buffer is full or end of
|
||||
input reached; return last deflate() return value, or Z_ERRNO if
|
||||
there was read error on the file */
|
||||
local int partcompress(FILE *in, z_streamp def)
|
||||
{
|
||||
int ret, flush;
|
||||
char raw[RAWLEN];
|
||||
|
||||
flush = Z_NO_FLUSH;
|
||||
do {
|
||||
def->avail_in = fread(raw, 1, RAWLEN, in);
|
||||
if (ferror(in))
|
||||
return Z_ERRNO;
|
||||
def->next_in = raw;
|
||||
if (feof(in))
|
||||
flush = Z_FINISH;
|
||||
ret = deflate(def, flush);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
} while (def->avail_out != 0 && flush == Z_NO_FLUSH);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* recompress from inf's input to def's output; the input for inf and
|
||||
the output for def are set in those structures before calling;
|
||||
return last deflate() return value, or Z_MEM_ERROR if inflate()
|
||||
was not able to allocate enough memory when it needed to */
|
||||
local int recompress(z_streamp inf, z_streamp def)
|
||||
{
|
||||
int ret, flush;
|
||||
char raw[RAWLEN];
|
||||
|
||||
flush = Z_NO_FLUSH;
|
||||
do {
|
||||
/* decompress */
|
||||
inf->avail_out = RAWLEN;
|
||||
inf->next_out = raw;
|
||||
ret = inflate(inf, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
|
||||
ret != Z_NEED_DICT);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
return ret;
|
||||
|
||||
/* compress what was decompresed until done or no room */
|
||||
def->avail_in = RAWLEN - inf->avail_out;
|
||||
def->next_in = raw;
|
||||
if (inf->avail_out != 0)
|
||||
flush = Z_FINISH;
|
||||
ret = deflate(def, flush);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
} while (ret != Z_STREAM_END && def->avail_out != 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define EXCESS 256 /* empirically determined stream overage */
|
||||
#define MARGIN 8 /* amount to back off for completion */
|
||||
|
||||
/* compress from stdin to fixed-size block on stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret; /* return code */
|
||||
unsigned size; /* requested fixed output block size */
|
||||
unsigned have; /* bytes written by deflate() call */
|
||||
char *blk; /* intermediate and final stream */
|
||||
char *tmp; /* close to desired size stream */
|
||||
z_stream def, inf; /* zlib deflate and inflate states */
|
||||
|
||||
/* get requested output size */
|
||||
if (argc != 2)
|
||||
quit("need one argument: size of output block");
|
||||
ret = strtol(argv[1], argv + 1, 10);
|
||||
if (argv[1][0] != 0)
|
||||
quit("argument must be a number");
|
||||
if (ret < 8) /* 8 is minimum zlib stream size */
|
||||
quit("need positive size of 8 or greater");
|
||||
size = (unsigned)ret;
|
||||
|
||||
/* allocate memory for buffers and compression engine */
|
||||
blk = malloc(size + EXCESS);
|
||||
def.zalloc = Z_NULL;
|
||||
def.zfree = Z_NULL;
|
||||
def.opaque = Z_NULL;
|
||||
ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK || blk == NULL)
|
||||
quit("out of memory");
|
||||
|
||||
/* compress from stdin until output full, or no more input */
|
||||
def.avail_out = size + EXCESS;
|
||||
def.next_out = blk;
|
||||
ret = partcompress(stdin, &def);
|
||||
if (ret == Z_ERRNO)
|
||||
quit("error reading input");
|
||||
|
||||
/* if it all fit, then size was undersubscribed -- done! */
|
||||
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
|
||||
/* write block to stdout */
|
||||
have = size + EXCESS - def.avail_out;
|
||||
ret = fwrite(blk, 1, have, stdout);
|
||||
if (ret != have || ferror(stdout))
|
||||
quit("error writing output");
|
||||
|
||||
/* clean up and print results to stderr */
|
||||
ret = deflateEnd(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
free(blk);
|
||||
fprintf(stderr,
|
||||
"%u bytes unused out of %u requested (all input)\n",
|
||||
size - have, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* it didn't all fit -- set up for recompression */
|
||||
inf.zalloc = Z_NULL;
|
||||
inf.zfree = Z_NULL;
|
||||
inf.opaque = Z_NULL;
|
||||
inf.avail_in = 0;
|
||||
inf.next_in = Z_NULL;
|
||||
ret = inflateInit(&inf);
|
||||
tmp = malloc(size + EXCESS);
|
||||
if (ret != Z_OK || tmp == NULL)
|
||||
quit("out of memory");
|
||||
ret = deflateReset(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
|
||||
/* do first recompression close to the right amount */
|
||||
inf.avail_in = size + EXCESS;
|
||||
inf.next_in = blk;
|
||||
def.avail_out = size + EXCESS;
|
||||
def.next_out = tmp;
|
||||
ret = recompress(&inf, &def);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
quit("out of memory");
|
||||
|
||||
/* set up for next reocmpression */
|
||||
ret = inflateReset(&inf);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
ret = deflateReset(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
|
||||
/* do second and final recompression (third compression) */
|
||||
inf.avail_in = size - MARGIN; /* assure stream will complete */
|
||||
inf.next_in = tmp;
|
||||
def.avail_out = size;
|
||||
def.next_out = blk;
|
||||
ret = recompress(&inf, &def);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
quit("out of memory");
|
||||
assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
|
||||
|
||||
/* done -- write block to stdout */
|
||||
have = size - def.avail_out;
|
||||
ret = fwrite(blk, 1, have, stdout);
|
||||
if (ret != have || ferror(stdout))
|
||||
quit("error writing output");
|
||||
|
||||
/* clean up and print results to stderr */
|
||||
free(tmp);
|
||||
ret = inflateEnd(&inf);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
ret = deflateEnd(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
free(blk);
|
||||
fprintf(stderr,
|
||||
"%u bytes unused out of %u requested (%lu input)\n",
|
||||
size - have, size, def.total_in);
|
||||
return 0;
|
||||
}
|
||||
500
examples/gzappend.c
Normal file
500
examples/gzappend.c
Normal file
@@ -0,0 +1,500 @@
|
||||
/* gzappend -- command to append to a gzip file
|
||||
|
||||
Copyright (C) 2003 Mark Adler, all rights reserved
|
||||
version 1.1, 4 Nov 2003
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/*
|
||||
* Change history:
|
||||
*
|
||||
* 1.0 19 Oct 2003 - First version
|
||||
* 1.1 4 Nov 2003 - Expand and clarify some comments and notes
|
||||
* - Add version and copyright to help
|
||||
* - Send help to stdout instead of stderr
|
||||
* - Add some preemptive typecasts
|
||||
* - Add L to constants in lseek() calls
|
||||
* - Remove some debugging information in error messages
|
||||
* - Use new data_type definition for zlib 1.2.1
|
||||
* - Simplify and unify file operations
|
||||
* - Finish off gzip file in gztack()
|
||||
* - Use deflatePrime() instead of adding empty blocks
|
||||
* - Keep gzip file clean on appended file read errors
|
||||
* - Use in-place rotate instead of auxiliary buffer
|
||||
* (Why you ask? Because it was fun to write!)
|
||||
*/
|
||||
|
||||
/*
|
||||
gzappend takes a gzip file and appends to it, compressing files from the
|
||||
command line or data from stdin. The gzip file is written to directly, to
|
||||
avoid copying that file, in case it's large. Note that this results in the
|
||||
unfriendly behavior that if gzappend fails, the gzip file is corrupted.
|
||||
|
||||
This program was written to illustrate the use of the new Z_BLOCK option of
|
||||
zlib 1.2.x's inflate() function. This option returns from inflate() at each
|
||||
block boundary to facilitate locating and modifying the last block bit at
|
||||
the start of the final deflate block. Also whether using Z_BLOCK or not,
|
||||
another required feature of zlib 1.2.x is that inflate() now provides the
|
||||
number of unused bits in the last input byte used. gzappend will not work
|
||||
with versions of zlib earlier than 1.2.1.
|
||||
|
||||
gzappend first decompresses the gzip file internally, discarding all but
|
||||
the last 32K of uncompressed data, and noting the location of the last block
|
||||
bit and the number of unused bits in the last byte of the compressed data.
|
||||
The gzip trailer containing the CRC-32 and length of the uncompressed data
|
||||
is verified. This trailer will be later overwritten.
|
||||
|
||||
Then the last block bit is cleared by seeking back in the file and rewriting
|
||||
the byte that contains it. Seeking forward, the last byte of the compressed
|
||||
data is saved along with the number of unused bits to initialize deflate.
|
||||
|
||||
A deflate process is initialized, using the last 32K of the uncompressed
|
||||
data from the gzip file to initialize the dictionary. If the total
|
||||
uncompressed data was less than 32K, then all of it is used to initialize
|
||||
the dictionary. The deflate output bit buffer is also initialized with the
|
||||
last bits from the original deflate stream. From here on, the data to
|
||||
append is simply compressed using deflate, and written to the gzip file.
|
||||
When that is complete, the new CRC-32 and uncompressed length are written
|
||||
as the trailer of the gzip file.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
#define LGCHUNK 14
|
||||
#define CHUNK (1U << LGCHUNK)
|
||||
#define DSIZE 32768U
|
||||
|
||||
/* write "gzappend error: " followed by msg1 and msg2 (concatenated with no
   separator) to stderr, then terminate with exit status 1 -- never returns */
local void bye(char *msg1, char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}
|
||||
|
||||
/* return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument is much greater than the other (the
   smaller value is doubled as far as it will go before being subtracted),
   and coded to avoid unnecessary swapping -- gcd(x, 0) and gcd(0, x) are x */
local unsigned gcd(unsigned a, unsigned b)
{
    while (a != 0 && b != 0) {
        /* reduce the larger argument (b when they are equal) */
        unsigned *big = a > b ? &a : &b;
        unsigned step = a > b ? b : a;

        /* double the subtrahend while it still fits at least twice */
        while (*big - step >= step)
            step <<= 1;
        *big -= step;
    }
    return a + b;           /* one is zero, the other is the answer */
}

/* rotate list[0..len-1] left by rot positions, in place -- rot is reduced
   modulo len, and lists shorter than two entries are left untouched */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned start, to, from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* do simple left shift by one -- source and destination overlap, so
       this has to be memmove(), memcpy() is undefined for overlap */
    if (rot == 1) {
        tmp = list[0];
        memmove(list, list + 1, len - 1);
        list[len - 1] = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = list[len - 1];
        memmove(list + 1, list, len - 1);
        list[0] = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place, walking indices
       rather than pointers so nothing is ever formed outside the list */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = cycles;          /* start index is arbitrary */
        tmp = list[from];               /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */
            if (from >= len - rot)      /* go right rot positions, */
                from -= len - rot;      /*  wrapping around the end */
            else
                from += rot;
            if (from == start) break;   /* all but one shifted */
            list[to] = list[from];      /* shift left */
        }
        list[to] = tmp;                 /* complete the circle */
    } while (--cycles);
}
|
||||
|
||||
/* state for buffered reading of a gzip file */
typedef struct {
    int fd;                 /* descriptor of the open file */
    int size;               /* log2 of buffer length: buf holds 1 << size */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *buf;     /* start of the read buffer */
    unsigned char *next;    /* next unread byte within buf */
    char *name;             /* name of the file, for error messages */
} file;
|
||||
|
||||
/* reload buffer */
|
||||
local int readin(file *in)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = read(in->fd, in->buf, 1 << in->size);
|
||||
if (len == -1) bye("error reading ", in->name);
|
||||
in->left = (unsigned)len;
|
||||
in->next = in->buf;
|
||||
return len;
|
||||
}
|
||||
|
||||
/* read from file in, exit if end-of-file */
|
||||
local int readmore(file *in)
|
||||
{
|
||||
if (readin(in) == 0) bye("unexpected end of ", in->name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the next byte from in, reloading the buffer first when it is empty
   (readmore() exits the program if no more data can be read); the argument
   is parenthesized so that any pointer expression may be passed, but note
   that it is evaluated more than once */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
|
||||
|
||||
/* skip over n bytes of in */
|
||||
local void skip(file *in, unsigned n)
|
||||
{
|
||||
unsigned bypass;
|
||||
|
||||
if (n > in->left) {
|
||||
n -= in->left;
|
||||
bypass = n & ~((1U << in->size) - 1);
|
||||
if (bypass) {
|
||||
if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
|
||||
bye("seeking ", in->name);
|
||||
n -= bypass;
|
||||
}
|
||||
readmore(in);
|
||||
if (n > in->left)
|
||||
bye("unexpected end of ", in->name);
|
||||
}
|
||||
in->left -= n;
|
||||
in->next += n;
|
||||
}
|
||||
|
||||
/* read a four-byte unsigned integer, little-endian, from in */
|
||||
unsigned long read4(file *in)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
val = read1(in);
|
||||
val += (unsigned)read1(in) << 8;
|
||||
val += (unsigned long)read1(in) << 16;
|
||||
val += (unsigned long)read1(in) << 24;
|
||||
return val;
|
||||
}
|
||||
|
||||
/* skip over gzip header */
|
||||
local void gzheader(file *in)
|
||||
{
|
||||
int flags;
|
||||
unsigned n;
|
||||
|
||||
if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
|
||||
if (read1(in) != 8) bye("unknown compression method in", in->name);
|
||||
flags = read1(in);
|
||||
if (flags & 0xe0) bye("unknown header flags set in", in->name);
|
||||
skip(in, 6);
|
||||
if (flags & 4) {
|
||||
n = read1(in);
|
||||
n += (unsigned)(read1(in)) << 8;
|
||||
skip(in, n);
|
||||
}
|
||||
if (flags & 8) while (read1(in) != 0) ;
|
||||
if (flags & 16) while (read1(in) != 0) ;
|
||||
if (flags & 2) skip(in, 2);
|
||||
}
|
||||
|
||||
/* decompress gzip file "name", return strm with a deflate stream ready to
|
||||
continue compression of the data in the gzip file, and return a file
|
||||
descriptor pointing to where to write the compressed data -- the deflate
|
||||
stream is initialized to compress using level "level" */
|
||||
local int gzscan(char *name, z_stream *strm, int level)
|
||||
{
|
||||
int ret, lastbit, left, full;
|
||||
unsigned have;
|
||||
unsigned long crc, tot;
|
||||
unsigned char *window;
|
||||
off_t lastoff, end;
|
||||
file gz;
|
||||
|
||||
/* open gzip file */
|
||||
gz.name = name;
|
||||
gz.fd = open(name, O_RDWR, 0);
|
||||
if (gz.fd == -1) bye("cannot open ", name);
|
||||
gz.buf = malloc(CHUNK);
|
||||
if (gz.buf == NULL) bye("out of memory", "");
|
||||
gz.size = LGCHUNK;
|
||||
gz.left = 0;
|
||||
|
||||
/* skip gzip header */
|
||||
gzheader(&gz);
|
||||
|
||||
/* prepare to decompress */
|
||||
window = malloc(DSIZE);
|
||||
if (window == NULL) bye("out of memory", "");
|
||||
strm->zalloc = Z_NULL;
|
||||
strm->zfree = Z_NULL;
|
||||
strm->opaque = Z_NULL;
|
||||
ret = inflateInit2(strm, -15);
|
||||
if (ret != Z_OK) bye("out of memory", " or library mismatch");
|
||||
|
||||
/* decompress the deflate stream, saving append information */
|
||||
lastbit = 0;
|
||||
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
|
||||
left = 0;
|
||||
strm->avail_in = gz.left;
|
||||
strm->next_in = gz.next;
|
||||
crc = crc32(0L, Z_NULL, 0);
|
||||
have = full = 0;
|
||||
do {
|
||||
/* if needed, get more input */
|
||||
if (strm->avail_in == 0) {
|
||||
readmore(&gz);
|
||||
strm->avail_in = gz.left;
|
||||
strm->next_in = gz.next;
|
||||
}
|
||||
|
||||
/* set up output to next available section of sliding window */
|
||||
strm->avail_out = DSIZE - have;
|
||||
strm->next_out = window + have;
|
||||
|
||||
/* inflate and check for errors */
|
||||
ret = inflate(strm, Z_BLOCK);
|
||||
if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
|
||||
if (ret == Z_MEM_ERROR) bye("out of memory", "");
|
||||
if (ret == Z_DATA_ERROR)
|
||||
bye("invalid compressed data--format violated in", name);
|
||||
|
||||
/* update crc and sliding window pointer */
|
||||
crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
|
||||
if (strm->avail_out)
|
||||
have = DSIZE - strm->avail_out;
|
||||
else {
|
||||
have = 0;
|
||||
full = 1;
|
||||
}
|
||||
|
||||
/* process end of block */
|
||||
if (strm->data_type & 128) {
|
||||
if (strm->data_type & 64)
|
||||
left = strm->data_type & 0x1f;
|
||||
else {
|
||||
lastbit = strm->data_type & 0x1f;
|
||||
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
|
||||
}
|
||||
}
|
||||
} while (ret != Z_STREAM_END);
|
||||
inflateEnd(strm);
|
||||
gz.left = strm->avail_in;
|
||||
gz.next = strm->next_in;
|
||||
|
||||
/* save the location of the end of the compressed data */
|
||||
end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
|
||||
|
||||
/* check gzip trailer and save total for deflate */
|
||||
if (crc != read4(&gz))
|
||||
bye("invalid compressed data--crc mismatch in ", name);
|
||||
tot = strm->total_out;
|
||||
if ((tot & 0xffffffffUL) != read4(&gz))
|
||||
bye("invalid compressed data--length mismatch in", name);
|
||||
|
||||
/* if not at end of file, warn */
|
||||
if (gz.left || readin(&gz))
|
||||
fprintf(stderr,
|
||||
"gzappend warning: junk at end of gzip file overwritten\n");
|
||||
|
||||
/* clear last block bit */
|
||||
lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
|
||||
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
|
||||
*gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
|
||||
lseek(gz.fd, -1L, SEEK_CUR);
|
||||
if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
|
||||
|
||||
/* if window wrapped, build dictionary from window by rotating */
|
||||
if (full) {
|
||||
rotate(window, DSIZE, have);
|
||||
have = DSIZE;
|
||||
}
|
||||
|
||||
/* set up deflate stream with window, crc, total_in, and leftover bits */
|
||||
ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
|
||||
if (ret != Z_OK) bye("out of memory", "");
|
||||
deflateSetDictionary(strm, window, have);
|
||||
strm->adler = crc;
|
||||
strm->total_in = tot;
|
||||
if (left) {
|
||||
lseek(gz.fd, --end, SEEK_SET);
|
||||
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
|
||||
deflatePrime(strm, 8 - left, *gz.buf);
|
||||
}
|
||||
lseek(gz.fd, end, SEEK_SET);
|
||||
|
||||
/* clean up and return */
|
||||
free(window);
|
||||
free(gz.buf);
|
||||
return gz.fd;
|
||||
}
|
||||
|
||||
/* append file "name" to gzip file gd using deflate stream strm -- if last
|
||||
is true, then finish off the deflate stream at the end */
|
||||
local void gztack(char *name, int gd, z_stream *strm, int last)
|
||||
{
|
||||
int fd, len, ret;
|
||||
unsigned left;
|
||||
unsigned char *in, *out;
|
||||
|
||||
/* open file to compress and append */
|
||||
fd = 0;
|
||||
if (name != NULL) {
|
||||
fd = open(name, O_RDONLY, 0);
|
||||
if (fd == -1)
|
||||
fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
|
||||
name);
|
||||
}
|
||||
|
||||
/* allocate buffers */
|
||||
in = fd == -1 ? NULL : malloc(CHUNK);
|
||||
out = malloc(CHUNK);
|
||||
if (out == NULL) bye("out of memory", "");
|
||||
|
||||
/* compress input file and append to gzip file */
|
||||
do {
|
||||
/* get more input */
|
||||
len = fd == -1 ? 0 : read(fd, in, CHUNK);
|
||||
if (len == -1) {
|
||||
fprintf(stderr,
|
||||
"gzappend warning: error reading %s, skipping rest ...\n",
|
||||
name);
|
||||
len = 0;
|
||||
}
|
||||
strm->avail_in = (unsigned)len;
|
||||
strm->next_in = in;
|
||||
if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
|
||||
|
||||
/* compress and write all available output */
|
||||
do {
|
||||
strm->avail_out = CHUNK;
|
||||
strm->next_out = out;
|
||||
ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
|
||||
left = CHUNK - strm->avail_out;
|
||||
while (left) {
|
||||
len = write(gd, out + CHUNK - strm->avail_out - left, left);
|
||||
if (len == -1) bye("writing gzip file", "");
|
||||
left -= (unsigned)len;
|
||||
}
|
||||
} while (strm->avail_out == 0 && ret != Z_STREAM_END);
|
||||
} while (len != 0);
|
||||
|
||||
/* write trailer after last entry */
|
||||
if (last) {
|
||||
deflateEnd(strm);
|
||||
out[0] = (unsigned char)(strm->adler);
|
||||
out[1] = (unsigned char)(strm->adler >> 8);
|
||||
out[2] = (unsigned char)(strm->adler >> 16);
|
||||
out[3] = (unsigned char)(strm->adler >> 24);
|
||||
out[4] = (unsigned char)(strm->total_in);
|
||||
out[5] = (unsigned char)(strm->total_in >> 8);
|
||||
out[6] = (unsigned char)(strm->total_in >> 16);
|
||||
out[7] = (unsigned char)(strm->total_in >> 24);
|
||||
len = 8;
|
||||
do {
|
||||
ret = write(gd, out + 8 - len, len);
|
||||
if (ret == -1) bye("writing gzip file", "");
|
||||
len -= ret;
|
||||
} while (len);
|
||||
close(gd);
|
||||
}
|
||||
|
||||
/* clean up and return */
|
||||
free(out);
|
||||
if (in != NULL) free(in);
|
||||
if (fd > 0) close(fd);
|
||||
}
|
||||
|
||||
/* process the compression level option if present, scan the gzip file, and
|
||||
append the specified files, or append the data from stdin if no other file
|
||||
names are provided on the command line -- the gzip file must be writable
|
||||
and seekable */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int gd, level;
|
||||
z_stream strm;
|
||||
|
||||
/* ignore command name */
|
||||
argv++;
|
||||
|
||||
/* provide usage if no arguments */
|
||||
if (*argv == NULL) {
|
||||
printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
|
||||
printf(
|
||||
"usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set compression level */
|
||||
level = Z_DEFAULT_COMPRESSION;
|
||||
if (argv[0][0] == '-') {
|
||||
if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
|
||||
bye("invalid compression level", "");
|
||||
level = argv[0][1] - '0';
|
||||
if (*++argv == NULL) bye("no gzip file name after options", "");
|
||||
}
|
||||
|
||||
/* prepare to append to gzip file */
|
||||
gd = gzscan(*argv++, &strm, level);
|
||||
|
||||
/* append files on command line, or from stdin if none */
|
||||
if (*argv == NULL)
|
||||
gztack(NULL, gd, &strm, 1);
|
||||
else
|
||||
do {
|
||||
gztack(*argv, gd, &strm, argv[1] == NULL);
|
||||
} while (*++argv != NULL);
|
||||
return 0;
|
||||
}
|
||||
447
examples/gzjoin.c
Normal file
447
examples/gzjoin.c
Normal file
@@ -0,0 +1,447 @@
|
||||
/* gzjoin -- command to join gzip files into one gzip file
|
||||
|
||||
Copyright (C) 2004 Mark Adler, all rights reserved
|
||||
version 1.0, 11 Dec 2004
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/*
|
||||
* Change history:
|
||||
*
|
||||
* 1.0 11 Dec 2004 - First version
|
||||
*/
|
||||
|
||||
/*
|
||||
gzjoin takes one or more gzip files on the command line and writes out a
|
||||
single gzip file that will uncompress to the concatenation of the
|
||||
uncompressed data from the individual gzip files. gzjoin does this without
|
||||
having to recompress any of the data and without having to calculate a new
|
||||
crc32 for the concatenated uncompressed data. gzjoin does however have to
|
||||
decompress all of the input data in order to find the bits in the compressed
|
||||
data that need to be modified to concatenate the streams.
|
||||
|
||||
gzjoin does not do an integrity check on the input gzip files other than
|
||||
checking the gzip header and decompressing the compressed data. They are
|
||||
otherwise assumed to be complete and correct.
|
||||
|
||||
Each joint between gzip files removes at least 18 bytes of previous trailer
|
||||
and subsequent header, and inserts an average of about three bytes to the
|
||||
compressed data in order to connect the streams. The output gzip file
|
||||
has a minimal ten-byte gzip header with no file name or modification time.
|
||||
|
||||
This program was written to illustrate the use of the Z_BLOCK option of
|
||||
inflate() and the crc32_combine() function. gzjoin will not compile with
|
||||
versions of zlib earlier than 1.2.3.
|
||||
*/
|
||||
|
||||
#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
|
||||
#include <stdlib.h> /* exit(), malloc(), free() */
|
||||
#include <fcntl.h> /* open() */
|
||||
#include <unistd.h> /* close(), read(), lseek() */
|
||||
#include "zlib.h"
|
||||
/* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
|
||||
|
||||
#define local static
|
||||
|
||||
/* report an error made of why1 followed by why2 on stderr and terminate
   with exit status 1 -- declared to return int only so that a call can be
   used as an operand in an expression */
local int bail(char *why1, char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;       /* not reached */
}
|
||||
|
||||
/* -- simple buffered file input with access to the buffer -- */
|
||||
|
||||
#define CHUNK 32768 /* must be a power of two and fit in unsigned */
|
||||
|
||||
/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, used when reporting errors */
    int fd;                 /* descriptor of the open file */
    unsigned left;          /* count of unread bytes starting at next */
    unsigned char *next;    /* next unread byte within buf */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
|
||||
|
||||
/* close a buffered file and free allocated memory */
|
||||
local void bclose(bin *in)
|
||||
{
|
||||
if (in != NULL) {
|
||||
if (in->fd != -1)
|
||||
close(in->fd);
|
||||
if (in->buf != NULL)
|
||||
free(in->buf);
|
||||
free(in);
|
||||
}
|
||||
}
|
||||
|
||||
/* open a buffered file for input, return a pointer to type bin, or NULL on
|
||||
failure */
|
||||
local bin *bopen(char *name)
|
||||
{
|
||||
bin *in;
|
||||
|
||||
in = malloc(sizeof(bin));
|
||||
if (in == NULL)
|
||||
return NULL;
|
||||
in->buf = malloc(CHUNK);
|
||||
in->fd = open(name, O_RDONLY, 0);
|
||||
if (in->buf == NULL || in->fd == -1) {
|
||||
bclose(in);
|
||||
return NULL;
|
||||
}
|
||||
in->left = 0;
|
||||
in->next = in->buf;
|
||||
in->name = name;
|
||||
return in;
|
||||
}
|
||||
|
||||
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
|
||||
1 indicating that end-of-file was reached */
|
||||
local int bload(bin *in)
|
||||
{
|
||||
ssize_t len;
|
||||
|
||||
if (in == NULL)
|
||||
return -1;
|
||||
if (in->left != 0)
|
||||
return 0;
|
||||
in->next = in->buf;
|
||||
do {
|
||||
len = read(in->fd, in->buf + in->left, CHUNK - in->left);
|
||||
if (len < 0)
|
||||
return -1;
|
||||
in->left += (unsigned)len;
|
||||
} while (len != 0 && in->left < CHUNK);
|
||||
return len == 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
/* get a byte from the file, bail if end of file -- the buffer is reloaded
   with bload() when it is empty, and the value of the expression is the next
   byte (0..255).  The argument is evaluated more than once, so it must not
   have side effects; it is parenthesized so that any pointer-valued
   expression can be passed. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
|
||||
|
||||
/* get a four-byte little-endian unsigned integer from file */
|
||||
local unsigned long bget4(bin *in)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
val = bget(in);
|
||||
val += (unsigned long)(bget(in)) << 8;
|
||||
val += (unsigned long)(bget(in)) << 16;
|
||||
val += (unsigned long)(bget(in)) << 24;
|
||||
return val;
|
||||
}
|
||||
|
||||
/* skip bytes in file */
|
||||
local void bskip(bin *in, unsigned skip)
|
||||
{
|
||||
/* check pointer */
|
||||
if (in == NULL)
|
||||
return;
|
||||
|
||||
/* easy case -- skip bytes in buffer */
|
||||
if (skip <= in->left) {
|
||||
in->left -= skip;
|
||||
in->next += skip;
|
||||
return;
|
||||
}
|
||||
|
||||
/* skip what's in buffer, discard buffer contents */
|
||||
skip -= in->left;
|
||||
in->left = 0;
|
||||
|
||||
/* seek past multiples of CHUNK bytes */
|
||||
if (skip > CHUNK) {
|
||||
unsigned left;
|
||||
|
||||
left = skip & (CHUNK - 1);
|
||||
if (left == 0) {
|
||||
/* exact number of chunks: seek all the way minus one byte to check
|
||||
for end-of-file with a read */
|
||||
lseek(in->fd, skip - 1, SEEK_CUR);
|
||||
if (read(in->fd, in->buf, 1) != 1)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* skip the integral chunks, update skip with remainder */
|
||||
lseek(in->fd, skip - left, SEEK_CUR);
|
||||
skip = left;
|
||||
}
|
||||
|
||||
/* read more input and skip remainder */
|
||||
bload(in);
|
||||
if (skip > in->left)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
in->left -= skip;
|
||||
in->next += skip;
|
||||
}
|
||||
|
||||
/* -- end of buffered input functions -- */
|
||||
|
||||
/* skip the gzip header from file in */
|
||||
local void gzhead(bin *in)
|
||||
{
|
||||
int flags;
|
||||
|
||||
/* verify gzip magic header and compression method */
|
||||
if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
|
||||
bail(in->name, " is not a valid gzip file");
|
||||
|
||||
/* get and verify flags */
|
||||
flags = bget(in);
|
||||
if ((flags & 0xe0) != 0)
|
||||
bail("unknown reserved bits set in ", in->name);
|
||||
|
||||
/* skip modification time, extra flags, and os */
|
||||
bskip(in, 6);
|
||||
|
||||
/* skip extra field if present */
|
||||
if (flags & 4) {
|
||||
unsigned len;
|
||||
|
||||
len = bget(in);
|
||||
len += (unsigned)(bget(in)) << 8;
|
||||
bskip(in, len);
|
||||
}
|
||||
|
||||
/* skip file name if present */
|
||||
if (flags & 8)
|
||||
while (bget(in) != 0)
|
||||
;
|
||||
|
||||
/* skip comment if present */
|
||||
if (flags & 16)
|
||||
while (bget(in) != 0)
|
||||
;
|
||||
|
||||
/* skip header crc if present */
|
||||
if (flags & 2)
|
||||
bskip(in, 2);
|
||||
}
|
||||
|
||||
/* write the low 32 bits of val to out as four bytes, least significant byte
   first */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
|
||||
|
||||
/* Load up zlib stream from buffered input, bail if end of file */
|
||||
local void zpull(z_streamp strm, bin *in)
|
||||
{
|
||||
if (in->left == 0)
|
||||
bload(in);
|
||||
if (in->left == 0)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
strm->avail_in = in->left;
|
||||
strm->next_in = in->next;
|
||||
}
|
||||
|
||||
/* Write header for gzip file to out and initialize trailer. */
|
||||
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
|
||||
{
|
||||
fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
|
||||
*crc = crc32(0L, Z_NULL, 0);
|
||||
*tot = 0;
|
||||
}
|
||||
|
||||
/* Copy the compressed data from name, zeroing the last block bit of the last
|
||||
block if clr is true, and adding empty blocks as needed to get to a byte
|
||||
boundary. If clr is false, then the last block becomes the last block of
|
||||
the output, and the gzip trailer is written. crc and tot maintains the
|
||||
crc and length (modulo 2^32) of the output for the trailer. The resulting
|
||||
gzip file is written to out. gzinit() must be called before the first call
|
||||
of gzcopy() to write the gzip header and to initialize crc and tot. */
|
||||
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
|
||||
FILE *out)
|
||||
{
|
||||
int ret; /* return value from zlib functions */
|
||||
int pos; /* where the "last block" bit is in byte */
|
||||
int last; /* true if processing the last block */
|
||||
bin *in; /* buffered input file */
|
||||
unsigned char *start; /* start of compressed data in buffer */
|
||||
unsigned char *junk; /* buffer for uncompressed data -- discarded */
|
||||
z_off_t len; /* length of uncompressed data (support > 4 GB) */
|
||||
z_stream strm; /* zlib inflate stream */
|
||||
|
||||
/* open gzip file and skip header */
|
||||
in = bopen(name);
|
||||
if (in == NULL)
|
||||
bail("could not open ", name);
|
||||
gzhead(in);
|
||||
|
||||
/* allocate buffer for uncompressed data and initialize raw inflate
|
||||
stream */
|
||||
junk = malloc(CHUNK);
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, -15);
|
||||
if (junk == NULL || ret != Z_OK)
|
||||
bail("out of memory", "");
|
||||
|
||||
/* inflate and copy compressed data, clear last-block bit if requested */
|
||||
len = 0;
|
||||
zpull(&strm, in);
|
||||
start = strm.next_in;
|
||||
last = start[0] & 1;
|
||||
if (last && clr)
|
||||
start[0] &= ~1;
|
||||
strm.avail_out = 0;
|
||||
for (;;) {
|
||||
/* if input used and output done, write used input and get more */
|
||||
if (strm.avail_in == 0 && strm.avail_out != 0) {
|
||||
fwrite(start, 1, strm.next_in - start, out);
|
||||
start = in->buf;
|
||||
in->left = 0;
|
||||
zpull(&strm, in);
|
||||
}
|
||||
|
||||
/* decompress -- return early when end-of-block reached */
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = junk;
|
||||
ret = inflate(&strm, Z_BLOCK);
|
||||
switch (ret) {
|
||||
case Z_MEM_ERROR:
|
||||
bail("out of memory", "");
|
||||
case Z_DATA_ERROR:
|
||||
bail("invalid compressed data in ", in->name);
|
||||
}
|
||||
|
||||
/* update length of uncompressed data */
|
||||
len += CHUNK - strm.avail_out;
|
||||
|
||||
/* check for block boundary (only get this when block copied out) */
|
||||
if (strm.data_type & 128) {
|
||||
/* if that was the last block, then done */
|
||||
if (last)
|
||||
break;
|
||||
|
||||
/* number of unused bits in last byte */
|
||||
pos = strm.data_type & 7;
|
||||
|
||||
/* find the next last-block bit */
|
||||
if (pos != 0) {
|
||||
/* next last-block bit is in last used byte */
|
||||
pos = 0x100 >> pos;
|
||||
last = strm.next_in[-1] & pos;
|
||||
if (last && clr)
|
||||
strm.next_in[-1] &= ~pos;
|
||||
}
|
||||
else {
|
||||
/* next last-block bit is in next unused byte */
|
||||
if (strm.avail_in == 0) {
|
||||
/* don't have that byte yet -- get it */
|
||||
fwrite(start, 1, strm.next_in - start, out);
|
||||
start = in->buf;
|
||||
in->left = 0;
|
||||
zpull(&strm, in);
|
||||
}
|
||||
last = strm.next_in[0] & 1;
|
||||
if (last && clr)
|
||||
strm.next_in[0] &= ~1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* update buffer with unused input */
|
||||
in->left = strm.avail_in;
|
||||
in->next = strm.next_in;
|
||||
|
||||
/* copy used input, write empty blocks to get to byte boundary */
|
||||
pos = strm.data_type & 7;
|
||||
fwrite(start, 1, in->next - start - 1, out);
|
||||
last = in->next[-1];
|
||||
if (pos == 0 || !clr)
|
||||
/* already at byte boundary, or last file: write last byte */
|
||||
putc(last, out);
|
||||
else {
|
||||
/* append empty blocks to last byte */
|
||||
last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
|
||||
if (pos & 1) {
|
||||
/* odd -- append an empty stored block */
|
||||
putc(last, out);
|
||||
if (pos == 1)
|
||||
putc(0, out); /* two more bits in block header */
|
||||
fwrite("\0\0\xff\xff", 1, 4, out);
|
||||
}
|
||||
else {
|
||||
/* even -- append 1, 2, or 3 empty fixed blocks */
|
||||
switch (pos) {
|
||||
case 6:
|
||||
putc(last | 8, out);
|
||||
last = 0;
|
||||
case 4:
|
||||
putc(last | 0x20, out);
|
||||
last = 0;
|
||||
case 2:
|
||||
putc(last | 0x80, out);
|
||||
putc(0, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* update crc and tot */
|
||||
*crc = crc32_combine(*crc, bget4(in), len);
|
||||
*tot += (unsigned long)len;
|
||||
|
||||
/* clean up */
|
||||
inflateEnd(&strm);
|
||||
free(junk);
|
||||
bclose(in);
|
||||
|
||||
/* write trailer if this is the last gzip file */
|
||||
if (!clr) {
|
||||
put4(*crc, out);
|
||||
put4(*tot, out);
|
||||
}
|
||||
}
|
||||
|
||||
/* join the gzip files named on the command line into a single gzip stream
   written to stdout -- with no arguments, print a usage message to stderr */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* step over the command name */
    argc--;
    argv++;

    /* nothing to join: explain how to use the program */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* write the header, then copy each file in turn; the count of files
       still to come is passed as clr, so it is zero only for the last one */
    gzinit(&crc, &tot, stdout);
    for (; argc != 0; argv++) {
        argc--;
        gzcopy(*argv, argc, &crc, &tot, stdout);
    }

    /* done */
    return 0;
}
|
||||
413
examples/gzlog.c
Normal file
413
examples/gzlog.c
Normal file
@@ -0,0 +1,413 @@
|
||||
/*
|
||||
* gzlog.c
|
||||
* Copyright (C) 2004 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in gzlog.h
|
||||
* version 1.0, 26 Nov 2004
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h> /* memcmp() */
|
||||
#include <stdlib.h> /* malloc(), free(), NULL */
|
||||
#include <sys/types.h> /* size_t, off_t */
|
||||
#include <unistd.h> /* read(), close(), sleep(), ftruncate(), */
|
||||
/* lseek() */
|
||||
#include <fcntl.h> /* open() */
|
||||
#include <sys/file.h> /* flock() */
|
||||
#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */
|
||||
|
||||
#include "gzlog.h" /* interface */
|
||||
#define local static
|
||||
|
||||
/* state of an open log file */
typedef struct {
    int id;                 /* GZLOGID when the object is valid */
    int fd;                 /* descriptor of the log file */
    off_t extra;            /* file offset of the "ap" subfield data */
    off_t mark_off;         /* file offset of the marked data */
    off_t last_off;         /* file offset of the last block */
    unsigned long crc;      /* crc of the uncompressed data */
    unsigned long len;      /* uncompressed length, modulo 2^32 */
    unsigned stored;        /* bytes in the stored block being written */
} gz_log;
|
||||
|
||||
#define GZLOGID 19334 /* gz_log object identifier */
|
||||
|
||||
#define LOCK_RETRY 1 /* retry lock once a second */
|
||||
#define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */
|
||||
|
||||
/* acquire a lock on a file */
|
||||
local int lock(int fd)
|
||||
{
|
||||
int patience;
|
||||
|
||||
/* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */
|
||||
patience = LOCK_PATIENCE;
|
||||
do {
|
||||
if (flock(fd, LOCK_EX + LOCK_NB) == 0)
|
||||
return 0;
|
||||
(void)sleep(LOCK_RETRY);
|
||||
patience -= LOCK_RETRY;
|
||||
} while (patience > 0);
|
||||
|
||||
/* we've run out of patience -- give up */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* drop the flock() lock held on fd -- the result is deliberately ignored */
local void unlock(int fd)
{
    (void)flock(fd, LOCK_UN);
}
|
||||
|
||||
/* release a log object */
|
||||
local void log_clean(gz_log *log)
|
||||
{
|
||||
unlock(log->fd);
|
||||
(void)close(log->fd);
|
||||
free(log);
|
||||
}
|
||||
|
||||
/* assemble the four bytes at buf, least significant first, into an unsigned
   long */
local unsigned long make_ulg(unsigned char *buf)
{
    unsigned long val = 0;
    int k;

    /* work down from the most significant byte, making room each time */
    for (k = 3; k >= 0; k--)
        val = (val << 8) | buf[k];
    return val;
}
|
||||
|
||||
/* read an off_t from an eight-byte little-endian byte buffer -- the value is
   built from the most significant byte down using only eight-bit shifts, so
   that no shift count can reach the width of off_t on systems where off_t is
   narrower than 64 bits; on such systems the bytes that do not fit are
   ignored.  The stored offset is assumed to be non-negative and
   representable in an off_t. */
local off_t make_off(unsigned char *buf)
{
    int n;
    off_t val;

    val = 0;
    for (n = 7; n >= 0; n--)
        if (n < (int)sizeof(off_t))
            val = (val << 8) + (off_t)buf[n];
    return val;
}
|
||||
|
||||
/* store the low 32 bits of val in the four bytes at buf, least significant
   byte first */
local void dice_ulg(unsigned long val, unsigned char *buf)
{
    buf[0] = (unsigned char)(val & 0xff);
    buf[1] = (unsigned char)((val >> 8) & 0xff);
    buf[2] = (unsigned char)((val >> 16) & 0xff);
    buf[3] = (unsigned char)((val >> 24) & 0xff);
}
|
||||
|
||||
/* store val in the eight bytes at buf, least significant byte first -- the
   value is shifted down eight bits at a time so that the shift count stays
   small whatever the width of off_t */
local void dice_off(off_t val, unsigned char *buf)
{
    unsigned char *end;

    end = buf + 8;
    while (buf != end) {
        *buf++ = (unsigned char)(val & 0xff);
        val >>= 8;
    }
}
|
||||
|
||||
/* initial, empty gzip file for appending -- 45 bytes: a 10-byte header, a
   22-byte extra field (length plus one "ap" subfield holding two 8-byte
   offsets), an empty stored block marked as last, and the 8-byte trailer.
   The bytes are unsigned because values such as 0x8b and 0xff do not fit in
   a signed char. */
local const unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
|
||||
|
||||
/* initialize a log object with locking */
|
||||
void *gzlog_open(char *path)
|
||||
{
|
||||
unsigned xlen;
|
||||
unsigned char temp[20];
|
||||
unsigned sub_len;
|
||||
int good;
|
||||
gz_log *log;
|
||||
|
||||
/* allocate log structure */
|
||||
log = malloc(sizeof(gz_log));
|
||||
if (log == NULL)
|
||||
return NULL;
|
||||
log->id = GZLOGID;
|
||||
|
||||
/* open file, creating it if necessary, and locking it */
|
||||
log->fd = open(path, O_RDWR | O_CREAT, 0600);
|
||||
if (log->fd < 0) {
|
||||
free(log);
|
||||
return NULL;
|
||||
}
|
||||
if (lock(log->fd)) {
|
||||
close(log->fd);
|
||||
free(log);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if file is empty, write new gzip stream */
|
||||
if (lseek(log->fd, 0, SEEK_END) == 0) {
|
||||
if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) {
|
||||
log_clean(log);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* check gzip header */
|
||||
(void)lseek(log->fd, 0, SEEK_SET);
|
||||
if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f ||
|
||||
temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) {
|
||||
log_clean(log);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* process extra field to find "ap" sub-field */
|
||||
xlen = temp[10] + (temp[11] << 8);
|
||||
good = 0;
|
||||
while (xlen) {
|
||||
if (xlen < 4 || read(log->fd, temp, 4) != 4)
|
||||
break;
|
||||
sub_len = temp[2];
|
||||
sub_len += temp[3] << 8;
|
||||
xlen -= 4;
|
||||
if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) {
|
||||
good = 1;
|
||||
break;
|
||||
}
|
||||
if (xlen < sub_len)
|
||||
break;
|
||||
(void)lseek(log->fd, sub_len, SEEK_CUR);
|
||||
xlen -= sub_len;
|
||||
}
|
||||
if (!good) {
|
||||
log_clean(log);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* read in "ap" sub-field */
|
||||
log->extra = lseek(log->fd, 0, SEEK_CUR);
|
||||
if (read(log->fd, temp, 16) != 16) {
|
||||
log_clean(log);
|
||||
return NULL;
|
||||
}
|
||||
log->mark_off = make_off(temp);
|
||||
log->last_off = make_off(temp + 8);
|
||||
|
||||
/* get crc, length of gzip file */
|
||||
(void)lseek(log->fd, log->last_off, SEEK_SET);
|
||||
if (read(log->fd, temp, 13) != 13 ||
|
||||
memcmp(temp, "\001\000\000\377\377", 5) != 0) {
|
||||
log_clean(log);
|
||||
return NULL;
|
||||
}
|
||||
log->crc = make_ulg(temp + 5);
|
||||
log->len = make_ulg(temp + 9);
|
||||
|
||||
/* set up to write over empty last block */
|
||||
(void)lseek(log->fd, log->last_off + 5, SEEK_SET);
|
||||
log->stored = 0;
|
||||
return (void *)log;
|
||||
}
|
||||
|
||||
/* maximum amount to put in a stored block before starting a new one */
|
||||
#define MAX_BLOCK 16384
|
||||
|
||||
/* write a block to a log object */
|
||||
int gzlog_write(void *obj, char *data, size_t len)
|
||||
{
|
||||
size_t some;
|
||||
unsigned char temp[5];
|
||||
gz_log *log;
|
||||
|
||||
/* check object */
|
||||
log = (gz_log *)obj;
|
||||
if (log == NULL || log->id != GZLOGID)
|
||||
return 1;
|
||||
|
||||
/* write stored blocks until all of the input is written */
|
||||
do {
|
||||
some = MAX_BLOCK - log->stored;
|
||||
if (some > len)
|
||||
some = len;
|
||||
if (write(log->fd, data, some) != some)
|
||||
return 1;
|
||||
log->crc = crc32(log->crc, data, some);
|
||||
log->len += some;
|
||||
len -= some;
|
||||
data += some;
|
||||
log->stored += some;
|
||||
|
||||
/* if the stored block is full, end it and start another */
|
||||
if (log->stored == MAX_BLOCK) {
|
||||
(void)lseek(log->fd, log->last_off, SEEK_SET);
|
||||
temp[0] = 0;
|
||||
dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
|
||||
temp + 1);
|
||||
if (write(log->fd, temp, 5) != 5)
|
||||
return 1;
|
||||
log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
|
||||
(void)lseek(log->fd, 5, SEEK_CUR);
|
||||
log->stored = 0;
|
||||
}
|
||||
} while (len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* recompress the remaining stored deflate data in place */
|
||||
local int recomp(gz_log *log)
|
||||
{
|
||||
z_stream strm;
|
||||
size_t len, max;
|
||||
unsigned char *in;
|
||||
unsigned char *out;
|
||||
unsigned char temp[16];
|
||||
|
||||
/* allocate space and read it all in (it's around 1 MB) */
|
||||
len = log->last_off - log->mark_off;
|
||||
max = len + (len >> 12) + (len >> 14) + 11;
|
||||
out = malloc(max);
|
||||
if (out == NULL)
|
||||
return 1;
|
||||
in = malloc(len);
|
||||
if (in == NULL) {
|
||||
free(out);
|
||||
return 1;
|
||||
}
|
||||
(void)lseek(log->fd, log->mark_off, SEEK_SET);
|
||||
if (read(log->fd, in, len) != len) {
|
||||
free(in);
|
||||
free(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* recompress in memory, decoding stored data as we go */
|
||||
/* note: this assumes that unsigned is four bytes or more */
|
||||
/* consider not making that assumption */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8,
|
||||
Z_DEFAULT_STRATEGY) != Z_OK) {
|
||||
free(in);
|
||||
free(out);
|
||||
return 1;
|
||||
}
|
||||
strm.next_in = in;
|
||||
strm.avail_out = max;
|
||||
strm.next_out = out;
|
||||
while (len >= 5) {
|
||||
if (strm.next_in[0] != 0)
|
||||
break;
|
||||
strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8);
|
||||
strm.next_in += 5;
|
||||
len -= 5;
|
||||
if (strm.avail_in != 0) {
|
||||
if (len < strm.avail_in)
|
||||
break;
|
||||
len -= strm.avail_in;
|
||||
(void)deflate(&strm, Z_NO_FLUSH);
|
||||
if (strm.avail_in != 0 || strm.avail_out == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
(void)deflate(&strm, Z_SYNC_FLUSH);
|
||||
(void)deflateEnd(&strm);
|
||||
free(in);
|
||||
if (len != 0 || strm.avail_out == 0) {
|
||||
free(out);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* overwrite stored data with compressed data */
|
||||
(void)lseek(log->fd, log->mark_off, SEEK_SET);
|
||||
len = max - strm.avail_out;
|
||||
if (write(log->fd, out, len) != len) {
|
||||
free(out);
|
||||
return 1;
|
||||
}
|
||||
free(out);
|
||||
|
||||
/* write last empty block, crc, and length */
|
||||
log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR);
|
||||
temp[0] = 1;
|
||||
dice_ulg(0xffffL << 16, temp + 1);
|
||||
dice_ulg(log->crc, temp + 5);
|
||||
dice_ulg(log->len, temp + 9);
|
||||
if (write(log->fd, temp, 13) != 13)
|
||||
return 1;
|
||||
|
||||
/* truncate file to discard remaining stored data and old trailer */
|
||||
ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR));
|
||||
|
||||
/* update extra field to point to new last empty block */
|
||||
(void)lseek(log->fd, log->extra, SEEK_SET);
|
||||
dice_off(log->mark_off, temp);
|
||||
dice_off(log->last_off, temp + 8);
|
||||
if (write(log->fd, temp, 16) != 16)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* maximum accumulation of stored blocks before compressing */
|
||||
#define MAX_STORED 1048576
|
||||
|
||||
/* close log object */
|
||||
int gzlog_close(void *obj)
|
||||
{
|
||||
unsigned char temp[8];
|
||||
gz_log *log;
|
||||
|
||||
/* check object */
|
||||
log = (gz_log *)obj;
|
||||
if (log == NULL || log->id != GZLOGID)
|
||||
return 1;
|
||||
|
||||
/* go to start of most recent block being written */
|
||||
(void)lseek(log->fd, log->last_off, SEEK_SET);
|
||||
|
||||
/* if some stuff was put there, update block */
|
||||
if (log->stored) {
|
||||
temp[0] = 0;
|
||||
dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
|
||||
temp + 1);
|
||||
if (write(log->fd, temp, 5) != 5)
|
||||
return 1;
|
||||
log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
|
||||
}
|
||||
|
||||
/* write last block (empty) */
|
||||
if (write(log->fd, "\001\000\000\377\377", 5) != 5)
|
||||
return 1;
|
||||
|
||||
/* write updated crc and uncompressed length */
|
||||
dice_ulg(log->crc, temp);
|
||||
dice_ulg(log->len, temp + 4);
|
||||
if (write(log->fd, temp, 8) != 8)
|
||||
return 1;
|
||||
|
||||
/* put offset of that last block in gzip extra block */
|
||||
(void)lseek(log->fd, log->extra + 8, SEEK_SET);
|
||||
dice_off(log->last_off, temp);
|
||||
if (write(log->fd, temp, 8) != 8)
|
||||
return 1;
|
||||
|
||||
/* if more than 1 MB stored, then time to compress it */
|
||||
if (log->last_off - log->mark_off > MAX_STORED) {
|
||||
if (recomp(log))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* unlock and close file */
|
||||
log_clean(log);
|
||||
return 0;
|
||||
}
|
||||
58
examples/gzlog.h
Normal file
58
examples/gzlog.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/* gzlog.h
|
||||
Copyright (C) 2004 Mark Adler, all rights reserved
|
||||
version 1.0, 26 Nov 2004
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/*
|
||||
The gzlog object allows writing short messages to a gzipped log file,
|
||||
opening the log file locked for small bursts, and then closing it. The log
|
||||
object works by appending stored data to the gzip file until 1 MB has been
|
||||
accumulated. At that time, the stored data is compressed, and replaces the
|
||||
uncompressed data in the file. The log file is truncated to its new size at
|
||||
that time. After closing, the log file is always a valid gzip file that can
be decompressed to recover what was written.
|
||||
|
||||
A gzip header "extra" field contains two file offsets for appending. The
|
||||
first points to just after the last compressed data. The second points to
|
||||
the last stored block in the deflate stream, which is empty. All of the
|
||||
data between those pointers is uncompressed.
|
||||
*/
|
||||
|
||||
/* Open a gzlog object, creating the log file if it does not exist. Return
|
||||
NULL on error. Note that gzlog_open() could take a long time to return if
|
||||
there is difficulty in locking the file. */
|
||||
void *gzlog_open(char *path);
|
||||
|
||||
/* Write to a gzlog object. Return non-zero on error. This function will
|
||||
simply write data to the file uncompressed. Compression of the data
|
||||
will not occur until gzlog_close() is called. It is expected that
|
||||
gzlog_write() is used for a short message, and then gzlog_close() is
|
||||
called. If a large amount of data is to be written, then the application
|
||||
should write no more than 1 MB at a time with gzlog_write() before
|
||||
calling gzlog_close() and then gzlog_open() again. */
|
||||
int gzlog_write(void *log, char *data, size_t len);
|
||||
|
||||
/* Close a gzlog object. Return non-zero on error. The log file is locked
|
||||
until this function is called. This function will compress stored data
|
||||
at the end of the gzip file if at least 1 MB has been accumulated. Note
|
||||
that the file will not be a valid gzip file until this function completes.
|
||||
*/
|
||||
int gzlog_close(void *log);
|
||||
522
examples/zlib_how.html
Normal file
522
examples/zlib_how.html
Normal file
@@ -0,0 +1,522 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>zlib Usage Example</title>
|
||||
<!-- Copyright (c) 2004 Mark Adler. -->
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000">
|
||||
<h2 align="center"> zlib Usage Example </h2>
|
||||
We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used.
|
||||
Users wonder when they should provide more input, when they should use more output,
|
||||
what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and
|
||||
so on. So for those who have read <tt>zlib.h</tt> (a few times), and
|
||||
would like further edification, below is an annotated example in C of simple routines to compress and decompress
|
||||
from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The
|
||||
annotations are interspersed between lines of the code. So please read between the lines.
|
||||
We hope this helps explain some of the intricacies of <em>zlib</em>.
|
||||
<p>
|
||||
Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>:
|
||||
<pre><b>
|
||||
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.2 9 November 2004 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 30 Oct 2004 First version
|
||||
1.1 8 Nov 2004 Add void casting for unused return values
|
||||
Use switch statement for inflate() return values
|
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees
|
||||
*/
|
||||
</b></pre><!-- -->
|
||||
We now include the header files for the required definitions. From
|
||||
<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>,
|
||||
<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and
|
||||
<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use
|
||||
<tt>strcmp()</tt> for command line argument processing.
|
||||
From <tt>assert.h</tt> we use the <tt>assert()</tt> macro.
|
||||
From <tt>zlib.h</tt>
|
||||
we use the basic compression functions <tt>deflateInit()</tt>,
|
||||
<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression
|
||||
functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and
|
||||
<tt>inflateEnd()</tt>.
|
||||
<pre><b>
|
||||
#include &lt;stdio.h&gt;
#include &lt;string.h&gt;
#include &lt;assert.h&gt;
|
||||
#include "zlib.h"
|
||||
</b></pre><!-- -->
|
||||
<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data
|
||||
from the <em>zlib</em> routines. Larger buffer sizes would be more efficient,
|
||||
especially for <tt>inflate()</tt>. If the memory is available, buffer sizes
|
||||
on the order of 128K or 256K bytes should be used.
|
||||
<pre><b>
|
||||
#define CHUNK 16384
|
||||
</b></pre><!-- -->
|
||||
The <tt>def()</tt> routine compresses data from an input file to an output file. The output data
|
||||
will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em>
|
||||
formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as
|
||||
a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast
|
||||
check value to verify the integrity of the uncompressed data after decoding.
|
||||
<pre><b>
|
||||
/* Compress from file source to file dest until EOF on source.
|
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression
|
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
|
||||
version of the library linked do not match, or Z_ERRNO if there is
|
||||
an error reading or writing the files. */
|
||||
int def(FILE *source, FILE *dest, int level)
|
||||
{
|
||||
</b></pre>
|
||||
Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em>
|
||||
return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>,
|
||||
which is either no flushing, or flush to completion after the end of the input file is reached.
|
||||
<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure
|
||||
is used to pass information to and from the <em>zlib</em> routines, and to maintain the
|
||||
<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for
|
||||
<tt>deflate()</tt>.
|
||||
<pre><b>
|
||||
int ret, flush;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
char in[CHUNK];
|
||||
char out[CHUNK];
|
||||
</b></pre><!-- -->
|
||||
The first thing we do is to initialize the <em>zlib</em> state for compression using
|
||||
<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>.
|
||||
The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt>
|
||||
structure must be initialized before calling <tt>deflateInit()</tt>. Here they are
|
||||
set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use
|
||||
the default memory allocation routines. An application may also choose to provide
|
||||
custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the
|
||||
order of 256K bytes for the internal state.
|
||||
(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.)
|
||||
<p>
|
||||
<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and
|
||||
the compression level, which is an integer in the range of -1 to 9. Lower compression
|
||||
levels result in faster execution, but less compression. Higher levels result in
|
||||
greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION,
|
||||
equal to -1,
|
||||
provides a good compromise between compression and speed and is equivalent to level 6.
|
||||
Level 0 actually does no compression at all, and in fact expands the data slightly to produce
|
||||
the <em>zlib</em> format (it is not a byte-for-byte copy of the input).
|
||||
More advanced applications of <em>zlib</em>
|
||||
may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how
|
||||
much memory will be used, at some price in compression. Or it may need to request a
|
||||
<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw
|
||||
encoding with no header or trailer at all.
|
||||
<p>
|
||||
We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant
|
||||
<tt>Z_OK</tt> to make sure that it was able to
|
||||
allocate memory for the internal state, and that the provided arguments were valid.
|
||||
<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt>
|
||||
file came from matches the version of <em>zlib</em> actually linked with the program. This
|
||||
is especially important for environments in which <em>zlib</em> is a shared library.
|
||||
<p>
|
||||
Note that an application can initialize multiple, independent <em>zlib</em> streams, which can
|
||||
operate in parallel. The state information maintained in the structure allows the <em>zlib</em>
|
||||
routines to be reentrant.
|
||||
<pre><b>
|
||||
/* allocate deflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
ret = deflateInit(&strm, level);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
</b></pre><!-- -->
|
||||
With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop
|
||||
reads all of the input file and exits at the bottom of the loop once end-of-file is reached.
|
||||
This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the
|
||||
input data has been processed and that all of the output data has been generated and consumed
|
||||
before we fall out of the loop at the bottom.
|
||||
<pre><b>
|
||||
/* compress until end of file */
|
||||
do {
|
||||
</b></pre>
|
||||
We start off by reading data from the input file. The number of bytes read is put directly
|
||||
into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also
|
||||
check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the
|
||||
<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to
|
||||
indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt>
|
||||
to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The
|
||||
reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss
|
||||
the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish
|
||||
up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em>
|
||||
constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still
|
||||
in the middle of the uncompressed data.
|
||||
<p>
|
||||
If there is an error in reading from the input file, the process is aborted with
|
||||
<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning
|
||||
the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called
|
||||
at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or
|
||||
<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is
|
||||
no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail.
|
||||
<pre><b>
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
|
||||
strm.next_in = in;
|
||||
</b></pre><!-- -->
|
||||
The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then
|
||||
keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more
|
||||
new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e.,
|
||||
<tt>avail_in</tt> will be zero.
|
||||
<pre><b>
|
||||
/* run deflate() on input until output buffer not full, finish
|
||||
compression if all of source has been read in */
|
||||
do {
|
||||
</b></pre>
|
||||
Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number
|
||||
of available output bytes and <tt>next_out</tt> to a pointer to that space.
|
||||
<pre><b>
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
</b></pre>
|
||||
Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the
|
||||
<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as
|
||||
<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then
|
||||
updated past the input data consumed and the output data written. It is the amount of
|
||||
output space available that may limit how much input is consumed.
|
||||
Hence the inner loop to make sure that
|
||||
all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt>
|
||||
and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those
|
||||
between <tt>deflate()</tt> calls until it's all used up.
|
||||
<p>
|
||||
The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing
|
||||
the input and output information and the internal compression engine state, and a parameter
|
||||
indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume
|
||||
several K bytes of input data before producing any output (except for the header), in order
|
||||
to accumulate statistics on the data for optimum compression. It will then put out a burst of
|
||||
compressed data, and proceed to consume more input before the next burst. Eventually,
|
||||
<tt>deflate()</tt>
|
||||
must be told to terminate the stream, complete the compression with provided input data, and
|
||||
write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long
|
||||
as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided,
|
||||
<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how
|
||||
much output space is provided, <tt>deflate()</tt> may have to be called several times until it
|
||||
has provided the complete compressed stream, even after it has consumed all of the input. The flush
|
||||
parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls.
|
||||
<p>
|
||||
There are other values of the flush parameter that are used in more advanced applications. You can
|
||||
force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided
|
||||
so far, even if it wouldn't have otherwise, for example to control data latency on a link with
|
||||
compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to
|
||||
that point so that what follows can be decompressed independently, for example for random access
|
||||
applications. Both requests will degrade compression by an amount depending on how often such
|
||||
requests are made.
|
||||
<p>
|
||||
<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why
|
||||
not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through
|
||||
<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are
|
||||
<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt>
|
||||
is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of
|
||||
<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt>
|
||||
until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not
|
||||
initialized properly, but we did initialize it properly. There is no harm in checking for
|
||||
<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some
|
||||
other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state.
|
||||
<tt>Z_BUF_ERROR</tt> will be explained further below, but
|
||||
suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume
|
||||
more input or produce more output. <tt>deflate()</tt> can be called again with more output space
|
||||
or more available input, which it will be in this code.
|
||||
<pre><b>
|
||||
ret = deflate(&strm, flush); /* no bad return value */
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
</b></pre>
|
||||
Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the
|
||||
difference between how much space was provided before the call, and how much output space
|
||||
is still available after the call. Then that data, if any, is written to the output file.
|
||||
We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there
|
||||
is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak.
|
||||
<pre><b>
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
</b></pre>
|
||||
The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the
|
||||
provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with
|
||||
the provided input, and that all of that input has been consumed. We can then fall out of this
|
||||
loop and reuse the input buffer.
|
||||
<p>
|
||||
The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill
|
||||
the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that
|
||||
<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer!
|
||||
<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can.
|
||||
As far as we know, <tt>deflate()</tt>
|
||||
has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output
|
||||
at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call
|
||||
wasn't able to do anything, either consume input or produce output, and so it returns
|
||||
<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at
|
||||
all. Now we finally have the desired indication that <tt>deflate()</tt> is really done,
|
||||
and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>.
|
||||
<p>
|
||||
With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will
|
||||
complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return
|
||||
<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing
|
||||
until the state is reinitialized.
|
||||
<p>
|
||||
Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt>
|
||||
instead of the single inner loop we have here. The first loop would call
|
||||
without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call
|
||||
<tt>deflate()</tt> with no more
|
||||
data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this
|
||||
example, that can be avoided by simply keeping track of the current flush state.
|
||||
<pre><b>
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
</b></pre><!-- -->
|
||||
Now we check to see if we have already processed all of the input file. That information was
|
||||
saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so,
|
||||
then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt>
|
||||
from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was
|
||||
consumed and all of the output was generated.
|
||||
<pre><b>
|
||||
/* done when last data in file processed */
|
||||
} while (flush != Z_FINISH);
|
||||
assert(ret == Z_STREAM_END); /* stream will be complete */
|
||||
</b></pre><!-- -->
|
||||
The process is complete, but we still need to deallocate the state to avoid a memory leak
|
||||
(or rather more like a memory hemorrhage if you didn't do this). Then
|
||||
finally we can return with a happy return value.
|
||||
<pre><b>
|
||||
/* clean up and return */
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_OK;
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt>
|
||||
decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the
|
||||
uncompressed data to the output file. Much of the discussion above for <tt>def()</tt>
|
||||
applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between
|
||||
the two.
|
||||
<pre><b>
|
||||
/* Decompress from file source to file dest until stream ends or EOF.
|
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_DATA_ERROR if the deflate data is
|
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
|
||||
the version of the library linked do not match, or Z_ERRNO if there
|
||||
is an error reading or writing the files. */
|
||||
int inf(FILE *source, FILE *dest)
|
||||
{
|
||||
</b></pre>
|
||||
The local variables have the same functionality as they do for <tt>def()</tt>. The
|
||||
only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt>
|
||||
can tell from the <em>zlib</em> stream itself when the stream is complete.
|
||||
<pre><b>
|
||||
int ret;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
char in[CHUNK];
|
||||
char out[CHUNK];
|
||||
</b></pre><!-- -->
|
||||
The initialization of the state is the same, except that there is no compression level,
|
||||
of course, and two more elements of the structure are initialized. <tt>avail_in</tt>
|
||||
and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This
|
||||
is because the application has the option to provide the start of the zlib stream in
|
||||
order for <tt>inflateInit()</tt> to have access to information about the compression
|
||||
method to aid in memory allocation. In the current implementation of <em>zlib</em>
|
||||
(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of
|
||||
<tt>inflate()</tt> anyway. However those fields must be initialized since later versions
|
||||
of <em>zlib</em> that provide more compression methods may take advantage of this interface.
|
||||
In any case, no decompression is performed by <tt>inflateInit()</tt>, so the
|
||||
<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling.
|
||||
<p>
|
||||
Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to
|
||||
indicate that no input data is being provided.
|
||||
<pre><b>
|
||||
/* allocate inflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit(&strm);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
</b></pre><!-- -->
|
||||
The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates
|
||||
that it has reached the end of the compressed data and has produced all of the uncompressed
|
||||
output. This is in contrast to <tt>def()</tt> which processes all of the input file.
|
||||
If end-of-file is reached before the compressed data self-terminates, then the compressed
|
||||
data is incomplete and an error is returned.
|
||||
<pre><b>
|
||||
/* decompress until deflate stream ends or end of file */
|
||||
do {
|
||||
</b></pre>
|
||||
We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the
|
||||
end of the input file, then we leave the outer loop and report an error, since the
|
||||
compressed data is incomplete. Note that we may read more data than is eventually consumed
|
||||
by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream.
|
||||
For applications where <em>zlib</em> streams are embedded in other data, this routine would
|
||||
need to be modified to return the unused data, or at least indicate how much of the input
|
||||
data was not used, so the application would know where to pick up after the <em>zlib</em> stream.
|
||||
<pre><b>
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
if (strm.avail_in == 0)
|
||||
break;
|
||||
strm.next_in = in;
|
||||
</b></pre><!-- -->
|
||||
The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to
|
||||
keep calling <tt>inflate()</tt> until it has generated all of the output it can with the
|
||||
provided input.
|
||||
<pre><b>
|
||||
/* run inflate() on input until output buffer not full */
|
||||
do {
|
||||
</b></pre>
|
||||
Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>.
|
||||
<pre><b>
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
</b></pre>
|
||||
Now we run the decompression engine itself. There is no need to adjust the flush parameter, since
|
||||
the <em>zlib</em> format is self-terminating. The main difference here is that there are
|
||||
return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt>
|
||||
indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format,
|
||||
which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was
|
||||
corrupted somewhere along the way since it was compressed. The other error to be processed is
|
||||
<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt>
|
||||
needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>.
|
||||
<p>
|
||||
Advanced applications may use
|
||||
<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the
|
||||
first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt>
|
||||
requests that that dictionary be provided before it can start to decompress. Without the dictionary,
|
||||
correct decompression is not possible. For this routine, we have no idea what the dictionary is,
|
||||
so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>.
|
||||
<p>
|
||||
<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here,
|
||||
but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be
|
||||
checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be
|
||||
checked for later.
|
||||
<pre><b>
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
ret = Z_DATA_ERROR; /* and fall through */
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
</b></pre>
|
||||
The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>.
|
||||
<pre><b>
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
</b></pre>
|
||||
The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated
|
||||
by not filling the output buffer, just as for <tt>deflate()</tt>.
|
||||
<pre><b>
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
</b></pre><!-- -->
|
||||
The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the
|
||||
end of the input <em>zlib</em> stream, has completed the decompression and integrity
|
||||
check, and has provided all of the output. This is indicated by the <tt>inflate()</tt>
|
||||
return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt>
|
||||
equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end
|
||||
of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the
|
||||
loop continues to read more input.
|
||||
<pre><b>
|
||||
/* done when inflate() says it's done */
|
||||
} while (ret != Z_STREAM_END);
|
||||
</b></pre><!-- -->
|
||||
At this point, decompression successfully completed, or we broke out of the loop due to no
|
||||
more data being available from the input file. If the last <tt>inflate()</tt> return value
|
||||
is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error
|
||||
is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt>
|
||||
is called first to avoid a memory leak.
|
||||
<pre><b>
|
||||
/* clean up and return */
|
||||
(void)inflateEnd(&strm);
|
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
That ends the routines that directly use <em>zlib</em>. The following routines make this
|
||||
a command-line program by running data through the above routines from <tt>stdin</tt> to
|
||||
<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>.
|
||||
<p>
|
||||
<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt>
|
||||
and <tt>inf()</tt>, as detailed in their comments above, and print out an error message.
|
||||
Note that these are only a subset of the possible return values from <tt>deflate()</tt>
|
||||
and <tt>inflate()</tt>.
|
||||
<pre><b>
|
||||
/* report a zlib or i/o error */
|
||||
void zerr(int ret)
|
||||
{
|
||||
fputs("zpipe: ", stderr);
|
||||
switch (ret) {
|
||||
case Z_ERRNO:
|
||||
if (ferror(stdin))
|
||||
fputs("error reading stdin\n", stderr);
|
||||
if (ferror(stdout))
|
||||
fputs("error writing stdout\n", stderr);
|
||||
break;
|
||||
case Z_STREAM_ERROR:
|
||||
fputs("invalid compression level\n", stderr);
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fputs("invalid or incomplete deflate data\n", stderr);
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
fputs("out of memory\n", stderr);
|
||||
break;
|
||||
case Z_VERSION_ERROR:
|
||||
fputs("zlib version mismatch!\n", stderr);
|
||||
}
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The
|
||||
<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if
|
||||
no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other
|
||||
arguments are provided, no compression or decompression is performed. Instead a usage
|
||||
message is displayed. Examples are <tt>zpipe &lt; foo.txt &gt; foo.txt.z</tt> to compress, and
|
||||
<tt>zpipe -d &lt; foo.txt.z &gt; foo.txt</tt> to decompress.
|
||||
<pre><b>
|
||||
/* compress or decompress from stdin to stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* do compression if no arguments */
|
||||
if (argc == 1) {
|
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* do decompression if -d specified */
|
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
|
||||
ret = inf(stdin, stdout);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* otherwise, report usage */
|
||||
else {
|
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
</b></pre>
|
||||
<hr>
|
||||
<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i>
|
||||
</body>
|
||||
</html>
|
||||
191
examples/zpipe.c
Normal file
191
examples/zpipe.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.2 9 November 2004 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 30 Oct 2004 First version
|
||||
1.1 8 Nov 2004 Add void casting for unused return values
|
||||
Use switch statement for inflate() return values
|
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define CHUNK 16384
|
||||
|
||||
/* Compress from file source to file dest until EOF on source.
|
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression
|
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
|
||||
version of the library linked do not match, or Z_ERRNO if there is
|
||||
an error reading or writing the files. */
|
||||
int def(FILE *source, FILE *dest, int level)
|
||||
{
|
||||
int ret, flush;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
char in[CHUNK];
|
||||
char out[CHUNK];
|
||||
|
||||
/* allocate deflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
ret = deflateInit(&strm, level);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* compress until end of file */
|
||||
do {
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
|
||||
strm.next_in = in;
|
||||
|
||||
/* run deflate() on input until output buffer not full, finish
|
||||
compression if all of source has been read in */
|
||||
do {
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
ret = deflate(&strm, flush); /* no bad return value */
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
|
||||
/* done when last data in file processed */
|
||||
} while (flush != Z_FINISH);
|
||||
assert(ret == Z_STREAM_END); /* stream will be complete */
|
||||
|
||||
/* clean up and return */
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
/* Decompress from file source to file dest until stream ends or EOF.
|
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_DATA_ERROR if the deflate data is
|
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
|
||||
the version of the library linked do not match, or Z_ERRNO if there
|
||||
is an error reading or writing the files. */
|
||||
int inf(FILE *source, FILE *dest)
|
||||
{
|
||||
int ret;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
char in[CHUNK];
|
||||
char out[CHUNK];
|
||||
|
||||
/* allocate inflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit(&strm);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* decompress until deflate stream ends or end of file */
|
||||
do {
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
if (strm.avail_in == 0)
|
||||
break;
|
||||
strm.next_in = in;
|
||||
|
||||
/* run inflate() on input until output buffer not full */
|
||||
do {
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
ret = Z_DATA_ERROR; /* and fall through */
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
|
||||
/* done when inflate() says it's done */
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
/* clean up and return */
|
||||
(void)inflateEnd(&strm);
|
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* report a zlib or i/o error */
|
||||
void zerr(int ret)
|
||||
{
|
||||
fputs("zpipe: ", stderr);
|
||||
switch (ret) {
|
||||
case Z_ERRNO:
|
||||
if (ferror(stdin))
|
||||
fputs("error reading stdin\n", stderr);
|
||||
if (ferror(stdout))
|
||||
fputs("error writing stdout\n", stderr);
|
||||
break;
|
||||
case Z_STREAM_ERROR:
|
||||
fputs("invalid compression level\n", stderr);
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fputs("invalid or incomplete deflate data\n", stderr);
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
fputs("out of memory\n", stderr);
|
||||
break;
|
||||
case Z_VERSION_ERROR:
|
||||
fputs("zlib version mismatch!\n", stderr);
|
||||
}
|
||||
}
|
||||
|
||||
/* compress or decompress from stdin to stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* do compression if no arguments */
|
||||
if (argc == 1) {
|
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* do decompression if -d specified */
|
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
|
||||
ret = inf(stdin, stdout);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* otherwise, report usage */
|
||||
else {
|
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user