/*
 * zip.c: handling zip files
 *
 * Hanhua Feng
 *
 * $Id: zip.c,v 1.14 2003/05/21 14:59:39 hanhua Exp $ 
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "htutil.h"
#include "zip.h"

/* Integer aliases used for the fields of the on-disk zip structures below.
 * NOTE(review): assumes unsigned int is 32 bits and unsigned short is 16 bits
 * on the target platform -- the DEBUG_ZIP main() verifies the struct sizes. */
typedef unsigned int uint32;
typedef unsigned short uint16;


#pragma pack(1)

/*
 * Local file header as it is laid out in the zip file (expected to be
 * 30 bytes under #pragma pack(1)).  zip_session_open() reads one of these
 * in front of the file data; multi-byte fields are decoded with
 * get_intel16()/get_intel32() before use.
 */
typedef struct {
    uint32 sign;         /* local file header signature: 0x04034b50 */
    uint16 extr_ver;     /* version needed to extract */
    uint16 flags;        /* general purpose flag */
    uint16 method;       /* compression method */
    uint16 time;         /* last mod file time */
    uint16 date;         /* last mod file date */
    uint32 crc32;        /* crc-32 */
    uint32 co_size;      /* compressed size */
    uint32 un_size;      /* uncompressed size */
    uint16 fn_len;       /* filename length */
    uint16 ef_len;       /* extra field length */
    /* followed by: filename; extra fields */
}
zipf_file_header;

/*
 * Data descriptor record layout (see FILE_DATA_DESC_ONLY).
 * NOTE(review): not referenced by any code visible in this file.
 */
typedef struct {
    uint32 crc32;        /* crc32 */
    uint32 co_size;      /* compressed size */
    uint32 un_size;      /* uncompressed size */
}
zipf_data_desc;

/*
 * Central directory entry as it is laid out in the zip file (expected to be
 * 46 bytes under #pragma pack(1)).  zip_loadentry() reads one per archived
 * file and converts it into a zipentry.
 */
typedef struct {
    uint32 sign;         /* central file header signature: 0x02014b50 */
    uint16 made_ver;     /* version made_by */
    uint16 extr_ver;     /* version needed to extract */
    uint16 flags;        /* general purpose flag */
    uint16 method;       /* compression method */
    uint16 time;         /* last mod file time */
    uint16 date;         /* last mod file date */
    uint32 crc32;        /* crc-32 */
    uint32 co_size;      /* compressed size */
    uint32 un_size;      /* uncompressed size */
    uint16 fn_len;       /* filename length */
    uint16 ef_len;       /* extra field length */
    uint16 cm_len;       /* file comment length */
    uint16 dn_start;     /* disk number start */
    uint16 in_attr;      /* internal file attributes */
    uint32 ex_attr;      /* external file attributes */
    uint32 offset;       /* relative offset of local header (as to the disk) */
    /* followed by: filename; extra field; file comment */
}
zipf_dir_entry;

/*
 * End-of-central-directory record as it is laid out in the zip file
 * (expected to be 22 bytes under #pragma pack(1)).  zip_open() locates it
 * by scanning the tail of the file for its "PK\005\006" signature.
 */
typedef struct {
    uint32 sign;         /* end of central dir signature: 0x06054b50 */
    uint16 dn_this;      /* number of this disk */
    uint16 dn_dir;       /* num. of the disk with the start of central dir */
    uint16 entry_this;   /* total num. of entries in c. dir on this disk */
    uint16 entry_num;    /* total number of entries in the central dir */
    uint32 dir_size;     /* size of the central directory */
    uint32 dir_off;      /* offset of start of c. dir (as to the 1st disk) */
    uint16 cm_len;       /* zipfile comment length */
    /* followed by: zipfile comment */
}
zipf_dir_tail;

#pragma pack()

/* file is encrypted */
/* #define FILE_IS_ENCRYPTED(entry) ((entry)->flags&0x1) */

/* Nonzero when bit 3 (0x8) of the entry's general purpose flags is set,
 * i.e. crc32, co_size and un_size were set to zero in the local file header.
 * `entry` is any pointer to a structure with a `flags` member. */
#define FILE_DATA_DESC_ONLY(entry) ((entry)->flags&0x8)

/* Compression method codes as stored in the `method` field of zip headers.
 * Only STORED and DEFLATED are handled by the session functions below. */
enum comp_method {
    STORED   = 0,
    SHRUNK   = 1,
    IMPLODED = 6,
    DEFLATED = 8
};


/* Verbosity switch for the diagnostic printouts; only exists in DEBUG builds.
 * NOTE(review): the DEBUG_ZIP main() also uses it, so DEBUG_ZIP appears to
 * require DEBUG as well. */
#ifdef DEBUG
static int verbose = 0;
#endif

/* Scratch area shared by zip_open() (tail of the archive) and
 * zip_hashtable_add() (path splitting); its contents do not survive across
 * calls to those functions.  It has external linkage. */
char buffer[8192+1]; /* shared temporary buffer */


/*
 * Report an unrecoverable error and terminate the process.
 *
 * Writes "Panic: <msg>" followed by a newline to stderr, then calls abort();
 * this function does not return.
 */
static void panic( const char *msg )
{
    FILE *err = stderr;

    fprintf( err, "Panic: %s\n", msg );
    abort();
}

/*
 * Prepare an empty hash table with `size` buckets.
 *
 * All counters are reset, the root file list is emptied and every bucket is
 * cleared.  If the bucket array cannot be allocated the process is
 * terminated through panic().
 *
 * Returns 0 (the only value this function ever returns).
 */
int zip_hashtable_init( ziphash *hash, int size )
{
    int slot = 0;

    hash->item_num = 0;
    hash->table_size = size;

    hash->table = htmalloc( ZIP_STORAGE, sizeof(zipentry*) * size );
    if ( 0 == hash->table )
        panic( "No enough memory to allocate the hash table" );

    /* start with every chain empty */
    while ( slot < size )
    {
        hash->table[slot] = 0;
        slot++;
    }

    hash->root_files = 0;
    hash->root_file_num = 0;

    return 0;
}

/*
 * Release every entry chained in the hash table and then the bucket array
 * itself.  The ziphash structure is not freed and its fields are left as
 * they were.
 */
void zip_hashtable_cleanup( ziphash *hash )
{
    int bucket;

    for ( bucket = 0; bucket < hash->table_size; bucket++ )
    {
        zipentry *cur = hash->table[bucket];

        while ( cur )
        {
            /* remember the successor before the node goes away */
            zipentry *following = cur->hash_next;

            htfree( ZIP_STORAGE, cur );
            cur = following;
        }
    }

    htfree( ZIP_STORAGE, hash->table );
}

#ifdef DEBUG_ZIP
/*
 * Debug helper: print every bucket chain of the hash table to stdout.
 *
 * One line is printed per entry.  The first entry of a chain is prefixed
 * with its bucket index; an entry with a non-zero file_num shows that count
 * in parentheses and, if it has a `files` ring, the names of its children.
 */
static void zip_hashtable_print( ziphash *hash )
{
    int bucket;

    printf( "The hash table contains:\n" );
    for ( bucket = 0; bucket < hash->table_size; bucket++ )
    {
        zipentry *item = hash->table[bucket];
        int is_head = 1;

        while ( item )
        {
            zipentry *child;

            if ( is_head )
                printf( " [%02d]", bucket );
            else
                printf( "     " );
            is_head = 0;

            printf( " %s", item->name );

            if ( item->file_num )
                printf( "(%d)", item->file_num );

            /* children form a circular list: walk until back at the start */
            child = item->files;
            if ( child )
            {
                for ( ;; )
                {
                    printf( " %s", child->name );
                    child = child->next;
                    if ( child == item->files )
                        break;
                }
            }

            printf ( "\n" );
            item = item->hash_next;
        }
    }
}
#endif

/*
 * Link `entry` at the head of the bucket chain selected by its name and
 * count it in item_num.  The entry's child list (`files`, `file_num`) is
 * reset, so callers that need children must set them afterwards.
 * No duplicate check is performed here.
 */
static void zip_hashtable_add_item( ziphash *hash, zipentry *entry )
{
    const int slot = hash_func( entry->name, hash->table_size );

    entry->file_num = 0;
    entry->files = 0;

    /* push onto the front of the chain */
    entry->hash_next = hash->table[slot];
    hash->table[slot] = entry;

    hash->item_num += 1;
}

/*
 * Look up the entry whose name is exactly `name` (strcmp equality).
 *
 * Returns the matching entry, or 0 when the name is not in the table.
 */
static zipentry *zip_hashtable_seek_item( ziphash *hash, const char *name )
{
    zipentry *cand = hash->table[ hash_func( name, hash->table_size ) ];

    while ( cand )
    {
        if ( strcmp( cand->name, name ) == 0 )
            break;
        cand = cand->hash_next;
    }

    /* cand is 0 here when the whole chain was walked without a match */
    return cand;
}

/*
 * Insert `entry` into the hash table and make sure every parent directory
 * of its name exists as an entry too.
 *
 * If an entry with the same name is already present nothing happens (the
 * caller keeps ownership of `entry` in that case).  Otherwise the entry is
 * hashed, and its path is walked from the innermost directory outwards:
 *   - a directory that already exists gets the new child appended to its
 *     circular `files` ring and the walk stops;
 *   - a missing directory is created (name stored right behind the
 *     zipentry in the same allocation), receives the child as its only
 *     file, and becomes the child for the next outer level.
 * When the walk runs out of '/' separators the outermost entry is appended
 * to the root ring of the table.
 *
 * Uses the shared global `buffer` as scratch space; names longer than the
 * buffer are truncated for the purpose of directory creation.  Running out
 * of memory for a directory entry is fatal (panic), as in
 * zip_hashtable_init().
 */
void zip_hashtable_add( ziphash *hash, zipentry *entry )
{
    zipentry *ze;
    char *p;

    /* If it existed, we do nothing */
    if ( zip_hashtable_seek_item( hash, entry->name ) )
        return;

    zip_hashtable_add_item( hash, entry );

    /* preparing to add its upper directories */
    strncpy( buffer, entry->name, sizeof(buffer)-1 );
    /* strncpy does not terminate on truncation; do it explicitly */
    buffer[sizeof(buffer)-1] = '\0';

    for ( p = strrchr( buffer, '/' ); p; p = strrchr( buffer, '/' ) )
    {
        *p = '\0';
        if ( !p[1] )
            continue; /* omitting duplicated '/'s */

        ze = zip_hashtable_seek_item( hash, buffer );
        if ( ze )
        {
            /* If this directory is already in the hash table */
            ze->file_num++;
            if ( ze->files )
            {
                /* append at the tail of the circular list */
                entry->prev = ze->files->prev;
                entry->next = ze->files;
                ze->files->prev = entry->prev->next = entry;
            }
            else
                ze->files = entry->prev = entry->next = entry;

            entry = ze;
            break; /* break the loop because we can't change upper dirs */
        }

        /* create the parent directory */
        ze = htmalloc( ZIP_STORAGE, sizeof(zipentry) + strlen(buffer) + 4 );
        if ( !ze )
            panic( "No enough memory to allocate a directory entry" );
        memset( ze, 0, sizeof(zipentry) );

        ze->name = (char *)(void *)(ze+1);
        strcpy( ze->name, buffer );
        zip_hashtable_add_item( hash, ze );

        /* Since zip_hashtable_add_item will reset file_num and files,
         * we need to change these two after the function is called.
         */
        ze->file_num = 1;
        ze->files = entry->prev = entry->next = entry;

        entry = ze;  /* loop back to check the parent directory */
    }

    /* adding a root file or directory: p is 0 only when the walk reached
     * the top without meeting an existing directory */
    if ( !p )
    {
        hash->root_file_num++;
        if ( hash->root_files )
        {
            entry->prev = hash->root_files->prev;
            entry->next = hash->root_files;
            hash->root_files->prev = entry->prev->next = entry;
        }
        else
            hash->root_files = entry->prev = entry->next = entry;
    }
}

#ifdef DEBUG
/*
 * Debug helper: print one table row describing `ze` to stdout.
 * A column header line is emitted first when `index` is 0.
 */
void zip_entry_print( zipentry *ze, int index )
{
    if ( index == 0 )
        fputs( "   Method  CRC-32  C-size  U-size  Offset    Filename\n",
               stdout );

    printf( "#%04d [%d] %08lX %-7ld %-7ld %-9d %s\n",
            index,
            ze->method,
            ze->crc32,
            ze->co_size,
            ze->un_size,
            ze->data_off,
            ze->name );
}

/*
 * Debug helper: print every entry of the circular list that starts at `ze`,
 * numbering the rows from 0.  A null list prints nothing.
 */
void zip_dir_print( zipentry *ze )
{
    zipentry *cur;
    int row;

    if ( 0 == ze )
        return;

    cur = ze;
    row = 0;
    for ( ;; )
    {
        zip_entry_print( cur, row );
        row++;
        cur = cur->next;
        if ( cur == ze )
            break;      /* back at the start of the ring */
    }
}
#endif

/*
 * Open the zip archive at `path` and read its end-of-central-directory
 * record.
 *
 * The last sizeof(buffer)-1 bytes of the file (or the whole file if it is
 * shorter) are loaded into the shared `buffer` and scanned backwards for
 * the "PK\005\006" signature, so an archive whose trailing comment pushes
 * the record further from the end than that is not recognised.
 * Multivolume archives are rejected.  No directory entries are loaded here;
 * call zip_loadentry() for that.
 *
 * Returns a newly allocated zipfile (release with zip_close()), or 0 when
 * the file cannot be opened or read, memory is exhausted, or the file is
 * not a supported zip archive.  On failure the file is closed again.
 */
zipfile *zip_open( const char *path )
{
    FILE *fp;
    zipfile *zf;
    zipf_dir_tail zdt;
    size_t n, ziplen;
    long endpos;
    int tailp, rc;

    fp = htfopen( path, "rb" );
    if ( !fp )
        return 0;

    zf = htmalloc( ZIP_STORAGE, sizeof(zipfile) );
    if ( !zf )
    {
        htfclose( fp );     /* do not leak the stream */
        return 0;
    }

    zf->fp = fp;

    /* seek to the end of zip file to learn its length */
    if ( fseek( fp, 0, SEEK_END ) != 0 )
        goto fail;
    endpos = ftell( fp );
    if ( endpos < 0 )
        goto fail;
    ziplen = (size_t)endpos;

    if ( ziplen <= sizeof(zdt) )
    {
#ifdef DEBUG
        printf( "%s: invalid file size %lu.\n", path, (unsigned long)ziplen );
#endif
        goto fail;
    }

    if ( ziplen > sizeof(buffer)-1 )
        rc = fseek( fp, -(long)(sizeof(buffer)-1), SEEK_END );
    else
        rc = fseek( fp, 0, SEEK_SET );
    if ( rc != 0 )
        goto fail;

    /* read a block at the end of zip file */
    n = fread( buffer, sizeof(char), sizeof(buffer)/sizeof(char)-1, fp );
    if ( n <= sizeof(zdt) )
        goto fail;          /* short read: nothing usable to scan */

    /* searching the end of central dir record, last candidate first */
    for ( tailp = (int)(n - sizeof(zdt)); tailp >= 0; tailp-- )
    {
        if ( buffer[tailp] == 0x50 )
        {
            if ( 0 == memcmp( buffer+tailp, "PK\005\006", 4 ) )
                break;
        }
    }

    if ( tailp < 0 )
    {
#ifdef DEBUG
        printf( "%s: not zip file signature.\n", path );
#endif
        goto fail;
    }

    /* read information of the end of central dir record */
    memcpy( &zdt, buffer+tailp, sizeof(zdt) );

    /* A single-volume archive has both disk numbers zero and all of its
     * entries on this disk; anything else is multivolume.  Comparing the
     * raw fields against 0 and against each other is byte-order neutral. */
    if ( zdt.dn_this != 0 || zdt.dn_dir != 0
            || zdt.entry_this != zdt.entry_num )
    {
#ifdef DEBUG
        printf( "%s: multivolume zip files are not supported.\n", path );
#endif
        goto fail;
    }

    zf->entry_num = get_intel16(zdt.entry_num);
    zf->loaded_num = 0;
    zf->first_entry_off =
        zf->cur_entry_off = get_intel32(zdt.dir_off);
    zf->dir_size = get_intel32(zdt.dir_size);

#ifdef DEBUG
    if ( verbose )
    {
        buffer[n] = '\0';   /* terminate the archive comment for printing */
        printf( "%s: ZIP file information:\n", path );
        printf( "  Entries: %d,  Dir size: %d, Dir offset: %d\n",
            zf->entry_num, zf->dir_size, zf->first_entry_off );
        printf( "  Comment: %s\n", buffer + tailp + sizeof(zdt) );
    }
#endif

    /* initialize hash table */
    if ( zip_hashtable_init( &(zf->dir), zf->entry_num / 4 + 5 ) )
        goto fail;

    return zf;

fail:
    htfree( ZIP_STORAGE, zf );
    htfclose( fp );
    return 0;
}

/*
 * Close an archive returned by zip_open(): the underlying stream is closed,
 * all directory entries are released and finally the zipfile itself is
 * freed.  `zf` must not be used afterwards.
 */
void zip_close( zipfile *zf )
{
    FILE *stream = zf->fp;

    htfclose( stream );
    zip_hashtable_cleanup( &zf->dir );
    htfree( ZIP_STORAGE, zf );
}

/*
 * Load up to `num` further central directory entries from the archive into
 * its hash table, continuing where the previous call stopped.
 *
 * For every entry a zipentry is allocated together with its file name;
 * leading and trailing '/' characters are stripped from the name before it
 * is inserted (parent directories are created by zip_hashtable_add()).
 * An entry whose name is already present is discarded.
 *
 * Loading stops early on a read error, a bad entry signature or an
 * allocation failure.  The position bookkeeping (loaded_num, cur_entry_off)
 * is updated for the entries that were processed even then, so a later call
 * does not read them again.
 *
 * Returns the number of entries processed by this call (0 when everything
 * was already loaded or nothing could be read).
 */
int zip_loadentry( zipfile *zf, int num )
{
    int n, i, fn_len, name_len;
    size_t offset;
    zipf_dir_entry zde;
    zipentry *ze;

    if ( zf->loaded_num >= zf->entry_num )
        return 0;

    n = zf->entry_num - zf->loaded_num;
    if ( num >= n )
        num = n;

    offset = zf->cur_entry_off;

    if ( fseek( zf->fp, (long)offset, SEEK_SET ) != 0 )
        return 0;

    for ( i=0; i<num; i++ )
    {
        /* read zipf_dir_entry structure for each file */
        if ( fread( &zde, sizeof(zde), 1, zf->fp ) != 1
                || get_intel32(zde.sign) != 0x02014b50 )
        {
#ifdef DEBUG
            printf( "Error in the #%d central dir entry.\n",
                i + zf->loaded_num );
#endif
            break;
        }

        /* allocate a zipentry and the filename buffer in the same time */
        name_len = fn_len = get_intel16(zde.fn_len);
        ze = htmalloc( ZIP_STORAGE, sizeof(zipentry) + fn_len + 4 );
        if ( !ze )
            break;

        /* read filename */
        if ( fread( ze+1, 1, fn_len, zf->fp ) != (size_t)fn_len )
        {
            htfree( ZIP_STORAGE, ze );
            break;
        }

        ze->name = (char *)(void *)(ze+1);
        ze->name[fn_len] = '\0';

        /* removing appended '/'s */
        while ( fn_len > 0 && ze->name[fn_len-1] == '/' )
            ze->name[--fn_len] = '\0';

        /* removing prepended '/'s */
        while ( *(ze->name) == '/' )
            ze->name++;

        ze->data_off = get_intel32(zde.offset);
        ze->crc32 = get_intel32(zde.crc32);
        ze->co_size = get_intel32(zde.co_size);
        ze->un_size = get_intel32(zde.un_size);
        ze->method = get_intel16(zde.method);

        ze->time = get_dos_time( get_intel16(zde.date),
                                 get_intel16(zde.time) );

        /* skip the extra field and the file comment */
        n = get_intel16(zde.ef_len) + get_intel16(zde.cm_len);
        if ( n > 0 )
            fseek( zf->fp, n, SEEK_CUR );

        /* advance by the on-disk name length (decoded, and not the length
         * left after stripping slashes) */
        offset += n + name_len + sizeof(zde);

#ifdef DEBUG
        if ( verbose )
            zip_entry_print( ze, i + zf->loaded_num );
#endif

        /* adding the zipentry to hash table, creating directory entries.
         * zip_hashtable_add() ignores names that already exist, so free
         * such an entry here instead of leaking it. */
        if ( zip_hashtable_seek_item( &(zf->dir), ze->name ) )
            htfree( ZIP_STORAGE, ze );
        else
            zip_hashtable_add( &(zf->dir), ze );
    }

    zf->loaded_num += i;
    zf->cur_entry_off = offset;

    return i;
}

/*
 * Find the entry named exactly `file` among the entries loaded so far.
 * Returns the entry, or 0 if there is none.
 */
zipentry *zip_search( zipfile *zf, const char *file )
{
    ziphash *table = &zf->dir;

    return zip_hashtable_seek_item( table, file );
}

/*
 * Return the first child of directory `file` (children form a circular
 * list linked through next/prev).
 *
 * A null pointer, an empty string or "/" selects the archive root.
 * Returns 0 when the name is unknown; for a known entry its `files`
 * pointer is returned, which is 0 if it has no children.
 */
zipentry *zip_readdir( zipfile *zf, const char *file )
{
    zipentry *dir;
    int wants_root = !file
                     || file[0] == '\0'
                     || ( file[0] == '/' && file[1] == '\0' );

    if ( wants_root )
        return zf->dir.root_files;

    dir = zip_hashtable_seek_item( &zf->dir, file );
    return dir ? dir->files : 0;
}

/* Size in bytes of the compressed-input buffer that zip_session_open()
 * allocates directly behind each z_stream. */
#define WINDOW_SIZE 32768

/*
 * Start reading the contents of entry `ze` of archive `zf`.
 *
 * The local file header is read and checked to find where the file data
 * begins.  For DEFLATED entries a z_stream is allocated with a
 * WINDOW_SIZE input buffer right behind it; because the zip file stores a
 * raw deflate stream, a synthetic 2-byte zlib header (RFC 1950) is placed
 * in front of the first block of compressed data before inflateInit().
 * STORED entries need no decompressor.
 *
 * Returns a new session (release with zip_session_close()), or 0 when the
 * header cannot be read or is invalid, the compression method is not
 * supported, the first data block cannot be read, or memory/zlib
 * initialisation fails.
 */
zipsession *zip_session_open( zipfile *zf, zipentry *ze )
{
    zipsession *zs;
    z_stream *param = 0;
    size_t offset = 0, start;
    zipf_file_header zfh;
    unsigned int head;

    if ( fseek( zf->fp, ze->data_off, SEEK_SET ) != 0
            || fread( &zfh, sizeof(zfh), 1, zf->fp ) != 1 )
        return 0;

    /* without a valid local header the name/extra lengths are garbage */
    if ( get_intel32(zfh.sign) != 0x04034b50 )
        return 0;

    /* skip file name and extra field; `start` becomes the data offset */
    start = get_intel16(zfh.fn_len) + get_intel16(zfh.ef_len);
    if ( start > 0 )
        fseek( zf->fp, start, SEEK_CUR );
    start += ze->data_off + sizeof(zfh);

    switch ( ze->method )
    {
    case DEFLATED:
        param = (z_stream *)htmalloc( ZIP_STORAGE,
                sizeof(z_stream) + WINDOW_SIZE + 4 );
        if ( !param )
            return 0;

        /* generating the 2-byte head of a zlib stream: RFC 1950
         * (the value is rounded down to a multiple of 31 as required) */
        head = 0x781F / 31 * 31;
        param->next_in = (void *)(param+1);
        param->next_in[0] = head >> 8;
        param->next_in[1] = head & 0xff;

        /* first block of compressed data goes right behind the header */
        offset = min( ze->co_size, WINDOW_SIZE-2 );
        if ( fread( param->next_in+2, 1, offset, zf->fp ) != offset )
        {
            htfree( ZIP_STORAGE, param );
            return 0;
        }

        param->avail_in = offset + 2;
        param->zalloc = htcalloc;
        param->zfree = htfree;
        param->opaque = ZIP_STORAGE;
        if ( Z_OK != inflateInit( param ) )
        {
            htfree( ZIP_STORAGE, param );
            return 0;
        }

        /* falling through */
    case STORED:
        zs = (zipsession *)htmalloc( ZIP_STORAGE, sizeof(zipsession) );
        if ( !zs )
        {
            if ( param )
            {
                /* inflateInit succeeded: release zlib's state as well */
                inflateEnd( param );
                htfree( ZIP_STORAGE, param );
            }
            return 0;
        }
        break;

    default:
        return 0;
    }

    zs->file = zf;
    zs->entry = ze;
    zs->state = ze->method;
    zs->start = start;      /* file offset of the entry's data */
    zs->offset = offset;    /* compressed bytes fetched from the file so far */
    zs->param = param;
    zs->unc_off = 0;        /* uncompressed bytes delivered so far */

    return zs;
}

/*
 * End a read session: for DEFLATED sessions the zlib state is torn down and
 * the stream buffer is freed; then the session itself is freed.
 */
void zip_session_close( zipsession *zs )
{
    if ( DEFLATED == zs->state )
    {
        z_stream *strm = zs->param;

        /* a deflated session is always expected to own a stream */
        assert( strm );
        if ( strm )
        {
            inflateEnd( strm );
            htfree( ZIP_STORAGE, strm );
        }
    }

    htfree( ZIP_STORAGE, zs );
}

/*
 * Read up to `size` bytes of uncompressed data from the session into `buf`.
 *
 * DEFLATED: inflate() is called repeatedly, refilling the input buffer
 * behind the z_stream with up to WINDOW_SIZE bytes of compressed data
 * whenever it runs empty.  Once all compressed data has been fed, four
 * filler bytes stand in for the adler32 trailer that a zlib stream would
 * carry; the resulting "incorrect data check" error is treated as a normal
 * end of stream.
 * STORED: the bytes are copied straight from the file.
 *
 * zs->unc_off is advanced by the number of bytes produced.
 *
 * Returns the number of bytes stored in `buf`, which is less than `size`
 * (possibly 0) at the end of the entry, or (size_t)-1 on a decompression
 * or read error.  Sessions in any other state return 0.
 */
size_t zip_session_read( void *buf, size_t size, zipsession *zs )
{
    FILE *fp;
    z_stream *param;
    zipentry *ze = zs->entry;
    size_t s, before;
    int n;

    fp = zs->file->fp;

    switch ( zs->state )
    {
    case DEFLATED:
        param = zs->param;
        param->next_out = buf;
        param->avail_out = size;

        for ( ;; )
        {
            if ( 0 == param->avail_in )
            {
                if ( ze->co_size <= zs->offset )
                {
                    /* compressed data exhausted: offer 4 bytes in place of
                     * the adler32 check value (contents are irrelevant) */
                    param->next_in = (void *)(param+1);
                    param->avail_in = 4;
                }
                else
                {
                    n = ze->co_size - zs->offset;
                    if ( n > WINDOW_SIZE )
                        n = WINDOW_SIZE;
                    if ( fseek( fp, zs->offset + zs->start, SEEK_SET ) != 0 )
                        return (size_t)-1;
                    n = (int)fread( param+1, 1, n, fp );
                    if ( n <= 0 )
                        return (size_t)-1;
                    zs->offset += n;

                    param->avail_in = n;
                    param->next_in = (void *)(param+1);
                }
            }

            /* count only what THIS inflate() call produces; the loop may
             * run several times for one request */
            before = param->avail_out;
            n = inflate( param, Z_SYNC_FLUSH );
            zs->unc_off += before - param->avail_out;
            switch ( n )
            {
            case Z_OK:
                if ( param->avail_out == 0 )
                    return size;
                break;          /* looping back to redo inflate */
            case Z_STREAM_END:
                return size - param->avail_out;
            case Z_DATA_ERROR:
                /*
                 * The adler32 of the data stream cannot be known BEFORE
                 * the data have been uncompressed, so the only way is to
                 * check the error message and ignore this case.
                 */
                if ( param->msg
                        && 0 == strcmp( param->msg, "incorrect data check" ) )
                    return size - param->avail_out;
                /* falling through */
            default:
#ifdef DEBUG
                if ( param->msg )
                    printf( "\nZLib error message: %s.\n", param->msg );
#endif
                return (size_t)-1;
            }

            /* looping back */
        }
        break;

    case STORED:
        if ( ze->co_size <= zs->offset )
            return 0;
        s = ze->co_size - zs->offset;
        if ( size > s )
            size = s;
        if ( fseek( fp, zs->offset + zs->start, SEEK_SET ) != 0 )
            return 0;
        /* report (and account for) what was actually read */
        size = fread( buf, 1, size, fp );
        zs->unc_off = zs->offset += size;
        break;

    default:
        return 0;
    }

    return size;
}

/*
 * Move the read position of the session to uncompressed byte `offset`.
 *
 * Offsets at or beyond the entry's uncompressed size are ignored.
 * STORED: the position is simply set.
 * DEFLATED: a backwards seek restarts decompression from the beginning of
 * the entry (fresh synthetic zlib header plus the first block of
 * compressed data, then inflateReset()); the data up to `offset` is then
 * decompressed into a scratch buffer and discarded.  If decompression
 * fails or ends early the session is left wherever it got to.
 */
void zip_session_seek( zipsession *zs, size_t offset )
{
    char buf[8192];
    size_t n;
    z_stream *param;

    if ( offset >= zs->entry->un_size )
        return;

    switch ( zs->state )
    {
    case DEFLATED:
        param = zs->param;
        if ( zs->unc_off > offset )
        {
            /* rewind: rebuild the input buffer exactly as at open time */
            unsigned int head = 0x781F / 31 * 31;
            param->next_in = (void *)(param+1);
            param->next_in[0] = head >> 8;
            param->next_in[1] = head & 0xff;

            n = min( zs->entry->co_size, WINDOW_SIZE-2 );
            fseek( zs->file->fp, zs->start, SEEK_SET );
            n = fread( param->next_in+2, 1, n, zs->file->fp );

            param->avail_in = n + 2;

            if ( Z_OK == inflateReset( param ) )
            {
                /* n compressed bytes are already buffered, so the next
                 * refill must continue after them, not at 0 */
                zs->offset = n;
                zs->unc_off = 0;
            }
            else
            {
#ifdef DEBUG
                printf( "inflateReset: Error.\n" );
#endif
                return;
            }
        }

        /* skip forward by decompressing and discarding */
        n = offset - zs->unc_off;
        while ( n > 0 )
        {
            size_t chunk = n < sizeof(buf) ? n : sizeof(buf);

            if ( zip_session_read( buf, chunk, zs ) != chunk )
                break;      /* error or premature end of the stream */
            n -= chunk;
        }
        break;

    case STORED:
        zs->unc_off = zs->offset = offset;
        break;

    default:
        break;
    }
}


#ifdef DEBUG_ZIP
/*
 * Test driver (DEBUG_ZIP builds only).
 *
 *   prog [-v] -l|-p <zip_file> [name [from [to]]]
 *
 *   -v  verbose diagnostics
 *   -l  list the directory `name` (or the archive root)
 *   -p  print the contents of file `name` to stdout, optionally starting at
 *       uncompressed offset `from` and stopping at offset `to`
 *
 * Returns -1 on usage or setup errors and 0 otherwise.
 */
int main( int argc, char *argv[] )
{
    int z_list = 0, z_print = 0;
    char *fn = 0, *dir = 0, *param1 = 0, *param2 = 0;
    zipfile *zf;
    int i;

    /* the on-disk structures must have exactly the sizes of the format */
    if ( sizeof(zipf_file_header) != 30
         || sizeof(zipf_dir_entry) != 46
         || sizeof(zipf_dir_tail) != 22 )
    {
        printf( "Structure size not meet:\n"
                "File header: %d, Dir entry: %d, Dir tail: %d.\n",
                (int)sizeof(zipf_file_header),
                (int)sizeof(zipf_dir_entry),
                (int)sizeof(zipf_dir_tail) );
        return -1;
    }

    /* options may appear anywhere; other arguments fill fn, dir, param1,
     * param2 in that order */
    for ( i=1; i<argc; i++ )
    {
        if ( argv[i][0] == '-' )
        {
            switch ( argv[i][1] )
            {
            case 'v':
                verbose = 1;
                break;
            case 'l':
                z_list = 1;
                break;
            case 'p':
                z_print = 1;
                break;
            default:
                break;
            }
        }
        else if ( !fn )
            fn = argv[i];
        else if ( !dir )
            dir = argv[i];
        else if ( !param1 )
            param1 = argv[i];
        else if ( !param2 )
            param2 = argv[i];
    }

    if ( !fn )
    {
        printf( "Usage: %s [-v] -l|-p <zip_file>.zip [filename]\n",
                argv[0] );
        return -1;
    }

    zf = zip_open( fn );
    if ( !zf )
    {
        printf( "%s: file error.\n", fn );
        return 0;
    }
    i = zip_loadentry( zf, 1000 );
    if ( verbose )
        printf( ">>> %d dir entries loaded.\n", i );
    if ( z_list )
    {
        if ( dir && strcmp( "/", dir ) )
        {
            zipentry *ze = zip_search( zf, dir );
            if ( ze )
            {
                if ( ze->file_num )
                    printf( "%s/%s: contains %d files.\n",
                        fn, dir, ze->file_num );
                else
                    printf( "%s/%s is a file.\n", fn, dir );
                zip_dir_print( ze->files );
            }
            else
                printf( "%s: not found in zip file %s.\n", dir, fn );
        }
        else
        {
            printf( "%s/ contains %d files.\n", fn, zf->dir.root_file_num );
            zip_dir_print( zf->dir.root_files );
        }
    }
    if ( z_print && dir )
    {
        zipentry *ze = zip_search( zf, dir );
        if ( ze )
        {
            if ( ze->co_size )
            {
                /* bytes still to print; negative means "until the end" */
                int size = -1;
                char buf[2048];
                zipsession *zs = zip_session_open( zf, ze );
                if ( !zs )
                {
                    printf( "zip_session_open: failed.\n" );
                    zip_close( zf );
                    return -1;
                }

                if ( param1 )
                {
                    zip_session_seek( zs, atoi(param1) );
                    if ( param2 )
                        size = atoi(param2) - atoi(param1);
                }

                for ( ; size; )
                {
                    if ( size < 0 )
                        i = zip_session_read( buf, sizeof(buf), zs );
                    else if ( (size_t)size > sizeof(buf) )
                    {
                        i = zip_session_read( buf, sizeof(buf), zs );
                        size -= sizeof(buf);
                    }
                    else
                    {
                        i = zip_session_read( buf, size, zs );
                        size = 0;
                    }

                    if ( i < 0 )
                    {
                        printf( "\n\n>>> There is an error in %s/%s!\n",
                                fn, dir );
                        break;
                    }
                    if ( 0 == i )
                        break;
                    fwrite( buf, 1, i, stdout );
                }
                zip_session_close( zs );
            }
            else
                printf( "%s/%s is not a regular file.\n", fn, dir );
        }
        else
            printf( "%s/%s does not exist.\n", fn, dir );
    }
    if ( verbose )
        zip_hashtable_print( &(zf->dir) );
    zip_close( zf );

    return 0;
}
#endif
