Navmesher_old/espReader/Reader.c

/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
#undef NDEBUG
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <intrin.h>

#include "zlib.h"

#include "ESPReader.h"

//
// === CONSTANTS ===
//

// timestamp field access
//
// Masks and shift offsets for a bit-packed 16-bit timestamp: a 5-bit day in
// the low bits, a 4-bit month starting at bit 5 and a 7-bit year starting at
// bit 9.
// NOTE(review): in the code visible here these are referenced only by the
// commented-out decoding inside convert_ts — confirm whether other
// translation units use them before removing.
const uint16_t day_mask = 0x1F;
const uint16_t month_mask = 0xF;
const uint16_t year_mask = 0x7F;
const int month_offset = 5;
const int year_offset = 9;

//
// === FORWARD DECLARATIONS ===
//

// Runtime checks of struct sizes and zlib type sizes
void asserts(void);

// Tree walkers
// Each walker returns the address just past the data it consumed.
char *walk_concat(char *data, size_t size, struct walker_callbacks cb);
char *walk_group(char *data, struct walker_callbacks cb);
char *walk_record(char *data, struct walker_callbacks cb);

// Header printers
void print_group_header(Group *header);
void print_record_header(Record *header);

// Printer helpers
void print_group_label(Group *header);
void print_record_flags(Record *header);
void print_timestamp(uint16_t ts);
void print_type(Type4 type);
void print_type4(Type4 val);

// Utilities
Timestamp convert_ts(uint16_t ts);
// Walker callbacks: `data` is the user pointer from the walker_callbacks
// struct, the last argument is a per-node slot shared between the pre-walk
// and post-walk callback of the same node.
void print_callback(Node n, void *data, void **carry_out);
void dc_size_cb(Node n, void *data, void **carry_out);
void formid_count_cb(Node n, void *data, void **carry_out);
void decompress_pre(Node n, void *data, void **carry_out);
void decompress_post(Node n, void *data, void **carry_in);

//
// === FUNCTIONS ===
//

void asserts(void) {
    // binary overlay size checks
    assert(sizeof(Record) == 24); // Record struct incorrect size
    assert(sizeof(Group) == 24); // Group struct incorrect size
    assert(sizeof((Group) { 0 }.label) == 4); // Label union incorrect size
    assert(sizeof(Field) == 6); // Field struct incorrect size

    // zlib compatability
    assert(sizeof(uLongf) == sizeof(uint32_t));
    assert(sizeof(Bytef) == sizeof(char));
}

/* Walks every group and record in the buffer `data` of `size` bytes, invoking
 * the callbacks in `cb` for each node.
 *
 * Asserts that the buffer starts with a TES4 record and that the walk consumes
 * exactly `size` bytes.
 */
void espr_walk(char *data, size_t size, struct walker_callbacks cb) {
    // run the layout checks before overlaying any struct on the data
    asserts();

    // the first four bytes must be the TES4 type tag, i.e. the start of a file
    const Type4 *const first = (const Type4 *)data;
    assert(first->uint == rt[TES4]);

    // the walk has to end exactly at the end of the buffer
    char *const expected_end = data + size;
    char *const walked_end = walk_concat(data, size, cb);
    assert(walked_end == expected_end);
}

void espr_print(char *data, size_t size) {
    struct walker_callbacks cb = { .pre = print_callback };
    espr_walk(data, size, cb);
}

/* Walker callback that prints the header of node `n`.
 * Neither the user data pointer nor the carry slot is used.
 */
void print_callback(Node n, void *data, void **carry_out) {
    (void)data;
    (void)carry_out;

    if (n.type == NT_GROUP) {
        print_group_header(n.header.group);
    }
    else if (n.type == NT_RECORD) {
        print_record_header(n.header.record);
    }
    else {
        assert(false); // invalid node type
    }
}

size_t espr_decompressed_size(char *data, size_t size) {
    size_t dc_size = 0;
    struct walker_callbacks cb = { .pre = dc_size_cb, .data = &dc_size };
    espr_walk(data, size, cb);
    return dc_size;
}

// Walker callback that accumulates node sizes into the size_t behind `data`.
// Compressed records contribute the decompressed size stored in the first
// four bytes of their data instead of their stored size.
void dc_size_cb(Node n, void *data, void **carry_out) {
    (void)carry_out;
    size_t *const total = data;

    switch (n.type) {
        case NT_GROUP: {
            // Groups contribute only their header; the walker visits their
            // contents separately.
            *total += sizeof(Group);
            break;
        }
        case NT_RECORD: {
            // Records are leaf-ish: header plus all of their data.
            const Record *const rec = n.header.record;
            size_t payload;
            if (rec->flags & COMPRESSED_FLAG) {
                // decompressed size prefix
                payload = *((uint32_t *)n.data);
            }
            else {
                payload = rec->size;
            }
            *total += sizeof(Record);
            *total += payload;
            break;
        }
        default:
            assert(false); // invalid node type
    }
}

size_t espr_formid_count(char *data, size_t size) {
    size_t count = 0;
    struct walker_callbacks cb = { .pre = formid_count_cb, .data = &count };
    espr_walk(data, size, cb);
    return count;
}

/* Walker callback that increments the size_t behind `data` once per record.
 *
 * The FormID <-> Record relationship is assumed to be bijective: groups are
 * believed not to carry FormIDs, and every record should have a unique one
 * (otherwise ids would clash), so counting records counts FormIDs.
 */
void formid_count_cb(Node n, void *data, void **carry_out) {
    (void)carry_out;

    // groups are not counted
    if (n.type != NT_RECORD)
        return;

    size_t *const tally = data;
    *tally += 1;
}

// State shared by the decompression callbacks while walking a file.
struct decom {
    // next write position in the output buffer; advanced after every copy
    char *buf;
    // byte budget, initialised from buf_size in espr_decompress and reduced
    // by the number of bytes written after every copy
    size_t remaining;
};

void espr_decompress(char *data, size_t size, char *buf, size_t buf_size) {
    struct decom s = { .buf = buf, .remaining = buf_size };
    struct walker_callbacks cb =
        { .pre = decompress_pre, .post = decompress_post, .data = &s };
    espr_walk(data, size, cb);
}

/* Pre-walk callback of espr_decompress: appends node `n` to the output buffer
 * tracked by the `struct decom` behind `decom_ptr`.
 *
 *  - Uncompressed records are copied verbatim (header and data).
 *  - Compressed records have their header copied and their data inflated with
 *    zlib directly into the output; the copied header is then patched to
 *    describe an uncompressed record.
 *  - Groups only have their header copied. The address of the copied header
 *    is stored in `*carry_out` so the post-walk callback can fix up its size.
 *
 * Before every write the remaining capacity of the output buffer is asserted,
 * so an undersized buffer aborts instead of being overrun.
 */
void decompress_pre(Node n, void *decom_ptr, void **carry_out) {
    struct decom *d = decom_ptr;

    switch (n.type) {
        case NT_RECORD:
            // compressed record
            if (n.header.record->flags & COMPRESSED_FLAG) {
                // first 4 bytes of the data are the decompressed size, so a
                // compressed record can never be smaller than that prefix
                assert(n.header.record->size >= sizeof(uint32_t));

                // memcpy avoids a misaligned / type-punned read of the prefix
                uint32_t dc_size;
                memcpy(&dc_size, n.data, sizeof(dc_size));

                // header plus inflated data must fit in the output buffer
                assert(d->remaining >= sizeof(Record));
                assert(d->remaining - sizeof(Record) >= dc_size);

                // copy header
                memcpy(d->buf, n.header.record, sizeof(Record));

                // copied header reference
                Record *header = (Record *)d->buf;

                // update decom struct
                d->remaining -= sizeof(Record);
                d->buf += sizeof(Record);

                // decompress directly into buffer; zlib gets its own uLongf
                // length variable rather than a cast uint32_t pointer
                uLongf inflated = dc_size;
                const uLong cur_size =
                    (uLong)(n.header.record->size - sizeof(uint32_t));
                const char *compressed = n.data + sizeof(uint32_t);
                int ret = uncompress(
                    (Bytef *)d->buf,
                    &inflated,
                    (const Bytef *)compressed,
                    cur_size
                );
                assert(ret == Z_OK);
                assert(inflated == dc_size);

                // update decom struct
                d->remaining -= dc_size;
                d->buf += dc_size;

                // update header data size
                header->size = dc_size;

                // unset compressed flag
                header->flags &= ~COMPRESSED_FLAG;
            }
            else {
                // copy record (header and data in one go)
                size_t record_size = sizeof(Record) + n.header.record->size;
                assert(d->remaining >= record_size);
                memcpy(d->buf, n.header.record, record_size);

                // update decom
                d->remaining -= record_size;
                d->buf += record_size;
            }
            break;
        case NT_GROUP:
            // copy header, contents will be copied while walking
            assert(d->remaining >= sizeof(Group));
            memcpy(d->buf, n.header.group, sizeof(Group));

            // save copied header location for post-walk group size recalc
            *carry_out = (void *)d->buf;

            // update decom
            d->buf += sizeof(Group);
            d->remaining -= sizeof(Group);

            break;
        default:
            assert(false); // invalid node type
    }
}

/* Post-walk callback of espr_decompress.
 *
 * For groups, `*carry_in` is the copied group header saved by the pre-walk
 * callback. Its size field is rewritten to the number of bytes between the
 * start of that header and the current output position. Records need no
 * post-processing.
 */
void decompress_post(Node n, void *decom_ptr, void **carry_in) {
    // only groups need their size recomputed
    if (n.type != NT_GROUP)
        return;

    const struct decom *const d = decom_ptr;
    Group *const copied = (Group *)(*carry_in);

    // the distance from the header copy to the write cursor is the new size
    copied->size = (uint32_t)((char *)d->buf - (char *)copied);
}

/* Walks `size` bytes of data that consist of groups and records stored back
 * to back.
 *
 * Each segment is dispatched to walk_group or walk_record based on its
 * four-byte type tag. Returns the position after the last segment, which
 * equals `data + size` whenever the function returns.
 */
char *walk_concat(char *data, size_t size, struct walker_callbacks cb) {
    const char *const stop = data + size;
    char *cursor = data;

    while (cursor != stop) {
        // a previous segment must not have run past the end of the range
        assert(cursor < stop);

        const Type4 *const tag = (Type4 *)cursor;

        // check valid type
        assert(rt[rt_hash(tag->uint)] == tag->uint);

        // groups recurse, everything else is a record
        if (tag->uint == rt[GRUP]) {
            cursor = walk_group(cursor, cb);
        }
        else {
            cursor = walk_record(cursor, cb);
        }
    }

    return cursor;
}

/* Walk a group record. Group records are containers for any other type of
 * record, including other group records.
 *
 * This function will also call `cb` with the node constructed from this group
 * record.
 */
char *walk_group(char *data, struct walker_callbacks cb) {
    Group *const header = (Group *const)data;

    // The size in the group header includes the size of the header
    char *data_start = data + sizeof(Group);
    char *data_end = data + header->size;
    size_t data_size = data_end - data_start;

    Node n = { .header.group = header, .data = data_start, .type = NT_GROUP };
    void *carry;

    // Pre-walk callback
    if (cb.pre)
        cb.pre(n, cb.data, &carry);

    // Walk through the concatenation of data inside the group.
    data = walk_concat(data_start, data_size, cb);
    assert(data == data_end);

    // Post-walk callback
    if (cb.post)
        cb.post(n, cb.data, &carry);

    return data;
}

/* Walk a single non-group record.
 *
 * The record's data is not descended into. The pre-walk and post-walk
 * callbacks (if set) are both called with the node built from this record and
 * with the address of the same carry slot, which starts out as NULL so that
 * the post callback never sees an indeterminate value.
 *
 * Returns the address just past the record's data.
 */
char *walk_record(char *data, struct walker_callbacks cb) {
    Record *header = (Record *)data;
    assert(header->type.uint != rt[GRUP]);

    char *data_start = data + sizeof(Record);

    Node n = { .header.record = header, .data = data_start, .type = NT_RECORD };
    void *carry = NULL;

    /* Pre and post walk callbacks make less sense for record walking as records
     * are leaf-ish, will still call both here for now as field walking may be
     * added in the future.
     */

    // Pre-walk callback
    if (cb.pre)
        cb.pre(n, cb.data, &carry);

    // Update data ptr based on record size (the size excludes the header).
    data += sizeof(Record) + header->size;

    // Post-walk callback
    if (cb.post)
        cb.post(n, cb.data, &carry);

    return data;
}

/* Prints every field of a group header to stdout, one per line. */
void print_group_header(Group *header) {
    puts("--- HEADER: GROUP ---");
    print_type(header->grup);
    printf("Size: %u\n", header->size);
    print_group_label(header);

    // the group type indexes the name table, so it has to be in range
    assert(header->type < GTS_SIZE);
    const char *const type_name = group_type_strings[header->type];
    printf("Group type: %s\n", type_name);

    print_timestamp(header->timestamp);
    printf("Version Control Info: %04x\n", header->vcinfo);
    printf("Unknown: %08x\n", header->unknown);
}

/* Prints the type, flags, FormID, timestamp, version control info, version
 * and unknown field of a record header to stdout.
 */
void print_record_header(Record *header) {
    puts("--- HEADER: RECORD ---");

    // identity
    print_type(header->type);
    print_record_flags(header);
    printf("FormID: %x\n", header->formid);

    // bookkeeping fields
    print_timestamp(header->timestamp);
    printf("Version Control Info: %04x\n", header->vcinfo);
    printf("Version: %u\n", header->version);
    printf("Unknown: %08x\n", header->unknown);
}

/* Prints the label of a group header as a "Label: ..." line.
 *
 * The label is a union whose interpretation depends on the group type:
 *  - top groups:                     a four-character record type
 *  - interior cell (sub)blocks:      a number
 *  - exterior cell (sub)blocks:      a coordinate pair
 *  - all children groups:            a FormID
 *
 * Each case prints exactly one interpretation. Any other group type trips an
 * assert.
 */
void print_group_label(Group *header) {
    printf("Label: ");
    switch (header->type) {
        case GT_TOP:
            print_type4(header->label.type);
            break;
        case GT_INTERIOR_CELL_BLOCK:
        case GT_INTERIOR_CELL_SUBBLOCK:
            printf("%d", header->label.number);
            break;
        case GT_EXTERIOR_CELL_BLOCK:
        case GT_EXTERIOR_CELL_SUBBLOCK:
            // coord[1] is printed as X and coord[0] as Y
            printf("X: %d, Y: %d",
                header->label.coord[1], header->label.coord[0]);
            break;
        case GT_WORLD_CHILDREN:
        case GT_CELL_CHILDREN:
        case GT_TOPIC_CHILDREN:
        case GT_CELL_PERSISTENT_CHILDREN:
        case GT_CELL_TEMPORARY_CHILDREN:
            printf("FormID[%x]", header->label.formid);
            break;
        default:
            assert(false); // invalid group type
    }
    printf("\n");
}

/* Prints the names of the flags set in a record header, highest bit first.
 *
 * Flag names come from the per-record-type lookup table in `rfs`. Printing
 * stops at the first set bit without a name (or immediately when the record
 * type has no table); any bits left over at that point are reported and trip
 * an assert. REFR records are skipped entirely for now.
 */
void print_record_flags(Record *header) {
    printf("Flags:\n");

    uint32_t flags = header->flags;
    const uint32_t type = header->type.uint;

    // print flags
    if (type == rt[REFR]) {
        // TODO
        // REFR requires FormID lookup
        flags = 0;
    }
    else {
        rfs_inner *const flag_lut = rfs[rt_hash(type)];
        if (flag_lut) {
            while (flags != 0) {
                // Portable scan for the highest set bit. It terminates with
                // highest >= 0 because flags is not 0. This replaces the
                // compiler-specific __lzcnt intrinsic, which also needs CPU
                // support for the LZCNT instruction to give correct results.
                int highest = 31;
                while ((flags & (((uint32_t)1) << highest)) == 0)
                    highest--;
                assert(highest >= 0);

                const char *const str = (*flag_lut)[highest];
                if (str) {
                    printf("  - %s\n", str);
                    // clear the bit that was just printed
                    flags -= ((uint32_t)1) << highest;
                }
                else
                    break;
            }
        }
    }

    if (flags != 0) {
        printf("\n\nOriginal flags: %08x\n", header->flags);
        printf("Unhandled flags: %08x\n", flags);
        assert(false); // unhandled flags
    }
}

// Prints a raw 16-bit header timestamp, decoded by convert_ts, as a
// "Timestamp: ..." line. This is the Skyrim SE timestamp format.
void print_timestamp(uint16_t _ts) {
    const Timestamp decoded = convert_ts(_ts);
    printf(
        "Timestamp: 20x%u-%02u-%02u\n",
        decoded.year,
        decoded.month,
        decoded.day
    );
}

/* Prints a four-character type tag as a "Type: XXXX" line. */
void print_type(Type4 type) {
    fputs("Type: ", stdout);
    print_type4(type);
    putchar('\n');
}

/* Writes the four bytes of `val` to stdout as characters, without a trailing
 * newline.
 */
void print_type4(Type4 val) {
    size_t printed = 0;
    // invariant: `printed` characters of val.bytes have been written
    while (printed < 4) {
        putchar(val.bytes[printed]);
        printed++;
    }
}

/* Decodes a raw 16-bit header timestamp.
 *
 * The low byte is taken as the day. The high byte is treated as a running
 * month counter starting at 1: it yields the month (1-12) and a single year
 * digit, computed as ((high byte - 1) / 12 + 3) % 10.
 *
 * A different, bit-packed decoding (day, month and year extracted with the
 * file-level day_mask / month_mask / year_mask constants and offsets) is not
 * used here.
 */
Timestamp convert_ts(uint16_t ts) {
    const uint8_t low_byte = ts & 0xff;
    const uint8_t high_byte = (ts >> 8) & 0xff;

    // int arithmetic: a high byte of 0 gives -1 here, exactly as before
    const int months_elapsed = high_byte - 1;

    const uint8_t day = low_byte;
    const uint8_t month = (uint8_t)((months_elapsed % 12) + 1);
    const uint8_t year = (uint8_t)((months_elapsed / 12 + 3) % 10);

    return (Timestamp) { year, month, day };
}