// Navmesher_old/espReader/Reader.c

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <intrin.h>

#include "zlib.h"

#include "ESPReader.h"

//
// === CONSTANTS ===
//

// Masks and shift amounts for a bit-packed 16-bit timestamp layout:
// day in the low 5 bits, month in the 4 bits above it (shift 5), and
// year in the 7 bits above that (shift 9).
// Within this file they are referenced only by the commented-out decoder
// inside convert_ts(); the active decoder there uses a byte-based layout.
const uint16_t day_mask = 0x1F;
const uint16_t month_mask = 0xF;
const uint16_t year_mask = 0x7F;
const int month_offset = 5;
const int year_offset = 9;

//
// === FORWARD DECLARATIONS ===
//

// Runtime struct-size sanity checks; called at the start of espr_walk.
void asserts(void);

// Tree walkers. Each one consumes the bytes starting at `data`, calls `cb`
// (with the caller's `pt`) for every group/record header it encounters, and
// returns a pointer just past the bytes it consumed.
char *walk_concat(char *data, size_t size, void (*cb)(Node n, void *pt), void *pt);
char *walk_group(char *data, void (*cb)(Node n, void *pt), void *pt);
char *walk_record(char *data, void (*cb)(Node n, void *pt), void *pt);

// Header printers: write one header's fields to stdout.
void print_group_header(Group *header);
void print_record_header(Record *header);

// Printer helpers: format individual header fields.
void print_group_label(Group *header);
void print_record_flags(Record *header);
void print_timestamp(uint16_t ts);
void print_type(Type4 type);
void print_type4(Type4 val);

// Timestamp decoding and the callbacks handed to espr_walk.
Timestamp convert_ts(uint16_t ts);
void print_callback(Node n, void *_);
void dc_size_cb(Node n, void *dc_size_ptr);
void formid_count_cb(Node n, void *count_ptr);
void decompress_cb(Node n, void *decom_ptr);

//
// === FUNCTIONS ===
//

void asserts(void) {
	// binary overlay size checks
	assert(sizeof(Record) == 24); // Record struct incorrect size
	assert(sizeof(Group) == 24); // Group struct incorrect size
	assert(sizeof((Group) { 0 }.label) == 4); // Label union in group struct incorrect size
	assert(sizeof(Field) == 6); // Field struct incorrect size
}

/* Walk every group and record in an in-memory plugin image.
 *
 * data: start of the image; must begin with a TES4 record.
 * size: number of bytes in the image.
 * cb:   invoked once per group/record header, in file order.
 * pt:   opaque pointer passed through to every `cb` call.
 *
 * All validation is done with assert().
 */
void espr_walk(char *data, size_t size, void (*cb)(Node n, void *pt), void *pt) {
	// struct layout checks run here, before any overlay cast is used
	asserts();

	// the very first four bytes must be the TES4 record type
	const Type4 *const first = (const Type4 *)data;
	assert(first->uint == rt[TES4]);

	// the walk has to consume exactly `size` bytes
	char *const expected_end = data + size;
	char *const reached = walk_concat(data, size, cb, pt);
	assert(reached == expected_end);
}

/* Print every group and record header in the image `data` (`size` bytes)
 * to stdout, in file order.
 */
void espr_print(char *data, size_t size) {
	void *const no_context = NULL; // print_callback ignores its context pointer
	espr_walk(data, size, print_callback, no_context);
}

/* espr_walk callback that prints the header of the visited node.
 *
 * n:  node being visited; its type selects the group or record printer.
 * pt: unused.
 */
void print_callback(Node n, void *pt) {
	(void)pt; // printing needs no per-walk state

	if (n.type == NT_GROUP) {
		print_group_header(n.header.group);
	} else if (n.type == NT_RECORD) {
		print_record_header(n.header.record);
	} else {
		assert(false); // invalid node type
	}
}

size_t espr_decompressed_size(char *data, size_t size) {
	size_t dc_size = 0;
	espr_walk(data, size, dc_size_cb, &dc_size);
	return dc_size;
}

/* espr_walk callback that accumulates the decompressed size of the image.
 *
 * n:           node being visited.
 * dc_size_ptr: points at the size_t running total to add to.
 *
 * Groups contribute only their header, because their contents are visited
 * separately. Records contribute header plus payload; for a compressed
 * record the payload size is the 32-bit value stored at the start of its
 * data instead of the size in the header.
 */
void dc_size_cb(Node n, void *dc_size_ptr) {
	size_t *const total = dc_size_ptr;

	if (n.type == NT_GROUP) {
		// header only, the children are walked on their own
		*total += sizeof(Group);
		return;
	}

	if (n.type != NT_RECORD) {
		assert(false); // invalid node type
		return;
	}

	// records are leaf-ish: count the header, then the (inflated) payload
	*total += sizeof(Record);
	if (n.header.record->flags & COMPRESSED_FLAG)
		*total += *((uint32_t *)n.data); // stored decompressed size
	else
		*total += n.header.record->size;
}

/* Return the number of records in the image `data` (`size` bytes), which
 * formid_count_cb treats as the number of FormIDs.
 */
size_t espr_formid_count(char *data, size_t size) {
	size_t records_seen = 0;
	espr_walk(data, size, formid_count_cb, &records_seen);
	return records_seen;
}

/* espr_walk callback that counts records.
 *
 * The count doubles as a FormID count on the assumption that records and
 * FormIDs map one-to-one: groups are believed not to carry a FormID, and
 * every record should own a unique one, otherwise the id space would clash.
 *
 * n:         node being visited.
 * count_ptr: points at the size_t counter to increment.
 */
void formid_count_cb(Node n, void *count_ptr) {
	// groups do not count
	if (n.type != NT_RECORD)
		return;

	size_t *const counter = count_ptr;
	++*counter;
}

// Running state shared between espr_decompress and decompress_cb.
struct decom {
	char *buf;        // write cursor into the output buffer
	char *start;      // first input byte not yet copied to the output
	size_t remaining; // output bytes still available at `buf`
};

/* Copy the image `data` (`size` bytes) into `buf`, inflating every
 * compressed record on the way.
 *
 * data:     start of the input image.
 * size:     number of bytes in the input image.
 * buf:      output buffer.
 * buf_size: capacity of `buf` in bytes; presumably obtained from
 *           espr_decompressed_size() for the same image.
 *
 * decompress_cb emits everything up to and including the last compressed
 * record. The input that follows it (or the whole image when nothing is
 * compressed) is copied verbatim here.
 */
void espr_decompress(char *data, size_t size, char *buf, size_t buf_size) {
	struct decom s = { .buf = buf, .start = data, .remaining = buf_size };
	espr_walk(data, size, decompress_cb, &s);

	// bookkeeping check: cursor advance and remaining capacity must agree
	assert(buf_size - (size_t)(s.buf - buf) == s.remaining);

	// handle final segment: the length comes from the input still pending,
	// not from the output capacity, so an oversized `buf` cannot make the
	// copy read past the end of `data`
	size_t tail = (size_t)((data + size) - s.start);
	assert(tail <= s.remaining);
	if (tail > s.remaining)
		tail = s.remaining; // never write past `buf`, even with asserts off
	memcpy(s.buf, s.start, tail);
}

/* espr_walk callback that emits one compressed record in inflated form.
 *
 * n:         node being visited; only compressed records are acted on.
 * decom_ptr: points at the struct decom owned by espr_decompress.
 *
 * For a compressed record this:
 *   1. copies the pending input, up to and including this record's header,
 *      verbatim to the output;
 *   2. inflates the record payload straight into the output;
 *   3. patches the copied header so it holds the inflated size and no
 *      longer carries COMPRESSED_FLAG;
 *   4. moves the pending-input marker to the byte after this record.
 *
 * NOTE(review): enclosing group headers are copied verbatim, so their
 * `size` fields still describe the compressed layout. Confirm whether
 * callers depend on those sizes.
 */
void decompress_cb(Node n, void *decom_ptr) {
	struct decom *d = decom_ptr;

	// only need to do anything when we find a compressed flag
	if (n.type != NT_RECORD || !(n.header.record->flags & COMPRESSED_FLAG))
		return;

	// uncompressed segment copy (ends with this record's header)
	const size_t size = (size_t)(n.data - d->start);
	assert(size <= d->remaining);
	memcpy(d->buf, d->start, size);

	// update decom struct
	d->remaining -= size;
	d->buf += size;

	// the header just copied sits immediately before the write cursor
	Record *header = (Record *)(d->buf) - 1;

	// The payload starts with the 32-bit decompressed size. Read it with
	// memcpy because n.data has no alignment guarantee.
	uint32_t dc_size;
	memcpy(&dc_size, n.data, sizeof dc_size);
	assert(dc_size <= d->remaining); // inflated data must fit in the output

	// the rest of the payload is the zlib stream
	assert(n.header.record->size >= sizeof(uint32_t));
	const unsigned long src_len =
		(unsigned long)(n.header.record->size - sizeof(uint32_t));
	const unsigned char *src = (const unsigned char *)(n.data + sizeof(uint32_t));

	// decompress directly into buffer; zlib's length type is unsigned long,
	// which is not size_t on every platform
	unsigned long dest_len = dc_size;
	int ret = uncompress((unsigned char *)d->buf, &dest_len, src, src_len);
	assert(ret == Z_OK);
	assert(dest_len == dc_size);
	(void)ret;

	// update decom struct
	d->remaining -= dc_size;
	d->buf += dc_size;

	// update start to start of next record/group
	d->start = n.data + n.header.record->size;

	// update header data size
	header->size = dc_size;

	// unset compressed flag
	header->flags &= ~COMPRESSED_FLAG;
}

/* Walk a run of back-to-back groups and records.
 *
 * Unknown data is always some concatenation of groups and records; each
 * segment is dispatched to walk_group or walk_record according to its
 * four-byte type.
 *
 * data: first byte of the run.
 * size: length of the run in bytes.
 * cb:   callback forwarded to the per-segment walkers.
 * pt:   opaque pointer forwarded alongside `cb`.
 *
 * Returns the position reached, which equals data + size when the segments
 * tile the run exactly.
 */
char *walk_concat(char *data, size_t size, void (*cb)(Node n, void *pt), void *pt) {
	const char *const stop = data + size;
	char *cursor = data;

	for (;;) {
		if (cursor == stop)
			return cursor;

		// a segment must never run past the end of the run
		assert(cursor < stop);

		const Type4 *const tag = (const Type4 *)cursor;

		// check valid type
		assert(rt[rth2rt[rt_hash(tag->uint)]] == tag->uint);

		// anything that is not a group is handled as a record
		cursor = (tag->uint == rt[GRUP])
			? walk_group(cursor, cb, pt)
			: walk_record(cursor, cb, pt);
	}
}

/* Walk one group and everything nested inside it.
 *
 * Groups are containers for records and for other groups. `cb` is called
 * for the group itself first, then its contents are walked.
 *
 * data: first byte of the group header.
 * cb:   callback invoked for the group and forwarded to its children.
 * pt:   opaque pointer passed to `cb`.
 *
 * Returns a pointer just past the end of the group.
 */
char *walk_group(char *data, void (*cb)(Node n, void *pt), void *pt) {
	Group *const group = (Group *)data;

	// The size stored in the header counts the header itself, so the
	// children span from just after the header to data + size.
	char *const children = data + sizeof(Group);
	char *const group_end = data + group->size;
	const size_t children_size = group_end - children;

	// report the group before descending into it
	const Node node = { .type = NT_GROUP, .header.group = group, .data = children };
	cb(node, pt);

	// the children must tile the group exactly
	char *const reached = walk_concat(children, children_size, cb, pt);
	assert(reached == group_end);

	return reached;
}

/* Walk one non-group record.
 *
 * data: first byte of the record header.
 * cb:   callback invoked once with the node built from this record.
 * pt:   opaque pointer passed to `cb`.
 *
 * Returns a pointer just past the record's data.
 */
char *walk_record(char *data, void (*cb)(Node n, void *pt), void *pt) {
	Record *const record = (Record *)data;
	assert(record->type.uint != rt[GRUP]);

	char *const payload = data + sizeof(Record);

	// Callback
	cb((Node){ .type = NT_RECORD, .header.record = record, .data = payload }, pt);

	// The record size excludes the header, so the next node starts
	// `size` bytes after the payload begins.
	return payload + record->size;
}

/* Print every field of a group header to stdout, one per line.
 *
 * header: group header to print; its type must be below GTS_SIZE.
 */
void print_group_header(Group *header) {
	fputs("--- HEADER: GROUP ---\n", stdout);

	print_type(header->grup);
	printf("Size: %u\n", header->size);
	print_group_label(header);

	// the group type indexes the name table, so bounds-check it first
	assert(header->type < GTS_SIZE);
	const char *const type_name = group_type_strings[header->type];
	printf("Group type: %s\n", type_name);

	print_timestamp(header->timestamp);
	printf("Version Control Info: %04x\n", header->vcinfo);
	printf("Unknown: %08x\n", header->unknown);
}

/* Print every field of a record header to stdout.
 *
 * header: record header to print.
 */
void print_record_header(Record *header) {
	fputs("--- HEADER: RECORD ---\n", stdout);

	// type and flags have dedicated printers
	print_type(header->type);
	print_record_flags(header);

	printf("FormID: %x\n", header->formid);

	print_timestamp(header->timestamp);

	// remaining fields are printed raw
	printf("Version Control Info: %04x\n", header->vcinfo);
	printf("Version: %u\n", header->version);
	printf("Unknown: %08x\n", header->unknown);
}

/* Print a group's label, interpreted according to the group type.
 *
 * header: group header whose `label` union is printed.
 *
 * Interpretation by group type:
 *   - top-level groups:            four-character record type
 *   - interior cell (sub)blocks:   block number
 *   - exterior cell (sub)blocks:   grid coordinates (coord[1] as X,
 *                                  coord[0] as Y)
 *   - all *_CHILDREN groups:       FormID of the parent
 *
 * Any other group type trips an assert.
 */
void print_group_label(Group *header) {
	printf("Label: ");
	switch (header->type) {
		case GT_TOP:
			print_type4(header->label.type);
			break;
		case GT_INTERIOR_CELL_BLOCK:
		case GT_INTERIOR_CELL_SUBBLOCK:
			printf("%d", header->label.number);
			break; // without this the number was followed by coords and a FormID
		case GT_EXTERIOR_CELL_BLOCK:
		case GT_EXTERIOR_CELL_SUBBLOCK:
			printf("X: %d, Y: %d", header->label.coord[1], header->label.coord[0]);
			break; // without this the coords were followed by a FormID
		case GT_WORLD_CHILDREN:
		case GT_CELL_CHILDREN:
		case GT_TOPIC_CHILDREN:
		case GT_CELL_PERSISTENT_CHILDREN:
		case GT_CELL_TEMPORARY_CHILDREN:
			printf("FormID[%x]", header->label.formid);
			break;
		default:
			assert(false); // invalid group type
	}
	printf("\n");
}

/* Print the name of every flag set in a record header.
 *
 * header: record header whose `flags` are printed.
 *
 * Flag names come from the per-record-type table in `rfs`, indexed by bit
 * position. Flags are consumed from the highest set bit downwards. The
 * first set bit without a name stops the scan, and any bits still set
 * afterwards are reported and trip an assert.
 *
 * REFR records are skipped for now (their flags need a FormID lookup).
 */
void print_record_flags(Record *header) {
	printf("Flags:\n");

	uint32_t flags = header->flags;
	const uint32_t type = header->type.uint;

	// print flags
	if (type == rt[REFR]) {
		// TODO
		// REFR requires FormID lookup
		flags = 0;
	} else {
		rfs_inner *const flag_lut = rfs[rt_hash(type)];
		if (flag_lut) {
			while (flags != 0) {
				// Index of the highest set bit. A plain scan replaces the
				// __lzcnt intrinsic, which is compiler-specific and returns
				// wrong values on CPUs without LZCNT. The scan terminates
				// because flags is non-zero here.
				size_t highest = 31;
				while ((flags & (((uint32_t)1) << highest)) == 0)
					highest--;

				const char *const str = (*flag_lut)[highest];
				if (!str)
					break; // unnamed flag: leave it set so it is reported below

				printf("  - %s\n", str);
				flags -= ((uint32_t)1) << highest;
			}
		}
	}

	if (flags != 0) {
		printf("\n\nOriginal flags: %08x\n", header->flags);
		printf("Unhandled flags: %08x\n", flags);
		assert(false); // unhandled flags
	}
}

/* Print a packed header timestamp (Skyrim SE format) to stdout.
 *
 * Only a single year digit is recovered by convert_ts, so the decade is
 * printed as a literal 'x'.
 */
void print_timestamp(uint16_t _ts) {
	const Timestamp decoded = convert_ts(_ts);
	printf("Timestamp: 20x%u-%02u-%02u\n", decoded.year, decoded.month, decoded.day);
}

/* Print a "Type: XXXX" line for a four-character type code. */
void print_type(Type4 type) {
	fputs("Type: ", stdout);
	print_type4(type);
	fputs("\n", stdout);
}

/* Write the four bytes of `val` to stdout as characters, with no
 * terminator or newline.
 */
void print_type4(Type4 val) {
	size_t printed = 0;
	// invariant: `printed` characters of val.bytes have been written
	while (printed != 4) {
		putchar(val.bytes[printed]);
		printed++;
	}
}

/* Decode a packed 16-bit header timestamp.
 *
 * The low byte is the day. The high byte `hb` packs month and year:
 *   month = ((hb - 1) % 12) + 1
 *   year  = ((hb - 1) / 12 + 3) % 10   (a single digit)
 *
 * Returns a Timestamp initialised positionally with (year, month, day).
 */
Timestamp convert_ts(uint16_t ts) {
	const uint8_t day = (uint8_t)(ts & 0xff);
	const uint8_t packed = (uint8_t)(ts >> 8);

	// Done in int on purpose: a zero high byte gives -1 here, which C's
	// truncating division and remainder turn into month 0 and year 3.
	const int months = packed - 1;
	const uint8_t month = (uint8_t)(months % 12 + 1);
	const uint8_t year = (uint8_t)((months / 12 + 3) % 10);

	return (Timestamp){ year, month, day };
}