Fixed a bug in espr_stats where the esp_stats structure was not being properly initialised.

Also, significantly sped up the header printing functions.
This commit is contained in:
2022-09-07 20:56:43 +10:00
parent de1a27e337
commit 4d7bdcf3cc
7 changed files with 579 additions and 483 deletions

View File

@@ -5,10 +5,12 @@
*/
#undef NDEBUG
#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <intrin.h>
#include <io.h>
#include "zlib.h"
@@ -18,6 +20,8 @@
// === CONSTANTS ===
//
#define STDOUT_FILENO 1
// timestamp field access
const uint16_t day_mask = 0x1F;
const uint16_t month_mask = 0xF;
@@ -41,17 +45,18 @@ void print_group_header(Group *header);
void print_record_header(Record *header);
// Printer helpers
void print_group_label(Group *header);
void print_record_flags(Record *header);
void print_timestamp(uint16_t ts);
void print_type(Type4 type);
void print_type4(Type4 val);
void litcopy(struct str_buf *sb, struct str_lit lit);
void num_str(struct str_buf *sb, unsigned long num, int radix);
void type_str(struct str_buf *sb, Type4 type);
void timestamp_str(struct str_buf *sb, uint16_t timestamp);
void group_label_str(struct str_buf *sb, Group *header);
void record_flags_str(struct str_buf *sb, Record *header);
void sb_write(int fp, struct str_buf sb_pre, struct str_buf sb_post);
// Utilities
Timestamp convert_ts(uint16_t ts);
void print_callback(Node n, void *data, void **carry_out);
void dc_size_cb(Node n, void *data, void **carry_out);
void formid_count_cb(Node n, void *data, void **carry_out);
void print_cb(Node n, void *data, void **carry_out);
void stats_cb(Node n, void *data, void **carry_out);
void decompress_pre(Node n, void *data, void **carry_out);
void decompress_post(Node n, void *data, void **carry_in);
@@ -85,169 +90,6 @@ void espr_walk(char *data, size_t size, struct walker_callbacks cb) {
assert(data == data_start + size);
}
void espr_print(char *data, size_t size) {
struct walker_callbacks cb = { .pre = print_callback };
espr_walk(data, size, cb);
}
/* Pre-visit walker callback: prints the header of the node it is handed.
 * Neither data nor carry_out is used.
 */
void print_callback(Node n, void *data, void **carry_out) {
    (void)data;
    (void)carry_out;

    if (n.type == NT_GROUP) {
        print_group_header(n.header.group);
    } else if (n.type == NT_RECORD) {
        print_record_header(n.header.record);
    } else {
        assert(false); // invalid node type
    }
}
size_t espr_decompressed_size(char *data, size_t size) {
size_t dc_size = 0;
struct walker_callbacks cb = { .pre = dc_size_cb, .data = &dc_size };
espr_walk(data, size, cb);
return dc_size;
}
/* Pre-visit walker callback that adds each node's decompressed footprint to
 * the size_t pointed to by data.
 *
 * Groups contribute only their header; their contents are counted as the walk
 * descends into them. Records contribute their header plus their payload,
 * where the payload of a compressed record is the decompressed size stored in
 * the first 4 bytes of its data.
 */
void dc_size_cb(Node n, void *data, void **carry_out) {
    (void)carry_out;
    size_t *dc_size = data;
    switch (n.type) {
    case NT_GROUP:
        // Only add header size for groups, internals will be walked
        *dc_size += sizeof(Group);
        break;
    case NT_RECORD:
        // Add the whole record and header, records are leaf-ish
        *dc_size += sizeof(Record);
        if (n.header.record->flags & COMPRESSED_FLAG) {
            // n.data is a byte pointer with no alignment guarantee, so the
            // size prefix is copied out rather than read through a cast
            uint32_t stored_size;
            memcpy(&stored_size, n.data, sizeof(stored_size));
            *dc_size += stored_size;
        } else {
            *dc_size += n.header.record->size;
        }
        break;
    default:
        assert(false); // invalid node type
    }
}
size_t espr_formid_count(char *data, size_t size) {
size_t count = 0;
struct walker_callbacks cb = { .pre = formid_count_cb, .data = &count };
espr_walk(data, size, cb);
return count;
}
/* Pre-visit walker callback that increments the size_t pointed to by data
 * once per record node; every other node type is ignored.
 *
 * The FormID <-> Record relationship should be bijective: groups are not
 * believed to carry formids, and every record should have a unique formid
 * (otherwise ids would clash), so counting records counts formids.
 */
void formid_count_cb(Node n, void *data, void **carry_out) {
    (void)carry_out;
    if (n.type != NT_RECORD)
        return;

    size_t *record_total = data;
    *record_total += 1;
}
/* Write cursor over the destination buffer, shared by the decompress
 * callbacks through the walker's data pointer.
 */
struct decom {
// next position in the destination buffer to write to
char *buf;
// bytes of the destination buffer still unwritten
size_t remaining;
};
void espr_decompress(char *data, size_t size, char *buf, size_t buf_size) {
struct decom s = { .buf = buf, .remaining = buf_size };
struct walker_callbacks cb =
{ .pre = decompress_pre, .post = decompress_post, .data = &s };
espr_walk(data, size, cb);
}
/* Pre-visit walker callback that copies a node into the destination buffer
 * tracked by the struct decom passed as decom_ptr.
 *
 * Groups: only the header is copied (contents follow as the walk descends)
 * and the address of the copied header is stored in *carry_out.
 * Uncompressed records: header and payload are copied verbatim.
 * Compressed records: the header is copied, the payload is inflated straight
 * into the destination, and the copied header is patched with the new size
 * and with COMPRESSED_FLAG cleared.
 *
 * Every write is preceded by an assert that the destination has room for it.
 */
void decompress_pre(Node n, void *decom_ptr, void **carry_out) {
    struct decom *d = decom_ptr;
    switch (n.type) {
    case NT_RECORD:
        if (n.header.record->flags & COMPRESSED_FLAG) {
            // first 4 bytes of the payload are the decompressed size; the
            // payload pointer carries no alignment guarantee, so copy it out
            assert(n.header.record->size >= sizeof(uint32_t));
            uint32_t dc_size;
            memcpy(&dc_size, n.data, sizeof(dc_size));

            // destination must hold the header plus the inflated payload
            assert(d->remaining >= sizeof(Record));
            assert(d->remaining - sizeof(Record) >= dc_size);

            // copy header and keep a reference to the copy for patching
            memcpy(d->buf, n.header.record, sizeof(Record));
            Record *header = (Record *)d->buf;
            d->remaining -= sizeof(Record);
            d->buf += sizeof(Record);

            // uncompress() takes uLongf * / uLong, so pass real objects of
            // those types instead of casting a uint32_t pointer
            uLongf dest_len = dc_size;
            const uLong src_len =
                (uLong)(n.header.record->size - sizeof(uint32_t));
            const char *src = n.data + sizeof(uint32_t);
            int ret = uncompress(
                (Bytef *)d->buf,
                &dest_len,
                (const Bytef *)src,
                src_len
            );
            assert(ret == Z_OK);
            assert(dest_len == dc_size);

            d->remaining -= dc_size;
            d->buf += dc_size;

            // the copied record is now uncompressed
            header->size = dc_size;
            header->flags &= ~COMPRESSED_FLAG;
        } else {
            // copy header and payload in one go
            const size_t record_size =
                sizeof(Record) + n.header.record->size;
            assert(d->remaining >= record_size);
            memcpy(d->buf, n.header.record, record_size);
            d->remaining -= record_size;
            d->buf += record_size;
        }
        break;
    case NT_GROUP:
        // copy header, contents will be copied while walking
        assert(d->remaining >= sizeof(Group));
        memcpy(d->buf, n.header.group, sizeof(Group));
        // save copied header location for post-walk group size recalc
        *carry_out = (void *)d->buf;
        d->buf += sizeof(Group);
        d->remaining -= sizeof(Group);
        break;
    default:
        assert(false); // invalid node type
    }
}
/* Post-visit walker callback. For a group, *carry_in holds the address of the
 * group's copied header; its size field is rewritten to the distance from
 * that header to the current write position. Other node types are ignored.
 */
void decompress_post(Node n, void *decom_ptr, void **carry_in) {
    if (n.type != NT_GROUP)
        return;

    struct decom *d = decom_ptr;
    Group *copied = (Group *)(*carry_in);
    copied->size = (uint32_t)((char *)d->buf - (char *)copied);
}
/* Unknown data will be some concatenation of groups and records.
*
* `walk_concat` will call the appropriate walking function
@@ -332,58 +174,299 @@ char *walk_record(char *data, struct walker_callbacks cb) {
return data;
}
void espr_print(char *data, size_t size) {
struct walker_callbacks cb = { .pre = print_cb };
espr_walk(data, size, cb);
}
/* Pre-visit walker callback: prints the header of the node it is handed.
 * Neither data nor carry_out is used.
 */
void print_cb(Node n, void *data, void **carry_out) {
    (void)data;
    (void)carry_out;

    if (n.type == NT_GROUP) {
        print_group_header(n.header.group);
    } else if (n.type == NT_RECORD) {
        print_record_header(n.header.record);
    } else {
        assert(false); // invalid node type
    }
}
struct esp_stats espr_stats(char *data, size_t size) {
struct esp_stats stats = { 0 };
struct walker_callbacks cb = { .pre = stats_cb, .data = &stats };
espr_walk(data, size, cb);
return stats;
}
/* Pre-visit walker callback that updates the struct esp_stats passed as data.
 *
 * Tallies the group and record counts and accumulates the uncompressed size.
 * Groups only add their header size, since their contents are tallied as the
 * walk descends into them. Records add their header plus their payload, where
 * the payload of a compressed record is the uncompressed size stored in the
 * first 4 bytes of its data.
 */
void stats_cb(Node n, void *data, void **carry_out) {
    (void)carry_out;
    struct esp_stats *stats = data;
    switch (n.type) {
    case NT_GROUP:
        stats->group_count++;
        stats->decompressed_size += sizeof(Group);
        break;
    case NT_RECORD:
        stats->record_count++;
        stats->decompressed_size += sizeof(Record);
        if (n.header.record->flags & COMPRESSED_FLAG) {
            // n.data is a byte pointer with no alignment guarantee, so the
            // size prefix is copied out rather than read through a cast
            uint32_t stored_size;
            memcpy(&stored_size, n.data, sizeof(stored_size));
            stats->decompressed_size += stored_size;
        } else {
            stats->decompressed_size += n.header.record->size;
        }
        break;
    default:
        assert(false); // invalid node type
    }
}
/* Write cursor over the destination buffer, shared by the decompress
 * callbacks through the walker's data pointer.
 */
struct decom {
// next position in the destination buffer to write to
char *buf;
// bytes of the destination buffer still unwritten
size_t remaining;
};
void espr_decompress(char *data, size_t size, char *buf, size_t buf_size) {
struct decom s = { .buf = buf, .remaining = buf_size };
struct walker_callbacks cb =
{ .pre = decompress_pre, .post = decompress_post, .data = &s };
espr_walk(data, size, cb);
}
/* Handles the copying of groups and records, and the decompression of
 * compressed record data.
 *
 * For groups it copies only the header, as group data will be handled by
 * further walking. The destination prior to copying is also saved to
 * carry_out for groups so that decompress_post can correctly update the size
 * of the copied group.
 *
 * For uncompressed records it simply copies the entirety of the record to the
 * destination. For compressed records it copies the header first and then
 * decompresses the compressed payload directly into the destination, patching
 * the copied header with the new size and clearing COMPRESSED_FLAG.
 *
 * Every write is preceded by an assert that the destination has room for it.
 */
void decompress_pre(Node n, void *decom_ptr, void **carry_out) {
    struct decom *d = decom_ptr;
    switch (n.type) {
    case NT_RECORD:
        if (n.header.record->flags & COMPRESSED_FLAG) {
            // first 4 bytes of the payload are the decompressed size; the
            // payload pointer carries no alignment guarantee, so copy it out
            assert(n.header.record->size >= sizeof(uint32_t));
            uint32_t dc_size;
            memcpy(&dc_size, n.data, sizeof(dc_size));

            // destination must hold the header plus the inflated payload
            assert(d->remaining >= sizeof(Record));
            assert(d->remaining - sizeof(Record) >= dc_size);

            // copy header and keep a reference to the copy for patching
            memcpy(d->buf, n.header.record, sizeof(Record));
            Record *header = (Record *)d->buf;
            d->remaining -= sizeof(Record);
            d->buf += sizeof(Record);

            // uncompress() takes uLongf * / uLong, so pass real objects of
            // those types instead of casting a uint32_t pointer
            uLongf dest_len = dc_size;
            const uLong src_len =
                (uLong)(n.header.record->size - sizeof(uint32_t));
            const char *src = n.data + sizeof(uint32_t);
            int ret = uncompress(
                (Bytef *)d->buf,
                &dest_len,
                (const Bytef *)src,
                src_len
            );
            assert(ret == Z_OK);
            assert(dest_len == dc_size);

            d->remaining -= dc_size;
            d->buf += dc_size;

            // the copied record is now uncompressed
            header->size = dc_size;
            header->flags &= ~COMPRESSED_FLAG;
        } else {
            // copy header and payload in one go
            const size_t record_size =
                sizeof(Record) + n.header.record->size;
            assert(d->remaining >= record_size);
            memcpy(d->buf, n.header.record, record_size);
            d->remaining -= record_size;
            d->buf += record_size;
        }
        break;
    case NT_GROUP:
        // copy header, contents will be copied while walking
        assert(d->remaining >= sizeof(Group));
        memcpy(d->buf, n.header.group, sizeof(Group));
        // save copied header location for post-walk group size recalc
        *carry_out = (void *)d->buf;
        d->buf += sizeof(Group);
        d->remaining -= sizeof(Group);
        break;
    default:
        assert(false); // invalid node type
    }
}
/* Recalculates a group's size once its contents have been written. The
 * location of the group's copied header arrives in carry_in; it is used both
 * to reach that header and to measure the new size as the distance between
 * the current destination pointer and the header. Non-group nodes need no
 * fix-up and are ignored.
 */
void decompress_post(Node n, void *decom_ptr, void **carry_in) {
    if (n.type != NT_GROUP)
        return;

    struct decom *d = decom_ptr;
    Group *copied = (Group *)(*carry_in);
    copied->size = (uint32_t)((char *)d->buf - (char *)copied);
}
/* Formats a group header into a stack buffer and emits it with a single
 * sb_write call to STDOUT_FILENO.
 *
 * The text is built only through the string-buffer helpers, so each field is
 * emitted exactly once. The group type is range-checked before it is used to
 * index group_type_strings.
 */
void print_group_header(Group *header) {
    // Guess at enough with significant margin
    char buf[1024] = { 0 };
    const struct str_buf sb_pre = { .buf = buf, .size = sizeof(buf) };
    struct str_buf sb = sb_pre;

    // literals
    struct str_lit
        l1 = STR_LIT("--- HEADER: GROUP ---"),
        l2 = STR_LIT("\nType: "),
        l3 = STR_LIT("\nSize: "),
        l4 = STR_LIT("\nLabel: "),
        l5 = STR_LIT("\nGroup type: "),
        l6 = STR_LIT("\nTimestamp: "),
        l7 = STR_LIT("\nVersion Control Info: "),
        l8 = STR_LIT("\nUnknown: "),
        l9 = STR_LIT("\n");

    // guard the table lookup against an out-of-range group type
    assert(header->type < GTS_SIZE);
    struct str_lit gt = group_type_strings[header->type];

    // construct output
    litcopy(&sb, l1);
    litcopy(&sb, l2); type_str(&sb, header->grup);
    litcopy(&sb, l3); num_str(&sb, header->size, 10);
    litcopy(&sb, l4); group_label_str(&sb, header);
    litcopy(&sb, l5); litcopy(&sb, gt);
    litcopy(&sb, l6); timestamp_str(&sb, header->timestamp);
    litcopy(&sb, l7); num_str(&sb, header->vcinfo, 16);
    litcopy(&sb, l8); num_str(&sb, header->unknown, 16);
    litcopy(&sb, l9);

    sb_write(STDOUT_FILENO, sb_pre, sb);
}
void print_record_header(Record *header) {
printf("--- HEADER: RECORD ---\n");
print_type(header->type);
print_record_flags(header);
printf("FormID: %x\n", header->formid);
print_timestamp(header->timestamp);
printf("Version Control Info: %04x\n", header->vcinfo);
printf("Version: %u\n", header->version);
printf("Unknown: %08x\n", header->unknown);
/* Appends the bytes of lit to the string buffer and advances it.
 *
 * sb:  buffer cursor; buf is moved forward and size reduced by lit.size
 * lit: literal to copy (lit.size bytes starting at lit.lit)
 *
 * Asserts that the buffer has at least lit.size bytes left.
 */
void litcopy(struct str_buf *sb, struct str_lit lit) {
    assert(lit.size <= sb->size);
    memcpy(sb->buf, lit.lit, lit.size);
    sb->buf += lit.size;
    sb->size -= lit.size;
}
void print_group_label(Group *header) {
printf("Label: ");
/* Writes num in the given radix at the buffer's current position using
 * _ultoa_s, then advances the buffer past the digits that were written.
 *
 * sb:    buffer cursor; buf is moved forward and size reduced by the length
 *        of the generated string
 * num:   value to format
 * radix: numeric base passed through to _ultoa_s
 *
 * Asserts that the conversion succeeded.
 */
void num_str(struct str_buf *sb, unsigned long num, int radix) {
    const errno_t status = _ultoa_s(num, sb->buf, sb->size, radix);
    assert(status == 0);

    // _ultoa_s does not report a length, so measure what it produced
    const int written = (int)strlen(sb->buf);
    sb->buf += written;
    sb->size -= written;
}
/* Appends the four bytes of a Type4 to the string buffer and advances it.
 *
 * Asserts that the buffer has at least 4 bytes left.
 */
void type_str(struct str_buf *sb, Type4 type) {
    assert(sb->size >= 4);

    char *out = sb->buf;
    for (size_t i = 0; i < 4; i++)
        *out++ = type.bytes[i];

    sb->buf = out;
    sb->size -= 4;
}
/* Appends a decimal number that may be negative. num_str only takes an
 * unsigned value, so the sign is written separately and the magnitude is
 * formatted afterwards.
 */
static void coord_num_str(struct str_buf *sb, long value) {
    unsigned long magnitude = (unsigned long)value;
    if (value < 0) {
        litcopy(sb, LIT("-"));
        // unsigned negation avoids overflow on the most negative value
        magnitude = 0UL - magnitude;
    }
    num_str(sb, magnitude, 10);
}

/* Appends the textual form of a group's label to the string buffer. How the
 * label is interpreted depends on the group type:
 *   - top groups: the 4-byte record type
 *   - interior cell (sub)blocks: a decimal number
 *   - exterior cell (sub)blocks: "X: <x>, Y: <y>" (coord[1] is X, coord[0]
 *     is Y)
 *   - child groups: "FormID[<hex>]"
 *
 * Output goes only into sb; nothing is written to stdout here.
 * NOTE(review): the printf-based version printed coordinates with %d, so they
 * are treated as possibly negative here - confirm against the label type.
 */
void group_label_str(struct str_buf *sb, Group *header) {
    switch (header->type) {
    case GT_TOP:
        type_str(sb, header->label.type);
        break;
    case GT_INTERIOR_CELL_BLOCK:
    case GT_INTERIOR_CELL_SUBBLOCK:
        num_str(sb, header->label.number, 10);
        break;
    case GT_EXTERIOR_CELL_BLOCK:
    case GT_EXTERIOR_CELL_SUBBLOCK:
        litcopy(sb, LIT("X: "));
        coord_num_str(sb, header->label.coord[1]);
        // separator between the two coordinates
        litcopy(sb, LIT(", Y: "));
        coord_num_str(sb, header->label.coord[0]);
        break;
    case GT_WORLD_CHILDREN:
    case GT_CELL_CHILDREN:
    case GT_TOPIC_CHILDREN:
    case GT_CELL_PERSISTENT_CHILDREN:
    case GT_CELL_TEMPORARY_CHILDREN:
        litcopy(sb, LIT("FormID["));
        num_str(sb, header->label.formid, 16);
        litcopy(sb, LIT("]"));
        break;
    default:
        assert(false); // invalid group type
    }
}
void print_record_flags(Record *header) {
printf("Flags:\n");
void timestamp_str(struct str_buf *sb, uint16_t timestamp) {
Timestamp ts = convert_ts(timestamp);
litcopy(sb, LIT("20x")); num_str(sb, ts.year, 10); litcopy(sb, LIT("-"));
num_str(sb, ts.month, 10); litcopy(sb, LIT("-")); num_str(sb, ts.day, 10);
}
/* Writes the filled portion of a string buffer to a file descriptor.
 *
 * fp:      descriptor passed to _write
 * sb_pre:  the buffer as it was before anything was appended
 * sb_post: the buffer after appending; the drop in size between the two is
 *          the number of bytes written
 *
 * Asserts that the byte count is non-negative and that _write wrote all of it.
 */
void sb_write(int fp, struct str_buf sb_pre, struct str_buf sb_post) {
    const int used = sb_pre.size - sb_post.size;
    assert(used >= 0);

    const int written = _write(fp, sb_pre.buf, used);
    assert(written == used);
}
/* Formats a record header into a stack buffer and emits it with a single
 * sb_write call to STDOUT_FILENO. Each field is a label followed by its
 * value.
 */
void print_record_header(Record *header) {
    char text[1024] = { 0 };
    const struct str_buf start = { .buf = text, .size = sizeof(text) };
    struct str_buf cursor = start;

    litcopy(&cursor, LIT("--- HEADER: RECORD ---"));

    litcopy(&cursor, LIT("\nType: "));
    type_str(&cursor, header->type);

    litcopy(&cursor, LIT("\nFlags: "));
    record_flags_str(&cursor, header);

    litcopy(&cursor, LIT("\nFormID: "));
    num_str(&cursor, header->formid, 16);

    litcopy(&cursor, LIT("\nTimestamp: "));
    timestamp_str(&cursor, header->timestamp);

    litcopy(&cursor, LIT("\nVersion Control Info: "));
    num_str(&cursor, header->vcinfo, 16);

    litcopy(&cursor, LIT("\nVersion: "));
    num_str(&cursor, header->version, 10);

    litcopy(&cursor, LIT("\nUnknown: "));
    num_str(&cursor, header->unknown, 16);

    litcopy(&cursor, LIT("\n"));

    sb_write(STDOUT_FILENO, start, cursor);
}
void record_flags_str(struct str_buf *sb, Record *header) {
uint32_t flags = header->flags;
const uint32_t type = header->type.uint;
@@ -400,9 +483,9 @@ void print_record_flags(Record *header) {
// will always be >= 0 as flags is not 0
int highest = 31 - __lzcnt(flags);
assert(highest >= 0);
const char *const str = (*flag_lut)[highest];
if (str) {
printf(" - %s\n", str);
const struct str_lit lit = (*flag_lut)[highest];
if (lit.lit) {
litcopy(sb, LIT("\n - ")); litcopy(sb, lit);
flags -= ((uint32_t)1) << highest;
}
else
@@ -411,6 +494,7 @@ void print_record_flags(Record *header) {
}
}
// slow path
if (flags != 0) {
printf("\n\nOriginal flags: %08x\n", header->flags);
printf("Unhandled flags: %08x\n", flags);
@@ -418,24 +502,12 @@ void print_record_flags(Record *header) {
}
}
// Prints a packed timestamp as "Timestamp: 20x<year>-<month>-<day>", with
// month and day zero-padded to two digits. This is the Skyrim SE timestamp
// format.
void print_timestamp(uint16_t _ts) {
    const Timestamp decoded = convert_ts(_ts);
    printf("Timestamp: 20x%u-%02u-%02u\n",
           decoded.year, decoded.month, decoded.day);
}
// Prints "Type: ", the four characters of the type, and a newline to stdout.
void print_type(Type4 type) {
    fputs("Type: ", stdout);
    print_type4(type);
    putchar('\n');
}
// Writes the four bytes of val to stdout as characters, in order.
void print_type4(Type4 val) {
    size_t printed = 0;
    // invariant: printed characters from val.bytes have been written
    while (printed != 4) {
        putchar(val.bytes[printed]);
        printed++;
    }
}
/* Converts the bit-packed/encoded timestamp used in esp/esm files into day,
* month and year. See UESP for further explanation.
*
* This currently handles the timestamp format used in Skyrim.esm, but newer
* files apparently use a different format. This will need to be handled later.
*/
Timestamp convert_ts(uint16_t ts) {
/*
const uint8_t day = (uint8_t)(ts & day_mask);