/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0.If a copy of the MPL was not distributed with this * file, You can obtain one at http ://mozilla.org/MPL/2.0/. */ #undef NDEBUG #include #include #include #include #include #include #include #include #include "zlib.h" #include "ESPReader.h" // // === CONSTANTS === // #define STDOUT_FILENO 1 // timestamp field access const uint16_t day_mask = 0x1F; const uint16_t month_mask = 0xF; const uint16_t year_mask = 0x7F; const int month_offset = 5; const int year_offset = 9; // // === FORWARD DECLARATIONS === // void asserts(void); // Tree walkers char *walk_concat( char *data, size_t size, struct walker_callbacks cb, void *from_parent ); char *walk_group(char *data, struct walker_callbacks cb, void *from_parent); char *walk_record(char *data, struct walker_callbacks cb, void *from_parent); // Header printers void print_group_header(Group *header); void print_record_header(Record *header); // Printer helpers void litcopy(struct str_buf *sb, struct str_lit lit); void num_str(struct str_buf *sb, unsigned long num, int radix); void type_str(struct str_buf *sb, Type4 type); void timestamp_str(struct str_buf *sb, uint16_t timestamp); void group_label_str(struct str_buf *sb, Group *header); void record_flags_str(struct str_buf *sb, Record *header); void sb_write(int fp, struct str_buf sb_pre, struct str_buf sb_post); // Utilities Timestamp convert_ts(uint16_t ts); // Callbacks void print_cb( Node n, void *data, void **carry_out, void *from_parent, void **to_children ); void stats_cb( Node n, void *data, void **carry_out, void *from_parent, void **to_children ); void decompress_pre( Node n, void *decom_ptr, void **carry_out, void *from_parent, void **to_children ); void decompress_post( Node n, void *data, void **carry_in ); void create_tree_cb( Node n, void *data, void **carry_out, void *from_parent, void **to_children ); // // === FUNCTIONS === // void asserts(void) { // binary overlay size checks assert(sizeof(Record) == 24); // Record struct incorrect size assert(sizeof(Group) == 24); // Group struct incorrect size assert(sizeof((Group) { 0 }.label) == 4); // Label union incorrect size assert(sizeof(Field) == 6); // Field struct incorrect size assert(sizeof(MetaNode) == 64); // 1 cache line // zlib compatability assert(sizeof(uLongf) == sizeof(uint32_t)); assert(sizeof(Bytef) == sizeof(char)); } void espr_walk(char *data, size_t size, struct walker_callbacks cb) { // check assertions that cannot be checked at compile time asserts(); char *data_start = data; // check that we are at the start of the file const Type4 type = *(const Type4 *)data; assert(type.uint == rt[TES4]); data = walk_concat(data, size, cb, NULL); assert(data == data_start + size); } /* Unknown data will be some concatenation of groups and records. * * `walk_concat` will call the appropriate walking function * for each segment of unknown data in this concatenation. */ char *walk_concat( char *data, size_t size, struct walker_callbacks cb, void *from_parent ) { const char *end = data + size; while (data != end) { assert(data < end); const Type4 *type = (Type4 *)data; // check valid type assert(rt[rt_hash(type->uint)] == type->uint); // only need to distinguish between groups and records if (type->uint == rt[GRUP]) data = walk_group(data, cb, from_parent); else data = walk_record(data, cb, from_parent); } return data; } /* Walk a group record. Group records are containers for any other type of * record, including other group records. * * This function will also call `cb` with the node constructed from this group * record. */ char *walk_group(char *data, struct walker_callbacks cb, void *from_parent) { Group *const header = (Group *const)data; // The size in the group header includes the size of the header char *data_start = data + sizeof(Group); char *data_end = data + header->size; size_t data_size = data_end - data_start; Node n = { .header.group = header, .data = data_start, .type = NT_GROUP }; void *carry = NULL; void *to_children = NULL; // Pre-walk callback if (cb.pre) cb.pre(n, cb.data, &carry, from_parent, &to_children); // Walk through the concatenation of data inside the group. data = walk_concat(data_start, data_size, cb, to_children); assert(data == data_end); // Post-walk callback if (cb.post) cb.post(n, cb.data, carry); return data; } char *walk_record(char *data, struct walker_callbacks cb, void *from_parent) { Record *header = (Record *)data; assert(header->type.uint != rt[GRUP]); char *data_start = data + sizeof(Record); Node n = { .header.record = header, .data = data_start, .type = NT_RECORD }; void *carry = NULL; void *to_children = NULL; /* Pre and post walk callbacks make less sense for record walking as * records are leaf-ish, will still call both here for now as field * walking may be added in the future. */ // Pre-walk callback if (cb.pre) cb.pre(n, cb.data, &carry, from_parent, &to_children); // Update data ptr based on record size. data += sizeof(Record) + header->size; // Post-walk callback if (cb.post) cb.post(n, cb.data, carry); return data; } void espr_print(char *data, size_t size) { struct walker_callbacks cb = { .pre = print_cb }; espr_walk(data, size, cb); } void print_cb( Node n, void *data, void **carry_out, void *from_parent, void **to_children ) { (void)data; (void)carry_out; (void)from_parent; (void)to_children; switch (n.type) { case NT_GROUP: print_group_header(n.header.group); break; case NT_RECORD: print_record_header(n.header.record); break; default: assert(false); // invalid node type } } struct esp_stats espr_stats(char *data, size_t size) { struct esp_stats stats = { 0 }; struct walker_callbacks cb = { .pre = stats_cb, .data = &stats }; espr_walk(data, size, cb); return stats; } /* Tallies up the group and record count. Calculates uncompressed size; groups * only need their header size tallied as their data size will be handled by * further walking of the tree. */ void stats_cb( Node n, void *data, void **carry_out, void *from_parent, void **to_children ) { (void)carry_out; (void)from_parent; (void)to_children; struct esp_stats *stats = data; switch (n.type) { case NT_GROUP: stats->group_count++; stats->decompressed_size += sizeof(Group); break; case NT_RECORD: stats->record_count++; stats->decompressed_size += sizeof(Record); if (n.header.record->flags & COMPRESSED_FLAG) { // uncompressed size is stored in the first 4 bytes of // data stats->decompressed_size += *((uint32_t *)n.data); } else { stats->decompressed_size += n.header.record->size; } break; default: assert(false); // invalid node type } } struct decom { char *buf; size_t remaining; }; void espr_decompress(char *data, size_t size, char *buf, size_t buf_size) { struct decom s = { .buf = buf, .remaining = buf_size }; struct walker_callbacks cb = { .pre = decompress_pre, .post = decompress_post, .data = &s }; espr_walk(data, size, cb); } /* Handles the copying of groups and records, and the decompression of * compressed record data. * * For groups it copies only the header as group data will be handled by further * walking. The destination prior to copying will also be saved to carry_out for * groups so that decompress_post can correctly update the size of the copied * group. * * For uncompressed records it simply copies the entirety of the record to the * destination. For compressed records in copies the header first and then * directly decompresses the compressed record into the destination. */ void decompress_pre( Node n, void *decom_ptr, void **carry_out, void *from_parent, void **to_children ) { (void)from_parent; (void)to_children; struct decom *d = decom_ptr; switch (n.type) { case NT_RECORD: // compressed record if (n.header.record->flags & COMPRESSED_FLAG) { // copy header memcpy(d->buf, n.header.record, sizeof(Record)); // copied header reference Record *header = (Record *)d->buf; // update decom struct d->remaining -= sizeof(Record); d->buf += sizeof(Record); // decompress directly into buffer // first 4 bytes are the decompressed size const uint32_t dc_size = *((uint32_t *)n.data); uint32_t to_copy = dc_size; uint32_t cur_size = n.header.record->size - sizeof(uint32_t); char *data_start = n.data + sizeof(uint32_t); int ret = uncompress( (Bytef *)d->buf, (uLongf *)&to_copy, (Bytef *)data_start, (uLong)cur_size ); assert(ret == Z_OK); assert(to_copy == dc_size); // update decom struct d->remaining -= dc_size; d->buf += dc_size; // update header data size header->size = dc_size; // unset compressed flag header->flags &= ~COMPRESSED_FLAG; } else { // copy record size_t record_size = sizeof(Record) + n.header.record->size; memcpy(d->buf, n.header.record, record_size); // update decom d->remaining -= record_size; d->buf += record_size; } break; case NT_GROUP: // copy header, contents will be copied while walking memcpy(d->buf, n.header.group, sizeof(Group)); // save copied header location for post-walk group size recalc *carry_out = (void *)d->buf; // update decom d->buf += sizeof(Group); d->remaining -= sizeof(Group); break; default: assert(false); // invalid node type } } /* Handles recalculating group size after decompression. The location of the * the group's copied header will be passed in in carry_in and can be used both * to access the copied group header and calculate the new size of the group * based on the difference between the current destination pointer and the * group header pointer. */ void decompress_post(Node n, void *decom_ptr, void **carry_in) { struct decom *d = decom_ptr; // only need to handle group resize if (n.type == NT_GROUP) { Group *g = (Group *)(*carry_in); uint32_t new_size = (uint32_t)((char *)d->buf - (char *)g); g->size = new_size; } } void print_group_header(Group *header) { assert(header->type < GTS_SIZE); // Guess at enough with significant margin char buf[1024] = { 0 }; const struct str_buf sb_pre = { .buf = buf, .size = sizeof(buf) }; struct str_buf sb = sb_pre; // literals struct str_lit l1 = STR_LIT("--- HEADER: GROUP ---"), l2 = STR_LIT("\nType: "), l3 = STR_LIT("\nSize: "), l4 = STR_LIT("\nLabel: "), l5 = STR_LIT("\nGroup type: "), l6 = STR_LIT("\nTimestamp: "), l7 = STR_LIT("\nVersion Control Info: "), l8 = STR_LIT("\nUnknown: "), l9 = STR_LIT("\n"); struct str_lit gt = group_type_strings[header->type]; // construct output litcopy(&sb, l1); litcopy(&sb, l2); type_str(&sb, header->grup); litcopy(&sb, l3); num_str(&sb, header->size, 10); litcopy(&sb, l4); group_label_str(&sb, header); litcopy(&sb, l5); litcopy(&sb, gt); litcopy(&sb, l6); timestamp_str(&sb, header->timestamp); litcopy(&sb, l7); num_str(&sb, header->vcinfo, 16); litcopy(&sb, l8); num_str(&sb, header->unknown, 16); litcopy(&sb, l9); sb_write(STDOUT_FILENO, sb_pre, sb); } void litcopy(struct str_buf *sb, struct str_lit lit) { assert(sb->size >= lit.size); memcpy(sb->buf, lit.lit, lit.size); sb->size -= lit.size; sb->buf += lit.size; } void num_str(struct str_buf *sb, unsigned long num, int radix) { errno_t ret = _ultoa_s(num, sb->buf, sb->size, radix); assert(ret == 0); int len = (int)strlen(sb->buf); sb->size -= len; sb->buf += len; } void type_str(struct str_buf *sb, Type4 type) { assert(sb->size >= 4); for (size_t i = 0; i != 4; i++) sb->buf[i] = type.bytes[i]; sb->buf += 4; sb->size -= 4; } void group_label_str(struct str_buf *sb, Group *header) { switch (header->type) { case GT_TOP: type_str(sb, header->label.type); break; case GT_INTERIOR_CELL_BLOCK: case GT_INTERIOR_CELL_SUBBLOCK: num_str(sb, header->label.number, 10); break; case GT_EXTERIOR_CELL_BLOCK: case GT_EXTERIOR_CELL_SUBBLOCK: uint16_t x = header->label.coord[1], y = header->label.coord[0]; litcopy(sb, LIT("X: ")); num_str(sb, x, 10); litcopy(sb, LIT("Y: ")); num_str(sb, y, 10); break; case GT_WORLD_CHILDREN: case GT_CELL_CHILDREN: case GT_TOPIC_CHILDREN: case GT_CELL_PERSISTENT_CHILDREN: case GT_CELL_TEMPORARY_CHILDREN: litcopy(sb, LIT("FormID[")); num_str(sb, header->label.formid, 16); litcopy(sb, LIT("]")); break; default: assert(false); // invalid group type } } void timestamp_str(struct str_buf *sb, uint16_t timestamp) { Timestamp ts = convert_ts(timestamp); litcopy(sb, LIT("20x")); num_str(sb, ts.year, 10); litcopy(sb, LIT("-")); num_str(sb, ts.month, 10); litcopy(sb, LIT("-")); num_str(sb, ts.day, 10); } void sb_write(int fp, struct str_buf sb_pre, struct str_buf sb_post) { int size = sb_pre.size - sb_post.size; assert(size >= 0); int ret = _write(fp, sb_pre.buf, size); assert(ret == size); } void print_record_header(Record *header) { char buf[1024] = { 0 }; const struct str_buf sb_pre = { .buf = buf, .size = sizeof(buf) }; struct str_buf sb = sb_pre; const struct str_lit l1 = LIT("--- HEADER: RECORD ---"), l2 = LIT("\nType: "), l3 = LIT("\nFlags: "), l4 = LIT("\nFormID: "), l5 = LIT("\nTimestamp: "), l6 = LIT("\nVersion Control Info: "), l7 = LIT("\nVersion: "), l8 = LIT("\nUnknown: "), l9 = LIT("\n"); litcopy(&sb, l1); litcopy(&sb, l2); type_str(&sb, header->type); litcopy(&sb, l3); record_flags_str(&sb, header); litcopy(&sb, l4); num_str(&sb, header->formid, 16); litcopy(&sb, l5); timestamp_str(&sb, header->timestamp); litcopy(&sb, l6); num_str(&sb, header->vcinfo, 16); litcopy(&sb, l7); num_str(&sb, header->version, 10); litcopy(&sb, l8); num_str(&sb, header->unknown, 16); litcopy(&sb, l9); sb_write(STDOUT_FILENO, sb_pre, sb); } void record_flags_str(struct str_buf *sb, Record *header) { uint32_t flags = header->flags; const uint32_t type = header->type.uint; // print flags if (type == rt[REFR]) { // TODO // REFR requires FormID lookup flags = 0; } else { rfs_inner *const flag_lut = rfs[rt_hash(type)]; if (flag_lut) { while (flags != 0) { // get next flag, from lowest bit to highest // will always be >= 0 as flags is not 0 int lowest = _tzcnt_u32(flags); assert(lowest < 32); // get flag string const struct str_lit lit = (*flag_lut)[lowest]; // not a valid flag if (!lit.lit) break; // copy flag string litcopy(sb, LIT("\n - ")); litcopy(sb, lit); // remove flag from to be processed flags ^= ((uint32_t)1) << lowest; } } } // slow path if (flags != 0) { printf("\n\nOriginal flags: %08x\n", header->flags); printf("Unhandled flags: %08x\n", flags); assert(false); // unhandled flags } } /* Converts the bit-packed/encoded timestamp used in esp/esm files into day, * month and year. See UESP for further explanation. * * This currently handles the timestamp format used in Skyrim.esm, but newer * files apparently use a different format. This will need to be handled later. */ Timestamp convert_ts(uint16_t ts) { /* const uint8_t day = (uint8_t)(ts & day_mask); const uint8_t month = (uint8_t)((ts >> month_offset) & month_mask); const uint16_t year = (ts >> year_offset) & year_mask; */ const uint8_t day = ts & 0xff; const uint8_t hb = (ts >> 8) & 0xff; const uint8_t month = ((hb - 1) % 12) + 1; const uint8_t year = ((hb - 1) / 12 + 3) % 10; return (Timestamp) { year, month, day }; } MetaNode *espr_create_tree(struct sized_buf in, struct sized_buf tree) { const struct sized_buf tree_pre = tree; struct walker_callbacks cb = { .pre = create_tree_cb, .data = &tree }; espr_walk(in.data, in.size, cb); return (MetaNode *)tree_pre.data; } void create_tree_cb( const Node n, void *data, void **carry_out, void *from_parent, void **to_children ) { (void)carry_out; // add new metanode to tree struct sized_buf *tree = data; assert(tree->size >= sizeof(MetaNode)); MetaNode *m = (MetaNode *)tree->data; tree->data += sizeof(MetaNode); tree->size -= sizeof(MetaNode); MetaNode *p = from_parent; // construct new node m->n = n; m->parent = p; m->prev = p->last_child; p->last_child->next = m; p->last_child = m; // send self to children *to_children = m; }