/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0.If a copy of the MPL was not distributed with this * file, You can obtain one at http ://mozilla.org/MPL/2.0/. */ #pragma once /* For reading structured data out of Creation Engine esp/esm files. * Based on information from: * https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format * https://github.com/TES5Edit/TES5Edit * * esp/esm files generally have a tree-like structure where: * - top level is a a TES4 record concatenated with each of the top-level groups * - groups contain a concatenation of 0 or more groups or records * - records contain a concatenation of 0 or more fields * - fields contain a structure of statically or dynamically sized data * - groups, records, and fields all have headers * * === NOTE === * Assumptions: * - Little endian system * - Compiled with MSVC */ #include #include "msh.h" // Guards for C++ usage #ifdef __cplusplus extern "C" { #endif // Flag fields are 32 bits, so there are 32 flags #define RFS_INNER_SIZE 32 // There are 127 record types + a NONE type used for sanity checks #define RT_SIZE 128 /* RT hash seed was externally calculated s.t. the fourcc codes perfectly hash * into indices between 0 and 511. That is, there are no hashing collisions. * This allows for hard coded lookup tables for the fourcc codes in a relatively * small space. * * A minimal perfect hash is also possible with an intermediate seed table, * though I'm not sure which is faster, if it's worth trying to speed this up, * etc. */ #define RT_HASH_SIZE 512 #define RT_HASH_BITS 9 #define RT_HASH_SEED 131261257 // Used for identifying records that are compressed #define COMPRESSED_FLAG (((uint32_t)1) << 18) // Top level group order, there are 118 top level groups #define GO_SIZE 118 // There are 10 Group Types #define GTS_SIZE 10 #define LIT(L) (const struct str_lit) STR_LIT(L) #define STR_LIT(L) { .lit = L , .size = sizeof( L ) - 1 } // // === FORWARD DEFS === // typedef union type4 Type4; typedef struct timestamp Timestamp; typedef struct group Group; typedef struct record Record; typedef struct field Field; typedef struct meta_node MetaNode; // // === SIMPLE TYPES === // // 3 byte ID, upper byte is determined at run time and is used to // reference across esp/esm files typedef uint32_t formid; // char[4] with uint32_t access, used to access fourcc values union type4 { char bytes[4]; uint32_t uint; }; struct str_lit { const char *const lit; const int size; const char _pad[4]; }; // Inner type for Record Flag String LUT. Indexed by flag bit. typedef const struct str_lit rfs_inner[RFS_INNER_SIZE]; struct esp_stats { size_t decompressed_size; uint32_t group_count; uint32_t record_count; }; struct str_buf { char *buf; int size; char _pad[4]; }; struct sized_buf { char *data; size_t size; }; // // === ENUMS === // // Tag for generic node tagged union enum node_type { NT_GROUP, NT_RECORD, }; // Record type enum enum record_type { NONE, AACT, ACHR, ACTI, ADDN, ALCH, AMMO, ANIO, APPA, ARMA, ARMO, ARTO, ASPC, ASTP, AVIF, BOOK, BPTD, CAMS, CELL, CLAS, CLDC, CLFM, CLMT, COBJ, COLL, CONT, CPTH, CSTY, DEBR, DIAL, DLBR, DLVW, DOBJ, DOOR, DUAL, ECZN, EFSH, ENCH, EQUP, EXPL, EYES, FACT, FLOR, FLST, FSTP, FSTS, FURN, GLOB, GMST, GRAS, GRUP, HAIR, HAZD, HDPT, IDLE, IDLM, IMAD, IMGS, INFO, INGR, IPCT, IPDS, KEYM, KYWD, LAND, LCRT, LCTN, LGTM, LIGH, LSCR, LTEX, LVLI, LVLN, LVSP, MATO, MATT, MESG, MGEF, MISC, MOVT, MSTT, MUSC, MUST, NAVI, NAVM, NOTE, NPC_, OTFT, PACK, PERK, PGRE, PHZD, PROJ, PWAT, QUST, RACE, REFR, REGN, RELA, REVB, RFCT, RGDL, SCEN, SCOL, SCPT, SCRL, SHOU, SLGM, SMBN, SMEN, SMQN, SNCT, SNDR, SOPM, SOUN, SPEL, SPGD, STAT, TACT, TES4, TREE, TXST, VTYP, WATR, WEAP, WOOP, WRLD, WTHR, }; // Group type enum enum group_type { GT_TOP = 0, GT_WORLD_CHILDREN = 1, GT_INTERIOR_CELL_BLOCK = 2, GT_INTERIOR_CELL_SUBBLOCK = 3, GT_EXTERIOR_CELL_BLOCK = 4, GT_EXTERIOR_CELL_SUBBLOCK = 5, GT_CELL_CHILDREN = 6, GT_TOPIC_CHILDREN = 7, GT_CELL_PERSISTENT_CHILDREN = 8, GT_CELL_TEMPORARY_CHILDREN = 9, }; // // === COMPOSITE TYPES === // // Generic node in the esp/esm tree typedef struct node Node; struct node { union { Group *group; Record *record; } header; char *data; enum node_type type; uint32_t _pad; }; // Used for passing around parsed timestamps struct timestamp { uint16_t year; uint8_t month; uint8_t day; }; /* Given to espr_walk. * * pre is called before the children of the current node have been * walked post is called after the children of the current node have * been walked * * carry_out and carry_in is a pointer to a void * on the stack that can * be used for passing data between pre and post for a node. * * data is a pointer that the user can supply when calling espr_walk * that will be passed to pre and post when they are called. */ struct walker_callbacks { void (*pre)( Node n, void *data, void **carry_out, void *from_parent, void **to_children ); void (*post)(Node n, void *data, void *carry_in); void *data; }; /* Meta Nodes are used for constructing a more flexible tree structure * on top of the natural structure of ESP/ESM files. * * Meta Nodes do not create a pure tree structure, rather they have * pointers to their parent and first child, and children have pointers * backwards and forward through a linked list of all of the children of * the parent node. * * There is no root node as such, rather there is a root linked list for * which all of the Meta Nodes have no parents. * * While the ESP/ESM buffer can be modified in-place, any modification * that changes the size of the stored data cannot be directly written * to the buffer without first shifting all of the data after the point * of modification. * * Modifications that change data size are: * - Adding or deleting a group or record * - Adding or deleting a field in a record * - Changing a variable length field with data of different length * * With a Meta Node you can instead allocate new, arbitrarily sized * memory for the node data. The Meta Node tree can then be walked to * reconstruct a contiguous view of discontiguous memory. */ struct meta_node { Node n; MetaNode *parent; MetaNode *first_child; MetaNode *last_child; MetaNode *prev; MetaNode *next; }; // // === BINARY DATA OVERLAYS === // #pragma pack(push, 1) // Group header overlay struct group { Type4 grup; // always RT_GRUP uint32_t size; // uncludes the 24 byte group header union { Type4 type; // this may be mangled, do not rely on formid formid; int32_t number; int16_t coord[2]; } label; // access determined by the `type` below int32_t type; // group_type enum uint16_t timestamp; uint16_t vcinfo; uint32_t unknown; }; // Record header overlay struct record { Type4 type; uint32_t size; uint32_t flags; uint32_t formid; uint16_t timestamp; uint16_t vcinfo; uint16_t version; uint16_t unknown; }; // Field header overlay struct field { Type4 type; uint16_t size; }; #pragma pack(pop) // // === LUTs === // // record type enum to fourcc value extern const uint32_t rt[RT_SIZE]; // for converting between record_type and record_type_hash enums extern const uint16_t rt2rth[RT_SIZE]; extern const uint8_t rth2rt[RT_HASH_SIZE]; // type -> flag mappings // NULL table pointers indicate no flags // NULL string pointers indicate invalid flag extern rfs_inner *const rfs[RT_SIZE]; extern rfs_inner *const rfs_refr[RT_SIZE]; // Expected (probably) order of top level groups in an esp/esm extern const enum record_type group_order[GO_SIZE]; // Printable strings for group types extern const struct str_lit group_type_strings[GTS_SIZE]; // // === FUNCTIONS === // // hashes type value into a record type enum value inline uint32_t rt_hash(uint32_t type) { return rth2rt[uint32_t_msh(type, RT_HASH_BITS, RT_HASH_SEED)]; } /* `espr_walk` walks through the tree structure of the esp/esm binary * data starting at `data` of `size` bytes. * * `cb` is a callback that takes a `Node` to process. `pt` is a pointer * to arbitrary data that is passed on to `cb` whenever it is called. * * Data is walked sequentially. Nodes passed to `cb` will be strictly * increasing in terms of memory location within the buffer. */ void espr_walk(char *data, size_t size, struct walker_callbacks cb); /* `espr_print` prints the header of every group and record in the given * esp/esm binary data. */ void espr_print(char *data, size_t size); /* Calculates the number of groups and records in the esp/esm file and * the size of the esp/esm if all of the compressed records were * decompressed. */ struct esp_stats espr_stats(char *data, size_t size); // Calculates the number of formid's in an esm/esp from the stats inline uint32_t espr_formid_count(struct esp_stats stats) { return stats.record_count; } // Calculates the number of nodes in the esp/esm from the stats inline uint32_t espr_node_count(struct esp_stats stats) { return stats.record_count + stats.group_count; } // Calculates the size of a MetaNode tree constructed over the esp/esm // for which the stats were generated. inline size_t espr_tree_size(struct esp_stats stats) { return sizeof(MetaNode) * espr_node_count(stats); } /* Copies the data from `data` to `buf` decompressing compressed fields * as it does so. buf_size should be the value returned from * `espr_decompressed_size`, and `buf` should be at least of that size. */ void espr_decompress(char *data, size_t size, char *buf, size_t buf_size); MetaNode *espr_create_tree(struct sized_buf in, struct sized_buf tree); // End C++ guard #ifdef __cplusplus } #endif