/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0.If a copy of the MPL was not distributed with this * file, You can obtain one at http ://mozilla.org/MPL/2.0/. */ #pragma once /* For reading structured data out of Creation Engine esp/esm files. * Based on information from: * https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format * https://github.com/TES5Edit/TES5Edit * * esp/esm files generally have a tree-like structure where: * - top level is a a TES4 record concatenated with each of the top-level groups * - groups contain a concatenation of 0 or more groups or records * - records contain a concatenation of 0 or more fields * - fields contain a structure of statically or dynamically sized data * - groups, records, and fields all have headers * * === NOTE === * Assumptions: * - Little endian system * - Compiled with MSVC */ #include #include "msh.h" // Guards for C++ usage #ifdef __cplusplus extern "C" { #endif #define RFS_INNER_SIZE 32 #define RT_SIZE 128 /* RT hash seed was externally calculated s.t. the fourcc codes perfectly hash into * indices between 0 and 511. That is, there are no hashing collisions. This allows * for hard coded lookup tables for the fourcc codes in a relatively small space. * * A minimal perfect hash is also possible with an intermediate seed table, though * I'm not sure which is faster, if it's worth trying to speed this up, etc. */ #define RT_HASH_SIZE 512 #define RT_HASH_BITS 9 #define RT_HASH_SEED 131261257 #define COMPRESSED_FLAG (((uint32_t)1) << 18) #define GO_SIZE 118 #define GTS_SIZE 10 // // === FORWARD DEFS === // typedef union type4 Type4; typedef struct timestamp Timestamp; typedef struct group Group; typedef struct record Record; typedef struct field Field; // // === SIMPLE TYPES === // // Basic types typedef uint32_t formid; // char[4] with uint32_t access union type4 { char bytes[4]; uint32_t uint; }; // indexed by flag bit typedef const char *const rfs_inner[RFS_INNER_SIZE]; // // === ENUMS === // // Tag for generic node tagged union enum node_type { // NT_ prefix NT_GROUP, NT_RECORD, }; // Record type enum enum record_type { _NONE, AACT, ACHR, ACTI, ADDN, ALCH, AMMO, ANIO, APPA, ARMA, ARMO, ARTO, ASPC, ASTP, AVIF, BOOK, BPTD, CAMS, CELL, CLAS, CLDC, CLFM, CLMT, COBJ, COLL, CONT, CPTH, CSTY, DEBR, DIAL, DLBR, DLVW, DOBJ, DOOR, DUAL, ECZN, EFSH, ENCH, EQUP, EXPL, EYES, FACT, FLOR, FLST, FSTP, FSTS, FURN, GLOB, GMST, GRAS, GRUP, HAIR, HAZD, HDPT, IDLE, IDLM, IMAD, IMGS, INFO, INGR, IPCT, IPDS, KEYM, KYWD, LAND, LCRT, LCTN, LGTM, LIGH, LSCR, LTEX, LVLI, LVLN, LVSP, MATO, MATT, MESG, MGEF, MISC, MOVT, MSTT, MUSC, MUST, NAVI, NAVM, NOTE, NPC_, OTFT, PACK, PERK, PGRE, PHZD, PROJ, PWAT, QUST, RACE, REFR, REGN, RELA, REVB, RFCT, RGDL, SCEN, SCOL, SCPT, SCRL, SHOU, SLGM, SMBN, SMEN, SMQN, SNCT, SNDR, SOPM, SOUN, SPEL, SPGD, STAT, TACT, TES4, TREE, TXST, VTYP, WATR, WEAP, WOOP, WRLD, WTHR, }; // Enums of perfect hash values for enum record_type_hash { RT_AACT = 496, RT_ACHR = 249, RT_ACTI = 293, RT_ADDN = 316, RT_ALCH = 312, RT_AMMO = 157, RT_ANIO = 297, RT_APPA = 230, RT_ARMA = 222, RT_ARMO = 218, RT_ARTO = 328, RT_ASPC = 252, RT_ASTP = 384, RT_AVIF = 309, RT_BOOK = 318, RT_BPTD = 454, RT_CAMS = 319, RT_CELL = 18, RT_CLAS = 16, RT_CLDC = 68, RT_CLFM = 389, RT_CLMT = 497, RT_COBJ = 422, RT_COLL = 140, RT_CONT = 169, RT_CPTH = 30, RT_CSTY = 193, RT_DEBR = 460, RT_DIAL = 304, RT_DLBR = 340, RT_DLVW = 434, RT_DOBJ = 437, RT_DOOR = 347, RT_DUAL = 246, RT_ECZN = 229, RT_EFSH = 509, RT_ENCH = 194, RT_EQUP = 57, RT_EXPL = 153, RT_EYES = 181, RT_FACT = 62, RT_FLOR = 137, RT_FLST = 199, RT_FSTP = 462, RT_FSTS = 388, RT_FURN = 105, RT_GLOB = 376, RT_GMST = 125, RT_GRAS = 49, RT_GRUP = 511, RT_HAIR = 481, RT_HAZD = 21, RT_HDPT = 100, RT_IDLE = 204, RT_IDLM = 348, RT_IMAD = 390, RT_IMGS = 187, RT_INFO = 82, RT_INGR = 463, RT_IPCT = 292, RT_IPDS = 89, RT_KEYM = 54, RT_KYWD = 123, RT_LAND = 261, RT_LCRT = 138, RT_LCTN = 172, RT_LGTM = 177, RT_LIGH = 13, RT_LSCR = 288, RT_LTEX = 447, RT_LVLI = 235, RT_LVLN = 453, RT_LVSP = 343, RT_MATO = 2, RT_MATT = 220, RT_MESG = 66, RT_MGEF = 32, RT_MISC = 145, RT_MOVT = 378, RT_MSTT = 132, RT_MUSC = 87, RT_MUST = 9, RT_NAVI = 50, RT_NAVM = 122, RT_NOTE = 366, RT_NPC_ = 440, RT_OTFT = 365, RT_PACK = 441, RT_PERK = 243, RT_PGRE = 210, RT_PHZD = 26, RT_PROJ = 120, RT_PWAT = 397, RT_QUST = 71, RT_RACE = 108, RT_REFR = 449, RT_REGN = 320, RT_RELA = 182, RT_REVB = 266, RT_RFCT = 311, RT_RGDL = 19, RT_SCEN = 119, RT_SCOL = 276, RT_SCPT = 363, RT_SCRL = 104, RT_SHOU = 115, RT_SLGM = 70, RT_SMBN = 413, RT_SMEN = 240, RT_SMQN = 63, RT_SNCT = 117, RT_SNDR = 280, RT_SOPM = 306, RT_SOUN = 165, RT_SPEL = 190, RT_SPGD = 443, RT_STAT = 202, RT_TACT = 282, RT_TES4 = 474, RT_TREE = 27, RT_TXST = 359, RT_VTYP = 335, RT_WATR = 84, RT_WEAP = 10, RT_WOOP = 352, RT_WRLD = 38, RT_WTHR = 83, }; // GRUP type values enum group_type { // GT_ prefix GT_TOP = 0, GT_WORLD_CHILDREN = 1, GT_INTERIOR_CELL_BLOCK = 2, GT_INTERIOR_CELL_SUBBLOCK = 3, GT_EXTERIOR_CELL_BLOCK = 4, GT_EXTERIOR_CELL_SUBBLOCK = 5, GT_CELL_CHILDREN = 6, GT_TOPIC_CHILDREN = 7, GT_CELL_PERSISTENT_CHILDREN = 8, GT_CELL_TEMPORARY_CHILDREN = 9, }; // // === COMPOSITE TYPES === // // Generic node typedef struct node Node; struct node { union { Group *group; Record *record; } header; char *const data; enum node_type type; uint32_t _pad; }; // calculated timestamp struct timestamp { uint16_t year; uint8_t month; uint8_t day; }; struct walker_callbacks { void (*pre)(Node n, void *data, void **carry_out); void (*post)(Node n, void *data, void **carry_in); void *data; }; // // === BINARY DATA OVERLAYS === // #pragma pack(push, 1) // Group header overlay struct group { Type4 grup; // always RT_GRUP uint32_t size; // uncludes the 24 byte group header union { Type4 type; // this may be mangled and should not be relied on formid formid; int32_t number; int16_t coord[2]; } label; // access determined by the `type` below int32_t type; // group_type enum uint16_t timestamp; uint16_t vcinfo; uint32_t unknown; }; // Record header overlay struct record { Type4 type; uint32_t size; uint32_t flags; uint32_t formid; uint16_t timestamp; uint16_t vcinfo; uint16_t version; uint16_t unknown; }; // Field header overlay struct field { Type4 type; uint16_t size; }; #pragma pack(pop) // // === LUTs === // // record type enum to fourcc value extern const uint32_t rt[RT_SIZE]; // for converting between record_type and record_type_hash enums extern const uint16_t rt2rth[RT_SIZE]; extern const uint8_t rth2rt[RT_HASH_SIZE]; // type -> flag mappings // NULL table pointers indicate no flags // NULL string pointers indicate invalid flag extern rfs_inner *const rfs[RT_HASH_SIZE]; extern rfs_inner *const rfs_refr[RT_HASH_SIZE]; // Expected (probably) order of top level groups in an esp/esm extern const enum record_type group_order[GO_SIZE]; // Printable strings for group types extern const char *const group_type_strings[GTS_SIZE]; // // === FUNCTIONS === // // hashes type value into RT_ hash value inline uint32_t rt_hash(uint32_t type) { return uint32_t_msh(type, RT_HASH_BITS, RT_HASH_SEED); } /* `espr_walk` walks through the tree structure of the esp/esm binary data * starting at `data` of `size` bytes. * * `cb` is a callback that takes a `Node` to process. `pt` is a pointer to * arbitrary data that is passed on to `cb` whenever it is called. * * Data is walked sequentially. Nodes passed to `cb` will be strictly increasing * in terms of memory location within the buffer. */ void espr_walk(char *data, size_t size, struct walker_callbacks cb); /* `espr_print` prints the header of every group and record in the given * esp/esm binary data. */ void espr_print(char *data, size_t size); /* Calculates the size of the esp data if all of the compressed records are * decompressed. */ size_t espr_decompressed_size(char *data, size_t size); /* Counts the number of formids present in the esp/esm data. This should be * equal to the number of records. */ size_t espr_formid_count(char *data, size_t size); /* Copies the data from `data` to `buf` decompressing compressed fields as * it does so. buf_size should be the value returned from `espr_decompressed_size`, * and `buf` should be at least of that size. */ void espr_decompress(char *data, size_t size, char *buf, size_t buf_size); // End C++ guard #ifdef __cplusplus } #endif