364 lines
11 KiB
C
364 lines
11 KiB
C
/*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*/
|
|
#pragma once
|
|
|
|
/* For reading structured data out of Creation Engine esp/esm files.
|
|
* Based on information from:
|
|
* https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format
|
|
* https://github.com/TES5Edit/TES5Edit
|
|
*
|
|
* esp/esm files generally have a tree-like structure where:
|
|
* - top level is a TES4 record concatenated with each of the top-level groups
|
|
* - groups contain a concatenation of 0 or more groups or records
|
|
* - records contain a concatenation of 0 or more fields
|
|
* - fields contain a structure of statically or dynamically sized data
|
|
* - groups, records, and fields all have headers
|
|
*
|
|
* === NOTE ===
|
|
* Assumptions:
|
|
* - Little endian system
|
|
* - Compiled with MSVC
|
|
*/
|
|
|
|
#include <stddef.h>
#include <stdint.h>

#include "msh.h"
|
|
|
|
// Guards for C++ usage
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Number of entries in one record-flag string table: one entry per bit of a
// 32-bit flag field.
#define RFS_INNER_SIZE 32

// Number of record_type values: the 127 record types plus the NONE type that
// is used for sanity checks.
#define RT_SIZE 128
|
|
|
|
/* The fourcc codes are hashed with an externally calculated seed, chosen such
 * that every code lands on an index of its own between 0 and 511. In other
 * words the hash is perfect: there are no collisions. This allows for hard
 * coded fourcc lookup tables in a relatively small space.
 *
 * A minimal perfect hash would also be possible with an intermediate seed
 * table; whether that is faster, or worth the effort at all, is not known.
 */
#define RT_HASH_BITS 9
#define RT_HASH_SIZE (1 << RT_HASH_BITS)
#define RT_HASH_SEED 131261257
|
|
|
|
// Mask (bit 18) used for identifying records that are compressed
#define COMPRESSED_FLAG ((uint32_t)1 << 18)
|
|
|
|
// Length of the top level group order: there are 118 top level groups
#define GO_SIZE 118

// Number of group types
#define GTS_SIZE 10
|
|
|
|
// Brace initializer for a `struct str_lit` made from the string literal S.
// `size` is the length of S without its terminating NUL.
#define STR_LIT(S) { .lit = S , .size = sizeof( S ) - 1 }

// Same as STR_LIT, but wrapped in a compound literal so that the result can
// be used as an expression.
#define LIT(S) (const struct str_lit) STR_LIT(S)
|
|
|
|
|
|
//
|
|
// === FORWARD DEFS ===
|
|
//
|
|
|
|
// Short aliases for the tagged types; the types themselves are defined later
// in this header.
typedef union  type4     Type4;
typedef struct timestamp Timestamp;
typedef struct field     Field;
typedef struct record    Record;
typedef struct group     Group;
typedef struct meta_node MetaNode;
|
|
|
|
|
|
//
|
|
// === SIMPLE TYPES ===
|
|
//
|
|
|
|
// Form ID. The ID itself occupies 3 bytes; the upper byte is determined at
// run time and is used to reference across esp/esm files.
typedef uint32_t formid;
|
|
|
|
// A fourcc value that can be read either as its 4 characters or as a single
// 32-bit integer.
union type4 {
    char     bytes[4];
    uint32_t uint;
};
|
|
|
|
// An immutable string together with its length
struct str_lit {
    const char *const lit;     // the characters
    const int         size;    // length (STR_LIT leaves out the terminating NUL)
    const char        _pad[4]; // explicit padding
};
|
|
|
|
// Inner type for Record Flag String LUT. Indexed by flag bit.
|
|
typedef const struct str_lit rfs_inner[RFS_INNER_SIZE];
|
|
|
|
// Totals calculated over an esp/esm file
struct esp_stats {
    size_t   decompressed_size; // file size with all compressed records decompressed
    uint32_t group_count;       // number of groups
    uint32_t record_count;      // number of records
};
|
|
|
|
// A mutable character buffer with an `int` size
struct str_buf {
    char *buf;
    int   size;
    char  _pad[4]; // explicit padding
};
|
|
|
|
// A pointer to raw data together with a `size_t` size
struct sized_buf {
    char   *data;
    size_t  size;
};
|
|
|
|
//
|
|
// === ENUMS ===
|
|
//
|
|
|
|
// Tag of the generic node tagged union: tells whether a node is a group or a
// record.
enum node_type {
    NT_GROUP  = 0,
    NT_RECORD = 1,
};
|
|
|
|
// Record type enum. NONE comes first, followed by the record types in
// alphabetical order (one line per initial letter); the numeric values follow
// the declaration order.
enum record_type {
    NONE = 0,
    AACT, ACHR, ACTI, ADDN, ALCH, AMMO, ANIO, APPA, ARMA, ARMO, ARTO, ASPC, ASTP, AVIF,
    BOOK, BPTD,
    CAMS, CELL, CLAS, CLDC, CLFM, CLMT, COBJ, COLL, CONT, CPTH, CSTY,
    DEBR, DIAL, DLBR, DLVW, DOBJ, DOOR, DUAL,
    ECZN, EFSH, ENCH, EQUP, EXPL, EYES,
    FACT, FLOR, FLST, FSTP, FSTS, FURN,
    GLOB, GMST, GRAS, GRUP,
    HAIR, HAZD, HDPT,
    IDLE, IDLM, IMAD, IMGS, INFO, INGR, IPCT, IPDS,
    KEYM, KYWD,
    LAND, LCRT, LCTN, LGTM, LIGH, LSCR, LTEX, LVLI, LVLN, LVSP,
    MATO, MATT, MESG, MGEF, MISC, MOVT, MSTT, MUSC, MUST,
    NAVI, NAVM, NOTE, NPC_,
    OTFT,
    PACK, PERK, PGRE, PHZD, PROJ, PWAT,
    QUST,
    RACE, REFR, REGN, RELA, REVB, RFCT, RGDL,
    SCEN, SCOL, SCPT, SCRL, SHOU, SLGM, SMBN, SMEN, SMQN, SNCT, SNDR, SOPM, SOUN, SPEL, SPGD, STAT,
    TACT, TES4, TREE, TXST,
    VTYP,
    WATR, WEAP, WOOP, WRLD, WTHR,
};
|
|
|
|
// Group type enum; the values run from 0 to 9 in declaration order
enum group_type {
    GT_TOP = 0,
    GT_WORLD_CHILDREN,
    GT_INTERIOR_CELL_BLOCK,
    GT_INTERIOR_CELL_SUBBLOCK,
    GT_EXTERIOR_CELL_BLOCK,
    GT_EXTERIOR_CELL_SUBBLOCK,
    GT_CELL_CHILDREN,
    GT_TOPIC_CHILDREN,
    GT_CELL_PERSISTENT_CHILDREN,
    GT_CELL_TEMPORARY_CHILDREN,
};
|
|
|
|
//
|
|
// === COMPOSITE TYPES ===
|
|
//
|
|
|
|
// Generic node in the esp/esm tree
|
|
typedef struct node Node;
|
|
struct node {
|
|
union {
|
|
Group *group;
|
|
Record *record;
|
|
} header;
|
|
char *data;
|
|
enum node_type type;
|
|
uint32_t _pad;
|
|
};
|
|
|
|
// A parsed timestamp, used for passing timestamps around
struct timestamp {
    uint16_t year;
    uint8_t  month;
    uint8_t  day;
};
|
|
|
|
/* Given to espr_walk.
|
|
*
|
|
* pre is called before the children of the current node have been walked
|
|
* post is called after the children of the current node have been walked
|
|
*
|
|
* carry_out and carry_in is a pointer to a void * on the stack that can be
|
|
* used for passing data between pre and post for a node.
|
|
*
|
|
* data is a pointer that the user can supply when calling espr_walk that
|
|
* will be passed to pre and post when they are called.
|
|
*/
|
|
struct walker_callbacks {
|
|
void (*pre)(Node n, void *data, void **carry_out, void *from_parent, void **to_children);
|
|
void (*post)(Node n, void *data, void *carry_in);
|
|
void *data;
|
|
};
|
|
|
|
/* Meta Nodes are used for constructing a more flexible tree structure
|
|
* on top of the natural structure of ESP/ESM files.
|
|
*
|
|
* Meta Nodes do not create a pure tree structure, rather they have pointers to
|
|
* their parent and first child, and children have pointers backwards and
|
|
* forward through a linked list of all of the children of the parent node.
|
|
*
|
|
* There is no root node as such, rather there is a root linked list for which
|
|
* all of the Meta Nodes have no parents.
|
|
*
|
|
* While the ESP/ESM buffer can be modified in-place, any modification that
|
|
* changes the size of the stored data cannot be directly written to the buffer
|
|
* without first shifting all of the data after the point of modification.
|
|
*
|
|
* Modifications that change data size are:
|
|
* - Adding or deleting a group or record
|
|
* - Adding or deleting a field in a record
|
|
* - Changing a variable length field with data of different length
|
|
*
|
|
* With a Meta Node you can instead allocate new, arbitrarily sized memory for
|
|
* the node data. The Meta Node tree can then be walked to reconstruct a
|
|
* contiguous view of discontiguous memory.
|
|
*/
|
|
struct meta_node {
|
|
Node n;
|
|
MetaNode *parent;
|
|
MetaNode *first_child;
|
|
MetaNode *last_child;
|
|
MetaNode *prev;
|
|
MetaNode *next;
|
|
};
|
|
|
|
//
|
|
// === BINARY DATA OVERLAYS ===
|
|
//
|
|
|
|
#pragma pack(push, 1)
|
|
|
|
// Group header overlay
|
|
struct group {
|
|
Type4 grup; // always RT_GRUP
|
|
uint32_t size; // uncludes the 24 byte group header
|
|
union {
|
|
Type4 type; // this may be mangled and should not be relied on
|
|
formid formid;
|
|
int32_t number;
|
|
int16_t coord[2];
|
|
} label; // access determined by the `type` below
|
|
int32_t type; // group_type enum
|
|
uint16_t timestamp;
|
|
uint16_t vcinfo;
|
|
uint32_t unknown;
|
|
};
|
|
|
|
// Record header overlay
|
|
struct record {
|
|
Type4 type;
|
|
uint32_t size;
|
|
uint32_t flags;
|
|
uint32_t formid;
|
|
uint16_t timestamp;
|
|
uint16_t vcinfo;
|
|
uint16_t version;
|
|
uint16_t unknown;
|
|
};
|
|
|
|
// Field header overlay
|
|
struct field {
|
|
Type4 type;
|
|
uint16_t size;
|
|
};
|
|
|
|
#pragma pack(pop)
|
|
|
|
//
|
|
// === LUTs ===
|
|
//
|
|
|
|
// record type enum to fourcc value
|
|
extern const uint32_t rt[RT_SIZE];
|
|
|
|
// for converting between record_type and record_type_hash enums
|
|
extern const uint16_t rt2rth[RT_SIZE];
|
|
extern const uint8_t rth2rt[RT_HASH_SIZE];
|
|
|
|
// type -> flag mappings
|
|
// NULL table pointers indicate no flags
|
|
// NULL string pointers indicate invalid flag
|
|
extern rfs_inner *const rfs[RT_SIZE];
|
|
extern rfs_inner *const rfs_refr[RT_SIZE];
|
|
|
|
// Expected (probably) order of top level groups in an esp/esm
|
|
extern const enum record_type group_order[GO_SIZE];
|
|
|
|
// Printable strings for group types
|
|
extern const struct str_lit group_type_strings[GTS_SIZE];
|
|
|
|
//
|
|
// === FUNCTIONS ===
|
|
//
|
|
|
|
// hashes type value into a record type enum value
|
|
inline uint32_t rt_hash(uint32_t type) {
|
|
return rth2rt[uint32_t_msh(type, RT_HASH_BITS, RT_HASH_SEED)];
|
|
}
|
|
|
|
/* `espr_walk` walks through the tree structure of the esp/esm binary data
 * starting at `data` of `size` bytes.
 *
 * `cb` bundles the `pre` and `post` callbacks, each of which takes a `Node`
 * to process, with a `data` pointer to arbitrary data that is passed on to
 * the callbacks whenever they are called.
 *
 * Data is walked sequentially. Nodes are visited in strictly increasing
 * order of memory location within the buffer.
 */
|
|
void espr_walk(char *data, size_t size, struct walker_callbacks cb);
|
|
|
|
/* `espr_print` prints the header of every group and record in the given
|
|
* esp/esm binary data.
|
|
*/
|
|
void espr_print(char *data, size_t size);
|
|
|
|
/* Calculates the number of groups and records in the esp/esm file and the
|
|
* size of the esp/esm if all of the compressed records were decompressed.
|
|
*/
|
|
struct esp_stats espr_stats(char *data, size_t size);
|
|
|
|
// Calculates the number of formid's in an esm/esp from the stats
|
|
inline uint32_t espr_formid_count(struct esp_stats stats) {
|
|
return stats.record_count;
|
|
}
|
|
|
|
// Calculates the number of nodes in the esp/esm from the stats
|
|
inline uint32_t espr_node_count(struct esp_stats stats) {
|
|
return stats.record_count + stats.group_count;
|
|
}
|
|
|
|
// Calculates the size of a MetaNode tree constructed over the esp/esm for
|
|
// which the stats were generated.
|
|
inline size_t espr_tree_size(struct esp_stats stats) {
|
|
return sizeof(MetaNode) * espr_node_count(stats);
|
|
}
|
|
|
|
/* Copies the data from `data` to `buf`, decompressing compressed records as
 * it does so. `buf_size` should be the `decompressed_size` reported by
 * `espr_stats`, and `buf` should be at least of that size.
 */
|
|
void espr_decompress(char *data, size_t size, char *buf, size_t buf_size);
|
|
|
|
MetaNode *espr_create_tree(struct sized_buf in, struct sized_buf tree);
|
|
|
|
// End C++ guard
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|