legendary/legendary/models/manifest.py
derrod e00e534f2c First public test version of legendary
Unfortunately the history from before this commit is lost to time. And
that's probably for the best.
2020-04-14 15:40:41 +02:00

510 lines
16 KiB
Python

#!/usr/bin/env python
# coding: utf-8
import hashlib
import logging
import struct
import zlib
from io import BytesIO
logger = logging.getLogger('Manifest')
def read_fstring(bio):
length = struct.unpack('<i', bio.read(4))[0]
# if the length is negative the string is UTF-16 encoded, this was a pain to figure out.
if length < 0:
# utf-16 chars are 2 bytes wide but the length is # of characters, not bytes
# todo actually make sure utf-16 characters can't be longer than 2 bytes
length *= -2
s = bio.read(length - 2).decode('utf-16')
bio.seek(2, 1) # utf-16 strings have two byte null terminators
elif length > 0:
s = bio.read(length - 1).decode('ascii')
bio.seek(1, 1) # skip string null terminator
else: # empty string, no terminators or anything
s = ''
return s
def get_chunk_dir(version):
# The lowest version I've ever seen was 12 (Unreal Tournament), but for completeness sake leave all of them in
if version >= 15:
return 'ChunksV4'
elif version >= 6:
return 'ChunksV3'
elif version >= 3:
return 'ChunksV2'
else:
return 'Chunks'
class Manifest:
header_magic = 0x44BEC00C
def __init__(self):
self.header_size = 0
self.size_compressed = 0
self.size_uncompressed = 0
self.sha_hash = ''
self.stored_as = 0
self.version = 0
self.data = b''
# remainder
self.meta = None
self.chunk_data_list = None
self.file_manifest_list = None
self.custom_fields = None
@property
def compressed(self):
return self.stored_as & 0x1
@classmethod
def read_all(cls, data):
_m = cls.read(data)
_tmp = BytesIO(_m.data)
_m.meta = ManifestMeta.read(_tmp)
_m.chunk_data_list = CDL.read(_tmp, _m.version)
_m.file_manifest_list = FML.read(_tmp)
_m.custom_fields = CustomFields.read(_tmp)
unhandled_data = _tmp.read()
if unhandled_data:
logger.warning(f'Did not read {len(unhandled_data)} remaining bytes in manifest! '
f'This may not be a problem.')
return _m
@classmethod
def read(cls, data):
bio = BytesIO(data)
if struct.unpack('<I', bio.read(4))[0] != cls.header_magic:
raise ValueError('No header magic!')
_manifest = cls()
_manifest.header_size = struct.unpack('<I', bio.read(4))[0]
_manifest.size_compressed = struct.unpack('<I', bio.read(4))[0]
_manifest.size_uncompressed = struct.unpack('<I', bio.read(4))[0]
_manifest.sha_hash = bio.read(20)
_manifest.stored_as = struct.unpack('B', bio.read(1))[0]
_manifest.version = struct.unpack('<I', bio.read(4))[0]
if bio.tell() != _manifest.header_size:
logger.fatal(f'Did not read entire header {bio.tell()} != {_manifest.header_size}! '
f'Header version: {_manifest.version}, please report this on '
f'GitHub along with a sample of the problematic manifest!')
raise ValueError('Did not read complete manifest header!')
data = bio.read()
if _manifest.compressed:
_manifest.data = zlib.decompress(data)
dec_hash = hashlib.sha1(_manifest.data).hexdigest()
if dec_hash != _manifest.sha_hash.hex():
raise ValueError('Hash does not match!')
else:
_manifest.data = data
return _manifest
class ManifestMeta:
def __init__(self):
self.meta_size = 0
self.data_version = 0
self.feature_level = 0
self.is_file_data = False
self.app_id = 0
self.app_name = ''
self.build_version = ''
self.launch_exe = ''
self.launch_command = ''
self.prereq_ids = []
self.prereq_name = ''
self.prereq_path = ''
self.prereq_args = ''
@classmethod
def read(cls, bio):
_meta = cls()
_meta.meta_size = struct.unpack('<I', bio.read(4))[0]
_meta.data_version = struct.unpack('B', bio.read(1))[0] # always 0?
_meta.feature_level = struct.unpack('<I', bio.read(4))[0] # same as manifest version
# As far as I can tell this was used for very old manifests that didn't use chunks at all
_meta.is_file_data = struct.unpack('B', bio.read(1))[0] == 1
_meta.app_id = struct.unpack('<I', bio.read(4))[0] # always 0?
_meta.app_name = read_fstring(bio)
_meta.build_version = read_fstring(bio)
_meta.launch_exe = read_fstring(bio)
_meta.launch_command = read_fstring(bio)
# This is a list though I've never seen more than one entry
entries = struct.unpack('<I', bio.read(4))[0]
for i in range(entries):
_meta.prereq_ids.append(read_fstring(bio))
_meta.prereq_name = read_fstring(bio)
_meta.prereq_path = read_fstring(bio)
_meta.prereq_args = read_fstring(bio)
if bio.tell() != _meta.meta_size:
raise ValueError('Did not read entire meta!')
# seek to end if not already
# bio.seek(0 + _meta.meta_size)
return _meta
class CDL:
def __init__(self):
self.version = 0
self.size = 0
self.count = 0
self.elements = []
self._manifest_version = 17
self._guid_map = None
self._guid_int_map = None
def get_chunk_by_guid(self, guid):
"""
Get chunk by GUID string or number, creates index of chunks on first call
Integer GUIDs are usually faster and require less memory, use those when possible.
:param guid:
:return:
"""
if isinstance(guid, int):
return self.get_chunk_by_guid_num(guid)
else:
return self.get_chunk_by_guid_str(guid)
def get_chunk_by_guid_str(self, guid):
if not self._guid_map:
self._guid_map = dict()
for index, chunk in enumerate(self.elements):
self._guid_map[chunk.guid_str] = index
index = self._guid_map.get(guid.lower(), None)
if index is None:
raise ValueError(f'Invalid GUID! {guid}')
return self.elements[index]
def get_chunk_by_guid_num(self, guid_int):
if not self._guid_int_map:
self._guid_int_map = dict()
for index, chunk in enumerate(self.elements):
self._guid_int_map[chunk.guid_num] = index
index = self._guid_int_map.get(guid_int, None)
if index is None:
raise ValueError(f'Invalid GUID! {hex(guid_int)}')
return self.elements[index]
@classmethod
def read(cls, bio, manifest_version=17):
cdl_start = bio.tell()
_cdl = cls()
_cdl._manifest_version = manifest_version
_cdl.size = struct.unpack('<I', bio.read(4))[0]
_cdl.version = struct.unpack('B', bio.read(1))[0]
_cdl.count = struct.unpack('<I', bio.read(4))[0]
# the way this data is stored is rather odd, maybe there's a nicer way to write this...
for i in range(_cdl.count):
_cdl.elements.append(ChunkInfo(manifest_version=manifest_version))
# guid, doesn't seem to be a standard like UUID but is fairly straightfoward, 4 bytes, 128 bit.
for chunk in _cdl.elements:
chunk.guid = struct.unpack('<IIII', bio.read(16))
# hash is a 64 bit integer, no idea how it's calculated but we don't need to know that.
for chunk in _cdl.elements:
chunk.hash = struct.unpack('<Q', bio.read(8))[0]
# sha1 hash
for chunk in _cdl.elements:
chunk.sha_hash = bio.read(20)
# group number, seems to be part of the download path
for chunk in _cdl.elements:
chunk.group_num = struct.unpack('B', bio.read(1))[0]
# window size is the uncompressed size
for chunk in _cdl.elements:
chunk.window_size = struct.unpack('<I', bio.read(4))[0]
# file size is the compressed size that will need to be downloaded
for chunk in _cdl.elements:
chunk.file_size = struct.unpack('<q', bio.read(8))[0]
if bio.tell() - cdl_start != _cdl.size:
raise ValueError('Did not read entire chunk data list!')
return _cdl
class ChunkInfo:
def __init__(self, manifest_version=17):
self.guid = None
self.hash = 0
self.sha_hash = b''
self.group_num = 0
self.window_size = 0
self.file_size = 0
self._manifest_version = manifest_version
# caches for things that are "expensive" to compute
self._guid_str = None
self._guid_num = None
def __repr__(self):
return '<ChunkInfo (guid={}, hash={}, sha_hash={}, group_num={}, window_size={}, file_size={})>'.format(
self.guid_str, self.hash, self.sha_hash.hex(), self.group_num, self.window_size, self.file_size
)
@property
def guid_str(self):
if not self._guid_str:
self._guid_str = '-'.join('{:08x}'.format(g) for g in self.guid)
return self._guid_str
@property
def guid_num(self):
if not self._guid_num:
self._guid_num = self.guid[3] + (self.guid[2] << 32) + (self.guid[1] << 64) + (self.guid[0] << 96)
return self._guid_num
@property
def path(self):
return '{}/{:02d}/{:016X}_{}.chunk'.format(
get_chunk_dir(self._manifest_version),
# the result of this seems to always match the group number, but this is the "correct way"
(zlib.crc32(struct.pack('<I', self.guid[0]) +
struct.pack('<I', self.guid[1]) +
struct.pack('<I', self.guid[2]) +
struct.pack('<I', self.guid[3])) & 0xffffffff) % 100,
self.hash, ''.join('{:08X}'.format(g) for g in self.guid)
)
class FML:
def __init__(self):
self.version = 0
self.size = 0
self.count = 0
self.elements = []
self._path_map = dict()
def get_file_by_path(self, path):
if not self._path_map:
self._path_map = dict()
for index, fm in enumerate(self.elements):
self._path_map[fm.filename] = index
index = self._path_map.get(path, None)
if index is None:
raise ValueError(f'Invalid path! {path}')
return self.elements[index]
@classmethod
def read(cls, bio):
fml_start = bio.tell()
_fml = cls()
_fml.size = struct.unpack('<I', bio.read(4))[0]
_fml.version = struct.unpack('B', bio.read(1))[0]
_fml.count = struct.unpack('<I', bio.read(4))[0]
for i in range(_fml.count):
_fml.elements.append(FileManifest())
for fm in _fml.elements:
fm.filename = read_fstring(bio)
# never seen this used in any of the manifests I checked but can't wait for something to break because of it
for fm in _fml.elements:
fm.symlink_target = read_fstring(bio)
# For files this is actually the SHA1 instead of whatever it is for chunks...
for fm in _fml.elements:
fm.hash = bio.read(20)
# Flags, the only one I've seen is for executables
for fm in _fml.elements:
fm.flags = struct.unpack('B', bio.read(1))[0]
# install tags, no idea what they do, I've only seen them in the Fortnite manifest
for fm in _fml.elements:
_elem = struct.unpack('<I', bio.read(4))[0]
for i in range(_elem):
fm.install_tags.append(read_fstring(bio))
# Each file is made up of "Chunk Parts" that can be spread across the "chunk stream"
for fm in _fml.elements:
_elem = struct.unpack('<I', bio.read(4))[0]
for i in range(_elem):
chunkp = ChunkPart()
_size = struct.unpack('<I', bio.read(4))[0]
chunkp.guid = struct.unpack('<IIII', bio.read(16))
chunkp.offset = struct.unpack('<I', bio.read(4))[0]
chunkp.size = struct.unpack('<I', bio.read(4))[0]
fm.chunk_parts.append(chunkp)
# we have to calculate the actual file size ourselves
for fm in _fml.elements:
fm.file_size = sum(c.size for c in fm.chunk_parts)
if bio.tell() - fml_start != _fml.size:
raise ValueError('Did not read entire chunk data list!')
return _fml
class FileManifest:
def __init__(self):
self.filename = ''
self.symlink_target = ''
self.hash = b''
self.flags = 0
self.install_tags = []
self.chunk_parts = []
self.file_size = 0
@property
def executable(self):
return self.flags & 0x4
@property
def sha_hash(self):
return self.hash
def __repr__(self):
if len(self.chunk_parts) <= 20:
cp_repr = ', '.join(repr(c) for c in self.chunk_parts)
else:
_cp = [repr(cp) for cp in self.chunk_parts[:20]]
_cp.append('[...]')
cp_repr = ', '.join(_cp)
return '<FileManifest (filename="{}", symlink_target="{}", hash={}, flags={}, ' \
'install_tags=[{}], chunk_parts=[{}], file_size={})>'.format(
self.filename, self.symlink_target, self.hash.hex(), self.flags,
', '.join(self.install_tags), cp_repr, self.file_size
)
class ChunkPart:
def __init__(self):
self.guid = None
self.offset = 0
self.size = 0
# caches for things that are "expensive" to compute
self._guid_str = None
self._guid_num = None
@property
def guid_str(self):
if not self._guid_str:
self._guid_str = '-'.join('{:08x}'.format(g) for g in self.guid)
return self._guid_str
@property
def guid_num(self):
if not self._guid_num:
self._guid_num = self.guid[3] + (self.guid[2] << 32) + (self.guid[1] << 64) + (self.guid[0] << 96)
return self._guid_num
def __repr__(self):
guid_readable = '-'.join('{:08x}'.format(g) for g in self.guid)
return '<ChunkPart (guid={}, offset={}, size={})>'.format(
guid_readable, self.offset, self.size)
class CustomFields: # this could probably be replaced with just a dict
def __init__(self):
self.size = 0
self.version = 0
self.count = 0
self._dict = dict()
def __getitem__(self, item):
return self._dict.get(item, None)
def __str__(self):
return str(self._dict)
def keys(self):
return self._dict.keys()
def values(self):
return self._dict.values()
@classmethod
def read(cls, bio):
_cf = cls()
cf_start = bio.tell()
_cf.size = struct.unpack('<I', bio.read(4))[0]
_cf.version = struct.unpack('B', bio.read(1))[0]
_cf.count = struct.unpack('<I', bio.read(4))[0]
_keys = []
_values = []
for i in range(_cf.count):
_keys.append(read_fstring(bio))
for i in range(_cf.count):
_values.append(read_fstring(bio))
_cf._dict = dict(zip(_keys, _values))
if bio.tell() - cf_start != _cf.size:
raise ValueError('Did not read entire custom fields list!')
return _cf
class ManifestComparison:
def __init__(self):
self.added = set()
self.removed = set()
self.changed = set()
self.unchanged = set()
@classmethod
def create(cls, manifest, old_manifest=None):
comp = cls()
if not old_manifest:
comp.added = set(fm.filename for fm in manifest.file_manifest_list.elements)
return comp
old_files = {fm.filename: fm.hash for fm in old_manifest.file_manifest_list.elements}
for fm in manifest.file_manifest_list.elements:
old_file_hash = old_files.pop(fm.filename, None)
if old_file_hash:
if fm.hash == old_file_hash:
comp.unchanged.add(fm.filename)
else:
comp.changed.add(fm.filename)
else:
comp.added.add(fm.filename)
# any remaining old files were removed
if old_files:
comp.removed = set(old_files.keys())
return comp