[cli/core/downloader/utils] Add download reordering optimization

This is an optimization that aims to fix issues with some titles,
such as World War Z, that contain many duplicated files and thus
have a very high runtime cache requirement.

The basic idea is to group files that share many chunks together,
so their data can be evicted from the cache sooner.

For most games this has little to no effect. For some titles with
heavy duplication, however, it can reduce RAM usage significantly.
For instance, the RAM requirement for World War Z drops from
5.3 GiB to 499 MiB.

Partially fixes #17
commit 69383c4788
parent 3f27ea0296
Author: derrod
Date:   2020-05-04 13:59:04 +02:00

3 changed files with 74 additions and 13 deletions
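
The idea in the message above is easy to demonstrate with a toy model of the analysis pass (hypothetical file names and chunk IDs, not legendary's actual data structures): a chunk has to stay in the shared-memory cache from its first use until its last reference is consumed, so placing files that share chunks next to each other lets chunks be evicted sooner and lowers the peak.

    from collections import Counter

    # hypothetical manifest: file -> set of chunk IDs it is built from
    files = {
        'a.pak': {1, 2, 3}, 'a_copy.pak': {1, 2, 3},
        'b.pak': {4, 5, 6}, 'b_copy.pak': {4, 5, 6},
    }

    def peak_cache_chunks(order):
        # a chunk enters the cache at its first use and can be dropped
        # once its last reference has been consumed
        refs = Counter(c for f in order for c in files[f])
        cached, peak = set(), 0
        for f in order:
            for c in sorted(files[f]):
                cached.add(c)
                peak = max(peak, len(cached))
                refs[c] -= 1
                if not refs[c]:
                    cached.discard(c)  # last reference gone, evict
        return peak  # measured in chunks; the real analysis tracks bytes

    print(peak_cache_chunks(['a.pak', 'b.pak', 'a_copy.pak', 'b_copy.pak']))  # 6
    print(peak_cache_chunks(['a.pak', 'a_copy.pak', 'b.pak', 'b_copy.pak']))  # 3

With duplicates adjacent, shared chunks are fetched and released back to back instead of lingering in the cache while unrelated files download; that is the effect behind the World War Z numbers above, at scale.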

legendary/cli.py

@@ -274,6 +274,7 @@ class LegendaryCLI:
         logger.info('Preparing download...')
         # todo use status queue to print progress from CLI
+        # This has become a little ridiculous hasn't it?
         dlm, analysis, igame = self.core.prepare_download(game=game, base_game=base_game, base_path=args.base_path,
                                                           force=args.force, max_shm=args.shared_memory,
                                                           max_workers=args.max_workers, game_folder=args.game_folder,
@@ -284,7 +285,8 @@
                                                           platform_override=args.platform_override,
                                                           file_prefix_filter=args.file_prefix,
                                                           file_exclude_filter=args.file_exclude_prefix,
-                                                          file_install_tag=args.install_tag)
+                                                          file_install_tag=args.install_tag,
+                                                          dl_optimizations=args.order_opt)
 
         # game is either up to date or hasn't changed, so we have nothing to do
         if not analysis.dl_size:
@@ -467,6 +469,8 @@ def main():
                                 type=str, help='Exclude files starting with <prefix> (case insensitive)')
     install_parser.add_argument('--install-tag', dest='install_tag', action='store', metavar='<tag>',
                                 type=str, help='Only download files with the specified install tag (testing)')
+    install_parser.add_argument('--enable-reordering', dest='order_opt', action='store_true',
+                                help='Enable reordering to attempt to optimize RAM usage during download')
 
     launch_parser.add_argument('--offline', dest='offline', action='store_true',
                                default=False, help='Skip login and launch game without online authentication')
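
With the new flag wired into the install subcommand, the optimization can be requested per download, e.g. legendary install <AppName> --enable-reordering (app name is a placeholder). For a short list of known-problematic titles it is also enabled automatically, via the is_opt_enabled() check added to core.py below.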

legendary/core.py

@@ -25,6 +25,7 @@ from legendary.models.exceptions import *
 from legendary.models.game import *
 from legendary.models.json_manifest import JSONManifest
 from legendary.models.manifest import Manifest, ManifestMeta
+from legendary.utils.game_workarounds import is_opt_enabled
 
 # ToDo: instead of true/false return values for success/failure actually raise an exception that the CLI/GUI
@@ -361,8 +362,8 @@ class LegendaryCore:
                          game_folder: str = '', override_manifest: str = '',
                          override_old_manifest: str = '', override_base_url: str = '',
                          platform_override: str = '', file_prefix_filter: str = '',
-                         file_exclude_filter: str = '', file_install_tag: str = ''
-                         ) -> (DLManager, AnalysisResult, ManifestMeta):
+                         file_exclude_filter: str = '', file_install_tag: str = '',
+                         dl_optimizations: bool = False) -> (DLManager, AnalysisResult, ManifestMeta):
 
         # load old manifest
         old_manifest = None
@@ -408,10 +409,6 @@
             install_path = os.path.join(base_path, game_folder)
 
-        # todo move this somewhere else so the directory only gets created once the download is started
-        if not os.path.exists(install_path):
-            os.makedirs(install_path)
-
         self.log.info(f'Install path: {install_path}')
 
         if not force:
@@ -432,13 +429,20 @@
         if not max_shm:
             max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=1024)
 
+        if dl_optimizations or is_opt_enabled(game.app_name):
+            self.log.info('Download order optimizations are enabled.')
+            process_opt = True
+        else:
+            process_opt = False
+
         dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
                         max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers)
         anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
                                   patch=not disable_patching, resume=not force,
                                   file_prefix_filter=file_prefix_filter,
                                   file_exclude_filter=file_exclude_filter,
-                                  file_install_tag=file_install_tag)
+                                  file_install_tag=file_install_tag,
+                                  processing_optimization=process_opt)
 
         prereq = None
         if new_manifest.meta.prereq_ids:
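
The is_opt_enabled() helper imported above lives in legendary/utils/game_workarounds.py, which isn't shown in the hunks on this page. A minimal sketch consistent with how it is called here (the entries are placeholders, not the actual list shipped with legendary):

    # legendary/utils/game_workarounds.py (sketch)

    # app names of titles with enough duplication that the download
    # order optimization should be enabled by default
    _OPTIMIZE_DEFAULT = {
        'examplegame',  # placeholder; the real list ships with legendary
    }

    def is_opt_enabled(app_name):
        return app_name.lower() in _OPTIMIZE_DEFAULT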

legendary/downloader/manager.py

@@ -242,7 +242,8 @@ class DLManager(Process):
     def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                      patch=True, resume=True, file_prefix_filter=None,
-                     file_exclude_filter=None, file_install_tag=None) -> AnalysisResult:
+                     file_exclude_filter=None, file_install_tag=None,
+                     processing_optimization=False) -> AnalysisResult:
         """
         Run analysis on manifest and old manifest (if not None) and return a result
         with a summary of resources required in order to install the provided manifest.
@@ -253,6 +254,8 @@
         :param resume: Continue based on resume file if it exists
         :param file_prefix_filter: Only download files that start with this prefix
         :param file_exclude_filter: Exclude files with this prefix from download
+        :param file_install_tag: Only install files with the specified tag
+        :param processing_optimization: Attempt to optimize processing order and RAM usage
         :return: AnalysisResult
         """
@@ -324,9 +327,19 @@
         analysis_res.unchanged = len(mc.unchanged)
         self.log.debug(f'{analysis_res.unchanged} unchanged files')
 
+        if processing_optimization and len(manifest.file_manifest_list.elements) > 8_000:
+            self.log.warning('Manifest contains too many files, processing optimizations will be disabled.')
+            processing_optimization = False
+        elif processing_optimization:
+            self.log.info('Processing order optimization is enabled, analysis may take a few seconds longer...')
+
         # count references to chunks for determining runtime cache size later
         references = Counter()
-        for fm in manifest.file_manifest_list.elements:
+        file_to_chunks = defaultdict(set)
+        fmlist = sorted(manifest.file_manifest_list.elements,
+                        key=lambda a: a.filename.lower())
+
+        for fm in fmlist:
             # chunks of unchanged files are not downloaded so we can skip them
             if fm.filename in mc.unchanged:
                 analysis_res.unchanged += fm.file_size
@@ -334,6 +347,46 @@
             for cp in fm.chunk_parts:
                 references[cp.guid_num] += 1
+                if processing_optimization:
+                    file_to_chunks[fm.filename].add(cp.guid_num)
+
+        if processing_optimization:
+            # reorder the file manifest list to group files that share many chunks
+            # 5 is mostly arbitrary but has shown in testing to be a good choice
+            min_overlap = 5
+            # enumerate the file list to try and find a "partner" for
+            # each file that shares the most chunks with it.
+            partners = dict()
+            filenames = [fm.filename for fm in fmlist]
+
+            for num, filename in enumerate(filenames[:int((len(filenames)+1)/2)]):
+                chunks = file_to_chunks[filename]
+                max_overlap = min_overlap
+
+                for other_file in filenames[num+1:]:
+                    overlap = len(chunks & file_to_chunks[other_file])
+                    if overlap > max_overlap:
+                        partners[filename] = other_file
+                        max_overlap = overlap
+
+            # iterate over all the files again and this time around insert each file's partner right after it
+            _fmlist = []
+            processed = set()
+
+            for fm in fmlist:
+                if fm.filename in processed:
+                    continue
+                _fmlist.append(fm)
+                processed.add(fm.filename)
+                # try to find the file's "partner"
+                partner = partners.get(fm.filename, None)
+                if not partner or partner in processed:
+                    continue
+
+                partner_fm = manifest.file_manifest_list.get_file_by_path(partner)
+                _fmlist.append(partner_fm)
+                processed.add(partner)
+
+            fmlist = _fmlist
 
         # determine reusable chunks and prepare lookup table for reusable ones
         re_usable = defaultdict(dict)
@@ -367,8 +420,7 @@
         # run through the list of files and create the download jobs and also determine minimum
         # runtime cache requirement by simulating adding/removing from cache during download.
         self.log.debug('Creating filetasks and chunktasks...')
-        for current_file in sorted(manifest.file_manifest_list.elements,
-                                   key=lambda a: a.filename.lower()):
+        for current_file in fmlist:
             # skip unchanged and empty files
             if current_file.filename in mc.unchanged:
                 continue
@@ -440,7 +492,8 @@
         if analysis_res.min_memory > self.max_shared_memory:
             shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
             required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
-            raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}')
+            raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}. '
+                              f'Try running legendary with the --enable-reordering flag to reduce memory usage.')
 
         # calculate actual dl and patch write size.
         analysis_res.dl_size = \
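
To see the reordering pass in isolation, here is the same partner-matching logic as a standalone function (toy file names and chunk sets; min_overlap is lowered to 1 so the tiny example triggers a match, whereas the real code requires more than five shared chunks and works on manifest objects):

    def reorder(filenames, file_to_chunks, min_overlap=1):
        # find a "partner" for each file in the first half of the list:
        # the later file it shares the most (more than min_overlap) chunks with
        partners = {}
        for num, filename in enumerate(filenames[:(len(filenames) + 1) // 2]):
            chunks = file_to_chunks[filename]
            max_overlap = min_overlap
            for other_file in filenames[num + 1:]:
                overlap = len(chunks & file_to_chunks[other_file])
                if overlap > max_overlap:
                    partners[filename] = other_file
                    max_overlap = overlap

        # emit each file followed immediately by its partner (if any)
        reordered, processed = [], set()
        for filename in filenames:
            if filename in processed:
                continue
            reordered.append(filename)
            processed.add(filename)
            partner = partners.get(filename)
            if partner and partner not in processed:
                reordered.append(partner)
                processed.add(partner)
        return reordered

    chunks = {'a.pak': {1, 2, 3}, 'b.pak': {4, 5}, 'z_copy_of_a.pak': {1, 2, 3}}
    print(reorder(sorted(chunks), chunks))
    # ['a.pak', 'z_copy_of_a.pak', 'b.pak'] - the duplicate moves next to its source

Only the first half of the sorted list is searched for partners, since a partner always sits later in the list; that halves the quadratic pairing cost, and that quadratic cost is presumably also the reason for the 8,000-file cutoff added to run_analysis above.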