From 69383c47887337e7b3bef8a434b7fa6f75dc3c77 Mon Sep 17 00:00:00 2001
From: derrod
Date: Mon, 4 May 2020 13:59:04 +0200
Subject: [PATCH] [cli/core/downloader/utils] Add download reordering
 optimization

This is an optimization that aims to fix issues with titles such as
World War Z that contain lots of duplicated files, resulting in a very
high runtime cache requirement.

The basic idea is to group files that share many chunks together so
their data can be removed from the cache sooner.

For most games this has little to no effect. For some titles with heavy
duplication, however, it can reduce RAM usage significantly: the RAM
requirement for World War Z, for instance, drops from 5.3 GiB to
499 MiB.

Partially fixes #17
---
 legendary/cli.py                |  6 +++-
 legendary/core.py               | 18 ++++++----
 legendary/downloader/manager.py | 63 ++++++++++++++++++++++++++++++---
 3 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/legendary/cli.py b/legendary/cli.py
index 897dea6..f756410 100644
--- a/legendary/cli.py
+++ b/legendary/cli.py
@@ -274,6 +274,7 @@ class LegendaryCLI:
 
         logger.info('Preparing download...')
         # todo use status queue to print progress from CLI
+        # This has become a little ridiculous, hasn't it?
         dlm, analysis, igame = self.core.prepare_download(game=game, base_game=base_game, base_path=args.base_path,
                                                           force=args.force, max_shm=args.shared_memory,
                                                           max_workers=args.max_workers, game_folder=args.game_folder,
@@ -284,7 +285,8 @@ class LegendaryCLI:
                                                           platform_override=args.platform_override,
                                                           file_prefix_filter=args.file_prefix,
                                                           file_exclude_filter=args.file_exclude_prefix,
-                                                          file_install_tag=args.install_tag)
+                                                          file_install_tag=args.install_tag,
+                                                          dl_optimizations=args.order_opt)
 
         # game is either up to date or hasn't changed, so we have nothing to do
         if not analysis.dl_size:
@@ -467,6 +469,8 @@ def main():
                                 type=str, help='Exclude files starting with <prefix> (case insensitive)')
     install_parser.add_argument('--install-tag', dest='install_tag', action='store', metavar='<tag>',
                                 type=str, help='Only download files with the specified install tag (testing)')
+    install_parser.add_argument('--enable-reordering', dest='order_opt', action='store_true',
+                                help='Enable reordering to attempt to optimize RAM usage during download')
 
     launch_parser.add_argument('--offline', dest='offline', action='store_true',
                                default=False, help='Skip login and launch game without online authentication')
diff --git a/legendary/core.py b/legendary/core.py
index 38b62d5..f127f85 100644
--- a/legendary/core.py
+++ b/legendary/core.py
@@ -25,6 +25,7 @@ from legendary.models.exceptions import *
 from legendary.models.game import *
 from legendary.models.json_manifest import JSONManifest
 from legendary.models.manifest import Manifest, ManifestMeta
+from legendary.utils.game_workarounds import is_opt_enabled
 
 
 # ToDo: instead of true/false return values for success/failure actually raise an exception that the CLI/GUI
@@ -361,8 +362,8 @@ class LegendaryCore:
                          game_folder: str = '', override_manifest: str = '',
                          override_old_manifest: str = '', override_base_url: str = '',
                          platform_override: str = '', file_prefix_filter: str = '',
-                         file_exclude_filter: str = '', file_install_tag: str = ''
-                         ) -> (DLManager, AnalysisResult, ManifestMeta):
+                         file_exclude_filter: str = '', file_install_tag: str = '',
+                         dl_optimizations: bool = False) -> (DLManager, AnalysisResult, ManifestMeta):
 
         # load old manifest
         old_manifest = None
@@ -408,10 +409,6 @@ class LegendaryCore:
 
         install_path = os.path.join(base_path, game_folder)
 
-        # todo move this somewhere else so the directory only gets created once the download is started
-        if not os.path.exists(install_path):
-            os.makedirs(install_path)
-
         self.log.info(f'Install path: {install_path}')
 
         if not force:
@@ -432,13 +429,20 @@ class LegendaryCore:
         if not max_shm:
             max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=1024)
 
+        if dl_optimizations or is_opt_enabled(game.app_name):
+            self.log.info('Download order optimizations are enabled.')
+            process_opt = True
+        else:
+            process_opt = False
+
         dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
                         max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers)
         anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
                                   patch=not disable_patching, resume=not force,
                                   file_prefix_filter=file_prefix_filter,
                                   file_exclude_filter=file_exclude_filter,
-                                  file_install_tag=file_install_tag)
+                                  file_install_tag=file_install_tag,
+                                  processing_optimization=process_opt)
 
         prereq = None
         if new_manifest.meta.prereq_ids:
diff --git a/legendary/downloader/manager.py b/legendary/downloader/manager.py
index 460a981..59069b5 100644
--- a/legendary/downloader/manager.py
+++ b/legendary/downloader/manager.py
@@ -242,7 +242,8 @@ class DLManager(Process):
 
     def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                      patch=True, resume=True, file_prefix_filter=None,
-                     file_exclude_filter=None, file_install_tag=None) -> AnalysisResult:
+                     file_exclude_filter=None, file_install_tag=None,
+                     processing_optimization=False) -> AnalysisResult:
         """
         Run analysis on manifest and old manifest (if not None) and return a result
         with a summary of the resources required in order to install the provided manifest.
@@ -253,6 +254,8 @@
         :param resume: Continue based on resume file if it exists
         :param file_prefix_filter: Only download files that start with this prefix
         :param file_exclude_filter: Exclude files with this prefix from download
+        :param file_install_tag: Only install files with the specified tag
+        :param processing_optimization: Attempt to optimize processing order and RAM usage
         :return: AnalysisResult
         """
 
@@ -324,9 +327,19 @@
         analysis_res.unchanged = len(mc.unchanged)
         self.log.debug(f'{analysis_res.unchanged} unchanged files')
 
+        if processing_optimization and len(manifest.file_manifest_list.elements) > 8_000:
+            self.log.warning('Manifest contains too many files; processing optimizations will be disabled.')
+            processing_optimization = False
+        elif processing_optimization:
+            self.log.info('Processing order optimization is enabled; analysis may take a few seconds longer...')
+
         # count references to chunks for determining runtime cache size later
         references = Counter()
-        for fm in manifest.file_manifest_list.elements:
+        file_to_chunks = defaultdict(set)
+        fmlist = sorted(manifest.file_manifest_list.elements,
+                        key=lambda a: a.filename.lower())
+
+        for fm in fmlist:
             # chunks of unchanged files are not downloaded so we can skip them
             if fm.filename in mc.unchanged:
                 analysis_res.unchanged += fm.file_size
@@ -334,6 +347,46 @@
             for cp in fm.chunk_parts:
                 references[cp.guid_num] += 1
+                if processing_optimization:
+                    file_to_chunks[fm.filename].add(cp.guid_num)
+
+        if processing_optimization:
+            # reorder the file manifest list to group files that share many chunks
+            # 5 is mostly arbitrary but has been shown in testing to be a good choice
+            min_overlap = 5
+            # enumerate the file list to try and find a "partner" for
+            # each file that shares the most chunks with it.
+            partners = dict()
+            filenames = [fm.filename for fm in fmlist]
+
+            for num, filename in enumerate(filenames[:int((len(filenames)+1)/2)]):
+                chunks = file_to_chunks[filename]
+                max_overlap = min_overlap
+
+                for other_file in filenames[num+1:]:
+                    overlap = len(chunks & file_to_chunks[other_file])
+                    if overlap > max_overlap:
+                        partners[filename] = other_file
+                        max_overlap = overlap
+
+            # iterate over all the files again and this time around add each file's "partner" right after it
+            _fmlist = []
+            processed = set()
+            for fm in fmlist:
+                if fm.filename in processed:
+                    continue
+                _fmlist.append(fm)
+                processed.add(fm.filename)
+                # try to find the file's "partner"
+                partner = partners.get(fm.filename, None)
+                if not partner or partner in processed:
+                    continue
+
+                partner_fm = manifest.file_manifest_list.get_file_by_path(partner)
+                _fmlist.append(partner_fm)
+                processed.add(partner)
+
+            fmlist = _fmlist
 
         # determine reusable chunks and prepare lookup table for reusable ones
         re_usable = defaultdict(dict)
 
@@ -367,8 +420,7 @@
         # run through the list of files and create the download jobs and also determine minimum
         # runtime cache requirement by simulating adding/removing from cache during download.
         self.log.debug('Creating filetasks and chunktasks...')
-        for current_file in sorted(manifest.file_manifest_list.elements,
-                                   key=lambda a: a.filename.lower()):
+        for current_file in fmlist:
             # skip unchanged and empty files
             if current_file.filename in mc.unchanged:
                 continue
@@ -440,7 +492,8 @@
         if analysis_res.min_memory > self.max_shared_memory:
             shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
             required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
-            raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}')
+            raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}. '
+                              f'Try running legendary with the --enable-reordering flag to reduce memory usage.')
 
         # calculate actual dl and patch write size.
         analysis_res.dl_size = \
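
Note (illustration only, not part of the patch): the reordering heuristic added
to manager.py above can be sketched standalone as follows. The file names and
chunk IDs here are hypothetical, and the sketch operates on plain dicts instead
of manifest objects, but the pairing logic mirrors the hunks above: each file in
the first half of the list is paired with the later file it shares the most
chunks with (at least min_overlap), and partners are then placed adjacently.

    # Standalone sketch of the pairing heuristic (hypothetical data).
    def reorder_files(filenames, file_to_chunks, min_overlap=5):
        # find a "partner" for each file in the first half of the list;
        # files in the second half only serve as partner candidates
        partners = {}
        for num, filename in enumerate(filenames[:(len(filenames) + 1) // 2]):
            chunks = file_to_chunks[filename]
            max_overlap = min_overlap
            for other_file in filenames[num + 1:]:
                overlap = len(chunks & file_to_chunks[other_file])
                if overlap > max_overlap:
                    partners[filename] = other_file
                    max_overlap = overlap

        # emit each file immediately followed by its partner, skipping
        # partners that were already placed earlier in the list
        result, processed = [], set()
        for filename in filenames:
            if filename in processed:
                continue
            result.append(filename)
            processed.add(filename)
            partner = partners.get(filename)
            if partner and partner not in processed:
                result.append(partner)
                processed.add(partner)
        return result

    # 'a.pak' and 'c.pak' share six chunks (> min_overlap), so they end up
    # adjacent and chunks 1-6 can be evicted once 'c.pak' has been written
    chunks = {
        'a.pak': {1, 2, 3, 4, 5, 6},
        'b.pak': {7, 8, 9},
        'c.pak': {1, 2, 3, 4, 5, 6, 10},
    }
    print(reorder_files(sorted(chunks), chunks))  # ['a.pak', 'c.pak', 'b.pak']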