diff --git a/archivebox/config.py b/archivebox/config.py
index 4d1bffe9..83c8462b 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -126,6 +126,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--write-sub',
                                                                 '--all-subs',
                                                                 '--write-auto-sub',
+                                                                '--convert-subs=srt',
                                                                 '--yes-playlist',
                                                                 '--continue',
                                                                 '--ignore-errors',
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index dd2b8802..9ad2e0e1 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -41,7 +41,7 @@ class Tag(models.Model):
     Based on django-taggit model
     """
     id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
-    
+
     name = models.CharField(unique=True, blank=False, max_length=100)
 
     # slug is autoset on save from name, never set it manually
diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py
index 1c0a21ba..e41a4002 100644
--- a/archivebox/extractors/media.py
+++ b/archivebox/extractors/media.py
@@ -70,11 +70,24 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
     finally:
         timer.end()
 
+    # add video description and subtitles to full-text index
+    # (errors='replace': a non-UTF-8 text file must not raise here, after the extractor has already finished)
+    index_texts = [
+        text_file.read_text(encoding='utf-8', errors='replace').strip()
+        for text_file in (
+            *output_path.glob('*.description'),
+            *output_path.glob('*.srt'),
+            *output_path.glob('*.vtt'),
+            *output_path.glob('*.lrc'),
+        )
+    ]
+
     return ArchiveResult(
         cmd=cmd,
         pwd=str(out_dir),
         cmd_version=YOUTUBEDL_VERSION,
         output=output,
         status=status,
+        index_texts=index_texts,
         **timer.stats,
     )