1
0
Fork 0
mirror of synced 2024-06-29 19:41:05 +12:00

add ability to fetch media

This commit is contained in:
Nick Sweeting 2019-01-11 05:52:29 -05:00
parent 0df098717a
commit c33f7ba91c
2 changed files with 51 additions and 62 deletions

View file

@ -18,6 +18,7 @@ from config import (
FETCH_SCREENSHOT, FETCH_SCREENSHOT,
FETCH_DOM, FETCH_DOM,
FETCH_GIT, FETCH_GIT,
FETCH_MEDIA,
RESOLUTION, RESOLUTION,
CHECK_SSL_VALIDITY, CHECK_SSL_VALIDITY,
SUBMIT_ARCHIVE_DOT_ORG, SUBMIT_ARCHIVE_DOT_ORG,
@ -89,6 +90,9 @@ def archive_link(link_dir, link, overwrite=True):
log_link_archive(link_dir, link, update_existing) log_link_archive(link_dir, link, update_existing)
if FETCH_FAVICON:
link = fetch_favicon(link_dir, link, overwrite=overwrite)
if FETCH_WGET: if FETCH_WGET:
link = fetch_wget(link_dir, link, overwrite=overwrite) link = fetch_wget(link_dir, link, overwrite=overwrite)
@ -113,8 +117,9 @@ def archive_link(link_dir, link, overwrite=True):
if FETCH_GIT: if FETCH_GIT:
link = fetch_git(link_dir, link, overwrite=overwrite) link = fetch_git(link_dir, link, overwrite=overwrite)
if FETCH_FAVICON: if FETCH_MEDIA:
link = fetch_favicon(link_dir, link, overwrite=overwrite) link = fetch_media(link_dir, link, overwrite=overwrite)
write_link_index(link_dir, link) write_link_index(link_dir, link)
# print() # print()
@ -435,71 +440,54 @@ def fetch_favicon(link_dir, link, timeout=TIMEOUT):
'output': output, 'output': output,
} }
@attach_result_to_link('media')
def fetch_media(link_dir, link, timeout=TIMEOUT, overwrite=False):
    """Download playlists or individual video, audio, and subtitles using youtube-dl.

    youtube-dl is run inside a ``media`` sub-directory of ``link_dir`` so that
    everything it writes lands there.

    Args:
        link_dir: directory holding the archived files for this link.
        link: mapping describing the link; only ``link['url']`` is read here.
        timeout: seconds allowed for the download (the subprocess is given
            one extra second on top of this).
        overwrite: when False and the ``media`` directory already exists,
            nothing is downloaded.

    Returns:
        A dict in every case:
        ``{'output': 'media', 'status': 'skipped'}`` when skipped,
        ``{'cmd': CMD, 'output': 'media'}`` on success, and
        ``{'cmd': CMD, 'output': <media dir path>, 'status': 'failed'}`` when
        youtube-dl fails, times out, or cannot be started.
    """
    output = os.path.join(link_dir, 'media')

    if os.path.exists(output) and not overwrite:
        return {'output': 'media', 'status': 'skipped'}

    # With overwrite=True the directory may already exist; a plain os.mkdir()
    # would raise FileExistsError in that case.
    os.makedirs(output, exist_ok=True)
    print(' - Downloading media')
    CMD = [
        'youtube-dl',
        '--write-description',
        '--write-info-json',
        '--write-annotations',
        '--yes-playlist',
        # Every item is handed to youtube-dl verbatim as one argument, so an
        # option name must not carry trailing whitespace.
        '--write-thumbnail',
        '--no-call-home',
        '--no-check-certificate',
        # '--user-agent' was previously listed here without a value, which
        # makes youtube-dl treat the next item ('--all-subs') as the UA
        # string. It is left out so the default user agent is used and
        # subtitles are actually requested.
        '--all-subs',
        '-x',
        '--audio-format', 'mp3',
        '--audio-quality', '320K',
        '--embed-thumbnail',
        '--add-metadata',
        link['url'],
    ]

    end = progress(timeout, prefix=' ')
    try:
        result = run(CMD, stdout=DEVNULL, stderr=DEVNULL, cwd=output, timeout=timeout + 1)
        end()
        if result.returncode:
            print(' got youtubedl response code {}:'.format(result.returncode))
            raise Exception('Failed to download media')
        chmod_file('media', cwd=link_dir)
    except Exception as e:
        end()
        # The command ran with cwd=output, so that is the directory to cd
        # into when reproducing it by hand.
        print(' Run to see full output:', 'cd {}; {}'.format(output, ' '.join(CMD)))
        print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
        # NOTE(review): 'status' mirrors the key used by the skipped branch;
        # confirm how attach_result_to_link expects failures to be reported.
        return {
            'cmd': CMD,
            'output': output,
            'status': 'failed',
        }

    # Same dict shape as the other branches instead of a bare string.
    # NOTE(review): assumes attach_result_to_link consumes a dict with an
    # 'output' key, as the skipped/failed branches imply - confirm.
    return {
        'cmd': CMD,
        'output': 'media',
    }
@attach_result_to_link('git') @attach_result_to_link('git')
def fetch_git(link_dir, link, timeout=TIMEOUT): def fetch_git(link_dir, link, timeout=TIMEOUT):

View file

@ -22,6 +22,7 @@ FETCH_PDF = os.getenv('FETCH_PDF', 'True'
FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true' FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true'
FETCH_DOM = os.getenv('FETCH_DOM', 'True' ).lower() == 'true' FETCH_DOM = os.getenv('FETCH_DOM', 'True' ).lower() == 'true'
FETCH_GIT = os.getenv('FETCH_GIT', 'True' ).lower() == 'true' FETCH_GIT = os.getenv('FETCH_GIT', 'True' ).lower() == 'true'
FETCH_MEDIA = os.getenv('FETCH_MEDIA', 'True' ).lower() == 'true'
FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true' FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true'
SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' ).lower() == 'true' SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' ).lower() == 'true'
RESOLUTION = os.getenv('RESOLUTION', '1440,1200' ) RESOLUTION = os.getenv('RESOLUTION', '1440,1200' )