Skip to content

Commit

Permalink
Merge pull request #747 from tcely/patch-2
Browse files Browse the repository at this point in the history
Add and use `refresh_formats`
  • Loading branch information
meeb authored Feb 20, 2025
2 parents 570a150 + 4591d38 commit f78fa44
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 12 deletions.
9 changes: 1 addition & 8 deletions tubesync/sync/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,7 @@ def filter_max_cap(instance: Media):
return False

max_cap_age = instance.source.download_cap_date
if not max_cap_age:
log.debug(
f"Media: {instance.source} / {instance} has not max_cap_age "
f"so not skipping based on max_cap_age"
)
return False

if instance.published <= max_cap_age:
if max_cap_age and instance.published <= max_cap_age:
# log new media instances, not every media instance every time
if not instance.skip:
log.info(
Expand Down
34 changes: 34 additions & 0 deletions tubesync/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,40 @@ def loaded_metadata(self):
except Exception as e:
return {}

@property
def refresh_formats(self):
    """Re-fetch the remote format list for this media item when stale.

    Returns True when the stored metadata was updated with fresh formats,
    False when nothing was refreshed (no metadata, still fresh, or the
    re-index was skipped).

    NOTE(review): this is a property with network I/O and state mutation as
    side effects; callers invoke it as a bare attribute access
    (`media.refresh_formats`) and may or may not persist with .save().
    """
    data = self.loaded_metadata
    metadata_seconds = data.get('epoch', None)
    if not metadata_seconds:
        # No usable metadata epoch at all: clear stored metadata so it is
        # treated as missing and re-fetched from scratch later.
        self.metadata = None
        return False

    now = timezone.now()
    # 'formats_epoch' records when formats were last refreshed; fall back to
    # the overall metadata epoch the first time through.
    formats_seconds = data.get('formats_epoch', metadata_seconds)
    metadata_dt = self.metadata_published(formats_seconds)
    if (now - metadata_dt) < timedelta(seconds=self.source.index_schedule):
        # Formats are still fresh relative to the source's indexing
        # schedule, so skip the (expensive) remote re-fetch.
        return False

    # index_metadata() may set self.skip as a side effect (presumably on an
    # extraction failure -- TODO confirm); reset it first so we can detect
    # that case afterwards and bail out without touching stored metadata.
    self.skip = False
    metadata = self.index_metadata()
    if self.skip:
        return False

    response = metadata
    if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False):
        # Optionally shrink the freshly indexed response before persisting.
        response = filter_response(metadata, True)

    # Splice only the formats list (and availability, if it changed) into
    # the existing metadata rather than replacing the whole document.
    field = self.get_metadata_field('formats')
    data[field] = response.get(field, [])
    if data.get('availability', 'public') != response.get('availability', 'public'):
        data['availability'] = response.get('availability', 'public')
    # Record when the formats were refreshed, for the staleness check above;
    # keep the previous value if the response carries no 'epoch'.
    data['formats_epoch'] = response.get('epoch', formats_seconds)

    # Local import, matching the original code (likely avoids a circular
    # import at module load time -- TODO confirm).
    from common.utils import json_serial
    compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
    self.metadata = compact_json
    return True

@property
def url(self):
url = self.URLS.get(self.source.source_type, '')
Expand Down
21 changes: 19 additions & 2 deletions tubesync/sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ def download_media(media_id):
err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
f'expected outfile does not exist: {filepath}')
log.error(err)
# Try refreshing formats
media.refresh_formats
# Raising an error here triggers the task to be re-attempted (or fail)
raise DownloadFailedException(err)

Expand Down Expand Up @@ -587,10 +589,25 @@ def save_all_media_for_source(source_id):
log.error(f'Task save_all_media_for_source(pk={source_id}) called but no '
f'source exists with ID: {source_id}')
return

already_saved = set()
mqs = Media.objects.filter(source=source)
refresh_qs = mqs.filter(
can_download=False,
skip=False,
manual_skip=False,
downloaded=False,
)
for media in refresh_qs:
media.refresh_formats
media.save()
already_saved.add(media.uuid)

# Trigger the post_save signal for each media item linked to this source as various
# flags may need to be recalculated
for media in Media.objects.filter(source=source):
media.save()
for media in mqs:
if media.uuid not in already_saved:
media.save()


@background(schedule=0, remove_existing_tasks=True)
Expand Down
9 changes: 7 additions & 2 deletions tubesync/sync/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def get_channel_image_info(url):
'simulate': True,
'logger': log,
'extract_flat': True, # Change to False to get detailed info
'check_formats': False,
})

with yt_dlp.YoutubeDL(opts) as y:
Expand Down Expand Up @@ -143,7 +144,11 @@ def get_media_info(url):
'simulate': True,
'logger': log,
'extract_flat': True,
'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
'check_formats': True,
'extractor_args': {
'youtube': {'formats': ['missing_pot']},
'youtubetab': {'approximate_date': ['true']},
},
})
response = {}
with yt_dlp.YoutubeDL(opts) as y:
Expand Down Expand Up @@ -248,7 +253,7 @@ def download_media(
'writeautomaticsub': auto_subtitles,
'subtitleslangs': sub_langs.split(','),
'writethumbnail': embed_thumbnail,
'check_formats': False,
'check_formats': None,
'overwrites': None,
'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY,
Expand Down

0 comments on commit f78fa44

Please sign in to comment.