From 48082c90917bd01009e134384c926608b3b3c4be Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 15:52:33 +0100 Subject: [PATCH 01/11] [core] Let Git ignore `__pycache__`, `.pytest_cache` --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index c4870a6baf4..0214efca9e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +__pycache__/ *.pyc *.pyo *.class @@ -5,6 +6,7 @@ *.DS_Store wine-py2exe/ py2exe.log +.pytest_cache/ *.kate-swp build/ dist/ From 515c8b85b1061c6f1cfcdee979d509b7a76a5224 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 12:53:03 +0100 Subject: [PATCH 02/11] [ORFRadio] Support /programm/ URL format * fixes yt-dlp/yt-dlp#11014 --- youtube_dl/extractor/orf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 1ee78edbc17..2e1341f4492 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -112,7 +112,7 @@ class ORFRadioIE(ORFRadioBase): _VALID_URL = ( r'https?://sound\.orf\.at/radio/(?P<station>{0})/sendung/(?P<id>\d+)(?:/(?P<show>\w+))?'.format(_STATION_RE), - r'https?://(?P<station>{0})\.orf\.at/player/(?P<date>\d{{8}})/(?P<id>\d+)'.format(_STATION_RE), + r'https?://(?P<station>{0})\.orf\.at/(?:player|programm)/(?P<date>\d{{8}})/(?P<id>\d+)'.format(_STATION_RE), ) _TESTS = [{ @@ -150,6 +150,10 @@ class ORFRadioIE(ORFRadioBase): 'duration': 1500, }, 'skip': 'Shows from ORF Sound are only available for 30 days.' 
+ }, { + # yt-dlp/yt-dlp#11014 + 'url': 'https://oe1.orf.at/programm/20240916/769302/Playgrounds', + 'only_matching': True, }] def _real_extract(self, url): From ac0c9c8f9f7c9cd6d2854d42dc095e0013a5f40c Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 13:14:16 +0100 Subject: [PATCH 03/11] [HentaiStigma] Support new frame format with HTML5 video * resolves #25019 --- youtube_dl/extractor/hentaistigma.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py index 86a93de4d62..c01fe05fd8b 100644 --- a/youtube_dl/extractor/hentaistigma.py +++ b/youtube_dl/extractor/hentaistigma.py @@ -1,6 +1,11 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + merge_dicts, + traverse_obj, +) class HentaiStigmaIE(InfoExtractor): @@ -24,16 +29,17 @@ def _real_extract(self, url): title = self._html_search_regex( r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>', webpage, 'title') - wrap_url = self._html_search_regex( + + wrap_url = self._search_regex( r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url') - wrap_webpage = self._download_webpage(wrap_url, video_id) - video_url = self._html_search_regex( - r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url') + vid_page = self._download_webpage(wrap_url, video_id) + + entries = self._parse_html5_media_entries(wrap_url, vid_page, video_id) + self._sort_formats(traverse_obj(entries, (0, 'formats')) or []) - return { + return merge_dicts({ 'id': video_id, - 'url': video_url, 'title': title, 'age_limit': 18, - } + }, entries[0]) From c445489a46b8b4dd2ab5019e9d31c656bb83856f Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 13:28:17 +0100 Subject: [PATCH 04/11] [Mgoon,Kaltura] Fix regex typo `(:?` * thx yt-dlp/yt-dlp#10807 (584d455) --- youtube_dl/extractor/kaltura.py | 2 +- youtube_dl/extractor/mgoon.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6d4d9339478..861b6952be8 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -23,7 +23,7 @@ class KalturaIE(InfoExtractor): (?: kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ + (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py index 7bb473900fc..56086f7b950 100644 --- a/youtube_dl/extractor/mgoon.py +++ b/youtube_dl/extractor/mgoon.py @@ -13,7 +13,7 @@ class MgoonIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| + (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| video\.mgoon\.com)/(?P<id>[0-9]+)''' _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}' _TESTS = [ From 620298e0fffe30b1e46dd265943fc47902f672f6 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 14:54:20 +0100 Subject: [PATCH 05/11] [core] Fix jwplayer format parsing * thx yt-dlp/yt-dlp#10956 --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b0016d07ec..c54406e7acd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -3128,7 +3128,8 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, continue urls.add(source_url) source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) + # https://github.com/yt-dlp/yt-dlp/pull/10956 + ext = determine_ext(source_url, default_ext=mimetype2ext(source_type)) if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', From 94f181f9f58dcf2cf0635873682e823ca7e665f7 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 19 Oct 
2024 17:01:24 +0100 Subject: [PATCH 06/11] [YandexMusic] Fix CAPTCHA check * correct logic in _download_webpage() hook (yt-dlp/yt-dlp#4432) * improve error message. --- youtube_dl/extractor/yandexmusic.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 8da5b430f7c..91b731673e5 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -30,17 +30,20 @@ def _handle_error(response): @staticmethod def _raise_captcha(): raise ExtractorError( - 'YandexMusic has considered youtube-dl requests automated and ' - 'asks you to solve a CAPTCHA. You can either wait for some ' - 'time until unblocked and optionally use --sleep-interval ' - 'in future or alternatively you can go to https://music.yandex.ru/ ' - 'solve CAPTCHA, then export cookies and pass cookie file to ' - 'youtube-dl with --cookies', + 'YandexMusic has considered youtube-dl requests automated ' + 'and asks you to solve a CAPTCHA. You can wait for some time ' + 'until unblocked and optionally use --sleep-interval in future; ' + 'otherwise solve the CAPTCHA at https://music.yandex.ru/, ' + 'then export cookies and pass the cookie file to youtube-dl ' + 'with --cookies.', expected=True) def _download_webpage_handle(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs) - if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' 
in webpage: + blocked_ip_msg = ( + 'Нам очень жаль, но запросы, поступившие с ' + 'вашего IP-адреса, похожи на автоматические.') + if blocked_ip_msg in (webpage or [''])[0]: self._raise_captcha() return webpage From ae0cbb84f2c2ae0a57d4a53bb310bc0cd2ecc025 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 20 Oct 2024 11:53:06 +0100 Subject: [PATCH 07/11] [XFileShare] Add geo-block detection --- youtube_dl/extractor/xfileshare.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 4dc3032e7e0..62ce75970da 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -13,6 +13,7 @@ decode_packed_codes, determine_ext, ExtractorError, + get_element_by_class, get_element_by_id, int_or_none, merge_dicts, @@ -200,11 +201,20 @@ def _real_extract(self, url): host, 'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) webpage = self._download_webpage(url, video_id) - container_div = get_element_by_id('container', webpage) or webpage + main = self._search_regex( + r'(?s)<main>(.+)</main>', webpage, 'main', default=webpage) + container_div = ( + get_element_by_id('container', main) + or get_element_by_class('container', main) + or webpage) if self._search_regex( r'>This server is in maintenance mode\.', container_div, 'maint error', group=0, default=None): raise ExtractorError(clean_html(container_div), expected=True) + if self._search_regex( + 'not available in your country', container_div, + 'geo block', group=0, default=None): + self.raise_geo_restricted() if self._search_regex( self._FILE_NOT_FOUND_REGEXES, container_div, 'missing video error', group=0, default=None): From 01b80a080249739fef9e538ac2b213332babf59f Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 20 Oct 2024 12:28:08 +0100 Subject: [PATCH 08/11] [XFileShare] Re-factor and fix tests * update site list * support page with player data in