diff --git a/Makefile b/Makefile
index 3e17365b83d..8a09a054769 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,6 @@
+# Also edit in youtube_dl/version.py
+VERSION = 2022.02.17
+
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
@@ -37,6 +40,9 @@ test:
 	nosetests --verbose test
 	$(MAKE) codetest
 
+test-redgifs:
+	python3 -m youtube_dl -jq 'https://xhamster4.com/videos/izzy-bell-creampie-pussy-xhpwA7S'
+
 ot: offlinetest
 
 # Keep this list in sync with devscripts/run_tests.sh
@@ -133,3 +139,14 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
 		youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
 		youtube-dl
+
+build:
+	rm -f dist/*.whl
+	VERSION=$(VERSION)_redgifs python3 setup.py bdist_wheel
+	ls -ldh dist/youtube_dl-$(VERSION)_redgifs-py2.py3-none-any.whl
+
+release: build
+	-gh release create v$(VERSION) --title "v$(VERSION)" --notes "Fixes to extractors, probably."
+	gh release upload v$(VERSION) dist/youtube_dl-$(VERSION)_redgifs-py2.py3-none-any.whl
+
+.PHONY: build release test-redgifs
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index a1ca791caaa..1f84e58b5b0 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
@@ -66,46 +67,7 @@ def _real_extract(self, url):
             webpage, 'title', group='title',
             default=None) or self._og_search_title(webpage)
 
-        formats = []
-        sources = self._parse_json(
-            self._search_regex(
-                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
-            video_id, fatal=False)
-        if sources and isinstance(sources, dict):
-            for format_id, format_url in sources.items():
-                if format_url:
-                    formats.append({
-                        'url': format_url,
-                        'format_id': format_id,
-                        'height': int_or_none(format_id),
-                    })
-        medias = self._parse_json(
-            self._search_regex(
-                r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
-                'media definitions', default='{}'),
-            video_id, fatal=False)
-        if medias and isinstance(medias, list):
-            for media in medias:
-                format_url = url_or_none(media.get('videoUrl'))
-                if not format_url:
-                    continue
-                if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
-                        fatal=False))
-                    continue
-                format_id = media.get('quality')
-                formats.append({
-                    'url': format_url,
-                    'format_id': format_id,
-                    'height': int_or_none(format_id),
-                })
-        if not formats:
-            video_url = self._html_search_regex(
-                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
-            formats.append({'url': video_url})
-        self._sort_formats(formats)
+        formats = self._get_formats(webpage, video_id)
 
         thumbnail = self._og_search_thumbnail(webpage)
         upload_date = unified_strdate(self._search_regex(
@@ -134,3 +96,72 @@ def _real_extract(self, url):
             'age_limit': age_limit,
             'formats': formats,
         })
+
+    def _get_formats(self, webpage, video_id):
+        """Collect formats from the media endpoints referenced in the page.
+
+        Every brace-delimited fragment of the page is tried as JSON; those
+        whose 'videoUrl' points at a redtube mp4/hls media endpoint are
+        resolved through _add_formats().  Returns the sorted format list.
+        """
+        formats = []
+
+        for candidate in re.findall(r'\{.+?\}', webpage):
+            try:
+                media = json.loads(candidate)
+            except ValueError:
+                # Most fragments are not JSON.  ValueError also covers
+                # json.JSONDecodeError, which does not exist on Python 2.
+                continue
+            media_url = url_or_none(media.get('videoUrl'))
+            if not media_url:
+                continue
+            if media_url.startswith('https://www.redtube.com/media/mp4?'):
+                self._add_formats(formats, media_url, 'mp4', video_id)
+            elif media_url.startswith('https://www.redtube.com/media/hls?'):
+                self._add_formats(formats, media_url, 'hls', video_id)
+
+        self._sort_formats(formats)
+        return formats
+
+    def _add_formats(self, formats, url, codec, video_id):
+        """Append the streams listed by a media endpoint to formats.
+
+        url is fetched and expected to hold a JSON list of stream objects;
+        codec ('mp4' or 'hls') is only used to build each format_id.
+        Returns the formats list that was passed in.
+        """
+        meta = self._parse_json(
+            self._download_webpage(url, video_id), video_id, fatal=False)
+        if not isinstance(meta, list):
+            return formats
+
+        for stream in meta:
+            if not isinstance(stream, dict):
+                continue
+            stream_url = url_or_none(stream.get('videoUrl'))
+            if not stream_url:
+                continue
+            quality = stream.get('quality')
+            if isinstance(quality, list):
+                quality = quality[0] if quality else None
+
+            fmt = {
+                'url': stream_url,
+                'format_id': '%s-%s' % (quality, codec),
+                'height': int_or_none(quality),
+            }
+
+            # Height and bitrate embedded in the URL (e.g. 720P_4000K_1234)
+            # take precedence over the advertised quality
+            mobj = re.search(
+                r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', stream_url)
+            if mobj:
+                fmt.update({
+                    'height': int(mobj.group('height')),
+                    'tbr': int(mobj.group('bitrate')),
+                })
+
+            formats.append(fmt)
+
+        return formats
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index e17947fc6a7..de085b6f087 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -3,6 +3,7 @@
 
 import itertools
 import re
+import json
 
 from .common import InfoExtractor
 from ..compat import compat_str
@@ -146,7 +147,7 @@ def _real_extract(self, url):
 
         def get_height(s):
             return int_or_none(self._search_regex(
-                r'^(\d+)[pP]', s, 'height', default=None))
+                r'^(\d+)[pP]', s, 'height', default=480))
 
         initials = self._parse_json(
             self._search_regex(
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index 31e8abb7298..216fe4fb586 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -184,3 +184,32 @@ def extract_tag_box(regex, title):
             'age_limit': age_limit,
             'formats': formats,
         }
+
+    def _get_formats(self, meta):
+        """Build format dicts from a list of media definition dicts.
+
+        Each item must provide 'videoUrl', 'quality' and 'codec'.
+        """
+        formats = []
+
+        for item in meta:
+            video_url = item['videoUrl']
+            fmt = {
+                'url': video_url,
+                'format_id': '%s-%s' % (item['quality'], item['codec']),
+                'height': int(item['quality']),
+            }
+
+            # Height and bitrate embedded in the URL (e.g. 720P_4000K_1234)
+            # take precedence over the advertised quality
+            mobj = re.search(
+                r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
+            if mobj:
+                fmt.update({
+                    'height': int(mobj.group('height')),
+                    'tbr': int(mobj.group('bitrate')),
+                })
+
+            formats.append(fmt)
+
+        return formats
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b82fbc702ff..38142a213ea 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2021.12.17'
+__version__ = '2022.02.17-redgifs'