From 3687a4f9bdb025855acd2a4e782748c8e3810765 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 4 Jun 2022 08:07:54 +0100 Subject: [PATCH 1/5] [DoodStream] Add extractor from yt-dlp back-port and improve --- youtube_dl/extractor/doodstream.py | 119 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 120 insertions(+) create mode 100644 youtube_dl/extractor/doodstream.py diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py new file mode 100644 index 00000000000..70bf4e51207 --- /dev/null +++ b/youtube_dl/extractor/doodstream.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import string +import time + +from ..compat import compat_filter as filter +from ..utils import ( + clean_html, + ExtractorError, + get_element_by_class, + parse_duration, + parse_filesize, + update_url_query, + unified_strdate, + url_or_none, +) + +from .common import InfoExtractor + + +class DoodStreamIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P[a-z0-9]+)' + _TESTS = [{ + 'url': 'http://dood.to/e/5s1wmbdacezb', + 'md5': '4568b83b31e13242b3f1ff96c55f0595', + 'info_dict': { + 'id': '5s1wmbdacezb', + 'ext': 'mp4', + 'title': 'Kat Wonders - Monthly May 2020', + 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'http://dood.watch/d/5s1wmbdacezb', + 'md5': '4568b83b31e13242b3f1ff96c55f0595', + 'info_dict': { + 'id': '5s1wmbdacezb', + 'ext': 'mp4', + 'title': 'Kat Wonders - Monthly May 2020', + 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'https://dood.to/d/jzrxn12t2s7n', + 'md5': '3207e199426eca7c2aa23c2872e6728a', + 'info_dict': { + 'id': 'jzrxn12t2s7n', + 'ext': 'mp4', + 'title': 'Stacy Cruz Cute ALLWAYSWELL', + 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'https://dood.to/d/is34uy8wvaet', + 'md5': '04740d3ba93bcd638aa7a097d9226710', + 'info_dict': { + 'id': 'is34uy8wvaet', + 'ext': 'mp4', + 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB - DoodStream', + 'upload_date': '20211202', + 'filesize_approx': int, + 'duration': 9886, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + url = 'https://dood.to/e/' + video_id + headers = { + 'User-Agent': 'Mozilla/5.0', # (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', + } + webpage = self._download_webpage(url, video_id, headers=headers) + + title = self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None) + if not title: + title = self._html_search_regex(r']*>([^<]+?)(?:\|\s+DoodStream\s*)? Date: Sat, 4 Jun 2022 09:26:41 +0100 Subject: [PATCH 2/5] [DoodStream] Support more TLDs * also fix title extraction --- youtube_dl/extractor/doodstream.py | 35 +++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index 70bf4e51207..94411ebb053 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -21,7 +21,7 @@ class DoodStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|la|pm|sh|ws|one)/[ed]/(?P[a-z0-9]+)' _TESTS = [{ 'url': 'http://dood.to/e/5s1wmbdacezb', 'md5': '4568b83b31e13242b3f1ff96c55f0595', @@ -61,11 +61,24 @@ class DoodStreamIE(InfoExtractor): 'info_dict': { 'id': 'is34uy8wvaet', 'ext': 'mp4', - 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB - DoodStream', + 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB', 'upload_date': '20211202', + 'thumbnail': r're:https?://img\.doodcdn\.com?/[\w/]+\.jpg', 'filesize_approx': int, 'duration': 9886, } + }, { + 'url': 'https://dood.so/d/wlihoael8uog', + 'md5': '2c14444c89788cc309738c1560abe278', + 'info_dict': { + 'id': 'wlihoael8uog', + 'ext': 'mp4', + 'title': 'VID 20220319 161659', + 'thumbnail': r're:https?://img\.doodcdn\.com?/splash/rmpnhb8ckkk79cge\.jpg', + 'upload_date': '20220319', + 'filesize_approx': int, + 'duration': 12.0, + } }] def _real_extract(self, url): @@ -76,11 +89,12 @@ def _real_extract(self, url): } webpage = self._download_webpage(url, video_id, headers=headers) - title = self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None) - if not title: - title = self._html_search_regex(r']*>([^<]+?)(?:\|\s+DoodStream\s*)?]*>([^<]+?)(?:[|-]\s+DoodStream\s*)? Date: Fri, 29 Nov 2024 14:11:41 +0000 Subject: [PATCH 3/5] Incorporate changes from PR #32979 * update tests * send `Referer` when fetching media link Co-authored-by: mp3butcher --- youtube_dl/extractor/doodstream.py | 87 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index 94411ebb053..8dd54a0abb0 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -1,9 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import random -import string -import time +from random import choice as random_choice +from string import ascii_letters, digits +from time import time as time_time from ..compat import compat_filter as filter from ..utils import ( @@ -21,40 +21,20 @@ class DoodStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|la|pm|sh|ws|one)/[ed]/(?P[a-z0-9]+)' + # dood.* redirects + # .watch -> .re (but HEAD request fails in GenericIE) + # .so -> .li + _VALID_URL = r'https?://(?:www\.)?(?Pdood\.(?:to|la|li|pm|re|sh|watch|ws|one)|ds2play\.com)/[ed]/(?P[a-z\d]+)' _TESTS = [{ - 'url': 'http://dood.to/e/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', + 'url': 'https://dood.li/e/h7ecgw5oqn8k', + 'md5': '90f2af170551c17fc78bee7426890054', 'info_dict': { - 'id': '5s1wmbdacezb', + 'id': 'h7ecgw5oqn8k', 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', + 'title': 'Free-Slow-Music', + 'upload_date': '20230814', + 'thumbnail': 'https://img.doodcdn.co/splash/7mbnwydhb6kb7xyk.jpg', }, - 'skip': 'Video not found', - }, { - 'url': 'http://dood.watch/d/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', - 'info_dict': { - 'id': '5s1wmbdacezb', - 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', - }, - 'skip': 'Video not found', - }, { - 'url': 'https://dood.to/d/jzrxn12t2s7n', - 'md5': '3207e199426eca7c2aa23c2872e6728a', - 'info_dict': { - 'id': 'jzrxn12t2s7n', - 'ext': 'mp4', - 'title': 'Stacy Cruz Cute ALLWAYSWELL', - 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg', - }, - 'skip': 'Video not found', }, { 'url': 'https://dood.to/d/is34uy8wvaet', 'md5': '04740d3ba93bcd638aa7a097d9226710', @@ -66,9 +46,10 @@ class DoodStreamIE(InfoExtractor): 'thumbnail': r're:https?://img\.doodcdn\.com?/[\w/]+\.jpg', 'filesize_approx': int, 'duration': 9886, - } + }, + 'skip': 'Video not found', }, { - 'url': 'https://dood.so/d/wlihoael8uog', + 'url': 'https://dood.sh/d/wlihoael8uog', 'md5': '2c14444c89788cc309738c1560abe278', 'info_dict': { 'id': 'wlihoael8uog', @@ -78,16 +59,20 @@ class DoodStreamIE(InfoExtractor): 'upload_date': '20220319', 'filesize_approx': int, 'duration': 12.0, - } + }, + }, { + 'url': 'http://dood.ws /d/h7ecgw5oqn8k', + 'only_matching': True, + }, { + 'url': 'https://dood.li/d/wlihoael8uog', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://dood.to/e/' + video_id - headers = { - 'User-Agent': 'Mozilla/5.0', # (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', - } - webpage = self._download_webpage(url, video_id, headers=headers) + host = 'dood.li' + url = 'https://%s/e/%s' % (host, video_id) + webpage = self._download_webpage(url, video_id, note='Downloading "/e/" webpage') def get_title(html, fatal=False): return self._html_search_regex(r']*>([^<]+?)(?:[|-]\s+DoodStream\s*)? Date: Fri, 29 Nov 2024 14:17:10 +0000 Subject: [PATCH 4/5] Further small improvements --- youtube_dl/extractor/doodstream.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index 8dd54a0abb0..0d32711862a 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -78,16 +78,16 @@ def get_title(html, fatal=False): return self._html_search_regex(r']*>([^<]+?)(?:[|-]\s+DoodStream\s*)?]*>([^<]+?)pass_md5/[\da-f-]+/(?P[\da-z]+))', webpage, 'tokens', + group=('pm', 'tok')) + headers = { + 'Referer': url, + } # construct the media link final_url = self._download_webpage( 'https://%s/%s' % (host, pass_md5), video_id, headers={ @@ -100,6 +100,7 @@ def get_title(html, fatal=False): 'expiry': int(time_time() * 1000), }) + # get additional metadata thumb = next(filter(None, (url_or_none(self._html_search_meta(x, webpage, default=None)) for x in ('og:image', 'twitter:image'))), None) description = self._html_search_meta( From da90df06c3b8e7e6cc66a124c196aef4eb856a89 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 29 Nov 2024 14:27:29 +0000 Subject: [PATCH 5/5] Linty --- youtube_dl/extractor/doodstream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index 0d32711862a..9350dee9c36 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -61,7 +61,7 @@ class DoodStreamIE(InfoExtractor): 'duration': 12.0, }, }, { - 'url': 'http://dood.ws /d/h7ecgw5oqn8k', + 'url': 'http://dood.ws/d/h7ecgw5oqn8k', 'only_matching': True, }, { 'url': 'https://dood.li/d/wlihoael8uog', @@ -94,7 +94,7 @@ def get_title(html, fatal=False): 'Referer': url, }, note='Downloading authpage URL') final_url += ''.join((random_choice(ascii_letters + digits) - for _ in range(10))) + for _ in range(10))) final_url = update_url_query(final_url, { 'token': token, 'expiry': int(time_time() * 1000),