Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[jsinterp] Support YT player 590f65a6 #32820

Merged
merged 5 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions test/test_jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,11 @@ def test_32066(self):
def test_unary_operators(self):
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
self.assertEqual(jsi.call_function('f'), 0)
# fails
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
# self.assertEqual(jsi.call_function('f'), 0)
jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
self.assertEqual(jsi.call_function('f'), 0)
# https://github.com/ytdl-org/youtube-dl/issues/32815
jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}')
self.assertEqual(jsi.call_function('f'), 42)

""" # fails so far
def test_packed(self):
Expand Down
4 changes: 4 additions & 0 deletions test/test_youtube_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
),
(
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
]


Expand Down
1 change: 0 additions & 1 deletion youtube_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3033,7 +3033,6 @@ def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_jso
transform_source=transform_source, default=None)

def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):

# allow passing `transform_source` through to _find_jwplayer_data()
transform_source = kwargs.pop('transform_source', None)
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
Expand Down
9 changes: 5 additions & 4 deletions youtube_dl/extractor/palcomp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..utils import (
int_or_none,
str_or_none,
try_get,
traverse_obj,
)


Expand Down Expand Up @@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
}
name'''

@ classmethod
@classmethod
def suitable(cls, url):
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)

Expand All @@ -118,7 +118,8 @@ def _real_extract(self, url):
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']

def entries():
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
for music in traverse_obj(artist, (
'musics', 'nodes', lambda _, m: m['musicID'])):
yield self._parse_music(music)

return self.playlist_result(
Expand All @@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
'description': 'md5:7043342c09a224598e93546e98e49282',
'upload_date': '20161107',
'uploader_id': 'maiaramaraisaoficial',
'uploader_id': '@maiaramaraisaoficial',
'uploader': 'Maiara e Maraisa',
}
}]
Expand Down
85 changes: 68 additions & 17 deletions youtube_dl/jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
remove_quotes,
unified_timestamp,
variadic,
write_string,
)
from .compat import (
compat_basestring,
Expand Down Expand Up @@ -53,15 +54,16 @@ def update_and_rename_wrapper(w):

# NB In principle NaN cannot be checked by membership.
# Here all NaN values are actually this one, so _NaN is _NaN,
# although _NaN != _NaN.
# although _NaN != _NaN. Ditto Infinity.

_NaN = float('nan')
_Infinity = float('inf')


def _js_bit_op(op):

def zeroise(x):
return 0 if x in (None, JS_Undefined, _NaN) else x
return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x

@wraps_op(op)
def wrapped(a, b):
Expand All @@ -84,7 +86,7 @@ def wrapped(a, b):
def _js_div(a, b):
if JS_Undefined in (a, b) or not (a or b):
return _NaN
return operator.truediv(a or 0, b) if b else float('inf')
return operator.truediv(a or 0, b) if b else _Infinity


def _js_mod(a, b):
Expand Down Expand Up @@ -220,6 +222,42 @@ def __repr__(self):
return 'LocalNameSpace%s' % (self.maps, )


class Debugger(object):
    """Optional tracer for the JS interpreter's statement evaluation.

    Nothing is written unless the class attribute ENABLED is set to a
    true value.
    """

    # global switch: tracing is off by default
    ENABLED = False

    @staticmethod
    def write(*args, **kwargs):
        """Emit one `[debug] JS: ...` line through write_string().

        Each positional argument is converted with compat_str(), shortened
        to at most 100 characters and joined with single spaces.

        The only keyword read is `level` (default 100): the line is
        prefixed by (100 - level) copies of the indent string, so a lower
        level gives a deeper indent.
        """
        level = kwargs.get('level', 100)

        def truncate_string(s, left, right=0):
            # keep the head and (optionally) the tail of an over-long
            # string, marking the cut with '...'; the result is never
            # longer than left + right characters
            if s is None or len(s) <= left + right:
                return s
            return '...'.join((s[:left - 3], s[-right:] if right else ''))

        write_string('[debug] JS: {0}{1}\n'.format(
            ' ' * (100 - level),
            ' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))

    @classmethod
    def wrap_interpreter(cls, f):
        """Decorate the statement interpreter `f` with optional tracing.

        `f` is called as f(self, stmt, local_vars, allow_recursion, ...)
        and must return a pair (ret, should_ret). The wrapper returns that
        pair unchanged and re-raises any exception from `f`.

        While ENABLED is true the wrapper also logs each non-blank
        statement before it is evaluated, the result afterwards, and any
        exception raised, using `allow_recursion` as the indent level.
        """
        # function-scope import keeps this block self-contained
        from functools import wraps

        @wraps(f)
        # allow_recursion keeps the default of the decorated function so
        # that decorating it does not turn the argument into a required one
        def interpret_statement(self, stmt, local_vars, allow_recursion=100, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)
            try:
                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
            except Exception as e:
                if cls.ENABLED:
                    if isinstance(e, ExtractorError):
                        # log the message originally passed to the error
                        e = e.orig_msg
                    cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
                # bare raise: the exception propagates unchanged
                raise
            if cls.ENABLED and stmt.strip():
                # skip the echo when the result's repr is the statement
                # itself, unless the statement returns
                if should_ret or repr(ret) != stmt:
                    cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
            return ret, should_ret

        return interpret_statement


class JSInterpreter(object):
__named_object_counter = 0

Expand Down Expand Up @@ -307,8 +345,7 @@ def regex_flags(cls, expr):
def __op_chars(cls):
op_chars = set(';,[')
for op in cls._all_operators():
for c in op[0]:
op_chars.add(c)
op_chars.update(op[0])
return op_chars

def _named_object(self, namespace, obj):
Expand All @@ -326,9 +363,8 @@ def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
# collections.Counter() is ~10% slower in both 2.7 and 3.9
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, skipping = None, False, 0
after_op, in_regex_char_group = True, False

in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
skipping = 0
for idx, char in enumerate(expr):
paren_delta = 0
if not in_quote:
Expand Down Expand Up @@ -382,10 +418,12 @@ def _separate_at_paren(cls, expr, delim=None):
return separated[0][1:].strip(), separated[1].strip()

@staticmethod
def _all_operators():
return itertools.chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
def _all_operators(_cached=[]):
if not _cached:
_cached.extend(itertools.chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
return _cached

def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
if op in ('||', '&&'):
Expand Down Expand Up @@ -416,7 +454,7 @@ def _index(self, obj, idx, allow_undefined=False):
except Exception as e:
if allow_undefined:
return JS_Undefined
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)

def _dump(self, obj, namespace):
try:
Expand All @@ -438,6 +476,7 @@ def _dump(self, obj, namespace):
_FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(')

@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
raise self.Exception('Recursion limit reached')
Expand Down Expand Up @@ -511,7 +550,6 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
expr = self._dump(inner, local_vars) + outer

if expr.startswith('('):

m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
if m:
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
Expand Down Expand Up @@ -693,7 +731,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
(?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$
)|(?P<return>
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<attribute>
Expand Down Expand Up @@ -727,11 +765,12 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
raise JS_Break()
elif expr == 'continue':
raise JS_Continue()

elif expr == 'undefined':
return JS_Undefined, should_return
elif expr == 'NaN':
return _NaN, should_return
elif expr == 'Infinity':
return _Infinity, should_return

elif md.get('return'):
return local_vars[m.group('name')], should_return
Expand Down Expand Up @@ -760,18 +799,28 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
while len(separated) > 1 and not separated[-1].strip():
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1]
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
right_expr = None
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue
Expand All @@ -797,6 +846,8 @@ def assertion(cndn, msg):

def eval_method():
if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
return
types = {
'String': compat_str,
Expand Down
2 changes: 1 addition & 1 deletion youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,7 +2406,7 @@ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""

self.orig_msg = msg
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True
if video_id is not None:
Expand Down
Loading