From 3b31478dfd18c03a1305089e79c30dc929f53eeb Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 31 Mar 2023 12:30:22 +0200
Subject: [PATCH 01/47] Fix support for NPO downloads

---
 youtube_dl/extractor/npo.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index e525ad92843..eff9edb8b6d 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,6 +1,8 @@
 from __future__ import unicode_literals
 
 import re
+import urllib.parse
+from http.cookies import SimpleCookie
 
 from .common import InfoExtractor
 from ..compat import (
@@ -184,22 +186,28 @@ def _real_extract(self, url):
         return self._get_info(url, video_id) or self._get_old_info(video_id)
 
     def _get_info(self, url, video_id):
-        token = self._download_json(
+        _, xsrf_token_response = self._download_webpage_handle(
             'https://www.npostart.nl/api/token', video_id,
             'Downloading token', headers={
                 'Referer': url,
                 'X-Requested-With': 'XMLHttpRequest',
-            })['token']
+            })
+        cookies = SimpleCookie()
+        cookies.load(xsrf_token_response.headers['Set-Cookie'])
+        cookies = {k: v.value for k, v in cookies.items()}
+        xsrf_token = cookies['XSRF-TOKEN']
 
         player = self._download_json(
             'https://www.npostart.nl/player/%s' % video_id, video_id,
-            'Downloading player JSON', data=urlencode_postdata({
+            'Downloading player JSON',
+            headers={"x-xsrf-token": urllib.parse.unquote(xsrf_token)},
+            data=urlencode_postdata({
                 'autoplay': 0,
                 'share': 1,
                 'pageUrl': url,
+                'isFavourite': "false",
                 'hasAdConsent': 0,
-                '_token': token,
-            }))
+            },))
 
         player_token = player['token']
 
@@ -215,7 +223,7 @@ def _get_info(self, url, video_id):
                     'quality': 'npo',
                     'tokenId': player_token,
                     'streamType': 'broadcast',
-                })
+                }, data=b"")
             if not streams:
                 continue
             stream = streams.get('stream')

From b4776f2e36e6235c6a3142973355be7e03eee919 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 31 Mar 2023 12:39:11 +0200
Subject: [PATCH 02/47] Import from compat

---
 youtube_dl/extractor/npo.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index eff9edb8b6d..dba42205801 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
 import re
-import urllib.parse
-from http.cookies import SimpleCookie
 
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_str,
+    compat_urllib_parse_unquote_plus,
+    compat_cookies_SimpleCookie,
 )
 from ..utils import (
     determine_ext,
@@ -192,7 +192,7 @@ def _get_info(self, url, video_id):
                 'Referer': url,
                 'X-Requested-With': 'XMLHttpRequest',
             })
-        cookies = SimpleCookie()
+        cookies = compat_cookies_SimpleCookie()
         cookies.load(xsrf_token_response.headers['Set-Cookie'])
         cookies = {k: v.value for k, v in cookies.items()}
         xsrf_token = cookies['XSRF-TOKEN']
@@ -200,7 +200,7 @@ def _get_info(self, url, video_id):
         player = self._download_json(
             'https://www.npostart.nl/player/%s' % video_id, video_id,
             'Downloading player JSON',
-            headers={"x-xsrf-token": urllib.parse.unquote(xsrf_token)},
+            headers={"x-xsrf-token": compat_urllib_parse_unquote_plus(xsrf_token)},
             data=urlencode_postdata({
                 'autoplay': 0,
                 'share': 1,

From fb2b4e2894171825c6c85d813a8120b679eadf52 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 31 Mar 2023 12:46:05 +0200
Subject: [PATCH 03/47] Add line comment

---
 youtube_dl/extractor/npo.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index dba42205801..646b0f43370 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -207,7 +207,7 @@ def _get_info(self, url, video_id):
                 'pageUrl': url,
                 'isFavourite': "false",
                 'hasAdConsent': 0,
-            },))
+            }))
 
         player_token = player['token']
 
@@ -223,7 +223,8 @@ def _get_info(self, url, video_id):
                     'quality': 'npo',
                     'tokenId': player_token,
                     'streamType': 'broadcast',
-                }, data=b"")
+                },
+                data=b"")  # empty byte string to force a POST request instead of GET, without it HTTP 405 will happen
             if not streams:
                 continue
             stream = streams.get('stream')

From 9e1acb2527a9141710657a35d358dba54b4c8ddd Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 31 Mar 2023 12:56:18 +0200
Subject: [PATCH 04/47] Fix flake8

---
 youtube_dl/extractor/npo.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 646b0f43370..e8e596be198 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -200,7 +200,9 @@ def _get_info(self, url, video_id):
         player = self._download_json(
             'https://www.npostart.nl/player/%s' % video_id, video_id,
             'Downloading player JSON',
-            headers={"x-xsrf-token": compat_urllib_parse_unquote_plus(xsrf_token)},
+            headers={
+                "x-xsrf-token": compat_urllib_parse_unquote_plus(xsrf_token)
+            },
             data=urlencode_postdata({
                 'autoplay': 0,
                 'share': 1,
@@ -224,7 +226,9 @@ def _get_info(self, url, video_id):
                     'tokenId': player_token,
                     'streamType': 'broadcast',
                 },
-                data=b"")  # empty byte string to force a POST request instead of GET, without it HTTP 405 will happen
+                data=b"")
+            # Empty byte string in the call above to force a POST request
+            # Without it HTTP 405 will happen
             if not streams:
                 continue
             stream = streams.get('stream')

From 632897860b94c20bab65c9fd0ad81d6ae3ab30c1 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Mon, 3 Apr 2023 09:50:21 +0200
Subject: [PATCH 05/47] Accept suggestions on PR; comply with conventions

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/npo.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index e8e596be198..84bde9683dd 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -4,10 +4,10 @@
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_cookies_SimpleCookie,
     compat_HTTPError,
     compat_str,
     compat_urllib_parse_unquote_plus,
-    compat_cookies_SimpleCookie,
 )
 from ..utils import (
     determine_ext,
@@ -194,20 +194,20 @@ def _get_info(self, url, video_id):
             })
         cookies = compat_cookies_SimpleCookie()
         cookies.load(xsrf_token_response.headers['Set-Cookie'])
-        cookies = {k: v.value for k, v in cookies.items()}
+        cookies = dict((k, v.value) for k, v in cookies.items())
         xsrf_token = cookies['XSRF-TOKEN']
 
         player = self._download_json(
             'https://www.npostart.nl/player/%s' % video_id, video_id,
             'Downloading player JSON',
             headers={
-                "x-xsrf-token": compat_urllib_parse_unquote_plus(xsrf_token)
+                'x-xsrf-token': compat_urllib_parse_unquote_plus(xsrf_token)
             },
             data=urlencode_postdata({
                 'autoplay': 0,
                 'share': 1,
                 'pageUrl': url,
-                'isFavourite': "false",
+                'isFavourite': 'false',
                 'hasAdConsent': 0,
             }))
 
@@ -226,7 +226,7 @@ def _get_info(self, url, video_id):
                     'tokenId': player_token,
                     'streamType': 'broadcast',
                 },
-                data=b"")
+                data=b'')
             # Empty byte string in the call above to force a POST request
             # Without it HTTP 405 will happen
             if not streams:

From 0c7261db901e79aed3dfd20f0b3c99ccbd32d20a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 01:51:02 +0100
Subject: [PATCH 06/47] Update npo.py

* simplify comment
* force CI
---
 youtube_dl/extractor/npo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 84bde9683dd..d6379f1d35b 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -226,9 +226,8 @@ def _get_info(self, url, video_id):
                     'tokenId': player_token,
                     'streamType': 'broadcast',
                 },
+                # empty data to force a POST request, avoiding HTTP 405
                 data=b'')
-            # Empty byte string in the call above to force a POST request
-            # Without it HTTP 405 will happen
             if not streams:
                 continue
             stream = streams.get('stream')

From da3d1f4321ec0b374b4201e092c085550003aec3 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 10:36:03 +0100
Subject: [PATCH 07/47] Add notes on new npo.nl site

---
 youtube_dl/extractor/npo.py | 96 ++++++++++++++++++++++++++++++++++---
 1 file changed, 90 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 472da54ac0c..aef007e6a2b 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -36,6 +36,7 @@ def _get_token(self, video_id):
 
 class NPOIE(NPOBaseIE):
     IE_NAME = 'npo'
+    # TODO find out if all hosts still work:
     IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
     _VALID_URL = r'''(?x)
                     (?:
@@ -62,6 +63,10 @@ class NPOIE(NPOBaseIE):
             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
             'upload_date': '20140622',
         },
+        'skip': 'Video gone',
+    }, {
+        'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
+        # TODO other test attributes
     }, {
         'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
         'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
@@ -73,8 +78,9 @@ class NPOIE(NPOBaseIE):
             'upload_date': '20090227',
             'duration': 2400,
         },
+        'skip': 'Video gone',
     }, {
-        'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
+        'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
         'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
         'info_dict': {
             'id': 'VPWON_1169289',
@@ -95,7 +101,8 @@ class NPOIE(NPOBaseIE):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Video gone',
     }, {
         # non asf in streams
         'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
@@ -106,7 +113,8 @@ class NPOIE(NPOBaseIE):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
         'info_dict': {
@@ -119,7 +127,8 @@ class NPOIE(NPOBaseIE):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
         'info_dict': {
@@ -132,7 +141,8 @@ class NPOIE(NPOBaseIE):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Video gone',
     }, {
         # audio
         'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
@@ -148,15 +158,19 @@ class NPOIE(NPOBaseIE):
     }, {
         'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
         'only_matching': True,
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
         'only_matching': True,
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
         'only_matching': True,
+        'skip': 'Video gone',
     }, {
         'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
         'only_matching': True,
+        'skip': 'Video gone',
     }, {
         # live stream
         'url': 'npo:LI_NL1_4188102',
@@ -704,7 +718,6 @@ class VPROIE(NPOPlaylistBaseIE):
                 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                 'upload_date': '20130225',
             },
-            'skip': 'Video gone',
         },
         {
             'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
@@ -723,6 +736,7 @@ class VPROIE(NPOPlaylistBaseIE):
                 'title': 'education education',
             },
             'playlist_count': 2,
+            'skip': 'Video gone',
         },
         {
             'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
@@ -778,3 +792,73 @@ class AndereTijdenIE(NPOPlaylistBaseIE):
         },
         'playlist_count': 3,
     }]
+
+###############################################################
+#   Description of the new process of getting to the stream   #
+###############################################################
+
+# Valid URLs for new tests
+# https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/
+# https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/afspelen
+
+# Step 1: Normalize the URL
+# If the URL ends with /afspelen, strip that
+# We need the slug in the next step to find the productId
+
+# Step 2: Find the productId
+# In the contents of the URL is a JSON blob:
+# <script id="__NEXT_DATA__" type="application/json">
+# There's a list of queries in the ['props']['pageProps']['dehydratedState']['queries'] key
+# In this list of queries, one is the current episode
+# This one can be found by looping over queries and selecting
+# the one where the key ['state']['data']['slug'] contains the last part of the URL
+# In the test case 'wie-is-de-mol-2'
+# We need the productId from the corresponding entry in ['state']['data']['productId']
+# This looks a bit GraphQL-like, so there might be an easier way to query the productId, if we know the slug
+
+# Step 3: Get the JWT
+# With this productId we can get a player-token
+# https://npo.nl/start/api/domain/player-token?productId=VARA_101372912
+# The response is a JSON dictionary, with one key ['token']
+# In this key is a JWT
+
+# Step 4: Get the stream-link json
+# The JWT needs to be put in the Authorization header in a POST request to
+# https://prod.npoplayer.nl/stream-link
+# with the following payload (for this test case)
+# {
+#   "profileName": "dash",
+#   "drmType": "widevine",
+#   "referrerUrl": "https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/afspelen"
+# }
+# Even though the request asks for Widevine DRM, it's not always available
+# At this point we don't know whether there's DRM yet
+
+# Step 5: Get the stream.mpd from the JSON response and find out if DRM is enabled
+# This returns a JSON response with a stream.mpd file in the ['stream']['streamURL'] key
+# If dash_unencrypted is in this URL it's a stream without DRM and we can download it
+
+# For all new content there most likely is DRM protection on the stream
+# In that case dash_cenc is in the stream.mpd URL
+
+
+##############################################################
+#   Differences when embedded on the broadcaster's website   #
+##############################################################
+
+# The same episode is also embedded on the broadcaster's website: https://bnnvara.nl/videos/27455
+# It's nice to support that too, and in the case of bnnvara.nl (and maybe more broadcasters)
+# it's even easier to get to the productId
+# By POSTing to the GraphQL endpoint below, we can query using the id (last part of the URL):
+# https://api.bnnvara.nl/bff/graphql
+# {
+#   "operationName": "getMedia",
+#   "variables": {
+#     "id": "27455",
+#     "hasAdConsent": false,
+#     "atInternetId": 70
+#   },
+#   "query": "query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n  player(\n    id: $id\n    mediaUrl: $mediaUrl\n    hasAdConsent: $hasAdConsent\n    atInternetId: $atInternetId\n  ) {\n    ... on PlayerSucces {\n      brand {\n        name\n        slug\n        broadcastsEnabled\n        __typename\n      }\n      title\n      programTitle\n      pomsProductId\n      broadcasters {\n        name\n        __typename\n      }\n      duration\n      classifications {\n        title\n        imageUrl\n        type\n        __typename\n      }\n      image {\n        title\n        url\n        __typename\n      }\n      cta {\n        title\n        url\n        __typename\n      }\n      genres {\n        name\n        __typename\n      }\n      subtitles {\n        url\n        language\n        __typename\n      }\n      sources {\n        name\n        url\n        ratio\n        __typename\n      }\n      type\n      token\n      __typename\n    }\n    ... on PlayerError {\n      error\n      __typename\n    }\n    __typename\n  }\n}"
+# }
+# The productId is in the response under the key ['data']['player']['pomsProductId']
+# From this point it's possible to continue at step 3 of the description above

From 577368116bd01244a5640e0247ed3cb876d9ac52 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 13:15:52 +0100
Subject: [PATCH 08/47] Fix token URL

---
 youtube_dl/extractor/npo.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index aef007e6a2b..9187680352a 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -30,7 +30,8 @@
 class NPOBaseIE(InfoExtractor):
     def _get_token(self, video_id):
         return self._download_json(
-            'http://ida.omroep.nl/app.php/auth', video_id,
+            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
+            video_id,
             note='Downloading token')['token']
 
 

From 29724e7b07ac35d5a82e64808699fa7879e3afd9 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 13:24:48 +0100
Subject: [PATCH 09/47] Delete all broken extractors

Re-implementing these is quicker for the cases where that's even still possible
---
 youtube_dl/extractor/extractors.py |  12 +-
 youtube_dl/extractor/npo.py        | 425 +----------------------------
 2 files changed, 9 insertions(+), 428 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 29b655c94c0..dabcd60cb75 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,17 +847,7 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import (
-    AndereTijdenIE,
-    NPOIE,
-    NPOLiveIE,
-    NPORadioIE,
-    NPORadioFragmentIE,
-    SchoolTVIE,
-    HetKlokhuisIE,
-    VPROIE,
-    WNLIE,
-)
+from .npo import NPOIE
 from .npr import NprIE
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9187680352a..f1d9c87ba1a 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -27,45 +27,22 @@
 )
 
 
-class NPOBaseIE(InfoExtractor):
-    def _get_token(self, video_id):
-        return self._download_json(
-            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
-            video_id,
-            note='Downloading token')['token']
-
-
-class NPOIE(NPOBaseIE):
+class NPOIE(InfoExtractor):
     IE_NAME = 'npo'
-    # TODO find out if all hosts still work:
-    IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
+    IE_DESC = 'npo.nl'
     _VALID_URL = r'''(?x)
                     (?:
                         npo:|
                         https?://
                             (?:www\.)?
                             (?:
-                                npo\.nl/(?:[^/]+/)*|
-                                (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
-                                omroepwnl\.nl/video/fragment/[^/]+__|
-                                (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
+                                npo\.nl/(?:[^/]+/)*
                             )
                         )
                         (?P<id>[^/?#]+)
                 '''
 
     _TESTS = [{
-        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
-        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
-        'info_dict': {
-            'id': 'VPWON_1220719',
-            'ext': 'm4v',
-            'title': 'Nieuwsuur',
-            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
-            'upload_date': '20140622',
-        },
-        'skip': 'Video gone',
-    }, {
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
         # TODO other test attributes
     }, {
@@ -91,110 +68,13 @@ class NPOIE(NPOBaseIE):
             'upload_date': '20130225',
             'duration': 3000,
         },
-    }, {
-        'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
-        'info_dict': {
-            'id': 'WO_VPRO_043706',
-            'ext': 'm4v',
-            'title': 'De nieuwe mens - Deel 1',
-            'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
-            'duration': 4680,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        # non asf in streams
-        'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
-        'info_dict': {
-            'id': 'WO_NOS_762771',
-            'ext': 'mp4',
-            'title': 'Hoe gaat Europa verder na Parijs?',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
-        'info_dict': {
-            'id': 'VPWON_1233944',
-            'ext': 'm4v',
-            'title': 'Aap, poot, pies',
-            'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
-            'upload_date': '20150508',
-            'duration': 599,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
-        'info_dict': {
-            'id': 'POW_00996502',
-            'ext': 'm4v',
-            'title': '''"Dit is wel een 'landslide'..."''',
-            'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
-            'upload_date': '20150508',
-            'duration': 462,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        # audio
-        'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
-        'info_dict': {
-            'id': 'RBX_FUNX_6683215',
-            'ext': 'mp3',
-            'title': 'Jouw Stad Rotterdam',
-            'description': 'md5:db251505244f097717ec59fabc372d9f',
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
-        'only_matching': True,
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
-        'only_matching': True,
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
-        'only_matching': True,
-        'skip': 'Video gone',
-    }, {
-        'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
-        'only_matching': True,
-        'skip': 'Video gone',
-    }, {
-        # live stream
-        'url': 'npo:LI_NL1_4188102',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
-        'only_matching': True,
-    }, {
-        'url': 'https://npo.nl/KN_1698996',
-        'only_matching': True,
     }]
 
-    @classmethod
-    def suitable(cls, url):
-        return (False if any(ie.suitable(url)
-                for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
-                else super(NPOIE, cls).suitable(url))
+    def _get_token(self, video_id):
+        return self._download_json(
+            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
+            video_id,
+            note='Downloading token')['token']
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -505,295 +385,6 @@ def add_format_url(format_url):
             'is_live': is_live,
         }
 
-
-class NPOLiveIE(NPOBaseIE):
-    IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
-
-    _TESTS = [{
-        'url': 'http://www.npo.nl/live/npo-1',
-        'info_dict': {
-            'id': 'LI_NL1_4188102',
-            'display_id': 'npo-1',
-            'ext': 'mp4',
-            'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'is_live': True,
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        'url': 'http://www.npo.nl/live',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.npostart.nl/live/npo-1',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url) or 'npo-1'
-
-        webpage = self._download_webpage(url, display_id)
-
-        live_id = self._search_regex(
-            [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
-
-        return {
-            '_type': 'url_transparent',
-            'url': 'npo:%s' % live_id,
-            'ie_key': NPOIE.ie_key(),
-            'id': live_id,
-            'display_id': display_id,
-        }
-
-
-class NPORadioIE(InfoExtractor):
-    IE_NAME = 'npo.nl:radio'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
-
-    _TEST = {
-        'url': 'http://www.npo.nl/radio/radio-1',
-        'info_dict': {
-            'id': 'radio-1',
-            'ext': 'mp3',
-            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'is_live': True,
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }
-
-    @classmethod
-    def suitable(cls, url):
-        return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
-
-    @staticmethod
-    def _html_get_attribute_regex(attribute):
-        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(
-            self._html_get_attribute_regex('data-channel'), webpage, 'title')
-
-        stream = self._parse_json(
-            self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
-            video_id)
-
-        codec = stream.get('codec')
-
-        return {
-            'id': video_id,
-            'url': stream['url'],
-            'title': self._live_title(title),
-            'acodec': codec,
-            'ext': codec,
-            'is_live': True,
-        }
-
-
-class NPORadioFragmentIE(InfoExtractor):
-    IE_NAME = 'npo.nl:radio:fragment'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
-        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
-        'info_dict': {
-            'id': '174356',
-            'ext': 'mp3',
-            'title': 'Jubileumconcert Willeke Alberti',
-        },
-    }
-
-    def _real_extract(self, url):
-        audio_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, audio_id)
-
-        title = self._html_search_regex(
-            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
-            webpage, 'title')
-
-        audio_url = self._search_regex(
-            r"data-streams='([^']+)'", webpage, 'audio url')
-
-        return {
-            'id': audio_id,
-            'url': audio_url,
-            'title': title,
-        }
-
-
-class NPODataMidEmbedIE(InfoExtractor):
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(
-            r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'NPO',
-            'url': 'npo:%s' % video_id,
-            'display_id': display_id
-        }
-
-
-class SchoolTVIE(NPODataMidEmbedIE):
-    IE_NAME = 'schooltv'
-    _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
-
-    _TEST = {
-        'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
-        'info_dict': {
-            'id': 'WO_NTR_429477',
-            'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
-            'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
-            'ext': 'mp4',
-            'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
-        },
-        'params': {
-            # Skip because of m3u8 download
-            'skip_download': True
-        }
-    }
-
-
-class HetKlokhuisIE(NPODataMidEmbedIE):
-    IE_NAME = 'hetklokhuis'
-    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
-
-    _TEST = {
-        'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
-        'info_dict': {
-            'id': 'VPWON_1260528',
-            'display_id': 'Zwaartekrachtsgolven',
-            'ext': 'm4v',
-            'title': 'Het Klokhuis: Zwaartekrachtsgolven',
-            'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
-            'upload_date': '20170223',
-        },
-        'params': {
-            'skip_download': True
-        }
-    }
-
-
-class NPOPlaylistBaseIE(NPOIE):
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        entries = [
-            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
-        ]
-
-        playlist_title = self._html_search_regex(
-            self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
-            default=None) or self._og_search_title(webpage)
-
-        return self.playlist_result(entries, playlist_id, playlist_title)
-
-
-class VPROIE(NPOPlaylistBaseIE):
-    IE_NAME = 'vpro'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
-    _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
-                          r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
-    _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
-
-    _TESTS = [
-        {
-            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
-            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
-            'info_dict': {
-                'id': 'VPWON_1169289',
-                'ext': 'm4v',
-                'title': 'De toekomst komt uit Afrika',
-                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
-                'upload_date': '20130225',
-            },
-        },
-        {
-            'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
-            'info_dict': {
-                'id': 'sergio-herman',
-                'title': 'sergio herman: fucking perfect',
-            },
-            'playlist_count': 2,
-            'skip': 'Video gone',
-        },
-        {
-            # playlist with youtube embed
-            'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
-            'info_dict': {
-                'id': 'education-education',
-                'title': 'education education',
-            },
-            'playlist_count': 2,
-            'skip': 'Video gone',
-        },
-        {
-            'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
-            'info_dict': {
-                'id': 'de-tegenprestatie',
-                'title': 'De Tegenprestatie',
-            },
-            'playlist_count': 2,
-        }, {
-            'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
-            'info_dict': {
-                'id': 'VARA_101375237',
-                'ext': 'm4v',
-                'title': 'MH17: Het verdriet van Nederland',
-                'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
-                'upload_date': '20150716',
-            },
-            'params': {
-                # Skip because of m3u8 download
-                'skip_download': True
-            },
-        }
-    ]
-
-
-class WNLIE(NPOPlaylistBaseIE):
-    IE_NAME = 'wnl'
-    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
-    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
-    _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
-
-    _TESTS = [{
-        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
-        'info_dict': {
-            'id': 'vandaag-de-dag-6-mei',
-            'title': 'Vandaag de Dag 6 mei',
-        },
-        'playlist_count': 4,
-    }]
-
-
-class AndereTijdenIE(NPOPlaylistBaseIE):
-    IE_NAME = 'anderetijden'
-    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
-    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
-    _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
-
-    _TESTS = [{
-        'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
-        'info_dict': {
-            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
-            'title': 'Duitse soldaten over de Slag bij Arnhem',
-        },
-        'playlist_count': 3,
-    }]
-
 ###############################################################
 #   Description of the new process of getting to the stream   #
 ###############################################################

From 21eb4513e05ddbe1bd6be8ef8d58ef9f20710bc2 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 14:12:51 +0100
Subject: [PATCH 10/47] Convert the description into code

---
 youtube_dl/extractor/npo.py | 448 +++++-------------------------------
 1 file changed, 57 insertions(+), 391 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index f1d9c87ba1a..52e7096c142 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,29 +1,10 @@
 from __future__ import unicode_literals
 
-import re
+import json
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_cookies_SimpleCookie,
-    compat_HTTPError,
-    compat_str,
-    compat_urllib_parse_unquote_plus,
-)
 from ..utils import (
-    determine_ext,
     ExtractorError,
-    fix_xml_ampersands,
-    int_or_none,
-    merge_dicts,
-    orderedSet,
-    parse_duration,
-    qualities,
-    str_or_none,
-    strip_jsonp,
-    unified_strdate,
-    unified_timestamp,
-    url_or_none,
-    urlencode_postdata,
 )
 
 
@@ -44,7 +25,7 @@ class NPOIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
-        # TODO other test attributes
+        # TODO fill in other test attributes
     }, {
         'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
         'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
@@ -77,380 +58,65 @@ def _get_token(self, video_id):
             note='Downloading token')['token']
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return self._get_info(url, video_id) or self._get_old_info(video_id)
-
-    def _get_info(self, url, video_id):
-        _, xsrf_token_response = self._download_webpage_handle(
-            'https://www.npostart.nl/api/token', video_id,
-            'Downloading token', headers={
-                'Referer': url,
-                'X-Requested-With': 'XMLHttpRequest',
-            })
-        cookies = compat_cookies_SimpleCookie()
-        cookies.load(xsrf_token_response.headers['Set-Cookie'])
-        cookies = dict((k, v.value) for k, v in cookies.items())
-        xsrf_token = cookies['XSRF-TOKEN']
-
-        player = self._download_json(
-            'https://www.npostart.nl/player/%s' % video_id, video_id,
-            'Downloading player JSON',
-            headers={
-                'x-xsrf-token': compat_urllib_parse_unquote_plus(xsrf_token)
-            },
-            data=urlencode_postdata({
-                'autoplay': 0,
-                'share': 1,
-                'pageUrl': url,
-                'isFavourite': 'false',
-                'hasAdConsent': 0,
-            }))
-
-        player_token = player['token']
-
-        drm = False
-        format_urls = set()
-        formats = []
-        for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
-            streams = self._download_json(
-                'https://start-player.npo.nl/video/%s/streams' % video_id,
-                video_id, 'Downloading %s profile JSON' % profile, fatal=False,
-                query={
-                    'profile': profile,
-                    'quality': 'npo',
-                    'tokenId': player_token,
-                    'streamType': 'broadcast',
-                },
-                # empty data to force a POST request, avoiding HTTP 405
-                data=b'')
-            if not streams:
-                continue
-            stream = streams.get('stream')
-            if not isinstance(stream, dict):
-                continue
-            stream_url = url_or_none(stream.get('src'))
-            if not stream_url or stream_url in format_urls:
-                continue
-            format_urls.add(stream_url)
-            if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
-                drm = True
-                continue
-            stream_type = stream.get('type')
-            stream_ext = determine_ext(stream_url)
-            if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    stream_url, video_id, mpd_id='dash', fatal=False))
-            elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    stream_url, video_id, ext='mp4',
-                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
-            elif re.search(r'\.isml?/Manifest', stream_url):
-                formats.extend(self._extract_ism_formats(
-                    stream_url, video_id, ism_id='mss', fatal=False))
-            else:
-                formats.append({
-                    'url': stream_url,
-                })
-
-        if not formats:
-            if drm:
-                raise ExtractorError('This video is DRM protected.', expected=True)
-            return
-
-        self._sort_formats(formats)
-
-        info = {
-            'id': video_id,
-            'title': video_id,
-            'formats': formats,
-        }
-
-        embed_url = url_or_none(player.get('embedUrl'))
-        if embed_url:
-            webpage = self._download_webpage(
-                embed_url, video_id, 'Downloading embed page', fatal=False)
-            if webpage:
-                video = self._parse_json(
-                    self._search_regex(
-                        r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
-                        default='{}'), video_id)
-                if video:
-                    title = video.get('episodeTitle')
-                    subtitles = {}
-                    subtitles_list = video.get('subtitles')
-                    if isinstance(subtitles_list, list):
-                        for cc in subtitles_list:
-                            cc_url = url_or_none(cc.get('src'))
-                            if not cc_url:
-                                continue
-                            lang = str_or_none(cc.get('language')) or 'nl'
-                            subtitles.setdefault(lang, []).append({
-                                'url': cc_url,
-                            })
-                    return merge_dicts({
-                        'title': title,
-                        'description': video.get('description'),
-                        'thumbnail': url_or_none(
-                            video.get('still_image_url') or video.get('orig_image_url')),
-                        'duration': int_or_none(video.get('duration')),
-                        'timestamp': unified_timestamp(video.get('broadcastDate')),
-                        'creator': video.get('channel'),
-                        'series': video.get('title'),
-                        'episode': title,
-                        'episode_number': int_or_none(video.get('episodeNumber')),
-                        'subtitles': subtitles,
-                    }, info)
-
-        return info
-
-    def _get_old_info(self, video_id):
-        metadata = self._download_json(
-            'http://e.omroep.nl/metadata/%s' % video_id,
-            video_id,
-            # We have to remove the javascript callback
-            transform_source=strip_jsonp,
-        )
-
-        error = metadata.get('error')
-        if error:
-            raise ExtractorError(error, expected=True)
-
-        # For some videos actual video id (prid) is different (e.g. for
-        # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
-        # video id is POMS_WNL_853698 but prid is POW_00996502)
-        video_id = metadata.get('prid') or video_id
-
-        # titel is too generic in some cases so utilize aflevering_titel as well
-        # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
-        title = metadata['titel']
-        sub_title = metadata.get('aflevering_titel')
-        if sub_title and sub_title != title:
-            title += ': %s' % sub_title
-
-        token = self._get_token(video_id)
-
-        formats = []
-        urls = set()
-
-        def is_legal_url(format_url):
-            return format_url and format_url not in urls and re.match(
-                r'^(?:https?:)?//', format_url)
-
-        QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
-        QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
-
-        quality_from_label = qualities(QUALITY_LABELS)
-        quality_from_format_id = qualities(QUALITY_FORMATS)
-        items = self._download_json(
-            'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
-            'Downloading formats JSON', query={
-                'adaptive': 'yes',
-                'token': token,
-            })['items'][0]
-        for num, item in enumerate(items):
-            item_url = item.get('url')
-            if not is_legal_url(item_url):
-                continue
-            urls.add(item_url)
-            format_id = self._search_regex(
-                r'video/ida/([^/]+)', item_url, 'format id',
-                default=None)
-
-            item_label = item.get('label')
-
-            def add_format_url(format_url):
-                width = int_or_none(self._search_regex(
-                    r'(\d+)[xX]\d+', format_url, 'width', default=None))
-                height = int_or_none(self._search_regex(
-                    r'\d+[xX](\d+)', format_url, 'height', default=None))
-                if item_label in QUALITY_LABELS:
-                    quality = quality_from_label(item_label)
-                    f_id = item_label
-                elif item_label in QUALITY_FORMATS:
-                    quality = quality_from_format_id(format_id)
-                    f_id = format_id
-                else:
-                    quality, f_id = [None] * 2
-                formats.append({
-                    'url': format_url,
-                    'format_id': f_id,
-                    'width': width,
-                    'height': height,
-                    'quality': quality,
-                })
-
-            # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
-            if item.get('contentType') in ('url', 'audio'):
-                add_format_url(item_url)
-                continue
-
-            try:
-                stream_info = self._download_json(
-                    item_url + '&type=json', video_id,
-                    'Downloading %s stream JSON'
-                    % item_label or item.get('format') or format_id or num)
-            except ExtractorError as ee:
-                if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
-                    error = (self._parse_json(
-                        ee.cause.read().decode(), video_id,
-                        fatal=False) or {}).get('errorstring')
-                    if error:
-                        raise ExtractorError(error, expected=True)
-                raise
-            # Stream URL instead of JSON, example: npo:LI_NL1_4188102
-            if isinstance(stream_info, compat_str):
-                if not stream_info.startswith('http'):
-                    continue
-                video_url = stream_info
-            # JSON
-            else:
-                video_url = stream_info.get('url')
-            if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
-                continue
-            urls.add(video_url)
-            if determine_ext(video_url) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, ext='mp4',
-                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
-            else:
-                add_format_url(video_url)
-
-        is_live = metadata.get('medium') == 'live'
-
-        if not is_live:
-            for num, stream in enumerate(metadata.get('streams', [])):
-                stream_url = stream.get('url')
-                if not is_legal_url(stream_url):
-                    continue
-                urls.add(stream_url)
-                # smooth streaming is not supported
-                stream_type = stream.get('type', '').lower()
-                if stream_type in ['ss', 'ms']:
-                    continue
-                if stream_type == 'hds':
-                    f4m_formats = self._extract_f4m_formats(
-                        stream_url, video_id, fatal=False)
-                    # f4m downloader downloads only piece of live stream
-                    for f4m_format in f4m_formats:
-                        f4m_format['preference'] = -1
-                    formats.extend(f4m_formats)
-                elif stream_type == 'hls':
-                    formats.extend(self._extract_m3u8_formats(
-                        stream_url, video_id, ext='mp4', fatal=False))
-                # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
-                elif '.asf' in stream_url:
-                    asx = self._download_xml(
-                        stream_url, video_id,
-                        'Downloading stream %d ASX playlist' % num,
-                        transform_source=fix_xml_ampersands, fatal=False)
-                    if not asx:
-                        continue
-                    ref = asx.find('./ENTRY/Ref')
-                    if ref is None:
+        # You might want to use removesuffix here,
+        # but removesuffix is introduced in Python 3.9
+        # and youtube-dl supports Python 3.2+
+        if url.endswith('/afspelen'):
+            url = url[:-9]
+        elif url.endswith('/afspelen/'):
+            url = url[:-10]
+        if url.endswith('/'):
+            url = url[:-1]
+        slug = url.split('/')[-1]
+        page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
+        # TODO find out what proper HTML parsing utilities are available in youtube-dl
+        next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
+        next_data = json.loads(next_data)
+        product_id, description, thumbnail, title = None, None, None, None
+        for query in next_data['props']['pageProps']['dehydratedState']['queries']:
+            if isinstance(query['state']['data'], list):
+                for entry in query['state']['data']:
+                    print(entry)
+                    try:
+                        if entry['slug'] == slug:
+                            product_id = entry['productId']
+                            title = entry['title']
+                            synopsis = entry['synopsis']
+                            description = synopsis.get('long', synopsis.get('short', synopsis.get('brief', '')))
+                            thumbnail = entry['images'][0]['url']
+                            break
+                    except KeyError:
                         continue
-                    video_url = ref.get('href')
-                    if not video_url or video_url in urls:
+                    except IndexError:
                         continue
-                    urls.add(video_url)
-                    formats.append({
-                        'url': video_url,
-                        'ext': stream.get('formaat', 'asf'),
-                        'quality': stream.get('kwaliteit'),
-                        'preference': -10,
-                    })
-                else:
-                    formats.append({
-                        'url': stream_url,
-                        'quality': stream.get('kwaliteit'),
-                    })
+        if not product_id:
+            raise ExtractorError('No productId found for slug: %s' % slug)
+
+        token = self._get_token(product_id)
+
+        stream_link = self._download_json(
+            'https://prod.npoplayer.nl/stream-link', video_id=slug,
+            data=json.dumps({
+                "profileName": "dash",
+                "drmType": "widevine",
+                "referrerUrl": url,
+            }).encode('utf8'),
+            headers={
+                "Authorization": token,
+                "Content-Type": "application/json",
+            }
+        )
 
-        self._sort_formats(formats)
+        stream_url = stream_link['stream']['streamURL']
 
-        subtitles = {}
-        if metadata.get('tt888') == 'ja':
-            subtitles['nl'] = [{
-                'ext': 'vtt',
-                'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
-            }]
+        # TODO other formats than dash / mpd
+        mpd = self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False)
 
         return {
-            'id': video_id,
-            'title': self._live_title(title) if is_live else title,
-            'description': metadata.get('info'),
-            'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
-            'upload_date': unified_strdate(metadata.get('gidsdatum')),
-            'duration': parse_duration(metadata.get('tijdsduur')),
-            'formats': formats,
-            'subtitles': subtitles,
-            'is_live': is_live,
+            'id': slug,
+            'formats': mpd,
+            'title': title or slug,
+            'description': description,
+            'thumbnail': thumbnail,
+            # TODO fill in other metadata that's available
         }
-
-###############################################################
-#   Description of the new process of getting to the stream   #
-###############################################################
-
-# Valid URLs for new tests
-# https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/
-# https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/afspelen
-
-# Step 1: Normalize the URL
-# If the URL ends with /afspelen, strip that
-# We need the slug in the next stepto find the productId
-
-# Step 2: Find the productId
-# In the contents of the URL is a JSON blob:
-# <script id="__NEXT_DATA__" type="application/json">
-# There's a list of queries in the ['props']['pageProps']['dehydratedState']['queries'] key
-# In this list of queries, one is the current episode
-# This one can be found by looping over queries and selecting
-# the one where the key ['state']['data']['slug'] contains the last part of the URL
-# In the test case 'wie-is-de-mol-2'
-# We need the productId from the corresponding entry in ['state']['data']['productId']
-# This looks a bit GraphQL-like, so there might be an easier way to query the productId, if we know the slug
-
-# Step 3: Get the JWT
-# With this productId we can get a player-token
-# https://npo.nl/start/api/domain/player-token?productId=VARA_101372912
-# The response is a JSON dictionary, with one key ['token']
-# In this key is a JWT
-
-# Step 4: Get the stream-link json
-# The JWT needs to be put in the Authorization header in a POST request to
-# https://prod.npoplayer.nl/stream-link
-# with the following payload (for this test case)
-# {
-#   "profileName": "dash",
-#   "drmType": "widevine",
-#   "referrerUrl": "https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/afspelen"
-# }
-# Even though the request asks for Widevine DRM, it's not always available
-# At this point we don't know whether there's DRM yet
-
-# Step 5: Get the stream.mpd from the JSON response and find out if DRM is enabled
-# This returns a JSON response with a stream.mpd file in the ['stream']['streamURL'] key
-# If dash_unencrypted is in this URL it's a stream without DRM and we can download it
-
-# For all new content there most likely is DRM protection on the stream
-# In that case dash_cenc is in the stream.mpd URL
-
-
-##############################################################
-#   Differences when embedded on the broadcaster's website   #
-##############################################################
-
-# The same episode is also embedded on the broadcaster's website: https://bnnvara.nl/videos/27455
-# It's nice to support that too, and in the case of bnnvara.nl (and maybe more broadcasters)
-# it's even easier to get to the productId
-# By POSTing to the GraphQL endpoint at we can query using the id (last part of the URL)
-# https://api.bnnvara.nl/bff/graphql
-# {
-#   "operationName": "getMedia",
-#   "variables": {
-#     "id": "27455",
-#     "hasAdConsent": false,
-#     "atInternetId": 70
-#   },
-#   "query": "query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n  player(\n    id: $id\n    mediaUrl: $mediaUrl\n    hasAdConsent: $hasAdConsent\n    atInternetId: $atInternetId\n  ) {\n    ... on PlayerSucces {\n      brand {\n        name\n        slug\n        broadcastsEnabled\n        __typename\n      }\n      title\n      programTitle\n      pomsProductId\n      broadcasters {\n        name\n        __typename\n      }\n      duration\n      classifications {\n        title\n        imageUrl\n        type\n        __typename\n      }\n      image {\n        title\n        url\n        __typename\n      }\n      cta {\n        title\n        url\n        __typename\n      }\n      genres {\n        name\n        __typename\n      }\n      subtitles {\n        url\n        language\n        __typename\n      }\n      sources {\n        name\n        url\n        ratio\n        __typename\n      }\n      type\n      token\n      __typename\n    }\n    ... on PlayerError {\n      error\n      __typename\n    }\n    __typename\n  }\n}"
-# }
-# The response is in the key ['data']['player']['pomsProductId']
-# From this point it's possible to continue at step 3 of the description above

From 0dc7d954cb1492834cb07706624b497de4420e23 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 15:05:30 +0100
Subject: [PATCH 11/47] Comply with coding conventions a bit more

---
 youtube_dl/extractor/npo.py | 44 ++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 52e7096c142..502d276fff9 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -65,30 +65,29 @@ def _real_extract(self, url):
             url = url[:-9]
         elif url.endswith('/afspelen/'):
             url = url[:-10]
-        if url.endswith('/'):
-            url = url[:-1]
+        url = url.rstrip('/')
         slug = url.split('/')[-1]
         page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
         # TODO find out what proper HTML parsing utilities are available in youtube-dl
         next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
         next_data = json.loads(next_data)
-        product_id, description, thumbnail, title = None, None, None, None
+        product_id, title, description, thumbnail = None, None, None, None
         for query in next_data['props']['pageProps']['dehydratedState']['queries']:
             if isinstance(query['state']['data'], list):
                 for entry in query['state']['data']:
-                    print(entry)
-                    try:
-                        if entry['slug'] == slug:
-                            product_id = entry['productId']
-                            title = entry['title']
-                            synopsis = entry['synopsis']
-                            description = synopsis.get('long', synopsis.get('short', synopsis.get('brief', '')))
-                            thumbnail = entry['images'][0]['url']
-                            break
-                    except KeyError:
-                        continue
-                    except IndexError:
-                        continue
+                    if entry['slug'] == slug:
+                        product_id = entry.get('productId')
+                        title = entry.get('title')
+                        synopsis = entry.get('synopsis', {})
+                        description = (
+                                synopsis.get('long')
+                                or synopsis.get('short')
+                                or synopsis.get('brief')
+                        )
+                        thumbnails = entry.get('images')
+                        for thumbnail_entry in thumbnails:
+                            if 'url' in thumbnail_entry:
+                                thumbnail = thumbnail_entry.get('url')
         if not product_id:
             raise ExtractorError('No productId found for slug: %s' % slug)
 
@@ -97,19 +96,18 @@ def _real_extract(self, url):
         stream_link = self._download_json(
             'https://prod.npoplayer.nl/stream-link', video_id=slug,
             data=json.dumps({
-                "profileName": "dash",
-                "drmType": "widevine",
-                "referrerUrl": url,
+                'profileName': 'dash',
+                'drmType': 'widevine',
+                'referrerUrl': url,
             }).encode('utf8'),
             headers={
-                "Authorization": token,
-                "Content-Type": "application/json",
+                'Authorization': token,
+                'Content-Type': 'application/json',
             }
         )
 
-        stream_url = stream_link['stream']['streamURL']
-
         # TODO other formats than dash / mpd
+        stream_url = stream_link.get('stream', {}).get('streamURL')
         mpd = self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False)
 
         return {

From fb7b7179ff7ff08ad7e32539c0b0d440e0899903 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 15:08:10 +0100
Subject: [PATCH 12/47] Speculate about other ways of getting productId

---
 youtube_dl/extractor/npo.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 502d276fff9..7f90aa827ed 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -70,6 +70,8 @@ def _real_extract(self, url):
         page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
         # TODO find out what proper HTML parsing utilities are available in youtube-dl
         next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
+        # TODO The data in this script tag feels like GraphQL, so there might be an easier way
+        #      to get the product id, maybe using a GraphQL endpoint
         next_data = json.loads(next_data)
         product_id, title, description, thumbnail = None, None, None, None
         for query in next_data['props']['pageProps']['dehydratedState']['queries']:

From f9e59b0c49c8f0fc3951f8ca01705abb46ed51e4 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 15:28:14 +0100
Subject: [PATCH 13/47] Add the possibility to add 'hls' later

---
 youtube_dl/extractor/npo.py | 43 ++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 7f90aa827ed..3e543e35015 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -82,9 +82,9 @@ def _real_extract(self, url):
                         title = entry.get('title')
                         synopsis = entry.get('synopsis', {})
                         description = (
-                                synopsis.get('long')
-                                or synopsis.get('short')
-                                or synopsis.get('brief')
+                            synopsis.get('long')
+                            or synopsis.get('short')
+                            or synopsis.get('brief')
                         )
                         thumbnails = entry.get('images')
                         for thumbnail_entry in thumbnails:
@@ -95,26 +95,29 @@ def _real_extract(self, url):
 
         token = self._get_token(product_id)
 
-        stream_link = self._download_json(
-            'https://prod.npoplayer.nl/stream-link', video_id=slug,
-            data=json.dumps({
-                'profileName': 'dash',
-                'drmType': 'widevine',
-                'referrerUrl': url,
-            }).encode('utf8'),
-            headers={
-                'Authorization': token,
-                'Content-Type': 'application/json',
-            }
-        )
-
-        # TODO other formats than dash / mpd
-        stream_url = stream_link.get('stream', {}).get('streamURL')
-        mpd = self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False)
+        formats = []
+        for profile in (
+                'dash',
+                # 'hls',  # TODO test what needs to change for 'hls' support
+        ):
+            stream_link = self._download_json(
+                'https://prod.npoplayer.nl/stream-link', video_id=slug,
+                data=json.dumps({
+                    'profileName': profile,
+                    'drmType': 'widevine',
+                    'referrerUrl': url,
+                }).encode('utf8'),
+                headers={
+                    'Authorization': token,
+                    'Content-Type': 'application/json',
+                }
+            )
+            stream_url = stream_link.get('stream', {}).get('streamURL')
+            formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
 
         return {
             'id': slug,
-            'formats': mpd,
+            'formats': formats,
             'title': title or slug,
             'description': description,
             'thumbnail': thumbnail,

From 8b1a7d9a7c09d7c88fa03f885ebdc5347c007f69 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 1 Mar 2024 16:23:19 +0100
Subject: [PATCH 14/47] Use InfoExtractor._parse_json instead of json.loads

---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 3e543e35015..e7275e1b338 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -72,7 +72,7 @@ def _real_extract(self, url):
         next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
         # TODO The data in this script tag feels like GraphQL, so there might be an easier way
         #      to get the product id, maybe using a GraphQL endpoint
-        next_data = json.loads(next_data)
+        next_data = self._parse_json(next_data, slug)
         product_id, title, description, thumbnail = None, None, None, None
         for query in next_data['props']['pageProps']['dehydratedState']['queries']:
             if isinstance(query['state']['data'], list):

From 34b5b2010774fab2cb8984c720fcd7c62110669a Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 3 Mar 2024 17:47:15 +0100
Subject: [PATCH 15/47] Refactor into reusable method

---
 youtube_dl/extractor/extractors.py |  2 +-
 youtube_dl/extractor/npo.py        | 60 ++++++++++++++++++++++++------
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index dabcd60cb75..696fd8e1e08 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,7 +847,7 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import NPOIE
+from .npo import BNNVaraIE, NPOIE
 from .npr import NprIE
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index e7275e1b338..3896968611a 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -13,7 +13,6 @@ class NPOIE(InfoExtractor):
     IE_DESC = 'npo.nl'
     _VALID_URL = r'''(?x)
                     (?:
-                        npo:|
                         https?://
                             (?:www\.)?
                             (?:
@@ -82,9 +81,9 @@ def _real_extract(self, url):
                         title = entry.get('title')
                         synopsis = entry.get('synopsis', {})
                         description = (
-                            synopsis.get('long')
-                            or synopsis.get('short')
-                            or synopsis.get('brief')
+                                synopsis.get('long')
+                                or synopsis.get('short')
+                                or synopsis.get('brief')
                         )
                         thumbnails = entry.get('images')
                         for thumbnail_entry in thumbnails:
@@ -93,8 +92,19 @@ def _real_extract(self, url):
         if not product_id:
             raise ExtractorError('No productId found for slug: %s' % slug)
 
-        token = self._get_token(product_id)
+        formats = self._download_by_product_id(product_id, slug, url)
+
+        return {
+            'id': slug,
+            'formats': formats,
+            'title': title or slug,
+            'description': description,
+            'thumbnail': thumbnail,
+            # TODO fill in other metadata that's available
+        }
 
+    def _download_by_product_id(self, product_id, slug, url=None):
+        token = self._get_token(product_id)
         formats = []
         for profile in (
                 'dash',
@@ -105,7 +115,7 @@ def _real_extract(self, url):
                 data=json.dumps({
                     'profileName': profile,
                     'drmType': 'widevine',
-                    'referrerUrl': url,
+                    'referrerUrl': url or '',
                 }).encode('utf8'),
                 headers={
                     'Authorization': token,
@@ -114,12 +124,40 @@ def _real_extract(self, url):
             )
             stream_url = stream_link.get('stream', {}).get('streamURL')
             formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
+        return formats
+
+
+class BNNVaraIE(NPOIE):
+    IE_NAME = 'bnnvara'
+    IE_DESC = 'bnnvara.nl'
+    _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
+
+    def _real_extract(self, url):
+        url = url.rstrip('/')
+        video_id = url.split('/')[-1]
+
+        media = self._download_json('https://api.bnnvara.nl/bff/graphql',
+                                    video_id,
+                                    data=json.dumps(
+                                        {
+                                            'operationName': 'getMedia',
+                                            'variables': {
+                                                'id': video_id,
+                                                'hasAdConsent': False,
+                                                'atInternetId': 70
+                                            },
+                                            'query': 'query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n  player(\n    id: $id\n    mediaUrl: $mediaUrl\n    hasAdConsent: $hasAdConsent\n    atInternetId: $atInternetId\n  ) {\n    ... on PlayerSucces {\n      brand {\n        name\n        slug\n        broadcastsEnabled\n        __typename\n      }\n      title\n      programTitle\n      pomsProductId\n      broadcasters {\n        name\n        __typename\n      }\n      duration\n      classifications {\n        title\n        imageUrl\n        type\n        __typename\n      }\n      image {\n        title\n        url\n        __typename\n      }\n      cta {\n        title\n        url\n        __typename\n      }\n      genres {\n        name\n        __typename\n      }\n      subtitles {\n        url\n        language\n        __typename\n      }\n      sources {\n        name\n        url\n        ratio\n        __typename\n      }\n      type\n      token\n      __typename\n    }\n    ... on PlayerError {\n      error\n      __typename\n    }\n    __typename\n  }\n}'
+                                        }).encode('utf8'),
+                                    headers={
+                                        'Content-Type': 'application/json',
+                                    })
+        product_id = media.get('data', {}).get('player', {}).get('pomsProductId')
+
+        formats = self._download_by_product_id(product_id, video_id)
 
         return {
-            'id': slug,
+            'id': product_id,
+            'title': media.get('data', {}).get('player', {}).get('title'),
             'formats': formats,
-            'title': title or slug,
-            'description': description,
-            'thumbnail': thumbnail,
-            # TODO fill in other metadata that's available
+            'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'),
         }

From 4fc423845e8b5f8855fb6e5a0a5087064401b12b Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 12:49:22 +0100
Subject: [PATCH 16/47] Fix lint

---
 youtube_dl/extractor/npo.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 3896968611a..53fd816f734 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -80,11 +80,9 @@ def _real_extract(self, url):
                         product_id = entry.get('productId')
                         title = entry.get('title')
                         synopsis = entry.get('synopsis', {})
-                        description = (
-                                synopsis.get('long')
-                                or synopsis.get('short')
-                                or synopsis.get('brief')
-                        )
+                        description = (synopsis.get('long')
+                                       or synopsis.get('short')
+                                       or synopsis.get('brief'))
                         thumbnails = entry.get('images')
                         for thumbnail_entry in thumbnails:
                             if 'url' in thumbnail_entry:

From 28ba01f1ccfc5560be7d027b1669822e44d4143f Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 13:43:56 +0100
Subject: [PATCH 17/47] Add Ongehoord Nederland and test URL for BNNVARA

---
 youtube_dl/extractor/extractors.py |  2 +-
 youtube_dl/extractor/npo.py        | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 696fd8e1e08..802e498f9dd 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,7 +847,7 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import BNNVaraIE, NPOIE
+from .npo import BNNVaraIE, NPOIE, ONIE
 from .npr import NprIE
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 53fd816f734..d8573d343e3 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import json
+import re
 
 from .common import InfoExtractor
 from ..utils import (
@@ -129,6 +130,9 @@ class BNNVaraIE(NPOIE):
     IE_NAME = 'bnnvara'
     IE_DESC = 'bnnvara.nl'
     _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
+    _TESTS = [{
+        'url': 'https://www.bnnvara.nl/videos/27455',
+    }]
 
     def _real_extract(self, url):
         url = url.rstrip('/')
@@ -159,3 +163,29 @@ def _real_extract(self, url):
             'formats': formats,
             'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'),
         }
+
+
+class ONIE(NPOIE):
+    IE_NAME = 'on'
+    IE_DESC = 'ongehoordnederland.tv'
+    _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
+    _TESTS = [{
+        'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+        page, _ = self._download_webpage_handle(url, video_id)
+        results = re.findall("page: '(.+)'", page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }

From eb6e396bfb66965487ef1e7c50edbf6e28130462 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 13:55:59 +0100
Subject: [PATCH 18/47] First version of a VPRO regex

---
 youtube_dl/extractor/npo.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index d8573d343e3..d48a4cda078 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -4,9 +4,7 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError
 
 
 class NPOIE(InfoExtractor):
@@ -189,3 +187,29 @@ def _real_extract(self, url):
             'title': video_id,
             'formats': formats,
         }
+
+
+class VPROIE(NPOIE):
+    IE_NAME = 'vpro'
+    IE_DESC = 'vpro.nl'
+    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
+    _TESTS = [{
+        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+        page, _ = self._download_webpage_handle(url, video_id)
+        results = re.findall('data-media-id="(.+_.+)"\s', page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }

From d36d50fe5cf166899adfc85e7ca9b0f8f5272d19 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 14:04:03 +0100
Subject: [PATCH 19/47] Re-add Zapp

---
 youtube_dl/extractor/extractors.py |  2 +-
 youtube_dl/extractor/npo.py        | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 802e498f9dd..b3a9fdfbaf1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,7 +847,7 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import BNNVaraIE, NPOIE, ONIE
+from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE
 from .npr import NprIE
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index d48a4cda078..84b41443bc3 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -204,6 +204,7 @@ def _real_extract(self, url):
         formats = []
         for result in results:
             formats.extend(self._download_by_product_id(result, video_id))
+            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
 
         if not formats:
             raise ExtractorError('Could not find a POMS product id in the provided URL.')
@@ -213,3 +214,24 @@ def _real_extract(self, url):
             'title': video_id,
             'formats': formats,
         }
+
+
+class ZAPPIE(NPOIE):
+    IE_NAME = 'zapp'
+    IE_DESC = 'zapp.nl'
+    _VALID_URL = r'https?://(?:www\.)?zapp.nl/.*'
+
+    _TESTS = [{
+        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+
+        formats = self._download_by_product_id(url, video_id)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }

From d426a92a60ba9b6eb01256d3dcad4dcbfecd742c Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 14:11:49 +0100
Subject: [PATCH 20/47] Add utf-8 coding declaration as suggested in PR review

---
 youtube_dl/extractor/npo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 84b41443bc3..01eb54fc02d 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json

From 3b3d73cbe6f64d6485e03cb658cc491d4fa62333 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Wed, 6 Mar 2024 11:52:08 +0100
Subject: [PATCH 21/47] Use program-detail endpoint and remove a test

---
 youtube_dl/extractor/npo.py | 61 +++++++++++++++----------------------
 1 file changed, 24 insertions(+), 37 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 01eb54fc02d..239583b5bd1 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -25,18 +25,6 @@ class NPOIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
         # TODO fill in other test attributes
-    }, {
-        'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
-        'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
-        'info_dict': {
-            'id': 'VARA_101191800',
-            'ext': 'm4v',
-            'title': 'De Mega Mike & Mega Thomas show: The best of.',
-            'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
-            'upload_date': '20090227',
-            'duration': 2400,
-        },
-        'skip': 'Video gone',
     }, {
         'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
         'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
@@ -66,27 +54,21 @@ def _real_extract(self, url):
             url = url[:-10]
         url = url.rstrip('/')
         slug = url.split('/')[-1]
-        page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
-        # TODO find out what proper HTML parsing utilities are available in youtube-dl
-        next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
-        # TODO The data in this script tag feels like GraphQL, so there might be an easier way
-        #      to get the product id, maybe using a GraphQL endpoint
-        next_data = self._parse_json(next_data, slug)
-        product_id, title, description, thumbnail = None, None, None, None
-        for query in next_data['props']['pageProps']['dehydratedState']['queries']:
-            if isinstance(query['state']['data'], list):
-                for entry in query['state']['data']:
-                    if entry['slug'] == slug:
-                        product_id = entry.get('productId')
-                        title = entry.get('title')
-                        synopsis = entry.get('synopsis', {})
-                        description = (synopsis.get('long')
-                                       or synopsis.get('short')
-                                       or synopsis.get('brief'))
-                        thumbnails = entry.get('images')
-                        for thumbnail_entry in thumbnails:
-                            if 'url' in thumbnail_entry:
-                                thumbnail = thumbnail_entry.get('url')
+
+        program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
+                                               slug,
+                                               query={'slug': slug})
+        product_id = program_metadata.get('productId')
+        images = program_metadata.get('images')
+        thumbnail = None
+        for image in images:
+            thumbnail = image.get('url')
+            break
+        title = program_metadata.get('title')
+        descriptions = program_metadata.get('description', {})
+        description = descriptions.get('long') or descriptions.get('short') or descriptions.get('brief')
+        duration = program_metadata.get('durationInSeconds')
+
         if not product_id:
             raise ExtractorError('No productId found for slug: %s' % slug)
 
@@ -96,17 +78,18 @@ def _real_extract(self, url):
             'id': slug,
             'formats': formats,
             'title': title or slug,
-            'description': description,
+            'description': description or title or slug,
             'thumbnail': thumbnail,
-            # TODO fill in other metadata that's available
+            'duration': duration,
         }
 
     def _download_by_product_id(self, product_id, slug, url=None):
         token = self._get_token(product_id)
         formats = []
         for profile in (
-                'dash',
-                # 'hls',  # TODO test what needs to change for 'hls' support
+            'dash',
+            # 'hls' is available too, but implementing it doesn't add much
+            # As far as I know 'dash' is always available
         ):
             stream_link = self._download_json(
                 'https://prod.npoplayer.nl/stream-link', video_id=slug,
@@ -131,6 +114,7 @@ class BNNVaraIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
     _TESTS = [{
         'url': 'https://www.bnnvara.nl/videos/27455',
+        # TODO fill in other test attributes
     }]
 
     def _real_extract(self, url):
@@ -170,6 +154,7 @@ class ONIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
     _TESTS = [{
         'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
+        # TODO fill in other test attributes
     }]
 
     def _real_extract(self, url):
@@ -196,6 +181,7 @@ class VPROIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
     _TESTS = [{
         'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
+        # TODO fill in other test attributes
     }]
 
     def _real_extract(self, url):
@@ -224,6 +210,7 @@ class ZAPPIE(NPOIE):
 
     _TESTS = [{
         'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
+        # TODO fill in other test attributes
     }]
 
     def _real_extract(self, url):

From 4b24e5f00da0b11f3e2989d5a568e862285d34ea Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Wed, 6 Mar 2024 12:22:27 +0100
Subject: [PATCH 22/47] Re-add SchoolTV

---
 youtube_dl/extractor/extractors.py |  2 +-
 youtube_dl/extractor/npo.py        | 42 +++++++++++++++++++++++++++---
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b3a9fdfbaf1..5f2ac7ced47 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,7 +847,7 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE
+from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE, SchoolTVIE
 from .npr import NprIE
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 239583b5bd1..a28915bd08f 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -87,9 +87,9 @@ def _download_by_product_id(self, product_id, slug, url=None):
         token = self._get_token(product_id)
         formats = []
         for profile in (
-            'dash',
-            # 'hls' is available too, but implementing it doesn't add much
-            # As far as I know 'dash' is always available
+                'dash',
+                # 'hls' is available too, but implementing it doesn't add much
+                # As far as I know 'dash' is always available
         ):
             stream_link = self._download_json(
                 'https://prod.npoplayer.nl/stream-link', video_id=slug,
@@ -223,3 +223,39 @@ def _real_extract(self, url):
             'title': video_id,
             'formats': formats,
         }
+
+
+class SchoolTVIE(NPOIE):
+    IE_NAME = 'schooltv'
+    IE_DESC = 'schooltv.nl'
+    _VALID_URL = r'https?://(?:www\.)?schooltv.nl/item/.*'
+
+    _TESTS = [{
+        'url': 'https://schooltv.nl/item/zapp-music-challenge-2015-zapp-music-challenge-2015',
+        # TODO fill in other test attributes
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+
+        build_id = 'b7eHUzAVO7wHXCopYxQhV'
+
+        metadata_url = 'https://schooltv.nl/_next/data/' \
+                       + build_id \
+                       + '/item/' \
+                       + video_id + '.json'
+
+        metadata = self._download_json(metadata_url,
+                                       video_id).get('pageProps', {}).get('data', {})
+
+        formats = self._download_by_product_id(metadata.get('poms_mid'), video_id)
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+
+        return {
+            'id': video_id,
+            'title': metadata.get('title', '') + ' - ' + metadata.get('subtitle', ''),
+            'description': metadata.get('description') or metadata.get('short_description'),
+            'formats': formats,
+        }

From 681b39032ae34709a74c5a4ab8f0d2275aab6880 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Wed, 6 Mar 2024 12:32:34 +0100
Subject: [PATCH 23/47] Fix flake8 and better error reporting

---
 youtube_dl/extractor/npo.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index a28915bd08f..c4e4097e34b 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -166,7 +166,8 @@ def _real_extract(self, url):
             formats.extend(self._download_by_product_id(result, video_id))
 
         if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
+                                 'perhaps because all stream URLs are DRM protected.')
 
         return {
             'id': video_id,
@@ -187,14 +188,15 @@ class VPROIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
         page, _ = self._download_webpage_handle(url, video_id)
-        results = re.findall('data-media-id="(.+_.+)"\s', page)
+        results = re.findall(r'data-media-id="(.+_.+)"\s', page)
         formats = []
         for result in results:
             formats.extend(self._download_by_product_id(result, video_id))
             break  # TODO find a better solution, VPRO pages can have multiple videos embedded
 
         if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
+                                 'perhaps because all stream URLs are DRM protected.')
 
         return {
             'id': video_id,
@@ -238,6 +240,8 @@ class SchoolTVIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
 
+        # TODO Find out how we could obtain this automatically
+        #      Otherwise this extractor might break each time SchoolTV deploys a new release
         build_id = 'b7eHUzAVO7wHXCopYxQhV'
 
         metadata_url = 'https://schooltv.nl/_next/data/' \
@@ -251,7 +255,8 @@ def _real_extract(self, url):
         formats = self._download_by_product_id(metadata.get('poms_mid'), video_id)
 
         if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
+                                 'perhaps because all stream URLs are DRM protected.')
 
         return {
             'id': video_id,

From 159f825edd6326fda7f43fb27d13db6cd2bbc4ca Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Wed, 6 Mar 2024 12:53:37 +0100
Subject: [PATCH 24/47] Add scaffolding for last few extractors and change
 order so the PR diff looks nice

---
 youtube_dl/extractor/npo.py | 91 +++++++++++++++++++++----------------
 1 file changed, 52 insertions(+), 39 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index c4e4097e34b..196ab9d1b11 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -11,16 +11,7 @@
 class NPOIE(InfoExtractor):
     IE_NAME = 'npo'
     IE_DESC = 'npo.nl'
-    _VALID_URL = r'''(?x)
-                    (?:
-                        https?://
-                            (?:www\.)?
-                            (?:
-                                npo\.nl/(?:[^/]+/)*
-                            )
-                        )
-                        (?P<id>[^/?#]+)
-                '''
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
 
     _TESTS = [{
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
@@ -176,35 +167,6 @@ def _real_extract(self, url):
         }
 
 
-class VPROIE(NPOIE):
-    IE_NAME = 'vpro'
-    IE_DESC = 'vpro.nl'
-    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
-    _TESTS = [{
-        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
-        # TODO fill in other test attributes
-    }]
-
-    def _real_extract(self, url):
-        video_id = url.rstrip('/').split('/')[-1]
-        page, _ = self._download_webpage_handle(url, video_id)
-        results = re.findall(r'data-media-id="(.+_.+)"\s', page)
-        formats = []
-        for result in results:
-            formats.extend(self._download_by_product_id(result, video_id))
-            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
-
-        if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-                                 'perhaps because all stream URLs are DRM protected.')
-
-        return {
-            'id': video_id,
-            'title': video_id,
-            'formats': formats,
-        }
-
-
 class ZAPPIE(NPOIE):
     IE_NAME = 'zapp'
     IE_DESC = 'zapp.nl'
@@ -264,3 +226,54 @@ def _real_extract(self, url):
             'description': metadata.get('description') or metadata.get('short_description'),
             'formats': formats,
         }
+
+
+class HetKlokhuisIE(NPOIE):
+    ...
+
+    def _real_extract(self, url):
+        ...
+
+
+class VPROIE(NPOIE):
+    IE_NAME = 'vpro'
+    IE_DESC = 'vpro.nl'
+    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
+    _TESTS = [{
+        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
+        # TODO fill in other test attributes
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+        page, _ = self._download_webpage_handle(url, video_id)
+        results = re.findall(r'data-media-id="(.+_.+)"\s', page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
+                                 'perhaps because all stream URLs are DRM protected.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class WNLIE(NPOIE):
+    ...
+
+    def _real_extract(self, url):
+        ...
+
+
+class AndereTijdenIE(NPOIE):
+    ...
+
+    def _real_extract(self, url):
+        ...
+

From 0cbcd1aec656998d44dbffe59cbb0adac4b84b45 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Wed, 6 Mar 2024 12:55:51 +0100
Subject: [PATCH 25/47] List NPO extractor imports one per line, add new IEs

---
 youtube_dl/extractor/extractors.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 5f2ac7ced47..b1093a1ac0e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -847,7 +847,16 @@
     NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE, SchoolTVIE
+from .npo import (
+    AndereTijdenIE,
+    BNNVaraIE,
+    NPOIE,
+    ONIE,
+    SchoolTVIE,
+    HetKlokhuisIE,
+    VPROIE,
+    WNLIE,
+)
 from .npr import NprIE
 from .nrk import (
     NRKIE,

From 0ab79c37ae2c465678276bef0e9032efb30f464b Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Thu, 7 Mar 2024 16:23:09 +0100
Subject: [PATCH 26/47] Reusable code for two NTR sites

---
 youtube_dl/extractor/npo.py | 53 +++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 196ab9d1b11..77411da5215 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -228,11 +228,35 @@ def _real_extract(self, url):
         }
 
 
-class HetKlokhuisIE(NPOIE):
-    ...
-
+class NTRSubsiteIE(NPOIE):
     def _real_extract(self, url):
-        ...
+        video_id = url.rstrip('/').split('/')[-1]
+
+        page, _ = self._download_webpage_handle(url)
+        results = re.findall(r'data-mid="(.+_.+)"', page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+            break
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
+                                 'perhaps because all stream URLs are DRM protected.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class HetKlokhuisIE(NTRSubsiteIE):
+    IE_NAME = 'het-klokhuis'
+    IE_DESC = 'hetklokhuis.nl'
+    _VALID_URL = r'https?://(?:www\.)?het-klokhuis\.nl/.*'
+    _TESTS = [{
+        'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens'
+    }]
 
 
 class VPROIE(NPOIE):
@@ -264,16 +288,11 @@ def _real_extract(self, url):
         }
 
 
-class WNLIE(NPOIE):
-    ...
-
-    def _real_extract(self, url):
-        ...
-
-
-class AndereTijdenIE(NPOIE):
-    ...
-
-    def _real_extract(self, url):
-        ...
-
+class AndereTijdenIE(NTRSubsiteIE):
+    IE_NAME = 'anderetijden'
+    IE_DESC = 'anderetijden.nl'
+    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/.*'
+    _TESTS = [{
+        'url': 'https://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem'
+        # TODO fill in other test attributes
+    }]

From c08f29f45b6b7f41127c8d9260617de7d69430f9 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 10 Mar 2024 16:27:40 +0100
Subject: [PATCH 27/47] Update unit tests

---
 youtube_dl/extractor/extractors.py |  1 -
 youtube_dl/extractor/npo.py        | 37 ++++++++++++++++++++++--------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b1093a1ac0e..e5c9af8ba40 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -855,7 +855,6 @@
     SchoolTVIE,
     HetKlokhuisIE,
     VPROIE,
-    WNLIE,
 )
 from .npr import NprIE
 from .nrk import (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 77411da5215..f5f7485735f 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -15,17 +15,24 @@ class NPOIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
-        # TODO fill in other test attributes
+        'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
+        'info_dict': {
+            'description': 'Wie is de mol? (2)',
+            'ext': 'm4v',
+            'duration': 2439,
+            'id': 'wie-is-de-mol-2',
+            'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
+            'title': 'Wie is de mol? (2)'
+        }
     }, {
         'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
-        'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+        'md5': 'c84d054219c4888ed53b4ee3d01b2d93',
         'info_dict': {
-            'id': 'VPWON_1169289',
-            'ext': 'm4v',
-            'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
-            'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
-            'upload_date': '20130225',
-            'duration': 3000,
+            'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
+            'title': 'Zwart geld: de toekomst komt uit Afrika',
+            'description': 'Zwart geld: de toekomst komt uit Afrika',
+            'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
+            'duration': 3000
         },
     }]
 
@@ -105,7 +112,12 @@ class BNNVaraIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
     _TESTS = [{
         'url': 'https://www.bnnvara.nl/videos/27455',
-        # TODO fill in other test attributes
+        'md5': '392dd367877739e49b9e0a9a550b178a',
+        'info_dict': {
+            'id': 'VARA_101369808',
+            'thumbnail': 'https://media.vara.nl/files/thumbnails/321291_custom_zembla__wie_is_de_mol_680x383.jpg',
+            'title': 'Zembla - Wie is de mol?'
+        }
     }]
 
     def _real_extract(self, url):
@@ -265,7 +277,12 @@ class VPROIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
     _TESTS = [{
         'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
-        # TODO fill in other test attributes
+        'md5': 'cf302e066b5313cfaf8d5adf50d64f13',
+        'info_dict': {
+            'id': 'offline-als-luxe.html',
+            'title': 'offline-als-luxe.html',
+            'ext': 'm4v',
+        }
     }]
 
     def _real_extract(self, url):

From 28624cfe0930655b815f40d4b4820f76728de65e Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 10 Mar 2024 16:57:31 +0100
Subject: [PATCH 28/47] Register ZAPPIE, fix HetKlokhuis URL, fill in test data

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/npo.py        | 28 +++++++++++++++++++++-------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e5c9af8ba40..1a1905d13d7 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -855,6 +855,7 @@
     SchoolTVIE,
     HetKlokhuisIE,
     VPROIE,
+    ZAPPIE,
 )
 from .npr import NprIE
 from .nrk import (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index f5f7485735f..699eedf1227 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -208,7 +208,12 @@ class SchoolTVIE(NPOIE):
 
     _TESTS = [{
         'url': 'https://schooltv.nl/item/zapp-music-challenge-2015-zapp-music-challenge-2015',
-        # TODO fill in other test attributes
+        'md5': 'e9ef151c4886994e2bea23593348cb14',
+        'info_dict': {
+            'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015',
+            'title': 'Zapp Music Challenge 2015 - Alain Clark & Yaell',
+            'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?"
+        },
     }]
 
     def _real_extract(self, url):
@@ -244,7 +249,7 @@ class NTRSubsiteIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
 
-        page, _ = self._download_webpage_handle(url)
+        page, _ = self._download_webpage_handle(url, video_id)
         results = re.findall(r'data-mid="(.+_.+)"', page)
         formats = []
         for result in results:
@@ -263,11 +268,16 @@ def _real_extract(self, url):
 
 
 class HetKlokhuisIE(NTRSubsiteIE):
-    IE_NAME = 'het-klokhuis'
+    IE_NAME = 'hetklokhuis'
     IE_DESC = 'hetklokhuis.nl'
-    _VALID_URL = r'https?://(?:www\.)?het-klokhuis\.nl/.*'
+    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/.*'
     _TESTS = [{
-        'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens'
+        'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens',
+        'md5': '4664b54ed4e05183b1e4f2f4290d551e',
+        'info_dict': {
+            'id': 'aliens',
+            'title': 'aliens'
+        }
     }]
 
 
@@ -310,6 +320,10 @@ class AndereTijdenIE(NTRSubsiteIE):
     IE_DESC = 'anderetijden.nl'
     _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/.*'
     _TESTS = [{
-        'url': 'https://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem'
-        # TODO fill in other test attributes
+        'url': 'https://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        'md5': '3d607b16e00b459156b4ab6e163dccd7',
+        'info_dict': {
+            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+            'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem'
+        }
     }]

From 1ca4e686a3f9001cb52c8b682b57c1fba65700db Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 10 Mar 2024 17:04:00 +0100
Subject: [PATCH 29/47] Add an MD5

---
 youtube_dl/extractor/npo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 699eedf1227..f4cd137ff93 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -157,7 +157,10 @@ class ONIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
     _TESTS = [{
         'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
-        # TODO fill in other test attributes
+        'md5': 'a85ebd50fa86fe5cbce654655f7dbb12',
+        'info_dict': {
+
+        }
     }]
 
     def _real_extract(self, url):

From 4398f6832f76948ee79025f0e055117182d1dfb3 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Mon, 11 Mar 2024 13:40:23 +0100
Subject: [PATCH 30/47] Fix zapp extractor

---
 youtube_dl/extractor/npo.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index f4cd137ff93..a5413a1d748 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -99,7 +99,8 @@ def _download_by_product_id(self, product_id, slug, url=None):
                 headers={
                     'Authorization': token,
                     'Content-Type': 'application/json',
-                }
+                },
+                fatal=False,
             )
             stream_url = stream_link.get('stream', {}).get('streamURL')
             formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
@@ -188,14 +189,18 @@ class ZAPPIE(NPOIE):
     _VALID_URL = r'https?://(?:www\.)?zapp.nl/.*'
 
     _TESTS = [{
-        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
-        # TODO fill in other test attributes
+        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/POMS_AT_811523',
+        'md5': '9eb2d8b6f88b72b6b986ea2c26a81588',
+        'info_dict': {
+            'id': 'POMS_AT_811523',
+            'title': 'POMS_AT_811523',
+        },
     }]
 
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
 
-        formats = self._download_by_product_id(url, video_id)
+        formats = self._download_by_product_id(video_id, video_id, url=url)
 
         return {
             'id': video_id,
@@ -279,8 +284,8 @@ class HetKlokhuisIE(NTRSubsiteIE):
         'md5': '4664b54ed4e05183b1e4f2f4290d551e',
         'info_dict': {
             'id': 'aliens',
-            'title': 'aliens'
-        }
+            'title': 'aliens',
+        },
     }]
 
 
@@ -295,7 +300,7 @@ class VPROIE(NPOIE):
             'id': 'offline-als-luxe.html',
             'title': 'offline-als-luxe.html',
             'ext': 'm4v',
-        }
+        },
     }]
 
     def _real_extract(self, url):
@@ -327,6 +332,6 @@ class AndereTijdenIE(NTRSubsiteIE):
         'md5': '3d607b16e00b459156b4ab6e163dccd7',
         'info_dict': {
             'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
-            'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem'
-        }
+            'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        },
     }]

From 58d7a00e3f07744b65ad53d12fcee1ec0050de74 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Mon, 11 Mar 2024 14:14:38 +0100
Subject: [PATCH 31/47] Resolve some of the pull request feedback

---
 youtube_dl/extractor/npo.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index a5413a1d748..ea1e0fd2bad 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -18,8 +18,8 @@ class NPOIE(InfoExtractor):
         'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
         'info_dict': {
             'description': 'Wie is de mol? (2)',
-            'ext': 'm4v',
             'duration': 2439,
+            'ext': 'm4v',
             'id': 'wie-is-de-mol-2',
             'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
             'title': 'Wie is de mol? (2)'
@@ -30,6 +30,7 @@ class NPOIE(InfoExtractor):
         'info_dict': {
             'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
             'title': 'Zwart geld: de toekomst komt uit Afrika',
+            'ext': 'mp4',
             'description': 'Zwart geld: de toekomst komt uit Afrika',
             'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
             'duration': 3000
@@ -70,7 +71,7 @@ def _real_extract(self, url):
         if not product_id:
             raise ExtractorError('No productId found for slug: %s' % slug)
 
-        formats = self._download_by_product_id(product_id, slug, url)
+        formats = self._extract_formats_by_product_id(product_id, slug, url)
 
         return {
             'id': slug,
@@ -81,7 +82,7 @@ def _real_extract(self, url):
             'duration': duration,
         }
 
-    def _download_by_product_id(self, product_id, slug, url=None):
+    def _extract_formats_by_product_id(self, product_id, slug, url=None):
         token = self._get_token(product_id)
         formats = []
         for profile in (
@@ -93,7 +94,6 @@ def _download_by_product_id(self, product_id, slug, url=None):
                 'https://prod.npoplayer.nl/stream-link', video_id=slug,
                 data=json.dumps({
                     'profileName': profile,
-                    'drmType': 'widevine',
                     'referrerUrl': url or '',
                 }).encode('utf8'),
                 headers={
@@ -117,7 +117,8 @@ class BNNVaraIE(NPOIE):
         'info_dict': {
             'id': 'VARA_101369808',
             'thumbnail': 'https://media.vara.nl/files/thumbnails/321291_custom_zembla__wie_is_de_mol_680x383.jpg',
-            'title': 'Zembla - Wie is de mol?'
+            'title': 'Zembla - Wie is de mol?',
+            'ext': 'mp4',
         }
     }]
 
@@ -142,7 +143,7 @@ def _real_extract(self, url):
                                     })
         product_id = media.get('data', {}).get('player', {}).get('pomsProductId')
 
-        formats = self._download_by_product_id(product_id, video_id)
+        formats = self._extract_formats_by_product_id(product_id, video_id)
 
         return {
             'id': product_id,
@@ -170,7 +171,7 @@ def _real_extract(self, url):
         results = re.findall("page: '(.+)'", page)
         formats = []
         for result in results:
-            formats.extend(self._download_by_product_id(result, video_id))
+            formats.extend(self._extract_formats_by_product_id(result, video_id))
 
         if not formats:
             raise ExtractorError('Could not find a POMS product id in the provided URL, '
@@ -200,7 +201,7 @@ class ZAPPIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
 
-        formats = self._download_by_product_id(video_id, video_id, url=url)
+        formats = self._extract_formats_by_product_id(video_id, video_id, url=url)
 
         return {
             'id': video_id,
@@ -239,7 +240,7 @@ def _real_extract(self, url):
         metadata = self._download_json(metadata_url,
                                        video_id).get('pageProps', {}).get('data', {})
 
-        formats = self._download_by_product_id(metadata.get('poms_mid'), video_id)
+        formats = self._extract_formats_by_product_id(metadata.get('poms_mid'), video_id)
 
         if not formats:
             raise ExtractorError('Could not find a POMS product id in the provided URL, '
@@ -261,7 +262,7 @@ def _real_extract(self, url):
         results = re.findall(r'data-mid="(.+_.+)"', page)
         formats = []
         for result in results:
-            formats.extend(self._download_by_product_id(result, video_id))
+            formats.extend(self._extract_formats_by_product_id(result, video_id))
             break
 
         if not formats:
@@ -309,7 +310,7 @@ def _real_extract(self, url):
         results = re.findall(r'data-media-id="(.+_.+)"\s', page)
         formats = []
         for result in results:
-            formats.extend(self._download_by_product_id(result, video_id))
+            formats.extend(self._extract_formats_by_product_id(result, video_id))
             break  # TODO find a better solution, VPRO pages can have multiple videos embedded
 
         if not formats:

From ad64f3751e74c5ee2bbe45a6d5110813dbdd77f3 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Thu, 14 Mar 2024 13:34:33 +0100
Subject: [PATCH 32/47] Improve regex

Co-authored-by: Roy <git@rvsit.nl>
---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index ea1e0fd2bad..27582ae9f18 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -307,7 +307,7 @@ class VPROIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
         page, _ = self._download_webpage_handle(url, video_id)
-        results = re.findall(r'data-media-id="(.+_.+)"\s', page)
+        results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
         formats = []
         for result in results:
             formats.extend(self._extract_formats_by_product_id(result, video_id))

From bc86c5f73b189a3ab5caa0f63d62ed8e3b70d741 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Thu, 14 Mar 2024 13:37:41 +0100
Subject: [PATCH 33/47] Make regex more specific and remove redundant .*

---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 27582ae9f18..4651e68685f 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -11,7 +11,7 @@
 class NPOIE(InfoExtractor):
     IE_NAME = 'npo'
     IE_DESC = 'npo.nl'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'
 
     _TESTS = [{
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',

From 4c90b2f5875593af17dff13f96b8b05791f64a21 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Thu, 14 Mar 2024 13:39:59 +0100
Subject: [PATCH 34/47] Adhere to code style

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/npo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 4651e68685f..4a70e251b7f 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -55,8 +55,7 @@ def _real_extract(self, url):
         slug = url.split('/')[-1]
 
         program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
-                                               slug,
-                                               query={'slug': slug})
+                                               slug, query={'slug': slug})
         product_id = program_metadata.get('productId')
         images = program_metadata.get('images')
         thumbnail = None

From 007bbeacd78e0d158f684b5a8833d6425a0312f9 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Thu, 14 Mar 2024 13:41:01 +0100
Subject: [PATCH 35/47] Remove afspelen and trailing slashes with one regex

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/npo.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 4a70e251b7f..545e585099e 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -44,14 +44,8 @@ def _get_token(self, video_id):
             note='Downloading token')['token']
 
     def _real_extract(self, url):
-        # You might want to use removesuffix here,
-        # but removesuffix is introduced in Python 3.9
-        # and youtube-dl supports Python 3.2+
-        if url.endswith('/afspelen'):
-            url = url[:-9]
-        elif url.endswith('/afspelen/'):
-            url = url[:-10]
-        url = url.rstrip('/')
+            # Remove /afspelen and/or any trailing `/`s
+            url = re.sub(r'/(?:afspelen)?/*$', '', url)
         slug = url.split('/')[-1]
 
         program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',

From a60972e253dfe88c81601eaa2e2899afbc4c29fd Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Fri, 15 Mar 2024 13:02:56 +0100
Subject: [PATCH 36/47] Fix indent from suggestion

---
 youtube_dl/extractor/npo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 545e585099e..4dbab16ab47 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -44,8 +44,8 @@ def _get_token(self, video_id):
             note='Downloading token')['token']
 
     def _real_extract(self, url):
-            # Remove /afspelen and/or any trailing `/`s
-            url = re.sub(r'/(?:afspelen)?/*$', '', url)
+        # Remove /afspelen and/or any trailing `/`s
+        url = re.sub(r'/(?:afspelen)?/*$', '', url)
         slug = url.split('/')[-1]
 
         program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',

From ad6ee6fdd2548cc153d85c74675a941699437a25 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sat, 21 Sep 2024 21:58:53 +0200
Subject: [PATCH 37/47] Commit two suggestions from the PR

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/npo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 4dbab16ab47..bfa96e6a787 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -95,7 +95,7 @@ def _extract_formats_by_product_id(self, product_id, slug, url=None):
                 },
                 fatal=False,
             )
-            stream_url = stream_link.get('stream', {}).get('streamURL')
+            stream_url = traverse_obj(stream_link, ('stream', 'streamURL'))
             formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
         return formats
 
@@ -160,7 +160,7 @@ class ONIE(NPOIE):
 
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
-        page, _ = self._download_webpage_handle(url, video_id)
+        page = self._download_webpage(url, video_id)
         results = re.findall("page: '(.+)'", page)
         formats = []
         for result in results:

From bf91db4846df82f474938fbe091055fc82eb8a0d Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sat, 21 Sep 2024 20:04:50 +0000
Subject: [PATCH 38/47] Use suggested util

---
 youtube_dl/extractor/npo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index bfa96e6a787..70e297e19d9 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -5,7 +5,7 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import ExtractorError, join_nonempty
 
 
 class NPOIE(InfoExtractor):
@@ -241,7 +241,7 @@ def _real_extract(self, url):
 
         return {
             'id': video_id,
-            'title': metadata.get('title', '') + ' - ' + metadata.get('subtitle', ''),
+            'title': join_nonempty('title', 'subtitle', from_dict=metadata),
             'description': metadata.get('description') or metadata.get('short_description'),
             'formats': formats,
         }

From 6de650f51fc9da6ae8a261b3f57e26f4ec78a2d1 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sat, 21 Sep 2024 20:08:53 +0000
Subject: [PATCH 39/47] Use traverse_obj in another place as well

---
 youtube_dl/extractor/npo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 70e297e19d9..e130f2dbdab 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -5,7 +5,7 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, join_nonempty
+from ..utils import ExtractorError, join_nonempty, traverse_obj
 
 
 class NPOIE(InfoExtractor):
@@ -140,9 +140,9 @@ def _real_extract(self, url):
 
         return {
             'id': product_id,
-            'title': media.get('data', {}).get('player', {}).get('title'),
+            'title': traverse_obj(media, ('data', 'player', 'title')),
             'formats': formats,
-            'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'),
+            'thumbnail': traverse_obj(media, ('data', 'player', 'image', 'url')),
         }
 
 

From c748eca829194de5aba9a66035549f2b9112aba2 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 11:49:09 +0200
Subject: [PATCH 40/47] Automatically obtain NextJS buildId and change item to
 video-item

---
 youtube_dl/extractor/npo.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index e130f2dbdab..8299bfb168b 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -221,13 +221,14 @@ class SchoolTVIE(NPOIE):
     def _real_extract(self, url):
         video_id = url.rstrip('/').split('/')[-1]
 
-        # TODO Find out how we could obtain this automatically
-        #      Otherwise this extractor might break each time SchoolTV deploys a new release
-        build_id = 'b7eHUzAVO7wHXCopYxQhV'
+        build_id = self._search_nextjs_data(
+            self._download_webpage(url, video_id),
+            video_id,
+        )['buildId']
 
         metadata_url = 'https://schooltv.nl/_next/data/' \
                        + build_id \
-                       + '/item/' \
+                       + '/video-item/' \
                        + video_id + '.json'
 
         metadata = self._download_json(metadata_url,
@@ -304,7 +305,7 @@ def _real_extract(self, url):
         formats = []
         for result in results:
             formats.extend(self._extract_formats_by_product_id(result, video_id))
-            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
+            break
 
         if not formats:
             raise ExtractorError('Could not find a POMS product id in the provided URL, '

From 41157b2b49914e1d786d86972122e1d4ebbbb6b5 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 12:00:44 +0200
Subject: [PATCH 41/47] Move GraphQL query into separate variable

---
 youtube_dl/extractor/npo.py | 66 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 8299bfb168b..a53a3c3d92e 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -118,6 +118,70 @@ class BNNVaraIE(NPOIE):
     def _real_extract(self, url):
         url = url.rstrip('/')
         video_id = url.split('/')[-1]
+        graphql_query = """query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {
+                            player(
+                                id: $id
+                                mediaUrl: $mediaUrl
+                                hasAdConsent: $hasAdConsent
+                                atInternetId: $atInternetId
+                            ) {
+                            ... on PlayerSucces {
+                                brand {
+                                    name
+                                    slug
+                                    broadcastsEnabled
+                                    __typename
+                                }
+                                title
+                                programTitle
+                                pomsProductId
+                                broadcasters {
+                                    name
+                                    __typename
+                                }
+                                duration
+                                classifications {
+                                    title
+                                    imageUrl
+                                    type
+                                    __typename
+                                }
+                                image {
+                                    title
+                                    url
+                                    __typename
+                                }
+                                cta {
+                                    title
+                                    url
+                                    __typename
+                                }
+                                genres {
+                                    name
+                                    __typename
+                                }
+                                subtitles {
+                                    url
+                                    language
+                                    __typename
+                                }
+                                sources {
+                                    name
+                                    url
+                                    ratio
+                                    __typename
+                                }
+                                    type
+                                    token
+                                    __typename
+                                }
+                                ... on PlayerError {
+                                    error
+                                    __typename
+                                }
+                                    __typename
+                            }
+}"""
 
         media = self._download_json('https://api.bnnvara.nl/bff/graphql',
                                     video_id,
@@ -129,7 +193,7 @@ def _real_extract(self, url):
                                                 'hasAdConsent': False,
                                                 'atInternetId': 70
                                             },
-                                            'query': 'query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n  player(\n    id: $id\n    mediaUrl: $mediaUrl\n    hasAdConsent: $hasAdConsent\n    atInternetId: $atInternetId\n  ) {\n    ... on PlayerSucces {\n      brand {\n        name\n        slug\n        broadcastsEnabled\n        __typename\n      }\n      title\n      programTitle\n      pomsProductId\n      broadcasters {\n        name\n        __typename\n      }\n      duration\n      classifications {\n        title\n        imageUrl\n        type\n        __typename\n      }\n      image {\n        title\n        url\n        __typename\n      }\n      cta {\n        title\n        url\n        __typename\n      }\n      genres {\n        name\n        __typename\n      }\n      subtitles {\n        url\n        language\n        __typename\n      }\n      sources {\n        name\n        url\n        ratio\n        __typename\n      }\n      type\n      token\n      __typename\n    }\n    ... on PlayerError {\n      error\n      __typename\n    }\n    __typename\n  }\n}'
+                                            'query': graphql_query
                                         }).encode('utf8'),
                                     headers={
                                         'Content-Type': 'application/json',

From c3026dd70c4a0d74dc6079331cd037ed6fa7a479 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 12:08:50 +0200
Subject: [PATCH 42/47] Apply suggestion from PR

---
 youtube_dl/extractor/npo.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index a53a3c3d92e..4bb70ad5339 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -198,9 +198,10 @@ def _real_extract(self, url):
                                     headers={
                                         'Content-Type': 'application/json',
                                     })
-        product_id = media.get('data', {}).get('player', {}).get('pomsProductId')
-
-        formats = self._extract_formats_by_product_id(product_id, video_id)
+        
+        product_id = traverse_obj(media, ('data', 'player', 'pomsProductId'))
+        formats = self._download_by_product_id(product_id, video_id) if product_id else []
+        self._sort_formats(formats)
 
         return {
             'id': product_id,

From 7f1c09bea173bd9bb6a81d793e85e49ddd0882c0 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 12:11:30 +0200
Subject: [PATCH 43/47] Use _sort_formats util

---
 youtube_dl/extractor/npo.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 4bb70ad5339..96e854ae6ba 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -198,7 +198,7 @@ def _real_extract(self, url):
                                     headers={
                                         'Content-Type': 'application/json',
                                     })
-        
+
         product_id = traverse_obj(media, ('data', 'player', 'pomsProductId'))
         formats = self._download_by_product_id(product_id, video_id) if product_id else []
         self._sort_formats(formats)
@@ -231,9 +231,7 @@ def _real_extract(self, url):
         for result in results:
             formats.extend(self._extract_formats_by_product_id(result, video_id))
 
-        if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-                                 'perhaps because all stream URLs are DRM protected.')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,

From 0e1a0cfa03dcc0f089525cb0a2bdc82364927cc1 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 12:28:10 +0200
Subject: [PATCH 44/47] Apply some more PR feedback

---
 youtube_dl/extractor/npo.py | 44 +++++++++++++------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 96e854ae6ba..98095ac6b19 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -50,30 +50,22 @@ def _real_extract(self, url):
 
         program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
                                                slug, query={'slug': slug})
-        product_id = program_metadata.get('productId')
-        images = program_metadata.get('images')
-        thumbnail = None
-        for image in images:
-            thumbnail = image.get('url')
-            break
-        title = program_metadata.get('title')
-        descriptions = program_metadata.get('description', {})
-        description = descriptions.get('long') or descriptions.get('short') or descriptions.get('brief')
-        duration = program_metadata.get('durationInSeconds')
-
+        product_id = traverse_obj(program_metadata, 'productId')
         if not product_id:
-            raise ExtractorError('No productId found for slug: %s' % slug)
-
+            raise ExtractorError('No productId found for slug: %s' % (slug,))
         formats = self._extract_formats_by_product_id(product_id, slug, url)
-
-        return {
+        self._sort_formats(formats)
+        return merge_dicts(traverse_obj(program_metadata, {
+            'title': 'title',
+            'description': (('description', ('long', 'short', 'brief')), 'title'),
+            'thumbnail': ('images', Ellipsis, 'url', T(url_or_none)),
+            'duration': ('durationInSeconds', T(int_or_none)),
+        }, get_all=False), {
             'id': slug,
             'formats': formats,
-            'title': title or slug,
-            'description': description or title or slug,
-            'thumbnail': thumbnail,
-            'duration': duration,
-        }
+            'title': slug,
+            'description': slug,
+        })
 
     def _extract_formats_by_product_id(self, product_id, slug, url=None):
         token = self._get_token(product_id)
@@ -299,9 +291,7 @@ def _real_extract(self, url):
 
         formats = self._extract_formats_by_product_id(metadata.get('poms_mid'), video_id)
 
-        if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-                                 'perhaps because all stream URLs are DRM protected.')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -322,9 +312,7 @@ def _real_extract(self, url):
             formats.extend(self._extract_formats_by_product_id(result, video_id))
             break
 
-        if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-                                 'perhaps because all stream URLs are DRM protected.')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -370,9 +358,7 @@ def _real_extract(self, url):
             formats.extend(self._extract_formats_by_product_id(result, video_id))
             break
 
-        if not formats:
-            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-                                 'perhaps because all stream URLs are DRM protected.')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,

From 817e2e5938707ae1ff2bde374b03d4f3265c6cae Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 12:37:48 +0200
Subject: [PATCH 45/47] Fix some missing imports

---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 98095ac6b19..8d8499e8411 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -5,7 +5,7 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, join_nonempty, traverse_obj
+from ..utils import ExtractorError, int_or_none, join_nonempty, merge_dicts, traverse_obj, url_or_none, T
 
 
 class NPOIE(InfoExtractor):

From 75266ce4ed190192082cc86f8e17b16d984873d9 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 13:41:53 +0200
Subject: [PATCH 46/47] Fix old metadata reference

---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 8d8499e8411..89403a9d897 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -192,7 +192,7 @@ def _real_extract(self, url):
                                     })
 
         product_id = traverse_obj(media, ('data', 'player', 'pomsProductId'))
-        formats = self._download_by_product_id(product_id, video_id) if product_id else []
+        formats = self._extract_formats_by_product_id(product_id, video_id) if product_id else []
         self._sort_formats(formats)
 
         return {

From 6f271423e8564fd503f0430fdbd627923503ca43 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Sun, 20 Oct 2024 14:38:03 +0200
Subject: [PATCH 47/47] Update tests

---
 youtube_dl/extractor/npo.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 89403a9d897..a6281f2a41c 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -17,23 +17,23 @@ class NPOIE(InfoExtractor):
         'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
         'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
         'info_dict': {
-            'description': 'Wie is de mol? (2)',
+            'title': 'Wie is de mol? (2)',
+            'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
             'duration': 2439,
-            'ext': 'm4v',
             'id': 'wie-is-de-mol-2',
-            'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
-            'title': 'Wie is de mol? (2)'
+            'description': 'wie-is-de-mol-2',
+            'ext': 'mp4',
         }
     }, {
         'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
         'md5': 'c84d054219c4888ed53b4ee3d01b2d93',
         'info_dict': {
-            'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
             'title': 'Zwart geld: de toekomst komt uit Afrika',
-            'ext': 'mp4',
-            'description': 'Zwart geld: de toekomst komt uit Afrika',
             'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
-            'duration': 3000
+            'duration': 3000,
+            'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
+            'description': 'zwart-geld-de-toekomst-komt-uit-afrika',
+            'ext': 'mp4',
         },
     }]
 
@@ -211,7 +211,9 @@ class ONIE(NPOIE):
         'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
         'md5': 'a85ebd50fa86fe5cbce654655f7dbb12',
         'info_dict': {
-
+            'id': 'heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel',
+            'title': 'heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel',
+            'ext': 'mp4',
         }
     }]
 
@@ -243,6 +245,7 @@ class ZAPPIE(NPOIE):
         'info_dict': {
             'id': 'POMS_AT_811523',
             'title': 'POMS_AT_811523',
+            'ext': 'mp4',
         },
     }]
 
@@ -268,8 +271,9 @@ class SchoolTVIE(NPOIE):
         'md5': 'e9ef151c4886994e2bea23593348cb14',
         'info_dict': {
             'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015',
-            'title': 'Zapp Music Challenge 2015 - Alain Clark & Yaell',
-            'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?"
+            'title': 'Zapp Music Challenge 2015-Alain Clark & Yaell',
+            'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?",
+            'ext': 'mp4',
         },
     }]
 
@@ -331,6 +335,7 @@ class HetKlokhuisIE(NTRSubsiteIE):
         'info_dict': {
             'id': 'aliens',
             'title': 'aliens',
+            'ext': 'mp4',
         },
     }]
 
@@ -345,7 +350,7 @@ class VPROIE(NPOIE):
         'info_dict': {
             'id': 'offline-als-luxe.html',
             'title': 'offline-als-luxe.html',
-            'ext': 'm4v',
+            'ext': 'mp4',
         },
     }]
 
@@ -377,5 +382,6 @@ class AndereTijdenIE(NTRSubsiteIE):
         'info_dict': {
             'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
             'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+            'ext': 'mp4',
         },
     }]