From 1fbbe29b99dc61375bf6d786f824d9fcf6ea9c1a Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sat, 14 Feb 2026 23:34:51 +0100 Subject: [PATCH] [ie] Limit `netrc_machine` parameter to shell-safe characters Also adapts some extractor regexes to adhere to this limitation See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-g3gw-q23r-pgqm Authored by: Grub4K --- test/test_InfoExtractor.py | 2 ++ yt_dlp/extractor/common.py | 13 ++++++++++--- yt_dlp/extractor/getcourseru.py | 2 +- yt_dlp/extractor/pornhub.py | 4 ++-- yt_dlp/extractor/teachable.py | 4 ++-- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c15dd8a617..f66fdbf8da 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -76,6 +76,8 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', '')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', '')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None)) + with self.assertRaises(ExtractorError): + ie._get_netrc_login_info(netrc_machine=';echo rce') def test_html_search_regex(self): html = '

Watch this video

' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 70f143387b..24e6d38069 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -661,9 +661,11 @@ class InfoExtractor: if not self._ready: self._initialize_pre_login() if self.supports_login(): - username, password = self._get_login_info() - if username: - self._perform_login(username, password) + # try login only if it would actually do anything + if type(self)._perform_login is not InfoExtractor._perform_login: + username, password = self._get_login_info() + if username: + self._perform_login(username, password) elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE): self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}') self._real_initialize() @@ -1385,6 +1387,11 @@ class InfoExtractor: def _get_netrc_login_info(self, netrc_machine=None): netrc_machine = netrc_machine or self._NETRC_MACHINE + if not netrc_machine: + raise ExtractorError(f'Missing netrc_machine and {type(self).__name__}._NETRC_MACHINE') + ALLOWED = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_' + if netrc_machine.startswith(('-', '_')) or not all(c in ALLOWED for c in netrc_machine): + raise ExtractorError(f'Invalid netrc machine: {netrc_machine!r}', expected=True) cmd = self.get_param('netrc_cmd') if cmd: diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py index 2d923cf540..e2b3a7db36 100644 --- a/yt_dlp/extractor/getcourseru.py +++ b/yt_dlp/extractor/getcourseru.py @@ -59,7 +59,7 @@ class GetCourseRuIE(InfoExtractor): 'marafon.mani-beauty.com', 'on.psbook.ru', ] - _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' + _BASE_URL_RE = rf'https?://(?:(?!player02\.)[a-zA-Z0-9-]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _VALID_URL = [ rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P[^?#]+)', rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 5144f14093..3fc802e15d 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -128,7 +128,7 @@ class PornHubIE(PornHubBaseIE): _VALID_URL = rf'''(?x) https?:// (?: - (?:[^/]+\.)? + (?:[a-zA-Z0-9.-]+\.)? {PornHubBaseIE._PORNHUB_HOST_RE} /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ @@ -534,7 +534,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = rf'(?Phttps?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = rf'(?Phttps?://(?:[a-zA-Z0-9.-]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 0d39be6d5f..5be247a0b3 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -102,7 +102,7 @@ class TeachableIE(TeachableBaseIE): _WORKING = False _VALID_URL = r'''(?x) (?: - {}https?://(?P[^/]+)| + {}https?://(?P[a-zA-Z0-9.-]+)| https?://(?:www\.)?(?P{}) ) /courses/[^/]+/lectures/(?P\d+) @@ -211,7 +211,7 @@ class TeachableIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE): _VALID_URL = r'''(?x) (?: - {}https?://(?P[^/]+)| + {}https?://(?P[a-zA-Z0-9.-]+)| https?://(?:www\.)?(?P{}) ) /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+)