From 1fbbe29b99dc61375bf6d786f824d9fcf6ea9c1a Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.dev>
Date: Sat, 14 Feb 2026 23:34:51 +0100
Subject: [PATCH] [ie] Limit `netrc_machine` parameter to shell-safe characters

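Also restricts the hostname-matching parts of the GetCourseRu, PornHub and
Teachable extractor regexes so that matched hosts adhere to this limitation,
and only looks up login info for extractors that override `_perform_login`.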

See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-g3gw-q23r-pgqm

Authored by: Grub4K
---
 test/test_InfoExtractor.py      |  2 ++
 yt_dlp/extractor/common.py      | 13 ++++++++++---
 yt_dlp/extractor/getcourseru.py |  2 +-
 yt_dlp/extractor/pornhub.py     |  4 ++--
 yt_dlp/extractor/teachable.py   |  4 ++--
 5 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index c15dd8a617..f66fdbf8da 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -76,6 +76,8 @@ class TestInfoExtractor(unittest.TestCase):
             self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', ''))
             self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', ''))
             self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None))
+            with self.assertRaises(ExtractorError):
+                ie._get_netrc_login_info(netrc_machine=';echo rce')
 
     def test_html_search_regex(self):
         html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 70f143387b..24e6d38069 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -661,9 +661,11 @@ class InfoExtractor:
         if not self._ready:
             self._initialize_pre_login()
             if self.supports_login():
-                username, password = self._get_login_info()
-                if username:
-                    self._perform_login(username, password)
+                # try login only if it would actually do anything
+                if type(self)._perform_login is not InfoExtractor._perform_login:
+                    username, password = self._get_login_info()
+                    if username:
+                        self._perform_login(username, password)
             elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
                 self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}')
             self._real_initialize()
@@ -1385,6 +1387,11 @@ class InfoExtractor:
 
     def _get_netrc_login_info(self, netrc_machine=None):
         netrc_machine = netrc_machine or self._NETRC_MACHINE
+        if not netrc_machine:
+            raise ExtractorError(f'Missing netrc_machine and {type(self).__name__}._NETRC_MACHINE')
+        ALLOWED = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_'
+        if netrc_machine.startswith(('-', '_')) or not all(c in ALLOWED for c in netrc_machine):
+            raise ExtractorError(f'Invalid netrc machine: {netrc_machine!r}', expected=True)
 
         cmd = self.get_param('netrc_cmd')
         if cmd:
diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py
index 2d923cf540..e2b3a7db36 100644
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@@ -59,7 +59,7 @@ class GetCourseRuIE(InfoExtractor):
         'marafon.mani-beauty.com',
         'on.psbook.ru',
     ]
-    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
+    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[a-zA-Z0-9-]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
     _VALID_URL = [
         rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
         rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 5144f14093..3fc802e15d 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -128,7 +128,7 @@ class PornHubIE(PornHubBaseIE):
     _VALID_URL = rf'''(?x)
                     https?://
                         (?:
-                            (?:[^/]+\.)?
+                            (?:[a-zA-Z0-9.-]+\.)?
                             {PornHubBaseIE._PORNHUB_HOST_RE}
                             /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
@@ -534,7 +534,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
 
 
 class PornHubUserIE(PornHubPlaylistBaseIE):
-    _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+    _VALID_URL = rf'(?P<url>https?://(?:[a-zA-Z0-9.-]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph',
         'playlist_mincount': 118,
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 0d39be6d5f..5be247a0b3 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -102,7 +102,7 @@ class TeachableIE(TeachableBaseIE):
     _WORKING = False
     _VALID_URL = r'''(?x)
                     (?:
-                        {}https?://(?P<site_t>[^/]+)|
+                        {}https?://(?P<site_t>[a-zA-Z0-9.-]+)|
                         https?://(?:www\.)?(?P<site>{})
                     )
                     /courses/[^/]+/lectures/(?P<id>\d+)
@@ -211,7 +211,7 @@ class TeachableIE(TeachableBaseIE):
 class TeachableCourseIE(TeachableBaseIE):
     _VALID_URL = r'''(?x)
                         (?:
-                            {}https?://(?P<site_t>[^/]+)|
+                            {}https?://(?P<site_t>[a-zA-Z0-9.-]+)|
                             https?://(?:www\.)?(?P<site>{})
                         )
                         /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)