From 43229d1d5f47b313e1958d719faff6321d853ed3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 16 Feb 2026 13:59:34 -0600 Subject: [PATCH] [cookies] Ignore cookies with control characters (#15862) http.cookies.Morsel was patched in Python 3.14.3 and 3.13.12 to raise a CookieError if the cookie name, value or any attribute of its input contains a control character. yt_dlp.cookies.LenientSimpleCookie now preemptively discards any cookies containing control characters, which is consistent with its more lenient parsing. Ref: https://github.com/python/cpython/issues/143919 Closes #15849 Authored by: bashonly, syphyr Co-authored-by: syphyr --- test/test_cookies.py | 30 ++++++++++++++++++++++++++++-- yt_dlp/cookies.py | 32 ++++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/test/test_cookies.py b/test/test_cookies.py index f956ab1876..f64a1535c1 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -205,8 +205,8 @@ class TestLenientSimpleCookie(unittest.TestCase): ), ( 'Test quoted cookie', - 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - {'keebler': 'E=mc2; L="Loves"; fudge=\012;'}, + 'keebler="E=mc2; L=\\"Loves\\"; fudge=;"', + {'keebler': 'E=mc2; L="Loves"; fudge=;'}, ), ( "Allow '=' in an unquoted value", @@ -328,4 +328,30 @@ class TestLenientSimpleCookie(unittest.TestCase): 'Key=Value; [Invalid]=Value; Another=Value', {'Key': 'Value', 'Another': 'Value'}, ), + # Ref: https://github.com/python/cpython/issues/143919 + ( + 'Test invalid cookie name w/ control character', + 'foo\012=bar;', + {}, + ), + ( + 'Test invalid cookie name w/ control character 2', + 'foo\015baz=bar', + {}, + ), + ( + 'Test invalid cookie name w/ control character followed by valid cookie', + 'foo\015=bar; x=y;', + {'x': 'y'}, + ), + ( + 'Test invalid cookie value w/ control character', + 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', + {}, + ), + ( + 'Test invalid quoted attribute value w/ 
control character', + 'Customer="WILE_E_COYOTE"; Version="1\\012"; Path="/acme"', + {}, + ), ) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 23f90d6109..831b3a2c8b 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): # We use Morsel's legal key chars to avoid errors on setting values _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~') _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') + _LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII) _RESERVED = { 'expires', @@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): # Added 'bad' group to catch the remaining value _COOKIE_PATTERN = re.compile(r''' - \s* # Optional whitespace at start of cookie + [ ]* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' - [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter + [^ =;]+ # Match almost anything here for now and validate later ) # End of group 'key' ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign + [ ]*=[ ]* # Equal Sign ( # Start of potential value (?P<val> # Start of group 'val' "(?:[^\\"]|\\.)*" # Any doublequoted string | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + \w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr | # or [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' @@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values ) # End of potential value )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. + [ ]* # Any number of spaces. + ([ ]+|;|$) # Ending either at space, semicolon, or EOS. 
''', re.ASCII | re.VERBOSE) + # http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+ + # Ref: https://github.com/python/cpython/issues/143919 + _CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]') + def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 if not isinstance(data, str): @@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): continue key, value = match.group('key', 'val') + if not self._LEGAL_KEY_RE.fullmatch(key): + morsel = None + continue is_attribute = False if key.startswith('$'): @@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): value = True else: value, _ = self.value_decode(value) + # Guard against control characters in quoted attribute values + if self._CONTROL_CHARACTER_RE.search(value): + # While discarding the entire morsel is not very lenient, + # it's better than http.cookies.Morsel raising a CookieError + # and it's probably better to err on the side of caution + self.pop(morsel.key, None) + morsel = None + continue morsel[key] = value @@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): elif value is not None: morsel = self.get(key, http.cookies.Morsel()) real_value, coded_value = self.value_decode(value) + # Guard against control characters in quoted cookie values + if self._CONTROL_CHARACTER_RE.search(real_value): + morsel = None + continue morsel.set(key, real_value, coded_value) self[key] = morsel