1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-02-17 14:05:48 +00:00

[cookies] Ignore cookies with control characters (#15862)

http.cookies.Morsel was patched in Python 3.14.3 and 3.13.12
to raise a CookieError if the cookie name, value or any attribute
of its input contains a control character.

yt_dlp.cookies.LenientSimpleCookie now preemptively discards
any cookies containing control characters, which is consistent
with its more lenient parsing.

Ref: https://github.com/python/cpython/issues/143919

Closes #15849
Authored by: bashonly, syphyr

Co-authored-by: syphyr <syphyr@gmail.com>
This commit is contained in:
bashonly
2026-02-16 13:59:34 -06:00
committed by GitHub
parent 8d6e0b29bf
commit 43229d1d5f
2 changed files with 54 additions and 8 deletions

View File

@@ -205,8 +205,8 @@ class TestLenientSimpleCookie(unittest.TestCase):
),
(
'Test quoted cookie',
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{'keebler': 'E=mc2; L="Loves"; fudge=\012;'},
'keebler="E=mc2; L=\\"Loves\\"; fudge=;"',
{'keebler': 'E=mc2; L="Loves"; fudge=;'},
),
(
"Allow '=' in an unquoted value",
@@ -328,4 +328,30 @@ class TestLenientSimpleCookie(unittest.TestCase):
'Key=Value; [Invalid]=Value; Another=Value',
{'Key': 'Value', 'Another': 'Value'},
),
# Ref: https://github.com/python/cpython/issues/143919
(
'Test invalid cookie name w/ control character',
'foo\012=bar;',
{},
),
(
'Test invalid cookie name w/ control character 2',
'foo\015baz=bar',
{},
),
(
'Test invalid cookie name w/ control character followed by valid cookie',
'foo\015=bar; x=y;',
{'x': 'y'},
),
(
'Test invalid cookie value w/ control character',
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{},
),
(
'Test invalid quoted attribute value w/ control character',
'Customer="WILE_E_COYOTE"; Version="1\\012"; Path="/acme"',
{},
),
)

View File

@@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
# We use Morsel's legal key chars to avoid errors on setting values
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII)
_RESERVED = {
'expires',
@@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r'''
\s* # Optional whitespace at start of cookie
[ ]* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
[^ =;]+ # Match almost anything here for now and validate later
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
[ ]*=[ ]* # Equal Sign
( # Start of potential value
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
\w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr
| # or
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
) # End of group 'val'
@@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
) # End of potential value
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
[ ]* # Any number of spaces.
([ ]+|;|$) # Ending either at space, semicolon, or EOS.
''', re.ASCII | re.VERBOSE)
# http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+
# Ref: https://github.com/python/cpython/issues/143919
_CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]')
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
if not isinstance(data, str):
@@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
continue
key, value = match.group('key', 'val')
if not self._LEGAL_KEY_RE.fullmatch(key):
morsel = None
continue
is_attribute = False
if key.startswith('$'):
@@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
value = True
else:
value, _ = self.value_decode(value)
# Guard against control characters in quoted attribute values
if self._CONTROL_CHARACTER_RE.search(value):
# While discarding the entire morsel is not very lenient,
# it's better than http.cookies.Morsel raising a CookieError
# and it's probably better to err on the side of caution
self.pop(morsel.key, None)
morsel = None
continue
morsel[key] = value
@@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
elif value is not None:
morsel = self.get(key, http.cookies.Morsel())
real_value, coded_value = self.value_decode(value)
# Guard against control characters in quoted cookie values
if self._CONTROL_CHARACTER_RE.search(real_value):
morsel = None
continue
morsel.set(key, real_value, coded_value)
self[key] = morsel