mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-17 14:05:48 +00:00
[cookies] Ignore cookies with control characters (#15862)
http.cookies.Morsel was patched in Python 3.14.3 and 3.13.12 to raise a CookieError if the cookie name, value, or any attribute of its input contains a control character.

yt_dlp.cookies.LenientSimpleCookie now preemptively discards any cookies containing control characters, which is consistent with its more lenient parsing.

Ref: https://github.com/python/cpython/issues/143919

Closes #15849
Authored by: bashonly, syphyr
Co-authored-by: syphyr <syphyr@gmail.com>
This commit is contained in:
@@ -205,8 +205,8 @@ class TestLenientSimpleCookie(unittest.TestCase):
|
||||
),
|
||||
(
|
||||
'Test quoted cookie',
|
||||
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
|
||||
{'keebler': 'E=mc2; L="Loves"; fudge=\012;'},
|
||||
'keebler="E=mc2; L=\\"Loves\\"; fudge=;"',
|
||||
{'keebler': 'E=mc2; L="Loves"; fudge=;'},
|
||||
),
|
||||
(
|
||||
"Allow '=' in an unquoted value",
|
||||
@@ -328,4 +328,30 @@ class TestLenientSimpleCookie(unittest.TestCase):
|
||||
'Key=Value; [Invalid]=Value; Another=Value',
|
||||
{'Key': 'Value', 'Another': 'Value'},
|
||||
),
|
||||
# Ref: https://github.com/python/cpython/issues/143919
|
||||
(
|
||||
'Test invalid cookie name w/ control character',
|
||||
'foo\012=bar;',
|
||||
{},
|
||||
),
|
||||
(
|
||||
'Test invalid cookie name w/ control character 2',
|
||||
'foo\015baz=bar',
|
||||
{},
|
||||
),
|
||||
(
|
||||
'Test invalid cookie name w/ control character followed by valid cookie',
|
||||
'foo\015=bar; x=y;',
|
||||
{'x': 'y'},
|
||||
),
|
||||
(
|
||||
'Test invalid cookie value w/ control character',
|
||||
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
|
||||
{},
|
||||
),
|
||||
(
|
||||
'Test invalid quoted attribute value w/ control character',
|
||||
'Customer="WILE_E_COYOTE"; Version="1\\012"; Path="/acme"',
|
||||
{},
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
# We use Morsel's legal key chars to avoid errors on setting values
|
||||
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
|
||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
||||
_LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII)
|
||||
|
||||
_RESERVED = {
|
||||
'expires',
|
||||
@@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
|
||||
# Added 'bad' group to catch the remaining value
|
||||
_COOKIE_PATTERN = re.compile(r'''
|
||||
\s* # Optional whitespace at start of cookie
|
||||
[ ]* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
|
||||
[^ =;]+ # Match almost anything here for now and validate later
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
[ ]*=[ ]* # Equal Sign
|
||||
( # Start of potential value
|
||||
(?P<val> # Start of group 'val'
|
||||
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
\w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr
|
||||
| # or
|
||||
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
@@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||
) # End of potential value
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
[ ]* # Any number of spaces.
|
||||
([ ]+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
''', re.ASCII | re.VERBOSE)
|
||||
|
||||
# http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+
|
||||
# Ref: https://github.com/python/cpython/issues/143919
|
||||
_CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]')
|
||||
|
||||
def load(self, data):
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||
if not isinstance(data, str):
|
||||
@@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
continue
|
||||
|
||||
key, value = match.group('key', 'val')
|
||||
if not self._LEGAL_KEY_RE.fullmatch(key):
|
||||
morsel = None
|
||||
continue
|
||||
|
||||
is_attribute = False
|
||||
if key.startswith('$'):
|
||||
@@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
value = True
|
||||
else:
|
||||
value, _ = self.value_decode(value)
|
||||
# Guard against control characters in quoted attribute values
|
||||
if self._CONTROL_CHARACTER_RE.search(value):
|
||||
# While discarding the entire morsel is not very lenient,
|
||||
# it's better than http.cookies.Morsel raising a CookieError
|
||||
# and it's probably better to err on the side of caution
|
||||
self.pop(morsel.key, None)
|
||||
morsel = None
|
||||
continue
|
||||
|
||||
morsel[key] = value
|
||||
|
||||
@@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
elif value is not None:
|
||||
morsel = self.get(key, http.cookies.Morsel())
|
||||
real_value, coded_value = self.value_decode(value)
|
||||
# Guard against control characters in quoted cookie values
|
||||
if self._CONTROL_CHARACTER_RE.search(real_value):
|
||||
morsel = None
|
||||
continue
|
||||
morsel.set(key, real_value, coded_value)
|
||||
self[key] = morsel
|
||||
|
||||
|
||||
Reference in New Issue
Block a user