diff --git a/README.md b/README.md index ae60413f28..244f8f25a8 100644 --- a/README.md +++ b/README.md @@ -1858,7 +1858,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `android_sdkless,web,web_safari` is used. If no JavaScript runtime/engine is available, then only `android_sdkless` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web,web_safari` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. 
You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` +* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `ios_downgraded`, `android`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `android_vr,ios_downgraded,web,web_safari` is used. If no JavaScript runtime/engine is available, then `android_vr,ios_downgraded` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web,web_safari` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. 
See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 8e38408d9d..b713bf5c61 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -223,20 +223,6 @@ INNERTUBE_CLIENTS = { }, 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True), }, - # Doesn't require a PoToken for some reason - 'android_sdkless': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'ANDROID', - 'clientVersion': '21.02.35', - 'userAgent': 'com.google.android.youtube/21.02.35 (Linux; U; Android 11) gzip', - 'osName': 'Android', - 'osVersion': '11', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, - 'REQUIRE_JS_PLAYER': False, - }, # YouTube Kids videos aren't returned on this client for some reason 'android_vr': { 'INNERTUBE_CONTEXT': { @@ -285,6 +271,34 @@ INNERTUBE_CLIENTS = { 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True), 'REQUIRE_JS_PLAYER': False, }, + 'ios_downgraded': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'IOS', + 'clientVersion': '19.49.7', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtube/19.49.7 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'GVS_PO_TOKEN_POLICY': { + StreamingProtocol.HTTPS: GvsPoTokenPolicy( + required=True, + recommended=True, + not_required_with_player_token=True, + ), + StreamingProtocol.HLS: GvsPoTokenPolicy( + required=False, + 
recommended=True, + not_required_with_player_token=True, + ), + }, + 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True), + 'REQUIRE_JS_PLAYER': False, + }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { @@ -323,13 +337,12 @@ INNERTUBE_CLIENTS = { 'client': { 'clientName': 'TVHTML5', 'clientVersion': '7.20260114.12.00', - 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', + # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 + 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, - # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 - 'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', }, 'tv_downgraded': { 'INNERTUBE_CONTEXT': { @@ -340,6 +353,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'REQUIRE_AUTH': True, 'SUPPORTS_COOKIES': True, }, 'tv_simply': { @@ -418,7 +432,6 @@ def build_innertube_clients(): ytcfg.setdefault('SUPPORTS_COOKIES', False) ytcfg.setdefault('SUPPORTS_AD_PLAYBACK_CONTEXT', False) ytcfg.setdefault('PLAYER_PARAMS', None) - ytcfg.setdefault('AUTHENTICATED_USER_AGENT', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') _, base_client, variant = _split_innertube_client(client) @@ -703,14 +716,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' def _get_default_ytcfg(self, client='web'): - ytcfg = copy.deepcopy(INNERTUBE_CLIENTS[client]) - - # Currently, only the tv client needs to use an alternative user-agent when logged-in - if ytcfg.get('AUTHENTICATED_USER_AGENT') and 
self.is_authenticated: - client_context = ytcfg.setdefault('INNERTUBE_CONTEXT', {}).setdefault('client', {}) - client_context['userAgent'] = ytcfg['AUTHENTICATED_USER_AGENT'] - - return ytcfg + return copy.deepcopy(INNERTUBE_CLIENTS[client]) def _get_innertube_host(self, client='web'): return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST'] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index d6198bf454..f994b662fb 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -145,8 +145,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', ) _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt') - _DEFAULT_CLIENTS = ('android_sdkless', 'web', 'web_safari') - _DEFAULT_JSLESS_CLIENTS = ('android_sdkless',) + _DEFAULT_CLIENTS = ('android_vr', 'ios_downgraded', 'web', 'web_safari') + _DEFAULT_JSLESS_CLIENTS = ('android_vr', 'ios_downgraded') _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web', 'web_safari') # Premium does not require POT (except for subtitles) _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator', 'web') @@ -3589,6 +3589,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live': f['source_preference'] -= 1 + # Safeguard against inevitable ios_downgraded client breakage + if client_name == 'ios_downgraded' and proto == 'hls' and live_status != 'is_live': + f['__needs_testing'] = True + if missing_pot: f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20