1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-02-05 05:26:55 +00:00

[ie/visir] Add extractor (#15811)

Closes #11901
Authored by: doe1080
This commit is contained in:
doe1080
2026-02-05 00:33:00 +09:00
committed by GitHub
parent bb3af7e6d5
commit c7c45f5289
4 changed files with 121 additions and 0 deletions

View File

@@ -924,6 +924,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(month_by_name(None), None)
self.assertEqual(month_by_name('December', 'en'), 12)
self.assertEqual(month_by_name('décembre', 'fr'), 12)
self.assertEqual(month_by_name('desember', 'is'), 12)
self.assertEqual(month_by_name('December'), 12)
self.assertEqual(month_by_name('décembre'), None)
self.assertEqual(month_by_name('Unknown', 'unknown'), None)

View File

@@ -2343,6 +2343,7 @@ from .vimm import (
)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .visir import VisirIE
from .viu import (
ViuIE,
ViuOTTIE,

116
yt_dlp/extractor/visir.py Normal file
View File

@@ -0,0 +1,116 @@
import re
from .common import InfoExtractor
from ..utils import (
UnsupportedError,
clean_html,
int_or_none,
js_to_json,
month_by_name,
url_or_none,
urljoin,
)
from ..utils.traversal import find_element, traverse_obj
class VisirIE(InfoExtractor):
    # Extractor for videos hosted on visir.is: handles both the watch pages
    # (/k/<id>) and the embeddable player pages (/player/<id>).
    IE_DESC = 'Vísir'

    _VALID_URL = r'https?://(?:www\.)?visir\.is/(?P<type>k|player)/(?P<id>[\da-f-]+)(?:/(?P<slug>[\w.-]+))?'
    _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.visir.is/k/eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
        'info_dict': {
            'id': 'eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
            'ext': 'mp4',
            'title': 'Sveppi og Siggi Þór mestu skaphundarnir',
            'categories': ['island-i-dag'],
            'description': 'md5:e06bd6a0cd8bdde328ad8cf00d3d4df6',
            'duration': 792,
            'thumbnail': r're:https?://www\.visir\.is/.+',
            'upload_date': '20260121',
            'view_count': int,
        },
    }, {
        'url': 'https://www.visir.is/k/b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704/tonlistarborgin-reykjavik',
        'info_dict': {
            'id': 'b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704',
            'ext': 'mp4',
            'title': 'Tónlistarborgin Reykjavík',
            'categories': ['tonlist'],
            'description': 'md5:47237589dc95dbde55dfbb163396f88a',
            'display_id': 'tonlistarborgin-reykjavik',
            'duration': 81,
            'thumbnail': r're:https?://www\.visir\.is/.+',
            'upload_date': '20251124',
            'view_count': int,
        },
    }, {
        'url': 'https://www.visir.is/player/0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
        'info_dict': {
            'id': '0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
            'ext': 'mp4',
            'title': 'Sportpakkinn 2. febrúar 2026',
            'categories': ['sportpakkinn'],
            'display_id': 'sportpakkinn-2.-februar-2026',
            'duration': 293,
            'thumbnail': r're:https?://www\.visir\.is/.+',
            'upload_date': '20260202',
            'view_count': int,
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.visir.is/g/20262837896d/segir-von-brigdin-med-prinsessuna-rista-djupt',
        'info_dict': {
            'id': '9ad5e58a-f26f-49f7-8b1d-68f0629485b7-1770059257365',
            'ext': 'mp4',
            'title': 'Norðmenn tala ekki um annað en prinsessuna',
            'categories': ['frettir'],
            'description': 'md5:53e2623ae79e1355778c14f5b557a0cd',
            'display_id': 'nordmenn-tala-ekki-um-annad-en-prinsessuna',
            'duration': 138,
            'thumbnail': r're:https?://www\.visir\.is/.+',
            'upload_date': '20260202',
            'view_count': int,
        },
    }]

    @staticmethod
    def _parse_upload_date(webpage):
        """Return the upload date as a YYYYMMDD string, or None.

        Reads the text of the 'article-item__date' element and accepts it only
        when it splits into exactly three whitespace-separated tokens: a day
        (a trailing '.' is tolerated), a month name resolvable through
        month_by_name(..., 'is'), and a four-digit year.
        """
        tokens = traverse_obj(webpage, (
            {find_element(cls='article-item__date')}, {clean_html}, filter, {str.split}))
        # tokens is None when the element is missing or empty
        if not tokens or len(tokens) != 3:
            return None

        raw_day, month_name, year = tokens
        day = int_or_none(raw_day.rstrip('.'))
        month = month_by_name(month_name, 'is')
        if not (day and month and re.fullmatch(r'[0-9]{4}', year)):
            return None
        return f'{year}{month:02d}{day:02d}'

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_type = mobj.group('type')
        video_id = mobj.group('id')
        display_id = mobj.group('slug')
        webpage = self._download_webpage(url, video_id)

        if video_type == 'player':
            # Player pages are delegated to the URL advertised in og:url; to
            # avoid looping, that URL must be one we support and must not be
            # another player page.
            canonical_url = self._og_search_url(webpage)
            points_to_watch_page = (
                self.suitable(canonical_url)
                and self._match_valid_url(canonical_url).group('type') != 'player')
            if not points_to_watch_page:
                raise UnsupportedError(canonical_url)
            return self.url_result(canonical_url, self.ie_key())

        upload_date = self._parse_upload_date(webpage)

        # The player configuration is the object literal passed to
        # App.Player.Init(...), converted to JSON via js_to_json
        player = self._search_json(
            r'App\.Player\.Init\(', webpage, video_id, 'player', transform_source=js_to_json)
        # 'File' is joined against the VOD host
        m3u8_url = traverse_obj(player, ('File', {urljoin('https://vod.visir.is/')}))
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')

        # Metadata scraped from the article markup surrounding the player
        page_meta = traverse_obj(webpage, ({find_element(cls='article-item press-ads')}, {
            'description': ({find_element(cls='-large')}, {clean_html}, filter),
            'view_count': ({find_element(cls='article-item__viewcount')}, {clean_html}, {int_or_none}),
        }))
        # Metadata taken from the player configuration itself
        player_meta = traverse_obj(player, {
            'title': ('Title', {clean_html}),
            'categories': ('Categoryname', {clean_html}, filter, all, filter),
            'duration': ('MediaDuration', {int_or_none}),
            'thumbnail': ('Image', {url_or_none}),
        })

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'upload_date': upload_date,
            **page_meta,
            **player_meta,
        }

View File

@@ -75,6 +75,9 @@ MONTH_NAMES = {
'fr': [
'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
'is': [
'janúar', 'febrúar', 'mars', 'apríl', 'maí', 'júní',
'júlí', 'ágúst', 'september', 'október', 'nóvember', 'desember'],
# these follow the genitive grammatical case (dopełniacz)
# some websites might be using nominative, which will require another month list
# https://en.wikibooks.org/wiki/Polish/Noun_cases