From 4e066e4b23899c1770c1a0d10fff21a73c99f400 Mon Sep 17 00:00:00 2001 From: sherl Date: Fri, 27 Jun 2025 23:02:57 +0200 Subject: [PATCH] fix: rebuild formats from yt-dlp data makes yattee and freetube work --- ythdd_api_v1.py | 2 +- ythdd_extractor.py | 27 ++++++++++-- ythdd_inv_tl.py | 104 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 125 insertions(+), 8 deletions(-) diff --git a/ythdd_api_v1.py b/ythdd_api_v1.py index 8b7b71e..516f54a 100644 --- a/ythdd_api_v1.py +++ b/ythdd_api_v1.py @@ -105,7 +105,7 @@ def hot(data): # try to get the data try: started = time.time() - extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count) + extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count, manifest_fix=True) extracted_dict["took"] = time.time() - started return 200, "OK", extracted_dict except Exception as e: diff --git a/ythdd_extractor.py b/ythdd_extractor.py index 630a7b0..4cd0e17 100644 --- a/ythdd_extractor.py +++ b/ythdd_extractor.py @@ -13,6 +13,11 @@ ytdl_opts = { "default": "%(id)s.%(ext)s", "chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s" }, + "extractor_args": { + "youtube": { + "formats": ["dashy"] + } + }, "simulate": True } @@ -123,7 +128,7 @@ web_context_dict = { } } -def extract(url: str, getcomments=False, maxcomments=""): +def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False): # TODO: check user-agent and cookiefile if ythdd_globals.config['extractor']['user-agent']: @@ -137,9 +142,12 @@ def extract(url: str, getcomments=False, maxcomments=""): if getcomments: ytdl_opts['getcomments'] = True if maxcomments: - ytdl_opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}} + ytdl_opts['extractor_args']['youtube']['max_comments'] = [maxcomments, "all", "all", "all"] + if manifest_fix: + # 
def paramsFromUrl(url: str) -> dict:
    """Return the query parameters of ``url`` as a ``{key: value}`` dict.

    The string is split on ``&``; in the first segment everything up to and
    including the first ``?`` is discarded, so both full URLs and bare query
    strings (``"a=1&b=2"``) are accepted. Keys and values are returned
    verbatim -- no percent-decoding is performed. When a key occurs more
    than once, the last occurrence wins.

    Args:
        url: URL or bare query string to parse. Empty input yields ``{}``.

    Returns:
        Mapping of parameter name to its raw string value. Segments that
        contain no ``=`` (value-less flags, or a URL without any query
        string) are skipped instead of raising ``ValueError``.
    """
    if not url:
        return {}

    first, *rest = url.split("&")
    # Only the first segment can carry the "scheme://host/path?" prefix.
    _, has_query_marker, tail = first.partition("?")
    if has_query_marker:
        first = tail

    params = {}
    for segment in (first, *rest):
        # Split on the first "=" only: values may legitimately contain "="
        # (e.g. base64 padding), which made a plain split("=") blow up.
        key, separator, value = segment.partition("=")
        if not separator:
            continue
        params[key] = value

    return params
def rebuildFormatsFromYtdlpApi(ydata: dict):
    """Rebuild Invidious-compatible format lists from yt-dlp's output.

    Only entries of ``ydata["formats"]`` whose ``protocol`` is exactly
    ``"http_dash_segments"`` are considered; entries with neither an audio
    nor a video codec, or without a URL, are skipped.

    Args:
        ydata: info dict produced by yt-dlp; only its ``"formats"`` list is
            read here.

    Returns:
        A tuple ``(adaptive_formats, format_streams)`` of lists of dicts.
        Every kept stream is appended to ``adaptive_formats``; streams that
        carry both audio and video are additionally appended to
        ``format_streams``.

    NOTE(review): relies on ``safeTraverse(obj, path, default=...)`` returning
    ``default`` for a missing key -- confirm against its definition. Values
    that are present but ``None`` are coalesced to the same fallbacks here.
    """
    adaptive_formats = []
    format_streams = []

    for stream in safeTraverse(ydata, ["formats"], default=[]) or []:
        # Exact comparison on purpose: `x not in ("http_dash_segments")` is a
        # substring test against a plain string (no trailing comma, so no
        # tuple), which let protocols such as "http" or "" through.
        if safeTraverse(stream, ["protocol"], default="storyboard") != "http_dash_segments":
            continue

        vcodec = safeTraverse(stream, ["vcodec"], default="none") or "none"
        acodec = safeTraverse(stream, ["acodec"], default="none") or "none"
        has_video = vcodec != "none"
        has_audio = acodec != "none"
        if not (has_video or has_audio):
            continue

        url = safeTraverse(stream, ["url"])
        if not url:
            # Nothing a client could play, and no query string to parse.
            continue
        params = ythdd_extractor.paramsFromUrl(url)

        bitrate_kbps = safeTraverse(stream, ["tbr"], default=0) or 0
        row = {
            "bitrate": str(int(bitrate_kbps * 1000)),
            "url": url,
            "itag": safeTraverse(stream, ["format_id"]),
        }

        if has_audio:
            # Audio-only track; for audio+video streams the video branch
            # below overrides "type" and "qualityLabel".
            audio_ext = safeTraverse(stream, ["audio_ext"], default=None) or "mp4"
            row["type"] = "audio/" + audio_ext
            row["audioQuality"] = str(safeTraverse(stream, ["abr"], default=128) or 128)
            row["audioSampleRate"] = str(safeTraverse(stream, ["asr"], default=44100) or 44100)
            row["audioChannels"] = safeTraverse(stream, ["audio_channels"])
            row["qualityLabel"] = row["audioQuality"] + "kbps"

        if has_video:
            # Either video-only or video+audio.
            video_ext = safeTraverse(stream, ["video_ext"], default=None) or "mp4"
            height = str(safeTraverse(stream, ["height"], default=0) or 0)
            width = str(safeTraverse(stream, ["width"], default=0) or 0)
            fps = safeTraverse(stream, ["fps"], default=30) or 30
            row["type"] = "video/" + video_ext
            row["resolution"] = height + "p"
            row["fps"] = fps
            # Placeholder label: the frame rate is appended only above 30 fps.
            row["qualityLabel"] = height + "p" + (str(int(fps)) if fps > 30 else "")
            row["size"] = width + "x" + height

        # Prefer the content length embedded in the URL; otherwise fall back
        # to yt-dlp's filesize. Always a string so both sources agree in type.
        clen = safeTraverse(params, ["clen"], default=None)
        if clen is None:
            clen = str(safeTraverse(stream, ["filesize"], default=0) or 0)
        row["clen"] = clen
        row["lmt"] = safeTraverse(params, ["lmt"], default="0")

        if has_video and has_audio:
            # Muxed stream (e.g. the 360p one).
            format_streams.append(row.copy())
        # At least one codec is present at this point, so every row is kept.
        adaptive_formats.append(row.copy())

    return adaptive_formats, format_streams
related_views = 0 if related_views_text: - related_views = int("".join([z for z in related_views_text if 48 <= ord(z) and ord(z) <= 57])) + if related_views_text.lower() == "no": + related_views_text = "0" + related_views = int("0" + "".join([z for z in related_views_text if 48 <= ord(z) and ord(z) <= 57])) related_views_text = related_views_text.split(" ")[0] related_video['viewCount'] = related_views related.append(related_video) @@ -346,7 +436,9 @@ def videos(data): # subs *= magnitude[x] subs = ydata['channel_follower_count'] channel_about_info = ythdd_extractor.browseAbout(ucid) - author_thumbnail = ythdd_extractor.getChannelAvatar(channel_about_info) + author_thumbnail = [ # must be a list + ythdd_extractor.getChannelAvatar(channel_about_info) + ] * 3 # yes really # for x in author_thumbnail: # # rewrite to use views.py # x['url'] = ythdd_globals.translateLinks(x['url']) @@ -357,6 +449,7 @@ def videos(data): format_streams = [] # adaptive_formats, format_streams = rebuildFormats(adaptive_formats) + adaptive_formats, format_streams = rebuildFormatsFromYtdlpApi(ydata) if live_now: video_type = "livestream" @@ -417,7 +510,7 @@ def videos(data): "premiereTimestamp": premiere_timestamp, #"hlsUrl": hls_url, # broken after a change in iOS player - #"adaptiveFormats": adaptive_formats, # same as hlsUrl + "adaptiveFormats": adaptive_formats, # same as hlsUrl "formatStreams": format_streams, # very bare bones, empty actually xD "captions": [], # not implemented # "captions": [ @@ -440,6 +533,9 @@ def videos(data): } #''' + if ythdd_globals.config['general']['debug']: + response["ydata"] = ydata + # for debugging: #return send(200, ythdd_extractor.WEBextractSinglePage(data[3])) #return send(200, ythdd_extractor.IOSextract(data[3]))