fix: rebuild formats from yt-dlp data

Makes Yattee and FreeTube work.
This commit is contained in:
2025-06-27 23:02:57 +02:00
parent 837567f8c8
commit 4e066e4b23
3 changed files with 125 additions and 8 deletions

View File

@@ -105,7 +105,7 @@ def hot(data):
# try to get the data
try:
started = time.time()
extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count)
extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count, manifest_fix=True)
extracted_dict["took"] = time.time() - started
return 200, "OK", extracted_dict
except Exception as e:

View File

@@ -13,6 +13,11 @@ ytdl_opts = {
"default": "%(id)s.%(ext)s",
"chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s"
},
"extractor_args": {
"youtube": {
"formats": ["dashy"]
}
},
"simulate": True
}
@@ -123,7 +128,7 @@ web_context_dict = {
}
}
def extract(url: str, getcomments=False, maxcomments=""):
def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False):
# TODO: check user-agent and cookiefile
if ythdd_globals.config['extractor']['user-agent']:
@@ -137,9 +142,12 @@ def extract(url: str, getcomments=False, maxcomments=""):
if getcomments:
ytdl_opts['getcomments'] = True
if maxcomments:
ytdl_opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}
ytdl_opts['extractor_args']['youtube']['max_comments'] = [maxcomments, "all", "all", "all"]
if manifest_fix:
# https://github.com/yt-dlp/yt-dlp/issues/11952#issuecomment-2565802294
ytdl_opts['extractor_args']['youtube']['player_client'] = ['default', 'web_safari']
with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
result = ytdl.extract_info(url, download=False)
result = ytdl.sanitize_info(ytdl.extract_info(url, download=False))
return result
def WEBrelated(url: str):
@@ -183,6 +191,19 @@ def WEBextractSinglePage(uri: str):
return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}
def paramsFromUrl(url: str) -> dict:
    """Return the query parameters of a URL as a dict of raw strings.

    The URL is split on "&"; everything up to and including the first "?"
    of the first segment is discarded, so both full URLs and bare query
    strings ("a=1&b=2") are accepted. Values are NOT percent-decoded.

    Args:
        url: URL (or bare query string) to take the parameters from.

    Returns:
        Mapping of parameter name to its raw value. If a name occurs more
        than once, the last occurrence wins. Segments without "=" (empty
        segments, or a URL that has no query at all) are skipped.
    """
    params = {}
    for num, segment in enumerate(url.split("&")):
        if num == 0:
            # find() returns -1 when there is no "?", which keeps the whole
            # segment - this is what makes bare query strings work.
            segment = segment[segment.find("?") + 1:]
        # partition() splits on the FIRST "=" only, so values that contain
        # "=" themselves (e.g. base64 padding) stay intact instead of
        # breaking a two-element unpack.
        key, separator, value = segment.partition("=")
        if not separator:
            continue
        params[key] = value
    return params
def IOSextract(uri: str):
start = time.time()

View File

@@ -227,6 +227,94 @@ def rebuildFormats(data):
return result, formatStreams
def rebuildFormatsFromYtdlpApi(ydata: dict):
    """Rebuild Invidious-compatible format lists from yt-dlp's output.

    Only entries of ydata["formats"] whose protocol is exactly
    "http_dash_segments" are considered. Entries that carry neither a video
    nor an audio codec, or that have no URL, are skipped.

    Args:
        ydata: info dict as returned by yt-dlp.

    Returns:
        A tuple (adaptive_formats, format_streams). Every usable stream is
        appended to adaptive_formats; streams carrying both video and audio
        are additionally appended to format_streams.
    """
    adaptive_formats = []
    format_streams = []

    for stream in safeTraverse(ydata, ["formats"], default=[]):
        # Compare for equality: `x not in ("http_dash_segments")` tests
        # against a plain string (the parentheses do not make a tuple), which
        # is a substring check and lets protocols such as "http" slip through.
        if safeTraverse(stream, ["protocol"], default="storyboard") != "http_dash_segments":
            continue

        vcodec = safeTraverse(stream, ["vcodec"], default="none")
        acodec = safeTraverse(stream, ["acodec"], default="none")
        has_video = vcodec != "none"
        has_audio = acodec != "none"
        if not has_video and not has_audio:
            continue

        url = safeTraverse(stream, ["url"])
        if not url:
            # nothing a client could play back
            continue

        try:
            params = ythdd_extractor.paramsFromUrl(url)
        except ValueError:
            # A query string that cannot be parsed must not abort the whole
            # response; fall back to the defaults used below.
            params = {}

        newRow = {}
        # `or 0` covers a "tbr" that is present but None
        newRow["bitrate"] = str(int((safeTraverse(stream, ["tbr"], default=0) or 0) * 1000))
        newRow["url"] = url
        newRow["itag"] = safeTraverse(stream, ["format_id"])

        if has_audio:
            # audio-only, or the audio part of a video+audio stream
            audio_ext = safeTraverse(stream, ["audio_ext"], default=None)
            if audio_ext is None:
                audio_ext = "mp4"
            newRow["type"] = "audio/" + audio_ext
            newRow["audioQuality"] = str(safeTraverse(stream, ["abr"], default=128))
            newRow["audioSampleRate"] = str(safeTraverse(stream, ["asr"], default=44100))
            newRow["audioChannels"] = safeTraverse(stream, ["audio_channels"])
            newRow["qualityLabel"] = newRow["audioQuality"] + "kbps"

        if has_video:
            # either video-only or video+audio; for the latter the video
            # values deliberately overwrite "type" and "qualityLabel" set above
            video_ext = safeTraverse(stream, ["video_ext"], default=None)
            if video_ext is None:
                video_ext = "mp4"
            height = str(safeTraverse(stream, ["height"], default=0))
            width = str(safeTraverse(stream, ["width"], default=0))
            # `or 30` covers an "fps" that is present but None
            fps = safeTraverse(stream, ["fps"], default=30) or 30
            newRow["type"] = "video/" + video_ext
            newRow["resolution"] = height + "p"
            newRow["fps"] = fps
            # also a placeholder: the frame rate is appended only above 30 fps
            newRow["qualityLabel"] = height + "p" + (str(int(fps)) if fps > 30 else "")
            newRow["size"] = width + "x" + height

        # Always a string, whether it comes from the URL or from yt-dlp's
        # (integer) "filesize".
        newRow["clen"] = str(params.get("clen") or safeTraverse(stream, ["filesize"], default=None) or 0)
        newRow["lmt"] = params.get("lmt", "0")

        if has_video and has_audio:
            # 360p stream
            format_streams.append(newRow.copy())
        adaptive_formats.append(newRow.copy())

    # Reference: entries as produced by the previous implementation.
    # {
    #     "url": data[best_bitrate_video]['url'],
    #     "itag": str(data[best_bitrate_video]['itag']),
    #     "type": data[best_bitrate_video]['mimeType'],
    #     "quality": data[best_bitrate_video]['quality'],
    #     "bitrate": str(data[best_bitrate_video]['averageBitrate']),
    #     "fps": data[best_bitrate_video]['fps'],
    #     "size": "", # workaround for clipious, which requires ANYTHING to be passed, or else it will throw an error and won't load the video
    #     "resolution": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p",
    #     "qualityLabel": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p",
    #     "container": invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['ext'],
    #     "encoding": invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['vcodec']
    # }
    # {
    #     "audioChannels": data[best_bitrate_audio]['audioChannels'],
    #     "init": result[best_bitrate_audio]['init'],
    #     "index": result[best_bitrate_audio]['index'],
    #     "bitrate": str(data[best_bitrate_audio]['averageBitrate']),
    #     "url": data[best_bitrate_audio]['url'],
    #     "itag": str(data[best_bitrate_audio]['itag']),
    #     "type": data[best_bitrate_audio]['mimeType'],
    #     "clen": result[best_bitrate_audio]['clen'],
    #     "lmt": result[best_bitrate_audio]['lmt'],
    #     "projectionType": result[best_bitrate_audio]['projectionType'],
    #     "audioQuality": result[best_bitrate_audio]['audioQuality'],
    #     "audioSampleRate": result[best_bitrate_audio]['audioSampleRate'],
    #     "qualityLabel": "audio"
    # }

    return adaptive_formats, format_streams
def videos(data):
# an attempt on a faithful rewrite of
# https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr
@@ -247,7 +335,7 @@ def videos(data):
time_start = time()
ydata = ythdd_extractor.extract(data[3])
ydata = ythdd_extractor.extract(data[3], manifest_fix=True)
wdata = ythdd_extractor.WEBextractSinglePage(data[3])
#return send(200, {'ydata': ydata, 'wdata': wdata})
@@ -309,7 +397,9 @@ def videos(data):
related_video['viewCountText'] = safeTraverse(y, ['shortViewCountText', 'simpleText'], default="0").split(" ")[0]
related_views = 0
if related_views_text:
related_views = int("".join([z for z in related_views_text if 48 <= ord(z) and ord(z) <= 57]))
if related_views_text.lower() == "no":
related_views_text = "0"
related_views = int("0" + "".join([z for z in related_views_text if 48 <= ord(z) and ord(z) <= 57]))
related_views_text = related_views_text.split(" ")[0]
related_video['viewCount'] = related_views
related.append(related_video)
@@ -346,7 +436,9 @@ def videos(data):
# subs *= magnitude[x]
subs = ydata['channel_follower_count']
channel_about_info = ythdd_extractor.browseAbout(ucid)
author_thumbnail = ythdd_extractor.getChannelAvatar(channel_about_info)
author_thumbnail = [ # must be a list
ythdd_extractor.getChannelAvatar(channel_about_info)
] * 3 # yes really
# for x in author_thumbnail:
# # rewrite to use views.py
# x['url'] = ythdd_globals.translateLinks(x['url'])
@@ -357,6 +449,7 @@ def videos(data):
format_streams = []
# adaptive_formats, format_streams = rebuildFormats(adaptive_formats)
adaptive_formats, format_streams = rebuildFormatsFromYtdlpApi(ydata)
if live_now:
video_type = "livestream"
@@ -417,7 +510,7 @@ def videos(data):
"premiereTimestamp": premiere_timestamp,
#"hlsUrl": hls_url, # broken after a change in iOS player
#"adaptiveFormats": adaptive_formats, # same as hlsUrl
"adaptiveFormats": adaptive_formats, # same as hlsUrl
"formatStreams": format_streams, # very bare bones, empty actually xD
"captions": [], # not implemented
# "captions": [
@@ -440,6 +533,9 @@ def videos(data):
}
#'''
if ythdd_globals.config['general']['debug']:
response["ydata"] = ydata
# for debugging:
#return send(200, ythdd_extractor.WEBextractSinglePage(data[3]))
#return send(200, ythdd_extractor.IOSextract(data[3]))