diff --git a/ythdd_extractor.py b/ythdd_extractor.py index 804c2e4..1225744 100644 --- a/ythdd_extractor.py +++ b/ythdd_extractor.py @@ -82,7 +82,7 @@ stage3_headers = { "Sec-Fetch-Mode": "navigate", "Content-Type": "application/json", "X-Youtube-Client-Name": "1", - "X-Youtube-Client-Version": "2.20251014.01.00", + "X-Youtube-Client-Version": "2.20251103.01.00", "Origin": "https://www.youtube.com", "Accept-Encoding": "gzip, deflate, br", "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI" @@ -94,7 +94,7 @@ stage3_body = { "client": { "clientName": "WEB", - "clientVersion": "2.20251014.01.00", + "clientVersion": "2.20251103.01.00", "hl": "en", "timeZone": "UTC", "utcOffsetMinutes": 0 @@ -114,7 +114,7 @@ web_context_dict = { 'deviceModel': '', 'userAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0,gzip(gfe)', 'clientName': 'WEB', - 'clientVersion': '2.20251030.01.00', + 'clientVersion': '2.20251103.01.00', 'osName': 'Windows', 'osVersion': '10.0', 'screenPixelDensity': 2, diff --git a/ythdd_inv_tl.py b/ythdd_inv_tl.py index 37d0996..ba8c9b3 100644 --- a/ythdd_inv_tl.py +++ b/ythdd_inv_tl.py @@ -135,215 +135,6 @@ def getError(wdata: dict): return error -def rebuildFormats(data): - result = [{} for x in data] - formatStreams = [] - best_bitrate_video = 0 - best_bitrate_audio = -1 - - for x in range(len(data)): - - try: - result[x]['audioChannels'] = data[x]['audioChannels'] - isVideo = 0 - except: - isVideo = 1 - - if not "initRange" in data[x]: # for livestreams? 
- continue - - result[x]['init'] = str(data[x]['initRange']['start']) + "-" + str(data[x]['initRange']['end']) - result[x]['index'] = str(data[x]['indexRange']['start']) + "-" + str(data[x]['indexRange']['end']) - result[x]['bitrate'] = str(data[x]['averageBitrate']) - result[x]['url'] = data[x]['url'] - result[x]['itag'] = str(data[x]['itag']) - result[x]['type'] = data[x]['mimeType'] - result[x]['clen'] = data[x]['contentLength'] - result[x]['lmt'] = data[x]['lastModified'] - result[x]['projectionType'] = data[x]['projectionType'] - try: - result[x]['colorInfo'] = data[x]['colorInfo'] - except: - pass - - if "audio" == data[x]['mimeType'][:5]: - isAudio = 1 - else: - isAudio = 0 - - if isVideo: - result[x]['fps'] = str(data[x]['fps']) - else: - result[x]['audioQuality'] = data[x]['audioQuality'] - result[x]['audioSampleRate'] = data[x]['audioSampleRate'] - - if data[x]['itag'] in invidious_formats.FORMATS.keys(): - result[x]['container'] = invidious_formats.FORMATS[data[x]['itag']]['ext'] - try: - result[x]['encoding'] = invidious_formats.FORMATS[data[x]['itag']]['vcodec'] - except: - result[x]['encoding'] = invidious_formats.FORMATS[data[x]['itag']]['acodec'] - - if isVideo: - try: - result[x]['resolution'] = str(invidious_formats.FORMATS[data[x]['itag']]['height']) + "p" - result[x]['qualityLabel'] = str(invidious_formats.FORMATS[data[x]['itag']]['height']) + "p" + str(result[x]['fps']) * (data[x]['fps'] > 30) # NOT IMPLEMENTED, that's just a placeholder - result[x]['size'] = str(invidious_formats.FORMATS[data[x]['itag']]['width']) + "x" + str(invidious_formats.FORMATS[data[x]['itag']]['height']) - except: - pass - - # we assume here that a stream with the highest bitrate must be a video stream- that may not be the case - if data[x]['averageBitrate'] > data[best_bitrate_video]['averageBitrate'] and isVideo: - best_bitrate_video = x - if data[x]['averageBitrate'] > data[best_bitrate_audio]['averageBitrate'] and isAudio: - best_bitrate_audio = x - - # makes 
FreeTube work, unfortunately it's a video-only stream - formatStreams = [ - { - "url": data[best_bitrate_video]['url'], - "itag": str(data[best_bitrate_video]['itag']), - "type": data[best_bitrate_video]['mimeType'], - "quality": data[best_bitrate_video]['quality'], - "bitrate": str(data[best_bitrate_video]['averageBitrate']), - "fps": data[best_bitrate_video]['fps'], - "size": "", # workaround for clipious, which requires ANYTHING to be passed, or else it will throw and error and won't load the video - "resolution": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p", - "qualityLabel": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p", - "container": invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['ext'], - "encoding": invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['vcodec'] - }, - # { - # "audioChannels": data[best_bitrate_audio]['audioChannels'], - # "init": result[best_bitrate_audio]['init'], - # "index": result[best_bitrate_audio]['index'], - # "bitrate": str(data[best_bitrate_audio]['averageBitrate']), - # "url": data[best_bitrate_audio]['url'], - # "itag": str(data[best_bitrate_audio]['itag']), - # "type": data[best_bitrate_audio]['mimeType'], - # "clen": result[best_bitrate_audio]['clen'], - # "lmt": result[best_bitrate_audio]['lmt'], - # "projectionType": result[best_bitrate_audio]['projectionType'], - # "audioQuality": result[best_bitrate_audio]['audioQuality'], - # "audioSampleRate": result[best_bitrate_audio]['audioSampleRate'], - # "qualityLabel": "audio" - # } - ] - - # not all itags have width and/or height - try: - formatStreams[0]["size"] = str(invidious_formats.FORMATS[data[best_bitrate]['itag']]['width']) + "x" + str(invidious_formats.FORMATS[data[best_bitrate]['itag']]['height']) - except: - pass - - return result, formatStreams - -def rebuildFormatsFromYtdlpApi(ydata: dict): - # Rebuild invidious-compatible formats from yt-dlp's output (ydata) - - 
adaptive_formats = [] - format_streams = [] - - for stream in safeTraverse(ydata, ["formats"], default=[]): - - if safeTraverse(stream, ["protocol"], default="storyboard") not in ("http_dash_segments", "https"): - continue - - newRow = {} - # Add from ...'s ... to ... as ... - newRow["bitrate"] = str(int(safeTraverse(stream, ["tbr"], default=0) * 1000)) - newRow["url"] = safeTraverse(stream, ["url"]) - newRow["itag"] = safeTraverse(stream, ["format_id"]) - params = ythdd_extractor.paramsFromUrl(newRow["url"]) - - vcodec = safeTraverse(stream, ["vcodec"], default="none") - acodec = safeTraverse(stream, ["acodec"], default="none") - if vcodec == "none" and acodec == "none": - continue - - if safeTraverse(stream, ["acodec"]) != "none": - # audio-only track - type = safeTraverse(stream, ["audio_ext"], default=None) - fnote = safeTraverse(stream, ["format_note"], default="low") - if type is None: - type = "mp4" - abr = safeTraverse(stream, ["abr"], default="0") - if abr is None: - abr = "0" - newRow[ "type"] = "audio/" + type - newRow[ "audioQuality"] = fnote - newRow["audioSampleRate"] = int(safeTraverse(stream, ["asr"], default="44100")) - newRow[ "audioChannels"] = int(safeTraverse(stream, ["audio_channels"])) - newRow[ "qualityLabel"] = str(int(abr)) + "k (audio)" - newRow[ "resolution"] = f"{fnote} quality" - newRow[ "size"] = "0x0" - if safeTraverse(stream, ["vcodec"]) != "none": - # either video-only or video+audio - type = safeTraverse(stream, ["video_ext"], default=None) - if type is None: - type = "mp4" - height = str(safeTraverse(stream, ["height"], default=0)) - width = str(safeTraverse(stream, [ "width"], default=0)) - newRow[ "type"] = "video/" + type - newRow[ "resolution"] = (height if height in ("144", "240", "360", "480", "720", "1080") else "360") + "p" # mpv won't play the video inside of Yattee if it's a non-standard resolution (bug?) 
- newRow[ "fps"] = safeTraverse(stream, ["fps"], default=30) - newRow[ "qualityLabel"] = height + "p" + str(int(newRow['fps'])) * (newRow["fps"] > 30) + " (video)" # also a placeholder - newRow[ "size"] = width + "x" + height - - newRow[ "clen"] = safeTraverse(params, ["clen"], default=safeTraverse(stream, ["filesize"], default="0")) - newRow[ "lmt"] = safeTraverse(params, ["lmt"], default="0") - - if newRow["clen"] is None: - # for clipious sake which expects a string - newRow["clen"] = "0" - - newRow[ "projectionType"] = "RECTANGULAR" # clipious requires this to be mentioned explicitly and cannot be nulled - newRow[ "container"] = safeTraverse(stream, ["ext"], default="unknown_container") - newRow[ "encoding"] = safeTraverse(invidious_formats.FORMATS, [int("0" + newRow["itag"].split("-")[0]), "ext"], default="unknown_encoding") # not sure this split is necessary - newRow[ "quality"] = newRow["qualityLabel"] - newRow[ "init"] = "0-1" # dummy values - newRow[ "index"] = "2-3" # dummy values - - if vcodec != "none" and acodec != "none": - # 360p stream - newRow["qualityLabel"] = height + "p" + str(int(newRow['fps'])) * (newRow["fps"] > 30) - format_streams.append(newRow) - if vcodec != "none" or acodec != "none": - adaptive_formats.append(newRow) - - - # { - # "url": data[best_bitrate_video]['url'], - # "itag": str(data[best_bitrate_video]['itag']), - # "type": data[best_bitrate_video]['mimeType'], - # "quality": data[best_bitrate_video]['quality'], - # "bitrate": str(data[best_bitrate_video]['averageBitrate']), - # "fps": data[best_bitrate_video]['fps'], - # "size": "", # workaround for clipious, which requires ANYTHING to be passed, or else it will throw and error and won't load the video - # "resolution": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p", - # "qualityLabel": str(invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['height']) + "p", - # "container": 
invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['ext'], - # "encoding": invidious_formats.FORMATS[data[best_bitrate_video]['itag']]['vcodec'] - # } - - # { - # "audioChannels": data[best_bitrate_audio]['audioChannels'], - # "init": result[best_bitrate_audio]['init'], - # "index": result[best_bitrate_audio]['index'], - # "bitrate": str(data[best_bitrate_audio]['averageBitrate']), - # "url": data[best_bitrate_audio]['url'], - # "itag": str(data[best_bitrate_audio]['itag']), - # "type": data[best_bitrate_audio]['mimeType'], - # "clen": result[best_bitrate_audio]['clen'], - # "lmt": result[best_bitrate_audio]['lmt'], - # "projectionType": result[best_bitrate_audio]['projectionType'], - # "audioQuality": result[best_bitrate_audio]['audioQuality'], - # "audioSampleRate": result[best_bitrate_audio]['audioSampleRate'], - # "qualityLabel": "audio" - # } - - return adaptive_formats, format_streams - def videos(data): # an attempt on a faithful rewrite of # https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr @@ -469,10 +260,43 @@ def videos(data): author_verified = author_verified or safeTraverse(livm, [0, "listItemViewModel", "title", "attachmentRuns", 0, "element", "type", "imageType", "image", "sources", 0, "clientResource", "imageName"]) in ("AUDIO_BADGE", "CHECK_CIRCLE_FILLED") author_thumbnail = ythdd_extractor.generateChannelAvatarsFromUrl(author_thumbnail) + wdata_streams = safeTraverse(wdata, ["ec1", "streamingData"], default=[]) + adaptive_formats = [] format_streams = [] # adaptive_formats, format_streams = rebuildFormats(adaptive_formats) if not live_now: - adaptive_formats, format_streams = rebuildFormatsFromYtdlpApi(ydata) + # adaptive_formats, format_streams = rebuildFormatsFromYtdlpApi(ydata) + + initial_astreams_y = {} # itag is the key + initial_fstreams_y = {} # same here + initial_astreams_w = {} + initial_fstreams_w = {} + + for video_stream in ydata["formats"]: + if video_stream["format_note"] in ("storyboard"): + # 
ignore non-audio/video formats (e.g. storyboards)
+                continue
+            if video_stream["format_id"] == "18": # todo: do this dynamically
+                initial_fstreams_y[int(video_stream["format_id"])] = video_stream
+            else:
+                initial_astreams_y[int(video_stream["format_id"])] = video_stream
+
+        # format streams
+        for video_stream in wdata_streams["formats"]:
+            initial_fstreams_w[video_stream["itag"]] = video_stream
+
+        # adaptive streams
+        for video_stream in wdata_streams["adaptiveFormats"]:
+            initial_astreams_w[video_stream["itag"]] = video_stream
+
+        for itag in initial_astreams_y:
+            if itag in initial_astreams_w:
+                adaptive_formats.append(ythdd_struct_parser.parseAdaptiveStreams(initial_astreams_w[itag], initial_astreams_y[itag]))
+
+        for itag in initial_fstreams_y:
+            if itag in initial_fstreams_w:
+                format_streams.append( ythdd_struct_parser.parseFormatStreams( initial_fstreams_w[itag], initial_fstreams_y[itag]))
+
         hls_url = None
     else:
         adaptive_formats, format_streams = [{"url": f"http://a/?expire={int(time_start + 5.9 * 60 * 60)}", "itag": "18", "type": "", "clen": "0", "lmt": "", "projectionType": "RECTANGULAR"}], [] # freetube/clipious shenanigans, see: https://github.com/FreeTubeApp/FreeTube/pull/5997 and https://github.com/lamarios/clipious/blob/b9e7885/lib/videos/models/adaptive_format.g.dart
diff --git a/ythdd_struct_parser.py b/ythdd_struct_parser.py
index d49b074..6430f1d 100644
--- a/ythdd_struct_parser.py
+++ b/ythdd_struct_parser.py
@@ -1,5 +1,6 @@
-from ythdd_globals import safeTraverse
 from html import escape
+from invidious_formats import FORMATS
+from ythdd_globals import safeTraverse
 import json
 import dateparser
 import ythdd_globals
@@ -516,3 +517,90 @@ def extractTextFromSimpleOrRuns(obj: dict, default: str = "") -> str:
     else:
         print(f"error(extractTextFromSimpleOrRuns): text extraction failed for {obj}")
     return text
+
+
+def parseFormatStreams(wdata_fstream: dict, ydata_stream: dict) -> dict:
+    """Build one format stream entry from web-player metadata and a yt-dlp URL.
+
+    Every field except "url" is read from wdata_fstream; "url" is taken from
+    ydata_stream. When ydata_stream carries no "url", the failure is reported
+    through ythdd_globals.print_debug and "url" is set to None.
+    "resolution" falls back to "360p" for heights outside the listed ones.
+    """
+
+    try:
+        stream_url = ydata_stream["url"]
+    except (KeyError, TypeError):
+        # best effort: report the problem and continue with a null URL, so that
+        # building the dict below does not fail on an unbound stream_url
+        stream_url = None
+        ythdd_globals.print_debug( "could not extract format stream URL from yt-dlp response:")
+        ythdd_globals.print_debug(f"wdata: {wdata_fstream}")
+        ythdd_globals.print_debug(f"ydata: {ydata_stream}")
+
+    fstream = {
+        "url": stream_url,
+        "itag": str(wdata_fstream["itag"]), # string, same as parseAdaptiveStreams
+        "type": wdata_fstream["mimeType"],
+        "quality": wdata_fstream["quality"],
+        "bitrate": str(wdata_fstream["bitrate"]),
+        "fps": wdata_fstream["fps"],
+        "size": f"{wdata_fstream['width']}x{wdata_fstream['height']}",
+        "resolution": f"{wdata_fstream['height'] if wdata_fstream['height'] in (144, 240, 360, 480, 720, 1080, 2160) else 360}p",
+        "qualityLabel": wdata_fstream["qualityLabel"],
+        "container": safeTraverse(FORMATS.get(wdata_fstream["itag"]), [ "ext"], default="mp4"), # invidious_formats
+        "encoding": safeTraverse(FORMATS.get(wdata_fstream["itag"]), ["vcodec"], default="mp4") # invidious_formats
+    }
+
+    return fstream
+
+def parseAdaptiveStreams(wdata_astream: dict, ydata_stream: dict) -> dict:
+    """Build one adaptive stream entry from web-player metadata and a yt-dlp URL.
+
+    Every field except "url" is read from wdata_astream; "url" is taken from
+    ydata_stream and is None (after a debug report) when it is missing.
+    A wdata_astream containing "audioQuality" is treated as audio and gets the
+    audio* fields; anything else is treated as video and gets fps, size,
+    resolution, qualityLabel and colorInfo.
+    """
+
+    try:
+        stream_url = ydata_stream["url"]
+    except (KeyError, TypeError):
+        # best effort: report the problem and continue with a null URL, so that
+        # building the dict below does not fail on an unbound stream_url
+        stream_url = None
+        ythdd_globals.print_debug( "could not extract adaptive stream URL from yt-dlp response:")
+        ythdd_globals.print_debug(f"wdata: {wdata_astream}")
+        ythdd_globals.print_debug(f"ydata: {ydata_stream}")
+
+    astream = {
+        "init": f"{wdata_astream['initRange']['start']}-{wdata_astream['initRange']['end']}",
+        "index": f"{wdata_astream['indexRange']['start']}-{wdata_astream['indexRange']['end']}",
+        "bitrate": str(wdata_astream["bitrate"]),
+        "url": stream_url,
+        "itag": str(wdata_astream["itag"]),
+        "type": wdata_astream["mimeType"],
+        "clen": wdata_astream["contentLength"],
+        "lmt": wdata_astream["lastModified"],
+        "projectionType": wdata_astream["projectionType"],
+        "container": safeTraverse(FORMATS.get(wdata_astream["itag"]), [ "ext"], default="mp4"), # invidious_formats,
+        "encoding": safeTraverse(FORMATS.get(wdata_astream["itag"]), ["vcodec"], default="mp4") # invidious_formats,
+    }
+
+    if "audioQuality" in wdata_astream:
+        # audio-specific metadata; plain values, because a trailing comma
+        # after the right-hand side would store a 1-tuple instead
+        astream["audioQuality"] = wdata_astream["audioQuality"]
+        astream["audioSampleRate"] = int(wdata_astream["audioSampleRate"])
+        astream["audioChannels"] = wdata_astream["audioChannels"]
+    else:
+        # video-specific metadata
+        astream["fps"] = wdata_astream["fps"]
+        astream["size"] = f"{wdata_astream['width']}x{wdata_astream['height']}"
+        astream["resolution"] = f"{wdata_astream['height'] if wdata_astream['height'] in (144, 240, 360, 480, 720, 1080, 2160) else 360}p"
+        astream["qualityLabel"] = wdata_astream["qualityLabel"]
+        astream["colorInfo"] = safeTraverse(wdata_astream, ["colorInfo"])
+
+    return astream
+