def storyboardProxy(received_request):
    """Proxy a storyboard image request to i.ytimg.com.

    `received_request` is appended to "https://i.ytimg.com/sb/" to build the
    upstream URL (presumably the path captured by the /sb/ route - verify
    against the URL rule). The `sqp` and `sigh` query parameters of the
    incoming request are forwarded upstream.

    Returns a JSON error with status 400 when `sqp` or `sigh` is missing and
    a JSON error with status 502 when the upstream request cannot be
    completed. Otherwise the upstream body is streamed back with the
    upstream status code and content type.
    """

    # will proxy storyboards
    prefix = "https://i.ytimg.com/"
    sqp = request.args.get("sqp")
    sigh = request.args.get("sigh")

    if sqp is None or sigh is None:
        return Response(json.dumps({"status": "error", "error": "Request lacks the sqp, sigh params (or both)."}), mimetype="application/json", status=400)

    try:
        sb = requests.get(
            prefix + "sb/" + received_request,
            headers=ythdd_globals.getHeaders(caller='proxy'),
            params={"sqp": sqp, "sigh": sigh},
            stream=True,
            # without a timeout a stalled upstream would block this worker forever
            timeout=10,
        )
    except requests.RequestException:
        return Response(json.dumps({"status": "error", "error": "Upstream storyboard request failed."}), mimetype="application/json", status=502)

    # let urllib3 undo any transfer encoding so the client receives the plain body
    sb.raw.decode_content = True
    # .get() avoids a KeyError (and thus a 500) if upstream omits the header
    content_type = sb.headers.get('content-type', 'application/octet-stream')
    response = Response(sb.raw, mimetype=content_type, status=sb.status_code)
    return response
a/ythdd_inv_tl.py b/ythdd_inv_tl.py index e049b0a..b5438fd 100644 --- a/ythdd_inv_tl.py +++ b/ythdd_inv_tl.py @@ -12,9 +12,11 @@ import json, datetime import dateparser import html import invidious_formats +import math import ythdd_globals import ythdd_api_v1 import ythdd_extractor +import ythdd_struct_builder import ythdd_struct_parser # TODOs: @@ -32,9 +34,9 @@ import ythdd_struct_parser # [✓] /api/v1/playlists/:plid # [✓] /api/v1/channel/{videos, shorts, playlists, streams, latest?}/:ucid (rewrite) # [✓] /api/v1/:videoIdXXXX/maxres.jpg redirects to best quality thumbnail +# [✓] /api/v1/storyboards/:videoIdXXXX # ---------- # PLANNED: -# [X] /api/v1/storyboards/:videoIdXXXX # [X] /api/v1/videos/:videoIdXXXX does not depend on yt-dlp and offloads stream retrieval elsewhere (making initial response fast) # [X] /api/v1/manifest/:videoIdXXXX (above is prerequisite) # [X] rewrite the awful lookup logic @@ -425,7 +427,7 @@ def videos(data): lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[]) related_entry['videoId'] = safeTraverse(y, ['contentId']) related_entry['title'] = safeTraverse(lmvm, ['title', 'content']) - related_entry['videoThumbnails'] = ythdd_struct_parser.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails']) + related_entry['videoThumbnails'] = ythdd_struct_builder.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails']) related_entry['author'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content']) related_entry['authorId'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID") related_entry['authorUrl'] = '/channel/' + related_entry['authorId'] @@ -493,6 +495,10 @@ def videos(data): # requests for the video's comments don't have to # spawn an additional request for initial ctoken 
def _parse_storyboard_spec(storyboard_template, length):
    """Split a storyboard spec string into its base url and per-level formats.

    The spec is a "|"-separated string: the first item is the template url,
    every following item describes one storyboard level as "#"-separated
    fields (width, height, thumb_count, columns, rows, interval, $N, sigh).

    `length` is the video length in seconds; it is only used to derive the
    interval of levels that report interval=0.

    Returns a `(base_url, formats)` tuple where `formats` is a list of dicts.
    Raises IndexError/ValueError/ZeroDivisionError/TypeError on malformed input.
    """
    # sample storyboard template url structure, indented for readability
    # https://i.ytimg.com/sb/:videoId/storyboard3_L$L/$N.jpg?sqp=b64encodedprotobuf
    #   | 48  #  27 # 100 # 10 # 10 #    0 # default # rs$datadatadatadatadatadatadatadatada
    #   | 80  #  45 #  55 # 10 # 10 # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    #   | 160 #  90 #  55 #  5 #  5 # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    #   | 320 # 180 #  55 #  3 #  3 # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    #     ^ width, height, thumb_count, columns, rows, interval, $N, sigh parameter.
    # $L is just the index of a given storyboard, say, 0 for $N=default
    base_url, *raw_formats = storyboard_template.split("|")

    extracted_formats = []
    for index, raw_format in enumerate(raw_formats):
        fields = raw_format.split("#")
        width = int(fields[0])
        height = int(fields[1])
        count = int(fields[2])
        columns = int(fields[3])
        rows = int(fields[4])
        interval = int(fields[5])
        name = fields[6]
        sigh = fields[7]

        thumbs_per_image = columns * rows
        images_count = math.ceil(count / thumbs_per_image)
        if interval == 0:
            # calculated only for $N=default as it's the only one that has interval=0
            interval = int((length / count) * 1000)

        extracted_formats.append({
            "index": index,
            "width": width,
            "height": height,
            "thumb_count": count,
            "columns": columns,
            "rows": rows,
            "interval": interval,
            "name": name,
            "sigh": sigh,
            "images_count": images_count
        })

    return base_url, extracted_formats


def ensure_storyboards(video_id: str, wdata = None, length = 60):
    """Extract the storyboard spec of a video and store it in the general cache.

    `wdata` is the already extracted watch page data; when it is None the
    page is fetched with ythdd_extractor.WEBextractSinglePage(video_id).
    `length` is the video length in seconds (used for levels with interval=0).

    Returns True on successful extraction, False when it failed. On failure
    the cache entry for a previously unseen video stays None.
    """

    # Storyboards don't expire. They can be cached indefinitely.
    ythdd_globals.general_cache["storyboards"].setdefault(video_id, None)

    if wdata is None:
        wdata = ythdd_extractor.WEBextractSinglePage(video_id)

    # get storyboard template string
    storyboard_template = safeTraverse(wdata, ["ec1", "storyboards", "playerStoryboardSpecRenderer", "spec"], default=None)
    # silly sanity check, todo: do a regex one instead?
    if not isinstance(storyboard_template, str):
        print(f"error(ensure_storyboards): couldn't extract storyboards from video page ({video_id}). this video won't have storyboards.")
        return False

    # try to extract data from the storyboard template
    try:
        base_url, extracted_formats = _parse_storyboard_spec(storyboard_template, length)
        storyboard_data = {
            "template_url": ythdd_globals.translateLinks(base_url, remove_params=False), # NOT removing params is crucial, otherwise sqp will be dropped!
            "formats": extracted_formats
        }
    except Exception as exc:
        # best-effort extraction: report what went wrong, but let
        # KeyboardInterrupt/SystemExit propagate (a bare except would eat them)
        print(f"error(ensure_storyboards): storyboard template url layout changed. please update ythdd for latest storyboard extraction fixes. ({type(exc).__name__}: {exc})")
        return False

    ythdd_globals.general_cache["storyboards"][video_id] = storyboard_data
    return True


def _int_or_none(value):
    """Return `value` converted to int, or None when it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def storyboards(data, req):
    """Handle /api/v1/storyboards/:video_id and respond with a WebVTT storyboard.

    The optional `width` and `height` query parameters select the storyboard
    level; values that are absent or not integers are passed on as None.
    The video id is read from data[3].
    """

    height = _int_or_none(req.args.get("height"))
    width = _int_or_none(req.args.get("width"))
    video_id = data[3]

    resp = ythdd_struct_builder.genWebvttStoryboard(video_id, width, height)

    return Response(resp, mimetype="text/vtt", status=200)
def genStoryboards(video_id: str, cached_storyboards: dict = None) -> list:
    """Build the "storyboards" list embedded in /api/v1/videos/:video_id.

    `cached_storyboards` is an optional, already looked-up cache entry (a dict
    with "template_url" and "formats"); when it is None the entry is read from
    ythdd_globals.general_cache["storyboards"][video_id].

    Returns one dict per cached storyboard level, or an empty list when no
    storyboard data is available for the video.
    """
    if cached_storyboards is None:
        cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id], default=None)
    if not cached_storyboards:
        # nothing cached (or extraction failed earlier): no storyboards to offer
        return []

    template_url = cached_storyboards['template_url']
    return [
        {
            "url": f"/api/v1/storyboards/{video_id}?width={sb['width']}&height={sb['height']}",
            "templateUrl": template_url.replace("$L", str(sb['index'])).replace("$N", sb['name']) + f"&sigh={sb['sigh']}",
            "width": sb['width'],
            "height": sb['height'],
            "count": sb['thumb_count'],
            "interval": sb['interval'],
            "storyboardWidth": sb['columns'],
            "storyboardHeight": sb['rows'],
            "storyboardCount": sb['images_count']
        }
        for sb in cached_storyboards["formats"]
    ]

def msToWebvttTimestamp(time: int):
    """Convert a duration in milliseconds to a WebVTT timestamp (HH:MM:SS.mmm)."""

    time, ms = divmod(time, 1000)
    time, seconds = divmod(time, 60)
    hours, minutes = divmod(time, 60)

    return f"{str(hours).zfill(2)}:{str(minutes).zfill(2)}:{str(seconds).zfill(2)}.{str(ms).zfill(3)}"

def genWebvttStoryboard(video_id: str, width: int = None, height: int = None, cached_storyboards: dict = None):
    """Generate the WebVTT storyboard served at /api/v1/storyboards/:video_id.

    `width` / `height` select the storyboard level whose width or height
    matches (the last matching level wins); when neither matches, the first
    level is used. `cached_storyboards` is an optional, already looked-up
    cache entry; when it is None the entry is read from
    ythdd_globals.general_cache["storyboards"][video_id].

    Returns the WebVTT document as a string, or "" when there is no
    storyboard data (no cache entry, or an entry without any level).
    """
    if cached_storyboards is None:
        cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id])
    if cached_storyboards is None:
        return ""

    formats = cached_storyboards["formats"]
    if not formats:
        return ""

    # could be changed
    found_storyboard = formats[0]
    for sb in formats:
        if width is not None and width == sb['width']:
            found_storyboard = sb
        if height is not None and height == sb['height']:
            found_storyboard = sb

    # every value below must come from the *selected* level, not from
    # whichever level the selection loop happened to visit last
    columns = found_storyboard['columns']
    rows = found_storyboard['rows']
    thumb_width = found_storyboard['width']
    thumb_height = found_storyboard['height']
    interval = found_storyboard['interval']
    thumbs_per_sb = columns * rows

    # $N has to be substituted before $M, because the name itself may contain $M
    url_template = cached_storyboards['template_url'].replace("$L", str(found_storyboard['index'])).replace("$N", found_storyboard['name'])
    sigh_param = f"&sigh={found_storyboard['sigh']}"

    parts = ["WEBVTT\n\n"]
    start = 0
    for x in range(found_storyboard["thumb_count"]):
        # thumbnails fill a sheet row by row: the row advances every `columns` thumbs
        xx = x % columns
        yy = (x // columns) % rows
        xywh = f"#xywh={xx * thumb_width},{yy * thumb_height},{thumb_width},{thumb_height}"
        parts.append(f"{msToWebvttTimestamp(start)} --> {msToWebvttTimestamp(start + interval)}\n")
        parts.append(url_template.replace("$M", str(x // thumbs_per_sb)) + f"{sigh_param}{xywh}\n")
        parts.append("\n")
        start += interval

    return "".join(parts)
'width': width, 'height': height}) - - return result - def doesContainNumber(string: str, numeric_system: int = 10) -> bool: try: number = int(string, numeric_system) @@ -150,7 +127,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: "authorUrl": "/channel/" + author_ucid, "authorVerified": verified, # TODO "authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url), - "videoThumbnails": genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")), + "videoThumbnails": ythdd_struct_builder.genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")), "description": description, "descriptionHtml": description_html, "viewCount": view_count, @@ -258,7 +235,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: "authorId": author_ucid, "authorUrl": "/channel/" + author_ucid, "authorVerified": False, - "videoThumbnails": genThumbs(video_id), + "videoThumbnails": ythdd_struct_builder.genThumbs(video_id), "description": "", "descriptionHtml": "", "viewCount": parseViewsFromViewText(views_text), @@ -287,7 +264,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: # thumbnail["url"] = ythdd_globals.translateLinks(thumbnail["url"]) video_id = safeTraverse(entry, ["gridVideoRenderer", "videoId"], default="UnknownVideoId") - thumbnails = genThumbs(video_id) + thumbnails = ythdd_struct_builder.genThumbs(video_id) published_date = safeTraverse(entry, ["gridVideoRenderer", "publishedTimeText", "simpleText"], default="now") published_date = published_date.removeprefix("Streamed ") @@ -384,7 +361,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: "authorId": author_ucid, "authorUrl": "/channel/" + author_ucid, "authorThumbnails": avatars, - "videoThumbnails": genThumbs(video_id), + "videoThumbnails": ythdd_struct_builder.genThumbs(video_id), "index": video_index, "lengthSeconds": length, "liveNow": False, # todo: check this?