feat: storyboard generation (json, webvtt) and proxy

adds support for video storyboard extraction, generation and proxying
2025-10-15 00:03:45 +02:00
parent c760104d70
commit b0845d723a
6 changed files with 218 additions and 33 deletions
--- a/views.py
+++ b/views.py
@@ -45,6 +45,21 @@ def thumbnailProxy(received_request):

 	return response

+def storyboardProxy(received_request):
+	# Proxies a storyboard image request to i.ytimg.com ("sb/" + received_request).
+	# The "sqp" and "sigh" query params are mandatory and forwarded as-is;
+	# when either is missing, a JSON error with status 400 is returned instead.
+	query = {"sqp": request.args.get("sqp"), "sigh": request.args.get("sigh")}
+	if query["sqp"] is None or query["sigh"] is None:
+		error_body = json.dumps({"status": "error", "error": "Request lacks the sqp, sigh params (or both)."})
+		return Response(error_body, mimetype="application/json", status=400)
+
+	upstream_url = "https://i.ytimg.com/" + "sb/" + received_request
+	upstream = requests.get(upstream_url, headers=ythdd_globals.getHeaders(caller='proxy'), params=query, stream=True)
+	# ask urllib3 to undo any content-encoding while the raw stream is being relayed
+	upstream.raw.decode_content = True
+	return Response(upstream.raw, mimetype=upstream.headers['content-type'], status=upstream.status_code)
+
 def ggphtProxy(received_request):

 	prefix = "https://yt3.ggpht.com/"
--- a/ythdd.py
+++ b/ythdd.py
@@ -64,6 +64,7 @@ def setup():
 	app.add_url_rule('/api/', view_func=ythdd_api.api_greeting)
 	app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall)
 	app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy)
+	app.add_url_rule('/sb/<path:received_request>', view_func=views.storyboardProxy)
 	app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy)
 	app.add_url_rule('/guc/<path:received_request>', view_func=views.gucProxy)
 	app.add_url_rule('/img/<path:received_request>', view_func=views.imgProxy)
--- a/ythdd_globals.py
+++ b/ythdd_globals.py
@@ -23,7 +23,7 @@ version = "0.0.1"
 apiVersion = "1"
 randomly_generated_passcode = 0
 video_cache = {}
-general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}, "playlists": {}, "hashed_videoplayback": {}}
+general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}, "playlists": {}, "storyboards": {}, "hashed_videoplayback": {}}

 def getConfig(configfile):
 	
--- a/ythdd_inv_tl.py
+++ b/ythdd_inv_tl.py
@@ -12,9 +12,11 @@ import json, datetime
 import dateparser
 import html
 import invidious_formats
+import math
 import ythdd_globals
 import ythdd_api_v1
 import ythdd_extractor
+import ythdd_struct_builder
 import ythdd_struct_parser

 # TODOs:
@@ -32,9 +34,9 @@ import ythdd_struct_parser
 # [✓] /api/v1/playlists/:plid
 # [✓] /api/v1/channel/{videos, shorts, playlists, streams, latest?}/:ucid (rewrite)
 # [✓] /api/v1/:videoIdXXXX/maxres.jpg redirects to best quality thumbnail
+# [✓] /api/v1/storyboards/:videoIdXXXX
 # ----------
 # PLANNED:
-# [X] /api/v1/storyboards/:videoIdXXXX
 # [X] /api/v1/videos/:videoIdXXXX does not depend on yt-dlp and offloads stream retrieval elsewhere (making initial response fast)
 # [X] /api/v1/manifest/:videoIdXXXX (above is prerequisite)
 # [X] rewrite the awful lookup logic
@@ -425,7 +427,7 @@ def videos(data):
 				lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[])
 				related_entry['videoId']          = safeTraverse(y, ['contentId'])
 				related_entry['title']            = safeTraverse(lmvm, ['title', 'content'])
-				related_entry['videoThumbnails']  = ythdd_struct_parser.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
+				related_entry['videoThumbnails']  = ythdd_struct_builder.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
 				related_entry['author']           = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content'])
 				related_entry['authorId']         = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
 				related_entry['authorUrl']        = '/channel/' + related_entry['authorId']
@@ -493,6 +495,10 @@ def videos(data):
 	# requests for the video's comments don't have to
 	# spawn an additional request for initial ctoken
 	ensure_comment_continuation(video_id, wdata)
+	storyboards = []
+	storyboards_extracted = ensure_storyboards(video_id, wdata, length=length)
+	if storyboards_extracted:
+		storyboards = ythdd_struct_builder.genStoryboards(video_id)

 	time_end = time()

@@ -500,8 +506,8 @@ def videos(data):
 		"type": video_type,
 		"title": title,
 		"videoId": video_id,
-		"videoThumbnails": ythdd_struct_parser.genThumbs(video_id),
-		"storyboards": [], # not implemented
+		"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
+		"storyboards": storyboards,
 		"description": description, # due to change (include ythdd metadata)
 		"descriptionHtml": description_html,
 		"published": published,
@@ -788,6 +794,7 @@ def ensure_comment_continuation(video_id: str, wdata = None):
 			wdata = ythdd_extractor.WEBextractSinglePage(video_id)

 		# search for "top comments" continuation token
+		# todo: replace this with on-demand continuation creation
 		comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
 		if comment_continuation is not None:
 			ythdd_globals.general_cache["continuations"]["comments"][video_id].append(comment_continuation)
@@ -795,6 +802,77 @@ def ensure_comment_continuation(video_id: str, wdata = None):
 			print(f"error: couldn't extract comment continuation token from video page ({video_id}). this video likely has comments disabled.")
 			ythdd_globals.general_cache["continuations"]["comments"][video_id].append("")

+def ensure_storyboards(video_id: str, wdata = None, length = 60):
+	# Makes sure the storyboard specs of video_id are cached; returns True when usable ones are, False otherwise.
+	# wdata: already extracted video page (fetched here when None).
+	# length: video length in seconds, used to derive the interval of formats reporting interval=0.
+
+	# Storyboards don't expire. They can be cached indefinitely.
+	cache = ythdd_globals.general_cache["storyboards"]
+	if video_id in cache:
+		# a cache hit has to report its state too, otherwise every call after
+		# the first one would return None and callers would drop the storyboards
+		return cache[video_id] is not None
+	cache[video_id] = None
+
+	if wdata is None:
+		wdata = ythdd_extractor.WEBextractSinglePage(video_id)
+
+	# get storyboard template string
+	storyboard_template = safeTraverse(wdata, ["ec1", "storyboards", "playerStoryboardSpecRenderer", "spec"], default=None)
+	# silly sanity check, todo: do a regex one instead?
+	if not isinstance(storyboard_template, str):
+		print(f"error(ensure_storyboards): couldn't extract storyboards from video page ({video_id}). this video won't have storyboards.")
+		return False
+
+	# sample storyboard template url structure, indented for readability
+	# https://i.ytimg.com/sb/:videoId/storyboard3_L$L/$N.jpg?sqp=b64encodedprotobuf
+	# |  48 #  27 # 100 # 10 # 10 #    0 # default # rs$datadatadatadatadatadatadatadatada
+	# |  80 #  45 #  55 # 10 # 10 # 1000 #     M$M # rs$datadatadatadatadatadatadatadatada
+	# | 160 #  90 #  55 #  5 #  5 # 1000 #     M$M # rs$datadatadatadatadatadatadatadatada
+	# | 320 # 180 #  55 #  3 #  3 # 1000 #     M$M # rs$datadatadatadatadatadatadatadatada
+	# ^ width, height, thumb_count, columns, rows, interval, $N, sigh parameter. $L is just the index of a given storyboard, say, 0 for $N=default
+
+	# try to extract data from the storyboard template
+	try:
+		base_url, *formats = storyboard_template.split("|")
+		extracted_formats = []
+		for index, fmt in enumerate(formats):
+			fields   = fmt.split("#")
+			width    = int(fields[0])
+			height   = int(fields[1])
+			count    = int(fields[2])
+			columns  = int(fields[3])
+			rows     = int(fields[4])
+			interval = int(fields[5])
+			name     =     fields[6]
+			sigh     =     fields[7]
+			thumbs_per_image = columns * rows
+			images_count = math.ceil(count / thumbs_per_image)
+			interval = interval if interval != 0 else int((length / count) * 1000) # calculated only for $N=default as it's the only one that has interval=0
+			extracted_formats.append({
+				"index": index,
+				"width": width,
+				"height": height,
+				"thumb_count": count,
+				"columns": columns,
+				"rows": rows,
+				"interval": interval,
+				"name": name,
+				"sigh": sigh,
+				"images_count": images_count
+			})
+
+		cache[video_id] = {
+			"template_url": ythdd_globals.translateLinks(base_url, remove_params=False), # NOT removing params is crucial, otherwise sqp will be dropped!
+			"formats": extracted_formats
+		}
+		return True
+	except Exception:
+		print("error(ensure_storyboards): storyboard template url layout changed. please update ythdd for latest storyboard extraction fixes.")
+		return False
+
+
 def channels(data, req, only_json: bool = False):

 	# prevent potential out of bound read
@@ -998,6 +1076,25 @@ def playlists(data, req, only_json: bool = False):

 	return send(200, response)

+def storyboards(data, req):
+	# Serves the WebVTT storyboard of the video given by data[3].
+	# The optional "width"/"height" query params pick a storyboard format;
+	# missing or non-numeric values are passed on as None (= no preference).
+	video_id = data[3]
+
+	dimensions = {}
+	for param in ("width", "height"):
+		try:
+			dimensions[param] = int(req.args.get(param))
+		except (TypeError, ValueError):
+			# int(None) raises TypeError, int("abc") raises ValueError
+			dimensions[param] = None
+
+	resp = ythdd_struct_builder.genWebvttStoryboard(video_id, dimensions["width"], dimensions["height"])
+
+	# NOTE(review): status is 200 even when resp is empty (nothing cached for video_id)
+	return Response(resp, mimetype="text/vtt", status=200)
+
 def lookup(data, req):
 	# possibly TODO: rewrite this mess
 	if len(data) > 2:
@@ -1021,6 +1118,8 @@ def lookup(data, req):
 					return get_comments(data, req)
 				case 'playlists':
 					return playlists(data, req)
+				case 'storyboards':
+					return storyboards(data, req)
 				case _:
 					incrementBadRequests()
 					return notImplemented(data)
--- a/ythdd_struct_builder.py
+++ b/ythdd_struct_builder.py
@@ -0,0 +1,93 @@
+from ythdd_globals import safeTraverse
+import ythdd_globals
+
+def genThumbs(videoId: str):
+	# Builds the list of thumbnail descriptors (quality, url, width, height) of a video.
+	# Urls are the configured public_facing_url followed by vi/<videoId>/<file name>.jpg
+
+	# (quality, file name without extension, width, height)
+	variants = (
+		("maxres",        "maxres",        1280, 720), # will always attempt to return the best quality available
+		("maxresdefault", "maxresdefault", 1280, 720),
+		("sddefault",     "sddefault",     640,  480),
+		("high",          "hqdefault",     480,  360),
+		("medium",        "mqdefault",     320,  180),
+		("default",       "default",       120,  90),
+		("start",         "1",             120,  90),
+		("middle",        "2",             120,  90),
+		("end",           "3",             120,  90),
+	)
+
+	# every url shares the same prefix, only the file name differs
+	base_url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/'
+	return [
+		{'quality': quality, 'url': base_url + filename + '.jpg', 'width': width, 'height': height}
+		for quality, filename, width, height in variants
+	]
+
+def genStoryboards(video_id: str) -> list:
+	# generates storyboards inside of /api/v1/videos/:video_id
+	# returns an empty list when nothing usable is cached for video_id
+	cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id], default=None)
+	if not isinstance(cached_storyboards, dict):
+		# not cached at all, or extraction failed earlier and left a None placeholder
+		return []
+
+	return [{
+		"url": f"/api/v1/storyboards/{video_id}?width={sb['width']}&height={sb['height']}",
+		"templateUrl": cached_storyboards['template_url'].replace("$L", str(sb['index'])).replace("$N", sb['name']) + f"&sigh={sb['sigh']}",
+		"width": sb['width'],
+		"height": sb['height'],
+		"count": sb['thumb_count'],
+		"interval": sb['interval'],
+		"storyboardWidth": sb['columns'],
+		"storyboardHeight": sb['rows'],
+		"storyboardCount": sb['images_count']
+	} for sb in cached_storyboards["formats"]]
+
+def msToWebvttTimestamp(time: int):
+	# Converts a duration given in milliseconds into a WebVTT timestamp
+	# of the form hh:mm:ss.ttt (hours grow past two digits when needed).
+
+	# peel off the units one by one, smallest first
+	total_seconds, ms = divmod(time, 1000)
+	total_minutes, seconds = divmod(total_seconds, 60)
+	hours, minutes = divmod(total_minutes, 60)
+
+	# zero-pad each field: two digits for h/m/s, three for milliseconds
+	fields = [str(hours).zfill(2), str(minutes).zfill(2), str(seconds).zfill(2)]
+
+	return ":".join(fields) + "." + str(ms).zfill(3)
+
+def genWebvttStoryboard(video_id: str, width: int = None, height: int = None):
+	# generates WebVTT storyboards for /api/v1/storyboards/:video_id
+	# width/height pick the format (matching either one is enough, the last matching format wins);
+	# without a match the first format is used. Returns "" when nothing is cached for video_id.
+	cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id])
+	if cached_storyboards is None:
+		return ""
+	found_storyboard = {}
+	for sb in cached_storyboards["formats"]:
+		if width is not None and width == sb['width']:
+			found_storyboard = sb
+		if height is not None and height == sb['height']:
+			found_storyboard = sb
+	# could be changed
+	if not found_storyboard:
+		found_storyboard = cached_storyboards["formats"][0]
+
+	# from here on only the selected format may be used, not whichever one the loop above ended on
+	fmt = found_storyboard
+	thumbs_per_sb = fmt['columns'] * fmt['rows']
+	url_template = cached_storyboards['template_url'].replace("$L", str(fmt['index'])).replace("$N", fmt['name'])
+	cues = ["WEBVTT\n\n"]
+	for x in range(fmt["thumb_count"]):
+		start = x * fmt['interval']
+		# thumbnails fill an image row by row, so the row advances every `columns` thumbnails
+		xx = x % fmt['columns']
+		yy = (x // fmt['columns']) % fmt['rows']
+		xywh = f"#xywh={xx * fmt['width']},{yy * fmt['height']},{fmt['width']},{fmt['height']}"
+		cues.append(f"{msToWebvttTimestamp(start)} --> {msToWebvttTimestamp(start + fmt['interval'])}\n")
+		cues.append(url_template.replace("$M", str(x // thumbs_per_sb)) + f"&sigh={fmt['sigh']}{xywh}\n\n")
+
+	return "".join(cues)
--- a/ythdd_struct_parser.py
+++ b/ythdd_struct_parser.py
@@ -4,33 +4,10 @@ import json
 import dateparser
 import ythdd_globals
 import ythdd_extractor
+import ythdd_struct_builder

 DEFAULT_AVATAR = "https://yt3.ggpht.com/a/default-user=s176-c-k-c0x00ffffff-no-rj"

-def genThumbs(videoId: str):
-
-	result = []
-	thumbnails = [
-			{'height': 720, 'width': 1280, 'quality': "maxres",        'url': "maxres"}, # will always attempt to return the best quality available
-			{'height': 720, 'width': 1280, 'quality': "maxresdefault", 'url': "maxresdefault"},
-			{'height': 480, 'width': 640,  'quality': "sddefault",     'url': "sddefault"},
-			{'height': 360, 'width': 480,  'quality': "high",          'url': "hqdefault"},
-			{'height': 180, 'width': 320,  'quality': "medium",        'url': "mqdefault"},
-			{'height': 90,  'width': 120,  'quality': "default",       'url': "default"},
-			{'height': 90,  'width': 120,  'quality': "start",         'url': "1"},
-			{'height': 90,  'width': 120,  'quality': "middle",        'url': "2"},
-			{'height': 90,  'width': 120,  'quality': "end",           'url': "3"},
-	]
-
-	for x in thumbnails:
-		width = x['width']
-		height = x['height']
-		quality = x['quality']
-		url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/' + x['url'] + '.jpg'
-		result.append({'quality': quality, 'url': url, 'width': width, 'height': height})
-
-	return result
-
 def doesContainNumber(string: str, numeric_system: int = 10) -> bool:
 	try:
 		number = int(string, numeric_system)
@@ -150,7 +127,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
 				"authorUrl": "/channel/" + author_ucid,
 				"authorVerified": verified, # TODO
 				"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url),
-				"videoThumbnails": genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
+				"videoThumbnails": ythdd_struct_builder.genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
 				"description": description,
 				"descriptionHtml": description_html,
 				"viewCount": view_count,
@@ -258,7 +235,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
 				"authorId": author_ucid,
 				"authorUrl": "/channel/" + author_ucid,
 				"authorVerified": False,
-				"videoThumbnails": genThumbs(video_id),
+				"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
 				"description": "",
 				"descriptionHtml": "",
 				"viewCount": parseViewsFromViewText(views_text),
@@ -287,7 +264,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
 			# 	thumbnail["url"] = ythdd_globals.translateLinks(thumbnail["url"])

 			video_id = safeTraverse(entry, ["gridVideoRenderer", "videoId"], default="UnknownVideoId")
-			thumbnails = genThumbs(video_id)
+			thumbnails = ythdd_struct_builder.genThumbs(video_id)

 			published_date = safeTraverse(entry, ["gridVideoRenderer", "publishedTimeText", "simpleText"], default="now")
 			published_date = published_date.removeprefix("Streamed ")
@@ -384,7 +361,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
 				"authorId": author_ucid,
 				"authorUrl": "/channel/" + author_ucid,
 				"authorThumbnails": avatars,
-				"videoThumbnails": genThumbs(video_id),
+				"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
 				"index": video_index,
 				"lengthSeconds": length,
 				"liveNow": False, # todo: check this?