feat: storyboard generation (json, webvtt) and proxy
adds support for video storyboard extraction, generation and proxying
This commit is contained in:
15
views.py
15
views.py
@@ -45,6 +45,21 @@ def thumbnailProxy(received_request):
|
||||
|
||||
return response
|
||||
|
||||
def storyboardProxy(received_request):
    """Relay a storyboard image request to i.ytimg.com.

    ``received_request`` is appended to ``https://i.ytimg.com/sb/``. The
    ``sqp`` and ``sigh`` query parameters of the incoming request are
    forwarded upstream unchanged. When either one is absent, a JSON error
    body with HTTP status 400 is returned and no upstream request is made.
    """
    upstream_params = {
        "sqp": request.args.get("sqp"),
        "sigh": request.args.get("sigh"),
    }

    if any(value is None for value in upstream_params.values()):
        error_body = json.dumps({"status": "error", "error": "Request lacks the sqp, sigh params (or both)."})
        return Response(error_body, mimetype="application/json", status=400)

    prefix = "https://i.ytimg.com/"
    target_url = prefix + "sb/" + received_request

    upstream = requests.get(
        target_url,
        headers=ythdd_globals.getHeaders(caller='proxy'),
        params=upstream_params,
        stream=True,
    )
    # let the raw stream undo any content-encoding while it is being read
    upstream.raw.decode_content = True

    # mirror the upstream status and content type; the body is streamed through
    return Response(
        upstream.raw,
        mimetype=upstream.headers['content-type'],
        status=upstream.status_code,
    )
|
||||
|
||||
def ggphtProxy(received_request):
|
||||
|
||||
prefix = "https://yt3.ggpht.com/"
|
||||
|
||||
1
ythdd.py
1
ythdd.py
@@ -64,6 +64,7 @@ def setup():
|
||||
app.add_url_rule('/api/', view_func=ythdd_api.api_greeting)
|
||||
app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall)
|
||||
app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy)
|
||||
app.add_url_rule('/sb/<path:received_request>', view_func=views.storyboardProxy)
|
||||
app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy)
|
||||
app.add_url_rule('/guc/<path:received_request>', view_func=views.gucProxy)
|
||||
app.add_url_rule('/img/<path:received_request>', view_func=views.imgProxy)
|
||||
|
||||
@@ -23,7 +23,7 @@ version = "0.0.1"
|
||||
apiVersion = "1"
|
||||
randomly_generated_passcode = 0
|
||||
video_cache = {}
|
||||
general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}, "playlists": {}, "hashed_videoplayback": {}}
|
||||
general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}, "playlists": {}, "storyboards": {}, "hashed_videoplayback": {}}
|
||||
|
||||
def getConfig(configfile):
|
||||
|
||||
|
||||
107
ythdd_inv_tl.py
107
ythdd_inv_tl.py
@@ -12,9 +12,11 @@ import json, datetime
|
||||
import dateparser
|
||||
import html
|
||||
import invidious_formats
|
||||
import math
|
||||
import ythdd_globals
|
||||
import ythdd_api_v1
|
||||
import ythdd_extractor
|
||||
import ythdd_struct_builder
|
||||
import ythdd_struct_parser
|
||||
|
||||
# TODOs:
|
||||
@@ -32,9 +34,9 @@ import ythdd_struct_parser
|
||||
# [✓] /api/v1/playlists/:plid
|
||||
# [✓] /api/v1/channel/{videos, shorts, playlists, streams, latest?}/:ucid (rewrite)
|
||||
# [✓] /api/v1/:videoIdXXXX/maxres.jpg redirects to best quality thumbnail
|
||||
# [✓] /api/v1/storyboards/:videoIdXXXX
|
||||
# ----------
|
||||
# PLANNED:
|
||||
# [X] /api/v1/storyboards/:videoIdXXXX
|
||||
# [X] /api/v1/videos/:videoIdXXXX does not depend on yt-dlp and offloads stream retrieval elsewhere (making initial response fast)
|
||||
# [X] /api/v1/manifest/:videoIdXXXX (above is prerequisite)
|
||||
# [X] rewrite the awful lookup logic
|
||||
@@ -425,7 +427,7 @@ def videos(data):
|
||||
lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[])
|
||||
related_entry['videoId'] = safeTraverse(y, ['contentId'])
|
||||
related_entry['title'] = safeTraverse(lmvm, ['title', 'content'])
|
||||
related_entry['videoThumbnails'] = ythdd_struct_parser.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
|
||||
related_entry['videoThumbnails'] = ythdd_struct_builder.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
|
||||
related_entry['author'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content'])
|
||||
related_entry['authorId'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
|
||||
related_entry['authorUrl'] = '/channel/' + related_entry['authorId']
|
||||
@@ -493,6 +495,10 @@ def videos(data):
|
||||
# requests for the video's comments don't have to
|
||||
# spawn an additional request for initial ctoken
|
||||
ensure_comment_continuation(video_id, wdata)
|
||||
storyboards = []
|
||||
storyboards_extracted = ensure_storyboards(video_id, wdata, length=length)
|
||||
if storyboards_extracted:
|
||||
storyboards = ythdd_struct_builder.genStoryboards(video_id)
|
||||
|
||||
time_end = time()
|
||||
|
||||
@@ -500,8 +506,8 @@ def videos(data):
|
||||
"type": video_type,
|
||||
"title": title,
|
||||
"videoId": video_id,
|
||||
"videoThumbnails": ythdd_struct_parser.genThumbs(video_id),
|
||||
"storyboards": [], # not implemented
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"storyboards": storyboards,
|
||||
"description": description, # due to change (include ythdd metadata)
|
||||
"descriptionHtml": description_html,
|
||||
"published": published,
|
||||
@@ -788,6 +794,7 @@ def ensure_comment_continuation(video_id: str, wdata = None):
|
||||
wdata = ythdd_extractor.WEBextractSinglePage(video_id)
|
||||
|
||||
# search for "top comments" continuation token
|
||||
# todo: replace this with on-demand continuation creation
|
||||
comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
|
||||
if comment_continuation is not None:
|
||||
ythdd_globals.general_cache["continuations"]["comments"][video_id].append(comment_continuation)
|
||||
@@ -795,6 +802,77 @@ def ensure_comment_continuation(video_id: str, wdata = None):
|
||||
print(f"error: couldn't extract comment continuation token from video page ({video_id}). this video likely has comments disabled.")
|
||||
ythdd_globals.general_cache["continuations"]["comments"][video_id].append("")
|
||||
|
||||
def _parse_storyboard_format(index: int, raw_format: str, length) -> dict:
    """Turn one "#"-separated storyboard format descriptor into a dict.

    The descriptor fields are, in order: width, height, thumb_count,
    columns, rows, interval, name ($N) and the sigh parameter.

    Raises ValueError, IndexError or ZeroDivisionError when the descriptor
    does not follow that layout.
    """
    fields = raw_format.split("#")
    width, height, count, columns, rows, interval = (int(field) for field in fields[:6])
    name = fields[6]
    sigh = fields[7]

    thumbs_per_image = columns * rows
    images_count = math.ceil(count / thumbs_per_image)
    if interval == 0:
        # calculated only for $N=default as it's the only one that has interval=0
        interval = int((length / count) * 1000)

    return {
        "index": index,
        "width": width,
        "height": height,
        "thumb_count": count,
        "columns": columns,
        "rows": rows,
        "interval": interval,
        "name": name,
        "sigh": sigh,
        "images_count": images_count
    }


def ensure_storyboards(video_id: str, wdata = None, length = 60):
    """Extract a video's storyboard spec and store it in the general cache.

    Parameters:
        video_id: id of the video; used as the cache key.
        wdata:    already extracted page data. When None, it is fetched with
                  ythdd_extractor.WEBextractSinglePage(video_id).
        length:   video length, used to derive the interval of formats whose
                  declared interval is 0 (presumably seconds, as the result
                  is multiplied by 1000 — TODO confirm against callers).

    Returns True on successful extraction, False when it failed.
    On success general_cache["storyboards"][video_id] holds a dict with the
    keys "template_url" and "formats".
    """
    # Storyboards don't expire. They can be cached indefinitely.
    cache = ythdd_globals.general_cache["storyboards"]
    if video_id not in cache:
        cache[video_id] = None

    if wdata is None:
        wdata = ythdd_extractor.WEBextractSinglePage(video_id)

    # get storyboard template string
    storyboard_template = safeTraverse(wdata, ["ec1", "storyboards", "playerStoryboardSpecRenderer", "spec"], default=None)

    # silly sanity check, todo: do a regex one instead?
    if not isinstance(storyboard_template, str):
        print(f"error(ensure_storyboards): couldn't extract storyboards from video page ({video_id}). this video won't have storyboards.")
        return False

    # sample storyboard template url structure, indented for readability
    # https://i.ytimg.com/sb/:videoId/storyboard3_L$L/$N.jpg?sqp=b64encodedprotobuf
    # | 48  # 27  # 100 # 10 # 10 # 0    # default # rs$datadatadatadatadatadatadatadatada
    # | 80  # 45  # 55  # 10 # 10 # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    # | 160 # 90  # 55  # 5  # 5  # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    # | 320 # 180 # 55  # 3  # 3  # 1000 # M$M     # rs$datadatadatadatadatadatadatadatada
    # ^ width, height, thumb_count, columns, rows, interval, $N, sigh parameter. $L is just the index of a given storyboard, say, 0 for $N=default

    # try to extract data from the storyboard template
    try:
        base_url, *formats = storyboard_template.split("|")
        extracted_formats = [
            _parse_storyboard_format(index, fmt, length)
            for index, fmt in enumerate(formats)
        ]
        storyboards = {
            "template_url": ythdd_globals.translateLinks(base_url, remove_params=False), # NOT removing params is crucial, otherwise sqp will be dropped!
            "formats": extracted_formats
        }
    except Exception:
        # Extraction stays best-effort (any parsing failure means "no
        # storyboards"), but KeyboardInterrupt/SystemExit are no longer
        # swallowed the way a bare except would.
        print("error(ensure_storyboards): storyboard template url layout changed. please update ythdd for latest storyboard extraction fixes.")
        return False

    cache[video_id] = storyboards
    return True
|
||||
|
||||
|
||||
def channels(data, req, only_json: bool = False):
|
||||
|
||||
# prevent potential out of bound read
|
||||
@@ -998,6 +1076,25 @@ def playlists(data, req, only_json: bool = False):
|
||||
|
||||
return send(200, response)
|
||||
|
||||
def storyboards(data, req):
    """Answer /api/v1/storyboards/:video_id with a WebVTT storyboard.

    Parameters:
        data: split request path; the video id is read from data[3].
        req:  incoming request; the optional "width" and "height" query
              parameters choose the storyboard format.

    Returns a Response with mimetype text/vtt and status 200 whose body is
    whatever ythdd_struct_builder.genWebvttStoryboard() produced.
    """

    def _to_int(raw):
        # A missing parameter (None) raises TypeError, non-numeric text
        # raises ValueError. Both mean "no preference", expressed as None
        # so the builder never receives a raw string.
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    height = _to_int(req.args.get("height"))
    width = _to_int(req.args.get("width"))
    video_id = data[3]

    resp = ythdd_struct_builder.genWebvttStoryboard(video_id, width, height)

    return Response(resp, mimetype="text/vtt", status=200)
|
||||
|
||||
def lookup(data, req):
|
||||
# possibly TODO: rewrite this mess
|
||||
if len(data) > 2:
|
||||
@@ -1021,6 +1118,8 @@ def lookup(data, req):
|
||||
return get_comments(data, req)
|
||||
case 'playlists':
|
||||
return playlists(data, req)
|
||||
case 'storyboards':
|
||||
return storyboards(data, req)
|
||||
case _:
|
||||
incrementBadRequests()
|
||||
return notImplemented(data)
|
||||
|
||||
93
ythdd_struct_builder.py
Normal file
93
ythdd_struct_builder.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from ythdd_globals import safeTraverse
|
||||
import ythdd_globals
|
||||
|
||||
def genThumbs(videoId: str):
    """Build the thumbnail descriptors for a video.

    Returns a list of dicts with the keys 'quality', 'url', 'width' and
    'height'. Every url points at <public_facing_url>vi/<videoId>/<name>.jpg,
    where public_facing_url is read from the [general] config section.
    """
    # (quality, file name without extension, width, height)
    variants = (
        ("maxres", "maxres", 1280, 720), # will always attempt to return the best quality available
        ("maxresdefault", "maxresdefault", 1280, 720),
        ("sddefault", "sddefault", 640, 480),
        ("high", "hqdefault", 480, 360),
        ("medium", "mqdefault", 320, 180),
        ("default", "default", 120, 90),
        ("start", "1", 120, 90),
        ("middle", "2", 120, 90),
        ("end", "3", 120, 90),
    )

    base_url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/'

    return [
        {'quality': quality, 'url': base_url + stem + '.jpg', 'width': width, 'height': height}
        for quality, stem, width, height in variants
    ]
|
||||
|
||||
def genStoryboards(video_id: str) -> list:
    """Generate the "storyboards" list used inside /api/v1/videos/:video_id.

    Reads the entry stored under general_cache["storyboards"][video_id] and
    returns one dict per cached format. Returns an empty list when nothing
    usable is cached for the video.

    "templateUrl" has $L and $N substituted and the sigh parameter appended;
    any other placeholder in the template (e.g. $M) is left untouched.
    """
    cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id], default=None)

    # A missing entry or an empty/None value cannot be indexed with
    # ["formats"], so report "no storyboards" instead of raising.
    if not cached_storyboards:
        return []

    template_url = cached_storyboards['template_url']

    return [
        {
            "url": f"/api/v1/storyboards/{video_id}?width={sb['width']}&height={sb['height']}",
            "templateUrl": template_url.replace("$L", str(sb['index'])).replace("$N", sb['name']) + f"&sigh={sb['sigh']}",
            "width": sb['width'],
            "height": sb['height'],
            "count": sb['thumb_count'],
            "interval": sb['interval'],
            "storyboardWidth": sb['columns'],
            "storyboardHeight": sb['rows'],
            "storyboardCount": sb['images_count']
        }
        for sb in cached_storyboards["formats"]
    ]
|
||||
|
||||
def msToWebvttTimestamp(time: int):
    """Format a duration in milliseconds as a WebVTT timestamp.

    The result has the shape HH:MM:SS.mmm; hours, minutes and seconds are
    zero-padded to at least two digits, milliseconds to at least three.
    """
    total_seconds, ms = divmod(time, 1000)
    hours, remainder = divmod(total_seconds, 60 * 60)
    minutes, seconds = divmod(remainder, 60)

    clock = ":".join(str(part).zfill(2) for part in (hours, minutes, seconds))
    return clock + "." + str(ms).zfill(3)
|
||||
|
||||
def genWebvttStoryboard(video_id: str, width: int = None, height: int = None):
    """Generate the WebVTT storyboard for /api/v1/storyboards/:video_id.

    Parameters:
        video_id: key into general_cache["storyboards"].
        width:    preferred thumbnail width, or None for no preference.
        height:   preferred thumbnail height, or None for no preference.

    A cached format is selected when its width or its height equals the
    requested value; when several match, the last one wins. When none
    matches, the first cached format is used.

    Returns the WebVTT document as a string, or "" when nothing is cached
    for the video or the cached entry has no formats. Each cue spans one
    interval and points at the storyboard image holding the thumbnail, with
    an #xywh fragment selecting the thumbnail inside that image.
    """
    cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id])
    if cached_storyboards is None:
        return ""

    formats = cached_storyboards["formats"]
    if not formats:
        # no format to fall back to, so there is nothing to describe
        return ""

    # could be changed
    found_storyboard = formats[0]
    for sb in formats:
        if width is not None and width == sb['width']:
            found_storyboard = sb
        if height is not None and height == sb['height']:
            found_storyboard = sb

    # All geometry must come from the selected format, not from whichever
    # format the selection loop happened to visit last.
    columns = found_storyboard['columns']
    rows = found_storyboard['rows']
    thumb_width = found_storyboard['width']
    thumb_height = found_storyboard['height']
    interval = found_storyboard['interval']
    thumbs_per_sb = columns * rows

    # $L, $N and the sigh parameter are the same for every cue
    image_template = cached_storyboards['template_url'].replace("$L", str(found_storyboard['index'])).replace("$N", found_storyboard['name'])
    sigh_suffix = f"&sigh={found_storyboard['sigh']}"

    parts = ["WEBVTT\n\n"]
    start = 0
    for x in range(found_storyboard["thumb_count"]):
        # thumbnails fill each image row by row: the column advances first,
        # the row advances once per `columns` thumbnails
        xx = x % columns
        yy = (x // columns) % rows
        xywh = f"#xywh={xx * thumb_width},{yy * thumb_height},{thumb_width},{thumb_height}"

        parts.append(f"{msToWebvttTimestamp(start)} --> {msToWebvttTimestamp(start + interval)}\n")
        # $M is the index of the storyboard image that holds thumbnail x
        parts.append(image_template.replace("$M", str(x // thumbs_per_sb)) + f"{sigh_suffix}{xywh}\n")
        parts.append("\n")
        start += interval

    return "".join(parts)
|
||||
@@ -4,33 +4,10 @@ import json
|
||||
import dateparser
|
||||
import ythdd_globals
|
||||
import ythdd_extractor
|
||||
import ythdd_struct_builder
|
||||
|
||||
DEFAULT_AVATAR = "https://yt3.ggpht.com/a/default-user=s176-c-k-c0x00ffffff-no-rj"
|
||||
|
||||
def genThumbs(videoId: str):
    """Return the thumbnail descriptors of a video as a list of dicts.

    Each dict carries 'quality', 'url', 'width' and 'height'; the url is
    <public_facing_url>vi/<videoId>/<name>.jpg with public_facing_url taken
    from the [general] config section.
    """
    # quality -> (file name without extension, width, height); the insertion
    # order is the order of the returned list
    catalogue = {
        "maxres": ("maxres", 1280, 720), # will always attempt to return the best quality available
        "maxresdefault": ("maxresdefault", 1280, 720),
        "sddefault": ("sddefault", 640, 480),
        "high": ("hqdefault", 480, 360),
        "medium": ("mqdefault", 320, 180),
        "default": ("default", 120, 90),
        "start": ("1", 120, 90),
        "middle": ("2", 120, 90),
        "end": ("3", 120, 90),
    }

    location = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/'

    result = []
    for quality, (stem, width, height) in catalogue.items():
        result.append({'quality': quality, 'url': location + stem + '.jpg', 'width': width, 'height': height})
    return result
|
||||
|
||||
def doesContainNumber(string: str, numeric_system: int = 10) -> bool:
|
||||
try:
|
||||
number = int(string, numeric_system)
|
||||
@@ -150,7 +127,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorVerified": verified, # TODO
|
||||
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url),
|
||||
"videoThumbnails": genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
|
||||
"description": description,
|
||||
"descriptionHtml": description_html,
|
||||
"viewCount": view_count,
|
||||
@@ -258,7 +235,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorVerified": False,
|
||||
"videoThumbnails": genThumbs(video_id),
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"description": "",
|
||||
"descriptionHtml": "",
|
||||
"viewCount": parseViewsFromViewText(views_text),
|
||||
@@ -287,7 +264,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
|
||||
# thumbnail["url"] = ythdd_globals.translateLinks(thumbnail["url"])
|
||||
|
||||
video_id = safeTraverse(entry, ["gridVideoRenderer", "videoId"], default="UnknownVideoId")
|
||||
thumbnails = genThumbs(video_id)
|
||||
thumbnails = ythdd_struct_builder.genThumbs(video_id)
|
||||
|
||||
published_date = safeTraverse(entry, ["gridVideoRenderer", "publishedTimeText", "simpleText"], default="now")
|
||||
published_date = published_date.removeprefix("Streamed ")
|
||||
@@ -384,7 +361,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorThumbnails": avatars,
|
||||
"videoThumbnails": genThumbs(video_id),
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"index": video_index,
|
||||
"lengthSeconds": length,
|
||||
"liveNow": False, # todo: check this?
|
||||
|
||||
Reference in New Issue
Block a user