feat: video comments endpoint

2025-09-14 07:02:22 +02:00
parent d0d2298186
commit ef177f7200
4 changed files with 193 additions and 3 deletions
--- a/ythdd_inv_tl.py
+++ b/ythdd_inv_tl.py
@@ -449,6 +449,12 @@ def videos(data):
 		premium = True
 	# TODO: detect paywalled patron-only videos

+	# since the video's wdata has already been fetched, cache
+	# the initial comment continuation token from it now, so
+	# that a later request for the video's comments doesn't
+	# have to fetch the video page again just to get the ctoken
+	ensure_comment_continuation(video_id, wdata)
+
 	time_end = time()

 	response = {
@@ -680,10 +686,70 @@ def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):

 	return send(200, response)

+def get_comments(data, req, only_json: bool = False):
+	"""Serve one page of comments for the video whose id is data[3].
+
+	Uses the "continuation" query argument of req, falling back to the
+	cached initial token. Responds with 400 for a malformed id and 404 when
+	no token is available. Returns the plain dict when only_json is set.
+	"""
+	# perform some basic video id validation
+	if len(data) < 4 or len(data[3]) != 11:
+		return send(400, {"error": "Bad request: invalid videoId."})
+
+	video_id = data[3]
+	ctoken = req.args.get('continuation')
+
+	# if ctoken isn't provided, get it from the general cache
+	if not ctoken:
+		# but first ensure it's there
+		ensure_comment_continuation(video_id)
+		cached = ythdd_globals.general_cache["continuations"]["comments"][video_id]
+		if not cached:
+			# token extraction failed, so there is nothing to start from
+			return send(404, {"error": "Could not retrieve comments for this video."})
+		ctoken = cached[0]
+
+	# get joined video comment models
+	wdata, new_continuation = ythdd_extractor.WEBgetVideoComments(ctoken)
+
+	# parse the comments, dropping the ones the parser returns None for
+	parsed = (ythdd_struct_parser.customCommentRendererParser(c) for c in wdata)
+	response = {
+		"videoId": video_id,
+		"comments": [c for c in parsed if c is not None],
+		"continuation": new_continuation
+	}
+	if only_json:
+		return response
+	return send(200, response)
+
+
+def ensure_comment_continuation(video_id: str, wdata = None):
+	"""Cache the initial "top comments" continuation token for video_id.
+
+	A no-op if a token is already cached. wdata is the extracted video page;
+	it is fetched here when omitted. Empty entries (failed lookups) are retried.
+	"""
+	cache = ythdd_globals.general_cache["continuations"]["comments"]
+	if video_id in cache and cache[video_id]:
+		return
+	if wdata is None:
+		wdata = ythdd_extractor.WEBextractSinglePage(video_id)
+	# search for "top comments" continuation token
+	comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
+	cache[video_id] = [] if comment_continuation is None else [comment_continuation]
+	if comment_continuation is None:
+		print(f"error: couldn't extract comment continuation token from video page ({video_id})")
+
 def channels(data, req, only_json: bool = False):

+	# prevent potential out of bound read
+	if len(data) < 4:
+		return send(400, {"error": "No channel specified."})
+
+	# silly sanity check
 	if len(data[3]) != 24 or not data[3].startswith("UC"):
-		# silly sanity check
 		return send(404, {"error": "This channel does not exist."})

 	if len(data) > 4:
@@ -775,6 +841,8 @@ def lookup(data, req):
 					return search(data, req)
 				case 'channels':
 					return channels(data, req)
+				case 'comments':
+					return get_comments(data, req)
 				case _:
 					incrementBadRequests()
 					return notImplemented(data)