diff --git a/ythdd_extractor.py b/ythdd_extractor.py
index 177b4dd..05c28fe 100644
--- a/ythdd_extractor.py
+++ b/ythdd_extractor.py
@@ -434,4 +434,86 @@ def WEBgetSearchSuggestions(query: str, previous_query: str = '') -> list:
return {
"query": query,
"suggestions": suggestions
- }
\ No newline at end of file
+ }
+
+def WEBgetVideoComments(ctoken: str) -> tuple:
+    """Fetch one page of comments (or replies) from the youtubei ``next`` endpoint.
+
+    Returns ``(comments, continuation)``: the comment mutations (each gains a
+    ``replies`` dict when a reply thread carrying its key is found) and the
+    next page's token or "". A falsy ``ctoken`` gives ``([], "")``, no request.
+    """
+    # ctoken needs to be passed explicitly; no guessing or retrieving it from globals.
+    if not ctoken:
+        return [], ""
+
+    # build web context containing the relevant ctoken
+    web_context = makeWebContext({"continuation": ctoken})
+    response = requests.post('https://www.youtube.com/youtubei/v1/next',
+        params={"prettyPrint": False},
+        headers=stage2_headers,
+        data=json.dumps(web_context)
+    )
+
+    # best effort: a body that isn't valid JSON is handled like an empty response
+    try:
+        results = json.loads(response.text)
+    except ValueError:
+        results = {}
+
+    comments = safeTraverse(results, ["frameworkUpdates", "entityBatchUpdate", "mutations"], default=[])
+    comment_continuations = []
+    comment_continuations_re = safeTraverse(results, ["onResponseReceivedEndpoints"], default=[])
+    for received_endpoint in comment_continuations_re:
+        # the items sit under one of two differently named actions; keep the list holding
+        # comment threads or a continuation item (the reload command wins if both do)
+        acia = safeTraverse(received_endpoint, ["appendContinuationItemsAction", "continuationItems"], default=[])
+        rcic = safeTraverse(received_endpoint, ["reloadContinuationItemsCommand", "continuationItems"], default=[])
+        for candidate in (acia, rcic):
+            if any("commentThreadRenderer" in entry or "continuationItemRenderer" in entry for entry in candidate):
+                comment_continuations = candidate
+        if comment_continuations:
+            break
+
+    if not comment_continuations:
+        print("error: received an unknown comment structure, unable to parse continuations (replies)")
+
+    # extract new continuation
+    new_continuation = ""
+    if "continuationItemRenderer" in safeTraverse(comment_continuations, [-1], default=[]):
+        # first, look for ctoken inside of response for next page of comments
+        new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default=None)
+        # or search elsewhere in case this is a reply thread
+        if new_continuation is None:
+            new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "button", "buttonRenderer", "command", "continuationCommand", "token"], default="")
+
+    # perform a basic mutation check before parsing
+    # will ignore replies liked by video uploader ("hearts")
+    actual_comments = [x for x in comments if "properties" in safeTraverse(x, ["payload", "commentEntityPayload"], default=[], quiet=True)]
+    actual_comment_continuations = [x for x in comment_continuations if "replies" in safeTraverse(x, ["commentThreadRenderer"], default=[], quiet=True)]
+
+    # link reply data (reply count and ctoken) for comments with replies
+    for reply_renderer in actual_comment_continuations:
+        mutual_key = safeTraverse(reply_renderer, ["commentThreadRenderer", "commentViewModel", "commentViewModel", "commentKey"], default=None)
+        if mutual_key is None:
+            # a thread without a key must not be paired with a comment lacking one too
+            continue
+        reply_ctoken = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
+        reply_label = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "viewReplies", "buttonRenderer", "text", "runs", 0, "text"], default="0 replies")
+        # the count is read off a text label; an unexpected label counts as 0
+        # instead of raising and discarding the whole page
+        try:
+            reply_count = int(str(reply_label).split(" ")[0].replace(",", ""))
+        except ValueError:
+            reply_count = 0
+
+        for comment in actual_comments:
+            # try to link a relevant ctoken if a comment has response
+            if safeTraverse(comment, ["entityKey"], default=None) == mutual_key:
+                if ythdd_globals.config["general"]["debug"]: print(f"found reply for {mutual_key}")
+                comment["replies"] = {
+                    "replyCount": reply_count,
+                    "continuation": reply_ctoken
+                }
+
+    return actual_comments, new_continuation
diff --git a/ythdd_globals.py b/ythdd_globals.py
index 6484539..9277b16 100644
--- a/ythdd_globals.py
+++ b/ythdd_globals.py
@@ -23,7 +23,7 @@ version = "0.0.1"
apiVersion = "1"
randomly_generated_passcode = 0
video_cache = {}
-general_cache = {"search": [], "continuations": {"channels": {}}, "channels": {}}
+general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}}
def getConfig(configfile):
diff --git a/ythdd_inv_tl.py b/ythdd_inv_tl.py
index f29a38f..a89b9a0 100644
--- a/ythdd_inv_tl.py
+++ b/ythdd_inv_tl.py
@@ -449,6 +449,12 @@ def videos(data):
premium = True
# TODO: detect paywalled patron-only videos
+ # because we fetched the video's wdata, we might as
+ # well save it inside of general cache so that
+ # requests for the video's comments don't have to
+ # spawn an additional request for initial ctoken
+ ensure_comment_continuation(video_id, wdata)
+
time_end = time()
response = {
@@ -680,10 +686,70 @@ def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):
return send(200, response)
+def get_comments(data, req, only_json: bool = False):
+    """Serve one page of comments for the video id in ``data[3]``.
+
+    The page is chosen by the ``continuation`` query argument; without it the
+    first token cached for the video is used. Returns the response dict when
+    ``only_json`` is set, otherwise ``send(200, ...)``; a missing or
+    wrongly sized video id gives ``send(400, ...)``.
+    """
+    # perform some basic video id validation
+    if len(data) < 4 or len(data[3]) != 11:
+        return send(400, {"error": "Bad request: invalid videoId."})
+
+    video_id = data[3]
+
+    # if ctoken isn't provided, get it from the general cache
+    ctoken = req.args.get('continuation')
+    if not ctoken:
+        # but first ensure it's there
+        ensure_comment_continuation(video_id)
+        # nothing cached means no token could be extracted: pass None on instead of raising IndexError
+        cached_tokens = ythdd_globals.general_cache["continuations"]["comments"].get(video_id)
+        ctoken = cached_tokens[0] if cached_tokens else None
+
+    # get joined video comment models
+    wdata, new_continuation = ythdd_extractor.WEBgetVideoComments(ctoken)
+
+    # parse the comments, leaving out those the parser returns None for
+    parsed = (ythdd_struct_parser.customCommentRendererParser(comment) for comment in wdata)
+    response = {
+        "videoId": video_id,
+        "comments": [comment for comment in parsed if comment is not None],
+        "continuation": new_continuation
+    }
+
+    if only_json:
+        return response
+    return send(200, response)
+
+
+def ensure_comment_continuation(video_id: str, wdata = None):
+    """Cache the "top comments" continuation token of ``video_id``; ``wdata`` is the
+    extracted video page, fetched here only when omitted and no token is cached yet."""
+    tokens = ythdd_globals.general_cache["continuations"]["comments"].setdefault(video_id, [])
+    if wdata is None:
+        if tokens:
+            return  # already cached: don't spawn another request for the page
+        # perhaps saving related videos to cache might be not a bad idea?
+        wdata = ythdd_extractor.WEBextractSinglePage(video_id)
+    # search for "top comments" continuation token
+    comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
+    if comment_continuation is None:
+        print(f"error: couldn't extract comment continuation token from video page ({video_id})")
+    elif comment_continuation not in tokens:
+        # repeated calls must not pile up copies of the same token
+        tokens.append(comment_continuation)
+
def channels(data, req, only_json: bool = False):
+ # prevent potential out of bound read
+ if len(data) < 4:
+ return send(400, {"error": "No channel specified."})
+
+ # silly sanity check
if len(data[3]) != 24 or not data[3].startswith("UC"):
- # silly sanity check
return send(404, {"error": "This channel does not exist."})
if len(data) > 4:
@@ -775,6 +841,8 @@ def lookup(data, req):
return search(data, req)
case 'channels':
return channels(data, req)
+ case 'comments':
+ return get_comments(data, req)
case _:
incrementBadRequests()
return notImplemented(data)
diff --git a/ythdd_struct_parser.py b/ythdd_struct_parser.py
index 5dcc826..45b3f50 100644
--- a/ythdd_struct_parser.py
+++ b/ythdd_struct_parser.py
@@ -304,6 +304,46 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
# breakpoint()
return
+def customCommentRendererParser(comment: dict, context: dict = {}) -> dict:
+    """Flatten a comment entity mutation into the comment dict used in API responses.
+
+    Missing fields get placeholder values; ``context`` is unused; a ``replies`` entry on ``comment`` is copied over as is.
+    """
+    cep = safeTraverse(comment, ["payload", "commentEntityPayload"], default={})
+    content = safeTraverse(cep, ["properties", "content", "content"], default="")
+    content_html = escape(content).replace("\r\n", "<br>").replace("\n", "<br>")
+    author = safeTraverse(cep, ["author"], default={})
+    verified = safeTraverse(author, ["isVerified"], default=False) or safeTraverse(author, ["isArtist"], default=False)
+    ucid = safeTraverse(author, ["channelId"], default="UNKNOWNCHANNELID")
+    published_date = safeTraverse(cep, ["properties", "publishedTime"], default="now")
+    # the " (edited)" marker is part of the text and is cut off before parsing
+    edited = published_date.endswith(" (edited)")
+    # dateparser.parse returns None for text it can't interpret; use the current time then
+    parsed_date = dateparser.parse(published_date.removesuffix(" (edited)")) or dateparser.parse("now")
+    published_date_unix = int(parsed_date.timestamp())
+
+    inv_comment = {
+        "authorId": ucid,
+        "authorUrl": "/channel/" + ucid,
+        "author": safeTraverse(author, ["displayName"], default="@ythdd-unknown-user"),
+        "verified": verified,
+        "authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(author, ["avatarThumbnailUrl"], default=DEFAULT_AVATAR)), # proxy them!
+        "authorIsChannelOwner": safeTraverse(author, ["isCreator"], default=False), # ???
+        "isSponsor": False, # not sure how to retrieve this
+        "likeCount": parseViewsFromViewText("0" + safeTraverse(cep, ["toolbar", "likeCountNotliked"], default="0") + " likes"),
+        "isPinned": False,
+        "commentId": safeTraverse(cep, ["properties", "commentId"], default="UNKNOWNCOMMENTID"),
+        "content": content,
+        "contentHtml": content_html,
+        "isEdited": edited,
+        "published": published_date_unix,
+        "publishedText": published_date if published_date != "now" else "unknown amount of time ago"
+    }
+
+    if "replies" in comment:
+        inv_comment["replies"] = comment["replies"]
+
+    return inv_comment
def parseDescriptionSnippet(snippet: list):
text = ""