feat: video comments endpoint
@@ -434,4 +434,86 @@ def WEBgetSearchSuggestions(query: str, previous_query: str = '') -> list:
    return {
        "query": query,
        "suggestions": suggestions
    }

def WEBgetVideoComments(ctoken: str) -> tuple:

    # ctoken needs to be passed explicitly,
    # no guessing or retrieving it from globals.
    if ctoken is None:
        return [], ""

    # build a web context containing the relevant ctoken
    web_context = makeWebContext({"continuation": ctoken})
    response = requests.post('https://www.youtube.com/youtubei/v1/next',
                             params={"prettyPrint": False},
                             headers=stage2_headers,
                             data=json.dumps(web_context)
                             )

    results = []
    try:
        results = json.loads(response.text)
    except json.JSONDecodeError:
        pass

    comments = safeTraverse(results, ["frameworkUpdates", "entityBatchUpdate", "mutations"], default=[])
    comment_continuations = []
    comment_continuations_re = safeTraverse(results, ["onResponseReceivedEndpoints"], default=[])
    for received_endpoint in comment_continuations_re:

        # this is horrible...

        acia = safeTraverse(received_endpoint, ["appendContinuationItemsAction", "continuationItems"], default=[])
        rcic = safeTraverse(received_endpoint, ["reloadContinuationItemsCommand", "continuationItems"], default=[])

        for entry in acia:
            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
                comment_continuations = acia
                break

        for entry in rcic:
            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
                comment_continuations = rcic
                break

        if comment_continuations != []:
            break

    if comment_continuations == []:
        print("error: received an unknown comment structure, unable to parse continuations (replies)")
        # breakpoint()
        # return [], ""

    # extract the new continuation
    new_continuation = ""
    if "continuationItemRenderer" in safeTraverse(comment_continuations, [-1], default=[]):
        # first, look for the ctoken for the next page of comments inside of the response
        new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default=None)
        # or search elsewhere in case this is a reply thread
        if new_continuation is None:
            new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "button", "buttonRenderer", "command", "continuationCommand", "token"], default="")

    # perform a basic mutation check before parsing;
    # this will ignore replies liked by the video uploader ("hearts")
    actual_comments = [x for x in comments if "properties" in safeTraverse(x, ["payload", "commentEntityPayload"], default=[], quiet=True)]
    actual_comment_continuations = [x for x in comment_continuations if "replies" in safeTraverse(x, ["commentThreadRenderer"], default=[], quiet=True)]

    # link reply data (reply count and ctoken) for comments with replies
    for reply_renderer in actual_comment_continuations:

        mutual_key = safeTraverse(reply_renderer, ["commentThreadRenderer", "commentViewModel", "commentViewModel", "commentKey"], default="unknown-key")
        reply_ctoken = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
        reply_count = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "viewReplies", "buttonRenderer", "text", "runs", 0, "text"], default="0 replies").split(" ")[0]

        for comment in actual_comments:
            found_key = safeTraverse(comment, ["entityKey"], default="unknown-key")
            # try to link the relevant ctoken if a comment has replies
            if found_key == mutual_key:
                if ythdd_globals.config["general"]["debug"]: print(f"found reply for {found_key}")
                comment["replies"] = {
                    "replyCount": int(reply_count),
                    "continuation": reply_ctoken
                }

    return actual_comments, new_continuation
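
# note (illustration only, not part of this diff): safeTraverse() is an existing ythdd helper
# whose definition isn't shown here; the sketch below only captures the semantics the code
# above assumes -- walk a mixed dict/list key path, return `default` on any missing step,
# and let `quiet=True` suppress logging of the miss.
def safeTraverse_sketch(obj, path, default=None, quiet=False):
    current = obj
    for key in path:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            if not quiet:
                print(f"safeTraverse: nothing at {key!r} while walking {path}")
            return default
    return current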
@@ -23,7 +23,7 @@ version = "0.0.1"
apiVersion = "1"
randomly_generated_passcode = 0
video_cache = {}
general_cache = {"search": [], "continuations": {"channels": {}}, "channels": {}}
general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}}

def getConfig(configfile):
@@ -449,6 +449,12 @@ def videos(data):
    premium = True
    # TODO: detect paywalled patron-only videos

    # because we fetched the video's wdata, we might as
    # well save it inside of general cache so that
    # requests for the video's comments don't have to
    # spawn an additional request for initial ctoken
    ensure_comment_continuation(video_id, wdata)

    time_end = time()

    response = {
@@ -680,10 +686,70 @@ def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):
    return send(200, response)

def get_comments(data, req, only_json: bool = False):

    # get the comment continuation
    ctoken = req.args.get('continuation')

    # perform some basic video id validation
    if len(data) < 4 or len(data[3]) != 11:
        return send(400, {"error": "Bad request: invalid videoId."})

    video_id = data[3]

    # if a ctoken isn't provided, get it from the general cache
    if ctoken is None or ctoken == '':
        # but first ensure it's there
        ensure_comment_continuation(video_id)
        ctoken = ythdd_globals.general_cache["continuations"]["comments"][video_id][0]

    # get joined video comment models
    wdata, new_continuation = ythdd_extractor.WEBgetVideoComments(ctoken)

    comments = []
    for comment in wdata:
        # parse the comment
        parsed_comment = ythdd_struct_parser.customCommentRendererParser(comment)
        if parsed_comment is not None:
            comments.append(parsed_comment)

    response = {
        "videoId": video_id,
        "comments": comments,
        "continuation": new_continuation
    }

    if only_json:
        return response

    return send(200, response)
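
# illustration only (not part of this diff): a client-side paging sketch, assuming the
# lookup() router exposes the new handler as /api/v1/comments/<videoId>; the host, port and
# videoId below are placeholders. The first call lets the server resolve the initial ctoken
# from its cache; subsequent calls feed the returned "continuation" back in.
import requests

BASE = "http://localhost:8080/api/v1/comments/dQw4w9WgXcQ"  # hypothetical instance + example videoId

page = requests.get(BASE).json()
print(f"got {len(page['comments'])} comments")
if page.get("continuation"):
    next_page = requests.get(BASE, params={"continuation": page["continuation"]}).json()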

def ensure_comment_continuation(video_id: str, wdata=None):

    # save the continuation token for comments in the global comment cache
    if video_id not in ythdd_globals.general_cache["continuations"]["comments"]:
        ythdd_globals.general_cache["continuations"]["comments"][video_id] = []

    if wdata is None:
        # perhaps saving related videos to the cache might not be a bad idea?
        wdata = ythdd_extractor.WEBextractSinglePage(video_id)

    # search for the "top comments" continuation token
    comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
    if comment_continuation is not None:
        ythdd_globals.general_cache["continuations"]["comments"][video_id].append(comment_continuation)
    else:
        print(f"error: couldn't extract comment continuation token from video page ({video_id})")

def channels(data, req, only_json: bool = False):

    # prevent a potential out of bounds read
    if len(data) < 4:
        return send(400, {"error": "No channel specified."})

    # silly sanity check
    if len(data[3]) != 24 or not data[3].startswith("UC"):
        return send(404, {"error": "This channel does not exist."})

    if len(data) > 4:
@@ -775,6 +841,8 @@ def lookup(data, req):
            return search(data, req)
        case 'channels':
            return channels(data, req)
        case 'comments':
            return get_comments(data, req)
        case _:
            incrementBadRequests()
            return notImplemented(data)
@@ -304,6 +304,46 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
    # breakpoint()
    return

def customCommentRendererParser(comment: dict, context: dict = {}) -> dict:

    cep = safeTraverse(comment, ["payload", "commentEntityPayload"], default={})
    content = safeTraverse(cep, ["properties", "content", "content"], default="")
    content_html = escape(content).replace("\r\n", "<br>").replace("\n", "<br>")
    author = safeTraverse(cep, ["author"], default={})
    verified = safeTraverse(author, ["isVerified"], default=False) or safeTraverse(author, ["isArtist"], default=False)
    ucid = safeTraverse(author, ["channelId"], default="UNKNOWNCHANNELID")
    published_date = safeTraverse(cep, ["properties", "publishedTime"], default="now")
    edited = False

    if published_date.endswith(" (edited)"):
        edited = True
        published_date_unix = int(dateparser.parse(published_date.removesuffix(" (edited)")).timestamp())
    else:
        published_date_unix = int(dateparser.parse(published_date).timestamp())

    inv_comment = {
        "authorId": ucid,
        "authorUrl": "/channel/" + ucid,
        "author": safeTraverse(author, ["displayName"], default="@ythdd-unknown-user"),
        "verified": verified,
        "authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(author, ["avatarThumbnailUrl"], default=DEFAULT_AVATAR)), # proxy them!
        "authorIsChannelOwner": safeTraverse(author, ["isCreator"], default=False), # ???
        "isSponsor": False, # not sure how to retrieve this
        "likeCount": parseViewsFromViewText("0" + safeTraverse(cep, ["toolbar", "likeCountNotliked"], default="0") + " likes"),
        "isPinned": False,
        "commentId": safeTraverse(cep, ["properties", "commentId"], default="UNKNOWNCOMMENTID"),
        "content": content,
        "contentHtml": content_html,
        "isEdited": edited,
        "published": published_date_unix,
        "publishedText": published_date if published_date != "now" else "unknown amount of time ago"
    }

    if "replies" in comment:
        inv_comment["replies"] = comment["replies"]

    return inv_comment
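
# illustration only (not part of this diff): how the publishedTime handling above behaves,
# assuming dateparser resolves relative phrases such as "2 weeks ago" to a datetime.
import dateparser

raw = "2 weeks ago (edited)"                    # example properties.publishedTime value
edited = raw.endswith(" (edited)")              # -> True, surfaced as "isEdited"
clean = raw.removesuffix(" (edited)")           # -> "2 weeks ago"
published_unix = int(dateparser.parse(clean).timestamp())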

def parseDescriptionSnippet(snippet: list):

    text = ""