feat: video comments endpoint
@@ -435,3 +435,85 @@ def WEBgetSearchSuggestions(query: str, previous_query: str = '') -> list:
         "query": query,
         "suggestions": suggestions
     }
+
+def WEBgetVideoComments(ctoken: str) -> tuple:
+
+    # ctoken needs to be passed explicitly,
+    # no guessing or retrieving it from globals.
+    if ctoken is None:
+        return [], ""
+
+    # build web context containing the relevant ctoken
+    web_context = makeWebContext({"continuation": ctoken})
+    response = requests.post('https://www.youtube.com/youtubei/v1/next',
+        params={"prettyPrint": False},
+        headers=stage2_headers,
+        data=json.dumps(web_context)
+    )
+
+    results = []
+    try:
+        results = json.loads(response.text)
+    except:
+        pass
+
+    comments = safeTraverse(results, ["frameworkUpdates", "entityBatchUpdate", "mutations"], default=[])
+    comment_continuations = []
+    comment_continuations_re = safeTraverse(results, ["onResponseReceivedEndpoints"], default=[])
+    for received_endpoint in comment_continuations_re:
+
+        # this is horrible...
+
+        acia = safeTraverse(received_endpoint, ["appendContinuationItemsAction", "continuationItems"], default=[])
+        rcic = safeTraverse(received_endpoint, ["reloadContinuationItemsCommand", "continuationItems"], default=[])
+
+        for entry in acia:
+            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
+                comment_continuations = acia
+                break
+
+        for entry in rcic:
+            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
+                comment_continuations = rcic
+                break
+
+        if comment_continuations != []:
+            break
+
+    if comment_continuations == []:
+        print("error: received an unknown comment structure, unable to parse continuations (replies)")
+        # breakpoint()
+        # return [], ""
+
+    # extract new continuation
+    new_continuation = ""
+    if "continuationItemRenderer" in safeTraverse(comment_continuations, [-1], default=[]):
+        # first, look for a ctoken inside the response for the next page of comments
+        new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default=None)
+        # or search elsewhere in case this is a reply thread
+        if new_continuation is None:
+            new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "button", "buttonRenderer", "command", "continuationCommand", "token"], default="")
+
+    # perform a basic mutation check before parsing
+    # will ignore replies liked by video uploader ("hearts")
+    actual_comments = [x for x in comments if "properties" in safeTraverse(x, ["payload", "commentEntityPayload"], default=[], quiet=True)]
+    actual_comment_continuations = [x for x in comment_continuations if "replies" in safeTraverse(x, ["commentThreadRenderer"], default=[], quiet=True)]
+
+    # link reply data (reply count and ctoken) for comments with replies
+    for reply_renderer in actual_comment_continuations:
+
+        mutual_key = safeTraverse(reply_renderer, ["commentThreadRenderer", "commentViewModel", "commentViewModel", "commentKey"], default="unknown-key")
+        reply_ctoken = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
+        reply_count = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "viewReplies", "buttonRenderer", "text", "runs", 0, "text"], default="0 replies").split(" ")[0]
+
+        for comment in actual_comments:
+            found_key = safeTraverse(comment, ["entityKey"], default="unknown-key")
+            # try to link a relevant ctoken if a comment has replies
+            if found_key == mutual_key:
+                if ythdd_globals.config["general"]["debug"]: print(f"found reply for {found_key}")
+                comment["replies"] = {
+                    "replyCount": int(reply_count),
+                    "continuation": reply_ctoken
+                }
+
+    return actual_comments, new_continuation
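The extractor above leans entirely on safeTraverse to walk InnerTube's deeply nested JSON without raising on missing keys. The helper already exists elsewhere in this repo; as a point of reference only, a minimal sketch of the semantics the new code assumes (return the default on any missing key, bad index, or type mismatch; quiet suppresses logging) could look like this:

# Minimal sketch of the assumed safeTraverse behaviour; the real helper in the
# repo may differ in details such as what it logs when quiet=False.
def safeTraverse(obj, path: list, default=None, quiet: bool = True):
    current = obj
    for step in path:
        try:
            # dict keys and list indices are both handled by subscription
            current = current[step]
        except (KeyError, IndexError, TypeError):
            if not quiet:
                print(f"safeTraverse: could not resolve step {step!r}")
            return default
    return current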
@@ -23,7 +23,7 @@ version = "0.0.1"
 apiVersion = "1"
 randomly_generated_passcode = 0
 video_cache = {}
-general_cache = {"search": [], "continuations": {"channels": {}}, "channels": {}}
+general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}}

 def getConfig(configfile):

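For orientation, the new "comments" bucket is keyed by video id and holds the initial "top comments" ctoken appended by ensure_comment_continuation(); a hypothetical snapshot after one video has been visited (the video id is real, the token is a fabricated stand-in):

# Hypothetical state of ythdd_globals.general_cache after visiting one video.
general_cache = {
    "search": [],
    "continuations": {
        "channels": {},
        "comments": {
            "dQw4w9WgXcQ": ["Eg0SC2RRdzR3OVdnWGNSGAYy..."]  # fabricated, truncated ctoken
        }
    },
    "channels": {}
}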
@@ -449,6 +449,12 @@ def videos(data):
         premium = True
     # TODO: detect paywalled patron-only videos

+    # because we fetched the video's wdata, we might as
+    # well save it inside the general cache so that
+    # requests for the video's comments don't have to
+    # spawn an additional request for the initial ctoken
+    ensure_comment_continuation(video_id, wdata)
+
     time_end = time()

     response = {
@@ -680,10 +686,70 @@ def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):

     return send(200, response)

+def get_comments(data, req, only_json: bool = False):
+
+    # get comment continuation
+    ctoken = req.args.get('continuation')
+
+    # perform some basic video id validation
+    if len(data) < 4 or len(data) >= 4 and len(data[3]) != 11:
+        return send(400, {"error": "Bad request: invalid videoId."})
+
+    video_id = data[3]
+
+    # if ctoken isn't provided, get it from the general cache
+    if ctoken is None or ctoken == '':
+        # but first ensure it's there
+        ensure_comment_continuation(video_id)
+        ctoken = ythdd_globals.general_cache["continuations"]["comments"][video_id][0]
+
+    # get joined video comment models
+    wdata, new_continuation = ythdd_extractor.WEBgetVideoComments(ctoken)
+
+    comments = []
+    for comment in wdata:
+        # parse the comment
+        parsed_comment = ythdd_struct_parser.customCommentRendererParser(comment)
+        if parsed_comment is not None:
+            comments.append(parsed_comment)
+
+    response = {
+        "videoId": video_id,
+        "comments": comments,
+        "continuation": new_continuation
+    }
+
+    if only_json:
+        return response
+
+    return send(200, response)
+
+
+def ensure_comment_continuation(video_id: str, wdata = None):
+
+    # save the continuation token for comments in the global comment cache
+    if not video_id in ythdd_globals.general_cache["continuations"]["comments"]:
+        ythdd_globals.general_cache["continuations"]["comments"][video_id] = []
+
+    if wdata is None:
+        # perhaps saving related videos to the cache might not be a bad idea?
+        wdata = ythdd_extractor.WEBextractSinglePage(video_id)
+
+    # search for the "top comments" continuation token
+    comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
+    if comment_continuation is not None:
+        ythdd_globals.general_cache["continuations"]["comments"][video_id].append(comment_continuation)
+    else:
+        print(f"error: couldn't extract comment continuation token from video page ({video_id})")
+
 def channels(data, req, only_json: bool = False):

+    # prevent potential out of bound read
+    if len(data) < 4:
+        return send(400, {"error": "No channel specified."})
+
     # silly sanity check
     if len(data[3]) != 24 or not data[3].startswith("UC"):
         return send(404, {"error": "This channel does not exist."})

     if len(data) > 4:
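Together with the lookup() dispatch below, this exposes an Invidious-style comments route. Assuming the instance serves the usual /api/v1/ prefix (the exact prefix is not shown in this diff) and runs locally, fetching the first page and following the continuation might look like:

import requests

BASE = "http://localhost:8080"   # hypothetical local ythdd instance
video_id = "dQw4w9WgXcQ"

# First page: omit the continuation and let the server resolve the initial
# ctoken itself (via ensure_comment_continuation and the general cache).
page = requests.get(f"{BASE}/api/v1/comments/{video_id}").json()
for comment in page["comments"]:
    print(comment["author"], comment["likeCount"], comment["content"][:60])

# Subsequent pages: feed the returned continuation back as a query parameter.
if page["continuation"]:
    next_page = requests.get(f"{BASE}/api/v1/comments/{video_id}",
                             params={"continuation": page["continuation"]}).json()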
@@ -775,6 +841,8 @@ def lookup(data, req):
             return search(data, req)
         case 'channels':
             return channels(data, req)
+        case 'comments':
+            return get_comments(data, req)
         case _:
             incrementBadRequests()
             return notImplemented(data)
@@ -304,6 +304,46 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
     # breakpoint()
     return
+
+def customCommentRendererParser(comment: dict, context: dict = {}) -> dict:
+
+    cep = safeTraverse(comment, ["payload", "commentEntityPayload"], default={})
+    content = safeTraverse(cep, ["properties", "content", "content"], default="")
+    content_html = escape(content).replace("\r\n", "<br>").replace("\n", "<br>")
+    author = safeTraverse(cep, ["author"], default={})
+    verified = safeTraverse(author, ["isVerified"], default=False) or safeTraverse(author, ["isArtist"], default=False)
+    ucid = safeTraverse(author, ["channelId"], default="UNKNOWNCHANNELID")
+    published_date = safeTraverse(cep, ["properties", "publishedTime"], default="now")
+    edited = False
+
+    if published_date.endswith(" (edited)"):
+        edited = True
+        published_date_unix = int(dateparser.parse(published_date.removesuffix(" (edited)")).timestamp())
+    else:
+        published_date_unix = int(dateparser.parse(published_date).timestamp())
+
+    inv_comment = {
+        "authorId": ucid,
+        "authorUrl": "/channel/" + ucid,
+        "author": safeTraverse(author, ["displayName"], default="@ythdd-unknown-user"),
+        "verified": verified,
+        "authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(author, ["avatarThumbnailUrl"], default=DEFAULT_AVATAR)), # proxy them!
+        "authorIsChannelOwner": safeTraverse(author, ["isCreator"], default=False), # ???
+        "isSponsor": False, # not sure how to retrieve this
+        "likeCount": parseViewsFromViewText("0" + safeTraverse(cep, ["toolbar", "likeCountNotliked"], default="0") + " likes"),
+        "isPinned": False,
+        "commentId": safeTraverse(cep, ["properties", "commentId"], default="UNKNOWNCOMMENTID"),
+        "content": content,
+        "contentHtml": content_html,
+        "isEdited": edited,
+        "published": published_date_unix,
+        "publishedText": published_date if published_date != "now" else "unknown amount of time ago"
+    }
+
+    if "replies" in comment:
+        inv_comment["replies"] = comment["replies"]
+
+    return inv_comment

 def parseDescriptionSnippet(snippet: list):

     text = ""
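To make the field mapping above concrete, here is a fabricated, heavily trimmed commentEntityPayload mutation and the call that would turn it into the Invidious-style dict (real InnerTube payloads carry far more fields than shown):

# Fabricated minimal mutation; only the fields customCommentRendererParser reads.
sample_comment = {
    "entityKey": "fabricated-comment-key",
    "payload": {
        "commentEntityPayload": {
            "properties": {
                "commentId": "UgxFabricatedCommentId",
                "content": {"content": "First!\nGreat video."},
                "publishedTime": "2 hours ago (edited)"
            },
            "author": {
                "channelId": "UCxxxxxxxxxxxxxxxxxxxxxx",
                "displayName": "@example-user",
                "avatarThumbnailUrl": "https://yt3.ggpht.com/example=s88",
                "isVerified": False,
                "isArtist": False,
                "isCreator": False
            },
            "toolbar": {"likeCountNotliked": "12"}
        }
    }
}

parsed = customCommentRendererParser(sample_comment)
# parsed["isEdited"] is True, parsed["published"] is a unix timestamp roughly
# two hours in the past, and parsed["contentHtml"] renders the newline as <br>.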