feat: playlist browsing
Pagination still needs refinement for some of the clients. On another note, this is an anniversary commit, as ythdd turns 1 year old today.
This commit is contained in:
@@ -517,3 +517,75 @@ def WEBgetVideoComments(ctoken: str) -> tuple:
|
||||
}
|
||||
|
||||
return actual_comments, new_continuation
|
||||
|
||||
def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"):
    """Request one page of a playlist from YouTube's web ``browse`` endpoint.

    Without ``ctoken`` the request is addressed by ``browseId`` (``prefix``
    concatenated with ``plid``) and the response's ``header`` and ``sidebar``
    sections are returned as metadata. With ``ctoken`` the request is a
    continuation: ``plid`` is not sent and no metadata is extracted.

    Returns a 3-tuple ``(metadata, new_continuation, videos)``:
      * ``metadata`` - dict with "header" and "sidebar" keys on a first-time
        fetch, ``None`` on a continuation fetch.
      * ``new_continuation`` - whatever safeTraverse finds at the continuation
        token path (presumably None when there is no next page - confirm
        against safeTraverse).
      * ``videos`` - the raw, unparsed list of renderer items; on a
        continuation fetch its last element is the continuation item itself.
    """
    # The browseId payload is always built (exactly as before), even though a
    # continuation request ends up not using it.
    first_fetch_params = {'browseId': prefix + plid}
    # playlist id can be omitted if ctoken is provided
    request_params = {'continuation': ctoken} if ctoken else first_fetch_params

    payload = makeWebContext(request_params)
    response = requests.post(
        'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
        headers=ythdd_globals.getHeaders(),
        json=payload,
    )
    data = json.loads(response.text)

    # TODO (after python protodec implementation/wrapper is done):
    # # try to extract ctoken containing the full playlist, including delisted/deleted videos
    # full_playlist_ctoken = urllib.parse.quote(protodec.from_json({
    #     "80226972:embedded": {
    #         "2:string": prefix + plid,
    #         "3:base64": {
    #             "1:varint": request_count, # todo: increment by 200 with an external index
    #             "15:string": "PT:" + urllib.parse.quote(protodec.from_json({"1:varint": index})),
    #             "104:embedded": {"1:0:varint": 0}
    #         },
    #         "35:string": plid
    #     }
    # }))
    # # if ctoken creation succeeded
    # if full_playlist_ctoken:
    #     # make another request
    #     response = requests.post(
    #         'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
    #         headers = ythdd_globals.getHeaders(),
    #         json = makeWebContext({'continuation': full_playlist_ctoken})
    #     )
    #     data = json.loads(response.text)
    # else:
    #     print("error(WEBextractPlaylist): full playlist metadata extraction failed. Delisted/deleted videos will be missing.")

    if ctoken:  # or full_playlist_ctoken:
        # subsequent request: no metadata, items arrive in an append action
        metadata = None
        items_path = [
            "onResponseReceivedActions", 0,
            "appendContinuationItemsAction", "continuationItems",
        ]
        token_tail = [
            -1, "continuationItemRenderer", "continuationEndpoint",
            "continuationCommand", "token",
        ]
    else:
        # first-time request: metadata is only extracted here
        metadata = {
            "header": safeTraverse(data, ["header"]),
            #"microformat": safeTraverse(data, ["microformat"]),
            "sidebar": safeTraverse(data, ["sidebar"]),
        }
        items_path = [
            "contents", "twoColumnBrowseResultsRenderer", "tabs", 0,
            "tabRenderer", "content", "sectionListRenderer", "contents", 0,
            "itemSectionRenderer", "contents", 0,
            "playlistVideoListRenderer", "contents",
        ]
        token_tail = [
            -1, "continuationItemRenderer", "continuationEndpoint",
            "commandExecutorCommand", "commands", -1,
            "continuationCommand", "token",
        ]

    # The next-page token sits inside the last element of the item list, so
    # its path is the item-list path extended by the branch-specific tail.
    new_continuation = safeTraverse(data, items_path + token_tail)

    # "best-effort" extraction of the playlist's videos: entries are returned
    # exactly as received, so None's (unsuccessful video extraction = None)
    # are passed as they are.
    # warning! todo: iterate over this, as shorts cannot be currently extracted
    # (they use richGridRenderer, not playlistVideoListRenderer)
    # On a continuation fetch the list includes the continuation item as its
    # last element, which will be ignored.
    videos = safeTraverse(data, items_path)

    return metadata, new_continuation, videos
|
||||
|
||||
|
||||
Reference in New Issue
Block a user