diff --git a/ythdd_extractor.py b/ythdd_extractor.py index d59e6c7..2508a42 100644 --- a/ythdd_extractor.py +++ b/ythdd_extractor.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import brotli, yt_dlp, requests, json, time from ythdd_globals import safeTraverse +import ythdd_proto import ythdd_globals ytdl_opts = { @@ -518,12 +519,15 @@ def WEBgetVideoComments(ctoken: str) -> tuple: return actual_comments, new_continuation -def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"): +def WEBextractPlaylist(plid: str = "", ctoken: str = ""): - additional_context = {'browseId': prefix + plid} + # if ctoken has been provided, use it if ctoken: # playlist id can be omitted if ctoken is provided additional_context = {'continuation': ctoken} + else: + # try to create ctoken which will allow for accessing the full playlist, including delisted/deleted videos + additional_context = {'continuation': ythdd_proto.producePlaylistContinuation(plid, offset=0)} context = makeWebContext(additional_context) @@ -540,52 +544,15 @@ def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"): if not ctoken: metadata = { "header": safeTraverse(resp_json, ["header"]), - #"microformat": safeTraverse(resp_json, ["microformat"]), "sidebar": safeTraverse(resp_json, ["sidebar"]) } - # TODO (after python protodec implementation/wrapper is done): - # # try to extract ctoken containing the full playlist, including delisted/deleted videos - # full_playlist_ctoken = urllib.parse.quote(protodec.from_json({ - # "80226972:embedded": { - # "2:string": prefix + plid, - # "3:base64": { - # "1:varint": request_count, # todo: increment by 200 with an external index - # "15:string": "PT:" + urllib.parse.quote(protodec.from_json({"1:varint": index})), - # "104:embedded": {"1:0:varint": 0} - # }, - # "35:string": plid - # } - # })) - # # if ctoken creation succeeded - # if full_playlist_ctoken: - # # make another request - # response = requests.post( - # 'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false', - # headers = ythdd_globals.getHeaders(), - # json = makeWebContext({'continuation': full_playlist_ctoken}) - # ) - # resp_json = json.loads(response.text) - # else: - # print("error(WEBextractPlaylist): full playlist metadata extraction failed. Delisted/deleted videos will be missing.") - # extract continuation - new_continuation = None - if ctoken: - # subsequent request - new_continuation = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems", -1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"]) - else: - # first-time request - new_continuation = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents", -1, "continuationItemRenderer", "continuationEndpoint", "commandExecutorCommand", "commands", -1, "continuationCommand", "token"]) + new_continuation = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems", -1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"]) # "best-effort" playlist's videos extraction # "best-effort" because None's (unsuccessful video extraction = None) are passed as they are - # warning! todo: iterate over this, as shorts cannot be currently extracted (they use richGridRenderer, not playlistVideoListRenderer) - videos = None - if ctoken: # or full_playlist_ctoken: - videos = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"]) # includes continuation as last element of list, which will be ignored - else: - videos = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents"]) + videos = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"]) # includes continuation as last element of list, which will be ignored return metadata, new_continuation, videos diff --git a/ythdd_struct_parser.py b/ythdd_struct_parser.py index 015f730..f8f0a16 100644 --- a/ythdd_struct_parser.py +++ b/ythdd_struct_parser.py @@ -313,6 +313,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: } case "playlistVideoRenderer": + # used by all content inside of playlists which have at least one non-shorts video/livestream video_id = safeTraverse(entry, ["playlistVideoRenderer", "videoId"], default="UnknownVideoId") title = safeTraverse(entry, ["playlistVideoRenderer", "title", "runs", 0, "text"], default="Unknown video title") @@ -321,7 +322,11 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict: video_index = int(safeTraverse(entry, ["playlistVideoRenderer", "index", "simpleText"], default="1")) - 1 length = parseLengthFromTimeBadge(safeTraverse(entry, ["playlistVideoRenderer", "lengthText", "simpleText"], default="0:0")) published_date = safeTraverse(entry, ["playlistVideoRenderer", "videoInfo", "runs", -1, "text"], default="2000-01-01") - published_date = published_date.removeprefix("Streamed ") + published_date = published_date.removeprefix("Streamed ").removeprefix(" watching") + + # handle livestreams + if not published_date: + published_date = "now" return { "type": "video",