fix: fix shorts-only playlists by using an on-demand ctoken generator

This allows playlist videos to be retrieved as playlistVideoRenderer entries,
which we already have a parser for. Another benefit is being able to
list videos in a playlist that have been deleted or made private.
Also fixes livestream parsing.
This commit is contained in:
2025-09-26 22:49:16 +02:00
parent 30850a7ce0
commit b98aa718b0
2 changed files with 14 additions and 42 deletions

View File

@@ -1,6 +1,7 @@
#!/usr/bin/python3
import brotli, yt_dlp, requests, json, time
from ythdd_globals import safeTraverse
import ythdd_proto
import ythdd_globals
ytdl_opts = {
@@ -518,12 +519,15 @@ def WEBgetVideoComments(ctoken: str) -> tuple:
return actual_comments, new_continuation
def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"):
def WEBextractPlaylist(plid: str = "", ctoken: str = ""):
additional_context = {'browseId': prefix + plid}
# if ctoken has been provided, use it
if ctoken:
# playlist id can be omitted if ctoken is provided
additional_context = {'continuation': ctoken}
else:
# try to create ctoken which will allow for accessing the full playlist, including delisted/deleted videos
additional_context = {'continuation': ythdd_proto.producePlaylistContinuation(plid, offset=0)}
context = makeWebContext(additional_context)
@@ -540,52 +544,15 @@ def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"):
if not ctoken:
metadata = {
"header": safeTraverse(resp_json, ["header"]),
#"microformat": safeTraverse(resp_json, ["microformat"]),
"sidebar": safeTraverse(resp_json, ["sidebar"])
}
# TODO (after python protodec implementation/wrapper is done):
# # try to extract ctoken containing the full playlist, including delisted/deleted videos
# full_playlist_ctoken = urllib.parse.quote(protodec.from_json({
# "80226972:embedded": {
# "2:string": prefix + plid,
# "3:base64": {
# "1:varint": request_count, # todo: increment by 200 with an external index
# "15:string": "PT:" + urllib.parse.quote(protodec.from_json({"1:varint": index})),
# "104:embedded": {"1:0:varint": 0}
# },
# "35:string": plid
# }
# }))
# # if ctoken creation succeeded
# if full_playlist_ctoken:
# # make another request
# response = requests.post(
# 'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
# headers = ythdd_globals.getHeaders(),
# json = makeWebContext({'continuation': full_playlist_ctoken})
# )
# resp_json = json.loads(response.text)
# else:
# print("error(WEBextractPlaylist): full playlist metadata extraction failed. Delisted/deleted videos will be missing.")
# extract continuation
new_continuation = None
if ctoken:
# subsequent request
new_continuation = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems", -1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"])
else:
# first-time request
new_continuation = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents", -1, "continuationItemRenderer", "continuationEndpoint", "commandExecutorCommand", "commands", -1, "continuationCommand", "token"])
# "best-effort" playlist's videos extraction
# "best-effort" because None's (unsuccessful video extraction = None) are passed as they are
# warning! todo: iterate over this, as shorts cannot be currently extracted (they use richGridRenderer, not playlistVideoListRenderer)
videos = None
if ctoken: # or full_playlist_ctoken:
videos = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"]) # includes continuation as last element of list, which will be ignored
else:
videos = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents"])
return metadata, new_continuation, videos

View File

@@ -313,6 +313,7 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
}
case "playlistVideoRenderer":
# used by all content inside of playlists which have at least one non-shorts video/livestream
video_id = safeTraverse(entry, ["playlistVideoRenderer", "videoId"], default="UnknownVideoId")
title = safeTraverse(entry, ["playlistVideoRenderer", "title", "runs", 0, "text"], default="Unknown video title")
@@ -321,7 +322,11 @@ def parseRenderers(entry: dict, context: dict = {}) -> dict:
video_index = int(safeTraverse(entry, ["playlistVideoRenderer", "index", "simpleText"], default="1")) - 1
length = parseLengthFromTimeBadge(safeTraverse(entry, ["playlistVideoRenderer", "lengthText", "simpleText"], default="0:0"))
published_date = safeTraverse(entry, ["playlistVideoRenderer", "videoInfo", "runs", -1, "text"], default="2000-01-01")
published_date = published_date.removeprefix("Streamed ")
published_date = published_date.removeprefix("Streamed ").removeprefix(" watching")
# handle livestreams
if not published_date:
published_date = "now"
return {
"type": "video",