feat: playlist browsing

Pagination still needs refinement for some of the clients. On another note, this is an anniversary commit, as ythdd turns one year old today.
2025-09-25 23:30:59 +02:00
parent 6d0c70696b
commit 1c9174c888
4 changed files with 228 additions and 3 deletions
--- a/ythdd_extractor.py
+++ b/ythdd_extractor.py
@@ -517,3 +517,75 @@ def WEBgetVideoComments(ctoken: str) -> tuple:
 				}

 	return actual_comments, new_continuation
+
+def WEBextractPlaylist(plid: str = "", ctoken: str = "", prefix: str = "VL"):
+	# Fetches one page of a playlist through the web client's "browse" endpoint.
+	#
+	# Arguments:
+	#   plid   - playlist id; joined with prefix to form the browseId of a first-time request
+	#   ctoken - continuation token; when non-empty it is sent instead of the browseId
+	#   prefix - string prepended to plid in the browseId (defaults to "VL")
+	#
+	# Returns a 3-tuple (metadata, new_continuation, videos):
+	#   metadata         - dict holding the response's "header" and "sidebar" on a
+	#                      first-time fetch, None for continuation requests
+	#   new_continuation - token for requesting the next page, as found by safeTraverse
+	#   videos           - raw list of item renderers, as found by safeTraverse
+	#
+	# NOTE(review): safeTraverse presumably yields None when a path is missing, so any
+	# of the three values may be None - confirm against its implementation.
+
+	# first-time request: address the playlist by its browseId
+	additional_context = {'browseId': prefix + plid}
+	if ctoken:
+		# continuation request: the token replaces the browseId entirely,
+		# so the playlist id can be omitted if ctoken is provided
+		additional_context = {'continuation': ctoken}
+
+	context = makeWebContext(additional_context)
+
+	response = requests.post(
+		'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
+		headers = ythdd_globals.getHeaders(),
+		json    = context
+	)
+
+	# the status code is not checked here; json.loads raises if the body is not valid JSON
+	resp_json = json.loads(response.text)
+
+	# if this is a first-time fetch (no ctoken passed), extract metadata
+	# (for continuation requests metadata stays None)
+	metadata = None
+	if not ctoken:
+		metadata = {
+			"header": safeTraverse(resp_json, ["header"]),
+			#"microformat": safeTraverse(resp_json, ["microformat"]),
+			"sidebar": safeTraverse(resp_json, ["sidebar"])
+		}
+
+	# TODO (after python protodec implementation/wrapper is done):
+	# # try to extract ctoken containing the full playlist, including delisted/deleted videos
+	# full_playlist_ctoken = urllib.parse.quote(protodec.from_json({
+	# 	"80226972:embedded": {
+	# 		"2:string": prefix + plid,
+	# 		"3:base64": {
+	# 			"1:varint": request_count, # todo: increment by 200 with an external index
+	# 			"15:string": "PT:" + urllib.parse.quote(protodec.from_json({"1:varint": index})),
+	# 			"104:embedded": {"1:0:varint": 0}
+	# 		},
+	# 		"35:string": plid
+	# 	}
+	# }))
+	# # if ctoken creation succeeded
+	# if full_playlist_ctoken:
+	# 	# make another request
+	# 	response = requests.post(
+	# 		'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
+	# 		headers = ythdd_globals.getHeaders(),
+	# 		json    = makeWebContext({'continuation': full_playlist_ctoken})
+	# 	)
+	# 	resp_json = json.loads(response.text)
+	# else:
+	# 	print("error(WEBextractPlaylist): full playlist metadata extraction failed. Delisted/deleted videos will be missing.")
+
+	# extract the continuation token for the next page
+	# (the token sits in the last element of the item list; the path to it differs
+	# between first-time and continuation responses)
+	new_continuation = None
+	if ctoken:
+		# subsequent request
+		new_continuation = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems", -1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"])
+	else:
+		# first-time request
+		new_continuation = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents", -1, "continuationItemRenderer", "continuationEndpoint", "commandExecutorCommand", "commands", -1, "continuationCommand", "token"])
+
+	# "best-effort" extraction of the playlist's videos
+	# "best-effort" because None's (unsuccessful video extraction = None) are passed on as they are
+	# warning! todo: iterate on this, as shorts cannot currently be extracted (they use richGridRenderer, not playlistVideoListRenderer)
+	# note: the list is returned raw, so its last element may be the continuation item rather than a video
+	videos = None
+	if ctoken: # or full_playlist_ctoken:
+		videos = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"]) # includes continuation as last element of list, which will be ignored
+	else:
+		videos = safeTraverse(resp_json, ["contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "playlistVideoListRenderer", "contents"])
+
+	return metadata, new_continuation, videos
+