update extractor headers, add support for checking badges and avatars

2025-02-28 00:57:40 +01:00
parent d1b9f90e7e
commit dbc90d3f74
1 changed files with 95 additions and 5 deletions
--- a/ythdd_extractor.py
+++ b/ythdd_extractor.py
@@ -61,7 +61,7 @@ stage1_body = {

 stage2_headers = {
 	"Connection": "keep-alive",
-	"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
+	"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
 	"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
 	"Accept-Language": "en-us,en;q=0.5",
 	"Sec-Fetch-Mode": "navigate",
@@ -70,13 +70,13 @@ stage2_headers = {

 stage3_headers = {
 	"Connection": "keep-alive",
-	"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
+	"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
 	"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
 	"Accept-Language": "en-us,en;q=0.5",
 	"Sec-Fetch-Mode": "navigate",
 	"Content-Type": "application/json",
 	"X-Youtube-Client-Name": "1",
-	"X-Youtube-Client-Version": "2.20241126.01.00",
+	"X-Youtube-Client-Version": "2.20250226.01.00",
 	"Origin": "https://www.youtube.com",
 	"Accept-Encoding": "gzip, deflate, br",
 	"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
@@ -88,7 +88,7 @@ stage3_body = {
 		"client":
 		{
 			"clientName": "WEB",
-			"clientVersion": "2.20241126.01.00",
+			"clientVersion": "2.20250226.01.00",
 			"hl": "en",
 			"timeZone": "UTC",
 			"utcOffsetMinutes": 0
@@ -99,6 +99,30 @@ stage3_body = {
 	"racyCheckOk": True
 }

+web_context_dict = {  # template request body for the WEB client, read by makeWebContext
+	"context": {
+		"client": {
+			"hl": "en",
+			"gl": "US",
+			"deviceMake": "",
+			"deviceModel": "",
+			"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0,gzip(gfe)",  # header User-Agent + ",gzip(gfe)"
+			"clientName": "WEB",
+			"clientVersion": "2.20250226.01.00",  # same value as X-Youtube-Client-Version in stage3_headers
+			"osName": "Windows",
+			"osVersion": "10.0",
+			"screenPixelDensity": 2,
+			"platform": "DESKTOP",
+			"screenDensityFloat": 2,
+			"userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
+			"browserName": "Firefox",
+			"browserVersion": "135.0",
+			"acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+			"utcOffsetMinutes": 0,
+		}
+	}
+}
+
 def extract(url: str, getcomments=False, maxcomments=""):
 	# TODO: check user-agent and cookiefile

@@ -180,4 +204,70 @@ def IOSextract(uri: str):
 	end = time.time()

 	#return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
-	return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
+	return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
+
+def makeWebContext(secondaryContextDict: dict) -> dict:
+	# Returns a NEW dict: a deep copy of web_context_dict with the top-level keys of
+	# secondaryContextDict added (or overridden). The shared template is never mutated;
+	# assigning into it would leak keys (e.g. an earlier 'browseId') into every later
+	# request and make all returned contexts alias the same object.
+	import copy  # function-scope import keeps this fix self-contained
+	context = copy.deepcopy(web_context_dict)
+	context.update(secondaryContextDict)
+	return context
+
+def getChannelAvatar(response_json: dict) -> dict:
+	# Returns a dictionary: {url: <proxied url to remote server>, width: ..., height: ...}
+	# containing the best resolution in terms of pixel count.
+	# The result is a copy, so response_json is left untouched (rewriting the url in place
+	# would hand an already translated link to translateLinks again on a repeated call).
+	# Raises ValueError if neither location in the response holds any avatar.
+	# A great majority of the code has been influenced by https://github.com/iv-org/invidious/blob/master/src/invidious/channels/about.cr.
+
+	avatars = safeTraverse(response_json, ['metadata', 'channelMetadataRenderer', 'avatar', 'thumbnails'], default=None)
+
+	if not avatars:
+		# fallback to lower resolution avatars
+		avatars = safeTraverse(response_json, ['header', 'pageHeaderRenderer', 'content',
+			'pageHeaderViewModel', 'image', 'decoratedAvatarViewModel', 'avatar',
+			'avatarViewModel', 'image', 'sources'], default=None)
+
+	if not avatars:
+		# TODO: use a local avatar here instead of failing
+		raise ValueError("No channel avatar found in the response.")
+
+	def pixel_count(avatar: dict) -> int:
+		return avatar.get('width', 0) * avatar.get('height', 0)
+
+	# usually, the best avatar is stored last, so it is listed first and wins ties
+	# (max() returns the first of several equally large candidates)
+	best_avatar = dict(max([avatars[-1], *avatars], key=pixel_count))
+
+	best_avatar['url'] = ythdd_globals.translateLinks(best_avatar['url'])
+
+	return best_avatar
+
+def isVerified(response_json: dict):
+	# Reports whether a user badge (verified/artist) was found in response_json.
+	# NOTE(review): the traversal path below is empty, so what gets tested depends on what
+	# safeTraverse returns for an empty path - presumably the badge path is still to be filled in.
+	found = safeTraverse(response_json, [], default=False)
+	return bool(found)
+
+def browseAbout(ucid: str, timeout: float = 10.0):
+	# Returns the response from innertubes browse endpoint for channels (as a dict).
+	# Raises ValueError when ucid is not exactly 24 characters long.
+	# timeout (seconds) is passed to requests, so a stalled connection raises instead of blocking forever.
+
+	if len(ucid) != 24:
+		raise ValueError(f"Something is wrong with the UCID {ucid}. Expected a 24-character long channel ID, not {len(ucid)}.")
+
+	context = makeWebContext({'browseId': ucid})
+	response = requests.post(
+		'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
+		headers = ythdd_globals.getHeaders(),
+		json    = context,
+		timeout = timeout,
+	)
+
+	return json.loads(response.text)