fix: adjust initialData extraction for new a/b change

chore: remove unnecessary string from age-restricted videos' description
fix: play other formats than 16x9 on yattee
2025-12-29 23:32:06 +01:00 · 2025-11-22 21:42:47 +01:00 · 2025-11-21 22:38:43 +01:00 · 2025-11-21 10:15:37 +01:00 · 2025-11-21 09:59:22 +01:00 · 2025-11-21 09:44:30 +01:00
6 changed files with 139 additions and 43 deletions
--- a/config.default.toml
+++ b/config.default.toml
@@ -9,10 +9,13 @@ cache = true                                       # Whether to cache requests f
 [api]
 api_key = ""                 # Leave empty API key for public access to non-sensitive backend
 api_key_admin = "CHANGEME"   # Empty *admin* API key will autogenerate a random one every launch.
+enable_debugger_halt = false # Whether to allow triggering pdb using the admin API key.

 [extractor]
 user-agent = ""                  # Leave empty for default (Firefox ESR).
 cookies_path = ""                # Leave empty for none.
+age_restricted_cookies_path = "" # Cookies to use when bypassing age-gated videos only. Leave empty to disable.
+deno_path = ""                   # Required when using cookies.
 preferred_extractor = ""         # Leave empty for default (android_vr).

 [proxy]
--- a/ythdd_api_v1.py
+++ b/ythdd_api_v1.py
@@ -8,11 +8,11 @@ import ythdd_globals, ythdd_extractor
 #from flask_sqlalchemy import SQLAlchemy
 #import ythdd_api_v1_stats, ythdd_api_v1_user, ythdd_api_v1_info, ythdd_api_v1_query, ythdd_api_v1_meta, ythdd_api_v1_admin

-def requireAuthentication(func):
-	@wraps(func)
+def requireAuthentication(admin: bool = True):
+	def functionWrapper(func):
 		def wrapper(*args, **kwargs):
 			token = kwargs["r"].args.get('token')
-		if token == lewy_globals.config['api']['api_key']:
+			if token == ythdd_globals.config['api']['api_key' + admin * '_admin']:
 				try:
 					status, received, data = func(*args, **kwargs)
 					return status, received, data
@@ -21,6 +21,7 @@ def requireAuthentication(func):
 			else:
 				return 401, "error", {'error_msg': "Unauthorized"}
 		return wrapper
+	return functionWrapper

 def incrementBadRequests():
 	ythdd_globals.apiFailedRequests += 1
@@ -143,6 +144,13 @@ def hot(data):
 			incrementBadRequests()
 			return notImplemented([data[1]]) # workaround before notImplemented is reworked

+@requireAuthentication(admin=True)
+def debugger_halt(r):
+	if not ythdd_globals.config["api"]["enable_debugger_halt"]:
+		return 403, "Administrator has disabled access for this endpoint.", []
+	breakpoint()
+	return 200, "Pdb triggered and ended successfully.", []
+
 def lookup(data, request):
 	match data[0]:
 		case 'stats':
@@ -163,6 +171,8 @@ def lookup(data, request):
 		case 'admin':
 			# REQUIRE CREDENTIALS!
 			return stub_hello()
+		case 'halt':
+			return debugger_halt(r=request)
 		case _:
 			incrementBadRequests()
 			return notImplemented(data)
--- a/ythdd_extractor.py
+++ b/ythdd_extractor.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 import brotli, yt_dlp, requests, json, time
+from http.cookiejar import MozillaCookieJar
 from ythdd_globals import safeTraverse
 import ythdd_proto
 import ythdd_globals
@@ -19,7 +20,11 @@ ytdl_opts = {
 				# "formats": ["dashy"]
 			}
 	},
-	"simulate": True
+	"simulate": True,
+	"js_runtimes": {
+		"deno": {}
+	},
+	'remote_components': ['ejs:github']
 }

 stage1_headers = {
@@ -129,7 +134,7 @@ web_context_dict = {
    }
 }

-def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False):
+def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False, use_cookies=None):
 	# TODO: check user-agent and cookiefile

 	ytdl_context = ytdl_opts.copy()
@@ -137,9 +142,6 @@ def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False):
 	if ythdd_globals.config['extractor']['user-agent']:
 		yt_dlp.utils.std_headers['User-Agent'] = ythdd_globals.config['extractor']['user-agent']

-	if ythdd_globals.config['extractor']['cookies_path']:
-		ytdl_context['cookiefile'] = ythdd_globals.config['extractor']['cookies_path']
-
 	if len(url) == 11:
 		url = "https://www.youtube.com/watch?v=" + url
 	if getcomments:
@@ -153,7 +155,27 @@ def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False):
 		ytdl_context['extractor_args']['youtube']['player_client'] = [ythdd_globals.config['extractor']['preferred_extractor']]
 	else:
 		ytdl_context['extractor_args']['youtube']['player_client'] = ['android_vr']
-	with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
+
+	if use_cookies is not None:
+		# can be either "global", "agegated" or None
+		deno_path = ythdd_globals.config['extractor']['deno_path']
+		match use_cookies:
+			case "global":
+				ytdl_context['cookiefile'] = ythdd_globals.config['extractor']['cookies_path']
+				ytdl_context['extractor_args']['youtube']['player_client'] = ['tv']
+				if not deno_path:
+					print("FATAL ERROR: deno path is required for playback using cookies!")
+				ytdl_context['js_runtimes']['deno']['path'] = deno_path if deno_path else ""
+			case "agegated":
+				ytdl_context['cookiefile'] = ythdd_globals.config['extractor']['age_restricted_cookies_path']
+				ytdl_context['extractor_args']['youtube']['player_client'] = ['tv']
+				if not deno_path:
+					print("FATAL ERROR: deno path is required for playback of age-restricted content!")
+				ytdl_context['js_runtimes']['deno']['path'] = deno_path if deno_path else ""
+			case None | _:
+				pass
+
+	with yt_dlp.YoutubeDL(ytdl_context) as ytdl:
 		result = ytdl.sanitize_info(ytdl.extract_info(url, download=False))
 	return result

@@ -177,7 +199,7 @@ def WEBrelated(url: str):

 	return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]

-def WEBextractSinglePage(uri: str):
+def WEBextractSinglePage(uri: str, use_cookies=None):
 	# WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
 	
 	start_time = time.time()
@@ -185,11 +207,25 @@ def WEBextractSinglePage(uri: str):
 	if len(uri) != 11:
 		raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")

-	response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'))
+	cookies = None
+	if use_cookies is not None:
+		match use_cookies:
+			case "global":
+				ythdd_globals.print_debug("wdata: using global cookies")
+				cookies = MozillaCookieJar(ythdd_globals.config["extractor"]["cookies_path"])
+				cookies.load()
+			case "agegated":
+				ythdd_globals.print_debug("wdata: using agegated cookies")
+				cookies = MozillaCookieJar(ythdd_globals.config["extractor"]["age_restricted_cookies_path"])
+				cookies.load()
+			case None | _:
+				pass
+
+	response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'), cookies=cookies)
 	extracted_string = str(response.content.decode('utf8', 'unicode_escape'))
-	start = extracted_string.find('{"responseContext":{"serviceTrackingParams":')
+	start = extracted_string.find('{"responseContext":')
 	end = extracted_string.find(';var ', start)
-	start2 = extracted_string.find('{"responseContext":{"serviceTrackingParams":', start + 1)
+	start2 = extracted_string.find('{"responseContext":', start + 1)
 	end2 = extracted_string.find(';</script>', start2)
 	extracted_json1 = json.loads(extracted_string[start:end])
 	extracted_json2 = json.loads(extracted_string[start2:end2])
--- a/ythdd_globals.py
+++ b/ythdd_globals.py
@@ -32,7 +32,7 @@ def getConfig(configfile):
 	global randomly_generated_passcode

 	if not os.path.exists(configfile):
-		dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False, 'public_facing_url': 'http://127.0.0.1:5000/', 'debug': False, 'cache': True}, 'api': {'api_key': 'CHANGEME'}, 'proxy': {'user-agent': '', 'allow_proxying_videos': True, 'match_initcwndbps': True}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
+		dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False, 'public_facing_url': 'http://127.0.0.1:5000/', 'debug': False, 'cache': True}, 'api': {'api_key': 'CHANGEME', 'enable_debugger_halt': False}, 'proxy': {'user-agent': '', 'allow_proxying_videos': True, 'match_initcwndbps': True}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
 		# if a passcode has not been provided by the user (config file doesn't exist, and user didn't specify it using an argument)
 		print(f"{colors.WARNING}WARNING{colors.ENDC}: Using default, baked in config data. {colors.ENDL}"
 			  f"         Consider copying and editing the provided example file ({colors.OKCYAN}config.default.toml{colors.ENDC}).")
--- a/ythdd_inv_tl.py
+++ b/ythdd_inv_tl.py
@@ -163,10 +163,23 @@ def videos(data):

 	wdata = ythdd_extractor.WEBextractSinglePage(data[3])

+	age_restricted = False
 	error = getError(wdata)
 	if error is not None:
+		if error.startswith("(LOGIN_REQUIRED)") and "inappropriate for some users" in error:
+			# check if user provided age-gated cookies
+			if ythdd_globals.config["extractor"]["age_restricted_cookies_path"]:
+				ythdd_globals.print_debug(f"videos({data[3]}): using agegated cookies to bypass restriction")
+				ydata = ythdd_extractor.extract(data[3], use_cookies="agegated")
+				wdata = ythdd_extractor.WEBextractSinglePage(data[3], use_cookies="agegated")
+				age_restricted = True
+			else:
+				# return error if no age-gated cookies are provided
 				return send(500, {"status": "error", "error": error})
-
+		else:
+			# return error if it doesn't mention age restriction
+			return send(500, {"status": "error", "error": error})
+	else:
 		ydata = ythdd_extractor.extract(data[3])

 	#return send(200, {'ydata': ydata, 'wdata': wdata})
@@ -212,8 +225,7 @@ def videos(data):
 				y = safeTraverse(entry, ['lockupViewModel'])
 				if not isinstance(y, dict):
 					continue
-				is_mix_or_playlist = safeTraverse(entry, ["lockupViewModel", "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "icon", "sources", 0, "clientResource", "imageName"], default="") in ("MIX", "PLAYLISTS")
-				if is_mix_or_playlist:
+				if safeTraverse(y, ["contentType"], default="LOCKUP_CONTENT_TYPE_VIDEO") != "LOCKUP_CONTENT_TYPE_VIDEO":
 					# neither mixes nor playlists are currently supported by the invidious api
 					continue
 				# note: this model is similar, but not identical to the one in ythdd_struct_parser. perhaps they can be both handled in the struct parser some time.
@@ -278,16 +290,21 @@ def videos(data):
 				continue
 			if video_stream["format_id"] == "18": # todo: do this dynamically
 				initial_fstreams_y[int(video_stream["format_id"])] = video_stream
-			else:
+			elif video_stream["format_id"].isdigit():
+				# filter out DRC audio
 				initial_astreams_y[int(video_stream["format_id"])] = video_stream
+			else:
+				continue

 		# format streams
 		for video_stream in wdata_streams["formats"]:
 			initial_fstreams_w[video_stream["itag"]] = video_stream

 		# adaptive streams
-		for video_stream in wdata_streams["adaptiveFormats"]:
-			initial_astreams_w[video_stream["itag"]] = video_stream
+		for audiovideo_stream in wdata_streams["adaptiveFormats"]:
+			if not "isVb" in audiovideo_stream and not "isDrc" in audiovideo_stream:
+				# skip DRC and VB formats
+				initial_astreams_w[audiovideo_stream["itag"]] = audiovideo_stream

 		for itag in initial_astreams_y:
 			if itag in initial_astreams_w:
@@ -302,6 +319,10 @@ def videos(data):
 		adaptive_formats, format_streams = [{"url": f"http://a/?expire={int(time_start + 5.9 * 60 * 60)}", "itag": "18", "type": "", "clen": "0", "lmt": "", "projectionType": "RECTANGULAR"}], [] # freetube/clipious shenanigans, see: https://github.com/FreeTubeApp/FreeTube/pull/5997 and https://github.com/lamarios/clipious/blob/b9e7885/lib/videos/models/adaptive_format.g.dart
 		hls_url = safeTraverse(ydata, ["url"], default="ythdd: unable to retrieve stream url")

+	if age_restricted:
+		if not adaptive_formats:
+			adaptive_formats  = [{"url": f"http://a/?expire={int(time_start + 5.9 * 60 * 60)}", "itag": "18", "type": "", "clen": "0", "lmt": "", "projectionType": "RECTANGULAR"}] # same as above
+
 	if live_now:
 		video_type         = "livestream"
 		premiere_timestamp = published # ??? that works i guess
--- a/ythdd_struct_parser.py
+++ b/ythdd_struct_parser.py
@@ -519,6 +519,32 @@ def extractTextFromSimpleOrRuns(obj: dict, default: str = "") -> str:
 	return text


+def findNearestResolution(width: int, height: int) -> int:
+	# Finds the nearest standard resolution (one of 144p, 240p, ...)
+	# So far only used for Yattee, as it has trouble playing anything
+	# without one of the standard resolutions. Playback on other
+	# clients is unaffected.
+
+	# failsafe behaviour
+	try:
+		width  = int(width)
+		height = int(height)
+		res    = min(width, height)
+	except:
+		return 360
+
+	standard_resolutions = [144, 240, 360, 720, 1080, 2160, 4320]
+	if res in standard_resolutions:
+		return res
+
+	# calculate relative distance to one of the standard resolutions
+	res_normalized = [abs(1 - (x / res)) for x in standard_resolutions]
+	# pick the one where the distance is the smallest
+	target_index = res_normalized.index(min(res_normalized))
+	target_res = standard_resolutions[target_index]
+
+	return target_res
+
 def parseFormatStreams(wdata_fstream: dict, ydata_stream: dict) -> dict:

 	try:
@@ -530,13 +556,13 @@ def parseFormatStreams(wdata_fstream: dict, ydata_stream: dict) -> dict:

 	fstream = {
 		"url": stream_url,
-		"itag": wdata_fstream["itag"],
+		"itag": str(wdata_fstream["itag"]),
 		"type": wdata_fstream["mimeType"],
 		"quality": wdata_fstream["quality"],
 		"bitrate": str(wdata_fstream["bitrate"]),
 		"fps": wdata_fstream["fps"],
 		"size": f"{wdata_fstream['width']}x{wdata_fstream['height']}",
-		"resolution": f"{wdata_fstream['height'] if wdata_fstream['height'] in (144, 240, 360, 720, 1080, 2160) else 360}p",
+		"resolution": f"{findNearestResolution(wdata_fstream['width'], wdata_fstream['height'])}p", # possibly not really needed here
 		"qualityLabel": wdata_fstream["qualityLabel"],
 		"container": safeTraverse(FORMATS.get(wdata_fstream["itag"]), [   "ext"], default="mp4"), # invidious_formats
 		"encoding":  safeTraverse(FORMATS.get(wdata_fstream["itag"]), ["vcodec"], default="mp4") # invidious_formats
@@ -564,8 +590,8 @@ def parseAdaptiveStreams(wdata_astream: dict, ydata_stream: dict) -> dict:
 		"clen": wdata_astream["contentLength"],
 		"lmt": wdata_astream["lastModified"],
 		"projectionType": wdata_astream["projectionType"],
-		"container": safeTraverse(FORMATS.get(wdata_astream["itag"]), [   "ext"], default="mp4"), # invidious_formats,
-		"encoding":  safeTraverse(FORMATS.get(wdata_astream["itag"]), ["vcodec"], default="mp4")  # invidious_formats,
+		"container": safeTraverse(FORMATS.get(wdata_astream["itag"]), [   "ext"], default="mp4"), # invidious_formats
+		"encoding":  safeTraverse(FORMATS.get(wdata_astream["itag"]), ["vcodec"], default="mp4")  # invidious_formats
 	}

 	isVideo = True
@@ -577,16 +603,16 @@ def parseAdaptiveStreams(wdata_astream: dict, ydata_stream: dict) -> dict:
 		# video-specific metadata
 		astream["fps"]          = wdata_astream["fps"]
 		astream["size"]         = f"{wdata_astream['width']}x{wdata_astream['height']}"
-		astream["resolution"]   = f"{wdata_astream['height'] if wdata_astream['height'] in (144, 240, 360, 720, 1080, 2160) else 360}p"
+		astream["resolution"]   = f"{findNearestResolution(wdata_astream['width'], wdata_astream['height'])}p"
 		astream["qualityLabel"] = wdata_astream["qualityLabel"]
 		astream["colorInfo"]    = safeTraverse(wdata_astream, ["colorInfo"])
 	else:
 		astream = astream_common
 		# audio-specific metadata
+		astream["encoding"]        = safeTraverse(FORMATS.get(wdata_astream["itag"]), ["acodec"], default="mp4")
 		astream["audioQuality"]    =     wdata_astream["audioQuality"],
 		astream["audioSampleRate"] = int(wdata_astream["audioSampleRate"]),
 		astream["audioChannels"]   =     wdata_astream["audioChannels"]

-	# breakpoint()
 	return astream
Author	SHA1	Message	Date
sherl	56af1f0735	fix: adjust initialData extraction for new a/b change	2025-12-29 23:32:06 +01:00
sherl	72266aad0f	chore: remove unnecessary string from age-restricted videos' description	2025-11-22 21:42:47 +01:00
sherl	900cc92229	fix: play other formats than 16x9 on yattee — reports a factually wrong, but close enough, resolution. To be removed once this is fixed on yattee's end.	2025-11-21 22:38:43 +01:00
sherl	2687cc2bdc	hotfix: fix typo	2025-11-21 10:15:37 +01:00
sherl	4a9d59c9b4	fix: filter out DRC and VB audio from wdata	2025-11-21 09:59:22 +01:00
sherl	4af581ab7c	fix: use acodec for audio streams instead of vcodec — should fix some issues with stricter DASH players	2025-11-21 09:44:30 +01:00
sherl	d1f381220d	fix: use exclusively tv player for age-restricted videos — also filter out DRC audio	2025-11-20 17:54:41 +01:00
sherl	eebf434f3e	feat: support age-restricted videos when cookies are provided	2025-11-20 13:02:38 +01:00
sherl	c979c97077	feat: allow debugging with pdb — allows the user to debug the webapp when the admin API key is passed as a param. Also, an anniversary: the 100th commit!	2025-11-13 11:28:40 +01:00
sherl	11c94c757e	fix: don't show playlists in related videos feed — items in the related feed are now checked by contentType to determine if they are videos or not	2025-11-07 18:45:11 +01:00
sherl	4421e68d9d	fix: format stream itag is now a string (as it should be) — fixes playback on clipious	2025-11-07 18:43:07 +01:00