#!/usr/bin/python3
"""Thin wrappers around yt-dlp and the YouTube watch page."""
import json

import requests
import yt_dlp

import ythdd_globals

# Base options handed to yt_dlp.YoutubeDL; extract() layers per-call options
# on top of a copy of this dict.
ytdl_opts = {
    #"format": "bv*[height<=720]+ba", # to be defined by the user
    #"getcomments": True,
    #"extractor_args": {"maxcomments": ...},
    #"writeinfojson": True,
    #"progress_hooks": my_hook,
    "outtmpl": {
        "default": "%(id)s.%(ext)s",
        "chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s"
    },
    "simulate": True
}

_WATCH_ENDPOINT = "https://www.youtube.com/watch"
_WATCH_URL_PREFIX = "https://www.youtube.com/watch?v="
_VIDEO_ID_LENGTH = 11
_DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
# Opening characters of the JSON blobs embedded in the watch page.
_INITIAL_DATA_MARKER = '{"responseContext":{"serviceTrackingParams":'


def extract(url: str, getcomments=False, maxcomments=""):
    """Run yt-dlp's ``extract_info`` (without downloading) for a video.

    Args:
        url: A full video URL, or a bare 11-character video ID, which is
            expanded to a ``https://www.youtube.com/watch?v=`` URL.
        getcomments: When truthy, sets yt-dlp's ``getcomments`` option.
        maxcomments: When truthy, passed as the first element of the
            ``max_comments`` extractor argument for the ``youtube``
            extractor (the remaining three elements are ``"all"``).

    Returns:
        Whatever ``YoutubeDL.extract_info`` returns for the URL.
    """
    if len(url) == _VIDEO_ID_LENGTH:
        url = _WATCH_URL_PREFIX + url

    # Work on a per-call copy: writing into the module-level ``ytdl_opts``
    # would make these options stick for every later call.
    opts = dict(ytdl_opts)
    if getcomments:
        opts['getcomments'] = True
    if maxcomments:
        opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}

    with yt_dlp.YoutubeDL(opts) as ytdl:
        result = ytdl.extract_info(url, download=False)
    return result


def _video_id_from_url(url: str) -> str:
    """Return the 11-character video ID from a bare ID or a watch URL."""
    if len(url) == _VIDEO_ID_LENGTH:
        return url

    prefix_at = url.find(_WATCH_URL_PREFIX)
    if prefix_at == -1:
        raise ValueError(f"not a recognised YouTube watch URL: {url!r}")

    # Slice relative to where the prefix was found, and take exactly 11
    # characters so a trailing "&..." parameter is not swallowed.
    id_start = prefix_at + len(_WATCH_URL_PREFIX)
    return url[id_start:id_start + _VIDEO_ID_LENGTH]


def _extract_initial_data(page: str):
    """Parse the second marker-prefixed JSON object embedded in *page*."""
    first = page.find(_INITIAL_DATA_MARKER)
    second = page.find(_INITIAL_DATA_MARKER, first + 1) if first != -1 else -1
    if second == -1:
        raise json.JSONDecodeError(
            "could not locate the second embedded JSON object in the watch page",
            page,
            0,
        )

    # raw_decode parses exactly one JSON value and ignores what follows,
    # so a ';' inside a string value cannot cut the document short.
    data, _end = json.JSONDecoder().raw_decode(page, second)
    return data


def related(url: str):
    """Fetch the watch page and return its ``secondaryResults`` section.

    Args:
        url: A bare 11-character video ID, or a URL containing
            ``https://www.youtube.com/watch?v=``.

    Returns:
        ``data["contents"]["twoColumnWatchNextResults"]["secondaryResults"]``
        from the second embedded JSON object on the page.
        NOTE(review): presumably this is the related-videos listing; confirm
        against a live response.

    Raises:
        ValueError: If *url* is neither an 11-character ID nor a watch URL.
        json.JSONDecodeError: If the embedded JSON is missing or invalid.
        KeyError: If the parsed JSON lacks the expected keys.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
    params = {'v': _video_id_from_url(url)}

    # A user agent from the configuration takes precedence over the default.
    user_agent = ythdd_globals.config['extractor']['user-agent'] or _DEFAULT_USER_AGENT

    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    # Always hit the watch endpoint and let requests build the query string;
    # a bare video ID is not a valid URL on its own.
    response = requests.get(_WATCH_ENDPOINT, headers=headers, params=params)

    # The second positional argument of bytes.decode is the error handler,
    # not a codec; 'replace' keeps malformed bytes from aborting the parse.
    page = response.content.decode('utf-8', errors='replace')

    extracted_json = _extract_initial_data(page)
    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]