#!/usr/bin/python3
"""Helpers for fetching YouTube video metadata.

Three strategies are provided:

* ``extract`` delegates to yt-dlp.
* ``WEBrelated`` / ``WEBextractSinglePage`` download the watch page and parse
  the JSON objects embedded in its HTML.
* ``IOSextract`` posts to the ``youtubei/v1`` ``player`` and ``next``
  endpoints using the request templates defined below.
"""
import json
import time

# NOTE(review): brotli is not referenced directly in this module; presumably
# it is imported so that "br"-encoded responses (advertised in the
# Accept-Encoding headers below) can be decoded - confirm before removing.
import brotli
import requests
import yt_dlp

import ythdd_globals

# Base yt-dlp options. Treated as a read-only template: extract() copies it
# before adding per-call options.
ytdl_opts = {
    #"format": "bv*[height<=720]+ba", # to be defined by the user
    #"getcomments": True,
    #"extractor_args": {"maxcomments": ...},
    #"writeinfojson": True,
    #"progress_hooks": my_hook,
    "outtmpl": {
        "default": "%(id)s.%(ext)s",
        "chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s"
    },
    "simulate": True
}

# Headers and body template for the "player" request made by IOSextract().
stage1_headers = {
    "Connection": "keep-alive",
    "User-Agent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    "X-Youtube-Client-Name": "5",
    "X-Youtube-Client-Version": "19.45.4",
    "Origin": "https://www.youtube.com",
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
}

stage1_body = {
    "context": {
        "client": {
            "clientName": "IOS",
            "clientVersion": "19.45.4",
            "deviceMake": "Apple",
            "deviceModel": "iPhone16,2",
            "userAgent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
            "osName": "iPhone",
            "osVersion": "18.1.0.22B83",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0
        }
    },
    #"videoId": uri,
    "playbackContext": {
        "contentPlaybackContext": {
            "html5Preference": "HTML5_PREF_WANTS"
        }
    },
    "contentCheckOk": True,
    "racyCheckOk": True
}

# Headers for the (currently disabled) HLS manifest request in IOSextract().
stage2_headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Accept-Encoding": "gzip, deflate, br"
}

# Headers and body template for the "next" request made by IOSextract().
stage3_headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    "X-Youtube-Client-Name": "1",
    "X-Youtube-Client-Version": "2.20241126.01.00",
    "Origin": "https://www.youtube.com",
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
}

stage3_body = {
    "context": {
        "client": {
            "clientName": "WEB",
            "clientVersion": "2.20241126.01.00",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0
        }
    },
    #"videoId": uri,
    "contentCheckOk": True,
    "racyCheckOk": True
}

_WATCH_URL_PREFIX = "https://www.youtube.com/watch?v="
_VIDEO_ID_LENGTH = 11
# Opening of the JSON objects embedded in the watch page HTML.
_EMBEDDED_JSON_MARKER = '{"responseContext":{"serviceTrackingParams":'
_JSON_DECODER = json.JSONDecoder()


def _fetch_watch_page(video_id: str) -> str:
    """Download the watch page for *video_id* and return it as text."""
    response = requests.get(
        _WATCH_URL_PREFIX + video_id,
        headers=ythdd_globals.getHeaders(caller='extractor'),
    )
    # bytes.decode()'s second positional argument is the error handler, not a
    # codec, so a codec name there only surfaces as a LookupError on malformed
    # input. Strict UTF-8 gives the same result for well-formed pages.
    return response.content.decode('utf-8')


def _find_embedded_json(page: str):
    """Return the offsets of the first and second embedded JSON objects.

    An offset is -1 when the corresponding marker is absent.
    """
    first = page.find(_EMBEDDED_JSON_MARKER)
    if first == -1:
        return -1, -1
    return first, page.find(_EMBEDDED_JSON_MARKER, first + 1)


def _decode_json_at(page: str, offset: int):
    """Parse the JSON value that starts at *offset* in *page*.

    The decoder stops at the end of the value by itself, so no terminator
    (such as the next ';') has to be searched for; semicolons inside JSON
    strings therefore cannot truncate the document.

    Raises:
        json.JSONDecodeError: if *offset* is negative (marker not found) or
            the text at *offset* is not valid JSON.
    """
    if offset < 0:
        raise json.JSONDecodeError("embedded JSON marker not found", page, 0)
    parsed, _ = _JSON_DECODER.raw_decode(page, offset)
    return parsed


def _post_json(endpoint: str, headers: dict, body_template: dict, video_id: str):
    """POST *body_template* plus ``videoId`` to *endpoint*; return parsed JSON.

    The template itself is left untouched; a per-request body is built instead.
    """
    response = requests.post(
        endpoint,
        headers=headers,
        json={**body_template, "videoId": video_id},
    )
    return json.loads(response.content.decode('utf-8'))


def extract(url: str, getcomments=False, maxcomments=""):
    """Run yt-dlp's ``extract_info`` (without downloading) and return its result.

    Args:
        url: A URL understood by yt-dlp, or a bare 11-character video id.
        getcomments: When true, ask yt-dlp to fetch comments as well.
        maxcomments: When non-empty, passed to the youtube extractor as the
            first ``max_comments`` value.
    """
    # TODO: check user-agent and cookiefile
    extractor_config = ythdd_globals.config['extractor']

    # Work on a copy so per-call options (comments, cookies) do not leak into
    # later calls through the shared module-level template.
    options = dict(ytdl_opts)

    if extractor_config['user-agent']:
        yt_dlp.utils.std_headers['User-Agent'] = extractor_config['user-agent']

    if extractor_config['cookies_path']:
        options['cookiefile'] = extractor_config['cookies_path']

    if len(url) == _VIDEO_ID_LENGTH:
        url = "https://www.youtube.com/watch?v=" + url

    if getcomments:
        options['getcomments'] = True

    if maxcomments:
        options['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}

    with yt_dlp.YoutubeDL(options) as ytdl:
        result = ytdl.extract_info(url, download=False)
    return result


def WEBrelated(url: str):
    """Return the ``secondaryResults`` section embedded in a video's watch page.

    Args:
        url: A bare 11-character video id, or a URL starting with
            ``https://www.youtube.com/watch?v=``. Only the video id is used;
            any further query parameters are ignored.

    Raises:
        ValueError: if *url* is neither of the accepted forms.
        json.JSONDecodeError: if the embedded JSON cannot be located or parsed.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
    if len(url) == _VIDEO_ID_LENGTH:
        video_id = url
    elif url.startswith(_WATCH_URL_PREFIX):
        start = len(_WATCH_URL_PREFIX)
        video_id = url[start:start + _VIDEO_ID_LENGTH]
        if len(video_id) != _VIDEO_ID_LENGTH:
            raise ValueError("WEBrelated could not find an 11-character video id in the URL")
    else:
        raise ValueError("WEBrelated expects a video id or a https://www.youtube.com/watch?v= URL")

    page = _fetch_watch_page(video_id)
    # The second embedded object is the one that holds the watch-next results.
    _, second = _find_embedded_json(page)
    extracted_json = _decode_json_at(page, second)
    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]


def WEBextractSinglePage(uri: str):
    """Fetch a watch page and return both JSON objects embedded in it.

    Args:
        uri: An 11-character video id.

    Returns:
        A dict with ``ec1`` and ``ec2`` (the first and second embedded JSON
        objects) and ``took`` (elapsed seconds).

    Raises:
        ValueError: if *uri* is not 11 characters long.
        json.JSONDecodeError: if either embedded object cannot be located or
            parsed.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
    start_time = time.perf_counter()

    if len(uri) != _VIDEO_ID_LENGTH:
        raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")

    page = _fetch_watch_page(uri)
    first, second = _find_embedded_json(page)
    extracted_json1 = _decode_json_at(page, first)
    extracted_json2 = _decode_json_at(page, second)

    end_time = time.perf_counter()
    return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}


def IOSextract(uri: str):
    """Query the ``player`` and ``next`` endpoints for a video.

    Args:
        uri: An 11-character video id.

    Returns:
        A dict with ``stage1`` (parsed ``player`` response), ``stage3``
        (parsed ``next`` response) and ``took`` (elapsed seconds).

    Raises:
        ValueError: if *uri* is not 11 characters long.
    """
    start = time.perf_counter()

    if len(uri) != _VIDEO_ID_LENGTH:
        raise ValueError("IOSextract expects a single, 11-character long uri as an argument")

    stage1 = _post_json(
        "https://www.youtube.com/youtubei/v1/player?prettyPrint=false",
        stage1_headers,
        stage1_body,
        uri,
    )

    # Stage 2 (fetching the HLS manifest) is currently disabled:
    #stage2_h = requests.get(stage1['streamingData']['hlsManifestUrl'], headers=stage2_headers)
    #stage2 = stage2_h.content.decode('utf-8')

    stage3 = _post_json(
        "https://www.youtube.com/youtubei/v1/next?prettyPrint=false",
        stage3_headers,
        stage3_body,
        uri,
    )

    end = time.perf_counter()
    #return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
    return {'stage1': stage1, 'stage3': stage3, 'took': end - start}