183 lines
6.0 KiB
Python
183 lines
6.0 KiB
Python
#!/usr/bin/python3
|
|
import brotli, yt_dlp, requests, json, time
|
|
from ythdd_globals import safeTraverse
|
|
import ythdd_globals
|
|
|
|
# Base options used by extract() when constructing yt_dlp.YoutubeDL.
#
# Optional keys that are left unset here:
#   "format": "bv*[height<=720]+ba"         (to be defined by the user)
#   "getcomments": True
#   "extractor_args": {"maxcomments": ...}
#   "writeinfojson": True
#   "progress_hooks": my_hook
ytdl_opts = dict(
    # Output filename templates, keyed by template type.
    outtmpl={
        "default": "%(id)s.%(ext)s",
        "chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s",
    },
    simulate=True,
)
|
|
|
|
# HTTP headers IOSextract() sends with its stage 1 request
# (POST to /youtubei/v1/player).
# The User-Agent and client version repeat the values found in stage1_body's
# client context; keep the two in sync when bumping versions.
stage1_headers = {
    # Transport / content negotiation
    "Connection": "keep-alive",
    "User-Agent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    # Client identification
    "X-Youtube-Client-Name": "5",
    "X-Youtube-Client-Version": "19.45.4",
    "Origin": "https://www.youtube.com",
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI",
}
|
|
|
|
# JSON payload template for IOSextract()'s stage 1 request
# (POST to /youtubei/v1/player).
# "videoId" is deliberately absent: IOSextract() supplies it for each request.
stage1_body = {
    "context": {
        "client": {
            "clientName": "IOS",
            "clientVersion": "19.45.4",
            "deviceMake": "Apple",
            "deviceModel": "iPhone16,2",
            "userAgent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
            "osName": "iPhone",
            "osVersion": "18.1.0.22B83",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0,
        },
    },
    "playbackContext": {
        "contentPlaybackContext": {
            "html5Preference": "HTML5_PREF_WANTS",
        },
    },
    "contentCheckOk": True,
    "racyCheckOk": True,
}
|
|
|
|
# HTTP headers for the stage 2 request (GET of the HLS manifest URL taken from
# the stage 1 response). In IOSextract() that request is currently commented
# out, so within this file these headers are defined but not sent.
stage2_headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Accept-Encoding": "gzip, deflate, br",
}
|
|
|
|
# HTTP headers IOSextract() sends with its stage 3 request
# (POST to /youtubei/v1/next).
# The client version repeats the "clientVersion" value found in stage3_body;
# keep the two in sync when bumping versions.
stage3_headers = {
    # Transport / content negotiation
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    # Client identification
    "X-Youtube-Client-Name": "1",
    "X-Youtube-Client-Version": "2.20241126.01.00",
    "Origin": "https://www.youtube.com",
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI",
}
|
|
|
|
# JSON payload template for IOSextract()'s stage 3 request
# (POST to /youtubei/v1/next).
# "videoId" is deliberately absent: IOSextract() supplies it for each request.
stage3_body = {
    "context": {
        "client": {
            "clientName": "WEB",
            "clientVersion": "2.20241126.01.00",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0,
        },
    },
    "contentCheckOk": True,
    "racyCheckOk": True,
}
|
|
|
|
def extract(url: str, getcomments=False, maxcomments=""):
    """Run yt-dlp's metadata extraction for a video and return its result.

    Args:
        url: Either a full URL or a string of exactly 11 characters, which is
            treated as a video id and expanded to a youtube.com watch URL.
        getcomments: When truthy, sets yt-dlp's ``getcomments`` option for
            this call.
        maxcomments: When truthy, passed as the first ``max_comments`` value
            of the youtube extractor arguments for this call.

    Returns:
        Whatever ``YoutubeDL.extract_info(url, download=False)`` returns.
    """
    # TODO: check user-agent and cookiefile
    extractor_config = ythdd_globals.config['extractor']

    if extractor_config['user-agent']:
        yt_dlp.utils.std_headers['User-Agent'] = extractor_config['user-agent']

    # Build the options for this call on a copy of the module-level defaults.
    # Writing into ytdl_opts directly would make 'getcomments',
    # 'extractor_args' and 'cookiefile' stick for every later call, even ones
    # that did not ask for them. A shallow copy is enough because only
    # top-level keys are assigned below.
    opts = dict(ytdl_opts)

    if extractor_config['cookies_path']:
        opts['cookiefile'] = extractor_config['cookies_path']

    if len(url) == 11:
        url = "https://www.youtube.com/watch?v=" + url
    if getcomments:
        opts['getcomments'] = True
    if maxcomments:
        # NOTE(review): per yt-dlp's youtube extractor arguments the remaining
        # entries are presumably max-parents, max-replies and
        # max-replies-per-thread - confirm against the yt-dlp documentation.
        opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}

    with yt_dlp.YoutubeDL(opts) as ytdl:
        return ytdl.extract_info(url, download=False)
|
|
|
|
def WEBrelated(url: str):
    """Fetch a watch page and return its "secondaryResults" section.

    The page is scanned for JSON objects beginning with
    ``{"responseContext":{"serviceTrackingParams":``; the second such object
    is parsed and its
    ``["contents"]["twoColumnWatchNextResults"]["secondaryResults"]`` entry is
    returned.

    Args:
        url: Either an 11-character video id, or a URL starting with
            ``https://www.youtube.com/watch?v=`` followed by the id.

    Returns:
        The ``secondaryResults`` value of the parsed object.

    Raises:
        ValueError: If ``url`` is neither an 11-character id nor a watch URL
            carrying an 11-character id. Raised before any request is made.
        json.JSONDecodeError: If the page does not contain the second JSON
            object, or it cannot be parsed.
        KeyError: If the parsed object lacks the expected keys.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
    watch_prefix = "https://www.youtube.com/watch?v="

    if len(url) == 11:
        video_id = url
    elif url.startswith(watch_prefix):
        # Video ids are 11 characters long; anything after them (e.g. "&t=")
        # is not part of the id.
        video_id = url[len(watch_prefix):len(watch_prefix) + 11]
    else:
        raise ValueError("WEBrelated expects an 11-character video id or a youtube.com watch URL")

    if len(video_id) != 11:
        raise ValueError("WEBrelated could not find an 11-character video id in the given URL")

    # Always request the canonical watch endpoint and let requests add the
    # query string: a bare id is not a valid URL on its own, and a full URL
    # already carries its own "v" parameter.
    response = requests.get(
        "https://www.youtube.com/watch",
        headers=ythdd_globals.getHeaders(caller='extractor'),
        params={'v': video_id},
    )

    # Plain UTF-8 decode; JSON \uXXXX escapes are resolved by the JSON parser.
    page = response.content.decode('utf-8')

    marker = '{"responseContext":{"serviceTrackingParams":'
    first = page.find(marker)
    second = page.find(marker, first + 1) if first != -1 else -1
    if second == -1:
        raise json.JSONDecodeError("second embedded JSON object not found in the watch page", page, 0)

    # raw_decode parses exactly one JSON value starting at the given offset
    # and ignores whatever follows it, so no end-of-script marker is needed.
    extracted_json, _ = json.JSONDecoder().raw_decode(page, second)

    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
|
|
|
|
def WEBextractSinglePage(uri: str):
    """Fetch a watch page and parse the first two JSON objects embedded in it.

    The page is scanned for objects beginning with
    ``{"responseContext":{"serviceTrackingParams":``; the first and second
    occurrences are parsed.

    Args:
        uri: An 11-character video id.

    Returns:
        A dict with ``'ec1'`` (first parsed object), ``'ec2'`` (second parsed
        object) and ``'took'`` (elapsed wall-clock seconds, measured from
        before the argument check until after parsing).

    Raises:
        ValueError: If ``uri`` is not exactly 11 characters long. Raised
            before any request is made.
        json.JSONDecodeError: If the page does not contain two such objects,
            or one of them cannot be parsed.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME

    start_time = time.time()

    if len(uri) != 11:
        raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")

    response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'))

    # Plain UTF-8 decode; JSON \uXXXX escapes are resolved by the JSON parser.
    page = response.content.decode('utf-8')

    marker = '{"responseContext":{"serviceTrackingParams":'
    first = page.find(marker)
    second = page.find(marker, first + 1) if first != -1 else -1
    if second == -1:
        raise json.JSONDecodeError("expected two embedded JSON objects in the watch page", page, 0)

    # raw_decode parses exactly one JSON value starting at the given offset
    # and ignores whatever follows it, so the parse does not depend on the
    # ';var ' / ';</script>' text that trails each object in the page.
    decoder = json.JSONDecoder()
    extracted_json1, _ = decoder.raw_decode(page, first)
    extracted_json2, _ = decoder.raw_decode(page, second)

    end_time = time.time()

    return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}
|
|
|
|
def IOSextract(uri: str):
    """Query the /youtubei/v1/player and /youtubei/v1/next endpoints for a video.

    Stage 1 POSTs the stage1_body payload (plus the video id) with
    stage1_headers to ``/youtubei/v1/player``. Stage 3 POSTs the stage3_body
    payload (plus the video id) with stage3_headers to ``/youtubei/v1/next``.
    Stage 2 (fetching ``stage1['streamingData']['hlsManifestUrl']`` with
    stage2_headers) is currently disabled.

    Args:
        uri: An 11-character video id.

    Returns:
        A dict with ``'stage1'`` and ``'stage3'`` (the parsed JSON responses)
        and ``'took'`` (elapsed wall-clock seconds).

    Raises:
        ValueError: If ``uri`` is not exactly 11 characters long. Raised
            before any request is made.
    """
    start = time.time()

    if len(uri) != 11:
        raise ValueError("IOSextract expects a single, 11-character long uri as an argument")

    # Build per-call request bodies instead of writing 'videoId' into the
    # shared module-level templates, so interleaved calls cannot observe each
    # other's video id. 'videoId' still ends up as the last key.
    player_body = {**stage1_body, 'videoId': uri}
    stage1_h = requests.post("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", headers=stage1_headers, json=player_body)
    stage1 = json.loads(stage1_h.content.decode('utf-8'))

    next_body = {**stage3_body, 'videoId': uri}
    stage3_h = requests.post("https://www.youtube.com/youtubei/v1/next?prettyPrint=false", headers=stage3_headers, json=next_body)
    stage3 = json.loads(stage3_h.content.decode('utf-8'))

    end = time.time()

    return {'stage1': stage1, 'stage3': stage3, 'took': end - start}