new iOS/web extractors, image proxying done by views.py
- ythdd_globals.py - added a helper function to get the user-configured headers
- ythdd.py - now checks for config.toml in the working directory
- requirements.txt - added brotli, so that requests can decompress innertube responses
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/python3
|
||||
import yt_dlp, requests, json
|
||||
import brotli, yt_dlp, requests, json, time
|
||||
import ythdd_globals
|
||||
|
||||
ytdl_opts = {
|
||||
@@ -15,6 +15,89 @@ ytdl_opts = {
|
||||
"simulate": True
|
||||
}
|
||||
|
||||
# HTTP headers for the stage 1 request: IOSextract POSTs stage1_body to
# /youtubei/v1/player with these headers. The User-Agent and client version
# repeat the values in stage1_body["context"]["client"]; keep them in sync.
stage1_headers = {
    "Connection": "keep-alive",
    "User-Agent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    # NOTE(review): presumably the numeric ID matching clientName "IOS" -- confirm.
    "X-Youtube-Client-Name": "5",
    "X-Youtube-Client-Version": "19.45.4",
    "Origin": "https://www.youtube.com",
    # "br" needs a brotli decoder to be installed for requests to decompress it.
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI",
}
|
||||
|
||||
# JSON body template for the stage 1 request (POST to /youtubei/v1/player),
# describing an iOS YouTube app client. The "videoId" key is deliberately
# absent here: it is supplied per request by the code that sends this body.
stage1_body = {
    "context": {
        "client": {
            "clientName": "IOS",
            "clientVersion": "19.45.4",
            "deviceMake": "Apple",
            "deviceModel": "iPhone16,2",
            "userAgent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
            "osName": "iPhone",
            "osVersion": "18.1.0.22B83",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0,
        },
    },
    "playbackContext": {
        "contentPlaybackContext": {
            "html5Preference": "HTML5_PREF_WANTS",
        },
    },
    "contentCheckOk": True,
    "racyCheckOk": True,
}
|
||||
|
||||
# Browser-like HTTP headers for the stage 2 request (GET of the HLS manifest
# URL taken from the stage 1 response). That request is currently commented
# out in IOSextract, so nothing in the visible code sends these headers yet.
stage2_headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Accept-Encoding": "gzip, deflate, br",
}
|
||||
|
||||
# HTTP headers for the stage 3 request: IOSextract POSTs stage3_body to
# /youtubei/v1/next with these headers. The client version repeats the value
# in stage3_body["context"]["client"]; keep them in sync.
stage3_headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-us,en;q=0.5",
    "Sec-Fetch-Mode": "navigate",
    "Content-Type": "application/json",
    # NOTE(review): presumably the numeric ID matching clientName "WEB" -- confirm.
    "X-Youtube-Client-Name": "1",
    "X-Youtube-Client-Version": "2.20241126.01.00",
    "Origin": "https://www.youtube.com",
    # "br" needs a brotli decoder to be installed for requests to decompress it.
    "Accept-Encoding": "gzip, deflate, br",
    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI",
}
|
||||
|
||||
# JSON body template for the stage 3 request (POST to /youtubei/v1/next),
# describing the WEB client. The "videoId" key is deliberately absent here:
# it is supplied per request by the code that sends this body.
stage3_body = {
    "context": {
        "client": {
            "clientName": "WEB",
            "clientVersion": "2.20241126.01.00",
            "hl": "en",
            "timeZone": "UTC",
            "utcOffsetMinutes": 0,
        },
    },
    "contentCheckOk": True,
    "racyCheckOk": True,
}
|
||||
|
||||
def extract(url: str, getcomments=False, maxcomments=""):
|
||||
# TODO: check user-agent and cookiefile
|
||||
|
||||
@@ -34,7 +117,7 @@ def extract(url: str, getcomments=False, maxcomments=""):
|
||||
result = ytdl.extract_info(url, download=False)
|
||||
return result
|
||||
|
||||
def related(url: str):
|
||||
def WEBrelated(url: str):
|
||||
# WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
|
||||
if len(url) == 11:
|
||||
params = {'v': url}
|
||||
@@ -45,34 +128,55 @@ def related(url: str):
|
||||
videoId = url[32:44]
|
||||
params = {'v': videoId}
|
||||
|
||||
# NOTE: use ESR user-agent
|
||||
# user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
|
||||
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'
|
||||
|
||||
if ythdd_globals.config['extractor']['user-agent']:
|
||||
user_agent = ythdd_globals.config['extractor']['user-agent']
|
||||
|
||||
headers = {
|
||||
'User-Agent': user_agent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'DNT': '1',
|
||||
'Sec-GPC': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'none',
|
||||
'Sec-Fetch-User': '?1',
|
||||
'Priority': 'u=0, i',
|
||||
'Pragma': 'no-cache',
|
||||
'Cache-Control': 'no-cache',
|
||||
}
|
||||
response = requests.get(url, headers=headers, params=params)
|
||||
response = requests.get(url, headers=ythdd_globals.getHeaders(caller='extractor'), params=params)
|
||||
extracted_string = str(response.content.decode('utf8', 'unicode_escape'))
|
||||
start = extracted_string.find('{"responseContext":{"serviceTrackingParams":')
|
||||
start2 = extracted_string.find('{"responseContext":{"serviceTrackingParams":', start + 1)
|
||||
end = extracted_string.find(';</script>', start2)
|
||||
extracted_json = json.loads(extracted_string[start2:end])
|
||||
|
||||
return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
|
||||
return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
|
||||
|
||||
def _load_embedded_json(page: str, marker: str, search_from: int = 0):
    """Decode the JSON value that begins at the next occurrence of *marker*.

    Args:
        page: text to search (the decoded watch page).
        marker: literal text the wanted JSON value starts with.
        search_from: index in *page* at which the search begins.

    Returns:
        A ``(decoded_value, marker_index)`` tuple.

    Raises:
        json.JSONDecodeError: if *marker* is not present at or after
            *search_from*, or the text at that position is not valid JSON.
    """
    begin = page.find(marker, search_from)
    if begin == -1:
        # Fail with an explicit message instead of slicing from index -1.
        raise json.JSONDecodeError("embedded JSON marker not found in watch page", page, max(search_from, 0))
    # raw_decode stops at the end of the JSON value, so the end of the blob
    # does not have to be guessed from whatever script text follows it.
    decoded, _ = json.JSONDecoder().raw_decode(page, begin)
    return decoded, begin


def WEBextractSinglePage(uri: str):
    """Download a watch page and decode the two JSON blobs embedded in it.

    WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME: this relies on the
    page containing two JSON objects that start with
    ``{"responseContext":{"serviceTrackingParams":``.

    Args:
        uri: the 11-character video ID.

    Returns:
        A dict with the first embedded JSON object under ``'ec1'``, the
        second under ``'ec2'``, and the elapsed wall-clock time in seconds
        under ``'took'``.

    Raises:
        ValueError: if *uri* is not exactly 11 characters long.
        json.JSONDecodeError: if either embedded JSON object cannot be found
            or decoded.
    """
    start_time = time.time()

    if len(uri) != 11:
        raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")

    response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'))

    # The second positional argument of bytes.decode() is the *errors*
    # handler, not a codec; 'unicode_escape' is not a valid handler name and
    # would raise LookupError on the first malformed byte. Use 'replace' so a
    # stray byte cannot abort the whole extraction.
    extracted_string = response.content.decode('utf-8', errors='replace')

    marker = '{"responseContext":{"serviceTrackingParams":'
    extracted_json1, first_at = _load_embedded_json(extracted_string, marker)
    # The second blob is the next occurrence of the marker after the first.
    extracted_json2, _ = _load_embedded_json(extracted_string, marker, first_at + 1)

    end_time = time.time()

    return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}
|
||||
|
||||
def IOSextract(uri: str):
    """Query the innertube player and next endpoints for one video.

    Stage 1 POSTs the iOS-client body (stage1_body) to /youtubei/v1/player;
    stage 3 POSTs the WEB-client body (stage3_body) to /youtubei/v1/next.
    Stage 2 (fetching the HLS manifest) is currently disabled.

    Args:
        uri: the 11-character video ID.

    Returns:
        A dict with the decoded JSON of the player response under
        ``'stage1'``, the decoded JSON of the next response under
        ``'stage3'``, and the elapsed wall-clock time in seconds under
        ``'took'``.

    Raises:
        ValueError: if *uri* is not exactly 11 characters long.
    """
    start = time.time()

    if len(uri) != 11:
        raise ValueError("IOSextract expects a single, 11-character long uri as an argument")

    # Build a per-call body instead of writing videoId into the module-level
    # template, so calls do not leak state into each other through the
    # shared dict.
    player_body = {**stage1_body, 'videoId': uri}
    stage1_h = requests.post("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", headers=stage1_headers, json=player_body)
    stage1 = json.loads(stage1_h.content.decode('utf-8'))

    # Stage 2 is disabled for now:
    #stage2_h = requests.get(stage1['streamingData']['hlsManifestUrl'], headers=stage2_headers)
    #stage2 = stage2_h.content.decode('utf-8')

    next_body = {**stage3_body, 'videoId': uri}
    stage3_h = requests.post("https://www.youtube.com/youtubei/v1/next?prettyPrint=false", headers=stage3_headers, json=next_body)
    stage3 = json.loads(stage3_h.content.decode('utf-8'))

    end = time.time()

    #return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
    return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
|
||||
Reference in New Issue
Block a user