78 lines
2.7 KiB
Python
78 lines
2.7 KiB
Python
#!/usr/bin/python3
|
|
import yt_dlp, requests, json
|
|
import ythdd_globals
|
|
|
|
# Baseline yt-dlp options used by this module.
# Optional keys that are currently left out (kept here as a reminder):
#   "format": "bv*[height<=720]+ba"   -- to be defined by the user
#   "getcomments": True
#   "extractor_args": {"maxcomments": ...}
#   "writeinfojson": True
#   "progress_hooks": my_hook
ytdl_opts = dict(
    # Output filename templates: one for regular files, one for chapters.
    outtmpl=dict(
        default="%(id)s.%(ext)s",
        chapter="%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s",
    ),
    # Ask yt-dlp to simulate only.
    simulate=True,
)
|
|
|
|
def extract(url: str, getcomments=False, maxcomments=""):
    """Fetch a video's metadata through yt-dlp without downloading it.

    Args:
        url: Full video URL, or a bare 11-character video ID, which is
            expanded to ``https://www.youtube.com/watch?v=<id>``.
        getcomments: When truthy, sets yt-dlp's ``getcomments`` option.
        maxcomments: When truthy, used as the first element of the youtube
            extractor's ``max_comments`` argument (the remaining three
            elements are ``"all"``).

    Returns:
        The value returned by ``YoutubeDL.extract_info(url, download=False)``.
    """
    # TODO: check user-agent and cookiefile
    extractor_cfg = ythdd_globals.config['extractor']

    if extractor_cfg['user-agent']:
        # NOTE: this overrides yt-dlp's module-level default headers, so it
        # stays in effect for the rest of the process.
        yt_dlp.utils.std_headers['User-Agent'] = extractor_cfg['user-agent']

    # Work on a per-call copy: writing into the shared module-level dict made
    # 'getcomments' / 'extractor_args' / 'cookiefile' from one call leak into
    # every later call. Only top-level keys are added, so a shallow copy is
    # sufficient.
    opts = dict(ytdl_opts)

    if extractor_cfg['cookies_path']:
        opts['cookiefile'] = extractor_cfg['cookies_path']

    # A bare video ID is 11 characters long; turn it into a watch URL.
    if len(url) == 11:
        url = "https://www.youtube.com/watch?v=" + url

    if getcomments:
        opts['getcomments'] = True

    if maxcomments:
        opts['extractor_args'] = {
            'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}
        }

    with yt_dlp.YoutubeDL(opts) as ytdl:
        return ytdl.extract_info(url, download=False)
|
|
|
|
def related(url: str):
    """Scrape the "secondary results" section of a YouTube watch page.

    Args:
        url: Either a bare 11-character video ID, or a URL containing
            ``https://www.youtube.com/watch?v=`` followed by the video ID.

    Returns:
        The ``secondaryResults`` object found under
        ``contents.twoColumnWatchNextResults`` in the JSON embedded in the
        watch page.

    Raises:
        ValueError: If no 11-character video ID can be taken from ``url``.
        json.JSONDecodeError: If the embedded JSON payload is missing or
            cannot be parsed.
        KeyError: If the parsed payload lacks the expected keys.
    """
    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
    watch_prefix = "https://www.youtube.com/watch?v="
    id_length = 11

    if len(url) == id_length:
        video_id = url
    else:
        _, found, tail = url.partition(watch_prefix)
        if not found:
            raise ValueError(
                f"expected a video ID or a {watch_prefix}<id> URL, got {url!r}"
            )
        # Take exactly the ID; the old fixed [32:44] slice grabbed one
        # character too many and ignored where the prefix was actually found.
        video_id = tail[:id_length]
        if len(video_id) != id_length:
            raise ValueError(f"could not read an 11-character video ID from {url!r}")

    # NOTE: use ESR user-agent
    # user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'

    # A user-agent set in the config takes precedence over the built-in one.
    if ythdd_globals.config['extractor']['user-agent']:
        user_agent = ythdd_globals.config['extractor']['user-agent']

    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    # Always request the canonical watch endpoint with the ID as a query
    # parameter. Passing the caller's string as the URL failed for bare IDs
    # (not a URL at all) and duplicated the 'v' parameter for full URLs.
    # requests has no default timeout, so set one to avoid hanging forever.
    response = requests.get(
        "https://www.youtube.com/watch",
        headers=headers,
        params={'v': video_id},
        timeout=30,
    )

    # The second argument of bytes.decode() is the *error handler*, not a
    # codec; 'unicode_escape' is not a valid handler and raised LookupError
    # on the first undecodable byte.
    page = response.content.decode('utf-8', errors='replace')

    # The payload of interest is the *second* object starting with this
    # marker (presumably the page's initial-data blob -- confirm if YouTube
    # changes its markup).
    marker = '{"responseContext":{"serviceTrackingParams":'
    first = page.find(marker)
    second = page.find(marker, first + 1) if first != -1 else -1
    if second == -1:
        # Same exception type the old code ended up raising for this case,
        # but with a message that says what actually went wrong.
        raise json.JSONDecodeError(
            "watch page does not contain the expected embedded JSON payload",
            page,
            0,
        )

    # raw_decode stops at the end of the JSON document, so no end marker
    # (such as ';</script>') has to be located.
    extracted_json, _ = json.JSONDecoder().raw_decode(page[second:])

    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]