diff --git a/ythdd.py b/ythdd.py index 230b395..4d41621 100644 --- a/ythdd.py +++ b/ythdd.py @@ -4,10 +4,7 @@ from flask_sqlalchemy import SQLAlchemy from markupsafe import escape import requests, json, toml, time import views, downloader, ythdd_api, ythdd_globals, ythdd_db -#from ythdd_db import db -#config = toml.load("config.toml") -#global app ythdd_globals.starttime = int(time.time()) ythdd_globals.apiRequests = 0 ythdd_globals.apiFailedRequests = 0 diff --git a/ythdd_api_v1.py b/ythdd_api_v1.py index 4f7a6bb..4ab5992 100644 --- a/ythdd_api_v1.py +++ b/ythdd_api_v1.py @@ -12,6 +12,7 @@ def incrementBadRequests(): ythdd_globals.apiFailedRequests += 1 def notImplemented(data): + # TODO: change list to string -> data, not data[0] return 501, f"not recognised/implemented: {data[0]}", [] def stub_hello(): @@ -29,13 +30,75 @@ def stats(): return 200, "OK", data_to_send def hot(data): - if len(data) <= 3: + #print(data) + if len(data) <= 2: incrementBadRequests() return 400, f'error: bad request. supply required arguments.', [] - comment_count = "" - if data[1] not in ("video", "channel", "handle", "playlist"): + match data[1]: + case "video" | "channel" | "handle" | "playlist": + url_lookup = {'video': 'https://www.youtube.com/watch?v=', 'channel': 'https://www.youtube.com/channel/', 'handle': 'https://www.youtube.com/@', 'playlist': 'https://www.youtube.com/playlist?list='} + comment_count = "" + if len(data) <= 3: + incrementBadRequests() + return 400, f'error: bad request. supply required arguments.', [] + if data[2] not in ("c", "nc", "lc"): + incrementBadRequests() + return notImplemented(data) + if data[2] == "lc" and len(data) <= 4: + incrementBadRequests() + return 400, f'error: bad request. limited comments (lc) requires an extra argument specifying amount of comments.', [] + elif data[2] == "lc": + try: + comment_count = str(int(data[3])) + except: + incrementBadRequests() + return 400, f'error: bad request. 
{data[3]} is not a number.', [] + videoId = data[4] + else: + videoId = data[3] + + if len(videoId) != 11: # videoId sanity check + incrementBadRequests() + return 400, f'error: bad request. wrong videoId: {videoId} is {len(videoId)} characters long, but should be 11.', [] + + getcomments = True + if data[2] == "nc": + getcomments = False + + try: + started = int(time.time()) + extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count) + extracted_dict["took"] = int(time.time()) - started + return 200, "OK", extracted_dict + except Exception as e: + incrementBadRequests() + return 400, f'error: failed to get "{videoId}" ({data[2]}). {e}', [] + case "related": + videoId = data[2] + if len(videoId) != 11: # videoId sanity check + incrementBadRequests() + return 400, f'error: bad request. wrong videoId: {videoId} is {len(videoId)} characters long, but should be 11.', [] + + started = int(time.time()) + try: + extracted_related = ythdd_extractor.related('https://www.youtube.com/watch?v=' + videoId) + extracted_related['took'] = int(time.time()) - started + return 200, "OK", extracted_related + except Exception as e: + incrementBadRequests() + return 400, f'error: unknown error while parsing {videoId}: {e}', [] + + case _: + incrementBadRequests() + return notImplemented(data) + + + ''' + if data[1] not in ("video", "channel", "handle", "playlist", "related"): + incrementBadRequests() return notImplemented(data) if data[2] not in ("c", "nc", "lc"): # comments, no comments, limited comments + incrementBadRequests() return notImplemented(data) if data[2] == "lc": if len(data) <= 4: @@ -62,6 +125,7 @@ def hot(data): except Exception as e: incrementBadRequests() return 400, f'error: failed to get "{videoId}" ({data[2]}). 
_WATCH_URL = "https://www.youtube.com/watch"
_WATCH_URL_PREFIX = _WATCH_URL + "?v="
_VIDEO_ID_LENGTH = 11
_DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
# Opening of the JSON blobs embedded in the watch page.
_RESPONSE_MARKER = '{"responseContext":{"serviceTrackingParams":'
# Seconds to wait for YouTube before giving up instead of hanging the request.
_REQUEST_TIMEOUT = 10


def extract(url: str, getcomments=False, maxcomments=""):
    """Run yt-dlp's metadata extraction for ``url`` without downloading.

    ``url`` may be a full URL or a bare 11-character video ID, which is
    expanded to a watch URL. ``getcomments`` turns comment extraction on and
    ``maxcomments`` (a numeric string, empty for no limit) caps how many
    comments are fetched.

    Returns whatever ``YoutubeDL.extract_info`` returns.
    """
    if len(url) == _VIDEO_ID_LENGTH:
        url = _WATCH_URL_PREFIX + url

    # Work on a per-call copy: writing into the shared module-level ytdl_opts
    # would keep comment extraction (and its limit) switched on for every
    # later call, including ones that asked for no comments.
    opts = dict(ytdl_opts)
    if getcomments:
        opts['getcomments'] = True
    if maxcomments:
        opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}

    with yt_dlp.YoutubeDL(opts) as ytdl:
        return ytdl.extract_info(url, download=False)


def _videoIdFromUrl(url):
    """Return the 11-character video ID named by ``url``.

    Accepts a bare ID or a ``https://www.youtube.com/watch?v=`` URL; raises
    ValueError for anything else.
    """
    if len(url) == _VIDEO_ID_LENGTH:
        return url
    if not url.startswith(_WATCH_URL_PREFIX):
        raise ValueError(f"unsupported url: {url!r}")
    start = len(_WATCH_URL_PREFIX)
    videoId = url[start:start + _VIDEO_ID_LENGTH]
    if len(videoId) != _VIDEO_ID_LENGTH:
        raise ValueError(f"no {_VIDEO_ID_LENGTH}-character video id in url: {url!r}")
    return videoId


def _secondaryResultsFromHtml(html):
    """Pull the ``secondaryResults`` section out of a watch page's HTML.

    The second occurrence of the response marker is parsed; presumably the
    first one belongs to a different embedded response -- verify if the page
    layout changes. Raises ValueError when the marker is not found twice (or
    the JSON is malformed) and KeyError when the expected keys are missing.
    """
    first = html.find(_RESPONSE_MARKER)
    second = html.find(_RESPONSE_MARKER, first + 1) if first != -1 else -1
    if second == -1:
        raise ValueError("watch page does not contain the expected response data")
    # raw_decode parses exactly one JSON value and ignores what follows, so a
    # ';' inside a string of the payload can no longer truncate the document.
    payload, _ = json.JSONDecoder().raw_decode(html, second)
    return payload["contents"]['twoColumnWatchNextResults']["secondaryResults"]


def related(url: str):
    """Fetch the related-videos section of a YouTube watch page.

    WARNING: highly experimental -- this scrapes the watch page HTML and may
    break at any time.

    ``url`` is a bare 11-character video ID or a
    ``https://www.youtube.com/watch?v=`` URL. Returns the ``secondaryResults``
    dict found in the page's embedded JSON.

    Raises ValueError for an unsupported ``url`` or an unparsable page,
    KeyError when the page lacks the expected structure, and whatever
    ``requests`` raises on network or HTTP errors.
    """
    videoId = _videoIdFromUrl(url)

    # Prefer the configured user agent; fall back to the built-in default when
    # the option is missing or empty.
    user_agent = ythdd_globals.config.get('extractor', {}).get('user-agent') or _DEFAULT_USER_AGENT

    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    # Always request the bare watch endpoint and pass the ID as a parameter:
    # a bare ID is not a valid URL, and a full URL would end up with a
    # duplicated "v" query parameter.
    response = requests.get(_WATCH_URL, headers=headers, params={'v': videoId}, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    # The second argument of decode() is the error handler, not a codec;
    # replace undecodable bytes rather than failing on an unknown handler name.
    html = response.content.decode('utf-8', errors='replace')
    return _secondaryResultsFromHtml(html)