Compare commits

..

2 Commits

Author SHA1 Message Date
3212627d89 introduce a bare-bones invidious API translation layer
also rewritten ythdd_api.py a tiny bit
2024-12-26 20:38:33 +01:00
0099736a74 new iOS/web extractors, image proxying done by views.py
- ythdd_globals.py - added helper function to get user-configured header
- ythdd.py - now checks for config.toml in work directory
- requirements.txt - add brotli, so that requests can decompress
innertube request
2024-12-26 20:15:45 +01:00
10 changed files with 797 additions and 52 deletions

View File

@@ -1,15 +1,19 @@
[general]
db_file_path = "/path/to/ythdd_db.sqlite" # Preferably stored on an SSD.
video_storage_directory_path = "/path/to/videos/" # Path to video vault.
is_proxied = false
is_proxied = false # Set to true if running behind reverse proxy.
public_facing_url = "http://localhost:5000/" # Used for URL rewriting. Note the trailing slash /.
[api]
api_key = "" # Leave empty API key for public access to non-sensitive backend
api_key_admin = "CHANGEME" # Empty *admin* API key will autogenerate a random one every launch.
[extractor]
user-agent = "" # leave empty for default
cookies_path = "" # leave empty for none
user-agent = "" # Leave empty for default (Firefox ESR).
cookies_path = "" # Leave empty for none.
[proxy]
user-agent = "" # Leave empty for default (Firefox ESR).
[admin]
# List of users with admin privileges.

114
invidious_formats.py Normal file
View File

@@ -0,0 +1,114 @@
# Part of the Invidious project
# https://github.com/iv-org/invidious
# License: AGPLv3
# Static lookup table of known stream formats, keyed by integer itag.
# Every entry carries "ext" plus at least one of "acodec"/"vcodec"; the
# remaining keys ("width", "height", "abr", "fps", "format", "container")
# are optional and differ from entry to entry, so readers must not assume
# that any of them is present.
FORMATS = {
5: {"ext": "flv", "width": 400, "height": 240, "acodec": "mp3", "abr": 64, "vcodec": "h263"},
6: {"ext": "flv", "width": 450, "height": 270, "acodec": "mp3", "abr": 64, "vcodec": "h263"},
13: {"ext": "3gp", "acodec": "aac", "vcodec": "mp4v"},
17: {"ext": "3gp", "width": 176, "height": 144, "acodec": "aac", "abr": 24, "vcodec": "mp4v"},
18: {"ext": "mp4", "width": 640, "height": 360, "acodec": "aac", "abr": 96, "vcodec": "h264"},
22: {"ext": "mp4", "width": 1280, "height": 720, "acodec": "aac", "abr": 192, "vcodec": "h264"},
34: {"ext": "flv", "width": 640, "height": 360, "acodec": "aac", "abr": 128, "vcodec": "h264"},
35: {"ext": "flv", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
36: {"ext": "3gp", "width": 320, "acodec": "aac", "vcodec": "mp4v"},
37: {"ext": "mp4", "width": 1920, "height": 1080, "acodec": "aac", "abr": 192, "vcodec": "h264"},
38: {"ext": "mp4", "width": 4096, "height": 3072, "acodec": "aac", "abr": 192, "vcodec": "h264"},
43: {"ext": "webm", "width": 640, "height": 360, "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
44: {"ext": "webm", "width": 854, "height": 480, "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
45: {"ext": "webm", "width": 1280, "height": 720, "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
46: {"ext": "webm", "width": 1920, "height": 1080, "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
59: {"ext": "mp4", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
78: {"ext": "mp4", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
# 3D videos
82: {"ext": "mp4", "height": 360, "format": "3D", "acodec": "aac", "abr": 128, "vcodec": "h264"},
83: {"ext": "mp4", "height": 480, "format": "3D", "acodec": "aac", "abr": 128, "vcodec": "h264"},
84: {"ext": "mp4", "height": 720, "format": "3D", "acodec": "aac", "abr": 192, "vcodec": "h264"},
85: {"ext": "mp4", "height": 1080, "format": "3D", "acodec": "aac", "abr": 192, "vcodec": "h264"},
100: {"ext": "webm", "height": 360, "format": "3D", "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
101: {"ext": "webm", "height": 480, "format": "3D", "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
102: {"ext": "webm", "height": 720, "format": "3D", "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
# Apple HTTP Live Streaming
91: {"ext": "mp4", "height": 144, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
92: {"ext": "mp4", "height": 240, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
93: {"ext": "mp4", "height": 360, "format": "HLS", "acodec": "aac", "abr": 128, "vcodec": "h264"},
94: {"ext": "mp4", "height": 480, "format": "HLS", "acodec": "aac", "abr": 128, "vcodec": "h264"},
95: {"ext": "mp4", "height": 720, "format": "HLS", "acodec": "aac", "abr": 256, "vcodec": "h264"},
96: {"ext": "mp4", "height": 1080, "format": "HLS", "acodec": "aac", "abr": 256, "vcodec": "h264"},
132: {"ext": "mp4", "height": 240, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
151: {"ext": "mp4", "height": 72, "format": "HLS", "acodec": "aac", "abr": 24, "vcodec": "h264"},
# DASH mp4 video
133: {"ext": "mp4", "height": 240, "format": "DASH video", "vcodec": "h264"},
134: {"ext": "mp4", "height": 360, "format": "DASH video", "vcodec": "h264"},
135: {"ext": "mp4", "height": 480, "format": "DASH video", "vcodec": "h264"},
136: {"ext": "mp4", "height": 720, "format": "DASH video", "vcodec": "h264"},
137: {"ext": "mp4", "height": 1080, "format": "DASH video", "vcodec": "h264"},
138: {"ext": "mp4", "format": "DASH video", "vcodec": "h264"}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
160: {"ext": "mp4", "height": 144, "format": "DASH video", "vcodec": "h264"},
212: {"ext": "mp4", "height": 480, "format": "DASH video", "vcodec": "h264"},
264: {"ext": "mp4", "height": 1440, "format": "DASH video", "vcodec": "h264"},
298: {"ext": "mp4", "height": 720, "format": "DASH video", "vcodec": "h264", "fps": 60},
299: {"ext": "mp4", "height": 1080, "format": "DASH video", "vcodec": "h264", "fps": 60},
266: {"ext": "mp4", "height": 2160, "format": "DASH video", "vcodec": "h264"},
# Dash mp4 audio
139: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 48, "container": "m4a_dash"},
140: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 128, "container": "m4a_dash"},
141: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 256, "container": "m4a_dash"},
256: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "container": "m4a_dash"},
258: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "container": "m4a_dash"},
325: {"ext": "m4a", "format": "DASH audio", "acodec": "dtse", "container": "m4a_dash"},
328: {"ext": "m4a", "format": "DASH audio", "acodec": "ec-3", "container": "m4a_dash"},
# Dash webm
167: {"ext": "webm", "height": 360, "width": 640, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
168: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
169: {"ext": "webm", "height": 720, "width": 1280, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
170: {"ext": "webm", "height": 1080, "width": 1920, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
218: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
219: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
278: {"ext": "webm", "height": 144, "format": "DASH video", "container": "webm", "vcodec": "vp9"},
242: {"ext": "webm", "height": 240, "format": "DASH video", "vcodec": "vp9"},
243: {"ext": "webm", "height": 360, "format": "DASH video", "vcodec": "vp9"},
244: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
245: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
246: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
247: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9"},
248: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9"},
271: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9"},
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
272: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9"},
302: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9", "fps": 60},
303: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9", "fps": 60},
308: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9", "fps": 60},
313: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9"},
315: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9", "fps": 60},
330: {"ext": "webm", "height": 144, "format": "DASH video", "vcodec": "vp9", "fps": 60},
331: {"ext": "webm", "height": 240, "format": "DASH video", "vcodec": "vp9", "fps": 60},
332: {"ext": "webm", "height": 360, "format": "DASH video", "vcodec": "vp9", "fps": 60},
333: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9", "fps": 60},
334: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9", "fps": 60},
335: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9", "fps": 60},
336: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9", "fps": 60},
337: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9", "fps": 60},
# Dash webm audio
171: {"ext": "webm", "acodec": "vorbis", "format": "DASH audio", "abr": 128},
172: {"ext": "webm", "acodec": "vorbis", "format": "DASH audio", "abr": 256},
# Dash webm audio with opus inside
249: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 50},
250: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 70},
251: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 160},
# av01 video only formats sometimes served with "unknown" codecs
394: {"ext": "mp4", "height": 144, "vcodec": "av01.0.05M.08"},
395: {"ext": "mp4", "height": 240, "vcodec": "av01.0.05M.08"},
396: {"ext": "mp4", "height": 360, "vcodec": "av01.0.05M.08"},
397: {"ext": "mp4", "height": 480, "vcodec": "av01.0.05M.08"},
}

View File

@@ -12,3 +12,4 @@ toml>=0.10.2
Flask-APScheduler>=1.13.1
requests>=2.32.3
yt_dlp
brotli>=1.1.0

View File

@@ -1,8 +1,9 @@
#!/usr/bin/python3
from flask import render_template
from flask import render_template, Response
from flask_sqlalchemy import SQLAlchemy
from markupsafe import escape
import requests, json
import ythdd_globals
def homepage():
return "homepage"
@@ -12,3 +13,31 @@ def home():
def index():
return "index"
def thumbnailProxy(received_request):
    """Stream a video thumbnail fetched from YouTube's image host.

    `received_request` is the path captured after `/vi/`. It is forwarded
    upstream only when its first "/" sits at index 11, i.e. when it opens
    with an 11-character segment; anything else is answered with a JSON
    400 response.
    """
    # apparently, this can be set to
    # https://img.youtube.com/ as well
    upstream_host = "https://i.ytimg.com/"

    # find() yields -1 when no slash is present, which fails the check too.
    if received_request.find("/") != 11:
        error_body = json.dumps({
            'status': 400,
            'error_msg': 'invalid request. pretend this is a thumbnail :D'
        })
        return Response(error_body, mimetype='application/json', status=400)

    upstream = requests.get(
        upstream_host + "vi/" + received_request,
        headers=ythdd_globals.getHeaders(caller='proxy'),
        stream=True,
    )
    # Ask the raw stream to undo any transfer compression while it is relayed.
    upstream.raw.decode_content = True
    return Response(
        upstream.raw,
        mimetype=upstream.headers['content-type'],
        status=upstream.status_code,
    )
def ggphtProxy(received_request):
    """Relay an image from yt3.ggpht.com.

    `received_request` is appended verbatim to the upstream host, and the
    upstream status code and content type are passed back to the client.
    """
    upstream_host = "https://yt3.ggpht.com/"
    upstream = requests.get(
        upstream_host + received_request,
        headers=ythdd_globals.getHeaders(caller='proxy'),
        stream=True,
    )
    # Ask the raw stream to undo any transfer compression while it is relayed.
    upstream.raw.decode_content = True
    return Response(
        upstream.raw,
        mimetype=upstream.headers['content-type'],
        status=upstream.status_code,
    )

View File

@@ -6,15 +6,18 @@ from argparse import ArgumentParser
from ythdd_globals import colors
import requests, json, toml, time
import views, downloader, ythdd_api, ythdd_globals, ythdd_db
import os
from flask_apscheduler import APScheduler
app = Flask(__name__)
app_host = "None"
app_port = "None"
def setup():
# sanity check: make sure config is set
# required to make `flask --app ythdd run --debug` work
global config
global config, app_host, app_port
try:
if not config['general']:
ythdd_globals.setConfig(ythdd_globals.configfile)
@@ -31,6 +34,25 @@ def setup():
ythdd_globals.isProxied = config['general']['is_proxied']
ythdd_globals.outsideApiHits = 0
are_we_sure_of_host_and_port = True
if app_host == "None":
app_host = "127.0.0.1"
are_we_sure_of_host_and_port = False
if app_port == "None":
app_port = "5000"
are_we_sure_of_host_and_port = False
public_facing_url = config['general']['public_facing_url']
rewrite_sanity_check = public_facing_url.replace(f"{app_host}:{app_port}", "")
if not config['general']['is_proxied'] and public_facing_url == rewrite_sanity_check:
sanity_string = f"{colors.WARNING}Heads up!{colors.ENDC} Public facing URL does not match the IP and port the server is running on.\n"
sanity_string += f" Expected: {colors.OKCYAN}{config['general']['public_facing_url']}{colors.ENDC}, but"
if not are_we_sure_of_host_and_port: sanity_string += " (assuming it's)"
sanity_string += f" running on: {colors.OKCYAN}{app_host}:{app_port}{colors.ENDC}.\n"
sanity_string += f" This is just a sanity check and may not neccessarily mean bad configuration.\n"
sanity_string += f" If you're running a reverse proxy, set {colors.OKCYAN}is_proxied{colors.ENDC} to true to silence this message.\n"
print(sanity_string)
app.config['SQLALCHEMY_DATABASE_URI'] = f"sqlite:///{config['general']['db_file_path']}"
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
app.add_url_rule('/', view_func=views.index)
@@ -38,6 +60,8 @@ def setup():
app.add_url_rule('/home', view_func=views.home)
app.add_url_rule('/api/', view_func=ythdd_api.api_greeting)
app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall)
app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy)
app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy)
db = ythdd_db.initDB(app, config)
with app.app_context():
@@ -81,14 +105,18 @@ def main(args):
host = host_port[0]
port = host_port[1]
global config
global config, app_host, app_port
try:
# if specified, use custom config file
ythdd_globals.configfile = args.config
ythdd_globals.setConfig(ythdd_globals.configfile)
except:
# if not, use dummy file
# if not, try using the default "config.toml"
if os.path.exists("config.toml"):
ythdd_globals.configfile = "config.toml"
else:
# unless it's not there, if that's the case then use the dummy file
ythdd_globals.configfile = ""
# but try to set the API secret if provided by the user
if args.secret:
@@ -97,6 +125,9 @@ def main(args):
config = ythdd_globals.config
app_host = host
app_port = port
setup()
app.run(host=host, port=int(port))
@@ -115,4 +146,6 @@ if __name__ == "__main__":
main(args)
else:
app_host = os.getenv("FLASK_RUN_HOST", "None")
app_port = os.getenv("FLASK_RUN_PORT", "None")
setup()

View File

@@ -2,7 +2,8 @@
from flask import Response, request
from markupsafe import escape
import requests, time, json
import ythdd_api_v1, ythdd_globals
import ythdd_globals
import ythdd_api_v1, ythdd_inv_tl
def api_greeting():
string = {'status': 200, 'msg': f"ok (ythdd {ythdd_globals.version})", 'latest_api': f"v{ythdd_globals.apiVersion}"}
@@ -18,18 +19,40 @@ def api_global_catchall(received_request):
api_version = request_list[0]
if request_list[0] == 'v1':
# use v1 api
del request_list[0] # v1
# if list is empty, aka /api/v1/
if request_list == ['']:
return api_greeting()
del request_list[0]
# if list is empty, aka /api/v1/, or /api/v1
if request_list == [''] or request_list == []:
#return api_greeting()
resp = api_greeting()
try:
status, received, data = ythdd_api_v1.lookup(request_list)
except Exception as e:
ythdd_globals.apiFailedRequests += 1
status, received, data = 500, f"internal server error: call ended in failure: {e}", []
resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
elif request_list[0] == 'invidious':
# drop 'invidious' from the list
del request_list[0]
# for /api/invidious/ and /api/invidious
# show greeting from Invidious TL
#print(request_list) # for debugging purposes
if request_list == [''] or request_list == []:
#resp = ythdd_inv_tl.greeting()
status, response = ythdd_inv_tl.greeting()
return Response(response, status=status)
# if a path has been supplied try to get appropriate data
try:
# lookup and construct a response
resp = ythdd_inv_tl.lookup(request_list)
#print(resp) # for debugging purposes
# unless an error occurs
except Exception as e:
ythdd_globals.apiFailedRequests += 1
status, received, data = 500, f"internal server error: invidious translation call ended in failure: {e}", []
resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
else:
ythdd_globals.apiFailedRequests += 1
status, received, data = 405, f'error: unsupported api version: "{request_list[0]}". try: "v{ythdd_globals.apiVersion}".', []
resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
response = {'status': status, 'msg': received, 'data': data}
return Response(json.dumps(response), mimetype='application/json', status=status)
return resp

View File

@@ -113,7 +113,7 @@ def hot(data):
started = time.time()
try:
# try to actually get the data
extracted_related = ythdd_extractor.related('https://www.youtube.com/watch?v=' + videoId)
extracted_related = ythdd_extractor.WEBrelated('https://www.youtube.com/watch?v=' + videoId)
extracted_related['took'] = time.time() - started
return 200, "OK", extracted_related
except KeyError:

View File

@@ -1,5 +1,5 @@
#!/usr/bin/python3
import yt_dlp, requests, json
import brotli, yt_dlp, requests, json, time
import ythdd_globals
ytdl_opts = {
@@ -15,6 +15,89 @@ ytdl_opts = {
"simulate": True
}
# Request constants for the innertube-based extractors below.
# "stage1" is sent to the youtubei/v1/player endpoint and "stage3" to the
# youtubei/v1/next endpoint (see IOSextract).

# HTTP headers for the stage 1 (player) request; they identify the client
# as the iOS YouTube app, matching the "IOS" client in stage1_body.
stage1_headers = {
"Connection": "keep-alive",
"User-Agent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-us,en;q=0.5",
"Sec-Fetch-Mode": "navigate",
"Content-Type": "application/json",
"X-Youtube-Client-Name": "5",
"X-Youtube-Client-Version": "19.45.4",
"Origin": "https://www.youtube.com",
"Accept-Encoding": "gzip, deflate, br",
"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
}
# JSON body template for the stage 1 (player) request.
# "videoId" is intentionally absent here: IOSextract supplies it per request.
stage1_body = {
"context":
{
"client":
{
"clientName": "IOS",
"clientVersion": "19.45.4",
"deviceMake": "Apple",
"deviceModel": "iPhone16,2",
"userAgent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
"osName": "iPhone",
"osVersion": "18.1.0.22B83",
"hl": "en",
"timeZone": "UTC",
"utcOffsetMinutes": 0
}
},
#"videoId": uri,
"playbackContext":
{
"contentPlaybackContext":
{
"html5Preference": "HTML5_PREF_WANTS"
}
},
"contentCheckOk": True,
"racyCheckOk": True
}
# Browser-like headers for stage 2. Within the visible code they are only
# referenced by the commented-out HLS manifest request in IOSextract.
stage2_headers = {
"Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-us,en;q=0.5",
"Sec-Fetch-Mode": "navigate",
"Accept-Encoding": "gzip, deflate, br"
}
# HTTP headers for the stage 3 (next) request; they pair with the "WEB"
# client declared in stage3_body.
stage3_headers = {
"Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-us,en;q=0.5",
"Sec-Fetch-Mode": "navigate",
"Content-Type": "application/json",
"X-Youtube-Client-Name": "1",
"X-Youtube-Client-Version": "2.20241126.01.00",
"Origin": "https://www.youtube.com",
"Accept-Encoding": "gzip, deflate, br",
"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
}
# JSON body template for the stage 3 (next) request.
# As with stage1_body, "videoId" is supplied by IOSextract per request.
stage3_body = {
"context":
{
"client":
{
"clientName": "WEB",
"clientVersion": "2.20241126.01.00",
"hl": "en",
"timeZone": "UTC",
"utcOffsetMinutes": 0
}
},
#"videoId": uri,
"contentCheckOk": True,
"racyCheckOk": True
}
def extract(url: str, getcomments=False, maxcomments=""):
# TODO: check user-agent and cookiefile
@@ -34,7 +117,7 @@ def extract(url: str, getcomments=False, maxcomments=""):
result = ytdl.extract_info(url, download=False)
return result
def related(url: str):
def WEBrelated(url: str):
# WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
if len(url) == 11:
params = {'v': url}
@@ -45,30 +128,7 @@ def related(url: str):
videoId = url[32:44]
params = {'v': videoId}
# NOTE: use ESR user-agent
# user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'
if ythdd_globals.config['extractor']['user-agent']:
user_agent = ythdd_globals.config['extractor']['user-agent']
headers = {
'User-Agent': user_agent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'DNT': '1',
'Sec-GPC': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Priority': 'u=0, i',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache',
}
response = requests.get(url, headers=headers, params=params)
response = requests.get(url, headers=ythdd_globals.getHeaders(caller='extractor'), params=params)
extracted_string = str(response.content.decode('utf8', 'unicode_escape'))
start = extracted_string.find('{"responseContext":{"serviceTrackingParams":')
start2 = extracted_string.find('{"responseContext":{"serviceTrackingParams":', start + 1)
@@ -76,3 +136,47 @@ def related(url: str):
extracted_json = json.loads(extracted_string[start2:end])
return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
def WEBextractSinglePage(uri: str):
    """Scrape the two embedded JSON blobs from a YouTube watch page.

    WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME

    Args:
        uri: an 11-character video id.

    Returns:
        A dict with 'ec1' and 'ec2' (the first and second JSON objects that
        start with the responseContext marker) and 'took' (seconds elapsed).

    Raises:
        ValueError: if `uri` is not exactly 11 characters long. A page that
            lacks the expected markers surfaces as json.JSONDecodeError
            (itself a ValueError subclass).
    """
    start_time = time.time()

    if len(uri) != 11:
        raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")

    response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'))
    # The second positional argument of bytes.decode() is the *error handler*,
    # not a codec. The former 'unicode_escape' value was ignored on valid
    # UTF-8 and raised LookupError on the first invalid byte. 'replace' keeps
    # the happy path identical and tolerates stray bytes.
    extracted_string = response.content.decode('utf8', errors='replace')

    marker = '{"responseContext":{"serviceTrackingParams":'
    start = extracted_string.find(marker)
    end = extracted_string.find(';var ', start)
    start2 = extracted_string.find(marker, start + 1)
    end2 = extracted_string.find(';</script>', start2)
    extracted_json1 = json.loads(extracted_string[start:end])
    extracted_json2 = json.loads(extracted_string[start2:end2])

    end_time = time.time()

    return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}
def IOSextract(uri: str):
    """Query the innertube player and next endpoints for one video.

    Args:
        uri: an 11-character video id.

    Returns:
        A dict with 'stage1' (parsed player response), 'stage3' (parsed
        next response) and 'took' (seconds elapsed).

    Raises:
        ValueError: if `uri` is not exactly 11 characters long.
    """
    start = time.time()

    if len(uri) != 11:
        raise ValueError("IOSextract expects a single, 11-character long uri as an argument")

    # Build per-call request bodies instead of writing the video id into the
    # shared module-level templates: concurrent requests would otherwise be
    # able to overwrite each other's "videoId" between assignment and send.
    player_body = {**stage1_body, 'videoId': uri}
    stage1_h = requests.post("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", headers=stage1_headers, json=player_body)
    stage1 = json.loads(stage1_h.content.decode('utf-8'))

    # NOTE: stage 2 (fetching stage1['streamingData']['hlsManifestUrl'] with
    # stage2_headers) is currently disabled, so no 'stage2' key is returned.

    next_body = {**stage3_body, 'videoId': uri}
    stage3_h = requests.post("https://www.youtube.com/youtubei/v1/next?prettyPrint=false", headers=stage3_headers, json=next_body)
    stage3 = json.loads(stage3_h.content.decode('utf-8'))

    end = time.time()

    return {'stage1': stage1, 'stage3': stage3, 'took': end - start}

View File

@@ -30,16 +30,16 @@ def getConfig(configfile):
global randomly_generated_passcode
if not os.path.exists(configfile):
dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False}, 'api': {'api_key': 'CHANGEME'}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False, 'public_facing_url': 'http://localhost:5000/'}, 'api': {'api_key': 'CHANGEME'}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
# if a passcode has not been provided by the user (config file doesn't exist, and user didn't specify it using an argument)
print(f"{colors.WARNING}WARNING{colors.ENDC}: Using default, baked in config data. {colors.ENDL}"
f"Consider copying and editing the provided example file ({colors.OKCYAN}config.default.toml{colors.ENDC}).")
f" Consider copying and editing the provided example file ({colors.OKCYAN}config.default.toml{colors.ENDC}).")
if randomly_generated_passcode == 0:
# generate a pseudorandom one and use it in the temporary config
randomly_generated_passcode = str(int(time.time() * 1337 % 899_999 + 100_000))
print(f"{colors.WARNING}WARNING{colors.ENDC}: Default config populated with one-time, insecure pseudorandom admin API key: {colors.OKCYAN}{randomly_generated_passcode}{colors.ENDC}."
f" {colors.ENDL}The admin API key is not the Flask debugger PIN. You need to provide a config file for persistence!{colors.ENDL}")
print(f"{colors.WARNING}WARNING{colors.ENDC}: Default config populated with one-time, insecure pseudorandom admin API key: {colors.OKCYAN}{randomly_generated_passcode}{colors.ENDC}.\n"
f" The admin API key is not the Flask debugger PIN. You need to provide a config file for persistence!{colors.ENDL}")
dummy_config['api']['api_key_admin'] = randomly_generated_passcode
return dummy_config
@@ -54,5 +54,40 @@ def setConfig(configfile):
#setConfig(configfile)
config = {}
def getHeaders(caller="proxy"):
    """Build the browser-like request headers used for outgoing requests.

    Args:
        caller: name of the config section whose 'user-agent' entry may
            override the default (e.g. "proxy" or "extractor").

    Returns:
        A new dict of HTTP headers.
    """
    # NOTE: use ESR user-agent
    default_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'

    # The section may be missing entirely (the baked-in fallback config has
    # no [proxy] table), so look it up defensively instead of indexing.
    section = config.get(caller) or {}
    user_agent = section.get('user-agent') or default_user_agent

    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    return headers
def translateLinks(link):
    """Rewrite YouTube image-host URLs in `link` to this instance's own URLs.

    i.ytimg.com is mapped to the configured public-facing URL, and
    yt3.ggpht.com to that URL followed by "ggpht/".
    """
    public_url = config['general']['public_facing_url']
    rewrites = (
        ("https://i.ytimg.com/", public_url),
        ("https://yt3.ggpht.com/", public_url + "ggpht/"),
    )
    for upstream, local in rewrites:
        link = link.replace(upstream, local)
    return link
def getUptime():
    """Return the difference between the current whole-second time and `starttime`."""
    now = int(time.time())
    return now - starttime

402
ythdd_inv_tl.py Normal file
View File

@@ -0,0 +1,402 @@
#!/usr/bin/python3
# ythdd Invidious Translation Layer
# -----
# Translates requests sent through Invidious API at /api/invidious/
# to use internal extractors.
from flask import Response, request, redirect
from markupsafe import escape
from time import strftime, localtime, time
import json, datetime
import invidious_formats
import ythdd_globals
import ythdd_api_v1
import ythdd_extractor
# TODO:
# [✓] /api/v1/stats (stats())
# [✓] /streams/dQw4w9WgXcQ (does nothing)
# [✓] /vi/videoIdXXXX/maxresdefault.jpg
# [*] /api/v1/auth/subscriptions (stub? db?)
# [*] /api/v1/auth/feed?page=1 (stub? db?)
# [*] /api/v1/auth/playlists (stub? db?)
# [*] /api/v1/videos/videoIdXXXX
def incrementBadRequests():
    """Bump the global counter of failed API requests by one."""
    ythdd_globals.apiFailedRequests = ythdd_globals.apiFailedRequests + 1
def greeting():
    """Return the (status, text) pair shown at the translation layer's root."""
    message = 'hello from Invidious TL!\nstats endpoint at /api/invidious/stats'
    return 200, message
def send(status, response):
    """Serialize `response` to JSON and wrap it in a Flask Response with `status`."""
    body = json.dumps(response)
    return Response(body, mimetype='application/json', status=status)
def notImplemented(data):
    """Answer 501 with an error naming the unrecognised path segments in `data`."""
    path = '/'.join(data)
    return send(501, {'error': f"not recognised/implemented: {path}"})
def stats():
    """Answer 200 with a stats document that identifies this translation layer."""
    software = {
        "name": "invidious",
        "version": f"invidious TL, ythdd ({ythdd_globals.version})",
        "branch": "https://gitea.7o7.cx/sherl/ythdd",
        "tl_msg": "/api/invidious/api/v1/"
    }
    return send(200, {"version": "2.0", "software": software})
def videoIdSanityCheck(videoId: str):
    """Validate the length of a video id.

    Args:
        videoId: the id to check.

    Returns:
        None when the id is exactly 11 characters long; otherwise a 400
        response describing the problem (the bad-request counter is bumped
        as well).
    """
    # Fixes two defects: the misspelled name `videId` raised NameError on
    # every call, and send() takes (status, response), not three arguments.
    if len(videoId) != 11:
        incrementBadRequests()
        return send(400, {'error': f'error: bad request. wrong videoId: {videoId} is {len(videoId)} characters long, but should be 11.'})
    # elif...?
    return None
def auth(data):
    """Stub for the authenticated endpoints, selected by `data[1]`.

    "subscriptions", "feed" and "playlists" answer 200 with an empty list;
    anything else counts as a bad request and answers 404.
    """
    # NOT YET IMPLEMENTED
    # TODO: make it use the internal db
    endpoint = data[1]
    if endpoint in ("subscriptions", "feed", "playlists"):
        return send(200, [])

    incrementBadRequests()
    return send(404, [])
def streams():
    """Stub: always answers 200 with an empty JSON string."""
    empty_payload = ''
    return send(200, empty_payload)
def epochToDate(epoch):
    """Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS' in UTC.

    UTC is used so that the result does not depend on the timezone the
    server happens to run in.
    """
    # Imported locally: the module-level import only brings in localtime.
    from time import gmtime
    return strftime('%Y-%m-%d %H:%M:%S', gmtime(epoch))
def trending():
    """Stub: answers 200 with a list holding one empty object."""
    placeholder = [{}]
    return send(200, placeholder)
def popular():
    """Stub: answers 200 with a list holding one empty object."""
    placeholder = [{}]
    return send(200, placeholder)
def safeTraverse(obj: dict, path: list, default=None):
    """Follow `path` through nested containers, falling back to `default`.

    Args:
        obj: the root container.
        path: keys/indices to apply in order.
        default: value returned when any step cannot be taken.

    Returns:
        The value found at the end of `path`, or `default` when a key is
        missing, an index is out of range, or an intermediate value cannot
        be subscripted.
    """
    result = obj
    try:
        for step in path:
            result = result[step]
    except (KeyError, IndexError, TypeError):
        # Previously only KeyError was handled, and a `return` inside
        # `finally` silently discarded every other exception while handing
        # back the partially traversed value instead of `default`.
        return default
    return result
def genThumbs(videoId: str):
    """List the thumbnail variants of a video, pointing at this instance.

    Returns a list of dicts with 'quality', 'url', 'width' and 'height',
    where every URL is the configured public-facing URL followed by
    vi/<videoId>/<file>.jpg.
    """
    # (quality label, file name without extension, width, height)
    variants = (
        # ("maxres", "maxres", 1280, 720),  # for the time being omit the buggy maxres quality
        ("maxresdefault", "maxresdefault", 1280, 720),
        ("sddefault", "sddefault", 640, 480),
        ("high", "hqdefault", 480, 360),
        ("medium", "mqdefault", 320, 180),
        ("default", "default", 120, 90),
        ("start", "1", 120, 90),
        ("middle", "2", 120, 90),
        ("end", "3", 120, 90),
    )

    url_base = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/'
    return [
        {'quality': quality, 'url': url_base + stem + '.jpg', 'width': width, 'height': height}
        for quality, stem, width, height in variants
    ]
def rebuildFormats(data):
    """Convert extractor format entries into Invidious-style format dicts.

    Args:
        data: list of format dicts (the caller passes the "adaptiveFormats"
            list from the extractor's streaming data).

    Returns:
        A tuple (result, formatStreams). result holds one rebuilt dict per
        input entry, in order. formatStreams holds the same dict objects
        for the entries whose itag is <= 80.

    Raises:
        KeyError: when an entry lacks one of the mandatory fields
            (initRange, indexRange, averageBitrate, url, itag, mimeType,
            contentLength, lastModified, projectionType, plus fps for video
            or audioQuality/audioSampleRate for audio).
    """
    result = []
    formatStreams = []

    for source in data:
        entry = {}
        result.append(entry)

        # An entry without 'audioChannels' is treated as a video stream.
        isVideo = 'audioChannels' not in source
        if not isVideo:
            entry['audioChannels'] = source['audioChannels']

        entry['init'] = str(source['initRange']['start']) + "-" + str(source['initRange']['end'])
        entry['index'] = str(source['indexRange']['start']) + "-" + str(source['indexRange']['end'])
        entry['bitrate'] = str(source['averageBitrate'])
        entry['url'] = source['url']
        entry['itag'] = str(source['itag'])
        entry['type'] = source['mimeType']
        entry['clen'] = source['contentLength']
        entry['lmt'] = source['lastModified']
        entry['projectionType'] = source['projectionType']

        # colorInfo is optional
        if 'colorInfo' in source:
            entry['colorInfo'] = source['colorInfo']

        if isVideo:
            entry['fps'] = str(source['fps'])
        else:
            entry['audioQuality'] = source['audioQuality']
            entry['audioSampleRate'] = source['audioSampleRate']

        itag = source['itag']
        if itag in invidious_formats.FORMATS:
            known = invidious_formats.FORMATS[itag]
            entry['container'] = known['ext']
            # prefer the video codec, fall back to the audio codec
            entry['encoding'] = known['vcodec'] if 'vcodec' in known else known['acodec']

            if isVideo and 'height' in known:
                height = str(known['height'])
                entry['resolution'] = height + "p"
                # NOT IMPLEMENTED, that's just a placeholder:
                # the fps is appended only above 30 fps (e.g. "1080p60")
                entry['qualityLabel'] = height + "p" + (entry['fps'] if source['fps'] > 30 else "")
                if 'width' in known:
                    entry['size'] = str(known['width']) + "x" + height

        if itag <= 80:  # temporary solution, I promise!
            formatStreams.append(entry)

    return result, formatStreams
def _dictOrEmpty(value):
    """Return value unchanged when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _parseAbbreviatedCount(text):
    """Convert a counter label such as "1.2K" or "3,456 subscribers" to an int (0 when unparsable)."""
    if not isinstance(text, str) or not text:
        return 0

    magnitudes = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    token = text.split(" ")[0]

    multiplier = 1
    for suffix, factor in magnitudes.items():
        if suffix in token:
            multiplier = factor
            break

    # keep ASCII digits and the decimal point; thousands separators are dropped
    number = "".join(ch for ch in token if "0" <= ch <= "9" or ch == ".")
    try:
        return round(float(number) * multiplier)
    except ValueError:
        # no digits at all (or a malformed number)
        return 0


def videos(data):
    """Build an Invidious-style response for a single video.

    An attempt on a faithful rewrite of
    https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr

    Args:
        data: request path split into segments; data[3] is the requested
            video id.

    Returns:
        The value of send(status, response). status is 200 when the video
        id reported by the extractor equals the requested one, 403
        otherwise.

    Changes compared to the previous revision:
        * the info renderers default to {} so a missing primary_results no
          longer ends in a NameError further down,
        * author thumbnails default to [] instead of None (which was iterated),
        * like/subscriber labels without digits no longer raise ValueError
          and decimal labels ("1.2K") are scaled correctly,
        * a missing 'stage3' section no longer raises KeyError.
    """
    idata = ythdd_extractor.IOSextract(data[3])
    wdata = ythdd_extractor.WEBextractSinglePage(data[3])

    main_results = safeTraverse(idata, ['stage3', 'contents', 'twoColumnWatchNextResults'], default={})
    primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])

    video_primary_renderer = {}
    video_secondary_renderer = {}
    if primary_results:
        video_primary_renderer = _dictOrEmpty(safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer']))
        video_secondary_renderer = _dictOrEmpty(safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer']))
    else:
        print("error: primary_results not found in invidious TL videos()")

    video_details = _dictOrEmpty(safeTraverse(wdata, ['ec1', 'videoDetails']))
    microformat = _dictOrEmpty(safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={}))

    video_id = safeTraverse(video_details, ['videoId'], default=f"[{data[3]}] (errors occurred, check logs)")
    title = safeTraverse(video_details, ['title'], default=video_id)
    views = int(safeTraverse(video_details, ['viewCount'], default=0))
    length = int(safeTraverse(video_details, ['lengthSeconds'], default=1))
    published = datetime.datetime.fromisoformat(safeTraverse(microformat, ['publishDate'], default="2004-10-29T10:30:00-07:00")).timestamp()  # ISO format to Unix timestamp
    published_date = epochToDate(published)

    # let's ignore the nitty gritty for the time being
    premiere_timestamp = safeTraverse(microformat, ['liveBroadcastDetails', 'startTimestamp'], default=0)
    if not premiere_timestamp:
        # NOTE(review): this path starts at 'playabilityStatus' but is looked
        # up inside microformat - confirm that this is the intended root.
        premiere_timestamp = safeTraverse(microformat, ['playabilityStatus', 'liveStreamability', 'liveStreamabilityRenderer', 'offlineSlate', 'liveStreamOfflineSlateRenderer', 'scheduledStartTime'], default=0)

    live_now = safeTraverse(microformat, ['liveBroadcastDetails', 'isLiveNow'], default=False)
    post_live_dvr = safeTraverse(video_details, ['isPostLiveDvr'], default=False)
    allowed_regions = safeTraverse(microformat, ['availableCountries'], default=[])
    allow_ratings = safeTraverse(video_details, ['allowRatings'], default=True)
    family_friendly = safeTraverse(microformat, ['isFamilySafe'], default=True)
    is_listed = safeTraverse(video_details, ['isCrawlable'], default=True)
    is_upcoming = safeTraverse(video_details, ['isUpcoming'], default=False)
    keywords = safeTraverse(video_details, ['keywords'], default=[])

    # TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L258
    # (fetched already, but not used in the response yet)
    related = safeTraverse(wdata, ['ec2', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results'], default=[])  # can possibly change in the future

    # hacky solution: the like count is read from the like button's title
    toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={})
    likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="")
    likes = _parseAbbreviatedCount(likes_text)

    description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: error ocurred, failed to retrieve description)")
    # not used in the response yet
    short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: error occurred, failed to retrieve short description)")
    # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
    description_html = "<p>" + description + "</p>"

    # not used in the response yet
    metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={})
    genre = safeTraverse(microformat, ['category'])
    # TODO: genre blah blah blah...

    author = safeTraverse(video_details, ['author'], default="Unknown Author")
    ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID")
    author_info = _dictOrEmpty(safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={}))
    # "or []" keeps the loop below safe when no thumbnails are available
    author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails'], default=[]) or []

    subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0")
    # numeric value is parsed but only the text form is exposed for now
    subs = _parseAbbreviatedCount(subs_text)
    if subs_text:
        # keep only the leading token (the text before the first space)
        subs_text = subs_text.split(" ")[0]

    for thumbnail in author_thumbnail:
        # rewrite to use views.py
        thumbnail['url'] = ythdd_globals.translateLinks(thumbnail['url'])

    # TODO: author_verified = ...

    hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="")
    adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[])
    adaptive_formats, format_streams = rebuildFormats(adaptive_formats)

    if live_now:
        video_type = "livestream"
    elif premiere_timestamp:
        video_type = "scheduled"
        # premiere_timestamp is always truthy in this branch
        published = premiere_timestamp
    else:
        video_type = "video"

    # providing format streams breaks Clipious client
    #format_streams.append(adaptive_formats[0])
    #format_streams.append(adaptive_formats[1])

    response = {
        "type": video_type,
        "title": title,
        "videoId": video_id,
        "videoThumbnails": genThumbs(video_id),
        "storyboards": [],  # not implemented
        "description": description,  # due to change (include ythdd metadata)
        "descriptionHtml": description_html,  # basically the same as normal description for the time being
        "published": published,
        "publishedText": published_date,
        "keywords": keywords,
        "viewCount": views,
        "viewCountText": str(views),  # not implemented
        "likeCount": likes,
        "dislikeCount": 0,
        "paid": False,  # not implemented
        "premium": False,  # not implemented
        "isFamilyFriendly": family_friendly,
        "allowedRegions": allowed_regions,
        "genre": genre,
        "genreUrl": "/genreUrl/not/implemented/",  # not implemented
        "author": author,
        "authorId": ucid,
        "authorUrl": "/channel/" + ucid,
        "authorVerified": False,  # not implemented
        "authorThumbnails": author_thumbnail,
        "subCountText": subs_text,
        "lengthSeconds": length,
        "allowRatings": allow_ratings,
        "rating": 0,
        "isListed": is_listed,
        "liveNow": live_now,
        "isPostLiveDvr": post_live_dvr,
        "isUpcoming": is_upcoming,
        "dashUrl": "/dash/not/implemented/",  # not implemented
        "premiereTimestamp": premiere_timestamp,
        "hlsUrl": hls_url,
        "adaptiveFormats": adaptive_formats,
        "formatStreams": format_streams,  # very bare bones, empty actually xD
        "captions": [],  # not implemented
        # "captions": [
        #   {
        #     "label": String,
        #     "language_code": String,
        #     "url": String
        #   }
        # ],
        # "musicTracks": [
        #   {
        #     "song": String,
        #     "artist": String,
        #     "album": String,
        #     "license": String
        #   }
        # ],
        'recommendedVideos': []  # not yet implemented
        # "recommendedVideos": [
        #   {
        #     "videoId": String,
        #     "title": String,
        #     "videoThumbnails": [
        #       {
        #         "quality": String,
        #         "url": String,
        #         "width": Int32,
        #         "height": Int32
        #       }
        #     ],
        #     "author": String,
        #     "authorUrl": String,
        #     "authorId": String?,
        #     "authorVerified": Boolean,
        #     "authorThumbnails": [
        #       {
        #         "url": string,
        #         "width": Int32,
        #         "height": Int32
        #       }
        #     ],
        #     "lengthSeconds": Int32,
        #     "viewCount":
        #     "viewCountText": String
        #   }
        # ]
    }

    # for debugging:
    #return send(200, ythdd_extractor.WEBextractSinglePage(data[3]))
    #return send(200, ythdd_extractor.IOSextract(data[3]))

    # if youtube returns a videoId other than the one we asked for,
    # then it means that the instance is ratelimited
    status_code = 200 if data[3] == response['videoId'] else 403

    return send(status_code, response)
def lookup(data):
    """Route a request path (already split into segments) to its handler.

    Returns whatever the selected handler returns. An empty path selects
    no handler and yields None.
    """
    # possibly TODO: rewrite this mess
    segment_count = len(data)

    if segment_count > 2:
        if data[0] == "api" and data[1] == "v1":
            endpoint = data[2]
            if endpoint in ('stats', ''):  # /api/invidious/api/v1/stats and /api/invidious/api/v1/
                return stats()
            if endpoint == 'trending':
                return trending()
            if endpoint == 'popular':
                return popular()
            if endpoint == 'videos':
                return videos(data)
            if endpoint == 'auth':
                return auth(data)
            incrementBadRequests()
            return notImplemented(data)

        if data[0] == 'ggpht':
            # for some reason the Materialous client
            # keeps making requests to these;
            # a doubled leading 'ggpht' segment is collapsed into one
            first_kept = 1 if data[1] == 'ggpht' else 0
            return redirect('/' + "/".join(data[first_kept:]))

        incrementBadRequests()
        return notImplemented(data)

    if segment_count == 2:
        if data[0] == "api" and data[1] == "v1":  # /api/invidious/api/v1
            return stats()
        if data[0] == "streams":
            return streams()
        if data[0] == 'ggpht':
            return redirect('/' + "/".join(data[0:]))
        incrementBadRequests()
        return notImplemented(data)

    if segment_count == 1:
        return stats()  # /api/invidious/something

    return None