Compare commits: 1e4b05c33b...master (97 commits)
| SHA1 |
|---|
| 56af1f0735 |
| 72266aad0f |
| 900cc92229 |
| 2687cc2bdc |
| 4a9d59c9b4 |
| 4af581ab7c |
| d1f381220d |
| eebf434f3e |
| c979c97077 |
| 11c94c757e |
| 4421e68d9d |
| da2daab16a |
| 05b81e55da |
| 158dcc3b7f |
| 668e8c32aa |
| 760aaccfff |
| da54bd0818 |
| b0845d723a |
| c760104d70 |
| d6cb0fe692 |
| 81fba8c4d0 |
| 260039c307 |
| 2b24fc2906 |
| 002e3cba33 |
| 5944fd2458 |
| 719b545cb4 |
| 468795a7a2 |
| 7eb4452fec |
| e7ae42f289 |
| 5bb542826e |
| 365c54d214 |
| a2e2fedc90 |
| 4a311f185c |
| 45073fff67 |
| f73e6c11d4 |
| 34e00e2492 |
| f63c620541 |
| f2adc2e561 |
| e6d32091e1 |
| b98aa718b0 |
| 30850a7ce0 |
| 1c9174c888 |
| 6d0c70696b |
| 509e81aafa |
| caa9e0c2b1 |
| 873abbd413 |
| bedcaff7c0 |
| 5dcf8c81e0 |
| 60712f3b5d |
| c20d4c34aa |
| be697bb572 |
| 3e84cf3443 |
| 4a3937a923 |
| c3fae689e1 |
| 4cfb1db7d0 |
| 5a1e772909 |
| 7c4991cea7 |
| 5f88d6f096 |
| eaaa14c4d8 |
| ef177f7200 |
| d0d2298186 |
| 7086177a58 |
| dc8009db23 |
| e562d1ee14 |
| 96c1b5396e |
| 256d21bbcd |
| 2a9826eb03 |
| 37e932956d |
| 15d2de5228 |
| 3cf203ee27 |
| a814797363 |
| 89f8f2a786 |
| 5e655ddd2c |
| 3e7589aea6 |
| cd34b2e406 |
| 3a0a89f737 |
| 55a116e042 |
| 51b94842e3 |
| 3253447c72 |
| 32c563a45a |
| 3ea3558990 |
| 6cdae051b5 |
| 4e066e4b23 |
| 837567f8c8 |
| 99e914557a |
| 06679ee165 |
| dbc90d3f74 |
| d1b9f90e7e |
| 019e47edd9 |
| 8fa05486f1 |
| 71a24a4870 |
| 2837cdf612 |
| ee31114e51 |
| dd102cb6ae |
| 3a524d96a0 |
| 3212627d89 |
| 0099736a74 |
README.md | 48
@@ -1,2 +1,50 @@
 # ythdd
 *Archive videos while you watch them.*
+
+**ythdd** is a tool which can help you create a multimedia archive by downloading videos and their metadata while you watch them.
+It does so by acting as an intermediary between you and YouTube, forwarding your requests while also saving them to a hard drive, hence the name.
+
+## Goals
+**Current goal of the project**: provide support for [Invidious](https://github.com/iv-org/invidious) API calls through the Invidious translation layer (ythdd_inv_tl.py).
+
+## Running ythdd:
+- First, download the project files or clone the repository with git:
+```
+git clone https://gitea.7o7.cx/sherl/ythdd.git
+```
+
+- Navigate into the directory with the project files:
+```
+cd ythdd
+```
+
+- Create a virtual environment:
+```
+python -m venv .venv
+```
+The above will create a hidden directory called *.venv*.
+
+- To activate the virtual environment use:
+    - on Linux:
+    ```
+    source .venv/bin/activate
+    ```
+    - on Windows (cmd):
+    ```
+    .venv\Scripts\activate
+    ```
+
+- Install the required packages into the newly created virtual environment:
+```
+pip install -r requirements.txt
+```
+
+- Run the script:
+    - continuously, without reloading:
+    ```
+    python ythdd.py
+    ```
+    - with automatic reloading (code will be reloaded after a change to the codebase):
+    ```
+    flask --app ythdd run --debug
+    ```
config.toml
@@ -1,15 +1,27 @@
 [general]
-db_file_path = "/path/to/ythdd_db.sqlite" # Preferably stored on an SSD.
-video_storage_directory_path = "/path/to/videos/" # Path to video vault.
-is_proxied = false
+db_file_path = "ythdd_db.sqlite" # Path to the database file, preferably stored on an SSD.
+video_storage_directory_path = "/path/to/videos/" # Path to video vault.
+is_proxied = false # Set to true if running behind a reverse proxy.
+public_facing_url = "http://127.0.0.1:5000/" # Used for URL rewriting. Note the trailing slash /.
+debug = false # Whether to print verbose debug info on API endpoints.
+cache = true # Whether to cache requests for 3 hours (temporary solution to long load times).
 
 [api]
 api_key = "" # Leave empty API key for public access to non-sensitive backend
 api_key_admin = "CHANGEME" # Empty *admin* API key will autogenerate a random one every launch.
+enable_debugger_halt = false # Whether to allow triggering pdb using the admin's API key.
 
 [extractor]
-user-agent = "" # leave empty for default
-cookies_path = "" # leave empty for none
+user-agent = "" # Leave empty for default (Firefox ESR).
+cookies_path = "" # Leave empty for none.
+age_restricted_cookies_path = "" # Cookies to use when bypassing age-gated videos only. Leave empty to disable.
+deno_path = "" # Required when using cookies.
+preferred_extractor = "" # Leave empty for default (android_vr).
+
+[proxy]
+user-agent = "" # Leave empty for default (Firefox ESR).
+allow_proxying_videos = false # Whether to allow video proxying through the instance (traffic-intensive).
+match_initcwndbps = true # Experimental: matches the proxying speed to the one suggested by Innertube (may help avoid being ratelimited/banned).
+
+[admin]
+# List of users with admin privileges.
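The [general], [api], [extractor], and [proxy] keys above are read at startup through ythdd_globals.setConfig. As a hedged illustration of the data shape only (not the project's actual loader), the toml package listed in requirements.txt parses such a file into plain nested dicts:

```python
# Illustrative sketch: parsing the config above with the toml package.
# ythdd's real entry point is ythdd_globals.setConfig; this only shows the shape.
import toml

config = toml.load("config.toml")

db_path = config["general"]["db_file_path"]    # "ythdd_db.sqlite"
admin_key = config["api"]["api_key_admin"]     # "" would trigger autogeneration
# TOML booleans arrive as Python bools, so flags can gate behavior directly:
if config["proxy"]["allow_proxying_videos"]:
    print("video proxying enabled (traffic-intensive)")
```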
invidious_formats.py | 114 (new file)
@@ -0,0 +1,114 @@
+# Part of the Invidious project
+# https://github.com/iv-org/invidious
+# License: AGPLv3
+
+FORMATS = {
+
+    5: {"ext": "flv", "width": 400, "height": 240, "acodec": "mp3", "abr": 64, "vcodec": "h263"},
+    6: {"ext": "flv", "width": 450, "height": 270, "acodec": "mp3", "abr": 64, "vcodec": "h263"},
+    13: {"ext": "3gp", "acodec": "aac", "vcodec": "mp4v"},
+    17: {"ext": "3gp", "width": 176, "height": 144, "acodec": "aac", "abr": 24, "vcodec": "mp4v"},
+    18: {"ext": "mp4", "width": 640, "height": 360, "acodec": "aac", "abr": 96, "vcodec": "h264"},
+    22: {"ext": "mp4", "width": 1280, "height": 720, "acodec": "aac", "abr": 192, "vcodec": "h264"},
+    34: {"ext": "flv", "width": 640, "height": 360, "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    35: {"ext": "flv", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    36: {"ext": "3gp", "width": 320, "acodec": "aac", "vcodec": "mp4v"},
+    37: {"ext": "mp4", "width": 1920, "height": 1080, "acodec": "aac", "abr": 192, "vcodec": "h264"},
+    38: {"ext": "mp4", "width": 4096, "height": 3072, "acodec": "aac", "abr": 192, "vcodec": "h264"},
+    43: {"ext": "webm", "width": 640, "height": 360, "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
+    44: {"ext": "webm", "width": 854, "height": 480, "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
+    45: {"ext": "webm", "width": 1280, "height": 720, "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
+    46: {"ext": "webm", "width": 1920, "height": 1080, "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
+    59: {"ext": "mp4", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    78: {"ext": "mp4", "width": 854, "height": 480, "acodec": "aac", "abr": 128, "vcodec": "h264"},
+
+    # 3D videos
+    82: {"ext": "mp4", "height": 360, "format": "3D", "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    83: {"ext": "mp4", "height": 480, "format": "3D", "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    84: {"ext": "mp4", "height": 720, "format": "3D", "acodec": "aac", "abr": 192, "vcodec": "h264"},
+    85: {"ext": "mp4", "height": 1080, "format": "3D", "acodec": "aac", "abr": 192, "vcodec": "h264"},
+    100: {"ext": "webm", "height": 360, "format": "3D", "acodec": "vorbis", "abr": 128, "vcodec": "vp8"},
+    101: {"ext": "webm", "height": 480, "format": "3D", "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
+    102: {"ext": "webm", "height": 720, "format": "3D", "acodec": "vorbis", "abr": 192, "vcodec": "vp8"},
+
+    # Apple HTTP Live Streaming
+    91: {"ext": "mp4", "height": 144, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
+    92: {"ext": "mp4", "height": 240, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
+    93: {"ext": "mp4", "height": 360, "format": "HLS", "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    94: {"ext": "mp4", "height": 480, "format": "HLS", "acodec": "aac", "abr": 128, "vcodec": "h264"},
+    95: {"ext": "mp4", "height": 720, "format": "HLS", "acodec": "aac", "abr": 256, "vcodec": "h264"},
+    96: {"ext": "mp4", "height": 1080, "format": "HLS", "acodec": "aac", "abr": 256, "vcodec": "h264"},
+    132: {"ext": "mp4", "height": 240, "format": "HLS", "acodec": "aac", "abr": 48, "vcodec": "h264"},
+    151: {"ext": "mp4", "height": 72, "format": "HLS", "acodec": "aac", "abr": 24, "vcodec": "h264"},
+
+    # DASH mp4 video
+    133: {"ext": "mp4", "height": 240, "format": "DASH video", "vcodec": "h264"},
+    134: {"ext": "mp4", "height": 360, "format": "DASH video", "vcodec": "h264"},
+    135: {"ext": "mp4", "height": 480, "format": "DASH video", "vcodec": "h264"},
+    136: {"ext": "mp4", "height": 720, "format": "DASH video", "vcodec": "h264"},
+    137: {"ext": "mp4", "height": 1080, "format": "DASH video", "vcodec": "h264"},
+    138: {"ext": "mp4", "format": "DASH video", "vcodec": "h264"}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+    160: {"ext": "mp4", "height": 144, "format": "DASH video", "vcodec": "h264"},
+    212: {"ext": "mp4", "height": 480, "format": "DASH video", "vcodec": "h264"},
+    264: {"ext": "mp4", "height": 1440, "format": "DASH video", "vcodec": "h264"},
+    298: {"ext": "mp4", "height": 720, "format": "DASH video", "vcodec": "h264", "fps": 60},
+    299: {"ext": "mp4", "height": 1080, "format": "DASH video", "vcodec": "h264", "fps": 60},
+    266: {"ext": "mp4", "height": 2160, "format": "DASH video", "vcodec": "h264"},
+
+    # Dash mp4 audio
+    139: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 48, "container": "m4a_dash"},
+    140: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 128, "container": "m4a_dash"},
+    141: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "abr": 256, "container": "m4a_dash"},
+    256: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "container": "m4a_dash"},
+    258: {"ext": "m4a", "format": "DASH audio", "acodec": "aac", "container": "m4a_dash"},
+    325: {"ext": "m4a", "format": "DASH audio", "acodec": "dtse", "container": "m4a_dash"},
+    328: {"ext": "m4a", "format": "DASH audio", "acodec": "ec-3", "container": "m4a_dash"},
+
+    # Dash webm
+    167: {"ext": "webm", "height": 360, "width": 640, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    168: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    169: {"ext": "webm", "height": 720, "width": 1280, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    170: {"ext": "webm", "height": 1080, "width": 1920, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    218: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    219: {"ext": "webm", "height": 480, "width": 854, "format": "DASH video", "container": "webm", "vcodec": "vp8"},
+    278: {"ext": "webm", "height": 144, "format": "DASH video", "container": "webm", "vcodec": "vp9"},
+    242: {"ext": "webm", "height": 240, "format": "DASH video", "vcodec": "vp9"},
+    243: {"ext": "webm", "height": 360, "format": "DASH video", "vcodec": "vp9"},
+    244: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
+    245: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
+    246: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9"},
+    247: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9"},
+    248: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9"},
+    271: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9"},
+
+    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+    272: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9"},
+    302: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    303: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    308: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    313: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9"},
+    315: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    330: {"ext": "webm", "height": 144, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    331: {"ext": "webm", "height": 240, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    332: {"ext": "webm", "height": 360, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    333: {"ext": "webm", "height": 480, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    334: {"ext": "webm", "height": 720, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    335: {"ext": "webm", "height": 1080, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    336: {"ext": "webm", "height": 1440, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+    337: {"ext": "webm", "height": 2160, "format": "DASH video", "vcodec": "vp9", "fps": 60},
+
+    # Dash webm audio
+    171: {"ext": "webm", "acodec": "vorbis", "format": "DASH audio", "abr": 128},
+    172: {"ext": "webm", "acodec": "vorbis", "format": "DASH audio", "abr": 256},
+
+    # Dash webm audio with opus inside
+    249: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 50},
+    250: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 70},
+    251: {"ext": "webm", "format": "DASH audio", "acodec": "opus", "abr": 160},
+
+    # av01 video only formats sometimes served with "unknown" codecs
+    394: {"ext": "mp4", "height": 144, "vcodec": "av01.0.05M.08"},
+    395: {"ext": "mp4", "height": 240, "vcodec": "av01.0.05M.08"},
+    396: {"ext": "mp4", "height": 360, "vcodec": "av01.0.05M.08"},
+    397: {"ext": "mp4", "height": 480, "vcodec": "av01.0.05M.08"},
+}
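The table mirrors yt-dlp's legacy itag map: each key is a YouTube format ID, each value the container and codec metadata a client would otherwise have to infer. A hedged sketch of how a caller might resolve an itag against it (the helper describe_itag is hypothetical, not part of the diff):

```python
# Hypothetical helper, not from the diff: resolve an itag against FORMATS.
from invidious_formats import FORMATS

def describe_itag(itag: int) -> str:
    fmt = FORMATS.get(itag)
    if fmt is None:
        return f"itag {itag}: unknown"
    res = f"{fmt['height']}p" if "height" in fmt else "audio/unknown res"
    codec = fmt.get("vcodec", fmt.get("acodec", "?"))
    return f"itag {itag}: {fmt['ext']} {res} {codec}"

print(describe_itag(22))   # itag 22: mp4 720p h264
print(describe_itag(251))  # itag 251: webm audio/unknown res opus
```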
requirements.txt
@@ -11,4 +11,7 @@ Flask-SQLAlchemy>=3.1.1
 toml>=0.10.2
 Flask-APScheduler>=1.13.1
 requests>=2.32.3
 yt_dlp
+brotli>=1.1.0
+dateparser>=1.2.2
+bbpb>=1.4.2
views.py | 158
@@ -1,8 +1,9 @@
 #!/usr/bin/python3
-from flask import render_template
+from flask import redirect, render_template, request, Response
 from flask_sqlalchemy import SQLAlchemy
 from markupsafe import escape
-import requests, json
+import hashlib, json, re, requests
 import ythdd_globals
 
 def homepage():
     return "homepage"
@@ -11,4 +12,155 @@ def home():
     return "welcome home!"
 
 def index():
-    return "index"
+    return "index"
+
+def thumbnailProxy(received_request):
+
+    # apparently, this can be set to
+    # https://img.youtube.com/ as well
+    prefix = "https://i.ytimg.com/"
+
+    if received_request.count("/") < 1 or received_request.index("/") != 11:
+        return Response(json.dumps({
+            'status': 400,
+            'error_msg': 'invalid request. pretend this is a thumbnail :D'
+        }), mimetype='application/json', status=400)
+
+    quality_urls = ['maxresdefault', 'sddefault', 'hqdefault', 'mqdefault', 'default', '1', '2', '3']
+    video_id, requested_quality = received_request.split('/')
+
+    thumbnail = requests.get(prefix + "vi/" + video_id + "/" + requested_quality, headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
+    thumbnail.raw.decode_content = True
+
+    quality_id = 0
+    if requested_quality == "maxres.jpg":
+        # if the requested quality is maxres,
+        # provide the best quality possible
+        while thumbnail.status_code != 200:
+            thumbnail = requests.get(prefix + "vi/" + video_id + "/" + quality_urls[quality_id] + ".jpg", headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
+            thumbnail.raw.decode_content = True
+            quality_id += 1
+
+    response = Response(thumbnail.raw, mimetype=thumbnail.headers['content-type'], status=thumbnail.status_code)
+
+    return response
+
+def storyboardProxy(received_request):
+
+    # will proxy storyboards
+    prefix = "https://i.ytimg.com/"
+    sqp = request.args.get("sqp")
+    sigh = request.args.get("sigh")
+
+    if sqp is None or sigh is None:
+        return Response(json.dumps({"status": "error", "error": "Request lacks the sqp, sigh params (or both)."}), mimetype="application/json", status=400)
+
+    sb = requests.get(prefix + "sb/" + received_request, headers=ythdd_globals.getHeaders(caller='proxy'), params={"sqp": sqp, "sigh": sigh}, stream=True)
+    sb.raw.decode_content = True
+    response = Response(sb.raw, mimetype=sb.headers['content-type'], status=sb.status_code)
+    return response
+
+def ggphtProxy(received_request):
+
+    prefix = "https://yt3.ggpht.com/"
+
+    # fix for how materialious fetches avatars
+    if received_request.startswith("guc/"):
+        return gucProxy(received_request.removeprefix("guc/"))
+
+    ggpht = requests.get(prefix + received_request, headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
+    ggpht.raw.decode_content = True
+    response = Response(ggpht.raw, mimetype=ggpht.headers['content-type'], status=ggpht.status_code)
+
+    return response
+
+def gucProxy(received_request):
+
+    prefix = "https://yt3.googleusercontent.com/"
+
+    guc = requests.get(prefix + received_request, headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
+    guc.raw.decode_content = True
+    response = Response(guc.raw, mimetype=guc.headers['content-type'], status=guc.status_code)
+
+    return response
+
+def imgProxy(received_request):
+
+    # will proxy /img/no_thumbnail.jpg
+    prefix = "https://i.ytimg.com/"
+
+    thumbnail = requests.get(prefix + "img/" + received_request, headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
+    thumbnail.raw.decode_content = True
+    response = Response(thumbnail.raw, mimetype=thumbnail.headers['content-type'], status=thumbnail.status_code)
+
+    return response
+
+def videoplaybackProxy():
+    # inspired by Yotter's video proxy
+    # https://github.com/ytorg/Yotter/blob/b43a72ab7bfa5a59916fa3259cbc39165717c6bb/app/routes.py#L527
+
+    if not ythdd_globals.config['proxy']['allow_proxying_videos']:
+        return Response(json.dumps({"error": "Administrator has disabled this endpoint"}), mimetype="application/json", status=403)
+
+    headers = dict(request.headers)
+    proxy_headers = ythdd_globals.getHeaders(caller='proxy')
+    if "Range" in headers:
+        proxy_headers["Range"] = headers["Range"]
+
+    params = dict(request.args)
+    # support md5 videoplayback url shortening
+    if "md5" in params:
+        if params["md5"] not in ythdd_globals.general_cache["hashed_videoplayback"]:
+            return Response(json.dumps({"error": "Videoplayback request not cached or expired."}), mimetype="application/json", status=404)
+        return redirect(ythdd_globals.general_cache["hashed_videoplayback"][params["md5"]]["original_url"])
+    else:
+        public_url = request.url
+        public_url = ythdd_globals.config["general"]["public_facing_url"] + public_url[public_url.rfind("videoplayback"):]
+        md5sum = hashlib.md5(public_url.encode("utf-8")).hexdigest()
+        ythdd_globals.general_cache["hashed_videoplayback"][md5sum] = {"original_url": request.url}
+
+    # reconstruct the url
+    # first attempt: from the host param
+    host = params.get('host')
+    # failed? then try to get it from the rest of the params
+    if host is None:
+        # second attempt: reconstruct the url from mn and mvi
+        # the host schema seems to be as follows:
+        # rr{mvi[any]/fvip[any]?}---{mn[any]}.googlevideo.com
+        # regarding mvi/fvip, it seems that any value smaller than 5 passes
+        try:
+            mvi = params.get('mvi').split(',')[-1]
+            mn = params.get('mn').split(',')[-1]
+            if int(mvi) > 5:
+                mvi = 3 # invidious uses this as a fallback
+            host = f"rr{mvi}---{mn}.googlevideo.com"
+        except (AttributeError, ValueError):
+            return Response(json.dumps({"error": "Couldn't extract crucial parameters for hostname reconstruction"}), mimetype="application/json", status=400)
+    else:
+        # don't echo the host "hint" back to the googlevideo server
+        del params['host']
+        # run a regex sanity check
+        if re.fullmatch(r"[\w-]+\.googlevideo\.com", host) is None:
+            # fallback behavior for unexpected hostnames
+            return Response(json.dumps({"error": "Please either pass a valid host, or don't pass any"}), mimetype="application/json", status=400)
+
+    try:
+        # request the proxied data
+        remote_response = requests.get(f"https://{host}/videoplayback", headers=proxy_headers, params=params, stream=True)
+    except:
+        return Response(json.dumps({"error": "Couldn't connect to googlevideo host"}), mimetype="application/json", status=500)
+
+    # determine the chunk size
+    chunk_size = 10 * 1024 # 10 KB per chunk by default (YouTube is unwilling to send more than ~10 MB at once without ratelimiting)
+    # or the one in initcwndbps (if the user enabled the config flag to match chunk_size with initcwndbps)
+    if ythdd_globals.config['proxy']['match_initcwndbps']:
+        try:
+            chunk_size = int(params.get('initcwndbps')) // 1024
+        except:
+            pass
+    # return a chunked response
+    resp = Response(remote_response.iter_content(chunk_size=chunk_size), content_type=remote_response.headers['Content-Type'], status=remote_response.status_code, headers=remote_response.headers, direct_passthrough=True)
+    resp.cache_control.public = True
+    resp.cache_control.max_age = int(60_000)
+
+    return resp
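videoplaybackProxy's fallback path rebuilds the googlevideo hostname from the mvi and mn parameters and then validates it with the same regex shown above. A self-contained sketch of that round trip, using made-up parameter values:

```python
# Standalone sketch of the host reconstruction logic above (example values only).
import re

params = {"mvi": "7,2", "mn": "sn-abc123,sn-xyz789"}  # made-up example values

mvi = params["mvi"].split(",")[-1]        # last entry: "2"
mn = params["mn"].split(",")[-1]          # last entry: "sn-xyz789"
if int(mvi) > 5:
    mvi = 3                               # Invidious-style fallback
host = f"rr{mvi}---{mn}.googlevideo.com"

# Same sanity check the view applies before proxying:
assert re.fullmatch(r"[\w-]+\.googlevideo\.com", host)
print(host)  # rr2---sn-xyz789.googlevideo.com
```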
ythdd.py | 52
@@ -6,15 +6,18 @@ from argparse import ArgumentParser
 from ythdd_globals import colors
 import requests, json, toml, time
 import views, downloader, ythdd_api, ythdd_globals, ythdd_db
+import os
 from flask_apscheduler import APScheduler
 
-app = Flask(__name__)
+app = Flask(__name__)
+app_host = "None"
+app_port = "None"
 
 def setup():
 
     # sanity check: make sure config is set
     # required to make `flask --app ythdd run --debug` work
-    global config
+    global config, app_host, app_port
     try:
         if not config['general']:
             ythdd_globals.setConfig(ythdd_globals.configfile)
@@ -31,6 +34,28 @@ def setup():
     ythdd_globals.isProxied = config['general']['is_proxied']
     ythdd_globals.outsideApiHits = 0
 
+    are_we_sure_of_host_and_port = True
+    if app_host == "None":
+        app_host = "127.0.0.1"
+        are_we_sure_of_host_and_port = False
+    if app_port == "None":
+        app_port = "5000"
+        are_we_sure_of_host_and_port = False
+
+    public_facing_url = config['general']['public_facing_url']
+    rewrite_sanity_check = public_facing_url.replace(f"{app_host}:{app_port}", "")
+    if not config['general']['is_proxied'] and public_facing_url == rewrite_sanity_check:
+        sanity_string = f"{colors.WARNING}Heads up!{colors.ENDC} The public facing URL does not match the IP and port the server is running on.\n"
+        sanity_string += f" Expected: {colors.OKCYAN}{config['general']['public_facing_url']}{colors.ENDC}, but"
+        if not are_we_sure_of_host_and_port: sanity_string += " (assuming it's)"
+        sanity_string += f" running on: {colors.OKCYAN}{app_host}:{app_port}{colors.ENDC}.\n"
+        sanity_string += f" This is just a sanity check and may not necessarily mean bad configuration.\n"
+        sanity_string += f" If you're running a reverse proxy, set {colors.OKCYAN}is_proxied{colors.ENDC} to true to silence this message.\n"
+        print(sanity_string)
+
+    # Should work around disconnects: https://stackoverflow.com/a/61739721
+    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {"pool_pre_ping": True}
+
     app.config['SQLALCHEMY_DATABASE_URI'] = f"sqlite:///{config['general']['db_file_path']}"
     app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
     app.add_url_rule('/', view_func=views.index)
@@ -38,6 +63,12 @@ def setup():
     app.add_url_rule('/home', view_func=views.home)
     app.add_url_rule('/api/', view_func=ythdd_api.api_greeting)
     app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall)
+    app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy)
+    app.add_url_rule('/sb/<path:received_request>', view_func=views.storyboardProxy)
+    app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy)
+    app.add_url_rule('/guc/<path:received_request>', view_func=views.gucProxy)
+    app.add_url_rule('/img/<path:received_request>', view_func=views.imgProxy)
+    app.add_url_rule('/videoplayback', view_func=views.videoplaybackProxy)
     db = ythdd_db.initDB(app, config)
 
     with app.app_context():
@@ -81,15 +112,19 @@ def main(args):
     host = host_port[0]
     port = host_port[1]
 
-    global config
+    global config, app_host, app_port
     try:
         # if specified, use custom config file
        ythdd_globals.configfile = args.config
        ythdd_globals.setConfig(ythdd_globals.configfile)
 
     except:
-        # if not, use dummy file
-        ythdd_globals.configfile = ""
+        # if not, try using the default "config.toml"
+        if os.path.exists("config.toml"):
+            ythdd_globals.configfile = "config.toml"
+        else:
+            # unless it's not there; in that case use the dummy file
+            ythdd_globals.configfile = ""
         # but try to set the API secret if provided by the user
         if args.secret:
             ythdd_globals.randomly_generated_passcode = args.secret
@@ -97,8 +132,11 @@ def main(args):
 
     config = ythdd_globals.config
 
+    app_host = host
+    app_port = port
+
     setup()
-    app.run(host=host, port=int(port))
+    app.run(host=host, port=int(port), threaded=True)
 
 if __name__ == "__main__":
     #app.run(host="127.0.0.1", port=5000)
@@ -115,4 +153,6 @@ if __name__ == "__main__":
     main(args)
 
 else:
+    app_host = os.getenv("FLASK_RUN_HOST", "None")
+    app_port = os.getenv("FLASK_RUN_PORT", "None")
     setup()
ythdd_api.py | 55
@@ -1,8 +1,11 @@
 #!/usr/bin/python3
 from flask import Response, request
 from markupsafe import escape
 from ythdd_globals import colors as c
 import requests, time, json
-import ythdd_api_v1, ythdd_globals
+import ythdd_globals
+import ythdd_api_v1, ythdd_inv_tl
+import traceback
 
 def api_greeting():
     string = {'status': 200, 'msg': f"ok (ythdd {ythdd_globals.version})", 'latest_api': f"v{ythdd_globals.apiVersion}"}
@@ -11,25 +14,57 @@
 
 def api_global_catchall(received_request):
     ythdd_globals.apiRequests += 1
-    if request.environ['REMOTE_ADDR'] != "127.0.0.1" or (ythdd_globals.isProxied and request.environ['X-Forwarded-For'] != "127.0.0.1"):
-        ythdd_globals.outsideApiHits += 1
+    if request.environ['REMOTE_ADDR'] != "127.0.0.1" or (ythdd_globals.isProxied and request.environ['HTTP_X_FORWARDED_FOR'] != "127.0.0.1"):
+        ythdd_globals.outsideApiHits += 1
 
     request_list = received_request.split('/')
     api_version = request_list[0]
     if request_list[0] == 'v1':
         # use v1 api
-        del request_list[0] # v1
-        # if list is empty, aka /api/v1/
-        if request_list == ['']:
-            return api_greeting()
+        del request_list[0]
+        # if the list is empty, aka /api/v1/ or /api/v1
+        if request_list == [''] or request_list == []:
+            #return api_greeting()
+            resp = api_greeting()
         try:
-            status, received, data = ythdd_api_v1.lookup(request_list)
+            status, received, data = ythdd_api_v1.lookup(request_list, request)
         except Exception as e:
             ythdd_globals.apiFailedRequests += 1
+            stripped_filename = __file__[max(__file__.rfind("/"), __file__.rfind("\\")) + 1:]
+            print(f"\n{c.FAIL}Error! /api/{received_request} -> {stripped_filename}:L{e.__traceback__.tb_lineno} -> {type(e).__name__}{c.ENDC}:\n"
+                  + f"{traceback.format_exc()}")
             status, received, data = 500, f"internal server error: call ended in failure: {e}", []
+            if ythdd_globals.config["general"]["debug"]:
+                status, received, data = 500, f"internal server error: call ended in failure: {e} ({stripped_filename}:L{e.__traceback__.tb_lineno})", []
+        resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
+    elif request_list[0] == 'invidious':
+        # drop 'invidious' from the list
+        del request_list[0]
+        # for /api/invidious/ and /api/invidious,
+        # show the greeting from the Invidious TL
+        #print(request_list) # for debugging purposes
+        if request_list == [''] or request_list == []:
+            #resp = ythdd_inv_tl.greeting()
+            status, response = ythdd_inv_tl.greeting()
+            return Response(response, status=status)
+        # if a path has been supplied, try to get the appropriate data
+        try:
+            # look up and construct a response
+            resp = ythdd_inv_tl.lookup(request_list, request)
+            #print(resp) # for debugging purposes
+        # unless an error occurs
+        except Exception as e:
+            ythdd_globals.apiFailedRequests += 1
+            stripped_filename = __file__[max(__file__.rfind("/"), __file__.rfind("\\")) + 1:]
+            print(f"\n{c.FAIL}Error! /api/{received_request} -> {stripped_filename}:L{e.__traceback__.tb_lineno} -> {type(e).__name__}{c.ENDC}:\n"
+                  + f"{traceback.format_exc()}")
+            status, received, data = 500, f"internal server error: invidious translation call ended in failure: {e}", []
+            if ythdd_globals.config["general"]["debug"]:
+                status, received, data = 500, f"internal server error: invidious translation call ended in failure: {e} ({stripped_filename}:L{e.__traceback__.tb_lineno})", []
+            resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
     else:
         ythdd_globals.apiFailedRequests += 1
         status, received, data = 405, f'error: unsupported api version: "{request_list[0]}". try: "v{ythdd_globals.apiVersion}".', []
+        resp = Response(json.dumps({'status': status, 'msg': received, 'data': data}), mimetype='application/json', status=status)
 
-    response = {'status': status, 'msg': received, 'data': data}
-    return Response(json.dumps(response), mimetype='application/json', status=status)
+    return resp
ythdd_api_v1.py
@@ -8,6 +8,21 @@ import ythdd_globals, ythdd_extractor
 #from flask_sqlalchemy import SQLAlchemy
 #import ythdd_api_v1_stats, ythdd_api_v1_user, ythdd_api_v1_info, ythdd_api_v1_query, ythdd_api_v1_meta, ythdd_api_v1_admin
 
+def requireAuthentication(admin: bool = True):
+    def functionWrapper(func):
+        def wrapper(*args, **kwargs):
+            token = kwargs["r"].args.get('token')
+            if token == ythdd_globals.config['api']['api_key' + admin * '_admin']:
+                try:
+                    status, received, data = func(*args, **kwargs)
+                    return status, received, data
+                except:
+                    raise AssertionError(f"Function \"{func.__name__}\" does not return status, code, and data as it should!")
+            else:
+                return 401, "error", {'error_msg': "Unauthorized"}
+        return wrapper
+    return functionWrapper
+
 def incrementBadRequests():
     ythdd_globals.apiFailedRequests += 1
 
@@ -29,7 +44,7 @@ def stats():
         "outside_api_requests": ythdd_globals.outsideApiHits,
         "local_api_requests": ythdd_globals.apiRequests - ythdd_globals.outsideApiHits
     }
-    return 200, "OK", data_to_send
+    return 200, "ok", data_to_send
 
 def videoIdSanityCheck(videoId: str):
     if len(videoId) != 11:
@@ -91,7 +106,7 @@ def hot(data):
     # try to get the data
     try:
         started = time.time()
-        extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count)
+        extracted_dict = ythdd_extractor.extract(url_lookup[data[1]] + videoId, getcomments=getcomments, maxcomments=comment_count, manifest_fix=True)
         extracted_dict["took"] = time.time() - started
         return 200, "OK", extracted_dict
     except Exception as e:
@@ -113,7 +128,7 @@ def hot(data):
     started = time.time()
     try:
         # try to actually get the data
-        extracted_related = ythdd_extractor.related('https://www.youtube.com/watch?v=' + videoId)
+        extracted_related = ythdd_extractor.WEBrelated('https://www.youtube.com/watch?v=' + videoId)
         extracted_related['took'] = time.time() - started
         return 200, "OK", extracted_related
     except KeyError:
@@ -129,7 +144,14 @@ def hot(data):
     incrementBadRequests()
     return notImplemented([data[1]]) # workaround before notImplemented is reworked
 
-def lookup(data):
+@requireAuthentication(admin=True)
+def debugger_halt(r):
+    if not ythdd_globals.config["api"]["enable_debugger_halt"]:
+        return 403, "Administrator has disabled access for this endpoint.", []
+    breakpoint()
+    return 200, "Pdb triggered and ended successfully.", []
+
+def lookup(data, request):
     match data[0]:
         case 'stats':
             return stats()
@@ -149,6 +171,8 @@ def lookup(data):
         case 'admin':
             # REQUIRE CREDENTIALS!
             return stub_hello()
+        case 'halt':
+            return debugger_halt(r=request)
         case _:
             incrementBadRequests()
             return notImplemented(data)
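requireAuthentication compares the token query parameter against api_key (or api_key_admin when admin=True) before the wrapped endpoint runs, as debugger_halt demonstrates. A hedged sketch of another endpoint opting in (purge_cache is hypothetical, not part of the diff):

```python
# Hypothetical endpoint, not from the diff: gated on the admin API key.
@requireAuthentication(admin=True)
def purge_cache(r):
    # The wrapper reads r.args.get('token') and returns
    # 401 "Unauthorized" before this body runs if it doesn't match.
    ythdd_globals.general_cache["search"] = []
    return 200, "ok", {"purged": ["search"]}

# Wired up from lookup() the same way as debugger_halt:
#     case 'purge':
#         return purge_cache(r=request)
```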
ythdd_extractor.py
@@ -1,5 +1,8 @@
 #!/usr/bin/python3
-import yt_dlp, requests, json
+import brotli, yt_dlp, requests, json, time
+from http.cookiejar import MozillaCookieJar
+from ythdd_globals import safeTraverse
+import ythdd_proto
 import ythdd_globals
 
 ytdl_opts = {
@@ -12,29 +15,171 @@ ytdl_opts = {
         "default": "%(id)s.%(ext)s",
         "chapter": "%(id)s.%(ext)s_%(section_number)03d_%(section_title)s.%(ext)s"
     },
-    "simulate": True
+    "extractor_args": {
+        "youtube": {
+            # "formats": ["dashy"]
+        }
+    },
+    "simulate": True,
+    "js_runtimes": {
+        "deno": {}
+    },
+    'remote_components': ['ejs:github']
 }
 
-def extract(url: str, getcomments=False, maxcomments=""):
+stage1_headers = {
+    "Connection": "keep-alive",
+    "User-Agent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en-us,en;q=0.5",
+    "Sec-Fetch-Mode": "navigate",
+    "Content-Type": "application/json",
+    "X-Youtube-Client-Name": "5",
+    "X-Youtube-Client-Version": "19.45.4",
+    "Origin": "https://www.youtube.com",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
+}
+
+stage1_body = {
+    "context":
+    {
+        "client":
+        {
+            "clientName": "IOS",
+            "clientVersion": "19.45.4",
+            "deviceMake": "Apple",
+            "deviceModel": "iPhone16,2",
+            "userAgent": "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
+            "osName": "iPhone",
+            "osVersion": "18.1.0.22B83",
+            "hl": "en",
+            "timeZone": "UTC",
+            "utcOffsetMinutes": 0
+        }
+    },
+    #"videoId": uri,
+    "playbackContext":
+    {
+        "contentPlaybackContext":
+        {
+            "html5Preference": "HTML5_PREF_WANTS"
+        }
+    },
+    "contentCheckOk": True,
+    "racyCheckOk": True
+}
+
+stage2_headers = {
+    "Connection": "keep-alive",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en-us,en;q=0.5",
+    "Sec-Fetch-Mode": "navigate",
+    "Accept-Encoding": "gzip, deflate, br"
+}
+
+stage3_headers = {
+    "Connection": "keep-alive",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en-us,en;q=0.5",
+    "Sec-Fetch-Mode": "navigate",
+    "Content-Type": "application/json",
+    "X-Youtube-Client-Name": "1",
+    "X-Youtube-Client-Version": "2.20251103.01.00",
+    "Origin": "https://www.youtube.com",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
+}
+
+stage3_body = {
+    "context":
+    {
+        "client":
+        {
+            "clientName": "WEB",
+            "clientVersion": "2.20251103.01.00",
+            "hl": "en",
+            "timeZone": "UTC",
+            "utcOffsetMinutes": 0
+        }
+    },
+    #"videoId": uri,
+    "contentCheckOk": True,
+    "racyCheckOk": True
+}
+
+web_context_dict = {
+    'context': {
+        'client': {
+            'hl': 'en',
+            'gl': 'US',
+            'deviceMake': '',
+            'deviceModel': '',
+            'userAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0,gzip(gfe)',
+            'clientName': 'WEB',
+            'clientVersion': '2.20251103.01.00',
+            'osName': 'Windows',
+            'osVersion': '10.0',
+            'screenPixelDensity': 2,
+            'platform': 'DESKTOP',
+            'screenDensityFloat': 2,
+            'userInterfaceTheme': 'USER_INTERFACE_THEME_LIGHT',
+            'browserName': 'Firefox',
+            'browserVersion': '142.0',
+            'acceptHeader': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'utcOffsetMinutes': 0,
+        }
+    }
+}
+
+def extract(url: str, getcomments=False, maxcomments="", manifest_fix=False, use_cookies=None):
     # TODO: check user-agent and cookiefile
 
+    ytdl_context = ytdl_opts.copy()
+
     if ythdd_globals.config['extractor']['user-agent']:
         yt_dlp.utils.std_headers['User-Agent'] = ythdd_globals.config['extractor']['user-agent']
 
     if ythdd_globals.config['extractor']['cookies_path']:
         ytdl_opts['cookiefile'] = ythdd_globals.config['extractor']['cookies_path']
 
     if len(url) == 11:
         url = "https://www.youtube.com/watch?v=" + url
     if getcomments:
-        ytdl_opts['getcomments'] = True
+        ytdl_context['getcomments'] = True
     if maxcomments:
-        ytdl_opts['extractor_args'] = {'youtube': {'max_comments': [maxcomments, "all", "all", "all"]}}
-    with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
-        result = ytdl.extract_info(url, download=False)
+        ytdl_context['extractor_args']['youtube']['max_comments'] = [maxcomments, "all", "all", "all"]
+    if manifest_fix:
+        # https://github.com/yt-dlp/yt-dlp/issues/11952#issuecomment-2565802294
+        ytdl_context['extractor_args']['youtube']['player_client'] = ['default', 'web_safari']
+    elif ythdd_globals.config['extractor']['preferred_extractor']:
+        ytdl_context['extractor_args']['youtube']['player_client'] = [ythdd_globals.config['extractor']['preferred_extractor']]
+    else:
+        ytdl_context['extractor_args']['youtube']['player_client'] = ['android_vr']
+
+    if use_cookies is not None:
+        # can be either "global", "agegated" or None
+        deno_path = ythdd_globals.config['extractor']['deno_path']
+        match use_cookies:
+            case "global":
+                ytdl_context['cookiefile'] = ythdd_globals.config['extractor']['cookies_path']
+                ytdl_context['extractor_args']['youtube']['player_client'] = ['tv']
+                if not deno_path:
+                    print("FATAL ERROR: deno path is required for playback using cookies!")
+                ytdl_context['js_runtimes']['deno']['path'] = deno_path if deno_path else ""
+            case "agegated":
+                ytdl_context['cookiefile'] = ythdd_globals.config['extractor']['age_restricted_cookies_path']
+                ytdl_context['extractor_args']['youtube']['player_client'] = ['tv']
+                if not deno_path:
+                    print("FATAL ERROR: deno path is required for playback of age-restricted content!")
+                ytdl_context['js_runtimes']['deno']['path'] = deno_path if deno_path else ""
+            case None | _:
+                pass
+
+    with yt_dlp.YoutubeDL(ytdl_context) as ytdl:
+        result = ytdl.sanitize_info(ytdl.extract_info(url, download=False))
     return result
 
-def related(url: str):
+def WEBrelated(url: str):
+    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
     if len(url) == 11:
         params = {'v': url}
@@ -45,34 +190,423 @@ def related(url: str):
     videoId = url[32:44]
     params = {'v': videoId}
 
-    # NOTE: use ESR user-agent
-    # user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
-    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'
-
-    if ythdd_globals.config['extractor']['user-agent']:
-        user_agent = ythdd_globals.config['extractor']['user-agent']
-
-    headers = {
-        'User-Agent': user_agent,
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'DNT': '1',
-        'Sec-GPC': '1',
-        'Connection': 'keep-alive',
-        'Upgrade-Insecure-Requests': '1',
-        'Sec-Fetch-Dest': 'document',
-        'Sec-Fetch-Mode': 'navigate',
-        'Sec-Fetch-Site': 'none',
-        'Sec-Fetch-User': '?1',
-        'Priority': 'u=0, i',
-        'Pragma': 'no-cache',
-        'Cache-Control': 'no-cache',
-    }
-    response = requests.get(url, headers=headers, params=params)
+    response = requests.get(url, headers=ythdd_globals.getHeaders(caller='extractor'), params=params)
     extracted_string = str(response.content.decode('utf8', 'unicode_escape'))
     start = extracted_string.find('{"responseContext":{"serviceTrackingParams":')
     start2 = extracted_string.find('{"responseContext":{"serviceTrackingParams":', start + 1)
     end = extracted_string.find(';</script>', start2)
     extracted_json = json.loads(extracted_string[start2:end])
 
-    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
+    return extracted_json["contents"]['twoColumnWatchNextResults']["secondaryResults"]
+
+def WEBextractSinglePage(uri: str, use_cookies=None):
+    # WARNING! HIGHLY EXPERIMENTAL, DUE TO BREAK ANYTIME
+
+    start_time = time.time()
+
+    if len(uri) != 11:
+        raise ValueError("WEBextractSinglePage expects a single, 11-character long argument")
+
+    cookies = None
+    if use_cookies is not None:
+        match use_cookies:
+            case "global":
+                ythdd_globals.print_debug("wdata: using global cookies")
+                cookies = MozillaCookieJar(ythdd_globals.config["extractor"]["cookies_path"])
+                cookies.load()
+            case "agegated":
+                ythdd_globals.print_debug("wdata: using agegated cookies")
+                cookies = MozillaCookieJar(ythdd_globals.config["extractor"]["age_restricted_cookies_path"])
+                cookies.load()
+            case None | _:
+                pass
+
+    response = requests.get("https://www.youtube.com/watch?v=" + uri, headers=ythdd_globals.getHeaders(caller='extractor'), cookies=cookies)
+    extracted_string = str(response.content.decode('utf8', 'unicode_escape'))
+    start = extracted_string.find('{"responseContext":')
+    end = extracted_string.find(';var ', start)
+    start2 = extracted_string.find('{"responseContext":', start + 1)
+    end2 = extracted_string.find(';</script>', start2)
+    extracted_json1 = json.loads(extracted_string[start:end])
+    extracted_json2 = json.loads(extracted_string[start2:end2])
+
+    end_time = time.time()
+
+    return {'ec1': extracted_json1, 'ec2': extracted_json2, 'took': end_time - start_time}
+
+def paramsFromUrl(url: str) -> dict:
+    # Returns a dictionary of params from a given URL.
+    split_list = url.split("&")
+    params = {}
+
+    for num, string in enumerate(split_list):
+        if num == 0:
+            string = string[string.find("?") + 1:]
+        key, value = string.split("=")
+        params[key] = value
+
+    return params
+
+def IOSextract(uri: str):
+
+    start = time.time()
+
+    if len(uri) != 11:
+        raise ValueError("IOSextract expects a single, 11-character long uri as an argument")
+
+    stage1_body['videoId'] = uri
+    stage1_h = requests.post("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", headers=stage1_headers, json=stage1_body)
+    stage1 = json.loads(stage1_h.content.decode('utf-8'))
+
+    #stage2_h = requests.get(stage1['streamingData']['hlsManifestUrl'], headers=stage2_headers)
+    #stage2 = stage2_h.content.decode('utf-8')
+
+    stage3_body['videoId'] = uri
+    stage3_h = requests.post("https://www.youtube.com/youtubei/v1/next?prettyPrint=false", headers=stage3_headers, json=stage3_body)
+    stage3 = json.loads(stage3_h.content.decode('utf-8'))
+
+    end = time.time()
+
+    #return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
+    return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
+
+def makeWebContext(secondaryContextDict: dict):
+    # Uses web_context_dict to create a context, returns a dict.
+    # Essentially, expands the web_context_dict with a secondary one.
+
+    current_web_context_dict = web_context_dict.copy()
+
+    for key in secondaryContextDict:
+        current_web_context_dict[key] = secondaryContextDict[key]
+
+    return current_web_context_dict
+
+def getChannelAvatar(response_json: dict):
+    # Returns a dictionary: {url: <proxied url to remote server>, width: ..., height: ...}
+    # containing the best resolution in terms of pixel count.
+    # A great majority of the code has been influenced by https://github.com/iv-org/invidious/blob/master/src/invidious/channels/about.cr.
+
+    avatars = safeTraverse(response_json, ['metadata', 'channelMetadataRenderer', 'avatar', 'thumbnails'], default=None)
+
+    if avatars is None:
+        # fall back to lower resolution avatars
+        avatars = safeTraverse(response_json, ['header',
+                                               'pageHeaderRenderer',
+                                               'content',
+                                               'pageHeaderViewModel',
+                                               'image',
+                                               'decoratedAvatarViewModel',
+                                               'avatar',
+                                               'avatarViewModel',
+                                               'image',
+                                               'sources'], default=None)
+
+    # if avatars is None: # TODO: if avatars is still None, use a local avatar
+
+    best_avatar = avatars[-1] # usually, the best avatar is stored last
+    for avatar in avatars:
+        if avatar['width'] * avatar['height'] > best_avatar['width'] * best_avatar['height']:
+            best_avatar = avatar
+
+    # or use regex substitution and set the size to something like 512x512
+    # e.g.: =s128 -> =s512
+
+    best_avatar['url'] = ythdd_globals.translateLinks(best_avatar['url'])
+
+    return best_avatar
+
+def generateChannelAvatarsFromUrl(url: str, proxied: bool = True) -> list:
+    # Generates channel avatars at default sizes.
+
+    # avatar urls for channels in search results start with //yt3.ggpht.com/
+    if url.startswith("//"):
+        url = "https:" + url
+
+    avatars = []
+    if not url.startswith("https://yt3.ggpht.com/") and not url.startswith("https://yt3.googleusercontent.com/"):
+        return []
+
+    url = ythdd_globals.translateLinks(url)
+    url_size_start = url.rfind("=s") + 2
+    url_size_end = url.find("-", url_size_start)
+
+    default_sizes = [32, 48, 76, 100, 176, 512]
+
+    for size in default_sizes:
+        avatars.append(
+            {
+                "url": url[:url_size_start] + str(size) + url[url_size_end:],
+                "width": size,
+                "height": size
+            }
+        )
+
+    return avatars
+
+def isVerified(response_json: dict) -> bool:
+    # Returns True if any user badge has been found (verified/artist).
+
+    if not isinstance(response_json, dict):
+        return False
+
+    match safeTraverse(list(response_json.keys()), [0], default=""):
+        case "metadataBadgeRenderer": # channels in search results
+            verified = safeTraverse(response_json, ["metadataBadgeRenderer", "tooltip"], default="") in ("Verified", "Official Artist Channel") # perhaps look for badge styles?
+            return verified
+
+    return False
+
+def isPremium(response_json: dict) -> bool:
+    # Returns True if content is paid (member-only).
+
+    if not isinstance(response_json, dict):
+        return False
+
+    match safeTraverse(list(response_json.keys()), [0], default=""):
+        case "metadataBadgeRenderer": # channels in search results
+            paid = safeTraverse(response_json, ["metadataBadgeRenderer", "style"], default="") in ("BADGE_STYLE_TYPE_MEMBERS_ONLY",)
+            return paid
+
+    return False
+
+def browseChannel(ucid: str, params: str = None, ctoken: str = None):
+    # Returns the response from innertube's browse endpoint for channels (as a dict).
+
+    if len(ucid) != 24:
+        raise ValueError(f"Something is wrong with the UCID {ucid}. Expected a 24-character long channel ID, not {len(ucid)}.")
+
+    additional_context = {'browseId': ucid}
+    if params is not None:
+        additional_context['params'] = params
+    if ctoken is not None:
+        additional_context['continuation'] = ctoken
+
+    context = makeWebContext(additional_context)
+
+    response = requests.post(
+        'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
+        headers = ythdd_globals.getHeaders(),
+        json = context,
+    )
+
+    response_json = json.loads(response.text)
+
+    return response_json
+
+def WEBextractSearchResults(search_query: str, page: int) -> list:
+    # Posts a search request to the innertube API
+    # and processes only the relevant part (the actual results)
+
+    if search_query is None:
+        return []
+
+    additional_context = {"query": search_query}
+    if page is not None:
+        params = ythdd_proto.produceSearchParams(page)
+        additional_context["params"] = params
+
+    web_context = makeWebContext(additional_context)
+    response = requests.post('https://www.youtube.com/youtubei/v1/search',
+                             params={"prettyPrint": False},
+                             headers=stage2_headers,
+                             data=json.dumps(web_context)
+                             )
+
+    results = []
+    try:
+        results = json.loads(response.text)
+    except:
+        pass
+    results = safeTraverse(results, ["contents", "twoColumnSearchResultsRenderer", "primaryContents", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents"], default=[])
+
+    return results
+
+def WEBgetSearchSuggestions(query: str, previous_query: str = '') -> dict:
+    # Takes in a search query and returns relevant suggestions.
+    # Can optionally take the previous query, but that's rather novel and
+    # not supported across players nor by the invidious API itself.
+
+    suggestions = []
+
+    if not isinstance(query, str):
+        print("WEBgetSearchSuggestions: query is not a string (as it should be)")
+        return {}
+    if not isinstance(previous_query, str):
+        previous_query = ''
+
+    if ythdd_globals.config["general"]["cache"]:
+        # look for cached suggestions
+        for cached_search in ythdd_globals.general_cache["search"]:
+            if cached_search["q"] == query.lower() and cached_search["pq"] == previous_query.lower():
+                # found it? skip ahead
+                suggestions = cached_search["resp"]
+                break
+
+    # request wasn't cached? query the API
+    if suggestions == []:
+
+        params = {
+            'ds': 'yt',
+            'hl': 'en', # host language
+            'gl': 'us', # geolocation
+            'client': 'youtube',
+            'gs_ri': 'youtube',
+            'q': query, # query
+            'pq': previous_query # previous query
+        }
+
+        response = requests.get(
+            'https://suggestqueries-clients6.youtube.com/complete/search',
+            params=params,
+            headers=stage2_headers
+        )
+
+        # can break anytime but hopefully the tiny speed gain will make up for it
+        results = response.text[23 + len(query):]
+        results = results[:results.rfind("{") - 1]
+        results = json.loads(results)
+
+        for result in results:
+            suggestions.append(result[0])
+
+        # cache the response
+        if ythdd_globals.config["general"]["cache"]:
+            ythdd_globals.general_cache["search"].append(
+                {
+                    "q": query.lower(),
+                    "pq": previous_query.lower(),
+                    "resp": suggestions
+                }
+            )
+
+    return {
+        "query": query,
+        "suggestions": suggestions
+    }
+
+def WEBgetVideoComments(ctoken: str) -> tuple:
+
+    # the ctoken needs to be passed explicitly.
+    # no guessing or retrieving it from globals.
+    if ctoken is None:
+        return [], ""
+
+    # build a web context containing the relevant ctoken
+    web_context = makeWebContext({"continuation": ctoken})
+    response = requests.post('https://www.youtube.com/youtubei/v1/next',
+                             params={"prettyPrint": False},
+                             headers=stage2_headers,
+                             data=json.dumps(web_context)
+                             )
+
+    results = []
+    try:
+        results = json.loads(response.text)
+    except:
+        pass
+
+    comments = safeTraverse(results, ["frameworkUpdates", "entityBatchUpdate", "mutations"], default=[])
+    comment_continuations = []
+    comment_continuations_re = safeTraverse(results, ["onResponseReceivedEndpoints"], default=[])
+    for received_endpoint in comment_continuations_re:
+
+        # this is horrible...
+
+        acia = safeTraverse(received_endpoint, ["appendContinuationItemsAction", "continuationItems"], default=[])
+        rcic = safeTraverse(received_endpoint, ["reloadContinuationItemsCommand", "continuationItems"], default=[])
+
+        for entry in acia:
+            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
+                comment_continuations = acia
+                break
+
+        for entry in rcic:
+            if "commentThreadRenderer" in entry or "continuationItemRenderer" in entry:
+                comment_continuations = rcic
+                break
+
+        if comment_continuations != []:
+            break
+
+    if comment_continuations == []:
+        print("error: received an unknown comment structure, unable to parse continuations (replies)")
+        # breakpoint()
+        # return [], ""
+
+    # extract the new continuation
+    new_continuation = ""
+    if "continuationItemRenderer" in safeTraverse(comment_continuations, [-1], default=[]):
+        # first, look for a ctoken for the next page of comments inside the response
+        new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default=None)
+        # or search elsewhere in case this is a reply thread
+        if new_continuation is None:
+            new_continuation = safeTraverse(comment_continuations, [-1, "continuationItemRenderer", "button", "buttonRenderer", "command", "continuationCommand", "token"], default="")
+
+    # perform a basic mutation check before parsing
+    # will ignore replies liked by the video uploader ("hearts")
+    actual_comments = [x for x in comments if "properties" in safeTraverse(x, ["payload", "commentEntityPayload"], default=[], quiet=True)]
+    actual_comment_continuations = [x for x in comment_continuations if "replies" in safeTraverse(x, ["commentThreadRenderer"], default=[], quiet=True)]
+
+    # link reply data (reply count and ctoken) for comments with replies
+    for reply_renderer in actual_comment_continuations:
+
+        mutual_key = safeTraverse(reply_renderer, ["commentThreadRenderer", "commentViewModel", "commentViewModel", "commentKey"], default="unknown-key")
+        reply_ctoken = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
+        reply_count = safeTraverse(reply_renderer, ["commentThreadRenderer", "replies", "commentRepliesRenderer", "viewReplies", "buttonRenderer", "text", "runs", 0, "text"], default="0 replies").split(" ")[0]
+
+        # suspected a/b test; can be worked around with on-demand ctoken creation.
+        # workaround for yt not showing replies when sorting for "top" comments
+        try:
+            int(reply_count) # can be just "Replies"
+        except:
+            reply_count = "0"
+
+        for comment in actual_comments:
+            found_key = safeTraverse(comment, ["entityKey"], default="unknown-key")
+            # try to link a relevant ctoken if a comment has a response
+            if found_key == mutual_key:
+                if ythdd_globals.config["general"]["debug"]: print(f"found reply for {found_key}")
+                comment["replies"] = {
+                    "replyCount": int(reply_count),
+                    "continuation": reply_ctoken
+                }
+
+    return actual_comments, new_continuation
+
+def WEBextractPlaylist(plid: str = "", ctoken: str = ""):
+
+    # if a ctoken has been provided, use it
+    if ctoken:
+        # the playlist id can be omitted if a ctoken is provided
+        additional_context = {'continuation': ctoken}
+    else:
+        # try to create a ctoken which will allow accessing the full playlist, including delisted/deleted videos
+        additional_context = {'continuation': ythdd_proto.producePlaylistContinuation(plid, offset=0)}
+
+    context = makeWebContext(additional_context)
+
+    response = requests.post(
+        'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
+        headers = ythdd_globals.getHeaders(),
+        json = context
+    )
+
+    resp_json = json.loads(response.text)
+
+    # if this is a first-time fetch (no ctoken passed), extract metadata
+    metadata = None
+    if not ctoken:
+        metadata = {
+            "microformat": safeTraverse(resp_json, ["microformat"]),
+            "sidebar": safeTraverse(resp_json, ["sidebar"])
+        }
+
+    # extract the continuation
+    new_continuation = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems", -1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"])
|
||||
|
||||
# "best-effort" playlist's videos extraction
|
||||
# "best-effort" because None's (unsuccessful video extraction = None) are passed as they are
|
||||
videos = safeTraverse(resp_json, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"]) # includes continuation as last element of list, which will be ignored
|
||||
|
||||
return metadata, new_continuation, videos
|
||||
|
||||
|
||||
109
ythdd_globals.py
109
ythdd_globals.py
@@ -22,6 +22,8 @@ configfile = "config.toml"
|
||||
version = "0.0.1"
|
||||
apiVersion = "1"
|
||||
randomly_generated_passcode = 0
|
||||
video_cache = {}
|
||||
general_cache = {"search": [], "continuations": {"channels": {}, "comments": {}}, "channels": {}, "playlists": {}, "storyboards": {}, "hashed_videoplayback": {}}
|
||||
|
||||
def getConfig(configfile):
|
||||
|
||||
@@ -30,16 +32,16 @@ def getConfig(configfile):
|
||||
global randomly_generated_passcode
|
||||
|
||||
if not os.path.exists(configfile):
|
||||
dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False}, 'api': {'api_key': 'CHANGEME'}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
|
||||
dummy_config = {'general': {'db_file_path': 'ythdd_db.sqlite', 'video_storage_directory_path': 'videos/', 'is_proxied': False, 'public_facing_url': 'http://127.0.0.1:5000/', 'debug': False, 'cache': True}, 'api': {'api_key': 'CHANGEME', 'enable_debugger_halt': False}, 'proxy': {'user-agent': '', 'allow_proxying_videos': True, 'match_initcwndbps': True}, 'extractor': {'user-agent': '', 'cookies_path': ''}, 'admin': {'admins': ['admin']}, 'yt_dlp': {}, 'postprocessing': {'presets': [{'name': 'recommended: [N][<=720p] best V+A', 'format': 'bv[height<=720]+ba', 'reencode': ''}, {'name': '[N][1080p] best V+A', 'format': 'bv[height=1080]+ba', 'reencode': ''}, {'name': '[R][1080p] webm', 'format': 'bv[height=1080]+ba', 'reencode': 'webm'}, {'name': '[N][720p] best V+A', 'format': 'bv[height=720]+ba', 'reencode': ''}, {'name': '[R][720p] webm', 'format': 'bv[height=720]+ba', 'reencode': 'webm'}, {'name': '[N][480p] best V+A', 'format': 'bv[height=480]+ba', 'reencode': ''}, {'name': '[480p] VP9 webm/reencode', 'format': 'bv*[height=480][ext=webm]+ba/bv[height=480]+ba', 'reencode': 'webm'}, {'name': '[N][1080p] best video only', 'format': 'bv[height=1080]', 'reencode': ''}, {'name': '[N][opus] best audio only', 'format': 'ba', 'reencode': 'opus'}]}}
|
||||
# if a passcode has not been provided by the user (config file doesn't exist, and user didn't specify it using an argument)
|
||||
print(f"{colors.WARNING}WARNING{colors.ENDC}: Using default, baked in config data. {colors.ENDL}"
|
||||
f"Consider copying and editing the provided example file ({colors.OKCYAN}config.default.toml{colors.ENDC}).")
|
||||
f" Consider copying and editing the provided example file ({colors.OKCYAN}config.default.toml{colors.ENDC}).")
|
||||
if randomly_generated_passcode == 0:
|
||||
# generate a pseudorandom one and use it in the temporary config
|
||||
randomly_generated_passcode = str(int(time.time() * 1337 % 899_999 + 100_000))
|
||||
|
||||
print(f"{colors.WARNING}WARNING{colors.ENDC}: Default config populated with one-time, insecure pseudorandom admin API key: {colors.OKCYAN}{randomly_generated_passcode}{colors.ENDC}."
|
||||
f" {colors.ENDL}The admin API key is not the Flask debugger PIN. You need to provide a config file for persistence!{colors.ENDL}")
|
||||
print(f"{colors.WARNING}WARNING{colors.ENDC}: Default config populated with one-time, insecure pseudorandom admin API key: {colors.OKCYAN}{randomly_generated_passcode}{colors.ENDC}.\n"
|
||||
f" The admin API key is not the Flask debugger PIN. You need to provide a config file for persistence!{colors.ENDL}")
|
||||
|
||||
dummy_config['api']['api_key_admin'] = randomly_generated_passcode
|
||||
return dummy_config
|
||||
@@ -54,5 +56,102 @@ def setConfig(configfile):
|
||||
#setConfig(configfile)
|
||||
config = {}
|
||||
|
||||
def getHeaders(caller="proxy"):
|
||||
|
||||
# NOTE: use ESR user-agent
|
||||
# user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:130.0) Gecko/20100101 Firefox/130.0'
|
||||
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0'
|
||||
|
||||
if config[caller]['user-agent']:
|
||||
user_agent = config[caller]['user-agent']
|
||||
|
||||
headers = {
|
||||
'User-Agent': user_agent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'DNT': '1',
|
||||
'Sec-GPC': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'none',
|
||||
'Sec-Fetch-User': '?1',
|
||||
'Priority': 'u=0, i',
|
||||
'Pragma': 'no-cache',
|
||||
'Cache-Control': 'no-cache',
|
||||
}
|
||||
|
||||
return headers
|
||||
|
||||
def translateLinks(link: str, remove_params: bool = True):
|
||||
|
||||
link = link.replace("https://i.ytimg.com/", config['general']['public_facing_url'])
|
||||
link = link.replace("https://yt3.ggpht.com/", config['general']['public_facing_url'] + "ggpht/")
|
||||
link = link.replace("https://yt3.googleusercontent.com/", config['general']['public_facing_url'] + "guc/")
|
||||
|
||||
# try to remove tracking params
|
||||
if remove_params and "?" in link:
|
||||
link = link[:link.find("?")]
|
||||
|
||||
return link
|
||||
|
||||
def getUptime():
|
||||
return int(time.time()) - starttime
|
||||
return int(time.time()) - starttime
|
||||
|
||||
def safeTraverse(obj: dict, path: list, default=None, quiet: bool = False):
|
||||
"""
|
||||
Traverse dynamic objects with fallback to default values
|
||||
|
||||
This function can take an Ellipsis as part of traversal path,
|
||||
meaning that it will return the object from the list
|
||||
that contains the next key. This has been introduced
|
||||
so that no matter which object in a list holds the relevant
|
||||
model, it will find it (meaning no assumptions are necessary).
|
||||
Kepp in mind that only one ellipsis at a time is supported,
|
||||
thus ["some_key", ..., ..., "some_other_key"] won't work.
|
||||
|
||||
:param obj: Traversed object
|
||||
:type obj: dict
|
||||
:param path: Path which shall be traversed
|
||||
:type path: list
|
||||
:param default: Default value returned on failure
|
||||
:type default: any, None by default
|
||||
:param quiet: Quiet flag
|
||||
:type quiet: bool
|
||||
"""
|
||||
result = obj
|
||||
try:
|
||||
# for every item in path and its position
|
||||
for pos, iterable_key in enumerate(path):
|
||||
# if the key is not an ellipsis, traverse it
|
||||
if iterable_key is not Ellipsis:
|
||||
result = result[iterable_key]
|
||||
# if it is an ellipsis, and there is another key beside it
|
||||
elif pos < len(path) - 1:
|
||||
# then iterate through all of the list contents
|
||||
for list_content in result:
|
||||
# in search of the next traversal key
|
||||
if path[pos + 1] in list_content:
|
||||
result = list_content
|
||||
# show an error message if ellipsis is used incorrectly
|
||||
else:
|
||||
print("error(safeTraverse): Traversal path can't end with an Ellipsis!")
|
||||
raise TypeError()
|
||||
# handle exceptions
|
||||
except (KeyError, TypeError, IndexError):
|
||||
result = default
|
||||
if not quiet: print(f"error reading: {' -> '.join([str(x) for x in path])} - returning: {default}")
|
||||
finally:
|
||||
return result
|
||||
|
||||
def getCommit() -> str | None:
|
||||
try:
|
||||
return Repo(search_parent_directories=True).head.object.hexsha
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
def print_debug(text: str) -> None:
|
||||
# Will print a string only if debugging is enabled.
|
||||
if config["general"]["debug"]:
|
||||
print(text)
|
||||
|
||||
996
ythdd_inv_tl.py
Normal file
996
ythdd_inv_tl.py
Normal file
@@ -0,0 +1,996 @@
|
||||
#!/usr/bin/python3
|
||||
# ythdd Invidious Translation Layer
|
||||
# -----
|
||||
# Translates requests sent through Invidious API at /api/invidious/
|
||||
# to use internal extractors.
|
||||
from flask import Response, request, redirect, url_for
|
||||
from markupsafe import escape
|
||||
from time import strftime, gmtime, time
|
||||
from ythdd_globals import safeTraverse
|
||||
from hashlib import md5
|
||||
import json, datetime
|
||||
import dateparser
|
||||
import html
|
||||
import invidious_formats
|
||||
import math
|
||||
import ythdd_globals
|
||||
import ythdd_api_v1
|
||||
import ythdd_extractor
|
||||
import ythdd_struct_builder
|
||||
import ythdd_struct_parser
|
||||
|
||||
# TODOs:
|
||||
# ----------
|
||||
# DONE:
|
||||
# [✓] /api/v1/stats (stats())
|
||||
# [✓] /streams/dQw4w9WgXcQ (does nothing)
|
||||
# [✓] /vi/:videoIdXXXX/maxresdefault.jpg
|
||||
# [✓] /api/v1/search?q=... (videos and playlists), pagination
|
||||
# [✓] /api/v1/search/suggestions?q=...&pq=...
|
||||
# [✓] /api/v1/channel/:ucid
|
||||
# [✓] /api/v1/channel/:ucid/videos, shorts, playlists, streams
|
||||
# [✓] /api/v1/comments/:videoid?continuation=...
|
||||
# [✓] /api/v1/videos/:videoIdXXXX
|
||||
# [✓] /api/v1/playlists/:plid
|
||||
# [✓] /api/v1/channel/{videos, shorts, playlists, streams, latest?}/:ucid (rewrite)
|
||||
# [✓] /api/v1/:videoIdXXXX/maxres.jpg redirects to best quality thumbnail
|
||||
# [✓] /api/v1/storyboards/:videoIdXXXX
|
||||
# ----------
|
||||
# PLANNED:
|
||||
# [X] /api/v1/videos/:videoIdXXXX does not depend on yt-dlp and offloads stream retrieval elsewhere (making initial response fast)
|
||||
# [X] /api/v1/manifest/:videoIdXXXX (above is prerequisite)
|
||||
# [X] rewrite the awful lookup logic
|
||||
# [X] /api/v1/search?q=... complex filtering options (https://gitea.invidious.io/iv-org/invidious/src/branch/master/src/invidious/search/filters.cr)
|
||||
# ----------
|
||||
# IDEAS:
|
||||
# [*] /api/v1/popular returns last requested videos by the IP (serving as multi-device history?)
|
||||
# [*] /api/v1/trending returns recently archived videos
|
||||
# [*] produce continuations instead of extracting them
|
||||
# ----------
|
||||
# NOT PLANNED/MAYBE IN THE FUTURE:
|
||||
# [ ] /api/v1/auth/subscriptions (stub? db?)
|
||||
# [ ] /api/v1/auth/feed?page=1 (stub? db?)
|
||||
# [ ] /api/v1/auth/playlists (stub? db?)
|
||||
|
||||
DEFAULT_AVATAR = "https://yt3.ggpht.com/a/default-user=s176-c-k-c0x00ffffff-no-rj"
|
||||
DEFAULT_VIDEO = "https://i.ytimg.com/img/no_thumbnail.jpg" # todo: replace this with a custom, local asset
|
||||
|
||||
def incrementBadRequests():
|
||||
ythdd_globals.apiFailedRequests += 1
|
||||
|
||||
def greeting():
|
||||
return 200, 'hello from Invidious TL!\nstats endpoint at /api/invidious/stats'
|
||||
|
||||
def send(status, response):
|
||||
return Response(json.dumps(response), mimetype='application/json', status=status)
|
||||
|
||||
def notImplemented(data):
|
||||
return send(501, {'error': f"not recognised/implemented in Invidious TL: {'/'.join(data)}"})
|
||||
|
||||
def stats():
|
||||
data_to_send = {
|
||||
"version": "2.0",
|
||||
"software":
|
||||
{
|
||||
"name": "invidious",
|
||||
"version": f"invidious TL, ythdd ({ythdd_globals.version})",
|
||||
"branch": "https://gitea.7o7.cx/sherl/ythdd",
|
||||
"tl_msg": "/api/invidious/api/v1/"
|
||||
}
|
||||
}
|
||||
return send(200, data_to_send)
|
||||
|
||||
def videoIdSanityCheck(videoId: str):
|
||||
if len(videoId) != 11:
|
||||
incrementBadRequests()
|
||||
return send(400, f'error: bad request. wrong videoId: {videoId} is {len(videoId)} characters long, but should be 11.')
|
||||
# elif...?
|
||||
|
||||
def auth(data):
|
||||
# can be either subscriptions, feed or playlists
|
||||
match data[1]:
|
||||
# NOT YET IMPLEMENTED
|
||||
# TODO: make it use the internal db
|
||||
case "subscriptions" | "feed" | "playlists":
|
||||
return send(200, [])
|
||||
case _:
|
||||
incrementBadRequests()
|
||||
return send(404, [])
|
||||
|
||||
def streams():
|
||||
return send(200, '')
|
||||
|
||||
def epochToDate(epoch):
|
||||
return strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(epoch))
|
||||
|
||||
def dateToEpoch(date: str):
|
||||
return datetime.datetime.fromisoformat(date).timestamp()
|
||||
|
||||
def trending():
|
||||
return send(200, [{}])
|
||||
|
||||
def popular():
|
||||
return send(200, [{}])
|
||||
|
||||
def getError(wdata: dict):
|
||||
unknown_error = {"status": "Unknown error", "reason": "This is a generic ythdd error."}
|
||||
error = ""
|
||||
|
||||
try:
|
||||
playabilityStatus = safeTraverse(wdata, ['ec1', 'playabilityStatus'], default=unknown_error)
|
||||
# check for error
|
||||
if safeTraverse(playabilityStatus, ["status"], default="error") == "OK":
|
||||
return None # no error? return None
|
||||
error += f"({playabilityStatus['status']}) {playabilityStatus['reason']}"
|
||||
except:
|
||||
error += f"Generic error"
|
||||
|
||||
try:
|
||||
errorScreen = safeTraverse(wdata, ['ec1', 'playabilityStatus', 'errorScreen', 'playerErrorMessageRenderer', 'subreason', 'runs'], default=[])
|
||||
error += " - "
|
||||
for x in errorScreen:
|
||||
error += f"{x['text']} "
|
||||
except:
|
||||
pass
|
||||
|
||||
return error
|
||||
|
||||
def videos(data):
|
||||
# an attempt on a faithful rewrite of
|
||||
# https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr
|
||||
response = {}
|
||||
|
||||
# hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="")
|
||||
# adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[])
|
||||
# if not hls_url or not adaptive_formats:
|
||||
# print(f"serious error: couldn't get hls_url or adaptive_formats!\n"
|
||||
# f"dumping idata:\n"
|
||||
# f"{idata}")
|
||||
# return send(500, {'error': getError(idata)})
|
||||
|
||||
time_start = time()
|
||||
|
||||
if len(data) < 4 or len(data[3]) != 11:
|
||||
return send(400, {"error": "No valid video id."})
|
||||
|
||||
if ythdd_globals.config['general']['cache'] and data[3] in ythdd_globals.video_cache:
|
||||
if ythdd_globals.video_cache[data[3]]['cacheTime'] + 3 * 60 * 60 > time():
|
||||
response = ythdd_globals.video_cache[data[3]]
|
||||
response['fromCache'] = True
|
||||
return send(200, response)
|
||||
else:
|
||||
del ythdd_globals.video_cache[data[3]]
|
||||
|
||||
wdata = ythdd_extractor.WEBextractSinglePage(data[3])
|
||||
|
||||
age_restricted = False
|
||||
error = getError(wdata)
|
||||
if error is not None:
|
||||
if error.startswith("(LOGIN_REQUIRED)") and "inappropriate for some users" in error:
|
||||
# check if user provided age-gated cookies
|
||||
if ythdd_globals.config["extractor"]["age_restricted_cookies_path"]:
|
||||
ythdd_globals.print_debug(f"videos({data[3]}): using agegated cookies to bypass restriction")
|
||||
ydata = ythdd_extractor.extract(data[3], use_cookies="agegated")
|
||||
wdata = ythdd_extractor.WEBextractSinglePage(data[3], use_cookies="agegated")
|
||||
age_restricted = True
|
||||
else:
|
||||
# return error if no age-gated cookies are provided
|
||||
return send(500, {"status": "error", "error": error})
|
||||
else:
|
||||
# return error if it doesn't mention age restriction
|
||||
return send(500, {"status": "error", "error": error})
|
||||
else:
|
||||
ydata = ythdd_extractor.extract(data[3])
|
||||
|
||||
#return send(200, {'ydata': ydata, 'wdata': wdata})
|
||||
#return send(200, {'idata': idata, 'wdata': wdata})
|
||||
|
||||
main_results = wdata['ec2']['contents']['twoColumnWatchNextResults']
|
||||
primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])
|
||||
# video_primary_renderer = safeTraverse(primary_results, [..., 'videoPrimaryInfoRenderer'])
|
||||
video_secondary_renderer = safeTraverse(primary_results, [..., 'videoSecondaryInfoRenderer'])
|
||||
|
||||
video_details = safeTraverse(wdata, ['ec1', 'videoDetails'])
|
||||
microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={})
|
||||
|
||||
video_id = safeTraverse(video_details, ['videoId'], default=f"[{data[3]}] (errors occurred, check logs)")
|
||||
title = safeTraverse(video_details, ['title'], default=video_id)
|
||||
views = int(safeTraverse(video_details, ['viewCount'], default=0))
|
||||
length = int(safeTraverse(video_details, ['lengthSeconds'], default=1))
|
||||
published = dateToEpoch(safeTraverse(microformat, ['publishDate'], default="2000-01-01T00:00:00Z")) # ISO format to Unix timestamp
|
||||
published_date = epochToDate(published)
|
||||
premiere_timestamp = safeTraverse(microformat, ['liveBroadcastDetails', 'startTimestamp'], default=None) # let's ignore the nitty gritty for the time being
|
||||
premiere_timestamp = premiere_timestamp if premiere_timestamp else safeTraverse(microformat, ['playabilityStatus', 'liveStreamability', 'liveStreamabilityRenderer', 'offlineSlate', 'liveStreamOfflineSlateRenderer', 'scheduledStartTime'], default=None)
|
||||
live_now = safeTraverse(microformat, ['liveBroadcastDetails', 'isLiveNow'], default=False)
|
||||
post_live_dvr = safeTraverse(video_details, ['isPostLiveDvr'], default=False)
|
||||
allowed_regions = safeTraverse(microformat, ['availableCountries'], default=[])
|
||||
allow_ratings = safeTraverse(video_details, ['allowRatings'], default=True)
|
||||
family_friendly = safeTraverse(microformat, ['isFamilySafe'], default=True)
|
||||
is_listed = safeTraverse(video_details, ['isCrawlable'], default=True)
|
||||
is_upcoming = safeTraverse(video_details, ['isUpcoming'], default=False)
|
||||
keywords = safeTraverse(video_details, ['keywords'], default=[])
|
||||
|
||||
related_raw = safeTraverse(wdata, ['ec2', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results'], default=[]) # can possibly change in the future
|
||||
related = []
|
||||
for entry in related_raw[:-1]:
|
||||
|
||||
related_entry = {}
|
||||
match safeTraverse(list(entry.keys()), [0], default=""):
|
||||
|
||||
case "compactVideoRenderer":
|
||||
# legacy renderer, a/b tested and later phased out in summer 2025
|
||||
continue
|
||||
|
||||
case "lockupViewModel":
|
||||
y = safeTraverse(entry, ['lockupViewModel'])
|
||||
if not isinstance(y, dict):
|
||||
continue
|
||||
if safeTraverse(y, ["contentType"], default="LOCKUP_CONTENT_TYPE_VIDEO") != "LOCKUP_CONTENT_TYPE_VIDEO":
|
||||
# neither mixes nor playlists are currently supported by the invidious api
|
||||
continue
|
||||
# note: this model is similar, but not identical to the one in ythdd_struct_parser. perhaps they can be both handled in the struct parser some time.
|
||||
lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[])
|
||||
related_entry['videoId'] = safeTraverse(y, ['contentId'])
|
||||
related_entry['title'] = safeTraverse(lmvm, ['title', 'content'])
|
||||
related_entry['videoThumbnails'] = ythdd_struct_builder.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
|
||||
related_entry['author'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content'])
|
||||
related_entry['authorId'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
|
||||
related_entry['authorUrl'] = '/channel/' + related_entry['authorId']
|
||||
related_entry['authorVerified'] = False if safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'attachmentRuns']) is None else True # seens to do the job
|
||||
author_avatar_url = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'avatar', 'avatarViewModel', 'image', 'sources', 0, 'url'], default=DEFAULT_AVATAR)
|
||||
related_entry['authorThumbnails'] = ythdd_extractor.generateChannelAvatarsFromUrl(author_avatar_url)
|
||||
related_entry['lengthSeconds'] = ythdd_struct_parser.parseLengthFromTimeBadge(safeTraverse(y, ['contentImage', 'thumbnailViewModel', 'overlays', 0, 'thumbnailOverlayBadgeViewModel', 'thumbnailBadges', 0, 'thumbnailBadgeViewModel', 'text'], default="0:0"))
|
||||
related_entry['viewCountText'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 1, 'metadataParts', 0, 'text', 'content'], default="0").split(" ")[0]
|
||||
related_entry['viewCount'] = ythdd_struct_parser.parseViewsFromViewText(related_entry['viewCountText'])
|
||||
|
||||
case _:
|
||||
# unsupported model: print info into stdout
|
||||
print("received an entry of unknown type during parsing of related videos:")
|
||||
print(entry)
|
||||
print("")
|
||||
continue
|
||||
|
||||
related.append(related_entry)
|
||||
|
||||
likes = safeTraverse(ydata, ['like_count'], default=0)
|
||||
description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)")
|
||||
short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)")
|
||||
description_html = html.escape(description).replace("\r\n", "<br>").replace("\n", "<br>") # still TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
|
||||
|
||||
genre = safeTraverse(microformat, ['category'])
|
||||
# TODO: genre blah blah blah...
|
||||
author = safeTraverse(video_details, ['author'], default="Unknown Author")
|
||||
ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID")
|
||||
subs = ydata['channel_follower_count']
|
||||
author_thumbnail = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer', 'thumbnail', 'thumbnails', 0, 'url'])
|
||||
author_verified = ythdd_extractor.isVerified(safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer', 'badges', 0], default=[]))
|
||||
if author_thumbnail is None:
|
||||
# there might be multiple authors (on a collaborative video)
|
||||
# if so, then try to extract first channel's (uploader's) avatar
|
||||
livm = safeTraverse(video_secondary_renderer, ["owner", "videoOwnerRenderer", "attributedTitle", "commandRuns", 0, "onTap", "innertubeCommand", "showDialogCommand", "panelLoadingStrategy", "inlineContent", "dialogViewModel", "customContent", "listViewModel", "listItems"], default=[])
|
||||
author_thumbnail = safeTraverse(livm, [0, "listItemViewModel", "leadingAccessory", "avatarViewModel", "image", "sources", 0, "url"], default=DEFAULT_AVATAR)
|
||||
author_verified = author_verified or safeTraverse(livm, [0, "listItemViewModel", "title", "attachmentRuns", 0, "element", "type", "imageType", "image", "sources", 0, "clientResource", "imageName"]) in ("AUDIO_BADGE", "CHECK_CIRCLE_FILLED")
|
||||
author_thumbnail = ythdd_extractor.generateChannelAvatarsFromUrl(author_thumbnail)
|
||||
|
||||
wdata_streams = safeTraverse(wdata, ["ec1", "streamingData"], default=[])
|
||||
adaptive_formats = []
|
||||
format_streams = []
|
||||
# adaptive_formats, format_streams = rebuildFormats(adaptive_formats)
|
||||
if not live_now:
|
||||
# adaptive_formats, format_streams = rebuildFormatsFromYtdlpApi(ydata)
|
||||
|
||||
initial_astreams_y = {} # itag is the key
|
||||
initial_fstreams_y = {} # same here
|
||||
initial_astreams_w = {}
|
||||
initial_fstreams_w = {}
|
||||
|
||||
for video_stream in ydata["formats"]:
|
||||
if video_stream["format_note"] in ("storyboard"):
|
||||
# ignore non-audio/video formats (e.g. storyboards)
|
||||
continue
|
||||
if video_stream["format_id"] == "18": # todo: do this dynamically
|
||||
initial_fstreams_y[int(video_stream["format_id"])] = video_stream
|
||||
elif video_stream["format_id"].isdigit():
|
||||
# filter out DRC audio
|
||||
initial_astreams_y[int(video_stream["format_id"])] = video_stream
|
||||
else:
|
||||
continue
|
||||
|
||||
# format streams
|
||||
for video_stream in wdata_streams["formats"]:
|
||||
initial_fstreams_w[video_stream["itag"]] = video_stream
|
||||
|
||||
# adaptive streams
|
||||
for audiovideo_stream in wdata_streams["adaptiveFormats"]:
|
||||
if not "isVb" in audiovideo_stream and not "isDrc" in audiovideo_stream:
|
||||
# skip DRC and VB formats
|
||||
initial_astreams_w[audiovideo_stream["itag"]] = audiovideo_stream
|
||||
|
||||
for itag in initial_astreams_y:
|
||||
if itag in initial_astreams_w:
|
||||
adaptive_formats.append(ythdd_struct_parser.parseAdaptiveStreams(initial_astreams_w[itag], initial_astreams_y[itag]))
|
||||
|
||||
for itag in initial_fstreams_y:
|
||||
if itag in initial_fstreams_w:
|
||||
format_streams.append( ythdd_struct_parser.parseFormatStreams( initial_fstreams_w[itag], initial_fstreams_y[itag]))
|
||||
|
||||
hls_url = None
|
||||
else:
|
||||
adaptive_formats, format_streams = [{"url": f"http://a/?expire={int(time_start + 5.9 * 60 * 60)}", "itag": "18", "type": "", "clen": "0", "lmt": "", "projectionType": "RECTANGULAR"}], [] # freetube/clipious shenanigans, see: https://github.com/FreeTubeApp/FreeTube/pull/5997 and https://github.com/lamarios/clipious/blob/b9e7885/lib/videos/models/adaptive_format.g.dart
|
||||
hls_url = safeTraverse(ydata, ["url"], default="ythdd: unable to retrieve stream url")
|
||||
|
||||
if age_restricted:
|
||||
if not adaptive_formats:
|
||||
adaptive_formats = [{"url": f"http://a/?expire={int(time_start + 5.9 * 60 * 60)}", "itag": "18", "type": "", "clen": "0", "lmt": "", "projectionType": "RECTANGULAR"}] # same as above
|
||||
|
||||
if live_now:
|
||||
video_type = "livestream"
|
||||
premiere_timestamp = published # ??? that works i guess
|
||||
elif premiere_timestamp:
|
||||
video_type = "scheduled"
|
||||
published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time())
|
||||
else:
|
||||
video_type = "video"
|
||||
|
||||
premium = False
|
||||
if "YouTube Red" in keywords:
|
||||
premium = True
|
||||
# TODO: detect paywalled patron-only videos
|
||||
|
||||
# because we fetched the video's wdata, we might as
|
||||
# well save it inside of general cache so that
|
||||
# requests for the video's comments don't have to
|
||||
# spawn an additional request for initial ctoken
|
||||
ensure_comment_continuation(video_id, wdata)
|
||||
storyboards = []
|
||||
storyboards_extracted = ensure_storyboards(video_id, wdata, length=length)
|
||||
if storyboards_extracted:
|
||||
storyboards = ythdd_struct_builder.genStoryboards(video_id)
|
||||
|
||||
time_end = time()
|
||||
|
||||
response = {
|
||||
"type": video_type,
|
||||
"title": title,
|
||||
"videoId": video_id,
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"storyboards": storyboards,
|
||||
"description": description, # due to change (include ythdd metadata)
|
||||
"descriptionHtml": description_html,
|
||||
"published": published,
|
||||
"publishedText": published_date,
|
||||
"keywords": keywords,
|
||||
"viewCount": views,
|
||||
"viewCountText": str(views), # not implemented
|
||||
"likeCount": likes,
|
||||
"dislikeCount": 0,
|
||||
"paid": False, # not implemented
|
||||
"premium": premium,
|
||||
"isFamilyFriendly": family_friendly,
|
||||
"allowedRegions": allowed_regions,
|
||||
"genre": genre,
|
||||
"genreUrl": "/genreUrl/not/implemented/", # not implemented
|
||||
"author": author,
|
||||
"authorId": ucid,
|
||||
"authorUrl": "/channel/" + ucid,
|
||||
"authorVerified": author_verified,
|
||||
"authorThumbnails": author_thumbnail,
|
||||
"subCountText": str(subs),
|
||||
"lengthSeconds": length,
|
||||
"allowRatings": allow_ratings,
|
||||
"rating": 0,
|
||||
"isListed": is_listed,
|
||||
"liveNow": live_now,
|
||||
"isPostLiveDvr": post_live_dvr,
|
||||
"isUpcoming": is_upcoming,
|
||||
"dashUrl": ythdd_globals.config['general']['public_facing_url'] + "api/invidious/api/v1/manifest/" + video_id, # not implemented
|
||||
"premiereTimestamp": premiere_timestamp,
|
||||
"hlsUrl": hls_url, # broken after a change in iOS player, only usable for livestreams
|
||||
"adaptiveFormats": adaptive_formats, # same as hlsUrl
|
||||
"formatStreams": format_streams,
|
||||
"captions": [], # not implemented
|
||||
# "captions": [
|
||||
# {
|
||||
# "label": String,
|
||||
# "language_code": String,
|
||||
# "url": String
|
||||
# }
|
||||
# ],
|
||||
# "musicTracks": [
|
||||
# {
|
||||
# "song": String,
|
||||
# "artist": String,
|
||||
# "album": String,
|
||||
# "license": String
|
||||
# }
|
||||
# ],
|
||||
"recommendedVideos": related,
|
||||
"took": time_end - time_start
|
||||
}
|
||||
|
||||
if ythdd_globals.config['general']['debug']:
|
||||
response["ydata"] = ydata
|
||||
response["wdata"] = wdata
|
||||
|
||||
if ythdd_globals.config['general']['cache']:
|
||||
ythdd_globals.video_cache[data[3]] = response
|
||||
ythdd_globals.video_cache[data[3]]['cacheTime'] = time()
|
||||
|
||||
# for debugging:
|
||||
#return send(200, ythdd_extractor.WEBextractSinglePage(data[3]))
|
||||
#return send(200, ythdd_extractor.IOSextract(data[3]))
|
||||
#return send(200, {'idata': idata, 'wdata': wdata})
|
||||
|
||||
# if youtube returns not the videoId we aksed
|
||||
# then it means that the instance is ratelimited
|
||||
status_code = 200 if data[3] == response['videoId'] else 403
|
||||
|
||||
return send(status_code, response)
|
||||
|
||||
def search(data, req):
|
||||
search_query = req.args.get('q')
|
||||
|
||||
# ignore paginated requests as we do nothing with the continuation token
|
||||
page = req.args.get('page')
|
||||
if page is not None and page != '1':
|
||||
try:
|
||||
page = int(page)
|
||||
except:
|
||||
return send(400, {"error": "Wrong page."})
|
||||
else:
|
||||
page = None # when page is "1"
|
||||
|
||||
if (data[-2].lower() != "search" or data[-1].lower() != "") and data[-1].lower() != "search":
|
||||
previous_query = req.args.get('pq')
|
||||
suggestions = ythdd_extractor.WEBgetSearchSuggestions(search_query, previous_query)
|
||||
return send(200, suggestions)
|
||||
|
||||
results = ythdd_extractor.WEBextractSearchResults(search_query, page)
|
||||
results_list = []
|
||||
|
||||
for entry in results:
|
||||
parsed_entry = ythdd_struct_parser.parseRenderers(entry)
|
||||
if parsed_entry is not None:
|
||||
results_list.append(parsed_entry)
|
||||
|
||||
return send(200, results_list)
|
||||
|
||||
def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):
|
||||
|
||||
# check for page/cont
|
||||
ctoken = req.args.get('continuation')
|
||||
|
||||
# perhaps continuation tokens should be checked here (whether they are inside of general_cache)
|
||||
# this way, malicious requests containing bogus ctokens can't be sent to potentially ban/ratelimit the instance (?)
|
||||
# if ctoken is not None and ctoken not in ythdd_globals.general_cache...
|
||||
|
||||
# unique req fingerprint allows for this exact query to be cached in memory.
|
||||
# md5 sum serves as a "unique" deterministic value which can be checked for cache hit/miss
|
||||
unique_request_fingerprint = md5(f"{ucid}_{requested_tab}_{ctoken}".encode('utf-8')).hexdigest()
|
||||
|
||||
# if we haven't discovered parameters required for browsing a specific tab,
|
||||
# then load them now
|
||||
if ucid not in ythdd_globals.general_cache["continuations"]["channels"]:
|
||||
channels(["", "", "", ucid], req, True)
|
||||
|
||||
# check if request has been cached within the last hour
|
||||
if ythdd_globals.config['general']['cache'] and unique_request_fingerprint in ythdd_globals.general_cache["channels"]:
|
||||
if ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] + 1 * 60 * 60 > time():
|
||||
response = ythdd_globals.general_cache["channels"][unique_request_fingerprint]
|
||||
if only_json:
|
||||
return response
|
||||
else:
|
||||
return send(200, response)
|
||||
else:
|
||||
del ythdd_globals.general_cache["channels"][unique_request_fingerprint]
|
||||
|
||||
# load relevant data from global (general) cache
|
||||
param = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["tabs", requested_tab, "param"], default=None)
|
||||
name = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["name"], default="")
|
||||
avatar = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["avatar"], default=DEFAULT_AVATAR)
|
||||
verified = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["verified"], default=False)
|
||||
|
||||
# if provided, ctoken will be used for browsing as well
|
||||
wdata = ythdd_extractor.browseChannel(ucid, params=param, ctoken=ctoken)
|
||||
|
||||
# sanity check (whether we got what we requested)
|
||||
received_tab = safeTraverse(wdata, ["responseContext", "serviceTrackingParams", 0, "params", 0, "value"])
|
||||
if received_tab != f"channel.{requested_tab}":
|
||||
# if that's not the case, either something changed in the innertube API,
|
||||
# or content that was asked for isn't available
|
||||
print(f"INFO: couldn't verify server returned channel data we asked for. "
|
||||
f"Requested channel.{requested_tab}, got {received_tab}. Most likely we sent a request to Innertube which got rejected.")
|
||||
|
||||
# load requested tab
|
||||
result = {}
|
||||
if ctoken is None:
|
||||
tabs = safeTraverse(wdata, ["contents", "twoColumnBrowseResultsRenderer", "tabs"], default=[])
|
||||
for tab in tabs:
|
||||
tab_name = safeTraverse(tab, ["tabRenderer", "title"], default="").lower()
|
||||
# rewrite livestream tab for backwards compatibility with invidious (and clients like freetube)
|
||||
if tab_name == "live":
|
||||
tab_name = "streams"
|
||||
if tab_name and tab_name == requested_tab:
|
||||
result = safeTraverse(tab, ["tabRenderer", "content"], default=[])
|
||||
break
|
||||
|
||||
items = []
|
||||
inner_contents = []
|
||||
new_continuation = ""
|
||||
response = {}
|
||||
match requested_tab:
|
||||
case "videos" | "shorts" | "streams":
|
||||
# videos/shorts/livestreams have actually the same response schema,
|
||||
# only the renderers differ - but they are taken care of in ythdd_struct_parser.parseRenderers()
|
||||
|
||||
if ctoken is None:
|
||||
inner_contents = safeTraverse(result, ["richGridRenderer", "contents"], default=[])
|
||||
else:
|
||||
inner_contents = safeTraverse(wdata, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"], default=[])
|
||||
|
||||
for entry in inner_contents:
|
||||
# videos from videos tab have no owner info (?) or it's in another place. if it is somewhere, this expression can be made simpler by traversing something else in struct parser.
|
||||
item = safeTraverse(entry, ["richItemRenderer", "content"])
|
||||
if item is not None:
|
||||
items.append(ythdd_struct_parser.parseRenderers(item, {"author_name": name, "author_ucid": ucid, "avatar": avatar}))
|
||||
|
||||
new_continuation = safeTraverse(inner_contents, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
|
||||
|
||||
response = {
|
||||
"videos": items
|
||||
}
|
||||
|
||||
if new_continuation:
|
||||
response["continuation"] = new_continuation
|
||||
|
||||
# cache response
|
||||
if ythdd_globals.config['general']['cache']:
|
||||
ythdd_globals.general_cache["channels"][unique_request_fingerprint] = response
|
||||
ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] = time()
|
||||
|
||||
# todo: save continuation(?)
|
||||
# or... is there a usecase for saving it?
|
||||
|
||||
case "playlists":
|
||||
|
||||
# todo: replace this with an on-demand generated ctoken?
|
||||
if ctoken is None:
|
||||
inner_contents = safeTraverse(result, ["sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "gridRenderer", "items"], default=[])
|
||||
else:
|
||||
inner_contents = safeTraverse(wdata, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"], default=[])
|
||||
|
||||
for entry in inner_contents:
|
||||
item = ythdd_struct_parser.parseRenderers(entry, {"author_name": name, "author_ucid": ucid, "avatar": avatar})
|
||||
items.append(item)
|
||||
|
||||
new_continuation = safeTraverse(inner_contents, [-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
|
||||
|
||||
response = {
|
||||
"playlists": items
|
||||
}
|
||||
|
||||
if new_continuation:
|
||||
response["continuation"] = new_continuation
|
||||
|
||||
# cache response
|
||||
if ythdd_globals.config['general']['cache']:
|
||||
ythdd_globals.general_cache["channels"][unique_request_fingerprint] = response
|
||||
ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] = time()
|
||||
|
||||
case _:
|
||||
# for all other renderers, which aren't currently supported
|
||||
response = {
|
||||
# "wdata": wdata
|
||||
}
|
||||
|
||||
if ythdd_globals.config["general"]["debug"]:
|
||||
response["wdata"] = wdata
|
||||
|
||||
if only_json:
|
||||
return response
|
||||
|
||||
return send(200, response)
|
||||
|
||||
def get_comments(data, req, only_json: bool = False):
|
||||
|
||||
# get comment continuation
|
||||
ctoken = req.args.get('continuation')
|
||||
|
||||
# perform some basic video id validation
|
||||
if len(data) < 4 or len(data) >= 4 and len(data[3]) != 11:
|
||||
return send(400, {"error": "Bad request: invalid videoId."})
|
||||
|
||||
video_id = data[3]
|
||||
|
||||
# if ctoken isn't provided, get it from the general cache
|
||||
if ctoken is None or ctoken == '':
|
||||
# but first ensure it's there
|
||||
ensure_comment_continuation(video_id)
|
||||
ctoken = ythdd_globals.general_cache["continuations"]["comments"][video_id][0]
|
||||
|
||||
# get joined video comment models
|
||||
wdata, new_continuation = ythdd_extractor.WEBgetVideoComments(ctoken)
|
||||
|
||||
comments = []
|
||||
for comment in wdata:
|
||||
# parse the comment
|
||||
parsed_comment = ythdd_struct_parser.customCommentRendererParser(comment)
|
||||
if parsed_comment is not None:
|
||||
comments.append(parsed_comment)
|
||||
|
||||
response = {
|
||||
"videoId": video_id,
|
||||
"comments": comments,
|
||||
"continuation": new_continuation
|
||||
}
|
||||
|
||||
if only_json:
|
||||
return response
|
||||
|
||||
return send(200, response)
|
||||
|
||||
|
||||
def ensure_comment_continuation(video_id: str, wdata = None):
|
||||
|
||||
# save continutation token for comments in global comment cache
|
||||
if not video_id in ythdd_globals.general_cache["continuations"]["comments"]:
|
||||
ythdd_globals.general_cache["continuations"]["comments"][video_id] = []
|
||||
|
||||
if wdata is None:
|
||||
# perhaps saving related videos to cache might be not a bad idea?
|
||||
wdata = ythdd_extractor.WEBextractSinglePage(video_id)
|
||||
|
||||
# search for "top comments" continuation token
|
||||
# todo: replace this with on-demand continuation creation
|
||||
comment_continuation = safeTraverse(wdata, ["ec2", "engagementPanels", 0, "engagementPanelSectionListRenderer", "header", "engagementPanelTitleHeaderRenderer", "menu", "sortFilterSubMenuRenderer", "subMenuItems", 0, "serviceEndpoint", "continuationCommand", "token"], default=None)
|
||||
if comment_continuation is not None:
|
||||
ythdd_globals.general_cache["continuations"]["comments"][video_id].append(comment_continuation)
|
||||
else:
|
||||
print(f"error: couldn't extract comment continuation token from video page ({video_id}). this video likely has comments disabled.")
|
||||
ythdd_globals.general_cache["continuations"]["comments"][video_id].append("")
|
||||
|
||||
def ensure_storyboards(video_id: str, wdata = None, length = 60):
|
||||
# Returns True on successful extraction, False when it failed.
|
||||
|
||||
# Storyboards don't expire. They can be cached indefinitely.
|
||||
if not video_id in ythdd_globals.general_cache["storyboards"]:
|
||||
ythdd_globals.general_cache["storyboards"][video_id] = None
|
||||
|
||||
if wdata is None:
|
||||
wdata = ythdd_extractor.WEBextractSinglePage(video_id)
|
||||
|
||||
# get storyboard template string
|
||||
storyboards = None
|
||||
storyboard_template = safeTraverse(wdata, ["ec1", "storyboards", "playerStoryboardSpecRenderer", "spec"], default=None)
|
||||
# silly sanity check, todo: do a regex one instead?
|
||||
if isinstance(storyboard_template, str):
|
||||
# sample storyboard template url structure, indented for readability
|
||||
# https://i.ytimg.com/sb/:videoId/storyboard3_L$L/$N.jpg?sqp=b64encodedprotobuf
|
||||
# | 48 # 27 # 100 # 10 # 10 # 0 # default # rs$datadatadatadatadatadatadatadatada
|
||||
# | 80 # 45 # 55 # 10 # 10 # 1000 # M$M # rs$datadatadatadatadatadatadatadatada
|
||||
# | 160 # 90 # 55 # 5 # 5 # 1000 # M$M # rs$datadatadatadatadatadatadatadatada
|
||||
# | 320 # 180 # 55 # 3 # 3 # 1000 # M$M # rs$datadatadatadatadatadatadatadatada
|
||||
# ^ width, height, thumb_count, columns, rows, interval, $N, sigh parameter. $L is just the index of a given storyboard, say, 0 for $N=default
|
||||
|
||||
# try to extract data from the storyboard template
|
||||
try:
|
||||
base_url, *formats = storyboard_template.split("|")
|
||||
|
||||
extracted_formats = []
|
||||
for index, fmt in enumerate(formats):
|
||||
fmt = fmt.split("#")
|
||||
width = int(fmt[0])
|
||||
height = int(fmt[1])
|
||||
count = int(fmt[2])
|
||||
columns = int(fmt[3])
|
||||
rows = int(fmt[4])
|
||||
interval = int(fmt[5])
|
||||
name = fmt[6]
|
||||
sigh = fmt[7]
|
||||
|
||||
thumbs_per_image = columns * rows
|
||||
images_count = math.ceil(count / thumbs_per_image)
|
||||
interval = interval if interval != 0 else int((length / count) * 1000) # calculated only for $N=default as it's the only one that has interval=0
|
||||
|
||||
extracted_formats.append({
|
||||
"index": index,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"thumb_count": count,
|
||||
"columns": columns,
|
||||
"rows": rows,
|
||||
"interval": interval,
|
||||
"name": name,
|
||||
"sigh": sigh,
|
||||
"images_count": images_count
|
||||
})
|
||||
|
||||
storyboards = {
|
||||
"template_url": ythdd_globals.translateLinks(base_url, remove_params=False), # NOT removing params is crucial, otherwise sqp will be dropped!
|
||||
"formats": extracted_formats
|
||||
}
|
||||
|
||||
ythdd_globals.general_cache["storyboards"][video_id] = storyboards
|
||||
return True
|
||||
except:
|
||||
print("error(ensure_storyboards): storyboard template url layout changed. please update ythdd for latest storyboard extraction fixes.")
|
||||
return False
|
||||
else:
|
||||
print(f"error(ensure_storyboards: couldn't extract storyboards from video page ({video_id}). this video won't have storyboards.")
|
||||
return False
|
||||
|
||||
|
||||
def channels(data, req, only_json: bool = False):
|
||||
|
||||
# prevent potential out of bound read
|
||||
if len(data) < 4:
|
||||
return send(400, {"error": "No channel specified."})
|
||||
|
||||
if len(data) > 4 and len(data[4]) == 24 and data[4].startswith("UC"):
|
||||
# reversed order (/api/v1/api/invidious/channels/videos/UC...)
|
||||
data[3], data[4] = data[4], data[3]
|
||||
# silly sanity check
|
||||
if len(data[3]) != 24 or not data[3].startswith("UC"):
|
||||
# typical order (/api/v1/api/invidious/channels/UC.../videos)
|
||||
return send(404, {"error": "This channel does not exist."})
|
||||
|
||||
if len(data) > 4:
|
||||
match data[4]:
|
||||
case "videos" | "shorts" | "playlists" | "podcasts" | "streams":
|
||||
return get_channel_tab( data[4], data[3], req)
|
||||
case "live":
|
||||
return get_channel_tab("streams", data[3], req)
|
||||
case "latest":
|
||||
return get_channel_tab( "videos", data[3], req)
|
||||
case _:
|
||||
return send(400, {"error": f"Bad request, unrecognized/unsupported tab \"{data[4]}\"."})
|
||||
|
||||
wdata = ythdd_extractor.browseChannel(data[3])
|
||||
channel_meta = safeTraverse(wdata, ["metadata", "channelMetadataRenderer"])
|
||||
banners = safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "banner", "imageBannerViewModel", "image", "sources"], default=[])
|
||||
avatar = safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "image", "decoratedAvatarViewModel", "avatar", "avatarViewModel", "image", "sources", 0, "url"], default=DEFAULT_AVATAR)
|
||||
subscribers = ythdd_struct_parser.parseViewsFromViewText(safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "metadata", "contentMetadataViewModel", "metadataRows", 1, "metadataParts", 0, "text", "content"], default="0"))
|
||||
verified = False # to be replaced later with ythdd_extractor.isVerified(...)
|
||||
|
||||
author_name = safeTraverse(channel_meta, ["title"], default="Unknown Channel")
|
||||
author_ucid = safeTraverse(channel_meta, ["externalId"], default=data[3]) # prevent recursion with fallback to provided ucid
|
||||
|
||||
ythdd_globals.general_cache["continuations"]["channels"][author_ucid] = {
|
||||
"avatar": avatar,
|
||||
"name": author_name,
|
||||
"tabs": {},
|
||||
"verified": verified
|
||||
}
|
||||
tabs = safeTraverse(wdata, ["contents", "twoColumnBrowseResultsRenderer", "tabs"], default=[])
|
||||
tab_names = []
|
||||
for tab in tabs:
|
||||
# collect tab names
|
||||
tab_name = safeTraverse(tab, ["tabRenderer", "title"], default="").lower()
|
||||
if tab_name:
|
||||
# same as in get_channel_tab
|
||||
if tab_name == "live":
|
||||
tab_name = "streams"
|
||||
tab_names.append(tab_name)
|
||||
# and their params (used to retrieve data about them)
|
||||
ythdd_globals.general_cache["continuations"]["channels"][author_ucid]["tabs"][tab_name] = dict()
|
||||
ythdd_globals.general_cache["continuations"]["channels"][author_ucid]["tabs"][tab_name]["param"] = safeTraverse(tab, ["tabRenderer", "endpoint", "browseEndpoint", "params"], default=None)
|
||||
|
||||
latest_videos = get_channel_tab("videos", data[3], req, only_json=True)
|
||||
|
||||
for banner in banners:
|
||||
banner["url"] = ythdd_globals.translateLinks(banner["url"])
|
||||
avatars = ythdd_extractor.generateChannelAvatarsFromUrl(avatar)
|
||||
|
||||
response = {
|
||||
"author": author_name,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "https://www.youtube.com/channel/" + author_ucid,
|
||||
"authorBanners": banners,
|
||||
"authorThumbnails": avatars,
|
||||
"subCount": subscribers,
|
||||
"totalViews": 0,
|
||||
"joined": 0,
|
||||
"autoGenerated": False, # todo: actually check this
|
||||
"ageGated": False,
|
||||
"isFamilyFriendly": safeTraverse(channel_meta, ["isFamilySafe"], default=False),
|
||||
"description": safeTraverse(channel_meta, ["description"], default="ythdd: no channel description"),
|
||||
"descriptionHtml": html.escape(safeTraverse(channel_meta, ["description"], default="ythdd: no channel description (html)")).replace("\r\n", "<br>").replace("\n", "<br>"),
|
||||
"allowedRegions": safeTraverse(channel_meta, ["availableCountryCodes"], default=[]),
|
||||
"tabs": tab_names,
|
||||
"tags": [safeTraverse(channel_meta, ["keywords"], default="")],
|
||||
"authorVerified": verified,
|
||||
"latestVideos": latest_videos["videos"], # using struct parser
|
||||
"relatedChannels": []
|
||||
}
|
||||
|
||||
if ythdd_globals.config["general"]["debug"]:
|
||||
response["wdata"] = wdata
|
||||
|
||||
# todo: cache response
|
||||
|
||||
if only_json:
|
||||
return response
|
||||
|
||||
return send(200, response)
|
||||
|
||||
def playlists(data, req, only_json: bool = False):
|
||||
|
||||
# read playlist id and sanity check
|
||||
if len(data) < 4:
|
||||
return send(400, {"error": "No playlist specified."})
|
||||
|
||||
# todo: make clipious stop spamming requests for paginated response
|
||||
page = req.args.get('page')
|
||||
# if page is not None and page != '1':
|
||||
# return send(404, {"error": "Paginated queries are not supported."})
|
||||
|
||||
plid = data[3]
|
||||
|
||||
# no info about what length can playlists be
|
||||
# settle for a basic sanity check instead
|
||||
# if len(plid) not in (18, 34, 36):
|
||||
if not (plid.startswith("PL") or plid.startswith("OL")):
|
||||
response = {"error": "Only standard playlists are currently supported (no mixes, video-based playlists, etc.)"}
|
||||
if only_json:
|
||||
return response
|
||||
return send(400, response)
|
||||
|
||||
# check if request has been cached within the last hour
|
||||
if ythdd_globals.config['general']['cache'] and plid in ythdd_globals.general_cache['playlists']:
|
||||
if ythdd_globals.general_cache['playlists'][plid]['cacheTime'] + 1 * 60 * 60 > time():
|
||||
response = ythdd_globals.general_cache['playlists'][plid].copy()
|
||||
if page is not None and page != '1':
|
||||
response['videos'] = []
|
||||
if only_json:
|
||||
return response
|
||||
else:
|
||||
return send(200, response)
|
||||
else:
|
||||
del ythdd_globals.general_cache['playlists'][plid]
|
||||
|
||||
# browse the playlist iteratively, first fetch is without any continuation
|
||||
all_unparsed_videos = []
|
||||
meta, new_continuation, videos = ythdd_extractor.WEBextractPlaylist(plid=plid)
|
||||
if isinstance(videos, list):
|
||||
all_unparsed_videos = videos.copy()
|
||||
while new_continuation != None:
|
||||
# fetch subsequent playlist videos
|
||||
_, new_continuation, videos = ythdd_extractor.WEBextractPlaylist(ctoken=new_continuation)
|
||||
if videos is not None:
|
||||
all_unparsed_videos.extend(videos)
|
||||
|
||||
# process videos
|
||||
parsed_videos = []
|
||||
for video in all_unparsed_videos:
|
||||
parsed_video = ythdd_struct_parser.parseRenderers(video)
|
||||
if parsed_video is not None:
|
||||
parsed_videos.append(parsed_video)
|
||||
|
||||
# process metadata
|
||||
primary_sidebar = safeTraverse(meta, ["sidebar", "playlistSidebarRenderer", "items", 0, "playlistSidebarPrimaryInfoRenderer"], default={})
|
||||
secondary_sidebar = safeTraverse(meta, ["sidebar", "playlistSidebarRenderer", "items", 1, "playlistSidebarSecondaryInfoRenderer"], default={})
|
||||
|
||||
# apparently fields can be stored inside of simpleText one time, only to be stored inside of runs another time
|
||||
title = ythdd_struct_parser.extractTextFromSimpleOrRuns(safeTraverse(primary_sidebar, ["title"]), default="Unknown playlist title")
|
||||
playlist_thumb = ythdd_globals.translateLinks(safeTraverse(primary_sidebar, ["thumbnailRenderer", "playlistVideoThumbnailRenderer", "thumbnail", "thumbnails", -1, "url"], default=DEFAULT_VIDEO))
|
||||
author = safeTraverse(secondary_sidebar, ["videoOwner", "videoOwnerRenderer", "title", "runs", 0, "text"], default="Unknown channel")
|
||||
author_ucid = safeTraverse(secondary_sidebar, ["videoOwner", "videoOwnerRenderer", "title", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
|
||||
author_avatars = ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(secondary_sidebar, ["videoOwner", "videoOwnerRenderer", "thumbnail", "thumbnails", 0, "url"], default=DEFAULT_AVATAR))
|
||||
description = safeTraverse(meta, ["microformat", "microformatDataRenderer", "description"], default="(ythdd: failed to retrieve description, perhaps it's empty?)")
|
||||
if author_ucid == "UNKNOWNCHANNELID" and author == "Unknown channel":
|
||||
# most likely a collaborative playlist
|
||||
author = safeTraverse(secondary_sidebar, ["videoOwner", "videoOwnerRenderer", "title", "simpleText"], default="by Unknown user and others").removeprefix("by ")
|
||||
author_orig = author[:author.rfind(" and ")]
|
||||
description += f"\n(ythdd: This is a collaborative playlist by \"{author_orig}\" {author.removeprefix(author_orig + ' ')}. You can't view the authors' channels.)"
|
||||
author = author_orig
|
||||
|
||||
description_html = html.escape(description).replace("\r\n", "<br>").replace("\n", "<br>")
|
||||
video_count = ythdd_struct_parser.parseViewsFromViewText(ythdd_struct_parser.extractTextFromSimpleOrRuns(safeTraverse(primary_sidebar, ["stats", 0]), default="No videos"))
|
||||
view_count = ythdd_struct_parser.parseViewsFromViewText(ythdd_struct_parser.extractTextFromSimpleOrRuns(safeTraverse(primary_sidebar, ["stats", 1]), default="No views"))
|
||||
updated = ythdd_struct_parser.extractTextFromSimpleOrRuns(safeTraverse(primary_sidebar, ["stats", 2]), default="2000-01-01").removeprefix("Last updated on ").removeprefix("Updated ")
|
||||
updated = int(dateparser.parse(updated).timestamp())
|
||||
is_unlisted = safeTraverse(primary_sidebar, ["badges", 0, "metadataBadgeRenderer", "icon", "iconType"], default="PRIVACY_LISTED") == "PRIVACY_UNLISTED" # this needs further research https://gitea.invidious.io/iv-org/invidious/src/commit/325e013e0d9e5670fa0df7635ff30a0ee029e05e/src/invidious/playlists.cr#L133
|
||||
|
||||
response = {
|
||||
"type": "playlist",
|
||||
"title": title,
|
||||
"playlistId": plid,
|
||||
"playlistThumbnail": playlist_thumb,
|
||||
"author": author,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"subtitle": None, # todo?
|
||||
"authorThumbnails": author_avatars,
|
||||
"description": description,
|
||||
"descriptionHtml": description_html,
|
||||
"videoCount": video_count,
|
||||
"viewCount": view_count,
|
||||
"updated": updated,
|
||||
"isListed": not is_unlisted,
|
||||
"videos": parsed_videos
|
||||
}
|
||||
|
||||
# todo: cache videos and metadata separately, so that paginated queries can be supported as well
|
||||
if ythdd_globals.config['general']['cache']:
|
||||
ythdd_globals.general_cache['playlists'][plid] = response.copy()
|
||||
ythdd_globals.general_cache['playlists'][plid]['cacheTime'] = time()
|
||||
|
||||
if page is not None or page == '1':
|
||||
response['videos'] = []
|
||||
|
||||
if only_json:
|
||||
return response
|
||||
|
||||
return send(200, response)
|
||||
|
||||
def storyboards(data, req):
|
||||
|
||||
height = req.args.get("height")
|
||||
width = req.args.get("width")
|
||||
video_id = data[3]
|
||||
|
||||
try:
|
||||
height = int(height)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
width = int(width)
|
||||
except:
|
||||
pass
|
||||
|
||||
resp = ythdd_struct_builder.genWebvttStoryboard(video_id, width, height)
|
||||
|
||||
return Response(resp, mimetype="text/vtt", status=200)
|
||||
|
||||
def lookup(data, req):
|
||||
# possibly TODO: rewrite this mess
|
||||
if len(data) > 2:
|
||||
if (data[0], data[1]) == ("api", "v1"):
|
||||
match data[2]:
|
||||
case 'stats' | '': # /api/invidious/api/v1/stats and /api/invidious/api/v1/
|
||||
return stats()
|
||||
case 'trending':
|
||||
return trending()
|
||||
case 'popular':
|
||||
return popular()
|
||||
case 'videos':
|
||||
return videos(data)
|
||||
case 'auth':
|
||||
return auth(data)
|
||||
case 'search':
|
||||
return search(data, req)
|
||||
case 'channels':
|
||||
return channels(data, req)
|
||||
case 'comments':
|
||||
return get_comments(data, req)
|
||||
case 'playlists':
|
||||
return playlists(data, req)
|
||||
case 'storyboards':
|
||||
return storyboards(data, req)
|
||||
case _:
|
||||
incrementBadRequests()
|
||||
return notImplemented(data)
|
||||
elif data[0] in ('ggpht', 'vi'):
|
||||
# for some reason the Materialous client
|
||||
# and FreeTube keep making requests to these
|
||||
if data[1] in ('ggpht', 'vi'):
|
||||
return redirect('/' + "/".join(data[1:]))
|
||||
return redirect('/' + "/".join(data[0:]))
|
||||
else:
|
||||
incrementBadRequests()
|
||||
return notImplemented(data)
|
||||
elif len(data) == 2:
|
||||
if (data[0], data[1]) == ("api", "v1"): # /api/invidious/api/v1
|
||||
return stats()
|
||||
elif data[0] == "streams":
|
||||
return streams()
|
||||
elif data[0] in ('ggpht', 'vi'):
|
||||
return redirect('/' + "/".join(data[0:]))
|
||||
else:
|
||||
incrementBadRequests()
|
||||
return notImplemented(data)
|
||||
|
||||
elif len(data) == 1:
|
||||
if data[0] == "videoplayback":
|
||||
return redirect(url_for('videoplaybackProxy', **req.args))
|
||||
return stats() # /api/invidious/something
|
||||
96
ythdd_proto.py
Normal file
96
ythdd_proto.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from ythdd_globals import safeTraverse
|
||||
import base64
|
||||
import blackboxprotobuf as bbpb
|
||||
import json
|
||||
import urllib.parse
|
||||
import ythdd_globals
|
||||
|
||||
def bbpbToB64(msg_and_typedef: tuple, urlsafe: bool = False, padding: bool = False) -> str:
|
||||
encoded_protobuf = bbpb.encode_message(*msg_and_typedef)
|
||||
if urlsafe:
|
||||
b64_protobuf = base64.urlsafe_b64encode(encoded_protobuf)
|
||||
else:
|
||||
b64_protobuf = base64.b64encode(encoded_protobuf)
|
||||
if padding:
|
||||
url_encoded_b64 = urllib.parse.quote(b64_protobuf.decode())
|
||||
else:
|
||||
url_encoded_b64 = b64_protobuf.decode().rstrip('=')
|
||||
return url_encoded_b64
|
||||
|
||||
def fdictToBbpb(msg: dict) -> tuple:
|
||||
# Requires Python 3.7+ or CPython 3.6+,
|
||||
# as these versions preserve dictionary insertion order.
|
||||
# Structural matching (match, case) requires Python 3.10+.
|
||||
clean_msg = {}
|
||||
clean_type = {}
|
||||
for key in msg:
|
||||
num, type = key.split(":")
|
||||
|
||||
match type:
|
||||
case "message":
|
||||
# if the type is an embedded message
|
||||
internal_msg, internal_type = fdictToBbpb(msg[key])
|
||||
# msg can just be appended as usual
|
||||
clean_msg[num] = internal_msg
|
||||
# type contains more fields than normally
|
||||
clean_type[num] = {
|
||||
'field_order': list(internal_msg.keys()),
|
||||
'message_typedef': internal_type,
|
||||
'type': type
|
||||
}
|
||||
|
||||
case "base64" | "base64u" | "base64p" | "base64up":
|
||||
# if the type is a base64-embedded message
|
||||
internal_msg, internal_type = fdictToBbpb(msg[key])
|
||||
match type.removeprefix("base64"):
|
||||
case "":
|
||||
b64_encoded_msg = bbpbToB64((internal_msg, internal_type))
|
||||
case "u":
|
||||
b64_encoded_msg = bbpbToB64((internal_msg, internal_type), urlsafe=True)
|
||||
case "p":
|
||||
b64_encoded_msg = bbpbToB64((internal_msg, internal_type), padding=True)
|
||||
case "up":
|
||||
b64_encoded_msg = bbpbToB64((internal_msg, internal_type), urlsafe=True, padding=True)
|
||||
clean_msg[num] = b64_encoded_msg
|
||||
clean_type[num] = {'type': 'string'}
|
||||
|
||||
case "int" | "string":
|
||||
clean_msg[num] = msg[key]
|
||||
clean_type[num] = {'type': type}
|
||||
|
||||
case _:
|
||||
raise KeyError(f'error(fmsgToBBPBTuple): invalid key "{type}"')
|
||||
|
||||
|
||||
return (clean_msg, clean_type)
|
||||
|
||||
def producePlaylistContinuation(plid: str, offset: int = 0) -> str:
|
||||
msge = {
|
||||
'80226972:message': {
|
||||
'2:string': f'VL{plid}',
|
||||
'3:base64': {
|
||||
'1:int': int(offset / 100),
|
||||
'15:string': f'PT:{bbpbToB64(fdictToBbpb({"1:int": offset}))}',
|
||||
'104:message': {
|
||||
'1:int': 0
|
||||
}
|
||||
},
|
||||
'35:string': plid
|
||||
}
|
||||
}
|
||||
|
||||
bbpb_dicts = fdictToBbpb(msge)
|
||||
b64_ctoken = bbpbToB64(bbpb_dicts, urlsafe=True, padding=True)
|
||||
|
||||
return b64_ctoken
|
||||
|
||||
def produceSearchParams(page: int = 1) -> str:
|
||||
msge = {
|
||||
"9:int": 20 * (page - 1), # pagination
|
||||
"30:int": 1 # no self-harm censorship
|
||||
}
|
||||
|
||||
bbpb_dicts = fdictToBbpb(msge)
|
||||
b64_params = bbpbToB64(bbpb_dicts, urlsafe=True, padding=True)
|
||||
|
||||
return b64_params
|
||||
93
ythdd_struct_builder.py
Normal file
93
ythdd_struct_builder.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from ythdd_globals import safeTraverse
|
||||
import ythdd_globals
|
||||
|
||||
def genThumbs(videoId: str):
|
||||
|
||||
result = []
|
||||
thumbnails = [
|
||||
{'height': 720, 'width': 1280, 'quality': "maxres", 'url': "maxres"}, # will always attempt to return the best quality available
|
||||
{'height': 720, 'width': 1280, 'quality': "maxresdefault", 'url': "maxresdefault"},
|
||||
{'height': 480, 'width': 640, 'quality': "sddefault", 'url': "sddefault"},
|
||||
{'height': 360, 'width': 480, 'quality': "high", 'url': "hqdefault"},
|
||||
{'height': 180, 'width': 320, 'quality': "medium", 'url': "mqdefault"},
|
||||
{'height': 90, 'width': 120, 'quality': "default", 'url': "default"},
|
||||
{'height': 90, 'width': 120, 'quality': "start", 'url': "1"},
|
||||
{'height': 90, 'width': 120, 'quality': "middle", 'url': "2"},
|
||||
{'height': 90, 'width': 120, 'quality': "end", 'url': "3"},
|
||||
]
|
||||
|
||||
for x in thumbnails:
|
||||
width = x['width']
|
||||
height = x['height']
|
||||
quality = x['quality']
|
||||
url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/' + x['url'] + '.jpg'
|
||||
result.append({'quality': quality, 'url': url, 'width': width, 'height': height})
|
||||
|
||||
return result
|
||||
|
||||
def genStoryboards(video_id: str) -> list:
|
||||
# generates storyboards inside of /api/v1/videos/:video_id
|
||||
storyboards = []
|
||||
cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id], default=[])
|
||||
for sb in cached_storyboards["formats"]:
|
||||
built_storyboard = {
|
||||
"url": f"/api/v1/storyboards/{video_id}?width={sb['width']}&height={sb['height']}",
|
||||
"templateUrl": cached_storyboards['template_url'].replace("$L", str(sb['index'])).replace("$N", sb['name']) + f"&sigh={sb['sigh']}",
|
||||
"width": sb['width'],
|
||||
"height": sb['height'],
|
||||
"count": sb['thumb_count'],
|
||||
"interval": sb['interval'],
|
||||
"storyboardWidth": sb['columns'],
|
||||
"storyboardHeight": sb['rows'],
|
||||
"storyboardCount": sb['images_count']
|
||||
}
|
||||
storyboards.append(built_storyboard)
|
||||
|
||||
return storyboards
|
||||
|
||||
def msToWebvttTimestamp(time: int):
|
||||
|
||||
ms = time % 1000
|
||||
time //= 1000
|
||||
|
||||
hours = time // (60 * 60)
|
||||
time -= hours * 60 * 60
|
||||
minutes = time // 60
|
||||
time -= minutes * 60
|
||||
seconds = time
|
||||
timestamp = f"{str(hours).zfill(2)}:{str(minutes).zfill(2)}:{str(seconds).zfill(2)}.{str(ms).zfill(3)}"
|
||||
|
||||
return timestamp
|
||||
|
||||
def genWebvttStoryboard(video_id: str, width: int = None, height: int = None):
|
||||
# generates WebVTT storyboards for /api/v1/storyboards/:video_id
|
||||
webvtt = "WEBVTT\n\n"
|
||||
cached_storyboards = safeTraverse(ythdd_globals.general_cache["storyboards"], [video_id])
|
||||
if cached_storyboards is None:
|
||||
return ""
|
||||
found_storyboard = {}
|
||||
|
||||
for sb in cached_storyboards["formats"]:
|
||||
if width is not None and width == sb['width']:
|
||||
found_storyboard = sb
|
||||
if height is not None and height == sb['height']:
|
||||
found_storyboard = sb
|
||||
|
||||
# could be changed
|
||||
if not found_storyboard:
|
||||
found_storyboard = cached_storyboards["formats"][0]
|
||||
|
||||
start = 0
|
||||
thumbs_per_sb = sb['columns'] * sb['rows']
|
||||
xx = 0
|
||||
yy = 0
|
||||
for x in range(found_storyboard["thumb_count"]):
|
||||
xx = x % sb['columns']
|
||||
yy = (x // sb['rows']) % sb['rows']
|
||||
xywh = f"#xywh={xx * sb['width']},{yy * sb['height']},{sb['width']},{sb['height']}"
|
||||
webvtt += f"{msToWebvttTimestamp(start)} --> {msToWebvttTimestamp(start + found_storyboard['interval'])}\n"
|
||||
webvtt += cached_storyboards['template_url'].replace("$L", str(sb['index'])).replace("$N", sb['name']).replace("$M", str(x // (thumbs_per_sb))) + f"&sigh={sb['sigh']}{xywh}\n"
|
||||
webvtt += "\n"
|
||||
start += found_storyboard['interval']
|
||||
|
||||
return webvtt
|
||||
618
ythdd_struct_parser.py
Normal file
618
ythdd_struct_parser.py
Normal file
@@ -0,0 +1,618 @@
|
||||
from html import escape
|
||||
from invidious_formats import FORMATS
|
||||
from ythdd_globals import safeTraverse
|
||||
import json
|
||||
import dateparser
|
||||
import ythdd_globals
|
||||
import ythdd_extractor
|
||||
import ythdd_struct_builder
|
||||
|
||||
DEFAULT_AVATAR = "https://yt3.ggpht.com/a/default-user=s176-c-k-c0x00ffffff-no-rj"
|
||||
|
||||
def doesContainNumber(string: str, numeric_system: int = 10) -> bool:
|
||||
try:
|
||||
number = int(string, numeric_system)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
raise BaseException("doesContainNumber(): Unknown error while determining if a string contains a number")
|
||||
|
||||
def parseLengthFromTimeBadge(time_str: str) -> int:
|
||||
# Returns 0 if unsuccessful
|
||||
length = 0
|
||||
time_lookup_list = [1, 60, 3_600, 86_400]
|
||||
time_list = time_str.split(":")
|
||||
if False in map(doesContainNumber, time_list): # works around ['LIVE'] for livestreams or ['Upcoming'] for scheduled videos
|
||||
pass
|
||||
else:
|
||||
for z in range(len(time_list)):
|
||||
length += time_lookup_list[z] * int(time_list[len(time_list) - 1 - z])
|
||||
return length
|
||||
|
||||
def parseViewsFromViewText(viewcounttext: str) -> int:
|
||||
# Returns 0 if unsuccessful
|
||||
views = 0
|
||||
magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
|
||||
if viewcounttext:
|
||||
if viewcounttext.lower() == "no":
|
||||
viewcounttext = "0"
|
||||
views = float("0" + "".join([z for z in viewcounttext if 48 <= ord(z) and ord(z) <= 57 or ord(z) == 46]))
|
||||
viewcounttext = viewcounttext.split(" ")[0]
|
||||
for x in magnitude.keys():
|
||||
if x == viewcounttext[-1].upper():
|
||||
views *= magnitude[x]
|
||||
return int(views)
|
||||
|
||||
def parseRenderers(entry: dict, context: dict = {}) -> dict:
|
||||
|
||||
if not isinstance(entry, dict):
|
||||
raise ValueError("parsed entry is not of type dict")
|
||||
|
||||
match safeTraverse(list(entry.keys()), [0], default=""):
|
||||
|
||||
case "videoRenderer": # represents a video
|
||||
# as of october 2025 slowly phased out in favor of lockupViewModel(?)
|
||||
|
||||
published_date = safeTraverse(entry, ["videoRenderer", "publishedTimeText", "simpleText"], default="now")
|
||||
published_date = published_date.removeprefix("Streamed ")
|
||||
description, description_html = parseDescriptionSnippet(safeTraverse(entry, ["videoRenderer", "descriptionSnippet", "runs"], default=[]))
|
||||
collaborative = False
|
||||
|
||||
if "author_name" in context:
|
||||
author_name = context["author_name"]
|
||||
else:
|
||||
author_name = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "text"], default="Unknown author")
|
||||
|
||||
if "author_ucid" in context:
|
||||
author_ucid = context["author_ucid"]
|
||||
else:
|
||||
author_ucid = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
|
||||
if author_ucid == "UNKNOWNCHANNELID":
|
||||
# this is a first indicator that a video is a collaborative (has multiple authors)
|
||||
# if that's the case, let's take the first author's ucid as the ucid
|
||||
collaborative = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "showDialogCommand", "panelLoadingStrategy", "inlineContent", "dialogViewModel", "header", "dialogHeaderViewModel", "headline", "content"]) == "Collaborators"
|
||||
|
||||
if "verified" in context:
|
||||
verified = context["verified"]
|
||||
else:
|
||||
verified = ythdd_extractor.isVerified(safeTraverse(entry, ["videoRenderer", "ownerBadges", 0]))
|
||||
|
||||
if "avatar" in context:
|
||||
avatar_url = context["avatar"]
|
||||
else:
|
||||
avatar_url = safeTraverse(entry, ["videoRenderer", "avatar", "decoratedAvatarViewModel", "avatar", "avatarViewModel", "image", "sources", 0, "url"], default=DEFAULT_AVATAR)
|
||||
|
||||
views_or_viewers_model = safeTraverse(entry, ["videoRenderer", "viewCountText"], default={})
|
||||
if "simpleText" in views_or_viewers_model:
|
||||
# means this is a video with X views
|
||||
view_count = parseViewsFromViewText(entry["videoRenderer"]["viewCountText"]["simpleText"])
|
||||
view_count_text = entry["videoRenderer"]["viewCountText"]["simpleText"]
|
||||
elif "runs" in views_or_viewers_model:
|
||||
# means this is a livestream with X concurrent viewers
|
||||
view_count = parseViewsFromViewText(entry["videoRenderer"]["viewCountText"]["runs"][0]["text"] + " watching")
|
||||
view_count_text = entry["videoRenderer"]["viewCountText"]["runs"][0]["text"] + " watching"
|
||||
else:
|
||||
# unknown model, assume no views
|
||||
view_count = 0
|
||||
view_count_text = "Unknown amount of views"
|
||||
|
||||
if collaborative:
|
||||
livm = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "showDialogCommand", "panelLoadingStrategy", "inlineContent", "dialogViewModel", "customContent", "listViewModel", "listItems"], default=[])
|
||||
if "author_name" not in context:
|
||||
# override the default "name1 and others" or "name1 and name2" text
|
||||
# with full author info
|
||||
all_authors = []
|
||||
for collaborative_author in livm:
|
||||
collaborative_author_name = safeTraverse(collaborative_author, ["listItemViewModel", "title", "content"])
|
||||
if collaborative_author_name is not None:
|
||||
all_authors.append(collaborative_author_name)
|
||||
if all_authors != []: # check if custom extraction succeeded
|
||||
author_name = ", ".join(all_authors)
|
||||
if author_ucid == "UNKNOWNCHANNELID":
|
||||
# retrieve main author's ucid
|
||||
author_ucid = safeTraverse(livm, [0, "listItemViewModel", "title", "commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
|
||||
if safeTraverse(entry, ["videoRenderer", "ownerBadges", 0]) is None:
|
||||
# check if the main author is verified
|
||||
verified = False
|
||||
if safeTraverse(livm, [0, "listItemViewModel", "title", "attachmentRuns", 0, "element", "type", "imageType", "image", "sources", 0, "clientResource", "imageName"]) in ("AUDIO_BADGE", "CHECK_CIRCLE_FILLED"):
|
||||
verified = True
|
||||
if avatar_url == DEFAULT_AVATAR:
|
||||
# retrieve the main channel's avatar
|
||||
avatar_url = safeTraverse(livm, [0, "listItemViewModel", "leadingAccessory", "avatarViewModel", "image", "sources", 0, "url"], default=DEFAULT_AVATAR)
|
||||
|
||||
ythdd_globals.print_debug("videoRenderer fired")
|
||||
return {
|
||||
"type": "video",
|
||||
"title": safeTraverse(entry, ["videoRenderer", "title", "runs", 0, "text"]),
|
||||
"videoId": safeTraverse(entry, ["videoRenderer", "videoId"]),
|
||||
"author": author_name,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorVerified": verified, # TODO
|
||||
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url),
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
|
||||
"description": description,
|
||||
"descriptionHtml": description_html,
|
||||
"viewCount": view_count,
|
||||
"viewCountText": view_count_text,
|
||||
"published": int(dateparser.parse(published_date).timestamp()), # sadly best we can do, invidious does this too
|
||||
"publishedText": published_date,
|
||||
"lengthSeconds": parseLengthFromTimeBadge(safeTraverse(entry, ["videoRenderer", "lengthText", "simpleText"], default="0:0")),
|
||||
"liveNow": False,
|
||||
"premium": ythdd_extractor.isPremium(safeTraverse(entry, ["videoRenderer", "badges", 0])), # will fail if it's not the only badge
|
||||
"isUpcoming": False,
|
||||
"isNew": False,
|
||||
"is4k": False,
|
||||
"is8k": False,
|
||||
"isVr180": False,
|
||||
"isVr360": False,
|
||||
"is3d": False,
|
||||
"hasCaptions": False
|
||||
}
|
||||
|
||||
# modify the premiere timestamp afterwards here?
|
||||
|
||||
case "lockupViewModel": # represents playlists/mixes (and videos since october 2025)
|
||||
# related videos lvms are handled in ythdd_inv_tl.videos()
|
||||
|
||||
lvm = entry["lockupViewModel"]
|
||||
playlist_type = safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "icon", "sources", 0, "clientResource", "imageName"], default="")
|
||||
|
||||
if playlist_type == "MIX":
|
||||
# mixes aren't currently supported
|
||||
return
|
||||
|
||||
if not playlist_type:
|
||||
# struct represents a video
|
||||
ythdd_globals.print_debug("lockupViewModel fired (not a playlist). this is an a/b test; any following errors stem from it.")
|
||||
|
||||
lmvm = safeTraverse(lvm, ['metadata', 'lockupMetadataViewModel'], default={})
|
||||
video_id = safeTraverse(lvm, ['contentId'])
|
||||
|
||||
author_name = safeTraverse(context, ["author_name"], default="Unknown author")
|
||||
author_ucid = safeTraverse(context, ["author_ucid"], default="UNKNOWNCHANNELID")
|
||||
verified = safeTraverse(context, ["verified"], default=False) # TODO: check if this can be retrieved here
|
||||
avatar_url = safeTraverse(context, ["avatar"], default=DEFAULT_AVATAR)
|
||||
|
||||
title = safeTraverse(lmvm, ["title", "content"], default="No title")
|
||||
video_metadata = safeTraverse(lmvm, ["metadata", "contentMetadataViewModel", "metadataRows", 0, "metadataParts"], default=[])
|
||||
view_count_text = safeTraverse(video_metadata, [0, "text", "content"], default="0 views")
|
||||
published_date = safeTraverse(video_metadata, [1, "text", "content"], default="now")
|
||||
length_text = safeTraverse(lvm, ["contentImage", "thumbnailViewModel", "overlays", ..., "thumbnailBottomOverlayViewModel", "badges", -1, "thumbnailBadgeViewModel", "text"], default="0:0")
|
||||
view_count = parseViewsFromViewText(view_count_text)
|
||||
length = parseLengthFromTimeBadge(length_text)
|
||||
|
||||
resp = {
|
||||
"type": "video",
|
||||
"title": title,
|
||||
"videoId": video_id,
|
||||
"author": author_name,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorVerified": verified, # TODO
|
||||
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url),
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"description": "", # can't be retrieved from lockupViewModel
|
||||
"descriptionHtml": "",
|
||||
"viewCount": view_count,
|
||||
"viewCountText": view_count_text,
|
||||
"published": int(dateparser.parse(published_date).timestamp()), # sadly best we can do, invidious does this too
|
||||
"publishedText": published_date,
|
||||
"lengthSeconds": length,
|
||||
"liveNow": False, # can't be live if it's in creator's video feed
|
||||
"premium": False, # todo: check this
|
||||
"isUpcoming": False,
|
||||
"isNew": False,
|
||||
"is4k": False,
|
||||
"is8k": False,
|
||||
"isVr180": False,
|
||||
"isVr360": False,
|
||||
"is3d": False,
|
||||
"hasCaptions": False
|
||||
}
|
||||
return resp
|
||||
|
||||
# struct represents a playlist
|
||||
meta = safeTraverse(lvm, ["metadata"], default=[])
|
||||
lmvm = safeTraverse(meta, ["lockupMetadataViewModel", "metadata", "contentMetadataViewModel", "metadataRows"], default=[])
|
||||
thumbnail = safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "image", "sources", -1, "url"], default="no-url?")
|
||||
thumbnail = ythdd_globals.translateLinks(thumbnail[:thumbnail.rfind("?")])
|
||||
verified = safeTraverse(context, ["verified"], default=False)
|
||||
|
||||
playlist_id = safeTraverse(lvm, ["contentId"], default="UNKNOWNPLAYLISTID")
|
||||
length = safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "text"], default="0 videos")
|
||||
length = parseViewsFromViewText(length.split(" ")[0])
|
||||
|
||||
# Turns out for some responses we do have some data, while not on others.
|
||||
# Data from context should be prioritized, thus even if something is found with safeTraverse,
|
||||
# the parser will ignore it in favour of the context.
|
||||
ucid = safeTraverse(lmvm, [0, "metadataParts", 0, "text", "commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
|
||||
author = safeTraverse(lmvm, [0, "metadataParts", 0, "text", "content"], default="ythdd: unknown author")
|
||||
ucid = safeTraverse(context, ["author_ucid"], default=ucid)
|
||||
author = safeTraverse(context, ["author_name"], default=author)
|
||||
|
||||
ythdd_globals.print_debug("lockupViewModel fired (playlist)")
|
||||
return {
|
||||
"type": "playlist",
|
||||
"title": safeTraverse(meta, ["lockupMetadataViewModel", "title", "content"], default="ythdd: unknown title"),
|
||||
"playlistId": playlist_id,
|
||||
"playlistThumbnail": thumbnail,
|
||||
"author": author,
|
||||
"authorId": ucid,
|
||||
"authorUrl": "/channel/" + ucid,
|
||||
"authorVerified": verified,
|
||||
"videoCount": length,
|
||||
"videos": [] # provided for historical reasons i guess
|
||||
}
|
||||
|
||||
case "shelfRenderer": # "people also watched"
|
||||
return
|
||||
|
||||
case "gridShelfViewModel": # shorts?
|
||||
return
|
||||
|
||||
case "shortsLockupViewModel": # shorts on channel pages
|
||||
|
||||
video_id = safeTraverse(entry, ["shortsLockupViewModel", "onTap", "innertubeCommand", "reelWatchEndpoint", "videoId"], default="UnknownVideoId")
|
||||
title = safeTraverse(entry, ["shortsLockupViewModel", "overlayMetadata", "primaryText", "content"], default="ythdd: couldn't find title")
|
||||
views_text = safeTraverse(entry, ["shortsLockupViewModel", "overlayMetadata", "secondaryText", "content"], default="No views")
|
||||
|
||||
published_date = "No data about published time" # the view model doesn't provide data about the date a short is published
|
||||
|
||||
if video_id == "UnknownVideoId": # failsafe
|
||||
video_id = safeTraverse(entry, ["shortsLockupViewModel", "entityId"], default="-UnknownVideoId")
|
||||
video_id = video_id[video_id.rfind("-") + 1:]
|
||||
|
||||
if "author_name" in context:
|
||||
author_name = context["author_name"]
|
||||
else:
|
||||
author_name = "Unknown author"
|
||||
|
||||
if "author_ucid" in context:
|
||||
author_ucid = context["author_ucid"]
|
||||
else:
|
||||
author_ucid = "UNKNOWNCHANNELID"
|
||||
|
||||
if "verified" in context:
|
||||
verified = context["verified"]
|
||||
else:
|
||||
verified = False
|
||||
|
||||
if "avatar" in context:
|
||||
avatar_url = context["avatar"]
|
||||
else:
|
||||
avatar_url = "unknown"
|
||||
|
||||
ythdd_globals.print_debug("shortsLockupViewModel fired")
|
||||
return {
|
||||
"type": "video",
|
||||
"title": title,
|
||||
"videoId": video_id,
|
||||
"author": author_name,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorVerified": False,
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"description": "",
|
||||
"descriptionHtml": "",
|
||||
"viewCount": parseViewsFromViewText(views_text),
|
||||
"viewCountText": views_text,
|
||||
"published": int(0),
|
||||
"publishedText": published_date,
|
||||
"lengthSeconds": int(60), # invidious locks this to 60s no matter what the actual duration is
|
||||
"liveNow": False,
|
||||
"premium": False,
|
||||
"isUpcoming": False,
|
||||
"premiereTimestamp": 0,
|
||||
"isNew": False,
|
||||
"is4k": False,
|
||||
"is8k": False,
|
||||
"isVr180": False,
|
||||
"isVr360": False,
|
||||
"is3d": False,
|
||||
"hasCaptions": False
|
||||
}
|
||||
|
||||
case "gridVideoRenderer": # videos on channel pages
|
||||
|
||||
# doesn't work on Yattee
|
||||
# thumbnails = safeTraverse(entry, ["gridVideoRenderer", "thumbnail", "thumbnails"], default=[])
|
||||
# for thumbnail in thumbnails:
|
||||
# thumbnail["url"] = ythdd_globals.translateLinks(thumbnail["url"])
|
||||
|
||||
video_id = safeTraverse(entry, ["gridVideoRenderer", "videoId"], default="UnknownVideoId")
|
||||
thumbnails = ythdd_struct_builder.genThumbs(video_id)
|
||||
|
||||
published_date = safeTraverse(entry, ["gridVideoRenderer", "publishedTimeText", "simpleText"], default="now")
|
||||
published_date = published_date.removeprefix("Streamed ")
|
||||
|
||||
ythdd_globals.print_debug("gridVideoRenderer fired")
|
||||
return {
|
||||
"type": "video",
|
||||
"title": safeTraverse(entry, ["gridVideoRenderer", "title", "simpleText"], default="unknown video title"),
|
||||
"videoId": video_id,
|
||||
"author": context["author_name"],
|
||||
"authorId": context["author_ucid"],
|
||||
"authorUrl": "/channel/" + context["author_ucid"],
|
||||
"authorVerified": False, # TODO: handle badge related tasks here using context
|
||||
"videoThumbnails": thumbnails,
|
||||
"description": "", # won't work without using an RSS feed (?)
|
||||
"descriptionHtml": "", # -||-
|
||||
"viewCount": parseViewsFromViewText(safeTraverse(entry, ["gridVideoRenderer", "viewCountText", "simpleText"], default="0 views")),
|
||||
"viewCountText": safeTraverse(entry, ["gridVideoRenderer", "shortViewCountText", "simpleText"], default="0 views"),
|
||||
"published": int(dateparser.parse(published_date).timestamp()),
|
||||
"publishedText": published_date,
|
||||
"lengthSeconds": parseLengthFromTimeBadge(safeTraverse(entry, ["gridVideoRenderer", "thumbnailOverlays", 0, "thumbnailOverlayTimeStatusRenderer", "text", "simpleText"], default="0:0")),
|
||||
"liveNow": True if published_date == "now" else False,
|
||||
"premium": False,
|
||||
"isUpcoming": False,
|
||||
"isNew": False,
|
||||
"is4k": False,
|
||||
"is8k": False,
|
||||
"isVr180": False,
|
||||
"isVr360": False,
|
||||
"is3d": False,
|
||||
"hasCaptions": False
|
||||
}
|
||||
|
||||
case "channelRenderer": # channels in search results
|
||||
|
||||
avatars = ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(entry, ["channelRenderer", "thumbnail", "thumbnails", 0, "url"], default=DEFAULT_AVATAR))
|
||||
description, description_html = parseDescriptionSnippet(safeTraverse(entry, ["channelRenderer", "descriptionSnippet", "runs"], default=[]))
|
||||
isVerified = ythdd_extractor.isVerified(safeTraverse(entry, ["channelRenderer", "ownerBadges", 0], default=[]))
|
||||
|
||||
ythdd_globals.print_debug("channelRenderer fired")
|
||||
return {
|
||||
"type": "channel",
|
||||
"author": safeTraverse(entry, ["channelRenderer", "title", "simpleText"], default="Unknown channel"),
|
||||
"authorId": safeTraverse(entry, ["channelRenderer", "channelId"], default="UNKNOWNCHANNELID"),
|
||||
"authorUrl": "/channel/" + safeTraverse(entry, ["channelRenderer", "channelId"], default="UNKNOWNCHANNELID"),
|
||||
"authorVerified": isVerified,
|
||||
"authorThumbnails": avatars,
|
||||
"autoGenerated": False,
|
||||
"subCount": parseViewsFromViewText(safeTraverse(entry, ["channelRenderer", "videoCountText", "simpleText"], default="0 subscribers")),
|
||||
"videoCount": 0,
|
||||
"channelHandle": safeTraverse(entry, ["channelRenderer", "navigationEndpoint", "browseEndpoint", "canonicalBaseUrl"], default="/@ythdd_unknown_handle")[1:],
|
||||
"description": description,
|
||||
"descriptionHtml": description_html
|
||||
}
|
||||
|
||||
case "playlistVideoRenderer":
|
||||
# used by all content inside of playlists which have at least one non-shorts video/livestream
|
||||
|
||||
video_id = safeTraverse(entry, ["playlistVideoRenderer", "videoId"], default="UnknownVideoId")
|
||||
title = safeTraverse(entry, ["playlistVideoRenderer", "title", "runs", 0, "text"], default="Unknown video title")
|
||||
author_ucid = safeTraverse(entry, ["playlistVideoRenderer", "shortBylineText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"])
|
||||
author_name = safeTraverse(entry, ["playlistVideoRenderer", "shortBylineText", "runs", 0, "text"], default="Unknown author")
|
||||
video_index = int(safeTraverse(entry, ["playlistVideoRenderer", "index", "simpleText"], default="1")) - 1
|
||||
length = parseLengthFromTimeBadge(safeTraverse(entry, ["playlistVideoRenderer", "lengthText", "simpleText"], default="0:0"))
|
||||
published_date = safeTraverse(entry, ["playlistVideoRenderer", "videoInfo", "runs", -1, "text"], default="2000-01-01")
|
||||
published_date = published_date.removeprefix("Streamed ").removeprefix(" watching")
|
||||
|
||||
# handle livestreams
|
||||
if not published_date:
|
||||
published_date = "now"
|
||||
|
||||
if author_ucid is None:
|
||||
# likely a collaborative video, let's try
|
||||
# to fetch the uploader's ucid with that in mind
|
||||
livm = safeTraverse(entry, ["playlistVideoRenderer", "shortBylineText", "runs", 0, "navigationEndpoint", "showDialogCommand", "panelLoadingStrategy", "inlineContent", "dialogViewModel", "customContent", "listViewModel", "listItems"], default=[])
|
||||
# name extraction logic the same as in videoRenderer
|
||||
all_authors = []
|
||||
for collaborative_author in livm:
|
||||
collaborative_author_name = safeTraverse(collaborative_author, ["listItemViewModel", "title", "content"])
|
||||
if collaborative_author_name is not None:
|
||||
all_authors.append(collaborative_author_name)
|
||||
if all_authors != []:
|
||||
author_name = ", ".join(all_authors)
|
||||
author_ucid = safeTraverse(livm, [0, "listItemViewModel", "title", "commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
|
||||
|
||||
# surprisingly, innertube responds with the avatar of the user that added the video to the playlist
|
||||
# we can extract that information, e.g. for yattee to display
|
||||
avatar_url = safeTraverse(entry, ["playlistVideoRenderer", "thumbnailOverlays", ..., "thumbnailOverlayAvatarStackViewModel", "avatarStack", "avatarStackViewModel", "avatars", 0, "avatarViewModel", "image", "sources", 0, "url"])
|
||||
avatars = None if avatar_url is None else ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url)
|
||||
|
||||
ythdd_globals.print_debug("playlistVideoRenderer fired")
|
||||
return {
|
||||
"type": "video",
|
||||
"title": title,
|
||||
"videoId": video_id,
|
||||
"author": author_name,
|
||||
"authorId": author_ucid,
|
||||
"authorUrl": "/channel/" + author_ucid,
|
||||
"authorThumbnails": avatars,
|
||||
"videoThumbnails": ythdd_struct_builder.genThumbs(video_id),
|
||||
"index": video_index,
|
||||
"lengthSeconds": length,
|
||||
"liveNow": False, # todo: check this?
|
||||
# these do not need to be returned, but some clients try to read it
|
||||
# so we return an approximation here:
|
||||
"published": int(dateparser.parse(published_date).timestamp()),
|
||||
"publishedText": published_date
|
||||
}
|
||||
|
||||
case _:
|
||||
print("received an entry of unknown type (thus can't be parsed):")
|
||||
print(entry)
|
||||
print("")
|
||||
# breakpoint()
|
||||
return
|
||||
|
||||
def customCommentRendererParser(comment: dict, context: dict = {}) -> dict:
|
||||
|
||||
cep = safeTraverse(comment, ["payload", "commentEntityPayload"], default={})
|
||||
content = safeTraverse(cep, ["properties", "content", "content"], default="")
|
||||
content_html = escape(content).replace("\r\n", "<br>").replace("\n", "<br>")
|
||||
author = safeTraverse(cep, ["author"], default={})
|
||||
verified = safeTraverse(author, ["isVerified"], default=False) or safeTraverse(author, ["isArtist"], default=False)
|
||||
ucid = safeTraverse(author, ["channelId"], default="UNKNOWNCHANNELID")
|
||||
published_date = safeTraverse(cep, ["properties", "publishedTime"], default="now")
|
||||
edited = False
|
||||
|
||||
if published_date.endswith(" (edited)"):
|
||||
edited = True
|
||||
published_date_unix = int(dateparser.parse(published_date.removesuffix(" (edited)")).timestamp())
|
||||
else:
|
||||
published_date_unix = int(dateparser.parse(published_date).timestamp())
|
||||
|
||||
inv_comment = {
|
||||
"authorId": ucid,
|
||||
"authorUrl": "/channel/" + ucid,
|
||||
"author": safeTraverse(author, ["displayName"], default="@ythdd-unknown-user"),
|
||||
"verified": verified,
|
||||
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(author, ["avatarThumbnailUrl"], default=DEFAULT_AVATAR)), # proxy them!
|
||||
"authorIsChannelOwner": safeTraverse(author, ["isCreator"], default=False), # ???
|
||||
"isSponsor": False, # not sure how to retrieve this
|
||||
"likeCount": parseViewsFromViewText("0" + safeTraverse(cep, ["toolbar", "likeCountNotliked"], default="0") + " likes"),
|
||||
"isPinned": False,
|
||||
"commentId": safeTraverse(cep, ["properties", "commentId"], default="UNKNOWNCOMMENTID"),
|
||||
"content": content,
|
||||
"contentHtml": content_html,
|
||||
"isEdited": edited,
|
||||
"published": published_date_unix,
|
||||
"publishedText": published_date if published_date != "now" else "unknown amount of time ago"
|
||||
}
|
||||
|
||||
if "replies" in comment:
|
||||
inv_comment["replies"] = comment["replies"]
|
||||
|
||||
return inv_comment
|
||||
|
||||
def parseDescriptionSnippet(snippet: list):
|
||||
|
||||
text = ""
|
||||
text_html = ""
|
||||
for entry in snippet:
|
||||
text += entry["text"]
|
||||
if "bold" in entry: # is checking entry["bold"] == True necessary?
|
||||
text_html += "<b>" + entry["text"] + "</b>"
|
||||
else:
|
||||
text_html += entry["text"]
|
||||
text_html = escape(text_html).replace("\r\n", "<br>").replace("\n", "<br>")
|
||||
|
||||
return text, text_html
|
||||
|
||||
def runsToText(runs: list, default: str = "") -> str:
|
||||
# "default" will be returned when text extraction fails.
|
||||
extracted_text = ""
|
||||
|
||||
for field in runs:
|
||||
extracted_text += safeTraverse(field, ["text"], default="")
|
||||
|
||||
if extracted_text:
|
||||
return extracted_text
|
||||
|
||||
return default
|
||||
|
||||
def extractTextFromSimpleOrRuns(obj: dict, default: str = "") -> str:
|
||||
# Extracts the text both from "runs" and "simpleText"
|
||||
# with failsafe to default.
|
||||
text = default
|
||||
if not isinstance(obj, dict):
|
||||
return default
|
||||
if "runs" in obj:
|
||||
text = runsToText(obj["runs"])
|
||||
elif "simpleText" in obj:
|
||||
text = obj["simpleText"]
|
||||
else:
|
||||
print(f"error(extractTextFromSimpleOrRuns): text extraction failed for {obj}")
|
||||
return text
|
||||
|
||||
|
||||
def findNearestResolution(width: int, height: int) -> int:
|
||||
# Finds the nearest standard resolution (one of 144p, 240p, ...)
|
||||
# So far only used for Yattee, as it has trouble playing anything
|
||||
# without one of the standard resolutions. Playback on other
|
||||
# clients is unaffected.
|
||||
|
||||
# failsafe behaviour
|
||||
try:
|
||||
width = int(width)
|
||||
height = int(height)
|
||||
res = min(width, height)
|
||||
except:
|
||||
return 360
|
||||
|
||||
standard_resolutions = [144, 240, 360, 720, 1080, 2160, 4320]
|
||||
if res in standard_resolutions:
|
||||
return res
|
||||
|
||||
# calculate relative distance to one of the standard resolutions
|
||||
res_normalized = [abs(1 - (x / res)) for x in standard_resolutions]
|
||||
# pick the one where the distance is the smallest
|
||||
target_index = res_normalized.index(min(res_normalized))
|
||||
target_res = standard_resolutions[target_index]
|
||||
|
||||
return target_res
|
||||
|
||||
def parseFormatStreams(wdata_fstream: dict, ydata_stream: dict) -> dict:
|
||||
|
||||
try:
|
||||
stream_url = ydata_stream["url"]
|
||||
except:
|
||||
ythdd_globals.print_debug( "could not extract format stream URL from yt-dlp response:")
|
||||
ythdd_globals.print_debug(f"wdata: {wdata_fstream}")
|
||||
ythdd_globals.print_debug(f"ydata: {ydata_stream}")
|
||||
|
||||
fstream = {
|
||||
"url": stream_url,
|
||||
"itag": str(wdata_fstream["itag"]),
|
||||
"type": wdata_fstream["mimeType"],
|
||||
"quality": wdata_fstream["quality"],
|
||||
"bitrate": str(wdata_fstream["bitrate"]),
|
||||
"fps": wdata_fstream["fps"],
|
||||
"size": f"{wdata_fstream['width']}x{wdata_fstream['height']}",
|
||||
"resolution": f"{findNearestResolution(wdata_fstream['width'], wdata_fstream['height'])}p", # possibly not really needed here
|
||||
"qualityLabel": wdata_fstream["qualityLabel"],
|
||||
"container": safeTraverse(FORMATS.get(wdata_fstream["itag"]), [ "ext"], default="mp4"), # invidious_formats
|
||||
"encoding": safeTraverse(FORMATS.get(wdata_fstream["itag"]), ["vcodec"], default="mp4") # invidious_formats
|
||||
}
|
||||
|
||||
|
||||
return fstream
|
||||
|
||||
def parseAdaptiveStreams(wdata_astream: dict, ydata_stream: dict) -> dict:
|
||||
|
||||
try:
|
||||
stream_url = ydata_stream["url"]
|
||||
except:
|
||||
ythdd_globals.print_debug( "could not extract adaptive stream URL from yt-dlp response:")
|
||||
ythdd_globals.print_debug(f"wdata: {wdata_fstream}")
|
||||
ythdd_globals.print_debug(f"ydata: {ydata_stream}")
|
||||
|
||||
astream_common = {
|
||||
"init": f"{wdata_astream[ 'initRange']['start']}-{wdata_astream[ 'initRange']['end']}",
|
||||
"index": f"{wdata_astream['indexRange']['start']}-{wdata_astream['indexRange']['end']}",
|
||||
"bitrate": str(wdata_astream["bitrate"]),
|
||||
"url": stream_url,
|
||||
"itag": str(wdata_astream["itag"]),
|
||||
"type": wdata_astream["mimeType"],
|
||||
"clen": wdata_astream["contentLength"],
|
||||
"lmt": wdata_astream["lastModified"],
|
||||
"projectionType": wdata_astream["projectionType"],
|
||||
"container": safeTraverse(FORMATS.get(wdata_astream["itag"]), [ "ext"], default="mp4"), # invidious_formats
|
||||
"encoding": safeTraverse(FORMATS.get(wdata_astream["itag"]), ["vcodec"], default="mp4") # invidious_formats
|
||||
}
|
||||
|
||||
isVideo = True
|
||||
if "audioQuality" in wdata_astream:
|
||||
isVideo = False
|
||||
|
||||
if isVideo:
|
||||
astream = astream_common
|
||||
# video-specific metadata
|
||||
astream["fps"] = wdata_astream["fps"]
|
||||
astream["size"] = f"{wdata_astream['width']}x{wdata_astream['height']}"
|
||||
astream["resolution"] = f"{findNearestResolution(wdata_astream['width'], wdata_astream['height'])}p"
|
||||
astream["qualityLabel"] = wdata_astream["qualityLabel"]
|
||||
astream["colorInfo"] = safeTraverse(wdata_astream, ["colorInfo"])
|
||||
else:
|
||||
astream = astream_common
|
||||
# audio-specific metadata
|
||||
astream["encoding"] = safeTraverse(FORMATS.get(wdata_astream["itag"]), ["acodec"], default="mp4")
|
||||
astream["audioQuality"] = wdata_astream["audioQuality"],
|
||||
astream["audioSampleRate"] = int(wdata_astream["audioSampleRate"]),
|
||||
astream["audioChannels"] = wdata_astream["audioChannels"]
|
||||
|
||||
return astream
|
||||
|
||||
Reference in New Issue
Block a user