Compare commits

...

3 Commits

Author SHA1 Message Date
06679ee165 hotfix: rely (more) on yt-dlp for extraction (part 1/2)
adaptiveFormats and hlsUrl need to be fixed (or maybe replaced by DASH?)
2025-02-28 01:02:05 +01:00
dbc90d3f74 update extractor headers, add support for checking badges and avatars 2025-02-28 00:57:40 +01:00
d1b9f90e7e add support for GUC proxy, move safeTraverse to ythdd_globals 2025-02-28 00:56:13 +01:00
5 changed files with 178 additions and 63 deletions

View File

@@ -41,3 +41,13 @@ def ggphtProxy(received_request):
response = Response(ggpht.raw, mimetype=ggpht.headers['content-type'], status=ggpht.status_code) response = Response(ggpht.raw, mimetype=ggpht.headers['content-type'], status=ggpht.status_code)
return response return response
def gucProxy(received_request):
    """Proxy a request to ``https://yt3.googleusercontent.com/``.

    ``received_request`` is the path captured by the ``/guc/<path:...>`` route.
    It is appended to the upstream prefix, fetched as a stream, and relayed
    to the client with the upstream status code and content type.

    Returns a ``Response`` wrapping the raw upstream body.
    """
    prefix = "https://yt3.googleusercontent.com/"
    guc = requests.get(prefix + received_request, headers=ythdd_globals.getHeaders(caller='proxy'), stream=True)
    # Ask urllib3 to decode the body according to its content-encoding header
    # while the raw stream is being read.
    guc.raw.decode_content = True
    # Upstream is not guaranteed to send a Content-Type header; .get() avoids a
    # KeyError here and lets Response fall back to its default mimetype.
    response = Response(guc.raw, mimetype=guc.headers.get('content-type'), status=guc.status_code)
    return response

View File

@@ -62,6 +62,7 @@ def setup():
app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall) app.add_url_rule('/api/<path:received_request>', view_func=ythdd_api.api_global_catchall)
app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy) app.add_url_rule('/vi/<path:received_request>', view_func=views.thumbnailProxy)
app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy) app.add_url_rule('/ggpht/<path:received_request>', view_func=views.ggphtProxy)
app.add_url_rule('/guc/<path:received_request>', view_func=views.gucProxy)
db = ythdd_db.initDB(app, config) db = ythdd_db.initDB(app, config)
with app.app_context(): with app.app_context():

View File

@@ -1,5 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
import brotli, yt_dlp, requests, json, time import brotli, yt_dlp, requests, json, time
from ythdd_globals import safeTraverse
import ythdd_globals import ythdd_globals
ytdl_opts = { ytdl_opts = {
@@ -60,7 +61,7 @@ stage1_body = {
stage2_headers = { stage2_headers = {
"Connection": "keep-alive", "Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-us,en;q=0.5", "Accept-Language": "en-us,en;q=0.5",
"Sec-Fetch-Mode": "navigate", "Sec-Fetch-Mode": "navigate",
@@ -69,13 +70,13 @@ stage2_headers = {
stage3_headers = { stage3_headers = {
"Connection": "keep-alive", "Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-us,en;q=0.5", "Accept-Language": "en-us,en;q=0.5",
"Sec-Fetch-Mode": "navigate", "Sec-Fetch-Mode": "navigate",
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Youtube-Client-Name": "1", "X-Youtube-Client-Name": "1",
"X-Youtube-Client-Version": "2.20241126.01.00", "X-Youtube-Client-Version": "2.20250226.01.00",
"Origin": "https://www.youtube.com", "Origin": "https://www.youtube.com",
"Accept-Encoding": "gzip, deflate, br", "Accept-Encoding": "gzip, deflate, br",
"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI" "Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
@@ -87,7 +88,7 @@ stage3_body = {
"client": "client":
{ {
"clientName": "WEB", "clientName": "WEB",
"clientVersion": "2.20241126.01.00", "clientVersion": "2.20250226.01.00",
"hl": "en", "hl": "en",
"timeZone": "UTC", "timeZone": "UTC",
"utcOffsetMinutes": 0 "utcOffsetMinutes": 0
@@ -98,6 +99,30 @@ stage3_body = {
"racyCheckOk": True "racyCheckOk": True
} }
# Template request context describing a desktop WEB client (Firefox 135 on
# Windows 10). It is the base that makeWebContext() extends with
# request-specific keys.
web_context_dict = {
    "context": {
        "client": {
            # locale
            "hl": "en",
            "gl": "US",
            # device / browser identification
            "deviceMake": "",
            "deviceModel": "",
            "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0,gzip(gfe)",
            "clientName": "WEB",
            "clientVersion": "2.20250226.01.00",
            "osName": "Windows",
            "osVersion": "10.0",
            # display
            "screenPixelDensity": 2,
            "platform": "DESKTOP",
            "screenDensityFloat": 2,
            "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
            "browserName": "Firefox",
            "browserVersion": "135.0",
            "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "utcOffsetMinutes": 0,
        },
    },
}
def extract(url: str, getcomments=False, maxcomments=""): def extract(url: str, getcomments=False, maxcomments=""):
# TODO: check user-agent and cookiefile # TODO: check user-agent and cookiefile
@@ -179,4 +204,70 @@ def IOSextract(uri: str):
end = time.time() end = time.time()
#return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start} #return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
return {'stage1': stage1, 'stage3': stage3, 'took': end - start} return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
def makeWebContext(secondaryContextDict: dict):
    """Build a request context on top of ``web_context_dict``.

    Returns a new dict: a deep copy of the module-level ``web_context_dict``
    with every top-level key of ``secondaryContextDict`` added to it
    (overriding the template's value when a key is present in both).
    """
    import copy

    # Work on a deep copy: assigning into the shared template would make
    # request-specific keys (e.g. browseId) persist across calls.
    current_web_context_dict = copy.deepcopy(web_context_dict)
    current_web_context_dict.update(secondaryContextDict)
    return current_web_context_dict
def getChannelAvatar(response_json: dict):
    """Pick the best channel avatar out of a browse response.

    Returns a dictionary: {url: <proxied url to remote server>, width: ..., height: ...}
    containing the best resolution in terms of pixel count. The returned dict
    is a copy; ``response_json`` is left untouched.

    If no avatar list can be found at either known location, a placeholder
    with an empty url and zero dimensions is returned instead of raising.
    """
    # A great majority of the code has been influenced by https://github.com/iv-org/invidious/blob/master/src/invidious/channels/about.cr.
    avatars = safeTraverse(response_json, ['metadata', 'channelMetadataRenderer', 'avatar', 'thumbnails'], default=None)

    if avatars is None:
        # fallback to lower resolution avatars
        avatars = safeTraverse(response_json, ['header',
                                               'pageHeaderRenderer',
                                               'content',
                                               'pageHeaderViewModel',
                                               'image',
                                               'decoratedAvatarViewModel',
                                               'avatar',
                                               'avatarViewModel',
                                               'image',
                                               'sources'], default=None)

    if not avatars:
        # TODO: use a local avatar once one is available
        return {'url': '', 'width': 0, 'height': 0}

    def pixel_count(avatar):
        # entries without dimensions count as zero pixels rather than raising
        return avatar.get('width', 0) * avatar.get('height', 0)

    best_avatar = avatars[-1]  # usually, the best avatar is stored last
    for avatar in avatars:
        if pixel_count(avatar) > pixel_count(best_avatar):
            best_avatar = avatar

    # Copy before rewriting the url so the caller's response_json is not modified.
    result = dict(best_avatar)
    result['url'] = ythdd_globals.translateLinks(best_avatar['url'])
    return result
def isVerified(response_json: dict):
    """Returns True if any user badge has been found (verified/artist)."""
    # NOTE(review): the traversal path is an empty list, so no key of
    # response_json is actually inspected here - this looks like a stub that
    # still needs the real badge location filled in. Confirm before relying
    # on the result.
    found = safeTraverse(response_json, [], default=False)
    return True if found else False
def browseAbout(ucid: str, timeout: float = 10.0):
    """Query innertube's browse endpoint for a channel.

    Parameters:
        ucid: channel ID; must be exactly 24 characters long.
        timeout: seconds to wait for the remote server before giving up
            (passed straight to ``requests.post``).

    Returns the response body parsed from JSON (as a dict).

    Raises:
        ValueError: if ``ucid`` is not 24 characters long.
    """
    if len(ucid) != 24:
        raise ValueError(f"Something is wrong with the UCID {ucid}. Expected a 24-character long channel ID, not {len(ucid)}.")

    context = makeWebContext({'browseId': ucid})

    # Without a timeout a stalled connection would block this call forever.
    response = requests.post(
        'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
        headers=ythdd_globals.getHeaders(),
        json=context,
        timeout=timeout,
    )

    response_json = json.loads(response.text)

    return response_json

View File

@@ -86,8 +86,21 @@ def translateLinks(link):
link = link.replace("https://i.ytimg.com/", config['general']['public_facing_url']) link = link.replace("https://i.ytimg.com/", config['general']['public_facing_url'])
link = link.replace("https://yt3.ggpht.com/", config['general']['public_facing_url'] + "ggpht/") link = link.replace("https://yt3.ggpht.com/", config['general']['public_facing_url'] + "ggpht/")
link = link.replace("https://yt3.googleusercontent.com/", config['general']['public_facing_url'] + "guc/")
return link return link
def getUptime():
    """Return the current epoch time (truncated to an int) minus ``starttime``."""
    now = int(time.time())
    return now - starttime
def safeTraverse(obj: dict, path: list, default=None):
    """Walk ``obj`` along ``path`` and return the value found there.

    Every element of ``path`` is used, in order, as a subscript on the
    current value (a dict key or a list index). An empty ``path`` returns
    ``obj`` itself. If any step cannot be taken, a message is printed and
    ``default`` is returned.
    """
    result = obj
    try:
        for x in path:
            #print(f"traversing {result} with respect to {x}")
            result = result[x]
    except (KeyError, IndexError, TypeError):
        # KeyError: missing dict key; IndexError: list too short;
        # TypeError: tried to subscript None or a scalar part-way down.
        result = default
        # path may mix strings and integer indices, so stringify before joining
        print(f"error reading: {' -> '.join(map(str, path))} - returning: {default}")
    # Deliberately no `return` inside a `finally`: that would discard any
    # in-flight exception and hand back a half-traversed value.
    return result

View File

@@ -6,6 +6,7 @@
from flask import Response, request, redirect from flask import Response, request, redirect
from markupsafe import escape from markupsafe import escape
from time import strftime, gmtime, time from time import strftime, gmtime, time
from ythdd_globals import safeTraverse
import json, datetime import json, datetime
import invidious_formats import invidious_formats
import ythdd_globals import ythdd_globals
@@ -78,17 +79,6 @@ def trending():
def popular():
    """Reply via ``send`` with 200 and a placeholder list holding one empty dict."""
    placeholder = [{}]
    return send(200, placeholder)
def safeTraverse(obj: dict, path: list, default=None):
    """Walk ``obj`` along ``path`` and return the value found there.

    Every element of ``path`` is used, in order, as a subscript on the
    current value (a dict key or a list index). An empty ``path`` returns
    ``obj`` itself. If any step cannot be taken, a message is printed and
    ``default`` is returned.
    """
    result = obj
    try:
        for x in path:
            result = result[x]
    except (KeyError, IndexError, TypeError):
        # KeyError: missing dict key; IndexError: list too short;
        # TypeError: tried to subscript None or a scalar part-way down.
        result = default
        # path may mix strings and integer indices, so stringify before joining
        print(f"error reading: {' -> '.join(map(str, path))} - returning: {default}")
    # Deliberately no `return` inside a `finally`: that would discard any
    # in-flight exception and hand back a half-traversed value.
    return result
def getError(idata: dict): def getError(idata: dict):
unknown_error = {"status": "Unknown error", "reason": "This is a generic ythdd error."} unknown_error = {"status": "Unknown error", "reason": "This is a generic ythdd error."}
error = "" error = ""
@@ -245,27 +235,31 @@ def videos(data):
#print(f"got data: {data}") #print(f"got data: {data}")
#print("requesting idata from IOSextract") #print("requesting idata from IOSextract")
idata = ythdd_extractor.IOSextract(data[3]) # idata = ythdd_extractor.IOSextract(data[3])
hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="") # hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="")
adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[]) # adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[])
if not hls_url or not adaptive_formats: # if not hls_url or not adaptive_formats:
print(f"serious error: couldn't get hls_url or adaptive_formats!\n" # print(f"serious error: couldn't get hls_url or adaptive_formats!\n"
f"dumping idata:\n" # f"dumping idata:\n"
f"{idata}") # f"{idata}")
return send(500, {'error': getError(idata)}) # return send(500, {'error': getError(idata)})
time_start = time()
ydata = ythdd_extractor.extract(data[3])
wdata = ythdd_extractor.WEBextractSinglePage(data[3]) wdata = ythdd_extractor.WEBextractSinglePage(data[3])
#return send(200, {'ydata': ydata, 'wdata': wdata})
#return send(200, {'idata': idata, 'wdata': wdata}) #return send(200, {'idata': idata, 'wdata': wdata})
main_results = idata['stage3']['contents']['twoColumnWatchNextResults'] # main_results = idata['stage3']['contents']['twoColumnWatchNextResults']
primary_results = safeTraverse(main_results, ['results', 'results', 'contents']) # primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])
if primary_results: # if primary_results:
video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer']) # video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer'])
video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer']) # video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer'])
else: # else:
print("error: primary_results not found in invidious TL videos()") # print("error: primary_results not found in invidious TL videos()")
video_details = safeTraverse(wdata, ['ec1', 'videoDetails']) video_details = safeTraverse(wdata, ['ec1', 'videoDetails'])
microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={}) microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={})
@@ -320,51 +314,55 @@ def videos(data):
related_video['viewCount'] = related_views related_video['viewCount'] = related_views
related.append(related_video) related.append(related_video)
magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000} # magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution # toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution
likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution # likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution
likes = 0 # likes = 0
if likes_text: # if likes_text:
likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed # likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed
likes_text = likes_text.split(" ")[0] # likes_text = likes_text.split(" ")[0]
for x in magnitude.keys(): # for x in magnitude.keys():
if x in likes_text: # if x in likes_text:
likes *= magnitude[x] # likes *= magnitude[x]
likes = safeTraverse(ydata, ['like_count'], default=0)
description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)") description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)")
short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)") short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)")
description_html = "<p>" + description + "</p>" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329 description_html = "<p>" + description + "</p>" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={}) # metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={})
genre = safeTraverse(microformat, ['category']) genre = safeTraverse(microformat, ['category'])
# TODO: genre blah blah blah... # TODO: genre blah blah blah...
author = safeTraverse(video_details, ['author'], default="Unknown Author") author = safeTraverse(video_details, ['author'], default="Unknown Author")
ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID") ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID")
author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={}) # author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={})
author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail # author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail
subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0") # subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0")
subs = 0 # subs = 0
if subs_text: # if subs_text:
subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57])) # subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57]))
subs_text = subs_text.split(" ")[0] # subs_text = subs_text.split(" ")[0]
for x in magnitude.keys(): # for x in magnitude.keys():
if x in subs_text: # if x in subs_text:
subs *= magnitude[x] # subs *= magnitude[x]
for x in author_thumbnail: subs = ydata['channel_follower_count']
# rewrite to use views.py channel_about_info = ythdd_extractor.browseAbout(ucid)
x['url'] = ythdd_globals.translateLinks(x['url']) author_thumbnail = ythdd_extractor.getChannelAvatar(channel_about_info)
# for x in author_thumbnail:
# # rewrite to use views.py
# x['url'] = ythdd_globals.translateLinks(x['url'])
# so far it seems to be impossible to tell if a channel is verified or not, # so far it seems to be impossible to tell if a channel is verified or not,
# that is - without making another request # that is - without making another request
author_verified = False author_verified = ythdd_extractor.isVerified(channel_about_info)
format_streams = [] format_streams = []
adaptive_formats, format_streams = rebuildFormats(adaptive_formats) # adaptive_formats, format_streams = rebuildFormats(adaptive_formats)
if live_now: if live_now:
video_type = "livestream" video_type = "livestream"
elif premiere_timestamp: elif premiere_timestamp:
video_type = "scheduled" video_type = "scheduled"
published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time.time()) published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time())
else: else:
video_type = "video" video_type = "video"
@@ -373,6 +371,7 @@ def videos(data):
premium = True premium = True
# TODO: detect paywalled patron-only videos # TODO: detect paywalled patron-only videos
time_end = time()
#''' #'''
response = { response = {
@@ -406,7 +405,7 @@ def videos(data):
"authorVerified": author_verified, "authorVerified": author_verified,
"authorThumbnails": author_thumbnail, "authorThumbnails": author_thumbnail,
"subCountText": subs_text, "subCountText": str(subs),
"lengthSeconds": length, "lengthSeconds": length,
"allowRatings": allow_ratings, "allowRatings": allow_ratings,
"rating": 0, "rating": 0,
@@ -417,8 +416,8 @@ def videos(data):
"dashUrl": ythdd_globals.config['general']['public_facing_url'] + "/dash/not/implemented/", # not implemented "dashUrl": ythdd_globals.config['general']['public_facing_url'] + "/dash/not/implemented/", # not implemented
"premiereTimestamp": premiere_timestamp, "premiereTimestamp": premiere_timestamp,
"hlsUrl": hls_url, #"hlsUrl": hls_url, # broken after a change in iOS player
"adaptiveFormats": adaptive_formats, #"adaptiveFormats": adaptive_formats, # same as hlsUrl
"formatStreams": format_streams, # very bare bones, empty actually xD "formatStreams": format_streams, # very bare bones, empty actually xD
"captions": [], # not implemented "captions": [], # not implemented
# "captions": [ # "captions": [
@@ -436,7 +435,8 @@ def videos(data):
# "license": String # "license": String
# } # }
# ], # ],
"recommendedVideos": related "recommendedVideos": related,
"took": time_end - time_start
} }
#''' #'''