hotfix: rely (more) on yt-dlp for extraction (part 1/2)

adaptiveFormats and hlsUrl are left out of the response for now; they still need to be fixed (or maybe replaced by DASH?)
This commit is contained in:
2025-02-28 01:02:05 +01:00
parent dbc90d3f74
commit 06679ee165

View File

@@ -235,27 +235,31 @@ def videos(data):
#print(f"got data: {data}") #print(f"got data: {data}")
#print("requesting idata from IOSextract") #print("requesting idata from IOSextract")
idata = ythdd_extractor.IOSextract(data[3]) # idata = ythdd_extractor.IOSextract(data[3])
hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="") # hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="")
adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[]) # adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[])
if not hls_url or not adaptive_formats: # if not hls_url or not adaptive_formats:
print(f"serious error: couldn't get hls_url or adaptive_formats!\n" # print(f"serious error: couldn't get hls_url or adaptive_formats!\n"
f"dumping idata:\n" # f"dumping idata:\n"
f"{idata}") # f"{idata}")
return send(500, {'error': getError(idata)}) # return send(500, {'error': getError(idata)})
time_start = time()
ydata = ythdd_extractor.extract(data[3])
wdata = ythdd_extractor.WEBextractSinglePage(data[3]) wdata = ythdd_extractor.WEBextractSinglePage(data[3])
#return send(200, {'ydata': ydata, 'wdata': wdata})
#return send(200, {'idata': idata, 'wdata': wdata}) #return send(200, {'idata': idata, 'wdata': wdata})
main_results = idata['stage3']['contents']['twoColumnWatchNextResults'] # main_results = idata['stage3']['contents']['twoColumnWatchNextResults']
primary_results = safeTraverse(main_results, ['results', 'results', 'contents']) # primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])
if primary_results: # if primary_results:
video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer']) # video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer'])
video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer']) # video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer'])
else: # else:
print("error: primary_results not found in invidious TL videos()") # print("error: primary_results not found in invidious TL videos()")
video_details = safeTraverse(wdata, ['ec1', 'videoDetails']) video_details = safeTraverse(wdata, ['ec1', 'videoDetails'])
microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={}) microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={})
@@ -310,51 +314,55 @@ def videos(data):
related_video['viewCount'] = related_views related_video['viewCount'] = related_views
related.append(related_video) related.append(related_video)
magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000} # magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution # toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution
likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution # likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution
likes = 0 # likes = 0
if likes_text: # if likes_text:
likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed # likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed
likes_text = likes_text.split(" ")[0] # likes_text = likes_text.split(" ")[0]
for x in magnitude.keys(): # for x in magnitude.keys():
if x in likes_text: # if x in likes_text:
likes *= magnitude[x] # likes *= magnitude[x]
likes = safeTraverse(ydata, ['like_count'], default=0)
description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)") description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)")
short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)") short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)")
description_html = "<p>" + description + "</p>" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329 description_html = "<p>" + description + "</p>" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={}) # metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={})
genre = safeTraverse(microformat, ['category']) genre = safeTraverse(microformat, ['category'])
# TODO: genre blah blah blah... # TODO: genre blah blah blah...
author = safeTraverse(video_details, ['author'], default="Unknown Author") author = safeTraverse(video_details, ['author'], default="Unknown Author")
ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID") ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID")
author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={}) # author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={})
author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail # author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail
subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0") # subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0")
subs = 0 # subs = 0
if subs_text: # if subs_text:
subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57])) # subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57]))
subs_text = subs_text.split(" ")[0] # subs_text = subs_text.split(" ")[0]
for x in magnitude.keys(): # for x in magnitude.keys():
if x in subs_text: # if x in subs_text:
subs *= magnitude[x] # subs *= magnitude[x]
for x in author_thumbnail: subs = ydata['channel_follower_count']
# rewrite to use views.py channel_about_info = ythdd_extractor.browseAbout(ucid)
x['url'] = ythdd_globals.translateLinks(x['url']) author_thumbnail = ythdd_extractor.getChannelAvatar(channel_about_info)
# for x in author_thumbnail:
# # rewrite to use views.py
# x['url'] = ythdd_globals.translateLinks(x['url'])
# so far it seems to be impossible to tell if a channel is verified or not, # so far it seems to be impossible to tell if a channel is verified or not,
# that is - without making another request # that is - without making another request
author_verified = False author_verified = ythdd_extractor.isVerified(channel_about_info)
format_streams = [] format_streams = []
adaptive_formats, format_streams = rebuildFormats(adaptive_formats) # adaptive_formats, format_streams = rebuildFormats(adaptive_formats)
if live_now: if live_now:
video_type = "livestream" video_type = "livestream"
elif premiere_timestamp: elif premiere_timestamp:
video_type = "scheduled" video_type = "scheduled"
published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time.time()) published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time())
else: else:
video_type = "video" video_type = "video"
@@ -363,6 +371,7 @@ def videos(data):
premium = True premium = True
# TODO: detect paywalled patron-only videos # TODO: detect paywalled patron-only videos
time_end = time()
#''' #'''
response = { response = {
@@ -396,7 +405,7 @@ def videos(data):
"authorVerified": author_verified, "authorVerified": author_verified,
"authorThumbnails": author_thumbnail, "authorThumbnails": author_thumbnail,
"subCountText": subs_text, "subCountText": str(subs),
"lengthSeconds": length, "lengthSeconds": length,
"allowRatings": allow_ratings, "allowRatings": allow_ratings,
"rating": 0, "rating": 0,
@@ -407,8 +416,8 @@ def videos(data):
"dashUrl": ythdd_globals.config['general']['public_facing_url'] + "/dash/not/implemented/", # not implemented "dashUrl": ythdd_globals.config['general']['public_facing_url'] + "/dash/not/implemented/", # not implemented
"premiereTimestamp": premiere_timestamp, "premiereTimestamp": premiere_timestamp,
"hlsUrl": hls_url, #"hlsUrl": hls_url, # broken after a change in iOS player
"adaptiveFormats": adaptive_formats, #"adaptiveFormats": adaptive_formats, # same as hlsUrl
"formatStreams": format_streams, # very bare bones, empty actually xD "formatStreams": format_streams, # very bare bones, empty actually xD
"captions": [], # not implemented "captions": [], # not implemented
# "captions": [ # "captions": [
@@ -426,7 +435,8 @@ def videos(data):
# "license": String # "license": String
# } # }
# ], # ],
"recommendedVideos": related "recommendedVideos": related,
"took": time_end - time_start
} }
#''' #'''