From 06679ee165fa9e21edbbc0d5e4aeb691204e8a30 Mon Sep 17 00:00:00 2001 From: sherl Date: Fri, 28 Feb 2025 01:02:05 +0100 Subject: [PATCH] hotfix: rely (more) on yt-dlp for extraction (part 1/2) adaptiveFormats and hlsUrl need to be fixed (or maybe replaced by DASH?) --- ythdd_inv_tl.py | 102 ++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/ythdd_inv_tl.py b/ythdd_inv_tl.py index 01b55b2..71dd92d 100644 --- a/ythdd_inv_tl.py +++ b/ythdd_inv_tl.py @@ -235,27 +235,31 @@ def videos(data): #print(f"got data: {data}") #print("requesting idata from IOSextract") - idata = ythdd_extractor.IOSextract(data[3]) + # idata = ythdd_extractor.IOSextract(data[3]) - hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="") - adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[]) - if not hls_url or not adaptive_formats: - print(f"serious error: couldn't get hls_url or adaptive_formats!\n" - f"dumping idata:\n" - f"{idata}") - return send(500, {'error': getError(idata)}) + # hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="") + # adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[]) + # if not hls_url or not adaptive_formats: + # print(f"serious error: couldn't get hls_url or adaptive_formats!\n" + # f"dumping idata:\n" + # f"{idata}") + # return send(500, {'error': getError(idata)}) + time_start = time() + + ydata = ythdd_extractor.extract(data[3]) wdata = ythdd_extractor.WEBextractSinglePage(data[3]) + #return send(200, {'ydata': ydata, 'wdata': wdata}) #return send(200, {'idata': idata, 'wdata': wdata}) - main_results = idata['stage3']['contents']['twoColumnWatchNextResults'] - primary_results = safeTraverse(main_results, ['results', 'results', 'contents']) - if primary_results: - video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer']) - video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer']) - else: - print("error: primary_results not found in invidious TL videos()") + # main_results = idata['stage3']['contents']['twoColumnWatchNextResults'] + # primary_results = safeTraverse(main_results, ['results', 'results', 'contents']) + # if primary_results: + # video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer']) + # video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer']) + # else: + # print("error: primary_results not found in invidious TL videos()") video_details = safeTraverse(wdata, ['ec1', 'videoDetails']) microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={}) @@ -310,51 +314,55 @@ def videos(data): related_video['viewCount'] = related_views related.append(related_video) - magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000} - toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution - likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution - likes = 0 - if likes_text: - likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed - likes_text = likes_text.split(" ")[0] - for x in magnitude.keys(): - if x in likes_text: - likes *= magnitude[x] + # magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000} + # toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution + # likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution + # likes = 0 + # if likes_text: + # likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed + # likes_text = likes_text.split(" ")[0] + # for x in magnitude.keys(): + # if x in likes_text: + # likes *= magnitude[x] + likes = safeTraverse(ydata, ['like_count'], default=0) description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)") short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)") description_html = "

" + description + "

" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329 - metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={}) + # metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={}) genre = safeTraverse(microformat, ['category']) # TODO: genre blah blah blah... author = safeTraverse(video_details, ['author'], default="Unknown Author") ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID") - author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={}) - author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail - subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0") - subs = 0 - if subs_text: - subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57])) - subs_text = subs_text.split(" ")[0] - for x in magnitude.keys(): - if x in subs_text: - subs *= magnitude[x] - for x in author_thumbnail: - # rewrite to use views.py - x['url'] = ythdd_globals.translateLinks(x['url']) + # author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={}) + # author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail + # subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0") + # subs = 0 + # if subs_text: + # subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57])) + # subs_text = subs_text.split(" ")[0] + # for x in magnitude.keys(): + # if x in subs_text: + # subs *= magnitude[x] + subs = ydata['channel_follower_count'] + channel_about_info = ythdd_extractor.browseAbout(ucid) + author_thumbnail = ythdd_extractor.getChannelAvatar(channel_about_info) + # for x in author_thumbnail: + # # rewrite to use views.py + # x['url'] = ythdd_globals.translateLinks(x['url']) # so far it seems to be impossible to tell if a channel is verified or not, # that is - without making another request - author_verified = False + author_verified = ythdd_extractor.isVerified(channel_about_info) format_streams = [] - adaptive_formats, format_streams = rebuildFormats(adaptive_formats) + # adaptive_formats, format_streams = rebuildFormats(adaptive_formats) if live_now: video_type = "livestream" elif premiere_timestamp: video_type = "scheduled" - published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time.time()) + published = dateToEpoch(premiere_timestamp) if premiere_timestamp else int(time()) else: video_type = "video" @@ -363,6 +371,7 @@ def videos(data): premium = True # TODO: detect paywalled patron-only videos + time_end = time() #''' response = { @@ -396,7 +405,7 @@ def videos(data): "authorVerified": author_verified, "authorThumbnails": author_thumbnail, - "subCountText": subs_text, + "subCountText": str(subs), "lengthSeconds": length, "allowRatings": allow_ratings, "rating": 0, @@ -407,8 +416,8 @@ def videos(data): "dashUrl": ythdd_globals.config['general']['public_facing_url'] + "/dash/not/implemented/", # not implemented "premiereTimestamp": premiere_timestamp, - "hlsUrl": hls_url, - "adaptiveFormats": adaptive_formats, + #"hlsUrl": hls_url, # broken after a change in iOS player + #"adaptiveFormats": adaptive_formats, # same as hlsUrl "formatStreams": format_streams, # very bare bones, empty actually xD "captions": [], # not implemented # "captions": [ @@ -426,7 +435,8 @@ def videos(data): # "license": String # } # ], - "recommendedVideos": related + "recommendedVideos": related, + "took": time_end - time_start } #'''