From 468795a7a2bc3e0f030c8d615c268fa203e37dd1 Mon Sep 17 00:00:00 2001 From: sherl Date: Fri, 3 Oct 2025 01:16:56 +0200 Subject: [PATCH] feat: search pagination adds support for getting past the first page of search results --- ythdd_extractor.py | 9 +++++++-- ythdd_inv_tl.py | 12 +++++++++--- ythdd_proto.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/ythdd_extractor.py b/ythdd_extractor.py index ce24117..5ea3d55 100644 --- a/ythdd_extractor.py +++ b/ythdd_extractor.py @@ -357,14 +357,19 @@ def browseChannel(ucid: str, params: str = None, ctoken: str = None): return response_json -def WEBextractSearchResults(search_query: str) -> list: +def WEBextractSearchResults(search_query: str, page: int) -> list: # Posts a search request to innertube API # and processes only the relevant part (the actual results) if search_query is None: return [] - web_context = makeWebContext({"query": search_query}) + additional_context = {"query": search_query} + if page is not None: + params = ythdd_proto.produceSearchParams(page) + additional_context["params"] = params + + web_context = makeWebContext(additional_context) response = requests.post('https://www.youtube.com/youtubei/v1/search', params={"prettyPrint": False}, headers=stage2_headers, diff --git a/ythdd_inv_tl.py b/ythdd_inv_tl.py index 0530b2f..504118c 100644 --- a/ythdd_inv_tl.py +++ b/ythdd_inv_tl.py @@ -23,7 +23,7 @@ import ythdd_struct_parser # [✓] /api/v1/stats (stats()) # [✓] /streams/dQw4w9WgXcQ (does nothing) # [✓] /vi/:videoIdXXXX/maxresdefault.jpg -# [✓] /api/v1/search?q=... (videos and playlists) +# [✓] /api/v1/search?q=... (videos and playlists), pagination # [✓] /api/v1/search/suggestions?q=...&pq=... # [✓] /api/v1/channel/:ucid # [✓] /api/v1/channel/:ucid/videos, shorts, playlists, streams @@ -38,6 +38,7 @@ import ythdd_struct_parser # [X] /api/v1/videos/:videoIdXXXX does not depend on yt-dlp and offloads stream retrieval elsewhere (making initial response fast) # [X] /api/v1/manifest/:videoIdXXXX (above is prerequisite) # [X] rewrite the awful lookup logic +# [X] /api/v1/search?q=... complex filtering options (https://gitea.invidious.io/iv-org/invidious/src/branch/master/src/invidious/search/filters.cr) # ---------- # IDEAS: # [*] /api/v1/popular returns last requested videos by the IP (serving as multi-device history?) @@ -579,14 +580,19 @@ def search(data, req): # ignore paginated requests as we do nothing with the continuation token page = req.args.get('page') if page is not None and page != '1': - return send(404, []) + try: + page = int(page) + except: + return send(400, {"error": "Wrong page."}) + else: + page = None # when page is "1" if (data[-2].lower() != "search" or data[-1].lower() != "") and data[-1].lower() != "search": previous_query = req.args.get('pq') suggestions = ythdd_extractor.WEBgetSearchSuggestions(search_query, previous_query) return send(200, suggestions) - results = ythdd_extractor.WEBextractSearchResults(search_query) + results = ythdd_extractor.WEBextractSearchResults(search_query, page) results_list = [] for entry in results: diff --git a/ythdd_proto.py b/ythdd_proto.py index c7c0cf9..d56c58e 100644 --- a/ythdd_proto.py +++ b/ythdd_proto.py @@ -83,3 +83,14 @@ def producePlaylistContinuation(plid: str, offset: int = 0) -> str: b64_ctoken = bbpbToB64(bbpb_dicts, urlsafe=True, padding=True) return b64_ctoken + +def produceSearchParams(page: int = 1) -> str: + msge = { + "9:int": 20 * (page - 1), # pagination + "30:int": 1 # no self-harm censorship + } + + bbpb_dicts = fdictToBbpb(msge) + b64_params = bbpbToB64(bbpb_dicts, urlsafe=True, padding=True) + + return b64_params \ No newline at end of file