feat: channel browsing and code overhaul

a lot of code responsible for parsing data into invidious-compatible
structures has been moved to the ythdd_struct_parser file
This commit is contained in:
2025-09-12 03:48:57 +02:00
parent 256d21bbcd
commit 96c1b5396e
3 changed files with 592 additions and 252 deletions

View File

@@ -23,7 +23,7 @@ version = "0.0.1"
apiVersion = "1"
randomly_generated_passcode = 0
video_cache = {}
general_cache = {"search": []}
general_cache = {"search": [], "continuations": {"channels": {}}, "channels": {}}
def getConfig(configfile):

View File

@@ -7,18 +7,24 @@ from flask import Response, request, redirect
from markupsafe import escape
from time import strftime, gmtime, time
from ythdd_globals import safeTraverse
from hashlib import md5
import json, datetime
import dateparser
import html
import invidious_formats
import ythdd_globals
import ythdd_api_v1
import ythdd_extractor
import ythdd_struct_parser
# TODO:
# [✓] /api/v1/stats (stats())
# [✓] /streams/dQw4w9WgXcQ (does nothing)
# [✓] /vi/videoIdXXXX/maxresdefault.jpg (todo: add a fallback for 404s)
# [✓] /api/v1/search?q=... (videos and playlists)
# [✓] /api/v1/search/suggestions?q=...&pq=...
# [✓] /api/v1/channels/id
# [✓] /api/v1/channels/videos, shorts, playlists
# [X] /api/v1/playlists/:plid
# [*] /api/v1/auth/subscriptions (stub? db?)
# [*] /api/v1/auth/feed?page=1 (stub? db?)
@@ -76,14 +82,6 @@ def epochToDate(epoch):
def dateToEpoch(date: str):
    """Convert an ISO-8601 date/time string into a POSIX timestamp.

    The string is parsed with ``datetime.fromisoformat`` and the result of
    ``timestamp()`` (a float number of seconds) is returned. A string without
    a UTC offset yields a naive datetime, which ``timestamp()`` interprets as
    local time.
    """
    parsed = datetime.datetime.fromisoformat(date)
    return parsed.timestamp()
def doesContainNumber(string: str, numeric_system: int = 10) -> bool:
try:
number = int(string, numeric_system)
return True
except ValueError:
return False
raise BaseException("doesContainNumber(): Unknown error while determining if a string contains a number")
def trending():
    """Answer the trending endpoint with a placeholder payload.

    Always responds with status 200 and a list holding one empty object.
    """
    placeholder = [{}]
    return send(200, placeholder)
@@ -111,30 +109,6 @@ def getError(idata: dict):
return error
def genThumbs(videoId: str):
result = []
thumbnails = [
#{'height': 720, 'width': 1280, 'quality': "maxres", 'url': "maxres"}, # for the time being omit the buggy maxres quality
{'height': 720, 'width': 1280, 'quality': "maxresdefault", 'url': "maxresdefault"},
{'height': 480, 'width': 640, 'quality': "sddefault", 'url': "sddefault"},
{'height': 360, 'width': 480, 'quality': "high", 'url': "hqdefault"},
{'height': 180, 'width': 320, 'quality': "medium", 'url': "mqdefault"},
{'height': 90, 'width': 120, 'quality': "default", 'url': "default"},
{'height': 90, 'width': 120, 'quality': "start", 'url': "1"},
{'height': 90, 'width': 120, 'quality': "middle", 'url': "2"},
{'height': 90, 'width': 120, 'quality': "end", 'url': "3"},
]
for x in thumbnails:
width = x['width']
height = x['height']
quality = x['quality']
url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/' + x['url'] + '.jpg'
result.append({'quality': quality, 'url': url, 'width': width, 'height': height})
return result
def rebuildFormats(data):
result = [{} for x in data]
formatStreams = []
@@ -267,11 +241,14 @@ def rebuildFormatsFromYtdlpApi(ydata: dict):
fnote = safeTraverse(stream, ["format_note"], default="low")
if type is None:
type = "mp4"
abr = safeTraverse(stream, ["abr"], default="0")
if abr is None:
abr = "0"
newRow[ "type"] = "audio/" + type
newRow[ "audioQuality"] = fnote
newRow["audioSampleRate"] = int(safeTraverse(stream, ["asr"], default="44100"))
newRow[ "audioChannels"] = int(safeTraverse(stream, ["audio_channels"]))
newRow[ "qualityLabel"] = str(safeTraverse(stream, ["abr"], default="?"))
newRow[ "qualityLabel"] = str(int(abr)) + "k (audio)"
newRow[ "resolution"] = f"{fnote} quality"
newRow[ "size"] = "0x0"
if safeTraverse(stream, ["vcodec"]) != "none":
@@ -284,7 +261,7 @@ def rebuildFormatsFromYtdlpApi(ydata: dict):
newRow[ "type"] = "video/" + type
newRow[ "resolution"] = (height if height in ("144", "240", "360", "480", "720", "1080") else "360") + "p" # mpv won't play the video inside of Yattee if it's a non-standard resolution (bug?)
newRow[ "fps"] = safeTraverse(stream, ["fps"], default=30)
newRow[ "qualityLabel"] = height + "p" + str(int(newRow['fps'])) * (newRow["fps"] > 30) # also a placeholder
newRow[ "qualityLabel"] = height + "p" + str(int(newRow['fps'])) * (newRow["fps"] > 30) + " (video)" # also a placeholder
newRow[ "size"] = width + "x" + height
newRow[ "clen"] = safeTraverse(params, ["clen"], default=safeTraverse(stream, ["filesize"], default="0"))
@@ -303,9 +280,10 @@ def rebuildFormatsFromYtdlpApi(ydata: dict):
if vcodec != "none" and acodec != "none":
# 360p stream
format_streams.append(newRow.copy())
newRow["qualityLabel"] = height + "p" + str(int(newRow['fps'])) * (newRow["fps"] > 30)
format_streams.append(newRow)
if vcodec != "none" or acodec != "none":
adaptive_formats.append(newRow.copy())
adaptive_formats.append(newRow)
# {
@@ -345,11 +323,6 @@ def videos(data):
# https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr
response = {}
#print(f"got data: {data}")
#print("requesting idata from IOSextract")
# idata = ythdd_extractor.IOSextract(data[3])
# hls_url = safeTraverse(idata, ['stage1', 'streamingData', 'hlsManifestUrl'], default="")
# adaptive_formats = safeTraverse(idata, ['stage1', 'streamingData', 'adaptiveFormats'], default=[])
# if not hls_url or not adaptive_formats:
@@ -374,13 +347,10 @@ def videos(data):
#return send(200, {'ydata': ydata, 'wdata': wdata})
#return send(200, {'idata': idata, 'wdata': wdata})
# main_results = idata['stage3']['contents']['twoColumnWatchNextResults']
# primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])
# if primary_results:
# video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer'])
# video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer'])
# else:
# print("error: primary_results not found in invidious TL videos()")
main_results = wdata['ec2']['contents']['twoColumnWatchNextResults']
primary_results = safeTraverse(main_results, ['results', 'results', 'contents'])
# video_primary_renderer = safeTraverse(primary_results, [0, 'videoPrimaryInfoRenderer'])
video_secondary_renderer = safeTraverse(primary_results, [1, 'videoSecondaryInfoRenderer'])
video_details = safeTraverse(wdata, ['ec1', 'videoDetails'])
microformat = safeTraverse(wdata, ['ec1', 'microformat', 'playerMicroformatRenderer'], default={})
@@ -404,102 +374,61 @@ def videos(data):
related_raw = safeTraverse(wdata, ['ec2', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results'], default=[]) # can possibly change in the future
related = []
for x in related_raw[:-1]:
if safeTraverse(x, ['compactVideoRenderer'], default=[]):
y = safeTraverse(x, ['compactVideoRenderer'])
if type(y) != dict:
continue
related_video = {}
related_video['videoId'] = safeTraverse(y, ['videoId'])
related_video['title'] = safeTraverse(y, ['title', 'simpleText'])
related_video['videoThumbnails'] = genThumbs(related_video['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
related_video['author'] = safeTraverse(y, ['longBylineText', 'runs', 0, 'text'])
related_video['authorId'] = safeTraverse(y, ['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
related_video['authorUrl'] = '/channel/' + related_video['authorId']
related_video['authorVerified'] = False
if "ownerBadges" in y:
related_video['authorVerified'] = True # hopefully this won't break things, as invidious API doesn't distinguish music and normal verified badges
related_video['authorThumbnails'] = safeTraverse(y, ['channelThumbnail', 'thumbnails'], default=[])
for z in related_video['authorThumbnails']:
z['url'] = ythdd_globals.translateLinks(z['url'])
related_video['lengthSeconds'] = 0
time_lookup_list = [1, 60, 3_600, 86_400]
time_list = safeTraverse(y, ['lengthText', 'simpleText'], default="0:0").split(":")
for z in range(len(time_list)):
related_video['lengthSeconds'] += time_lookup_list[z] * int(time_list[len(time_list) - 1 - z])
related_views_text = safeTraverse(y, ['viewCountText', 'simpleText'], default="0").split(" ")[0]
related_video['viewCountText'] = safeTraverse(y, ['shortViewCountText', 'simpleText'], default="0").split(" ")[0]
related_views = 0
if related_views_text:
if related_views_text.lower() == "no":
related_views_text = "0"
related_views = int("0" + "".join([z for z in related_views_text if 48 <= ord(z) and ord(z) <= 57]))
related_views_text = related_views_text.split(" ")[0]
related_video['viewCount'] = related_views
related.append(related_video)
else:
y = safeTraverse(x, ['lockupViewModel'])
if type(y) != dict:
continue
lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[])
related_video = {}
related_video['videoId'] = safeTraverse(y, ['contentId'])
related_video['title'] = safeTraverse(lmvm, ['title', 'content'])
related_video['videoThumbnails'] = genThumbs(related_video['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
related_video['author'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content'])
related_video['authorId'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
related_video['authorUrl'] = '/channel/' + related_video['authorId']
related_video['authorVerified'] = False if safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'attachmentRuns']) is None else True # seens to do the job
related_video['authorThumbnails'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'avatar', 'avatarViewModel', 'image', 'sources'], default=[])
for z in related_video['authorThumbnails']:
z['url'] = ythdd_globals.translateLinks(z['url'])
related_video['lengthSeconds'] = parseLengthFromTimeBadge(safeTraverse(y, ['contentImage', 'thumbnailViewModel', 'overlays', 0, 'thumbnailOverlayBadgeViewModel', 'thumbnailBadges', 0, 'thumbnailBadgeViewModel', 'text'], default="0:0"))
related_video['viewCountText'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 1, 'metadataParts', 0, 'text', 'content'], default="0").split(" ")[0]
related_video['viewCount'] = parseViewsFromViewText(related_video['viewCountText'])
related.append(related_video)
for entry in related_raw[:-1]:
related_entry = {}
match safeTraverse(list(entry.keys()), [0], default=""):
case "compactVideoRenderer":
# legacy renderer, a/b tested and later phased out in summer 2025
continue
case "lockupViewModel":
y = safeTraverse(x, ['lockupViewModel'])
if not isinstance(y, dict):
continue
isMix = playlist_type = safeTraverse(entry, ["lockupViewModel", "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "icon", "sources", 0, "clientResource", "imageName"], default="") == "MIX"
if isMix:
# mixes aren't currently supported
continue
lmvm = safeTraverse(y, ['metadata', 'lockupMetadataViewModel'], default=[])
related_entry['videoId'] = safeTraverse(y, ['contentId'])
related_entry['title'] = safeTraverse(lmvm, ['title', 'content'])
related_entry['videoThumbnails'] = ythdd_struct_parser.genThumbs(related_entry['videoId']) #safeTraverse(y, ['thumbnail', 'thumbnails'])
related_entry['author'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'content'])
related_entry['authorId'] = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'rendererContext', 'commandContext', 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId'], default="UNKNOWNCHANNELID")
related_entry['authorUrl'] = '/channel/' + related_entry['authorId']
related_entry['authorVerified'] = False if safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 0, 'metadataParts', 0, 'text', 'attachmentRuns']) is None else True # seens to do the job
author_avatar_url = safeTraverse(lmvm, ['image', 'decoratedAvatarViewModel', 'avatar', 'avatarViewModel', 'image', 'sources', 0, 'url'], default="no-avatar")
related_entry['authorThumbnails'] = ythdd_extractor.generateChannelAvatarsFromUrl(author_avatar_url)
related_entry['lengthSeconds'] = ythdd_struct_parser.parseLengthFromTimeBadge(safeTraverse(y, ['contentImage', 'thumbnailViewModel', 'overlays', 0, 'thumbnailOverlayBadgeViewModel', 'thumbnailBadges', 0, 'thumbnailBadgeViewModel', 'text'], default="0:0"))
related_entry['viewCountText'] = safeTraverse(lmvm, ['metadata', 'contentMetadataViewModel', 'metadataRows', 1, 'metadataParts', 0, 'text', 'content'], default="0").split(" ")[0]
related_entry['viewCount'] = ythdd_struct_parser.parseViewsFromViewText(related_entry['viewCountText'])
case _:
# unsupported model: print info into stdout
print("received an entry of unknown type during parsing of related videos:")
print(entry)
print("")
continue
related.append(related_entry)
# magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
# toplevel_buttons = safeTraverse(video_primary_renderer, ['videoActions', 'menuRenderer', 'topLevelButtons'], default={}) # hacky solution
# likes_text = safeTraverse(toplevel_buttons, [0, 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', 'buttonViewModel', 'title'], default="") # hacky solution
# likes = 0
# if likes_text:
# likes = int("".join([x for x in likes_text if 48 <= ord(x) and ord(x) <= 57])) # ASCII for 0-9, no regex needed
# likes_text = likes_text.split(" ")[0]
# for x in magnitude.keys():
# if x in likes_text:
# likes *= magnitude[x]
likes = safeTraverse(ydata, ['like_count'], default=0)
description = safeTraverse(microformat, ['description', 'simpleText'], default="\n(ythdd: failed to retrieve description, perhaps it's empty?)")
short_description = safeTraverse(wdata, ['ec1', 'videoDetails', 'shortDescription'], default="(ythdd: failed to retrieve short description, perhaps it's empty?)")
description_html = "<p>" + description + "</p>" # sorry, not happening right now, TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
description_html = html.escape(description).replace("\r\n", "<br>").replace("\n", "<br>") # still TODO: https://github.com/iv-org/invidious/blob/master/src/invidious/videos/parser.cr#L329
# metadata = safeTraverse(video_secondary_renderer, ['metadataRowContainer', 'metadataRowContainerRenderer', 'rows'], default={})
genre = safeTraverse(microformat, ['category'])
# TODO: genre blah blah blah...
author = safeTraverse(video_details, ['author'], default="Unknown Author")
ucid = safeTraverse(video_details, ['channelId'], default="UNKNOWNCHANNELID")
# author_info = safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer'], default={})
# author_thumbnail = safeTraverse(author_info, ['thumbnail', 'thumbnails']) # lowest quality thumbnail
# subs_text = safeTraverse(author_info, ['subscriberCountText', 'simpleText'], default="0")
# subs = 0
# if subs_text:
# subs = int("".join([x for x in subs_text if 48 <= ord(x) and ord(x) <= 57]))
# subs_text = subs_text.split(" ")[0]
# for x in magnitude.keys():
# if x in subs_text:
# subs *= magnitude[x]
subs = ydata['channel_follower_count']
channel_about_info = ythdd_extractor.browseAbout(ucid)
author_thumbnail = [ # must be a list
ythdd_extractor.getChannelAvatar(channel_about_info)
] * 3 # yes really
# for x in author_thumbnail:
# # rewrite to use views.py
# x['url'] = ythdd_globals.translateLinks(x['url'])
author_thumbnail = ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer', 'thumbnail', 'thumbnails', 0, 'url'], default="no-avatar"))
# so far it seems to be impossible to tell if a channel is verified or not,
# that is - without making another request
author_verified = ythdd_extractor.isVerified(channel_about_info)
author_verified = ythdd_extractor.isVerified(safeTraverse(video_secondary_renderer, ['owner', 'videoOwnerRenderer', 'badges', 0], default=[]))
format_streams = []
# adaptive_formats, format_streams = rebuildFormats(adaptive_formats)
@@ -520,38 +449,32 @@ def videos(data):
time_end = time()
#'''
response = {
"type": video_type,
"title": title,
"videoId": video_id,
"videoThumbnails": genThumbs(video_id),
"videoThumbnails": ythdd_struct_parser.genThumbs(video_id),
"storyboards": [], # not implemented
"description": description, # due to change (include ythdd metadata)
"descriptionHtml": description_html, # basically the same as normal description for the time being
"descriptionHtml": description_html,
"published": published,
"publishedText": published_date,
"keywords": keywords,
"viewCount": views,
"viewCountText": str(views), # not implemented
"likeCount": likes,
"dislikeCount": 0,
"paid": False, # not implemented
"premium": premium,
"isFamilyFriendly": family_friendly,
"allowedRegions": allowed_regions,
"genre": genre,
"genreUrl": "/genreUrl/not/implemented/", # not implemented
"author": author,
"authorId": ucid,
"authorUrl": "/channel/" + ucid,
"authorVerified": author_verified,
"authorThumbnails": author_thumbnail,
"subCountText": str(subs),
"lengthSeconds": length,
"allowRatings": allow_ratings,
@@ -560,12 +483,11 @@ def videos(data):
"liveNow": live_now,
"isPostLiveDvr": post_live_dvr,
"isUpcoming": is_upcoming,
"dashUrl": ythdd_globals.config['general']['public_facing_url'] + "dash/not/implemented/", # not implemented
"dashUrl": ythdd_globals.config['general']['public_facing_url'] + "api/invidious/api/v1/manifest/" + video_id, # not implemented
"premiereTimestamp": premiere_timestamp,
#"hlsUrl": hls_url, # broken after a change in iOS player
#"hlsUrl": hls_url, # broken after a change in iOS player, only usable for livestreams
"adaptiveFormats": adaptive_formats, # same as hlsUrl
"formatStreams": format_streams, # very bare bones, empty actually xD
"formatStreams": format_streams,
"captions": [], # not implemented
# "captions": [
# {
@@ -585,7 +507,6 @@ def videos(data):
"recommendedVideos": related,
"took": time_end - time_start
}
#'''
if ythdd_globals.config['general']['debug']:
response["ydata"] = ydata
@@ -606,31 +527,6 @@ def videos(data):
return send(status_code, response)
def parseLengthFromTimeBadge(time_str: str) -> int:
    """Convert a thumbnail time badge such as ``"1:02:03"`` into seconds.

    Fields are separated by ``:`` and read right to left as seconds,
    minutes, hours and days.

    Args:
        time_str: Badge text, e.g. ``"12:34"``; may also be a non-numeric
            label such as ``"LIVE"`` or ``"Upcoming"``.

    Returns:
        The length in seconds, or 0 if the badge cannot be interpreted
        (any non-numeric field, or more than four fields).
    """
    unit_seconds = (1, 60, 3_600, 86_400)

    try:
        fields = [int(part) for part in time_str.split(":")]
    except ValueError:
        # works around ['LIVE'] for livestreams or ['Upcoming'] for scheduled videos
        return 0

    if len(fields) > len(unit_seconds):
        # No unit is defined past days; previously this raised IndexError
        # even though the function promises 0 on failure.
        return 0

    # Pair the right-most field with seconds, the next one with minutes, ...
    return sum(unit * value for unit, value in zip(unit_seconds, reversed(fields)))
def parseViewsFromViewText(viewcounttext: str) -> int:
    """Turn a human-readable count such as ``"1.2M views"`` into an integer.

    Handles plain counts with thousands separators (``"1,234 views"``),
    abbreviated counts with a ``K``/``M``/``B`` suffix (``"12K"``,
    ``"1.2M"``) and textual "no views" variants.

    Args:
        viewcounttext: The count text; only its first whitespace-separated
            token is inspected for a magnitude suffix.

    Returns:
        The parsed count, or 0 when the text is empty or holds no digits.
    """
    magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

    if not viewcounttext:
        return 0

    # Baseline: all ASCII digits of the text glued together
    # ("1,234 views" -> 1234, "No views" -> 0).
    digits = "".join(ch for ch in viewcounttext if "0" <= ch <= "9")
    views = int("0" + digits)

    tokens = viewcounttext.split()
    if not tokens:
        # whitespace-only input used to raise IndexError
        return views

    token = tokens[0]
    multiplier = magnitude.get(token[-1].upper())
    if multiplier is None:
        return views

    # Abbreviated count: honour the decimal point, so "1.2M" is 1_200_000
    # rather than 12 * 1_000_000.
    try:
        mantissa = float(token[:-1].replace(",", ""))
        return int(round(mantissa * multiplier))
    except (ValueError, OverflowError):
        # Unparseable mantissa: fall back to the digit-only interpretation.
        return views * multiplier
def search(data, req):
search_query = req.args.get('q')
@@ -648,91 +544,216 @@ def search(data, req):
results_list = []
for entry in results:
match safeTraverse(list(entry.keys()), [0], default=""):
case "videoRenderer": # represents a video
published_date = safeTraverse(entry, ["videoRenderer", "publishedTimeText", "simpleText"], default="now")
published_date = published_date.removeprefix("Streamed ")
results_list.append(
{
"type": "video",
"title": safeTraverse(entry, ["videoRenderer", "title", "runs", 0, "text"]),
"videoId": safeTraverse(entry, ["videoRenderer", "videoId"]),
"author": safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "text"]),
"authorId": safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"]),
"authorUrl": "/channel/" + safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID"),
"authorVerified": False, # TODO
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(entry, ["videoRenderer", "avatar", "decoratedAvatarViewModel", "avatar", "avatarViewModel", "image", "sources", 0, "url"], default="unknown")),
"videoThumbnails": genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
"description": "",
"descriptionHtml": "",
"viewCount": parseViewsFromViewText(safeTraverse(entry, ["videoRenderer", "viewCountText", "simpleText"], default="No views")),
"viewCountText": safeTraverse(entry, ["videoRenderer", "viewCountText", "simpleText"], default="Unknown amount of views"),
"published": int(dateparser.parse(published_date).timestamp()), # sadly best we can do, invidious does this too
"publishedText": published_date,
"lengthSeconds": parseLengthFromTimeBadge(safeTraverse(entry, ["videoRenderer", "lengthText", "simpleText"], default="0:0")),
"liveNow": False,
"premium": False,
"isUpcoming": False,
"isNew": False,
"is4k": False,
"is8k": False,
"isVr180": False,
"isVr360": False,
"is3d": False,
"hasCaptions": False
}
)
# modify the premiere timestamp afterwards here?
case "lockupViewModel": # represents playlists/mixes
isMix = safeTraverse(entry, ["lockupViewModel", "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "text"], default="") == "Mix"
if isMix:
# mixes aren't currently supported
continue
lvm = entry["lockupViewModel"]
meta = safeTraverse(lvm, ["metadata"], default=[])
lmvm = safeTraverse(meta, ["lockupMetadataViewModel", "metadata", "contentMetadataViewModel", "metadataRows"], default=[])
ucid = safeTraverse(lmvm, [0, "metadataParts", 0, "text", "commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
length = safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "text"], default="0 videos")
length = parseViewsFromViewText(length.split(" ")[0])
results_list.append(
{
"type": "playlist",
"title": safeTraverse(meta, ["lockupMetadataViewModel", "title", "content"], default="ythdd: unknown title"),
"playlistId": safeTraverse(lmvm, [2, "metadataParts", 0, "text", "commandRuns", 0, "onTap", "innertubeCommand", "watchEndpoint", "playlistId"], default="UNKNOWNPLAYLISTID"),
"playlistThumbnail": safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "image", "sources", 0, "url"], default="no-url?"), # todo: sanitize this
"author": safeTraverse(lmvm, [0, "metadataParts", 0, "text", "content"], default="ythdd: unknown author"),
"authorId": ucid,
"authorUrl": "/channel/" + ucid,
"authorVerified": False,
"videoCount": length,
"videos": [] # provided for historical reasons i guess
}
)
case "shelfRenderer": # "people also watched"
continue
case "gridShelfViewModel": # shorts?
continue
case _:
print("received a search result of unknown type:")
print(entry)
print("")
# breakpoint()
continue
parsed_entry = ythdd_struct_parser.parseRenderers(entry)
if parsed_entry is not None:
results_list.append(parsed_entry)
return send(200, results_list)
def get_channel_tab(requested_tab, ucid, req, only_json: bool = False):
# Fetch a single channel tab and convert its entries with ythdd_struct_parser.parseRenderers().
#   requested_tab - lowercase tab name; "videos", "shorts" and "playlists" are handled below,
#                   anything else yields an empty response
#   ucid          - channel id, used as key into general_cache["continuations"]["channels"]
#   req           - request object; only req.args.get('continuation') is read here
#   only_json     - when True the response dict is returned as-is instead of being passed to send(200, ...)

# check for page/cont
ctoken = req.args.get('continuation')
# perhaps continuation tokens should be checked here (whether they are inside of general_cache)
# this way, malicious requests containing bogus ctokens can't be sent to potentially ban/ratelimit the instance (?)
# if ctoken is not None and ctoken not in ythdd_globals.general_cache...
# unique req fingerprint allows for this exact query to be cached in memory.
# md5 sum serves as a "unique", but deterministic value which can be checked for cache hit/miss
unique_request_fingerprint = md5(f"{ucid}_{requested_tab}_{ctoken}".encode('utf-8')).hexdigest()
# if we haven't discovered parameters required for browsing a specific tab,
# then load them now
if ucid not in ythdd_globals.general_cache["continuations"]["channels"]:
channels(["", "", "", ucid], req, True)
# NOTE(review): channels() returns early (404) without filling the cache when its id check fails,
# and it stores the entry under the externalId taken from the response rather than under `ucid`;
# in both cases the lookups of [...]["channels"][ucid] further down would raise KeyError - confirm.
# check if request has been cached within the last hour
if ythdd_globals.config['general']['cache'] and unique_request_fingerprint in ythdd_globals.general_cache["channels"]:
if ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] + 1 * 60 * 60 > time():
response = ythdd_globals.general_cache["channels"][unique_request_fingerprint]
if only_json:
return response
else:
return send(200, response)
else:
# entry is older than one hour: drop it and fetch again
del ythdd_globals.general_cache["channels"][unique_request_fingerprint]
# load relevant data from global (general) cache
param = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["tabs", requested_tab, "param"], default=None)
name = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["name"], default="")
avatar = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["avatar"], default="no-avatar")
# `verified` is read here but not used anywhere else in this function
verified = safeTraverse(ythdd_globals.general_cache["continuations"]["channels"][ucid], ["verified"], default=False)
# if provided, ctoken will be used for browsing as well
wdata = ythdd_extractor.browseChannel(ucid, params=param, ctoken=ctoken)
# sanity check (whether we got what we requested)
received_tab = safeTraverse(wdata, ["responseContext", "serviceTrackingParams", 0, "params", 0, "value"])
if received_tab != f"channel.{requested_tab}":
# if that's not the case, either something changed in the innertube API,
# or content that was asked for isn't available
# (the mismatch is only reported; processing continues regardless)
print(f"INFO: couldn't verify server returned channel data we asked for. "
f"Requested channel.{requested_tab}, got {received_tab}. Most likely we sent a request to Innertube which got rejected.")
# load requested tab
result = {}
if ctoken is None:
# first page: pick the tab whose lowercased title equals the requested tab name
tabs = safeTraverse(wdata, ["contents", "twoColumnBrowseResultsRenderer", "tabs"], default=[])
for tab in tabs:
tab_name = safeTraverse(tab, ["tabRenderer", "title"], default="").lower()
if tab_name and tab_name == requested_tab:
result = safeTraverse(tab, ["tabRenderer", "content"], default=[])
break
items = []
inner_contents = []
new_continuation = ""
response = {}
match requested_tab:
case "videos" | "shorts":
# videos/shorts have actually the same response schema,
# only the renderers differ - but they are taken care of in ythdd_struct_parser.parseRenderers()
# first page entries come from the tab content, continuation pages from onResponseReceivedActions
if ctoken is None:
inner_contents = safeTraverse(result, ["richGridRenderer", "contents"], default=[[]])
else:
inner_contents = safeTraverse(wdata, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"], default=[[]])
for entry in inner_contents:
# videos from videos tab have no owner info (?) or it's in another place. if it is somewhere, this expression can be made simpler by traversing something else in struct parser.
item = safeTraverse(entry, ["richItemRenderer", "content"])
if item is not None:
items.append(ythdd_struct_parser.parseRenderers(item, {"author_name": name, "author_ucid": ucid, "avatar": avatar}))
# the token for the next page is looked up in the last element of the list
# NOTE(review): inner_contents[-1] raises IndexError if the traversal yields an empty list - confirm that cannot happen
new_continuation = safeTraverse(inner_contents[-1], ["continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
response = {
"videos": items,
"continuation": new_continuation
}
# cache response
# (the cached object is the response dict itself, so 'cacheTime' is added to that same dict)
if ythdd_globals.config['general']['cache']:
ythdd_globals.general_cache["channels"][unique_request_fingerprint] = response
ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] = time()
# todo: save continuation(?)
# or... is there a usecase for saving it?
case "playlists":
if ctoken is None:
inner_contents = safeTraverse(result, ["sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "gridRenderer", "items"], default=[[]])
else:
inner_contents = safeTraverse(wdata, ["onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems"], default=[[]])
for entry in inner_contents:
# unlike the videos/shorts branch, the parser result is appended without a None check
item = ythdd_struct_parser.parseRenderers(entry, {"author_name": name, "author_ucid": ucid, "avatar": avatar})
items.append(item)
new_continuation = safeTraverse(inner_contents[-1], ["continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token"], default="")
response = {
"playlists": items,
"continuation": new_continuation
}
# cache response
if ythdd_globals.config['general']['cache']:
ythdd_globals.general_cache["channels"][unique_request_fingerprint] = response
ythdd_globals.general_cache["channels"][unique_request_fingerprint]['cacheTime'] = time()
case _:
# for all other renderers, which aren't currently supported
response = {
# "wdata": wdata
}
# in debug mode the raw browse data is attached to the response
if ythdd_globals.config["general"]["debug"]:
response["wdata"] = wdata
if only_json:
return response
return send(200, response)
def channels(data, req, only_json: bool = False):
# Handle channel requests: either dispatch to a tab handler or build the channel overview.
#   data      - list of path segments; data[3] is the channel id, data[4] (optional) the tab name
#   req       - request object, forwarded to get_channel_tab()
#   only_json - when True the overview dict is returned directly instead of via send(200, ...)
# Side effect: stores avatar, name, verified flag and per-tab browse params of the channel in
# ythdd_globals.general_cache["continuations"]["channels"].
if len(data[3]) != 24 or not data[3].startswith("UC"):
# silly sanity check
# (this path returns the result of send() even when only_json is True)
return send(404, {"error": "This channel does not exist."})
if len(data) > 4:
match data[4]:
case "videos" | "shorts" | "playlists" | "podcasts":
# only_json is not forwarded here; "podcasts" is accepted, but get_channel_tab()
# has no dedicated branch for it and treats it as an unsupported tab
return get_channel_tab(data[4], data[3], req)
case _:
return send(400, {"error": f"Bad request, unrecognized/unsupported tab \"{data[4]}\"."})
wdata = ythdd_extractor.browseChannel(data[3])
channel_meta = safeTraverse(wdata, ["metadata", "channelMetadataRenderer"])
banners = safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "banner", "imageBannerViewModel", "image", "sources"], default=[])
avatar = safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "image", "decoratedAvatarViewModel", "avatar", "avatarViewModel", "image", "sources", 0, "url"], default="no-avatar")
# subscriber count is parsed from the text of the second metadata row of the page header
subscribers = ythdd_struct_parser.parseViewsFromViewText(safeTraverse(wdata, ["header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "metadata", "contentMetadataViewModel", "metadataRows", 1, "metadataParts", 0, "text", "content"], default="0"))
verified = False # to be replaced later with ythdd_extractor.isVerified(...)
author_name = safeTraverse(channel_meta, ["title"], default="Unknown Channel")
author_ucid = safeTraverse(channel_meta, ["externalId"], default="UNKNOWNCHANNELID")
# NOTE(review): the cache entry is keyed by the externalId from the response (author_ucid),
# while get_channel_tab() below is called with data[3] - presumably they are always equal; confirm.
ythdd_globals.general_cache["continuations"]["channels"][author_ucid] = {
"avatar": avatar,
"name": author_name,
"tabs": {},
"verified": verified
}
tabs = safeTraverse(wdata, ["contents", "twoColumnBrowseResultsRenderer", "tabs"], default=[])
tab_names = []
for tab in tabs:
# collect tab names
tab_name = safeTraverse(tab, ["tabRenderer", "title"], default="").lower()
if tab_name:
tab_names.append(tab_name)
# and their params (used to retrieve data about them)
ythdd_globals.general_cache["continuations"]["channels"][author_ucid]["tabs"][tab_name] = dict()
ythdd_globals.general_cache["continuations"]["channels"][author_ucid]["tabs"][tab_name]["param"] = safeTraverse(tab, ["tabRenderer", "endpoint", "browseEndpoint", "params"], default=None)
# the "videos" tab is fetched as a plain dict to fill "latestVideos" below
latest_videos = get_channel_tab("videos", data[3], req, only_json=True)
# banner urls are rewritten in place
for banner in banners:
banner["url"] = ythdd_globals.translateLinks(banner["url"])
avatars = ythdd_extractor.generateChannelAvatarsFromUrl(avatar)
response = {
"author": author_name,
"authorId": author_ucid,
"authorUrl": "https://www.youtube.com/channel/" + author_ucid,
"authorBanners": banners,
"authorThumbnails": avatars,
"subCount": subscribers,
# totalViews and joined are constant placeholders here
"totalViews": 0,
"joined": 0,
"autoGenerated": False, # todo: actually check this
"ageGated": False,
"isFamilyFriendly": safeTraverse(channel_meta, ["isFamilySafe"], default=False),
"description": safeTraverse(channel_meta, ["description"], default="ythdd: no channel description"),
# description is HTML-escaped first, then newlines are turned into <br>
"descriptionHtml": html.escape(safeTraverse(channel_meta, ["description"], default="ythdd: no channel description (html)")).replace("\r\n", "<br>").replace("\n", "<br>"),
"allowedRegions": safeTraverse(channel_meta, ["availableCountryCodes"], default=[]),
"tabs": tab_names,
# the keywords value is wrapped as the single element of the list, it is not split
"tags": [safeTraverse(channel_meta, ["keywords"], default="")],
"authorVerified": verified,
"latestVideos": latest_videos["videos"], # using struct parser
"relatedChannels": []
}
# in debug mode the raw browse data is attached to the response
if ythdd_globals.config["general"]["debug"]:
response["wdata"] = wdata
if only_json:
return response
return send(200, response)
def lookup(data, req):
# possibly TODO: rewrite this mess
if len(data) > 2:
@@ -750,6 +771,8 @@ def lookup(data, req):
return auth(data)
case 'search':
return search(data, req)
case 'channels':
return channels(data, req)
case _:
incrementBadRequests()
return notImplemented(data)

317
ythdd_struct_parser.py Normal file
View File

@@ -0,0 +1,317 @@
from ythdd_globals import safeTraverse
from html import escape
import json
import dateparser
import ythdd_globals
import ythdd_extractor
def genThumbs(videoId: str):
    """Build the list of thumbnail descriptors for a video.

    Every thumbnail URL is rooted at the configured public facing URL of
    this instance (``<public_facing_url>vi/<videoId>/<name>.jpg``).

    Returns a list of dicts with the keys 'quality', 'url', 'width' and
    'height', ordered from the largest variant to the smallest.
    """
    base_url = ythdd_globals.config['general']['public_facing_url'] + 'vi/' + videoId + '/'

    # (quality label, file name without extension, width, height)
    # for the time being the buggy "maxres" quality is omitted
    variants = (
        ("maxresdefault", "maxresdefault", 1280, 720),
        ("sddefault", "sddefault", 640, 480),
        ("high", "hqdefault", 480, 360),
        ("medium", "mqdefault", 320, 180),
        ("default", "default", 120, 90),
        ("start", "1", 120, 90),
        ("middle", "2", 120, 90),
        ("end", "3", 120, 90),
    )

    return [
        {'quality': label, 'url': base_url + file_name + '.jpg', 'width': width, 'height': height}
        for label, file_name, width, height in variants
    ]
def doesContainNumber(string: str, numeric_system: int = 10) -> bool:
    """Tell whether the whole string can be parsed as an integer.

    string:         text to check
    numeric_system: base handed to int() (10 by default)

    Returns True when int(string, numeric_system) succeeds, False when it
    raises ValueError.
    """
    try:
        int(string, numeric_system)
    except ValueError:
        return False
    return True
def parseLengthFromTimeBadge(time_str: str) -> int:
    """Convert a duration badge such as "1:02:03" into a number of seconds.

    The colon separated components are read from right to left as seconds,
    minutes, hours and days.

    Returns 0 if unsuccessful, that is when:
      - the argument is not a string,
      - any component is not an integer (works around 'LIVE' for
        livestreams or 'Upcoming' for scheduled videos),
      - there are more components than known units (this used to raise
        an IndexError).
    """
    if not isinstance(time_str, str):
        return 0

    multipliers = (1, 60, 3_600, 86_400)
    parts = time_str.split(":")
    if len(parts) > len(multipliers):
        return 0

    try:
        values = [int(part, 10) for part in parts]
    except ValueError:
        return 0

    # the least significant unit comes last in the badge, hence reversed()
    return sum(unit * value for unit, value in zip(multipliers, reversed(values)))
def parseViewsFromViewText(viewcounttext: str) -> int:
    """Turn a human readable counter ("1.2K views", "1,234 views") into an int.

    All ASCII digits and dots found in the text make up the number, and the
    last character of the first whitespace separated token is checked for a
    K/M/B magnitude suffix (case-insensitive).

    Returns 0 if unsuccessful: for empty input, for texts without digits
    (like "No views") and for digit/dot sequences which do not form a valid
    number (like "1.2.3").
    """
    magnitude = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

    if not viewcounttext:
        return 0

    # thousands separators (commas) and the unit are dropped here
    digits = "".join(char for char in viewcounttext if char in "0123456789.")
    try:
        # the leading "0" makes an empty string and ".5" parseable
        views = float("0" + digits)
    except ValueError:
        # more than one dot, e.g. a trailing full stop
        return 0

    # split() without arguments also copes with leading whitespace and with
    # non-breaking spaces between the number and the unit
    tokens = viewcounttext.split()
    if tokens:
        views *= magnitude.get(tokens[0][-1].upper(), 1)

    # round() rather than int(), so that a float product ending up slightly
    # below the intended whole number is not truncated
    return round(views)
def parseRenderers(entry: dict, context: dict = {}) -> dict:
if not isinstance(entry, dict):
raise ValueError("parsed entry is not of type dict")
match safeTraverse(list(entry.keys()), [0], default=""):
case "videoRenderer": # represents a video
published_date = safeTraverse(entry, ["videoRenderer", "publishedTimeText", "simpleText"], default="now")
published_date = published_date.removeprefix("Streamed ")
description, description_html = parseDescriptionSnippet(safeTraverse(entry, ["videoRenderer", "descriptionSnippet", "runs"], default=[]))
if "author_name" in context:
author_name = context["author_name"]
else:
author_name = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "text"], default="Unknown author")
if "author_ucid" in context:
author_ucid = context["author_ucid"]
else:
author_ucid = safeTraverse(entry, ["videoRenderer", "ownerText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
if "verified" in context:
verified = context["verified"]
else:
verified = ythdd_extractor.isVerified(safeTraverse(entry, ["ownerBadges", 0]))
if "avatar" in context:
avatar_url = context["avatar"]
else:
avatar_url = safeTraverse(entry, ["videoRenderer", "avatar", "decoratedAvatarViewModel", "avatar", "avatarViewModel", "image", "sources", 0, "url"], default="unknown")
return {
"type": "video",
"title": safeTraverse(entry, ["videoRenderer", "title", "runs", 0, "text"]),
"videoId": safeTraverse(entry, ["videoRenderer", "videoId"]),
"author": author_name,
"authorId": author_ucid,
"authorUrl": "/channel/" + author_ucid,
"authorVerified": verified, # TODO
"authorThumbnails": ythdd_extractor.generateChannelAvatarsFromUrl(avatar_url),
"videoThumbnails": genThumbs(safeTraverse(entry, ["videoRenderer", "videoId"], default="unknown")),
"description": description,
"descriptionHtml": description_html,
"viewCount": parseViewsFromViewText(safeTraverse(entry, ["videoRenderer", "viewCountText", "simpleText"], default="No views")),
"viewCountText": safeTraverse(entry, ["videoRenderer", "viewCountText", "simpleText"], default="Unknown amount of views"),
"published": int(dateparser.parse(published_date).timestamp()), # sadly best we can do, invidious does this too
"publishedText": published_date,
"lengthSeconds": parseLengthFromTimeBadge(safeTraverse(entry, ["videoRenderer", "lengthText", "simpleText"], default="0:0")),
"liveNow": False,
"premium": ythdd_extractor.isPremium(safeTraverse(entry, ["videoRenderer", "badges", 0])), # will fail if it's not the only badge
"isUpcoming": False,
"isNew": False,
"is4k": False,
"is8k": False,
"isVr180": False,
"isVr360": False,
"is3d": False,
"hasCaptions": False
}
# modify the premiere timestamp afterwards here?
case "lockupViewModel": # represents playlists/mixes
playlist_type = safeTraverse(entry, ["lockupViewModel", "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "icon", "sources", 0, "clientResource", "imageName"], default="PLAYLISTS")
if playlist_type == "MIX":
# mixes aren't currently supported
return
lvm = entry["lockupViewModel"]
meta = safeTraverse(lvm, ["metadata"], default=[])
lmvm = safeTraverse(meta, ["lockupMetadataViewModel", "metadata", "contentMetadataViewModel", "metadataRows"], default=[])
thumbnail = ythdd_globals.translateLinks(safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "image", "sources", -1, "url"], default="no-url?"))
verified = safeTraverse(context, ["verified"], default=False)
playlist_id = safeTraverse(lvm, ["contentId"], default="UNKNOWNPLAYLISTID")
length = safeTraverse(lvm, ["contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "text"], default="0 videos")
length = parseViewsFromViewText(length.split(" ")[0])
# Turns out for some responses we do some data, while not on others.
# Data from context should be prioritized, thus even if something is found with safeTraverse,
# the parser will ignore it in favour of the context.
ucid = safeTraverse(lmvm, [0, "metadataParts", 0, "text", "commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId"], default="UNKNOWNCHANNELID")
author = safeTraverse(lmvm, [0, "metadataParts", 0, "text", "content"], default="ythdd: unknown author")
ucid = safeTraverse(context, ["author_ucid"], default=ucid)
author = safeTraverse(context, ["author_name"], default=author)
return {
"type": "playlist",
"title": safeTraverse(meta, ["lockupMetadataViewModel", "title", "content"], default="ythdd: unknown title"),
"playlistId": playlist_id,
"playlistThumbnail": thumbnail,
"author": author,
"authorId": ucid,
"authorUrl": "/channel/" + ucid,
"authorVerified": verified,
"videoCount": length,
"videos": [] # provided for historical reasons i guess
}
case "shelfRenderer": # "people also watched"
return
case "gridShelfViewModel": # shorts?
return
case "shortsLockupViewModel": # shorts on channel pages
video_id = safeTraverse(entry, ["shortsLockupViewModel", "onTap", "innertubeCommand", "reelWatchEndpoint", "videoId"], default="UnknownVideoId")
title = safeTraverse(entry, ["shortsLockupViewModel", "overlayMetadata", "primaryText", "content"], default="ythdd: couldn't find title")
views_text = safeTraverse(entry, ["shortsLockupViewModel", "overlayMetadata", "secondaryText", "content"], default="No views")
published_date = "No data about published time" # the view model doesn't provide data about the date a short is published
if video_id == "UnknownVideoId": # failsafe
video_id = safeTraverse(entry, ["shortsLockupViewModel", "entityId"], default="-UnknownVideoId")
video_id = video_id[video_id.rfind("-") + 1:]
if "author_name" in context:
author_name = context["author_name"]
else:
author_name = "Unknown author"
if "author_ucid" in context:
author_ucid = context["author_ucid"]
else:
author_ucid = "UNKNOWNCHANNELID"
if "verified" in context:
verified = context["verified"]
else:
verified = False
if "avatar" in context:
avatar_url = context["avatar"]
else:
avatar_url = "unknown"
return {
"type": "video",
"title": title,
"videoId": video_id,
"author": author_name,
"authorId": author_ucid,
"authorUrl": "/channel/" + author_ucid,
"authorVerified": False,
"videoThumbnails": genThumbs(video_id),
"description": "",
"descriptionHtml": "",
"viewCount": parseViewsFromViewText(views_text),
"viewCountText": views_text,
"published": int(0),
"publishedText": published_date,
"lengthSeconds": int(60), # invidious locks this to 60s no matter what the actual duration is
"liveNow": False,
"premium": False,
"isUpcoming": False,
"premiereTimestamp": 0,
"isNew": False,
"is4k": False,
"is8k": False,
"isVr180": False,
"isVr360": False,
"is3d": False,
"hasCaptions": False
}
case "gridVideoRenderer": # videos on channel pages
# doesn't work on Yattee
# thumbnails = safeTraverse(entry, ["gridVideoRenderer", "thumbnail", "thumbnails"], default=[])
# for thumbnail in thumbnails:
# thumbnail["url"] = ythdd_globals.translateLinks(thumbnail["url"])
video_id = safeTraverse(entry, ["gridVideoRenderer", "videoId"], default="UnknownVideoId")
thumbnails = genThumbs(video_id)
published_date = safeTraverse(entry, ["gridVideoRenderer", "publishedTimeText", "simpleText"], default="now")
published_date = published_date.removeprefix("Streamed ")
return {
"type": "video",
"title": safeTraverse(entry, ["gridVideoRenderer", "title", "simpleText"], default="unknown video title"),
"videoId": video_id,
"author": context["author_name"],
"authorId": context["author_ucid"],
"authorUrl": "/channel/" + context["author_ucid"],
"authorVerified": False, # TODO: handle badge related tasks here using context
"videoThumbnails": thumbnails,
"description": "", # won't work without using an RSS feed (?)
"descriptionHtml": "", # -||-
"viewCount": parseViewsFromViewText(safeTraverse(entry, ["gridVideoRenderer", "viewCountText", "simpleText"], default="0 views")),
"viewCountText": safeTraverse(entry, ["gridVideoRenderer", "shortViewCountText", "simpleText"], default="0 views"),
"published": int(dateparser.parse(published_date).timestamp()),
"publishedText": published_date,
"lengthSeconds": parseLengthFromTimeBadge(safeTraverse(entry, ["gridVideoRenderer", "thumbnailOverlays", 0, "thumbnailOverlayTimeStatusRenderer", "text", "simpleText"], default="0:0")),
"liveNow": True if published_date == "now" else False,
"premium": False,
"isUpcoming": False,
"isNew": False,
"is4k": False,
"is8k": False,
"isVr180": False,
"isVr360": False,
"is3d": False,
"hasCaptions": False
}
case "channelRenderer": # channels in search results
avatars = ythdd_extractor.generateChannelAvatarsFromUrl(safeTraverse(entry, ["channelRenderer", "thumbnail", "thumbnails", 0, "url"], default="no-avatar"))
description, description_html = parseDescriptionSnippet(safeTraverse(entry, ["channelRenderer", "descriptionSnippet", "runs"], default=[]))
isVerified = ythdd_extractor.isVerified(safeTraverse(entry, ["channelRenderer", "ownerBadges", 0], default=[]))
return {
"type": "channel",
"author": safeTraverse(entry, ["channelRenderer", "title", "simpleText"], default="Unknown channel"),
"authorId": safeTraverse(entry, ["channelRenderer", "channelId"], default="UNKNOWNCHANNELID"),
"authorUrl": "/channel/" + safeTraverse(entry, ["channelRenderer", "channelId"], default="UNKNOWNCHANNELID"),
"authorVerified": isVerified,
"authorThumbnails": avatars,
"autoGenerated": False,
"subCount": parseViewsFromViewText(safeTraverse(entry, ["channelRenderer", "videoCountText", "simpleText"], default="0 subscribers")),
"videoCount": 0,
"channelHandle": safeTraverse(entry, ["channelRenderer", "navigationEndpoint", "browseEndpoint", "canonicalBaseUrl"], default="/@ythdd_unknown_handle")[1:],
"description": description,
"descriptionHtml": description_html
}
case _:
print("received an entry of unknown type:")
print(entry)
print("")
# breakpoint()
return
def parseDescriptionSnippet(snippet: list):
    """Join description snippet runs into a plain text and an HTML variant.

    snippet: list of run dicts; the "text" key holds the content of a run
             and a truthy "bold" key marks the run as bold.

    Returns a (text, text_html) tuple. In text_html the content of every
    run is HTML-escaped, bold runs are wrapped in <b></b> and newlines are
    replaced with <br>.
    """
    plain_parts = []
    html_parts = []

    for entry in snippet:
        run_text = entry.get("text", "")
        plain_parts.append(run_text)

        # escape the content only - escaping after the tags have been added
        # would turn the <b> markup itself into &lt;b&gt;
        run_html = escape(run_text)
        if entry.get("bold"):
            run_html = "<b>" + run_html + "</b>"
        html_parts.append(run_html)

    text = "".join(plain_parts)
    text_html = "".join(html_parts).replace("\r\n", "<br>").replace("\n", "<br>")

    return text, text_html