update extractor headers, add support for checking badges and avatars
This commit is contained in:
@@ -61,7 +61,7 @@ stage1_body = {
|
|||||||
|
|
||||||
stage2_headers = {
|
stage2_headers = {
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-us,en;q=0.5",
|
"Accept-Language": "en-us,en;q=0.5",
|
||||||
"Sec-Fetch-Mode": "navigate",
|
"Sec-Fetch-Mode": "navigate",
|
||||||
@@ -70,13 +70,13 @@ stage2_headers = {
|
|||||||
|
|
||||||
stage3_headers = {
|
stage3_headers = {
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-us,en;q=0.5",
|
"Accept-Language": "en-us,en;q=0.5",
|
||||||
"Sec-Fetch-Mode": "navigate",
|
"Sec-Fetch-Mode": "navigate",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"X-Youtube-Client-Name": "1",
|
"X-Youtube-Client-Name": "1",
|
||||||
"X-Youtube-Client-Version": "2.20241126.01.00",
|
"X-Youtube-Client-Version": "2.20250226.01.00",
|
||||||
"Origin": "https://www.youtube.com",
|
"Origin": "https://www.youtube.com",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
|
"Cookie": "PREF=hl=en&tz=UTC; SOCS=CAI"
|
||||||
@@ -88,7 +88,7 @@ stage3_body = {
|
|||||||
"client":
|
"client":
|
||||||
{
|
{
|
||||||
"clientName": "WEB",
|
"clientName": "WEB",
|
||||||
"clientVersion": "2.20241126.01.00",
|
"clientVersion": "2.20250226.01.00",
|
||||||
"hl": "en",
|
"hl": "en",
|
||||||
"timeZone": "UTC",
|
"timeZone": "UTC",
|
||||||
"utcOffsetMinutes": 0
|
"utcOffsetMinutes": 0
|
||||||
@@ -99,6 +99,30 @@ stage3_body = {
|
|||||||
"racyCheckOk": True
|
"racyCheckOk": True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Template request payload for the WEB client, used as the base that
# makeWebContext() extends with request-specific top-level keys.
# The client block describes Firefox 135.0 on Windows 10.0; the clientVersion
# and userAgent strings here are maintained by hand.
web_context_dict = {
    "context": {
        "client": {
            # Locale / region.
            "hl": "en",
            "gl": "US",
            # Device identification (left blank).
            "deviceMake": "",
            "deviceModel": "",
            "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0,gzip(gfe)",
            # Client identification.
            "clientName": "WEB",
            "clientVersion": "2.20250226.01.00",
            # Operating system.
            "osName": "Windows",
            "osVersion": "10.0",
            # Display characteristics.
            "screenPixelDensity": 2,
            "platform": "DESKTOP",
            "screenDensityFloat": 2,
            "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
            # Browser identification.
            "browserName": "Firefox",
            "browserVersion": "135.0",
            "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "utcOffsetMinutes": 0,
        },
    },
}
|
||||||
|
|
||||||
def extract(url: str, getcomments=False, maxcomments=""):
|
def extract(url: str, getcomments=False, maxcomments=""):
|
||||||
# TODO: check user-agent and cookiefile
|
# TODO: check user-agent and cookiefile
|
||||||
|
|
||||||
@@ -180,4 +204,70 @@ def IOSextract(uri: str):
|
|||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
#return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
|
#return {'stage1': stage1, 'stage2': stage2, 'stage3': stage3, 'took': end - start}
|
||||||
return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
|
return {'stage1': stage1, 'stage3': stage3, 'took': end - start}
|
||||||
|
|
||||||
|
def makeWebContext(secondaryContextDict: dict):
    """Build a request payload from ``web_context_dict`` plus extra keys.

    Args:
        secondaryContextDict: Top-level keys to add to (or override in) the
            payload, e.g. ``{'browseId': ...}``.

    Returns:
        A new dict containing a copy of ``web_context_dict`` with every key
        of ``secondaryContextDict`` set at the top level.
    """
    import copy  # function-scope import keeps this block self-contained

    # Work on a deep copy: plain assignment would only alias the module-level
    # template, so keys added for one request (such as 'browseId') would leak
    # into every later call and into the template itself.
    current_web_context_dict = copy.deepcopy(web_context_dict)
    current_web_context_dict.update(secondaryContextDict)

    return current_web_context_dict
|
||||||
|
|
||||||
|
def getChannelAvatar(response_json: dict):
    """Return the avatar with the highest pixel count from a channel response.

    The avatar list is looked up under
    ``metadata.channelMetadataRenderer.avatar.thumbnails`` first; if nothing
    is found there, the (lower resolution) sources of the page header's
    ``avatarViewModel`` are used instead.

    A great majority of the code has been influenced by
    https://github.com/iv-org/invidious/blob/master/src/invidious/channels/about.cr.

    Args:
        response_json: Parsed channel response to search for avatars.

    Returns:
        A new dict copied from the chosen avatar entry (expected to carry
        ``url``, ``width`` and ``height``), with ``url`` passed through
        ``ythdd_globals.translateLinks``. ``response_json`` is not modified.

    Raises:
        ValueError: If neither location contains any avatar.
    """
    avatars = safeTraverse(
        response_json,
        ['metadata', 'channelMetadataRenderer', 'avatar', 'thumbnails'],
        default=None,
    )

    if not avatars:
        # fallback to lower resolution avatars
        avatars = safeTraverse(
            response_json,
            [
                'header',
                'pageHeaderRenderer',
                'content',
                'pageHeaderViewModel',
                'image',
                'decoratedAvatarViewModel',
                'avatar',
                'avatarViewModel',
                'image',
                'sources',
            ],
            default=None,
        )

    if not avatars:
        # TODO: if avatars is still None, use a local avatar
        # Fail with a clear message instead of an opaque TypeError/IndexError
        # from indexing a missing or empty list.
        raise ValueError("No channel avatar could be found in the response.")

    def pixel_count(avatar):
        # Treat a missing or null dimension as 0 so such entries never win.
        return (avatar.get('width') or 0) * (avatar.get('height') or 0)

    best_avatar = avatars[-1]  # usually, the best avatar is stored last
    for avatar in avatars:
        if pixel_count(avatar) > pixel_count(best_avatar):
            best_avatar = avatar

    # Copy before rewriting the URL: mutating the entry in place would alter
    # the caller's response and re-translate the URL on a repeated call.
    result = dict(best_avatar)
    result['url'] = ythdd_globals.translateLinks(result['url'])

    return result
|
||||||
|
|
||||||
|
def isVerified(response_json: dict):
    """Return True if any user badge has been found (verified/artist)."""
    # NOTE(review): the traversal path is an empty list, which looks like a
    # placeholder. What is returned here depends on how safeTraverse treats
    # an empty path -- confirm and fill in the actual badge location.
    found = safeTraverse(response_json, [], default=False)

    return True if found else False
|
||||||
|
|
||||||
|
def browseAbout(ucid: str, timeout: float = 10.0):
    """Fetch the innertube browse endpoint response for a channel.

    Args:
        ucid: Channel ID; must be exactly 24 characters long.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body parsed from JSON (as a dict).

    Raises:
        ValueError: If ``ucid`` is not 24 characters long.
        requests.exceptions.RequestException: On network failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    if len(ucid) != 24:
        raise ValueError(f"Something is wrong with the UCID {ucid}. Expected a 24-character long channel ID, not {len(ucid)}.")

    context = makeWebContext({'browseId': ucid})

    response = requests.post(
        'https://www.youtube.com/youtubei/v1/browse?prettyPrint=false',
        headers=ythdd_globals.getHeaders(),
        json=context,
        timeout=timeout,
    )

    return json.loads(response.text)
|
||||||
Reference in New Issue
Block a user