2020-09-10 05:54:32 +05:30
|
|
|
from bs4 import BeautifulSoup as bs
|
|
|
|
from urllib.parse import unquote
|
|
|
|
from youtube_dl import YoutubeDL
|
|
|
|
import urllib.parse
|
|
|
|
import requests
|
|
|
|
import json
|
|
|
|
|
2020-09-20 15:29:26 +05:30
|
|
|
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
|
|
|
|
_formats = {
|
|
|
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
|
|
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
|
|
|
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
|
|
|
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'mp4v'},
|
|
|
|
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 96, 'vcodec': 'h264'},
|
|
|
|
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
|
|
|
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), audio_bitrate varies as well
|
|
|
|
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
|
|
|
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
|
|
|
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
|
|
|
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
|
|
|
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
|
|
|
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
|
|
|
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
|
|
|
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
|
|
|
|
|
|
|
|
# 3D videos
|
|
|
|
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
|
|
|
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
|
|
|
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
|
|
|
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
|
|
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
|
|
|
|
|
|
|
# Apple HTTP Live Streaming
|
|
|
|
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
|
|
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
|
|
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
|
|
|
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
|
|
|
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
|
|
|
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
|
|
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'h264'},
|
|
|
|
|
|
|
|
# DASH mp4 video
|
|
|
|
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
|
|
|
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
|
|
|
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
|
|
|
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
|
|
|
|
|
|
# Dash mp4 audio
|
|
|
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 48, 'container': 'm4a_dash'},
|
|
|
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 128, 'container': 'm4a_dash'},
|
|
|
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 256, 'container': 'm4a_dash'},
|
|
|
|
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
|
|
|
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
|
|
|
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
|
|
|
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
|
|
|
|
|
|
|
|
# Dash webm
|
|
|
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
|
|
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
|
|
|
|
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
|
|
|
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
|
|
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
|
|
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
|
|
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
|
|
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
|
|
|
|
|
|
# Dash webm audio
|
|
|
|
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 128},
|
|
|
|
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 256},
|
|
|
|
|
|
|
|
# Dash webm audio with opus inside
|
|
|
|
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 50},
|
|
|
|
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 70},
|
|
|
|
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 160},
|
|
|
|
|
|
|
|
# RTMP (unnamed)
|
|
|
|
'_rtmp': {'protocol': 'rtmp'},
|
|
|
|
|
|
|
|
# av01 video only formats sometimes served with "unknown" codecs
|
|
|
|
'394': {'vcodec': 'av01.0.05M.08'},
|
|
|
|
'395': {'vcodec': 'av01.0.05M.08'},
|
|
|
|
'396': {'vcodec': 'av01.0.05M.08'},
|
|
|
|
'397': {'vcodec': 'av01.0.05M.08'},
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-09-10 05:54:32 +05:30
|
|
|
def get_renderer_key(renderer, key):
    """Return the value stored under *key* in the first entry that has it.

    *renderer* is iterated in order; each entry is tested with ``key in
    entry`` and the first hit is returned via ``entry[key]``.  ``None`` is
    returned when no entry contains *key* (including when *renderer* is
    empty).
    """
    matches = (entry[key] for entry in renderer if key in entry)
    return next(matches, None)
|
|
|
|
|
|
|
|
# Upper bound on youtube-dl extraction attempts when it reports no formats.
_MAX_EXTRACT_ATTEMPTS = 5


def _extract_formats(video_id):
    """Ask youtube-dl for the formats of *video_id*, retrying a bounded number of times.

    Returns the non-empty ``formats`` list from the first successful
    attempt, or an empty list when every attempt came back without formats.
    """
    ydl = YoutubeDL()
    for _ in range(_MAX_EXTRACT_ATTEMPTS):
        data = ydl.extract_info(video_id, download=False)
        formats = data.get('formats') if data else None
        if formats:
            return formats
    return []


def _collect_audio_streams(formats):
    """Pick the audio streams out of a youtube-dl *formats* list.

    A format is kept when its ``format_id`` is present in ``_formats``, the
    table entry's ``format_note`` contains ``"audio"``, the table entry has
    an ``audio_bitrate`` and the format itself has a ``url``.  Each kept
    stream becomes ``{"url": ..., "id": <format_note>, "btr": <bitrate>}``,
    in the order the formats were given.
    """
    streams = []
    for fmt in formats:
        # Direct dict lookup instead of scanning every known itag.
        spec = _formats.get(fmt.get('format_id'))
        if spec is None:
            continue
        note = spec.get('format_note', '')
        if 'audio' not in note or 'audio_bitrate' not in spec or 'url' not in fmt:
            continue
        streams.append({
            "url": fmt['url'],
            "id": note,
            "btr": spec['audio_bitrate'],
        })
    return streams


def get_video_primary_info(datad, datai):
    """Build the primary metadata dict for a video.

    Parameters:
        datad: decoded player-response object; its ``videoDetails`` mapping
            supplies id, title, description, counters and flags.
        datai: decoded initial-data object; the ``videoPrimaryInfoRenderer``
            found in its watch-next contents supplies the date text.

    Returns:
        A dict with the keys ``id``, ``title``, ``description``, ``views``,
        ``duration``, ``date``, ``rating``, ``author``, ``isPrivate``,
        ``isLive``, ``isUpcoming``, ``url``, ``allowRatings``, ``urls``,
        ``thumbnail``, ``audio`` and ``premieres``.

        For a scheduled (upcoming) video no extraction is attempted:
        ``views`` is ``"Scheduled video"``, ``premieres`` holds the date text
        and ``urls``/``audio`` are ``False``.  Otherwise ``urls`` is the
        youtube-dl formats list (possibly empty), ``premieres`` is ``False``
        and ``audio`` is ``"#"`` for a live stream, the last matching audio
        stream for a regular video, or ``False`` when none matched.

    Raises:
        KeyError / IndexError / TypeError: when a required field is missing
            from *datad* or *datai*.
    """
    contents = datai["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoPrimaryInfoRenderer")
    details = datad['videoDetails']
    video_id = details['videoId']
    date_text = item['dateText']['simpleText']

    # A live stream is flagged as live *and* reports a zero length.
    is_live = bool(details.get('isLive')) and details['lengthSeconds'] == '0'
    is_upcoming = bool(details.get('isUpcoming'))

    if is_upcoming:
        # Scheduled video: nothing to play yet, so skip youtube-dl entirely.
        views = "Scheduled video"
        premieres = date_text
        formats = False
        audio = False
    else:
        views = details['viewCount']
        premieres = False
        formats = _extract_formats(video_id)
        if is_live:
            audio = "#"
        else:
            audio_streams = _collect_audio_streams(formats)
            # False is the same "no audio" sentinel used for scheduled videos.
            audio = audio_streams[-1] if audio_streams else False

    return {
        "id": video_id,
        "title": details['title'],
        "description": details['shortDescription'],
        "views": views,
        "duration": details['lengthSeconds'],
        "date": date_text,
        "rating": details['averageRating'],
        "author": details['author'],
        # Optional flags fall back to the defaults the previous error path used.
        "isPrivate": details.get('isPrivate', False),
        "isLive": is_live,
        "isUpcoming": is_upcoming,
        # NOTE(review): the original referenced an undefined name here; the
        # watch-page URL is assumed to be the intended value - confirm.
        "url": f"https://youtube.com/watch?v={video_id}",
        "allowRatings": details.get('allowRatings', True),
        "urls": formats,
        "thumbnail": details['thumbnail']['thumbnails'][0]['url'],
        "audio": audio,
        "premieres": premieres,
    }
|
|
|
|
|
|
|
|
def get_video_owner_info(data):
    """Build the channel-owner metadata dict for a video.

    Parameters:
        data: decoded initial-data object; the owner block is read from the
            ``videoSecondaryInfoRenderer`` in its watch-next contents.

    Returns:
        A dict with ``thumbnail`` (first owner thumbnail URL), ``username``,
        ``id`` (the owner's browse id) and ``suscriberCount`` (the subscriber
        count text, or ``"Unknown"`` when it is not present).

    Raises:
        KeyError / IndexError / TypeError: when the owner block or one of its
            required fields is missing.
    """
    contents = data["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoSecondaryInfoRenderer")
    ownerItem = item['owner']['videoOwnerRenderer']
    title_run = ownerItem['title']['runs'][0]

    # The subscriber count is optional; only lookup failures are tolerated.
    try:
        subscriber_count = ownerItem['subscriberCountText']['runs'][0]['text']
    except (KeyError, IndexError, TypeError):
        subscriber_count = "Unknown"

    return {
        "thumbnail": ownerItem['thumbnail']['thumbnails'][0]['url'],
        "username": title_run['text'],
        "id": title_run['navigationEndpoint']['browseEndpoint']['browseId'],
        # Key spelling kept as-is; presumably consumers read this exact key.
        "suscriberCount": subscriber_count,
    }
|
|
|
|
|
|
|
|
# Upper bound on watch-page downloads when the embedded JSON is missing.
_WATCH_PAGE_FETCH_ATTEMPTS = 5
# Seconds to wait for YouTube before giving up on a single request.
_WATCH_PAGE_TIMEOUT = 10


def _extract_embedded_json(page, marker):
    """Decode the JSON object that follows *marker* in *page*.

    The object is taken to start at the first ``{`` after the marker and is
    parsed with the JSON decoder itself, so braces or ``};`` sequences inside
    string values cannot truncate it.

    Raises:
        ValueError: when the marker or an opening brace is not found, or the
            text after it is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    marker_end = page.index(marker) + len(marker)
    object_start = page.index('{', marker_end)
    decoded, _ = json.JSONDecoder().raw_decode(page[object_start:])
    return decoded


def get_video_info(id):
    """Fetch a YouTube watch page and return the video and owner metadata.

    Parameters:
        id: the video id; it is URL-quoted before being put in the query
            string.  (The name shadows the builtin but is part of the public
            signature, so it is kept.)

    Returns:
        ``{"video": <get_video_primary_info result>,
        "owner": <get_video_owner_info result>}``.

    Raises:
        ValueError: when no fetched page contains both embedded JSON blobs
            after the allowed number of attempts, or a blob cannot be decoded.
        requests.RequestException: on network failure or timeout.
    """
    headers = {"Accept-Language": "en-US,en;q=0.5"}
    BASE_URL = "https://youtube.com"
    url = f"{BASE_URL}/watch?v={urllib.parse.quote(id)}"

    initial_marker = 'window["ytInitialData"]'
    player_marker = 'window["ytInitialPlayerResponse"]'

    # The page is re-requested until it embeds both blobs, but only a
    # bounded number of times so a changed page layout cannot hang the caller.
    for _ in range(_WATCH_PAGE_FETCH_ATTEMPTS):
        response = requests.get(url, headers=headers, timeout=_WATCH_PAGE_TIMEOUT).text
        if initial_marker in response and player_marker in response:
            break
    else:
        raise ValueError(
            f"watch page for {id!r} did not contain the embedded player data"
        )

    dataInitial = _extract_embedded_json(response, initial_marker)
    dataDetails = _extract_embedded_json(response, player_marker)

    # title, views, date
    videoInfo = get_video_primary_info(dataDetails, dataInitial)
    ownerInfo = get_video_owner_info(dataInitial)

    return {"video": videoInfo, "owner": ownerInfo}
|