Solve merge conflict

parent 8efae82302
commit c66afd6485
@@ -454,8 +454,27 @@ def get_live_urls(urls):
 def watch():
     id = request.args.get('v', None)
     info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
+<<<<<<< Updated upstream
+<<<<<<< Updated upstream
     # Use nginx
     best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
+=======
+=======
+>>>>>>> Stashed changes
+    vsources = ytwatch.get_video_sources(info, False)
+
+    # Retry 3 times if no sources are available.
+    retry = 3
+    while retry != 0 and len(vsources) == 0:
+        vsources = ytwatch.get_video_sources(info, False)
+        retry -= 1
+
+    for source in vsources:
+        hostName = urllib.parse.urlparse(source['src']).netloc
+        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
+
+    # Parse video formats
+>>>>>>> Stashed changes
     for v_format in info['formats']:
         hostName = urllib.parse.urlparse(v_format['url']).netloc
         v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
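The retry loop above re-requests video sources up to three times when the extractor returns none, and each source URL is rewritten so the googlevideo host is stripped and carried in a "host" query parameter instead (the "# Use nginx" comment suggests a reverse proxy is expected to route on it; that reading is an inference, not stated in the diff). A rough sketch of what the rewrite produces, with a made-up source URL:

    import urllib.parse

    src = "https://r4---sn-example.googlevideo.com/videoplayback?expire=123&itag=22"  # hypothetical source
    host = urllib.parse.urlparse(src).netloc
    print(src.replace("https://{}".format(host), "") + "&host=" + host)
    # -> /videoplayback?expire=123&itag=22&host=r4---sn-example.googlevideo.com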
@@ -38,6 +38,14 @@ packaging==20.4
 pylint==2.6.0
 PyMySQL==0.10.1
 pyparsing==2.4.7
+<<<<<<< Updated upstream
+=======
+PySocks==1.7.1
+python-anticaptcha==0.7.1
+<<<<<<< Updated upstream
+>>>>>>> Stashed changes
+=======
+>>>>>>> Stashed changes
 python-dateutil==2.8.1
 python-dotenv==0.14.0
 python-editor==1.0.4
@@ -11,5 +11,6 @@
     "admin_message":"Message from the admin text",
     "admin_user":"admin_username",
     "max_old_user_days": 60,
-    "donate_url": ""
+    "donate_url": "",
+    "anticaptcha": "cf4bb53a6b87f973be8c0c976c390342"
 }
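The new "anticaptcha" entry is the anti-captcha.com API key that youtube/util.py reads in this same commit. A minimal sketch of how it is consumed there (the key in the config above is whatever the operator sets; nothing else is assumed):

    import json
    from python_anticaptcha import AnticaptchaClient

    config = json.load(open('yotter-config.json'))
    client = AnticaptchaClient(config['anticaptcha'])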
youtube/util.py (155 changed lines)
@@ -1,9 +1,13 @@
 import gzip
+
+import requests
+from bs4 import BeautifulSoup
+
 from youtube import yt_data_extract
 
 try:
     import brotli
 
     have_brotli = True
 except ImportError:
     have_brotli = False
@@ -15,7 +19,7 @@ import json
 import gevent
 import gevent.queue
 import gevent.lock
+from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
 # configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -51,13 +55,12 @@ import urllib3.contrib.socks
 
 URL_ORIGIN = "/https://www.youtube.com"
 
-connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
+connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
 
-def get_pool(use_tor):
-    return connection_pool
 
 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     '''Separate cookiejars for receiving and sending'''
 
     def __init__(self, cookiejar_send=None, cookiejar_receive=None):
         self.cookiejar_send = cookiejar_send
         self.cookiejar_receive = cookiejar_receive
@@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response
 
+
 class FetchError(Exception):
     def __init__(self, code, reason='', ip=None):
         Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@@ -82,6 +86,7 @@ class FetchError(Exception):
         self.reason = reason
         self.ip = ip
 
+
 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
     for encoding in reversed(encodings):
@@ -93,6 +98,57 @@ def decode_content(content, encoding_header):
             content = gzip.decompress(content)
     return content
 
+
+def bypass_captcha():
+    session = requests.Session()
+    url = "https://youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999"
+    print("Starting python GET request...")
+    response = session.get(url)
+    print("GET successful!")
+    print("vvv COOKIES DICT vvv")
+    cookies = session.cookies.get_dict()
+    print(cookies)
+
+    inputs = {}
+    html = BeautifulSoup(str(response.text), "lxml")
+
+    # If there's a captcha and we need to solve it...
+    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+        # Get the captcha form
+        form = html.body.find('form', attrs={"action": "/das_captcha"})
+
+        # Set up form inputs for request
+        for _input in form.find_all('input'):
+            try:
+                print(_input["name"] + " -> " + _input["value"])
+                inputs[_input["name"]] = _input["value"]
+            except KeyError:
+                continue
+        print("\n vvv Form inputs created vvv ")
+        print(inputs)
+
+        # Get CAPTCHA keys
+        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
+        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
+
+        # Get anti-captcha API key
+        config = json.load(open('yotter-config.json'))
+        client = AnticaptchaClient(config['anticaptcha'])
+        # Create anti-captcha Task
+        task = NoCaptchaTaskProxylessTask(url, site_key)
+        job = client.createTask(task)
+        job.join()
+
+        inputs['g-recaptcha-response'] = job.get_solution_response()
+
+        # Print POST request headers
+        print(requests.post("https://youtube.com/das_captcha", data=inputs,
+                            headers={"Content-Type": "application/x-www-form-urlencoded",
+                                     "Accept-Language": "en-US,en;q=0.5",
+                                     "Referer": "https://www.youtube.com/das_captcha",
+                                     "Origin": "https://www.youtube.com"}).headers)
+
+
 def fetch_url_response(url, headers=(), timeout=15, data=None,
                        cookiejar_send=None, cookiejar_receive=None,
                        use_tor=True, max_redirects=None):
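Stripped of the page scraping and debug prints, the solving step in bypass_captcha boils down to the python-anticaptcha calls already imported in this file; url and site_key come from the scraped captcha page, and the returned token is what gets posted back as g-recaptcha-response. A condensed sketch, not a drop-in replacement:

    from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask

    def solve_recaptcha(api_key, url, site_key):
        client = AnticaptchaClient(api_key)
        job = client.createTask(NoCaptchaTaskProxylessTask(url, site_key))
        job.join()  # block until the solving service returns a token
        return job.get_solution_response()  # value for the g-recaptcha-response field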
@@ -105,7 +161,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
     '''
     headers = dict(headers) # Note: Calling dict() on a dict will make a copy
     if have_brotli:
         headers['Accept-Encoding'] = 'gzip, br'
     else:
@@ -124,32 +180,46 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     elif not isinstance(data, bytes):
         data = urllib.parse.urlencode(data).encode('ascii')
 
     if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
 
-        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
+        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
+                                                         cookiejar_receive=cookiejar_receive)
         opener = urllib.request.build_opener(cookie_processor)
 
         response = opener.open(req, timeout=timeout)
         cleanup_func = (lambda r: None)
 
     else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         # default: Retry.DEFAULT = Retry(3)
         # (in connectionpool.py in urllib3)
         # According to the documentation for urlopen, a redirect counts as a
         # retry. So there are 3 redirects max by default.
+        print("Testing for CAPTCHA python GET request...")
+        r = requests.get(url)
+        print("GET successful!")
+
+        html = BeautifulSoup(str(r.text), "lxml")
+        # If there's a captcha and we need to solve it...
+        if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+            print("ReCaptcha detected! Trying to bypass it.")
+            bypass_captcha()
+
         if max_redirects:
-            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+            retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
         else:
             retries = urllib3.Retry(3)
-        pool = get_pool(use_tor)
+        pool = connection_pool
         response = pool.request(method, url, headers=headers,
                                 timeout=timeout, preload_content=False,
                                 decode_content=False, retries=retries)
 
         cleanup_func = (lambda r: r.release_conn())
 
     return response, cleanup_func
 
 
 def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
               cookiejar_send=None, cookiejar_receive=None, use_tor=True,
               debug_name=None):
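In the urllib3 branch above, redirects are budgeted through Retry: per the in-code comment, a redirect counts as a retry, so the total is padded by 3 to keep three ordinary retries alongside the redirect cap. A standalone sketch with a hypothetical cap:

    import urllib3

    max_redirects = 10  # hypothetical cap
    pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
    response = pool.request('GET', 'https://www.youtube.com/', retries=retries,
                            preload_content=False, decode_content=False)
    response.release_conn()  # same cleanup as cleanup_func above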
@@ -159,18 +229,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         url, headers, timeout=timeout,
         cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
         use_tor=use_tor)
+    print(response)
     response_time = time.time()
 
     content = response.read()
     read_finish = time.time()
 
     cleanup_func(response) # release_connection for urllib3
 
     if (response.status == 429
             and content.startswith(b'<!DOCTYPE')
             and b'Our systems have detected unusual traffic' in content):
         ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                        content)
         ip = ip.group(1).decode('ascii') if ip else None
         raise FetchError('429', reason=response.reason, ip=ip)
 
@@ -178,12 +248,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         raise FetchError(str(response.status), reason=response.reason, ip=None)
 
     if report_text:
-        print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
+        print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:',
+              round(read_finish - response_time, 3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
     return content
 
+
 def head(url, use_tor=False, report_text=None, max_redirects=10):
-    pool = get_pool(use_tor)
+    pool = connection_pool
     start_time = time.time()
 
     # default: Retry.DEFAULT = Retry(3)
@@ -191,24 +263,21 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
     # According to the documentation for urlopen, a redirect counts as a retry
     # So there are 3 redirects max by default. Let's change that
     # to 10 since googlevideo redirects a lot.
-    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
+    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects,
                             raise_on_redirect=False)
     headers = {'User-Agent': 'Python-urllib'}
     response = pool.request('HEAD', url, headers=headers, retries=retries)
     if report_text:
-        print(report_text, ' Latency:', round(time.time() - start_time,3))
+        print(report_text, ' Latency:', round(time.time() - start_time, 3))
     return response
 
 
 mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
 mobile_ua = (('User-Agent', mobile_user_agent),)
 desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
 desktop_ua = (('User-Agent', desktop_user_agent),)
-
-
-
 
 
 class RateLimitedQueue(gevent.queue.Queue):
     ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
 
@@ -225,9 +294,8 @@ class RateLimitedQueue(gevent.queue.Queue):
         self.empty_start = 0
         gevent.queue.Queue.__init__(self)
 
-
     def get(self):
         self.lock.acquire() # blocks if another greenlet currently has the lock
         if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
             gevent.sleep(self.waiting_period)
             self.count_since_last_wait = 0
@@ -243,7 +311,7 @@ class RateLimitedQueue(gevent.queue.Queue):
             self.currently_empty = True
             self.empty_start = time.monotonic()
 
         item = gevent.queue.Queue.get(self) # blocks when nothing left
 
         if self.currently_empty:
             if time.monotonic() - self.empty_start >= self.waiting_period:
@@ -257,7 +325,6 @@ class RateLimitedQueue(gevent.queue.Queue):
         return item
 
-
 
 def download_thumbnail(save_directory, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +336,23 @@ def download_thumbnail(save_directory, video_id):
     try:
         f = open(save_location, 'wb')
     except FileNotFoundError:
-        os.makedirs(save_directory, exist_ok = True)
+        os.makedirs(save_directory, exist_ok=True)
         f = open(save_location, 'wb')
     f.write(thumbnail)
     f.close()
     return True
 
 
 def download_thumbnails(save_directory, ids):
     if not isinstance(ids, (list, tuple)):
         ids = list(ids)
     # only do 5 at a time
     # do the n where n is divisible by 5
     i = -1
-    for i in range(0, int(len(ids)/5) - 1 ):
-        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
+    for i in range(0, int(len(ids) / 5) - 1):
+        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
     # do the remainders (< 5)
-    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
+    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])
-
-
 
 
 def dict_add(*dicts):
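The index arithmetic in download_thumbnails fetches thumbnails in batches of five greenlets and then one final batch for whatever is left; with 12 ids, for example, the loop spawns one full batch and the last call picks up the remaining seven. A small sketch of just the slicing (the ids list is made up):

    ids = ["vid%02d" % n for n in range(12)]  # hypothetical list of 12 video ids
    i = -1
    for i in range(0, int(len(ids) / 5) - 1):
        print(ids[i * 5:i * 5 + 5])    # full batches of five
    print(ids[i * 5 + 5:len(ids)])     # the remainder batch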
@@ -296,6 +360,7 @@ def dict_add(*dicts):
         dicts[0].update(dictionary)
     return dicts[0]
 
+
 def video_id(url):
     url_parts = urllib.parse.urlparse(url)
     return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -304,11 +369,12 @@ def video_id(url):
 # default, sddefault, mqdefault, hqdefault, hq720
 def get_thumbnail_url(video_id):
     return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
 
+
 def seconds_to_timestamp(seconds):
     seconds = int(seconds)
-    hours, seconds = divmod(seconds,3600)
-    minutes, seconds = divmod(seconds,60)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
     if hours != 0:
         timestamp = str(hours) + ":"
         timestamp += str(minutes).zfill(2) # zfill pads with zeros
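The divmod pair above splits a second count into hours, minutes and leftover seconds; the rest of the function falls outside this hunk, but the arithmetic itself works out like this for, say, 3725 seconds:

    seconds = 3725
    hours, seconds = divmod(seconds, 3600)   # hours = 1, seconds = 125
    minutes, seconds = divmod(seconds, 60)   # minutes = 2, seconds = 5
    # with the zfill(2) padding shown above this renders as roughly "1:02:..."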
@@ -319,31 +385,32 @@ def seconds_to_timestamp(seconds):
     return timestamp
 
 
 def update_query_string(query_string, items):
     parameters = urllib.parse.parse_qs(query_string)
     parameters.update(items)
     return urllib.parse.urlencode(parameters, doseq=True)
 
 
 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
         lambda m: chr(int(m.group(1), base=16)), s)
 
 
 def prefix_url(url):
     if url is None:
         return None
     url = url.lstrip('/') # some urls have // before them, which has a special meaning
     return '/' + url
 
 
 def left_remove(string, substring):
     '''removes substring from the start of string, if present'''
     if string.startswith(substring):
         return string[len(substring):]
     return string
 
 
 def concat_or_none(*strings):
     '''Concatenates strings. Returns None if any of the arguments are None'''
     result = ''
@@ -365,6 +432,7 @@ def prefix_urls(item):
     except KeyError:
         pass
 
+
 def add_extra_html_info(item):
     if item['type'] == 'video':
         item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +451,7 @@ def add_extra_html_info(item):
     elif item['type'] == 'channel':
         item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
 
+
 def parse_info_prepare_for_html(renderer, additional_info={}):
     item = yt_data_extract.extract_item_info(renderer, additional_info)
     prefix_urls(item)
@@ -390,8 +459,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
 
     return item
 
 
 def check_gevent_exceptions(*tasks):
     for task in tasks:
         if task.exception:
             raise task.exception
 
@@ -148,7 +148,7 @@ headers = (
 def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
     # videos
-    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
+    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
     if playlist_id:
         url += '&list=' + playlist_id
     if index:
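The only change in this last hunk is the watch URL: gl=US&hl=en&has_verified=1 are appended alongside the existing pbj=1&bpctr=9999999999. The comment above documents bpctr; the extra parameters presumably pin region and language and skip the age-verification interstitial (that reading is an assumption, not stated in the diff). For a hypothetical video id the request URL becomes:

    video_id = "dQw4w9WgXcQ"  # hypothetical id
    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'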