Solve merge conflict
This commit is contained in:
parent 8efae82302
commit c66afd6485
@@ -454,8 +454,27 @@ def get_live_urls(urls):
def watch():
    id = request.args.get('v', None)
    info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
<<<<<<< Updated upstream
<<<<<<< Updated upstream
    # Use nginx
    best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
=======
=======
>>>>>>> Stashed changes
    vsources = ytwatch.get_video_sources(info, False)

    # Retry 3 times if no sources are available.
    retry = 3
    while retry != 0 and len(vsources) == 0:
        vsources = ytwatch.get_video_sources(info, False)
        retry -= 1

    for source in vsources:
        hostName = urllib.parse.urlparse(source['src']).netloc
        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName

    # Parse video formats
>>>>>>> Stashed changes
    for v_format in info['formats']:
        hostName = urllib.parse.urlparse(v_format['url']).netloc
        v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
@@ -38,6 +38,14 @@ packaging==20.4
pylint==2.6.0
PyMySQL==0.10.1
pyparsing==2.4.7
<<<<<<< Updated upstream
=======
PySocks==1.7.1
python-anticaptcha==0.7.1
<<<<<<< Updated upstream
>>>>>>> Stashed changes
=======
>>>>>>> Stashed changes
python-dateutil==2.8.1
python-dotenv==0.14.0
python-editor==1.0.4
@@ -11,5 +11,6 @@
"admin_message":"Message from the admin text",
"admin_user":"admin_username",
"max_old_user_days": 60,
"donate_url": ""
"donate_url": "",
"anticaptcha": "cf4bb53a6b87f973be8c0c976c390342"
}
155 youtube/util.py
@@ -1,9 +1,13 @@
import gzip

import requests
from bs4 import BeautifulSoup

from youtube import yt_data_extract

try:
    import brotli

    have_brotli = True
except ImportError:
    have_brotli = False
@@ -15,7 +19,7 @@ import json
import gevent
import gevent.queue
import gevent.lock

from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
# configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -51,13 +55,12 @@ import urllib3.contrib.socks

URL_ORIGIN = "/https://www.youtube.com"

connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')

def get_pool(use_tor):
    return connection_pool

class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    '''Separate cookiejars for receiving and sending'''

    def __init__(self, cookiejar_send=None, cookiejar_receive=None):
        self.cookiejar_send = cookiejar_send
        self.cookiejar_receive = cookiejar_receive
@@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    https_request = http_request
    https_response = http_response


class FetchError(Exception):
    def __init__(self, code, reason='', ip=None):
        Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@@ -82,6 +86,7 @@ class FetchError(Exception):
        self.reason = reason
        self.ip = ip


def decode_content(content, encoding_header):
    encodings = encoding_header.replace(' ', '').split(',')
    for encoding in reversed(encodings):
@@ -93,6 +98,57 @@ def decode_content(content, encoding_header):
            content = gzip.decompress(content)
    return content


def bypass_captcha():
    session = requests.Session()
    url = "https://youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999"
    print("Starting python GET request...")
    response = session.get(url)
    print("GET successful!")
    print("vvv COOKIES DICT vvv")
    cookies = session.cookies.get_dict()
    print(cookies)

    inputs = {}
    html = BeautifulSoup(str(response.text), "lxml")

    # If there's a captcha and we need to solve it...
    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
        # Get the captcha form
        form = html.body.find('form', attrs={"action": "/das_captcha"})

        # Set up form inputs for request
        for _input in form.find_all('input'):
            try:
                print(_input["name"] + " -> " + _input["value"])
                inputs[_input["name"]] = _input["value"]
            except KeyError:
                continue
        print("\n vvv Form inputs created vvv ")
        print(inputs)

        # Get CAPTCHA keys
        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']

        # Get anti-captcha API key
        config = json.load(open('yotter-config.json'))
        client = AnticaptchaClient(config['anticaptcha'])
        # Create anti-captcha Task
        task = NoCaptchaTaskProxylessTask(url, site_key)
        job = client.createTask(task)
        job.join()

        inputs['g-recaptcha-response'] = job.get_solution_response()

        # Print POST request headers
        print(requests.post("https://youtube.com/das_captcha", data=inputs,
                            headers={"Content-Type": "application/x-www-form-urlencoded",
                                     "Accept-Language": "en-US,en;q=0.5",
                                     "Referer": "https://www.youtube.com/das_captcha",
                                     "Origin": "https://www.youtube.com"}).headers)


def fetch_url_response(url, headers=(), timeout=15, data=None,
                       cookiejar_send=None, cookiejar_receive=None,
                       use_tor=True, max_redirects=None):
@@ -105,7 +161,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
    When both are set to the same object, cookies will be sent from the object,
    and response cookies will be merged into it.
    '''
    headers = dict(headers) # Note: Calling dict() on a dict will make a copy
    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
    if have_brotli:
        headers['Accept-Encoding'] = 'gzip, br'
    else:
@@ -124,32 +180,46 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
    if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
                                                         cookiejar_receive=cookiejar_receive)
        opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        cleanup_func = (lambda r: None)

    else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
    else:  # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        # default: Retry.DEFAULT = Retry(3)
        # (in connectionpool.py in urllib3)
        # According to the documentation for urlopen, a redirect counts as a
        # retry. So there are 3 redirects max by default.
        print("Testing for CAPTCHA python GET request...")
        r = requests.get(url)
        print("GET successful!")

        html = BeautifulSoup(str(r.text), "lxml")
        # If there's a captcha and we need to solve it...
        if html.body.find('div', attrs={'class': 'g-recaptcha'}):
            print("ReCaptcha detected! Trying to bypass it.")
            bypass_captcha()

        if max_redirects:
            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
            retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
        else:
            retries = urllib3.Retry(3)
        pool = get_pool(use_tor)

        pool = connection_pool
        response = pool.request(method, url, headers=headers,
                                timeout=timeout, preload_content=False,
                                decode_content=False, retries=retries)

        cleanup_func = (lambda r: r.release_conn())

    return response, cleanup_func


def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              debug_name=None):
@@ -159,18 +229,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
        url, headers, timeout=timeout,
        cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
        use_tor=use_tor)
    print(response)
    response_time = time.time()

    content = response.read()
    read_finish = time.time()

    cleanup_func(response) # release_connection for urllib3

    if (response.status == 429
            and content.startswith(b'<!DOCTYPE')
            and b'Our systems have detected unusual traffic' in content):
        ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
            content)
                       content)
        ip = ip.group(1).decode('ascii') if ip else None
        raise FetchError('429', reason=response.reason, ip=ip)
@@ -178,12 +248,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
        raise FetchError(str(response.status), reason=response.reason, ip=None)

    if report_text:
        print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
        print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:',
              round(read_finish - response_time, 3))
    content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
    return content


def head(url, use_tor=False, report_text=None, max_redirects=10):
    pool = get_pool(use_tor)
    pool = connection_pool
    start_time = time.time()

    # default: Retry.DEFAULT = Retry(3)
@@ -191,24 +263,21 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
    # According to the documentation for urlopen, a redirect counts as a retry
    # So there are 3 redirects max by default. Let's change that
    # to 10 since googlevideo redirects a lot.
    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
        raise_on_redirect=False)
    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects,
                            raise_on_redirect=False)
    headers = {'User-Agent': 'Python-urllib'}
    response = pool.request('HEAD', url, headers=headers, retries=retries)
    if report_text:
        print(report_text, ' Latency:', round(time.time() - start_time,3))
        print(report_text, ' Latency:', round(time.time() - start_time, 3))
    return response


mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
desktop_ua = (('User-Agent', desktop_user_agent),)






class RateLimitedQueue(gevent.queue.Queue):
    ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
@@ -225,9 +294,8 @@ class RateLimitedQueue(gevent.queue.Queue):
        self.empty_start = 0
        gevent.queue.Queue.__init__(self)


    def get(self):
        self.lock.acquire() # blocks if another greenlet currently has the lock
        self.lock.acquire()  # blocks if another greenlet currently has the lock
        if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
            gevent.sleep(self.waiting_period)
            self.count_since_last_wait = 0
@@ -243,7 +311,7 @@ class RateLimitedQueue(gevent.queue.Queue):
                self.currently_empty = True
                self.empty_start = time.monotonic()

        item = gevent.queue.Queue.get(self) # blocks when nothing left
        item = gevent.queue.Queue.get(self)  # blocks when nothing left

        if self.currently_empty:
            if time.monotonic() - self.empty_start >= self.waiting_period:
@@ -257,7 +325,6 @@
        return item



def download_thumbnail(save_directory, video_id):
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +336,23 @@ def download_thumbnail(save_directory, video_id):
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        os.makedirs(save_directory, exist_ok = True)
        os.makedirs(save_directory, exist_ok=True)
        f = open(save_location, 'wb')
    f.write(thumbnail)
    f.close()
    return True


def download_thumbnails(save_directory, ids):
    if not isinstance(ids, (list, tuple)):
        ids = list(ids)
    # only do 5 at a time
    # do the n where n is divisible by 5
    i = -1
    for i in range(0, int(len(ids)/5) - 1 ):
        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
    for i in range(0, int(len(ids) / 5) - 1):
        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
    # do the remainders (< 5)
    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])




    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])


def dict_add(*dicts):
@@ -296,6 +360,7 @@ def dict_add(*dicts):
        dicts[0].update(dictionary)
    return dicts[0]


def video_id(url):
    url_parts = urllib.parse.urlparse(url)
    return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -304,11 +369,12 @@ def video_id(url):
# default, sddefault, mqdefault, hqdefault, hq720
def get_thumbnail_url(video_id):
    return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"



def seconds_to_timestamp(seconds):
    seconds = int(seconds)
    hours, seconds = divmod(seconds,3600)
    minutes, seconds = divmod(seconds,60)
    hours, seconds = divmod(seconds, 3600)
    minutes, seconds = divmod(seconds, 60)
    if hours != 0:
        timestamp = str(hours) + ":"
        timestamp += str(minutes).zfill(2) # zfill pads with zeros
@@ -319,31 +385,32 @@ def seconds_to_timestamp(seconds):
    return timestamp



def update_query_string(query_string, items):
    parameters = urllib.parse.parse_qs(query_string)
    parameters.update(items)
    return urllib.parse.urlencode(parameters, doseq=True)



def uppercase_escape(s):
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), base=16)), s)
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), base=16)), s)


def prefix_url(url):
    if url is None:
        return None
    url = url.lstrip('/') # some urls have // before them, which has a special meaning
    url = url.lstrip('/')  # some urls have // before them, which has a special meaning
    return '/' + url


def left_remove(string, substring):
    '''removes substring from the start of string, if present'''
    if string.startswith(substring):
        return string[len(substring):]
    return string


def concat_or_none(*strings):
    '''Concatenates strings. Returns None if any of the arguments are None'''
    result = ''
@@ -365,6 +432,7 @@ def prefix_urls(item):
    except KeyError:
        pass


def add_extra_html_info(item):
    if item['type'] == 'video':
        item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +451,7 @@ def add_extra_html_info(item):
    elif item['type'] == 'channel':
        item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None


def parse_info_prepare_for_html(renderer, additional_info={}):
    item = yt_data_extract.extract_item_info(renderer, additional_info)
    prefix_urls(item)
@@ -390,8 +459,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):

    return item


def check_gevent_exceptions(*tasks):
    for task in tasks:
        if task.exception:
            raise task.exception

@@ -148,7 +148,7 @@ headers = (
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
    if playlist_id:
        url += '&list=' + playlist_id
    if index: