Solve merge conflict

pluja 2020-10-12 08:08:52 +02:00
parent 8efae82302
commit c66afd6485
5 changed files with 142 additions and 45 deletions

View File

@@ -454,8 +454,27 @@ def get_live_urls(urls):
 def watch():
     id = request.args.get('v', None)
     info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
+<<<<<<< Updated upstream
+<<<<<<< Updated upstream
     # Use nginx
     best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
+=======
+=======
+>>>>>>> Stashed changes
+    vsources = ytwatch.get_video_sources(info, False)
+    # Retry 3 times if no sources are available.
+    retry = 3
+    while retry != 0 and len(vsources) == 0:
+        vsources = ytwatch.get_video_sources(info, False)
+        retry -= 1
+    for source in vsources:
+        hostName = urllib.parse.urlparse(source['src']).netloc
+        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
+    # Parse video formats
+>>>>>>> Stashed changes
     for v_format in info['formats']:
         hostName = urllib.parse.urlparse(v_format['url']).netloc
         v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
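
For context, a minimal standalone sketch of the host rewrite this hunk applies to each source URL (the googlevideo URL below is a hypothetical example; the real values come from ytwatch). The scheme and host are stripped so the path is served relative to this site, and the original host is appended as a query parameter for the nginx proxy to use:

import urllib.parse

src = "https://r4---sn-example.googlevideo.com/videoplayback?expire=1600000000&itag=22"
host_name = urllib.parse.urlparse(src).netloc
# Drop "https://<host>" and pass the host along, as watch() does above.
rewritten = src.replace("https://{}".format(host_name), "") + "&host=" + host_name
print(rewritten)  # /videoplayback?expire=1600000000&itag=22&host=r4---sn-example.googlevideo.com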

View File

@@ -38,6 +38,14 @@ packaging==20.4
 pylint==2.6.0
 PyMySQL==0.10.1
 pyparsing==2.4.7
+<<<<<<< Updated upstream
+=======
+PySocks==1.7.1
+python-anticaptcha==0.7.1
+<<<<<<< Updated upstream
+>>>>>>> Stashed changes
+=======
+>>>>>>> Stashed changes
 python-dateutil==2.8.1
 python-dotenv==0.14.0
 python-editor==1.0.4

View File

@@ -11,5 +11,6 @@
     "admin_message":"Message from the admin text",
     "admin_user":"admin_username",
     "max_old_user_days": 60,
-    "donate_url": ""
+    "donate_url": "",
+    "anticaptcha": "cf4bb53a6b87f973be8c0c976c390342"
 }
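
The new "anticaptcha" key is read back by youtube/util.py later in this commit; a minimal sketch of that lookup, assuming yotter-config.json sits in the working directory and the value above is a placeholder API key:

import json

from python_anticaptcha import AnticaptchaClient

with open('yotter-config.json') as f:
    config = json.load(f)

# Build the anti-captcha client from the key stored in the config file.
client = AnticaptchaClient(config['anticaptcha'])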

View File

@@ -1,9 +1,13 @@
 import gzip
+import requests
+from bs4 import BeautifulSoup
 from youtube import yt_data_extract
 try:
     import brotli
     have_brotli = True
 except ImportError:
     have_brotli = False
@@ -15,7 +19,7 @@ import json
 import gevent
 import gevent.queue
 import gevent.lock
+from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
 # configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -51,13 +55,12 @@ import urllib3.contrib.socks
 URL_ORIGIN = "/https://www.youtube.com"
-connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
+connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
-def get_pool(use_tor):
-    return connection_pool
 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     '''Separate cookiejars for receiving and sending'''
     def __init__(self, cookiejar_send=None, cookiejar_receive=None):
         self.cookiejar_send = cookiejar_send
         self.cookiejar_receive = cookiejar_receive
@@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response

 class FetchError(Exception):
     def __init__(self, code, reason='', ip=None):
         Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@@ -82,6 +86,7 @@ class FetchError(Exception):
         self.reason = reason
         self.ip = ip

 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
     for encoding in reversed(encodings):
@@ -93,6 +98,57 @@ def decode_content(content, encoding_header):
             content = gzip.decompress(content)
     return content

+def bypass_captcha():
+    session = requests.Session()
+    url = "https://youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999"
+    print("Starting python GET request...")
+    response = session.get(url)
+    print("GET successful!")
+    print("vvv COOKIES DICT vvv")
+    cookies = session.cookies.get_dict()
+    print(cookies)
+
+    inputs = {}
+    html = BeautifulSoup(str(response.text), "lxml")
+    # If there's a captcha and we need to solve it...
+    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+        # Get the captcha form
+        form = html.body.find('form', attrs={"action": "/das_captcha"})
+        # Set up form inputs for request
+        for _input in form.find_all('input'):
+            try:
+                print(_input["name"] + " -> " + _input["value"])
+                inputs[_input["name"]] = _input["value"]
+            except KeyError:
+                continue
+        print("\n vvv Form inputs created vvv ")
+        print(inputs)
+
+        # Get CAPTCHA keys
+        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
+        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
+
+        # Get anti-captcha API key
+        config = json.load(open('yotter-config.json'))
+        client = AnticaptchaClient(config['anticaptcha'])
+
+        # Create anti-captcha Task
+        task = NoCaptchaTaskProxylessTask(url, site_key)
+        job = client.createTask(task)
+        job.join()
+        inputs['g-recaptcha-response'] = job.get_solution_response()
+
+        # Print POST request headers
+        print(requests.post("https://youtube.com/das_captcha", data=inputs,
+                            headers={"Content-Type": "application/x-www-form-urlencoded",
+                                     "Accept-Language": "en-US,en;q=0.5",
+                                     "Referer": "https://www.youtube.com/das_captcha",
+                                     "Origin": "https://www.youtube.com"}).headers)
+
 def fetch_url_response(url, headers=(), timeout=15, data=None,
                        cookiejar_send=None, cookiejar_receive=None,
                        use_tor=True, max_redirects=None):
@@ -105,7 +161,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
     '''
-    headers = dict(headers) # Note: Calling dict() on a dict will make a copy
+    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
     if have_brotli:
         headers['Accept-Encoding'] = 'gzip, br'
     else:
@@ -124,32 +180,46 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     elif not isinstance(data, bytes):
         data = urllib.parse.urlencode(data).encode('ascii')

     if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
-        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
+        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
+                                                         cookiejar_receive=cookiejar_receive)
         opener = urllib.request.build_opener(cookie_processor)
         response = opener.open(req, timeout=timeout)
         cleanup_func = (lambda r: None)
     else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         # default: Retry.DEFAULT = Retry(3)
         # (in connectionpool.py in urllib3)
         # According to the documentation for urlopen, a redirect counts as a
         # retry. So there are 3 redirects max by default.
+        print("Testing for CAPTCHA python GET request...")
+        r = requests.get(url)
+        print("GET successful!")
+        html = BeautifulSoup(str(r.text), "lxml")
+        # If there's a captcha and we need to solve it...
+        if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+            print("ReCaptcha detected! Trying to bypass it.")
+            bypass_captcha()
+
         if max_redirects:
-            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+            retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
         else:
             retries = urllib3.Retry(3)
-        pool = get_pool(use_tor)
+        pool = connection_pool
         response = pool.request(method, url, headers=headers,
                                 timeout=timeout, preload_content=False,
                                 decode_content=False, retries=retries)
         cleanup_func = (lambda r: r.release_conn())

     return response, cleanup_func

 def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
               cookiejar_send=None, cookiejar_receive=None, use_tor=True,
               debug_name=None):
@@ -159,18 +229,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         url, headers, timeout=timeout,
         cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
         use_tor=use_tor)
+    print(response)
     response_time = time.time()

     content = response.read()
     read_finish = time.time()
     cleanup_func(response) # release_connection for urllib3

     if (response.status == 429
             and content.startswith(b'<!DOCTYPE')
             and b'Our systems have detected unusual traffic' in content):
         ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                        content)
         ip = ip.group(1).decode('ascii') if ip else None
         raise FetchError('429', reason=response.reason, ip=ip)
@@ -178,12 +248,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         raise FetchError(str(response.status), reason=response.reason, ip=None)

     if report_text:
-        print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
+        print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:',
+              round(read_finish - response_time, 3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
     return content

 def head(url, use_tor=False, report_text=None, max_redirects=10):
-    pool = get_pool(use_tor)
+    pool = connection_pool
     start_time = time.time()

     # default: Retry.DEFAULT = Retry(3)
@@ -191,24 +263,21 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
     # According to the documentation for urlopen, a redirect counts as a retry
     # So there are 3 redirects max by default. Let's change that
     # to 10 since googlevideo redirects a lot.
-    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
+    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects,
         raise_on_redirect=False)
     headers = {'User-Agent': 'Python-urllib'}
     response = pool.request('HEAD', url, headers=headers, retries=retries)
     if report_text:
-        print(report_text, ' Latency:', round(time.time() - start_time,3))
+        print(report_text, ' Latency:', round(time.time() - start_time, 3))
     return response

 mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
 mobile_ua = (('User-Agent', mobile_user_agent),)
 desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
 desktop_ua = (('User-Agent', desktop_user_agent),)

 class RateLimitedQueue(gevent.queue.Queue):
     ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
@@ -225,9 +294,8 @@ class RateLimitedQueue(gevent.queue.Queue):
         self.empty_start = 0
         gevent.queue.Queue.__init__(self)

     def get(self):
         self.lock.acquire() # blocks if another greenlet currently has the lock

         if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
             gevent.sleep(self.waiting_period)
             self.count_since_last_wait = 0
@@ -243,7 +311,7 @@ class RateLimitedQueue(gevent.queue.Queue):
             self.currently_empty = True
             self.empty_start = time.monotonic()
-        item = gevent.queue.Queue.get(self) # blocks when nothing left
+        item = gevent.queue.Queue.get(self)  # blocks when nothing left

         if self.currently_empty:
             if time.monotonic() - self.empty_start >= self.waiting_period:
@@ -257,7 +325,6 @@ class RateLimitedQueue(gevent.queue.Queue):
         return item

 def download_thumbnail(save_directory, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +336,23 @@ def download_thumbnail(save_directory, video_id):
     try:
         f = open(save_location, 'wb')
     except FileNotFoundError:
-        os.makedirs(save_directory, exist_ok = True)
+        os.makedirs(save_directory, exist_ok=True)
         f = open(save_location, 'wb')
     f.write(thumbnail)
     f.close()
     return True

 def download_thumbnails(save_directory, ids):
     if not isinstance(ids, (list, tuple)):
         ids = list(ids)
     # only do 5 at a time
     # do the n where n is divisible by 5
     i = -1
-    for i in range(0, int(len(ids)/5) - 1 ):
-        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
+    for i in range(0, int(len(ids) / 5) - 1):
+        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
     # do the remainders (< 5)
-    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
+    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])

 def dict_add(*dicts):
@@ -296,6 +360,7 @@ def dict_add(*dicts):
         dicts[0].update(dictionary)
     return dicts[0]

 def video_id(url):
     url_parts = urllib.parse.urlparse(url)
     return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -304,11 +369,12 @@ def video_id(url):
 # default, sddefault, mqdefault, hqdefault, hq720
 def get_thumbnail_url(video_id):
     return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"

 def seconds_to_timestamp(seconds):
     seconds = int(seconds)
-    hours, seconds = divmod(seconds,3600)
-    minutes, seconds = divmod(seconds,60)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
     if hours != 0:
         timestamp = str(hours) + ":"
         timestamp += str(minutes).zfill(2) # zfill pads with zeros
@@ -319,31 +385,32 @@ def seconds_to_timestamp(seconds):
     return timestamp

 def update_query_string(query_string, items):
     parameters = urllib.parse.parse_qs(query_string)
     parameters.update(items)
     return urllib.parse.urlencode(parameters, doseq=True)

 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
         lambda m: chr(int(m.group(1), base=16)), s)

 def prefix_url(url):
     if url is None:
         return None
     url = url.lstrip('/') # some urls have // before them, which has a special meaning
     return '/' + url

 def left_remove(string, substring):
     '''removes substring from the start of string, if present'''
     if string.startswith(substring):
         return string[len(substring):]
     return string

 def concat_or_none(*strings):
     '''Concatenates strings. Returns None if any of the arguments are None'''
     result = ''
@@ -365,6 +432,7 @@ def prefix_urls(item):
     except KeyError:
         pass

 def add_extra_html_info(item):
     if item['type'] == 'video':
         item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +451,7 @@ def add_extra_html_info(item):
     elif item['type'] == 'channel':
         item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None

 def parse_info_prepare_for_html(renderer, additional_info={}):
     item = yt_data_extract.extract_item_info(renderer, additional_info)
     prefix_urls(item)
@@ -390,8 +459,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
     return item

 def check_gevent_exceptions(*tasks):
     for task in tasks:
         if task.exception:
             raise task.exception

View File

@@ -148,7 +148,7 @@ headers = (
 def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
     # videos
-    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
+    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
     if playlist_id:
         url += '&list=' + playlist_id
     if index:
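
For illustration, the watch-page URL the changed line now builds for a hypothetical video ID; gl=US, hl=en and has_verified=1 are the parameters this commit adds on top of the existing pbj and bpctr ones:

video_id = 'dQw4w9WgXcQ'  # hypothetical example ID
url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
print(url)
# https://m.youtube.com/watch?v=dQw4w9WgXcQ&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999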