Solve merge conflict

pluja 2020-10-12 08:08:52 +02:00
parent 8efae82302
commit c66afd6485
5 changed files with 142 additions and 45 deletions

View File

@@ -454,8 +454,27 @@ def get_live_urls(urls):
 def watch():
     id = request.args.get('v', None)
     info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
+<<<<<<< Updated upstream
+<<<<<<< Updated upstream
     # Use nginx
     best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
+=======
+=======
+>>>>>>> Stashed changes
+    vsources = ytwatch.get_video_sources(info, False)
+    # Retry 3 times if no sources are available.
+    retry = 3
+    while retry != 0 and len(vsources) == 0:
+        vsources = ytwatch.get_video_sources(info, False)
+        retry -= 1
+
+    for source in vsources:
+        hostName = urllib.parse.urlparse(source['src']).netloc
+        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
+
+    # Parse video formats
+>>>>>>> Stashed changes
     for v_format in info['formats']:
         hostName = urllib.parse.urlparse(v_format['url']).netloc
         v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
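
Note: the loops above rewrite each media URL so the googlevideo hostname travels in a "host" query parameter while the path stays relative, letting the instance (or its nginx front end) proxy the request itself. A minimal sketch of that rewrite, with an illustrative hostname that is not taken from the diff:

    import urllib.parse

    def strip_host(url):
        # e.g. "https://r1---sn-example.googlevideo.com/videoplayback?expire=123"
        # becomes "/videoplayback?expire=123&host=r1---sn-example.googlevideo.com"
        host = urllib.parse.urlparse(url).netloc
        return url.replace("https://{}".format(host), "") + "&host=" + host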

View File

@@ -38,6 +38,14 @@ packaging==20.4
 pylint==2.6.0
 PyMySQL==0.10.1
 pyparsing==2.4.7
+<<<<<<< Updated upstream
+=======
+PySocks==1.7.1
+python-anticaptcha==0.7.1
+<<<<<<< Updated upstream
+>>>>>>> Stashed changes
+=======
+>>>>>>> Stashed changes
 python-dateutil==2.8.1
 python-dotenv==0.14.0
 python-editor==1.0.4

View File

@@ -11,5 +11,6 @@
     "admin_message":"Message from the admin text",
     "admin_user":"admin_username",
     "max_old_user_days": 60,
-    "donate_url": ""
+    "donate_url": "",
+    "anticaptcha": "cf4bb53a6b87f973be8c0c976c390342"
 }
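
Note: the new "anticaptcha" key holds an anti-captcha.com API key; the HTTP helper module changed later in this commit loads it with json.load('yotter-config.json') when it tries to solve a reCAPTCHA. A minimal sketch of how the key is read (mirroring that call):

    import json

    with open('yotter-config.json') as f:
        config = json.load(f)
    api_key = config['anticaptcha']  # anti-captcha.com API key; treat it as a secret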

View File

@@ -1,9 +1,13 @@
 import gzip
+import requests
+from bs4 import BeautifulSoup
 from youtube import yt_data_extract
 try:
     import brotli
     have_brotli = True
 except ImportError:
     have_brotli = False
@@ -15,7 +19,7 @@ import json
 import gevent
 import gevent.queue
 import gevent.lock
+from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask

 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
 # configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -51,13 +55,12 @@ import urllib3.contrib.socks
 URL_ORIGIN = "/https://www.youtube.com"

-connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
+connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')

-def get_pool(use_tor):
-    return connection_pool

 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     '''Separate cookiejars for receiving and sending'''
     def __init__(self, cookiejar_send=None, cookiejar_receive=None):
         self.cookiejar_send = cookiejar_send
         self.cookiejar_receive = cookiejar_receive
@@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response

 class FetchError(Exception):
     def __init__(self, code, reason='', ip=None):
         Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@@ -82,6 +86,7 @@ class FetchError(Exception):
         self.reason = reason
         self.ip = ip

 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
     for encoding in reversed(encodings):
@@ -93,6 +98,57 @@ def decode_content(content, encoding_header):
             content = gzip.decompress(content)
     return content

+def bypass_captcha():
+    session = requests.Session()
+    url = "https://youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999"
+    print("Starting python GET request...")
+    response = session.get(url)
+    print("GET successful!")
+    print("vvv COOKIES DICT vvv")
+    cookies = session.cookies.get_dict()
+    print(cookies)
+
+    inputs = {}
+    html = BeautifulSoup(str(response.text), "lxml")
+    # If there's a captcha and we need to solve it...
+    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+        # Get the captcha form
+        form = html.body.find('form', attrs={"action": "/das_captcha"})
+        # Set up form inputs for request
+        for _input in form.find_all('input'):
+            try:
+                print(_input["name"] + " -> " + _input["value"])
+                inputs[_input["name"]] = _input["value"]
+            except KeyError:
+                continue
+        print("\n vvv Form inputs created vvv ")
+        print(inputs)
+
+        # Get CAPTCHA keys
+        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
+        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
+
+        # Get anti-captcha API key
+        config = json.load(open('yotter-config.json'))
+        client = AnticaptchaClient(config['anticaptcha'])
+
+        # Create anti-captcha Task
+        task = NoCaptchaTaskProxylessTask(url, site_key)
+        job = client.createTask(task)
+        job.join()
+        inputs['g-recaptcha-response'] = job.get_solution_response()
+
+        # Print POST request headers
+        print(requests.post("https://youtube.com/das_captcha", data=inputs,
+                            headers={"Content-Type": "application/x-www-form-urlencoded",
+                                     "Accept-Language": "en-US,en;q=0.5",
+                                     "Referer": "https://www.youtube.com/das_captcha",
+                                     "Origin": "https://www.youtube.com"}).headers)
+
 def fetch_url_response(url, headers=(), timeout=15, data=None,
                        cookiejar_send=None, cookiejar_receive=None,
                        use_tor=True, max_redirects=None):
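
Note: bypass_captcha() above chains four steps: fetch a watch page, detect the g-recaptcha widget, hand the page's site key to the anti-captcha.com service, and POST the returned token back to /das_captcha together with the original form fields. A condensed sketch of just the solving step, with placeholder key values (the call names match those used in the function above):

    from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask

    client = AnticaptchaClient("ANTICAPTCHA_API_KEY")   # key read from yotter-config.json
    task = NoCaptchaTaskProxylessTask("https://youtube.com/watch?v=PLACEHOLDER", "SITE_KEY_FROM_PAGE")
    job = client.createTask(task)
    job.join()                                           # blocks until the captcha is solved remotely
    token = job.get_solution_response()                  # value for the g-recaptcha-response form field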
@@ -127,7 +183,8 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)

-        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
+        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
+                                                         cookiejar_receive=cookiejar_receive)
         opener = urllib.request.build_opener(cookie_processor)

         response = opener.open(req, timeout=timeout)
@@ -138,18 +195,31 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     # (in connectionpool.py in urllib3)
     # According to the documentation for urlopen, a redirect counts as a
     # retry. So there are 3 redirects max by default.
+    print("Testing for CAPTCHA python GET request...")
+    r = requests.get(url)
+    print("GET successful!")
+    html = BeautifulSoup(str(r.text), "lxml")
+
+    # If there's a captcha and we need to solve it...
+    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+        print("ReCaptcha detected! Trying to bypass it.")
+        bypass_captcha()
+
     if max_redirects:
-        retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+        retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
     else:
         retries = urllib3.Retry(3)
-    pool = get_pool(use_tor)
+    pool = connection_pool
     response = pool.request(method, url, headers=headers,
                             timeout=timeout, preload_content=False,
                             decode_content=False, retries=retries)
     cleanup_func = (lambda r: r.release_conn())
     return response, cleanup_func

 def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
               cookiejar_send=None, cookiejar_receive=None, use_tor=True,
               debug_name=None):
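
Note: the block added above probes every outgoing fetch with a plain requests.get and, when the response contains a g-recaptcha div, calls bypass_captcha() before continuing with the pooled request. A minimal sketch of that detection step in isolation (the helper name is illustrative, not part of the codebase):

    import requests
    from bs4 import BeautifulSoup

    def page_has_recaptcha(url):
        html = BeautifulSoup(requests.get(url).text, "lxml")
        return html.body is not None and html.body.find('div', attrs={'class': 'g-recaptcha'}) is not None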
@@ -159,11 +229,11 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         url, headers, timeout=timeout,
         cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
         use_tor=use_tor)
+    print(response)
     response_time = time.time()

     content = response.read()
     read_finish = time.time()

     cleanup_func(response)  # release_connection for urllib3
     if (response.status == 429
@@ -178,12 +248,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         raise FetchError(str(response.status), reason=response.reason, ip=None)

     if report_text:
-        print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
+        print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:',
+              round(read_finish - response_time, 3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
     return content

 def head(url, use_tor=False, report_text=None, max_redirects=10):
-    pool = get_pool(use_tor)
+    pool = connection_pool
     start_time = time.time()

     # default: Retry.DEFAULT = Retry(3)
@@ -191,24 +263,21 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
     # According to the documentation for urlopen, a redirect counts as a retry
     # So there are 3 redirects max by default. Let's change that
     # to 10 since googlevideo redirects a lot.
-    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
-                            raise_on_redirect=False)
+    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects,
+                            raise_on_redirect=False)
     headers = {'User-Agent': 'Python-urllib'}
     response = pool.request('HEAD', url, headers=headers, retries=retries)
     if report_text:
-        print(report_text, ' Latency:', round(time.time() - start_time,3))
+        print(report_text, ' Latency:', round(time.time() - start_time, 3))
     return response

 mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
 mobile_ua = (('User-Agent', mobile_user_agent),)
 desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
 desktop_ua = (('User-Agent', desktop_user_agent),)

 class RateLimitedQueue(gevent.queue.Queue):
     ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
@@ -225,7 +294,6 @@ class RateLimitedQueue(gevent.queue.Queue):
         self.empty_start = 0
         gevent.queue.Queue.__init__(self)

     def get(self):
         self.lock.acquire()  # blocks if another greenlet currently has the lock
         if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
@@ -257,7 +325,6 @@
         return item

 def download_thumbnail(save_directory, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +336,23 @@ def download_thumbnail(save_directory, video_id):
     try:
         f = open(save_location, 'wb')
     except FileNotFoundError:
-        os.makedirs(save_directory, exist_ok = True)
+        os.makedirs(save_directory, exist_ok=True)
         f = open(save_location, 'wb')
     f.write(thumbnail)
     f.close()
     return True

 def download_thumbnails(save_directory, ids):
     if not isinstance(ids, (list, tuple)):
         ids = list(ids)
     # only do 5 at a time
     # do the n where n is divisible by 5
     i = -1
-    for i in range(0, int(len(ids)/5) - 1 ):
-        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
+    for i in range(0, int(len(ids) / 5) - 1):
+        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
     # do the remainders (< 5)
-    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
+    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])

 def dict_add(*dicts):
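
Note: download_thumbnails() fans the downloads out with gevent in groups of five. A worked trace of the batching arithmetic for a hypothetical list of 12 ids (values chosen only for illustration):

    n = 12                           # len(ids)
    full_batches = int(n / 5) - 1    # == 1, so the loop runs once (i == 0) and joins ids[0:5]
    # after the loop i == 0, so the final joinall covers range(0 * 5 + 5, n), i.e. ids[5:12]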
@@ -296,6 +360,7 @@ def dict_add(*dicts):
         dicts[0].update(dictionary)
     return dicts[0]

 def video_id(url):
     url_parts = urllib.parse.urlparse(url)
     return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -305,10 +370,11 @@ def video_id(url):
 def get_thumbnail_url(video_id):
     return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"

 def seconds_to_timestamp(seconds):
     seconds = int(seconds)
-    hours, seconds = divmod(seconds,3600)
-    minutes, seconds = divmod(seconds,60)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
     if hours != 0:
         timestamp = str(hours) + ":"
         timestamp += str(minutes).zfill(2)  # zfill pads with zeros
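
Note: the divmod pair above splits a raw second count into hour/minute/second fields. A worked example with an arbitrary value:

    seconds = 3725
    hours, seconds = divmod(seconds, 3600)   # hours == 1, seconds == 125
    minutes, seconds = divmod(seconds, 60)   # minutes == 2, seconds == 5
    # with hours != 0 the timestamp therefore starts "1:02" before the seconds are appended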
@@ -319,31 +385,32 @@ def seconds_to_timestamp(seconds):
     return timestamp

 def update_query_string(query_string, items):
     parameters = urllib.parse.parse_qs(query_string)
     parameters.update(items)
     return urllib.parse.urlencode(parameters, doseq=True)

 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
         lambda m: chr(int(m.group(1), base=16)), s)

 def prefix_url(url):
     if url is None:
         return None
     url = url.lstrip('/')  # some urls have // before them, which has a special meaning
     return '/' + url

 def left_remove(string, substring):
     '''removes substring from the start of string, if present'''
     if string.startswith(substring):
         return string[len(substring):]
     return string

 def concat_or_none(*strings):
     '''Concatenates strings. Returns None if any of the arguments are None'''
     result = ''
@@ -365,6 +432,7 @@ def prefix_urls(item):
     except KeyError:
         pass

 def add_extra_html_info(item):
     if item['type'] == 'video':
         item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +451,7 @@ def add_extra_html_info(item):
     elif item['type'] == 'channel':
         item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None

 def parse_info_prepare_for_html(renderer, additional_info={}):
     item = yt_data_extract.extract_item_info(renderer, additional_info)
     prefix_urls(item)
@@ -390,8 +459,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
     return item

 def check_gevent_exceptions(*tasks):
     for task in tasks:
         if task.exception:
             raise task.exception

View File

@@ -148,7 +148,7 @@ headers = (
 def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
     # videos
-    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
+    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
     if playlist_id:
         url += '&list=' + playlist_id
     if index:
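
Note: the extra query parameters pin the request to a US/English context and skip the age and "are you sure" interstitials, while pbj=1 (already present) keeps requesting YouTube's JSON variant of the page. An illustrative expansion with a placeholder video id:

    video_id = 'dQw4w9WgXcQ'  # placeholder id
    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
    # -> https://m.youtube.com/watch?v=dQw4w9WgXcQ&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999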