From c66afd6485ef06e1d4123bd6cf761c957c976190 Mon Sep 17 00:00:00 2001
From: pluja
Date: Mon, 12 Oct 2020 08:08:52 +0200
Subject: [PATCH] Solve merge conflict

---
 app/routes.py      |  19 ++++
 requirements.txt   |   8 +++
 yotter-config.json |   3 +-
 youtube/util.py    | 155 ++++++++++++++++++++++++++++++++-------------
 youtube/watch.py   |   2 +-
 5 files changed, 142 insertions(+), 45 deletions(-)

diff --git a/app/routes.py b/app/routes.py
index 9e26eaf..f7b168f 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -454,8 +454,27 @@ def get_live_urls(urls):
 def watch():
     id = request.args.get('v', None)
     info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
+<<<<<<< Updated upstream
+<<<<<<< Updated upstream
     # Use nginx
     best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
+=======
+=======
+>>>>>>> Stashed changes
+    vsources = ytwatch.get_video_sources(info, False)
+
+    # Retry 3 times if no sources are available.
+    retry = 3
+    while retry != 0 and len(vsources) == 0:
+        vsources = ytwatch.get_video_sources(info, False)
+        retry -= 1
+
+    for source in vsources:
+        hostName = urllib.parse.urlparse(source['src']).netloc
+        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
+
+    # Parse video formats
+>>>>>>> Stashed changes
     for v_format in info['formats']:
         hostName = urllib.parse.urlparse(v_format['url']).netloc
         v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
diff --git a/requirements.txt b/requirements.txt
index d0d7b7e..d4e34b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,6 +38,14 @@ packaging==20.4
 pylint==2.6.0
 PyMySQL==0.10.1
 pyparsing==2.4.7
+<<<<<<< Updated upstream
+=======
+PySocks==1.7.1
+python-anticaptcha==0.7.1
+<<<<<<< Updated upstream
+>>>>>>> Stashed changes
+=======
+>>>>>>> Stashed changes
 python-dateutil==2.8.1
 python-dotenv==0.14.0
 python-editor==1.0.4
diff --git a/yotter-config.json b/yotter-config.json
index e0ceeab..54425f7 100644
--- a/yotter-config.json
+++ b/yotter-config.json
@@ -11,5 +11,6 @@
     "admin_message":"Message from the admin text",
     "admin_user":"admin_username",
     "max_old_user_days": 60,
-    "donate_url": ""
+    "donate_url": "",
+    "anticaptcha": "cf4bb53a6b87f973be8c0c976c390342"
 }
diff --git a/youtube/util.py b/youtube/util.py
index e3f6c65..4588461 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,9 +1,13 @@
 import gzip
+import requests
+from bs4 import BeautifulSoup
+
 
 from youtube import yt_data_extract
 
 try:
     import brotli
+
     have_brotli = True
 except ImportError:
     have_brotli = False
@@ -15,7 +19,7 @@ import json
 import gevent
 import gevent.queue
 import gevent.lock
-
+from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
 # configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -51,13 +55,12 @@ import urllib3.contrib.socks
 
 URL_ORIGIN = "/https://www.youtube.com"
 
-connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
+connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
 
-def get_pool(use_tor):
-    return connection_pool
 
 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     '''Separate cookiejars for receiving and sending'''
+
     def __init__(self, cookiejar_send=None, cookiejar_receive=None):
         self.cookiejar_send = cookiejar_send
         self.cookiejar_receive = cookiejar_receive
@@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response
 
+
 class FetchError(Exception):
     def __init__(self, code, reason='', ip=None):
         Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@@ -82,6 +86,7 @@ class FetchError(Exception):
         self.reason = reason
         self.ip = ip
 
+
 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
     for encoding in reversed(encodings):
@@ -93,6 +98,57 @@ def decode_content(content, encoding_header):
             content = gzip.decompress(content)
     return content
 
+
+def bypass_captcha():
+    session = requests.Session()
+    url = "https://youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999"
+    print("Starting python GET request...")
+    response = session.get(url)
+    print("GET successful!")
+    print("vvv COOKIES DICT vvv")
+    cookies = session.cookies.get_dict()
+    print(cookies)
+
+    inputs = {}
+    html = BeautifulSoup(str(response.text), "lxml")
+
+    # If there's a captcha and we need to solve it...
+    if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+        # Get the captcha form
+        form = html.body.find('form', attrs={"action": "/das_captcha"})
+
+        # Set up form inputs for request
+        for _input in form.find_all('input'):
+            try:
+                print(_input["name"] + " -> " + _input["value"])
+                inputs[_input["name"]] = _input["value"]
+            except KeyError:
+                continue
+        print("\n vvv Form inputs created vvv ")
+        print(inputs)
+
+        # Get CAPTCHA keys
+        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
+        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
+
+        # Get anti-captcha API key
+        config = json.load(open('yotter-config.json'))
+        client = AnticaptchaClient(config['anticaptcha'])
+        # Create anti-captcha Task
+        task = NoCaptchaTaskProxylessTask(url, site_key)
+        job = client.createTask(task)
+        job.join()
+
+        inputs['g-recaptcha-response'] = job.get_solution_response()
+
+        # Print POST request headers
+        print(requests.post("https://youtube.com/das_captcha", data=inputs,
+                            headers={"Content-Type": "application/x-www-form-urlencoded",
+                                     "Accept-Language": "en-US,en;q=0.5",
+                                     "Referer": "https://www.youtube.com/das_captcha",
+                                     "Origin": "https://www.youtube.com"}).headers)
+
+
 def fetch_url_response(url, headers=(), timeout=15, data=None,
                        cookiejar_send=None, cookiejar_receive=None,
                        use_tor=True, max_redirects=None):
@@ -105,7 +161,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
     '''
-    headers = dict(headers) # Note: Calling dict() on a dict will make a copy
+    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
     if have_brotli:
         headers['Accept-Encoding'] = 'gzip, br'
     else:
         headers['Accept-Encoding'] = 'gzip'
@@ -124,32 +180,46 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
     elif not isinstance(data, bytes):
         data = urllib.parse.urlencode(data).encode('ascii')
 
-    if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
+    if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
 
-        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
+        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
+                                                         cookiejar_receive=cookiejar_receive)
         opener = urllib.request.build_opener(cookie_processor)
 
         response = opener.open(req, timeout=timeout)
         cleanup_func = (lambda r: None)
 
-    else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
+    else:  # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         # default: Retry.DEFAULT = Retry(3)
         # (in connectionpool.py in urllib3)
         # According to the documentation for urlopen, a redirect counts as a
         # retry. So there are 3 redirects max by default.
+        print("Testing for CAPTCHA python GET request...")
+        r = requests.get(url)
+        print("GET successful!")
+
+        html = BeautifulSoup(str(r.text), "lxml")
+        # If there's a captcha and we need to solve it...
+        if html.body.find('div', attrs={'class': 'g-recaptcha'}):
+            print("ReCaptcha detected! Trying to bypass it.")
+            bypass_captcha()
+
         if max_redirects:
-            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+            retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
         else:
             retries = urllib3.Retry(3)
-        pool = get_pool(use_tor)
+
+        pool = connection_pool
         response = pool.request(method, url, headers=headers, timeout=timeout,
                                 preload_content=False, decode_content=False,
                                 retries=retries)
+
         cleanup_func = (lambda r: r.release_conn())
 
     return response, cleanup_func
 
+
 def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
               cookiejar_send=None, cookiejar_receive=None, use_tor=True,
               debug_name=None):
@@ -159,18 +229,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
         url, headers, timeout=timeout,
         cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
         use_tor=use_tor)
+    print(response)
     response_time = time.time()
 
     content = response.read()
     read_finish = time.time()
-    cleanup_func(response) # release_connection for urllib3
     if (response.status == 429 and content.startswith(b'= self.subsequent_bursts and self.surpassed_initial:
             gevent.sleep(self.waiting_period)
             self.count_since_last_wait = 0
@@ -243,7 +311,7 @@
             self.currently_empty = True
             self.empty_start = time.monotonic()
 
-        item = gevent.queue.Queue.get(self) # blocks when nothing left
+        item = gevent.queue.Queue.get(self)  # blocks when nothing left
 
         if self.currently_empty:
             if time.monotonic() - self.empty_start >= self.waiting_period:
@@ -257,7 +325,6 @@
 
         return item
 
-
 def download_thumbnail(save_directory, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +336,23 @@ def download_thumbnail(save_directory, video_id):
     try:
         f = open(save_location, 'wb')
     except FileNotFoundError:
-        os.makedirs(save_directory, exist_ok = True)
+        os.makedirs(save_directory, exist_ok=True)
         f = open(save_location, 'wb')
     f.write(thumbnail)
     f.close()
     return True
 
+
 def download_thumbnails(save_directory, ids):
     if not isinstance(ids, (list, tuple)):
         ids = list(ids)
     # only do 5 at a time
     # do the n where n is divisible by 5
     i = -1
-    for i in range(0, int(len(ids)/5) - 1 ):
-        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
+    for i in range(0, int(len(ids) / 5) - 1):
+        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
     # do the remainders (< 5)
-    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
-
-
-
-
+    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])
 
 
 def dict_add(*dicts):
@@ -296,6 +360,7 @@
         dicts[0].update(dictionary)
     return dicts[0]
 
+
 def video_id(url):
     url_parts = urllib.parse.urlparse(url)
     return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -304,11 +369,12 @@
 # default, sddefault, mqdefault, hqdefault, hq720
 def get_thumbnail_url(video_id):
     return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
-
+
+
 def seconds_to_timestamp(seconds):
     seconds = int(seconds)
-    hours, seconds = divmod(seconds,3600)
-    minutes, seconds = divmod(seconds,60)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
     if hours != 0:
         timestamp = str(hours) + ":"
         timestamp += str(minutes).zfill(2) # zfill pads with zeros
@@ -319,31 +385,32 @@ def seconds_to_timestamp(seconds):
 
     return timestamp
 
-
 def update_query_string(query_string, items):
     parameters = urllib.parse.parse_qs(query_string)
     parameters.update(items)
     return urllib.parse.urlencode(parameters, doseq=True)
 
-
 def uppercase_escape(s):
-  return re.sub(
-      r'\\U([0-9a-fA-F]{8})',
-      lambda m: chr(int(m.group(1), base=16)), s)
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: chr(int(m.group(1), base=16)), s)
+
 
 def prefix_url(url):
     if url is None:
         return None
-    url = url.lstrip('/') # some urls have // before them, which has a special meaning
+    url = url.lstrip('/')  # some urls have // before them, which has a special meaning
     return '/' + url
 
+
 def left_remove(string, substring):
     '''removes substring from the start of string, if present'''
     if string.startswith(substring):
         return string[len(substring):]
     return string
 
+
 def concat_or_none(*strings):
     '''Concatenates strings.
     Returns None if any of the arguments are None'''
     result = ''
@@ -365,6 +432,7 @@ def prefix_urls(item):
     except KeyError:
         pass
 
+
 def add_extra_html_info(item):
     if item['type'] == 'video':
         item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +451,7 @@ def add_extra_html_info(item):
     elif item['type'] == 'channel':
         item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
 
+
 def parse_info_prepare_for_html(renderer, additional_info={}):
     item = yt_data_extract.extract_item_info(renderer, additional_info)
     prefix_urls(item)
@@ -390,8 +459,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
 
     return item
 
+
 def check_gevent_exceptions(*tasks):
     for task in tasks:
         if task.exception:
             raise task.exception
-
diff --git a/youtube/watch.py b/youtube/watch.py
index 51b220d..e5f54e8 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -148,7 +148,7 @@ headers = (
 def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
     # videos
-    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
+    url = 'https://m.youtube.com/watch?v=' + video_id + '&gl=US&hl=en&has_verified=1&pbj=1&bpctr=9999999999'
     if playlist_id:
         url += '&list=' + playlist_id
     if index:
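
The app/routes.py hunk above rewrites each absolute stream URL into a relative path and moves the original hostname into a host= query parameter, so the local reverse proxy (nginx in Yotter's setup) can forward the request to that upstream. A minimal sketch of the transformation, with an invented hostname and query string used purely for illustration:

    import urllib.parse

    # Invented example URL; only the shape of the rewrite matters here.
    src = "https://r4---sn-example.googlevideo.com/videoplayback?expire=123&id=abc"
    host = urllib.parse.urlparse(src).netloc
    src = src.replace("https://{}".format(host), "") + "&host=" + host
    # src is now "/videoplayback?expire=123&id=abc&host=r4---sn-example.googlevideo.com"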
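
The youtube/util.py changes detect a reCAPTCHA interstitial and hand it to the anti-captcha service through python-anticaptcha: build a NoCaptchaTaskProxylessTask from the challenge page URL and the form's data-sitekey, wait for a worker to solve it, then submit the returned token as the g-recaptcha-response form field. A condensed sketch of that round trip, with the helper name and arguments being illustrative rather than part of the patch:

    from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask


    def solve_recaptcha(api_key, page_url, site_key):
        # Same calls bypass_captcha() makes above: create a proxyless NoCaptcha
        # task, block until a worker returns a solution, and hand back the token
        # that belongs in the g-recaptcha-response field of the form POST.
        client = AnticaptchaClient(api_key)
        task = NoCaptchaTaskProxylessTask(page_url, site_key)
        job = client.createTask(task)
        job.join()
        return job.get_solution_response()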