Ditch python-anticaptcha and use requests

2020-10-12 12:20:42 +02:00 · 2020-10-12 12:20:42 +02:00 · 6c5ce51b26
commit 6c5ce51b26
parent c1a6c67fea
1 changed file with 26 additions and 17 deletions
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -101,8 +101,6 @@ def decode_content(content, encoding_header):
 def bypass_captcha(session, response, url, cookies):
    print("vvv COOKIES DICT vvv")
    cookies = [{c['name']: c['value']} for c in cookies]
    inputs = {}
    html = BeautifulSoup(str(response.text), "lxml")
@@ -125,22 +123,38 @@ def bypass_captcha(session, response, url, cookies):
        site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
        s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
-        # Get anti-captcha API key
+        # Get anti-captcha API key from config
        config = json.load(open('yotter-config.json'))
-        client = AnticaptchaClient(config['anticaptcha'])
+        # Generate anti-captcha request payload
-        # Create anti-captcha Task
+        body = {'clientKey': config['anticaptcha']}
-        task = NoCaptchaTaskProxylessTask(url, site_key)
+        task = {'type': "NoCaptchaTaskProxyless", 'websiteURL': url,
-        job = client.createTask(task)
+                'websiteKey': site_key, "recaptchaDataSValue": s_value}
-        job.join()
+        body['task'] = task
-        inputs['g-recaptcha-response'] = job.get_solution_response()
+        # Create the task.
        response = requests.post("https://api.anti-captcha.com/createTask", json=body).json()
        task_id = response["taskId"]
        print("Task was created: {}. Waiting...".format(task_id))
        # Wait until task is completed
        body = {"clientKey": config['anticaptcha'], "taskId": task_id}
        response = requests.post("https://api.anti-captcha.com/getTaskResult", json=body).json()
        ready = response["status"] == "ready"
        while not ready:
            print(response['status'])
            response = requests.post("https://api.anti-captcha.com/getTaskResult", json=body).json()
            ready = response["status"] == "ready"
        inputs['g-recaptcha-response'] = response['solution']['gRecaptchaResponse']
        print(response['solution'])
        # Print POST request headers
        print(requests.post("https://youtube.com/das_captcha", data=inputs,
                            headers={"Content-Type": "application/x-www-form-urlencoded",
                                     "Accept-Language": "en-US,en;q=0.5",
                                     "User-Agent":'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0',
                                     "Referer": "https://www.youtube.com/das_captcha",
-                                     "Origin": "https://www.youtube.com"}, cookies=cookies).headers)
+                                     "Origin": "https://www.youtube.com"}, cookies=session.cookies).headers)
 def fetch_url_response(url, headers=(), timeout=15, data=None,
@@ -176,11 +190,9 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
    if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
        req = urllib.request.Request(url, data=data, headers=headers)
        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
                                                         cookiejar_receive=cookiejar_receive)
        opener = urllib.request.build_opener(cookie_processor)
        response = opener.open(req, timeout=timeout)
        cleanup_func = (lambda r: None)
@@ -192,17 +204,14 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
        session = requests.Session()
        print("Starting python GET request to "+url+"...")
-        response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'})
+        response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'})
        # Strings that appear when there's a Captcha.
        string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen."
        string_en = "To continue with your YouTube experience, please fill out the form below."
        # If there's a captcha, bypass it.
        if string_de in response.text or string_en in response.text:
-            # Parse response cookies.
+            bypass_captcha(session, response, url, session.cookies)
            cookies = [{'name': c.name, 'value': c.value, 'domain': c.domain, 'path': c.path} for c in session.cookies]
            print(cookies)
            bypass_captcha(session, response, url, cookies)
            return "Captcha", "Captcha"
        if max_redirects: