Cookie management

This commit is contained in:
pluja 2020-10-12 09:50:41 +02:00
parent e028ee929c
commit 438374890d
2 changed files with 11 additions and 10 deletions

View File

@ -8,8 +8,7 @@
<div class="header"> <div class="header">
Ahh... Here we go again! Ahh... Here we go again!
</div> </div>
<p>Google is asking to solve a Captcha. As we don't want you to do it, we'll do it for you.</p> <p>Google is asking to solve a Captcha. As we don't want you to do it, we'll do it for you. <b> Please, try again in a few seconds.</b></p>
<p>Try again in a few seconds.</p>
<a href="{{origin}}"> Click here to reload </a> <a href="{{origin}}"> Click here to reload </a>
</div> </div>
</div> </div>

View File

@ -99,11 +99,10 @@ def decode_content(content, encoding_header):
return content return content
def bypass_captcha(session, response, url): def bypass_captcha(session, response, url, cookies):
print("vvv COOKIES DICT vvv") print("vvv COOKIES DICT vvv")
cookies = session.cookies.get_dict()
print(cookies)
cookies = [{c.name: c.value} for c in cookies]
inputs = {} inputs = {}
html = BeautifulSoup(str(response.text), "lxml") html = BeautifulSoup(str(response.text), "lxml")
@ -141,7 +140,7 @@ def bypass_captcha(session, response, url):
headers={"Content-Type": "application/x-www-form-urlencoded", headers={"Content-Type": "application/x-www-form-urlencoded",
"Accept-Language": "en-US,en;q=0.5", "Accept-Language": "en-US,en;q=0.5",
"Referer": "https://www.youtube.com/das_captcha", "Referer": "https://www.youtube.com/das_captcha",
"Origin": "https://www.youtube.com"}).headers) "Origin": "https://www.youtube.com"}, cookies=cookies).headers)
def fetch_url_response(url, headers=(), timeout=15, data=None, def fetch_url_response(url, headers=(), timeout=15, data=None,
@ -192,14 +191,18 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
# retry. So there are 3 redirects max by default. # retry. So there are 3 redirects max by default.
session = requests.Session() session = requests.Session()
print("Starting python GET request...") print("Starting python GET request to "+url+"...")
response = session.get(url) response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'})
# Strings that appear when there's a Captcha. # Strings that appear when there's a Captcha.
string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen." string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen."
string_en = "To continue with your YouTube experience, please fill out the form below." string_en = "To continue with your YouTube experience, please fill out the form below."
# If there's a captcha, bypass it. # If there's a captcha, bypass it.
if string_de in response.text or string_en in response.text: if string_de in response.text or string_en in response.text:
bypass_captcha(session, response, url) # Parse response cookies.
cookies = [{'name': c.name, 'value': c.value, 'domain': c.domain, 'path': c.path} for c in session.cookies]
print(cookies)
bypass_captcha(session, response, url, cookies)
return "Captcha", "Captcha" return "Captcha", "Captcha"
if max_redirects: if max_redirects:
@ -463,4 +466,3 @@ def check_gevent_exceptions(*tasks):
for task in tasks: for task in tasks:
if task.exception: if task.exception:
raise task.exception raise task.exception