print(response)
This commit is contained in:
parent
6c5ce51b26
commit
6fee62a491
75
tw_data/feed.py
Normal file
75
tw_data/feed.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
from requests_futures.sessions import FuturesSession
|
||||||
|
from werkzeug.datastructures import Headers
|
||||||
|
from flask import Markup
|
||||||
|
from concurrent.futures import as_completed
|
||||||
|
from numerize import numerize
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from re import findall
|
||||||
|
import time, datetime
|
||||||
|
import requests
|
||||||
|
import bleach
|
||||||
|
import urllib
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
NITTERINSTANCE = "https://nitter.net/"
|
||||||
|
|
||||||
|
def get_feed(usernames, maxOld):
    '''
    Return feed tweets for a set of usernames.

    Fetches each user's Nitter timeline concurrently, parses every
    timeline item, and collects them into a flat list. Tweets older
    than `maxOld` days and pinned tweets are skipped.

    :param usernames: iterable of Twitter usernames (without '@')
    :param maxOld: maximum tweet age in days; older tweets are dropped
    :return: list of dicts, one per tweet
    '''
    feedTweets = []
    with FuturesSession() as session:
        futures = [session.get('{instance}{user}'.format(instance=NITTERINSTANCE, user=u)) for u in usernames]
        for future in as_completed(futures):
            res = future.result().content.decode('utf-8')
            html = BeautifulSoup(res, "html.parser")
            userFeed = html.find_all('div', attrs={'class': 'timeline-item'})
            if not userFeed:
                continue
            # The last timeline item is the "Load more" control, not a tweet.
            for post in userFeed[:-1]:
                tweet = {}
                date_time_str = post.find('span', attrs={'class': 'tweet-date'}).find('a')['title'].replace(",", "")
                # BUG FIX: was named `time`, shadowing the imported `time` module.
                age = datetime.datetime.now() - datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S')
                if age.days >= maxOld:
                    continue

                # Skip pinned tweets: they are not chronological feed items.
                pinned = post.find('div', attrs={'class': 'pinned'})
                if pinned and pinned.find('span', attrs={'icon-pin'}):
                    continue

                tweet['originalPoster'] = post.find('a', attrs={'class': 'username'}).text
                tweet['twitterName'] = post.find('a', attrs={'class': 'fullname'}).text
                tweet['timeStamp'] = datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S')
                tweet['date'] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
                tweet['content'] = Markup(post.find('div', attrs={'class': 'tweet-content'}))

                rtHeader = post.find('div', attrs={'class': 'retweet-header'})
                if rtHeader:
                    # Retweet: show the retweeting account as the username.
                    tweet['username'] = rtHeader.find('div', attrs={'class': 'icon-container'}).text
                    tweet['isRT'] = True
                else:
                    tweet['username'] = tweet['originalPoster']
                    tweet['isRT'] = False

                # Nitter hrefs are root-relative; strip the leading '/'.
                tweet['profilePic'] = NITTERINSTANCE + post.find('a', attrs={'class': 'tweet-avatar'}).find('img')['src'][1:]
                # BUG FIX: the tweet URL was computed into a local `url`
                # but never stored on the tweet (cf. get_feed_tweets in user.py).
                tweet['url'] = NITTERINSTANCE + post.find('a', attrs={'class': 'tweet-link'})['href'][1:]

                quote = post.find('div', attrs={'class': 'quote'})
                if quote:
                    tweet['isReply'] = True
                    tweet['quote'] = quote
                    if quote.find('div', attrs={'class': 'quote-text'}):
                        tweet['replyingTweetContent'] = Markup(quote.find('div', attrs={'class': 'quote-text'}))

                    if quote.find('a', attrs={'class': 'still-image'}):
                        tweet['replyAttachedImg'] = NITTERINSTANCE + quote.find('a', attrs={'class': 'still-image'})['href'][1:]

                    # An unavailable quote has no username anchor.
                    if quote.find('div', attrs={'class': 'unavailable-quote'}):
                        tweet['replyingUser'] = "Unavailable"
                    else:
                        tweet['replyingUser'] = quote.find('a', attrs={'class': 'username'}).text
                    # Remove the quote so it is not re-parsed as an attachment below.
                    quote.decompose()

                attachments = post.find('div', attrs={'class': 'attachments'})
                if attachments and not post.find(class_='quote'):
                    if attachments.find('a', attrs={'class': 'still-image'}):
                        # BUG FIX: the attachment URL was computed but never
                        # stored on the tweet (cf. get_feed_tweets in user.py).
                        tweet['attachedImg'] = NITTERINSTANCE + attachments.find('a')['href'][1:]

                feedTweets.append(tweet)
    return feedTweets
|
116
tw_data/user.py
Normal file
116
tw_data/user.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from flask import Markup
|
||||||
|
from requests_futures.sessions import FuturesSession
|
||||||
|
from werkzeug.datastructures import Headers
|
||||||
|
from concurrent.futures import as_completed
|
||||||
|
from numerize import numerize
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from re import findall
|
||||||
|
import time, datetime
|
||||||
|
import requests
|
||||||
|
import bleach
|
||||||
|
import urllib
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
##########################
|
||||||
|
#### Config variables ####
|
||||||
|
##########################
|
||||||
|
NITTERINSTANCE = 'https://nitter.net/'
|
||||||
|
|
||||||
|
def get_uer_info(username):
    '''
    Scrape a user's Nitter profile card into a dict.

    Returns False when Nitter shows an error panel for the username
    (account missing or unavailable). Follower count is numerized
    (e.g. "12.3K"); the other stat counts are returned as strings.

    :param username: Twitter username (without '@')
    :return: dict of profile fields, or False on error
    '''
    raw = urllib.request.urlopen('{instance}{user}'.format(instance=NITTERINSTANCE, user=username)).read()
    #rssFeed = feedparser.parse(response.content)

    page = BeautifulSoup(str(raw), "lxml")

    # An error panel means the account does not exist or is suspended.
    if page.body.find('div', attrs={'class': 'error-panel'}):
        return False

    card = page.body.find('div', attrs={'class': 'profile-card'})

    # Undo the escaping introduced by str(raw) above:
    # latin1 -> unicode_escape round-trip, then decode as UTF-8.
    def _unescape(text):
        return text.encode('latin1').decode('unicode_escape').encode('latin1').decode('utf8')

    nameTag = card.find('a', attrs={'class': 'profile-card-fullname'})
    fullName = _unescape(nameTag.getText()) if nameTag else None

    bioTag = card.find('div', attrs={'class': 'profile-bio'})
    profileBio = _unescape(bioTag.getText()) if bioTag else None

    # Stat spans appear in a fixed order: tweets, following, followers, likes.
    stats = card.find_all('span', attrs={'class': 'profile-stat-num'})

    return {
        "profileFullName": fullName,
        "profileUsername": card.find('a', attrs={'class': 'profile-card-username'}).string.encode('latin_1').decode('unicode_escape').encode('latin_1').decode('utf8'),
        "profileBio": profileBio,
        "tweets": stats[0].string,
        "following": stats[1].string,
        "followers": numerize.numerize(int(stats[2].string.replace(",", ""))),
        "likes": stats[3].string,
        "profilePic": "{instance}{pic}".format(instance=NITTERINSTANCE, pic=card.find('a', attrs={'class': 'profile-card-avatar'})['href'][1:]),
    }
|
||||||
|
|
||||||
|
def get_tweets(user, page=1):
    '''
    Return the tweets on a user's Nitter timeline.

    :param user: Twitter username (without '@')
    :param page: 1 returns the first timeline page; 2 follows the
        "show more" cursor once and returns the second page
    :return: list of tweet dicts from get_feed_tweets, or
        {"emptyFeed": True} when the timeline has no items
    '''
    feed = urllib.request.urlopen('{instance}{user}'.format(instance=NITTERINSTANCE, user=user)).read()
    # Gather feedPosts
    res = feed.decode('utf-8')
    html = BeautifulSoup(res, "html.parser")
    feedPosts = get_feed_tweets(html)

    if page == 2:
        # Follow the pagination cursor from the "show more" link.
        # BUG FIX: removed a leftover debug print of the next-page URL.
        nextPage = html.find('div', attrs={'class': 'show-more'}).find('a')['href']
        feed = urllib.request.urlopen('{instance}{user}{page}'.format(instance=NITTERINSTANCE, user=user, page=nextPage)).read()
        res = feed.decode('utf-8')
        html = BeautifulSoup(res, "html.parser")
        feedPosts = get_feed_tweets(html)
    return feedPosts
|
||||||
|
|
||||||
|
def get_feed_tweets(html):
    '''
    Parse all tweets out of a Nitter timeline page.

    Pinned tweets and "show more" controls are skipped.

    :param html: BeautifulSoup document of a Nitter timeline page
    :return: list of tweet dicts, or {"emptyFeed": True} when the page
        contains no timeline items
    '''
    feedPosts = []
    userFeed = html.find_all('div', attrs={'class': 'timeline-item'})
    if not userFeed:
        return {"emptyFeed": True}

    # The last timeline item is the "Load more" control, not a tweet.
    for post in userFeed[:-1]:
        if 'show-more' in str(post):
            continue
        date_time_str = post.find('span', attrs={'class': 'tweet-date'}).find('a')['title'].replace(",", "")

        # Skip pinned tweets: they are not chronological feed items.
        pinned = post.find('div', attrs={'class': 'pinned'})
        if pinned and pinned.find('span', attrs={'icon-pin'}):
            continue

        tweet = {}
        tweet['op'] = post.find('a', attrs={'class': 'username'}).text
        tweet['twitterName'] = post.find('a', attrs={'class': 'fullname'}).text
        tweet['timeStamp'] = str(datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S'))
        tweet['date'] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
        tweet['content'] = Markup(post.find('div', attrs={'class': 'tweet-content'}).decode_contents())

        rtHeader = post.find('div', attrs={'class': 'retweet-header'})
        if rtHeader:
            # Retweet: show the retweeting account as the username.
            tweet['username'] = rtHeader.find('div', attrs={'class': 'icon-container'}).text
            tweet['isRT'] = True
        else:
            tweet['username'] = tweet['op']
            tweet['isRT'] = False

        # Nitter hrefs are root-relative; strip the leading '/'.
        tweet['profilePic'] = NITTERINSTANCE + post.find('a', attrs={'class': 'tweet-avatar'}).find('img')['src'][1:]
        tweet['url'] = NITTERINSTANCE + post.find('a', attrs={'class': 'tweet-link'})['href'][1:]

        quote = post.find('div', attrs={'class': 'quote'})
        if quote:
            tweet['isReply'] = True
            if quote.find('div', attrs={'class': 'quote-text'}):
                tweet['replyingTweetContent'] = Markup(quote.find('div', attrs={'class': 'quote-text'}))

            if quote.find('a', attrs={'class': 'still-image'}):
                tweet['replyAttachedImg'] = NITTERINSTANCE + quote.find('a', attrs={'class': 'still-image'})['href'][1:]

            # BUG FIX: an unavailable quoted tweet has no username anchor,
            # so .text on the None lookup raised AttributeError. Handle it
            # like get_feed in feed.py does.
            if quote.find('div', attrs={'class': 'unavailable-quote'}):
                tweet['replyingUser'] = "Unavailable"
            else:
                tweet['replyingUser'] = quote.find('a', attrs={'class': 'username'}).text
            # Remove the quote so it is not re-parsed as an attachment below.
            quote.decompose()

        attachments = post.find('div', attrs={'class': 'attachments'})
        if attachments and not post.find(class_='quote'):
            if attachments.find('a', attrs={'class': 'still-image'}):
                tweet['attachedImg'] = NITTERINSTANCE + attachments.find('a')['href'][1:]

        feedPosts.append(tweet)
    return feedPosts
|
@ -147,7 +147,7 @@ def bypass_captcha(session, response, url, cookies):
|
|||||||
|
|
||||||
|
|
||||||
inputs['g-recaptcha-response'] = response['solution']['gRecaptchaResponse']
|
inputs['g-recaptcha-response'] = response['solution']['gRecaptchaResponse']
|
||||||
print(response['solution'])
|
print(response)
|
||||||
# Print POST request headers
|
# Print POST request headers
|
||||||
print(requests.post("https://youtube.com/das_captcha", data=inputs,
|
print(requests.post("https://youtube.com/das_captcha", data=inputs,
|
||||||
headers={"Content-Type": "application/x-www-form-urlencoded",
|
headers={"Content-Type": "application/x-www-form-urlencoded",
|
||||||
@ -204,7 +204,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
|
|||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
print("Starting python GET request to "+url+"...")
|
print("Starting python GET request to "+url+"...")
|
||||||
response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'})
|
response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0', "Accept-Language": "en-US,en;q=0.5"})
|
||||||
|
|
||||||
# Strings that appear when there's a Captcha.
|
# Strings that appear when there's a Captcha.
|
||||||
string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen."
|
string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen."
|
||||||
|
Reference in New Issue
Block a user