Compare commits

...

14 Commits

Author | SHA1 | Message | Date
pluja | 48cacb7af0 | print cookie | 2020-10-17 13:22:15 +02:00
pluja | d14a81acff | Fix | 2020-10-15 06:57:51 +02:00
pluja | 6fee62a491 | print(response) | 2020-10-12 12:39:19 +02:00
pluja | 6c5ce51b26 | Ditch python-anticaptcha and use requests | 2020-10-12 12:20:42 +02:00
pluja | c1a6c67fea | Fix for look dict typo | 2020-10-12 09:52:20 +02:00
pluja | 438374890d | Cookie management | 2020-10-12 09:50:41 +02:00
pluja | e028ee929c | Warn when solving captcha | 2020-10-12 09:11:36 +02:00
pluja | c11eec9555 | Fix problem with url var | 2020-10-12 08:52:20 +02:00
pluja | 4ef4a28e03 | Fix requirements.txt | 2020-10-12 08:50:03 +02:00
pluja | a678d9a6e1 | Merge branch 'yotter-dev' of https://github.com/ytorg/yotter into yotter-dev | 2020-10-12 08:47:32 +02:00
pluja | 99b9ad5591 | New version / CaptchaBypass testing | 2020-10-12 08:47:15 +02:00
PLUJA | da47a690e5 | Update yotter-config.json | 2020-10-12 08:23:21 +02:00
pluja | ceffdcfe24 | merge conflict | 2020-10-12 08:15:45 +02:00
pluja | c66afd6485 | Solve merge conflict | 2020-10-12 08:08:52 +02:00
15 changed files with 956 additions and 279 deletions

app/routes.py

@@ -1,4 +1,3 @@
 import datetime
 import glob
 import json
@@ -28,10 +27,10 @@ from youtube_search import YoutubeSearch
 from app import app, db
 from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, ChannelForm
 from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow
-from youtube import comments, utils
+from youtube import comments, utils, channel as ytch, search as yts
 from youtube import watch as ytwatch
 #########################################
-from youtube_data import search as yts
 #########################################
@@ -326,6 +325,10 @@ def ytsearch():
     else:
         prev_page = "/ytsearch?q={q}&s={s}&p={p}".format(q=query, s=sort, p=int(page) - 1)
+    for video in results['videos']:
+        hostname = urllib.parse.urlparse(video['videoThumb']).netloc
+        video['videoThumb'] = video['videoThumb'].replace("https://{}".format(hostname), "") + "&host=" + hostname
     for channel in results['channels']:
         if config['nginxVideoStream']:
             channel['thumbnail'] = channel['thumbnail'].replace("~", "/")
@@ -342,9 +345,7 @@ def ytsearch():
 @app.route('/ytfollow/<channelId>', methods=['POST'])
 @login_required
 def ytfollow(channelId):
-    form = EmptyForm()
-    if form.validate_on_submit():
-        r = followYoutubeChannel(channelId)
+    r = followYoutubeChannel(channelId)
     return redirect(request.referrer)
@@ -376,9 +377,7 @@ def followYoutubeChannel(channelId):
 @app.route('/ytunfollow/<channelId>', methods=['POST'])
 @login_required
 def ytunfollow(channelId):
-    form = EmptyForm()
-    if form.validate_on_submit():
-        unfollowYoutubeChannel(channelId)
+    unfollowYoutubeChannel(channelId)
     return redirect(request.referrer)
@@ -404,27 +403,38 @@ def unfollowYoutubeChannel(channelId):
 def channel(id):
     form = ChannelForm()
     button_form = EmptyForm()
-    data = requests.get('https://www.youtube.com/feeds/videos.xml?channel_id={id}'.format(id=id))
-    data = feedparser.parse(data.content)
-    channelData = YoutubeSearch.channelInfo(id)
-    for video in channelData[1]:
+    page = request.args.get('p', None)
+    sort = request.args.get('s', None)
+    if page is None:
+        page = 1
+    if sort is None:
+        sort = 3
+    data = ytch.get_channel_tab_info(id, page, sort)
+    for video in data['items']:
         if config['nginxVideoStream']:
-            hostName = urllib.parse.urlparse(video['videoThumb']).netloc
-            video['videoThumb'] = video['videoThumb'].replace("https://{}".format(hostName), "").replace("hqdefault",
-                                                              "mqdefault") + "&host=" + hostName
+            hostName = urllib.parse.urlparse(video['thumbnail'][1:]).netloc
+            video['thumbnail'] = video['thumbnail'].replace("https://{}".format(hostName), "")[1:].replace("hqdefault",
+                                                            "mqdefault") + "&host=" + hostName
         else:
-            video['videoThumb'] = video['videoThumb'].replace('/', '~')
-    if config['nginxVideoStream']:
-        hostName = urllib.parse.urlparse(channelData[0]['avatar']).netloc
-        channelData[0]['avatar'] = channelData[0]['avatar'].replace("https://{}".format(hostName),
-                                                                    "") + "?host=" + hostName
-    else:
-        channelData[0]['avatar'] = channelData[0]['avatar'].replace('/', '~')
-    return render_template('channel.html', form=form, btform=button_form, channel=channelData[0], videos=channelData[1],
-                           restricted=config['restrictPublicUsage'], config=config)
+            video['thumbnail'] = video['thumbnail'].replace('/', '~')
+    if config['nginxVideoStream']:
+        hostName = urllib.parse.urlparse(data['avatar'][1:]).netloc
+        data['avatar'] = data['avatar'].replace("https://{}".format(hostName), "")[1:] + "?host=" + hostName
+    else:
+        data['avatar'] = data['avatar'].replace('/', '~')
+    next_page = "/channel/{q}?s={s}&p={p}".format(q=id, s=sort, p=int(page) + 1)
+    if int(page) == 1:
+        prev_page = "/channel/{q}?s={s}&p={p}".format(q=id, s=sort, p=1)
+    else:
+        prev_page = "/channel/{q}?s={s}&p={p}".format(q=id, s=sort, p=int(page) - 1)
+    return render_template('channel.html', form=form, btform=button_form, data=data,
+                           restricted=config['restrictPublicUsage'], config=config, next_page=next_page, prev_page=prev_page)

 def get_best_urls(urls):
@@ -454,27 +464,48 @@ def get_live_urls(urls):
 def watch():
     id = request.args.get('v', None)
     info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
-    # Use nginx
-    best_formats = ["22", "18", "34", "35", "36", "37", "38", "43", "44", "45", "46"]
+    if info == 'Captcha':
+        return render_template('captcha.html', origin=request.referrer)
+    retry = 3
+    while retry != 0 and info['playability_error'] == 'Could not find player':
+        info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
+        retry -= 1
+    vsources = ytwatch.get_video_sources(info, False)
+    # Retry 3 times if no sources are available.
+    retry = 3
+    while retry != 0 and len(vsources) == 0:
+        vsources = ytwatch.get_video_sources(info, False)
+        retry -= 1
+    for source in vsources:
+        hostName = urllib.parse.urlparse(source['src']).netloc
+        source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
+    # Parse video formats
     for v_format in info['formats']:
         hostName = urllib.parse.urlparse(v_format['url']).netloc
         v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
-        if v_format['audio_bitrate'] is not None and v_format['vcodec'] is not None:
-            v_format['video_valid'] = True
-        elif v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
+        if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
             v_format['audio_valid'] = True
-    info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
+    # Markup description
+    try:
+        info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
+    except AttributeError or TypeError:
+        print(info['description'])
     # Get comments
     videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
     videocomments = utils.post_process_comments_info(videocomments)
     if videocomments is not None:
         videocomments.sort(key=lambda x: x['likes'], reverse=True)
-    info['rating'] = str((info['like_count']/(info['like_count']+info['dislike_count']))*100)[0:4]
-    return render_template("video.html", info=info, title='{}'.format(info['title']), config=config, videocomments=videocomments)
+    # Calculate rating %
+    info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4]
+    return render_template("video.html", info=info, title='{}'.format(info['title']), config=config,
+                           videocomments=videocomments, vsources=vsources)

 def markupString(string):
@@ -723,20 +754,17 @@ def register():
     return render_template('register.html', title='Register', registrations=REGISTRATIONS, form=form, config=config)

-@app.route('/registrations_status/icon')
-def registrations_status_icon():
+@app.route('/status')
+def status():
     count = db.session.query(User).count()
     if count >= config['maxInstanceUsers'] or config['maxInstanceUsers'] == 0:
-        return redirect(url_for('static', filename='img/close.png'))
+        filen = url_for('static', filename='img/close.png')
+        caniregister = False
     else:
-        return redirect(url_for('static', filename='img/open.png'))
-
-@app.route('/registrations_status/text')
-def registrations_status_text():
-    count = db.session.query(User).count()
-    return "{c}/{t}".format(c=count, t=config['maxInstanceUsers'])
+        filen = url_for('static', filename='img/open.png')
+        caniregister = True
+    return render_template('status.html', title='STATUS', count=count, max=config['maxInstanceUsers'], file=filen, cani=caniregister)

 @app.route('/error/<errno>')
 def error(errno):
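
A note on the new description handling in watch(): `except AttributeError or TypeError:` only catches AttributeError, because the expression `AttributeError or TypeError` is evaluated first and reduces to AttributeError, so a TypeError would still propagate. If both exceptions are meant to be swallowed, Python expects a tuple; a minimal sketch of the intended form:

```python
# Sketch: catching both exception types requires a tuple;
# `except AttributeError or TypeError:` silently reduces to AttributeError alone.
try:
    info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
except (AttributeError, TypeError):
    print(info['description'])
```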

app/templates/_video_item.html

@@ -1,46 +1,35 @@
-<div class="card">
-    <div class="image">
-        {%if config.nginxVideoStream%}
-            <img alt="Thumbnail" src="{{video.videoThumb}}">
-        {%else%}
-            <img alt="Thumbnail" src="/img/{{video.videoThumb.replace('/', '~')}}">
-        {%endif%}
-    </div>
-    <div class="content">
-        {% if video.views == "Livestream" %}
-            <a class="video-title break-word" href="#">{{video.videoTitle}}</a>
-        {% else %}
-            <a class="video-title break-word" href="{{url_for('watch', v=video.id, _method='GET')}}">{{video.videoTitle}}</a>
-        {% endif %}
-        <div class="meta">
-            <a class="break-word" href="{{url_for('channel', id=video.channelId)}}">{{video.channelName}}</a>
-        </div>
-        <div class="description break-word">
-            {{video.description}}
-        </div>
-    </div>
+<div class="ui card">
+    <a class="image" href="{{url_for('watch', v=video.id, _method='GET')}}">
+        <img src="https://yotter.xyz{{video.videoThumb}}">
+    </a>
+    <div class="content">
+        <a class="header" href="{{url_for('watch', v=video.id, _method='GET')}}">{{video.videoTitle}}</a>
+        <div class="meta">
+            <a class="break-word" href="{{url_for('channel', id=video.channelId)}}">{{video.channelName}}</a>
+        </div>
     </div>
     <div class="extra content">
         {% if video.isLive == "Livestream" or video.isLive %}
-            <span class="right floated">
+            <span class="left floated like">
                 <i class="red circle icon"></i>
                 {{video.views}}
             </span>
         {% else %}
-            <span class="right floated">
+            <span class="left floated like">
                 <i class="eye icon"></i>
                 {{video.views}}
             </span>
         {% endif %}
         {% if video.timeStamp == "Scheduled" or video.isUpcoming %}
-            <span class="right floated">
+            <span class="right floated star">
                 <i class="blue clock icon"></i>
                 {{video.timeStamp}}
             </span>
         {% else %}
-            <span class="right floated">
+            <span class="right floated star">
                 <i class="clock icon"></i>
                 {{video.timeStamp}}
             </span>
         {% endif %}
         <span>

app/templates/captcha.html (new file)

@@ -0,0 +1,17 @@
+{% extends "base.html" %}
+{% block content %}
+<div class="ui text container center aligned centered">
+    <div class="ui icon negative message">
+        <i class="meh outline icon"></i>
+        <div class="content">
+            <div class="header">
+                Ahh... Here we go again!
+            </div>
+            <p>Google is asking to solve a Captcha. As we don't want you to do it, we'll do it for you. <b>Please, try again in a few seconds.</b></p>
+            <a href="{{origin}}"> Click here to reload </a>
+        </div>
+    </div>
+</div>
+{%endblock%}

app/templates/channel.html

@@ -1,58 +1,94 @@
 {% extends "base.html" %}
 {% block content %}
-<div class="blue ui centered card">
-    <div class="content">
-        <div class="center aligned author">
-            {%if config.nginxVideoStream%}
-                <img alt="Thumbnail" src="{{channel.avatar}}">
-            {%else%}
-                <img alt="Thumbnail" src="/img/{{channel.avatar.replace('/', '~')}}">
-            {%endif%}
-        </div>
-        <div class="center aligned header"><a href="">{{channel.name}}</a></div>
-        <div class="center aligned description">
-            <div class="statistic">
-                <div class="value">
-                    <i class="users icon"></i>{{channel.subCount}}
-                </div>
-                <div class="label">
-                    Followers
-                </div>
-            </div>
-        </div>
+<div class="ui center aligned text container">
+    <div class="ui centered vertical segment">
+        <h2 class="ui header">
+            <img src="{{data.avatar}}" class="ui circular image">
+            {{data.channel_name}}
+        </h2>
+    </div>
+    <div class="ui vertical segment">
+        <p>{{data.short_description}}</p>
+    </div>
+    <div class="ui vertical segment">
+        <div class="ui tiny statistic">
+            <div class="value">
+                {%if data.approx_suscriber_count == None%}
+                    <i class="user icon"></i> ?
+                {%else%}
+                    <i class="user icon"></i> {{data.approx_subscriber_count}}
+                {%endif%}
+            </div>
+            <div class="label">
+                Followers
+            </div>
+        </div>
     </div>
     {% if restricted or current_user.is_authenticated %}
-    <div class="center aligned extra content">
-        {% if not current_user.is_following_yt(channel.id) %}
-        <p>
-            <form action="{{ url_for('ytfollow', channelId=channel.id) }}" method="post">
-                {{ btform.hidden_tag() }}
-                {{ btform.submit(value='Follow') }}
-            </form>
-        </p>
-        {% else %}
-        <p>
-            <form action="{{ url_for('ytunfollow', channelId=channel.id) }}" method="post">
-                {{ btform.hidden_tag() }}
-                {{ btform.submit(value='Unfollow') }}
-            </form>
-        </p>
-        {% endif %}
-    </div>
-    {% endif %}
-</div>
+        {% if not current_user.is_following_yt(data.channel_id) %}
+        <form action="{{ url_for('ytfollow', channelId=data.channel_id) }}" method="post">
+            <button type="submit" value="Submit" class="ui red button">
+                <i class="user icon"></i>
+                Suscribe
+            </button>
+        </form>
+        {% else %}
+        <form action="{{ url_for('ytunfollow', channelId=data.channel_id) }}" method="post">
+            <button type="submit" value="Submit" class="ui red active button">
+                <i class="user icon"></i>
+                Unsuscribe
+            </button>
+        </form>
+        {%endif%}
+    {%endif%}
+</div>

 <br>
 <br>
-{% if not videos %}
+{% if data['error'] != None %}
     {% include '_empty_feed.html' %}
 {% else %}
 <div class="ui centered cards">
-    {% for video in videos %}
-        {% include '_video_item.html' %}
+    {% for video in data['items'] %}
+    <div class="ui card">
+        <a class="image" href="{{url_for('watch', v=video.id, _method='GET')}}">
+            <img src="https://yotter.xyz{{video.thumbnail}}">
+        </a>
+        <div class="content">
+            <a class="header" href="{{url_for('watch', v=video.id, _method='GET')}}">{{video.title}}</a>
+            <div class="meta">
+                <a class="break-word" href="{{url_for('channel', id=video.channel_id)}}">{{data.channel_name}}</a>
+            </div>
+        </div>
+        <div class="extra content">
+            <span class="left floated like">
+                <i class="eye icon"></i>
+                {{video.approx_view_count}}
+            </span>
+            {%if video.duration == "PREMIERING NOW" or video.duration == "LIVE"%}
+            <span class="right floated star">
+                <i class="red circle icon"></i>
+                LIVE
+            </span>
+            {%else%}
+            <span class="right floated star">
+                <i class="clock icon"></i>
+                {{video.time_published}}
+            </span>
+            {%endif%}
+        </div>
+    </div>
     {% endfor %}
 </div>
 {% endif %}
+<br>
+<div class="ui center aligned text container">
+    <a href="{{prev_page}}"> <button class="ui left attached button"><i class="angle red left icon"></i></button> </a>
+    <a href="{{next_page}}"> <button class="right attached ui button"><i class="angle red right icon"></i></button></a>
+</div>
+<br>
 {% endblock %}

app/templates/status.html (new file, 46 lines)

@@ -0,0 +1,46 @@
+{% extends "base.html" %}
+{% block content %}
+<div class="ui text container center aligned centered">
+    <div class="ui placeholder segment">
+        <div class="ui two column stackable center aligned grid">
+            <div class="ui vertical divider">
+                {%if cani%}
+                    :)
+                {%else%}
+                    :(
+                {%endif%}
+            </div>
+            <div class="middle aligned row">
+                <div class="column">
+                    <h3 class="ui header"> Capacity </h3>
+                    <div class="ui icon header">
+                        {%if cani%}
+                            <i class="green users icon"></i>
+                        {%else%}
+                            <i class="red users icon"></i>
+                        {%endif%}
+                        {{count}}/{{max}}
+                    </div>
+                </div>
+                <div class="column">
+                    <div class="ui icon header">
+                        <i class="user circle outline icon"></i>
+                        Can I register?
+                    </div>
+                    {%if cani%}
+                    <a href="/register"><div class="ui green button">
+                        Yes!
+                    </div></a>
+                    {%else%}
+                    <a href="#!"><div class="ui disabled red button">
+                        It's full!
+                    </div></a>
+                    {%endif%}
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+{%endblock%}

app/templates/video.html

@@ -34,20 +34,18 @@
 </div>
 {%else%}
 <div class="video-js-responsive-container vjs-hd">
-    <video class="video-js vjs-default-skin"
+    <video-js autofocus class="video-js vjs-default-skin"
            data-setup='{ "playbackRates": [0.5, 0.75, 1, 1.25,1.5, 1.75, 2] }'
            width="1080"
            controls
            buffered
            preload="none">
        {% if config.nginxVideoStream %}
-            {% for format in info.formats %}
-                {% if format.video_valid %}
-                    <source src="{{format.url}}" type="video/{{format.ext}}">
-                {% endif %}
+            {% for source in vsources %}
+                <source src="{{source.src}}" type="{{source.type}}">
            {% endfor %}
        {% endif %}
-    </video>
+    </video-js>
 </div>
 {%endif%}

@@ -99,7 +97,6 @@
 <script src="{{ url_for('static',filename='video.min.js') }}"></script>
 {% if info.live %}
-    <p>Active</p>
     <script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
     <script>
         var player = videojs('live');
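
The player template now iterates `vsources` (passed from `watch()` above) instead of filtering `info.formats`, reading `src` and `type` from each entry. The exact structure returned by `ytwatch.get_video_sources` is not shown in this diff; a sketch of the shape the template assumes, with invented values:

```python
# Sketch only: the template reads source.src and source.type from each entry.
# Real entries come from ytwatch.get_video_sources(info, False); values below are invented.
vsources = [
    {"src": "/videoplayback?expire=1602500000&host=r3---sn-example.googlevideo.com",
     "type": "video/mp4"},
]
for source in vsources:
    print('<source src="{src}" type="{type}">'.format(**source))
```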

requirements.txt

@@ -2,9 +2,11 @@ alembic==1.4.3
 astroid==2.4.2
 async-timeout==3.0.1
 attrs==20.2.0
-beautifulsoup4==4.9.3
+beautifulsoup4==4.9.2
 bleach==3.2.1
+Brotli==1.0.9
 bs4==0.0.1
+cachetools==4.1.1
 certifi==2020.6.20
 chardet==3.0.4
 click==7.1.2
@@ -38,6 +40,8 @@ packaging==20.4
 pylint==2.6.0
 PyMySQL==0.10.1
 pyparsing==2.4.7
+PySocks==1.7.1
+python-anticaptcha==0.7.1
 python-dateutil==2.8.1
 python-dotenv==0.14.0
 python-editor==1.0.4

tw_data/feed.py (new file, 75 lines)

@@ -0,0 +1,75 @@
+from requests_futures.sessions import FuturesSession
+from werkzeug.datastructures import Headers
+from flask import Markup
+from concurrent.futures import as_completed
+from numerize import numerize
+from bs4 import BeautifulSoup
+from re import findall
+import time, datetime
+import requests
+import bleach
+import urllib
+import json
+import re
+
+NITTERINSTANCE = "https://nitter.net/"
+
+def get_feed(usernames, maxOld):
+    '''
+    Returns feed tweets given a set of usernames
+    '''
+    feedTweets = []
+    with FuturesSession() as session:
+        futures = [session.get('{instance}{user}'.format(instance=NITTERINSTANCE, user=u)) for u in usernames]
+        for future in as_completed(futures):
+            res = future.result().content.decode('utf-8')
+            html = BeautifulSoup(res, "html.parser")
+            userFeed = html.find_all('div', attrs={'class':'timeline-item'})
+            if userFeed != []:
+                for post in userFeed[:-1]:
+                    tweet = {}
+                    date_time_str = post.find('span', attrs={'class':'tweet-date'}).find('a')['title'].replace(",","")
+                    time = datetime.datetime.now() - datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S')
+                    if time.days >= maxOld:
+                        continue
+                    if post.find('div', attrs={'class':'pinned'}):
+                        if post.find('div', attrs={'class':'pinned'}).find('span', attrs={'icon-pin'}):
+                            continue
+                    tweet['originalPoster'] = post.find('a', attrs={'class':'username'}).text
+                    tweet['twitterName'] = post.find('a', attrs={'class':'fullname'}).text
+                    tweet['timeStamp'] = datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S')
+                    tweet['date'] = post.find('span', attrs={'class':'tweet-date'}).find('a').text
+                    tweet['content'] = Markup(post.find('div', attrs={'class':'tweet-content'}))
+
+                    if post.find('div', attrs={'class':'retweet-header'}):
+                        tweet['username'] = post.find('div', attrs={'class':'retweet-header'}).find('div', attrs={'class':'icon-container'}).text
+                        tweet['isRT'] = True
+                    else:
+                        tweet['username'] = tweet['originalPoster']
+                        tweet['isRT'] = False
+
+                    tweet['profilePic'] = NITTERINSTANCE+post.find('a', attrs={'class':'tweet-avatar'}).find('img')['src'][1:]
+                    url = NITTERINSTANCE + post.find('a', attrs={'class':'tweet-link'})['href'][1:]
+                    if post.find('div', attrs={'class':'quote'}):
+                        tweet['isReply'] = True
+                        tweet['quote'] = post.find('div', attrs={'class':'quote'})
+                        if tweet['quote'].find('div', attrs={'class':'quote-text'}):
+                            tweet['replyingTweetContent'] = Markup(tweet['quote'].find('div', attrs={'class':'quote-text'}))
+
+                        if tweet['quote'].find('a', attrs={'class':'still-image'}):
+                            tweet['replyAttachedImg'] = NITTERINSTANCE+tweet['quote'].find('a', attrs={'class':'still-image'})['href'][1:]
+
+                        if tweet['quote'].find('div', attrs={'class':'unavailable-quote'}):
+                            tweet['replyingUser']="Unavailable"
+                        else:
+                            tweet['replyingUser']=tweet['quote'].find('a', attrs={'class':'username'}).text
+                        post.find('div', attrs={'class':'quote'}).decompose()
+
+                    if post.find('div', attrs={'class':'attachments'}):
+                        if not post.find(class_='quote'):
+                            if post.find('div', attrs={'class':'attachments'}).find('a', attrs={'class':'still-image'}):
+                                attachedImg = NITTERINSTANCE + post.find('div', attrs={'class':'attachments'}).find('a')['href'][1:]
+                    feedTweets.append(tweet)
+    return feedTweets

tw_data/user.py (new file, 116 lines)

@@ -0,0 +1,116 @@
+from flask import Markup
+from requests_futures.sessions import FuturesSession
+from werkzeug.datastructures import Headers
+from concurrent.futures import as_completed
+from numerize import numerize
+from bs4 import BeautifulSoup
+from re import findall
+import time, datetime
+import requests
+import bleach
+import urllib
+import json
+import re
+
+##########################
+#### Config variables ####
+##########################
+NITTERINSTANCE = 'https://nitter.net/'
+
+def get_uer_info(username):
+    response = urllib.request.urlopen('{instance}{user}'.format(instance=NITTERINSTANCE, user=username)).read()
+    #rssFeed = feedparser.parse(response.content)
+    html = BeautifulSoup(str(response), "lxml")
+    if html.body.find('div', attrs={'class':'error-panel'}):
+        return False
+    else:
+        html = html.body.find('div', attrs={'class':'profile-card'})
+
+        if html.find('a', attrs={'class':'profile-card-fullname'}):
+            fullName = html.find('a', attrs={'class':'profile-card-fullname'}).getText().encode('latin1').decode('unicode_escape').encode('latin1').decode('utf8')
+        else:
+            fullName = None
+
+        if html.find('div', attrs={'class':'profile-bio'}):
+            profileBio = html.find('div', attrs={'class':'profile-bio'}).getText().encode('latin1').decode('unicode_escape').encode('latin1').decode('utf8')
+        else:
+            profileBio = None
+
+        user = {
+            "profileFullName":fullName,
+            "profileUsername":html.find('a', attrs={'class':'profile-card-username'}).string.encode('latin_1').decode('unicode_escape').encode('latin_1').decode('utf8'),
+            "profileBio":profileBio,
+            "tweets":html.find_all('span', attrs={'class':'profile-stat-num'})[0].string,
+            "following":html.find_all('span', attrs={'class':'profile-stat-num'})[1].string,
+            "followers":numerize.numerize(int(html.find_all('span', attrs={'class':'profile-stat-num'})[2].string.replace(",",""))),
+            "likes":html.find_all('span', attrs={'class':'profile-stat-num'})[3].string,
+            "profilePic":"{instance}{pic}".format(instance=NITTERINSTANCE, pic=html.find('a', attrs={'class':'profile-card-avatar'})['href'][1:])
+        }
+    return user
+
+def get_tweets(user, page=1):
+    feed = urllib.request.urlopen('{instance}{user}'.format(instance=NITTERINSTANCE, user=user)).read()
+    #Gather feedPosts
+    res = feed.decode('utf-8')
+    html = BeautifulSoup(res, "html.parser")
+    feedPosts = get_feed_tweets(html)
+
+    if page == 2:
+        nextPage = html.find('div', attrs={'class':'show-more'}).find('a')['href']
+        print('{instance}{user}{page}'.format(instance=NITTERINSTANCE, user=user, page=nextPage))
+        feed = urllib.request.urlopen('{instance}{user}{page}'.format(instance=NITTERINSTANCE, user=user, page=nextPage)).read()
+        res = feed.decode('utf-8')
+        html = BeautifulSoup(res, "html.parser")
+        feedPosts = get_feed_tweets(html)
+    return feedPosts
+
+def get_feed_tweets(html):
+    feedPosts = []
+    userFeed = html.find_all('div', attrs={'class':'timeline-item'})
+    if userFeed != []:
+        for post in userFeed[:-1]:
+            if 'show-more' in str(post):
+                continue
+            date_time_str = post.find('span', attrs={'class':'tweet-date'}).find('a')['title'].replace(",","")
+
+            if post.find('div', attrs={'class':'pinned'}):
+                if post.find('div', attrs={'class':'pinned'}).find('span', attrs={'icon-pin'}):
+                    continue
+
+            tweet = {}
+            tweet['op'] = post.find('a', attrs={'class':'username'}).text
+            tweet['twitterName'] = post.find('a', attrs={'class':'fullname'}).text
+            tweet['timeStamp'] = str(datetime.datetime.strptime(date_time_str, '%d/%m/%Y %H:%M:%S'))
+            tweet['date'] = post.find('span', attrs={'class':'tweet-date'}).find('a').text
+            tweet['content'] = Markup(post.find('div', attrs={'class':'tweet-content'}).decode_contents())
+
+            if post.find('div', attrs={'class':'retweet-header'}):
+                tweet['username'] = post.find('div', attrs={'class':'retweet-header'}).find('div', attrs={'class':'icon-container'}).text
+                tweet['isRT'] = True
+            else:
+                tweet['username'] = tweet['op']
+                tweet['isRT'] = False
+
+            tweet['profilePic'] = NITTERINSTANCE+post.find('a', attrs={'class':'tweet-avatar'}).find('img')['src'][1:]
+            tweet['url'] = NITTERINSTANCE + post.find('a', attrs={'class':'tweet-link'})['href'][1:]
+            if post.find('div', attrs={'class':'quote'}):
+                tweet['isReply'] = True
+                quote = post.find('div', attrs={'class':'quote'})
+                if quote.find('div', attrs={'class':'quote-text'}):
+                    tweet['replyingTweetContent'] = Markup(quote.find('div', attrs={'class':'quote-text'}))
+
+                if quote.find('a', attrs={'class':'still-image'}):
+                    tweet['replyAttachedImg'] = NITTERINSTANCE+quote.find('a', attrs={'class':'still-image'})['href'][1:]
+
+                tweet['replyingUser']=quote.find('a', attrs={'class':'username'}).text
+                post.find('div', attrs={'class':'quote'}).decompose()
+
+            if post.find('div', attrs={'class':'attachments'}):
+                if not post.find(class_='quote'):
+                    if post.find('div', attrs={'class':'attachments'}).find('a', attrs={'class':'still-image'}):
+                        tweet['attachedImg'] = NITTERINSTANCE + post.find('div', attrs={'class':'attachments'}).find('a')['href'][1:]
+            feedPosts.append(tweet)
+    else:
+        return {"emptyFeed": True}
+    return feedPosts
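
`get_uer_info` repeatedly chains `.encode('latin1').decode('unicode_escape').encode('latin1').decode('utf8')` to undo doubly mangled text: `str(response)` turns the raw page bytes into a Python literal with `\xNN` escapes, so the chain re-materializes those escapes as bytes and then decodes them as UTF-8. A small self-contained sketch of the same round-trip (the sample string is made up):

```python
# Sketch: recovering UTF-8 text that was flattened through str(bytes).
# The sample value is invented; the Nitter pages go through the same round-trip.
mangled = 'Mot\\xc3\\xb6rhead'          # escape sequences as literal characters
step1 = mangled.encode('latin1')        # back to raw bytes, escapes intact
step2 = step1.decode('unicode_escape')  # \xc3\xb6 -> the two chars U+00C3 U+00B6
step3 = step2.encode('latin1')          # chars -> the original UTF-8 bytes
print(step3.decode('utf8'))             # Motörhead
```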

yotter-config.json

@@ -11,5 +11,6 @@
     "admin_message":"Message from the admin text",
     "admin_user":"admin_username",
     "max_old_user_days": 60,
-    "donate_url": ""
+    "donate_url": "",
+    "anticaptcha":""
 }

youtube/channel.py

@@ -1,20 +1,16 @@
 import base64
-from youtube import util, yt_data_extract, local_playlist, subscriptions
-from youtube import yt_app
-import urllib
 import json
-from string import Template
-import youtube.proto as proto
-import html
 import math
-import gevent
 import re
-import cachetools.func
 import traceback
+import urllib
+import cachetools.func
 import flask
-from flask import request
+import gevent
+import youtube.proto as proto
+from youtube import util, yt_data_extract

 headers_desktop = (
     ('Accept', '*/*'),
@@ -109,7 +105,7 @@ def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
     return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

-def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
+def get_channel_tab_info(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
     message = 'Got channel tab' if print_status else None

     if int(sort) == 2 and int(page) > 1:
@@ -128,7 +124,11 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_st
         headers_desktop + generic_cookie,
         debug_name='channel_tab', report_text=message)

-    return content
+    info = yt_data_extract.extract_channel_info(json.loads(content), tab)
+    if info['error'] is not None:
+        return False
+    post_process_channel_info(info)
+    return info

 # cache entries expire after 30 minutes
 @cachetools.func.ttl_cache(maxsize=128, ttl=30*60)
@@ -259,23 +259,4 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
         **info
     )

-@yt_app.route('/channel/<channel_id>/')
-@yt_app.route('/channel/<channel_id>/<tab>')
-def get_channel_page(channel_id, tab='videos'):
-    return get_channel_page_general_url('https://www.youtube.com/channel/' + channel_id, tab, request, channel_id)
-
-@yt_app.route('/user/<username>/')
-@yt_app.route('/user/<username>/<tab>')
-def get_user_page(username, tab='videos'):
-    return get_channel_page_general_url('https://www.youtube.com/user/' + username, tab, request)
-
-@yt_app.route('/c/<custom>/')
-@yt_app.route('/c/<custom>/<tab>')
-def get_custom_c_page(custom, tab='videos'):
-    return get_channel_page_general_url('https://www.youtube.com/c/' + custom, tab, request)
-
-@yt_app.route('/<custom>')
-@yt_app.route('/<custom>/<tab>')
-def get_toplevel_custom_page(custom, tab='videos'):
-    return get_channel_page_general_url('https://www.youtube.com/' + custom, tab, request)
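
The renamed `get_channel_tab_info` now returns the extracted channel dict (or `False` on an extraction error) instead of raw JSON, which is what the `channel()` route in app/routes.py consumes. A minimal usage sketch (the channel ID is an arbitrary example value, and the `channel_name`/`items` keys follow what the templates above read):

```python
# Sketch: fetch page 1 of a channel's "videos" tab, newest first (sort=3).
# The channel ID is an arbitrary example.
from youtube import channel as ytch

info = ytch.get_channel_tab_info('UCXuqSBlHAE6Xw-yeJA0Tunw', page='1', sort=3)
if info is False:
    print('extraction failed')
else:
    print(info['channel_name'], len(info['items']), 'videos on this page')
```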

youtube/channels.py (new file, 213 lines)

@@ -0,0 +1,213 @@
+from youtube import proto
+from flask import Markup as mk
+import requests
+import base64
+import json
+import re
+
+# From: https://github.com/user234683/youtube-local/blob/master/youtube/channel.py
+# SORT:
+#  videos:
+#     Popular - 1
+#     Oldest - 2
+#     Newest - 3
+#  playlists:
+#     Oldest - 2
+#     Newest - 3
+#     Last video added - 4
+# view:
+#  grid: 0 or 1
+#  list: 2
+
+headers = {
+    'Host': 'www.youtube.com',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
+    'Accept': '*/*',
+    'Accept-Language': 'en-US,en;q=0.5',
+    'X-YouTube-Client-Name': '1',
+    'X-YouTube-Client-Version': '2.20180418',
+}
+
+real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
+generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
+
+def channel_ctoken_desktop(channel_id, page, sort, tab, view=1):
+    # see https://github.com/iv-org/invidious/issues/1319#issuecomment-671732646
+    # page > 1 doesn't work when sorting by oldest
+    offset = 30*(int(page) - 1)
+    schema_number = {
+        3: 6307666885028338688,
+        2: 17254859483345278706,
+        1: 16570086088270825023,
+    }[int(sort)]
+    page_token = proto.string(61, proto.unpadded_b64encode(proto.string(1,
+        proto.uint(1, schema_number) + proto.string(2,
+            proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
+        )
+    )))
+
+    tab = proto.string(2, tab )
+    sort = proto.uint(3, int(sort))
+    #page = proto.string(15, str(page) )
+
+    shelf_view = proto.uint(4, 0)
+    view = proto.uint(6, int(view))
+    continuation_info = proto.string(3,
+        proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
+    )
+
+    channel_id = proto.string(2, channel_id )
+    pointless_nest = proto.string(80226972, channel_id + continuation_info)
+
+    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
+def channel_ctoken_mobile(channel_id, page, sort, tab, view=1):
+    tab = proto.string(2, tab )
+    sort = proto.uint(3, int(sort))
+    page = proto.string(15, str(page) )
+    # example with shelves in videos tab: https://www.youtube.com/channel/UCNL1ZadSjHpjm4q9j2sVtOA/videos
+    shelf_view = proto.uint(4, 0)
+    view = proto.uint(6, int(view))
+    continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + shelf_view + page) )
+
+    channel_id = proto.string(2, channel_id )
+    pointless_nest = proto.string(80226972, channel_id + continuation_info)
+
+    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
+def id_or_username(string):
+    cidRegex = "^UC.{22}$"
+    if re.match(cidRegex, string):
+        return "channel"
+    else:
+        return "user"
+
+def get_channel_videos_tab(content):
+    tabs = content['contents']['twoColumnBrowseResultsRenderer']['tabs']
+    for tab in tabs:
+        if tab['title'] != "Videos":
+            continue
+        else:
+            return tab
+
+def get_video_items_from_tab(tab):
+    items = []
+    for item in tab:
+        try:
+            if item['gridVideoRenderer']:
+                items.append(item)
+            else:
+                continue
+        except KeyError:
+            continue
+    return items
+
+def get_info_grid_video_item(item, channel=None):
+    item = item['gridVideoRenderer']
+    thumbnailOverlays = item['thumbnailOverlays']
+    published = ""
+    views = ""
+    isLive = False
+    isUpcoming = False
+    try:
+        if 'UPCOMING' in str(thumbnailOverlays):
+            start_time = item['upcomingEventData']['startTime']
+            isUpcoming = True
+            views = "-"
+            published = "Scheduled"
+    except KeyError:
+        isUpcoming = False
+
+    try:
+        if 'LIVE' in str(thumbnailOverlays):
+            isLive = True
+            try:
+                views = item['viewCountText']['simpleText']
+            except:
+                views = "Live"
+            try:
+                duration = item['lengthText']['simpleText']
+            except:
+                duration = "-"
+            if published != "Scheduled":
+                try:
+                    published = item['publishedTimeText']['simpleText']
+                except KeyError:
+                    published = "None"
+    except KeyError:
+        isUpcoming = False
+        isLive = False
+
+    if not isUpcoming and not isLive:
+        views = item['viewCountText']['simpleText']
+        published = item['publishedTimeText']['simpleText']
+        try:
+            duration = item['lengthText']['simpleText']
+        except:
+            duration = "?"
+
+    video = {
+        'videoTitle':item['title']['runs'][0]['text'],
+        'description':"",
+        'views':views,
+        'timeStamp':published,
+        'duration':duration,
+        'channelName':channel['username'],
+        'authorUrl':"/channel/{}".format(channel['channelId']),
+        'channelId':channel['channelId'],
+        'id':item['videoId'],
+        'videoUrl':"/watch?v={}".format(item['videoId']),
+        'isLive':isLive,
+        'isUpcoming':isUpcoming,
+        'videoThumb':item['thumbnail']['thumbnails'][0]['url']
+    }
+    return video
+
+def get_author_info_from_channel(content):
+    hmd = content['metadata']['channelMetadataRenderer']
+    cmd = content['header']['c4TabbedHeaderRenderer']
+    description = mk(hmd['description'])
+    channel = {
+        "channelId": cmd['channelId'],
+        "username": cmd['title'],
+        "thumbnail": "https:{}".format(cmd['avatar']['thumbnails'][0]['url'].replace("/", "~")),
+        "description":description,
+        "suscribers": cmd['subscriberCountText']['runs'][0]['text'].split(" ")[0],
+        "banner": cmd['banner']['thumbnails'][0]['url']
+    }
+    return channel
+
+def get_channel_info(channelId, videos=True, page=1, sort=3):
+    if id_or_username(channelId) == "channel":
+        videos = []
+        ciUrl = "https://www.youtube.com/channel/{}".format(channelId)
+        mainUrl = "https://www.youtube.com/browse_ajax?ctoken={}".format(channel_ctoken_desktop(channelId, page, sort, "videos"))
+        content = json.loads(requests.get(mainUrl, headers=headers).text)
+        req = requests.get(ciUrl, headers=headers).text
+
+        start = (
+            req.index('window["ytInitialData"]')
+            + len('window["ytInitialData"]')
+            + 3
+        )
+        end = req.index("};", start) + 1
+        jsonIni = req[start:end]
+        data = json.loads(jsonIni)
+
+        #videosTab = get_channel_videos_tab(content)
+        authorInfo = get_author_info_from_channel(data)
+        if videos:
+            gridVideoItemList = get_video_items_from_tab(content[1]['response']['continuationContents']['gridContinuation']['items'])
+            for video in gridVideoItemList:
+                vid = get_info_grid_video_item(video, authorInfo)
+                videos.append(vid)
+            print({"channel":authorInfo, "videos":videos})
+            return {"channel":authorInfo, "videos":videos}
+        else:
+            return {"channel":authorInfo}
+    else:
+        baseUrl = "https://www.youtube.com/user/{}".format(channelId)
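
The ctoken helpers pack tab, sort, view and page offset into nested protobuf fields (field 80226972 wraps the channel ID plus the continuation blob) and base64-encode the result for YouTube's `browse_ajax` endpoint. A minimal sketch of how the module is driven, mirroring the call `get_channel_info` itself makes (the channel ID is an arbitrary example, and YouTube's internal API changes often, so treat this as illustrative):

```python
# Sketch: build a continuation token for page 2 of a channel's videos tab,
# sorted newest-first (sort=3), then fetch it the same way get_channel_info does.
import json
import requests
from youtube.channels import channel_ctoken_desktop, headers

ctoken = channel_ctoken_desktop('UCXuqSBlHAE6Xw-yeJA0Tunw', page=2, sort=3, tab='videos')
url = "https://www.youtube.com/browse_ajax?ctoken={}".format(ctoken)
data = json.loads(requests.get(url, headers=headers).text)  # same request as get_channel_info
```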

youtube/search.py

@@ -1,38 +1,10 @@
+from youtube import proto
+from youtube import utils
+from flask import Markup
+import urllib.parse
+import requests
 import base64
 import json
-import urllib
-import flask
-from flask import request
-from werkzeug.exceptions import abort
-from youtube import util, yt_data_extract, proto
-from youtube import yt_app
-
-# Sort: 1
-    # Upload date: 2
-    # View count: 3
-    # Rating: 1
-    # Relevance: 0
-# Offset: 9
-# Filters: 2
-    # Upload date: 1
-    # Type: 2
-    # Duration: 3
-
-features = {
-    '4k': 14,
-    'hd': 4,
-    'hdr': 25,
-    'subtitles': 5,
-    'creative_commons': 6,
-    '3d': 7,
-    'live': 8,
-    'purchased': 9,
-    '360': 15,
-    'location': 23,
-}

 def page_number_to_sp_parameter(page, autocorrect, sort, filters):
     offset = (int(page) - 1)*20  # 20 results per page
@@ -41,8 +13,8 @@ def page_number_to_sp_parameter(page, autocorrect, sort, filters):
     result = proto.uint(1, sort) + filters_enc + autocorrect + proto.uint(9, offset) + proto.string(61, b'')
     return base64.urlsafe_b64encode(result).decode('ascii')

-def get_search_json(query, page, autocorrect, sort, filters):
-    url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
+def search_by_terms(search_terms, page, autocorrect, sort, filters):
+    url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(search_terms)
     headers = {
         'Host': 'www.youtube.com',
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
@@ -52,54 +24,145 @@ def get_search_json(query, page, autocorrect, sort, filters):
         'X-YouTube-Client-Version': '2.20180418',
     }
     url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
-    content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
+    content = requests.get(url, headers=headers).text
     info = json.loads(content)
-    return info
-
-@yt_app.route('/search')
-def get_search_page():
-    if len(request.args) == 0:
-        return flask.render_template('base.html', title="Search")
-
-    if 'query' not in request.args:
-        abort(400)
-
-    query = request.args.get("query")
-    page = request.args.get("page", "1")
-    autocorrect = int(request.args.get("autocorrect", "1"))
-    sort = int(request.args.get("sort", "0"))
-    filters = {}
-    filters['time'] = int(request.args.get("time", "0"))
-    filters['type'] = int(request.args.get("type", "0"))
-    filters['duration'] = int(request.args.get("duration", "0"))
-    polymer_json = get_search_json(query, page, autocorrect, sort, filters)
-
-    search_info = yt_data_extract.extract_search_info(polymer_json)
-    if search_info['error']:
-        return flask.render_template('error.html', error_message = search_info['error'])
-
-    for extract_item_info in search_info['items']:
-        util.prefix_urls(extract_item_info)
-        util.add_extra_html_info(extract_item_info)
-
-    corrections = search_info['corrections']
-    if corrections['type'] == 'did_you_mean':
-        corrected_query_string = request.args.to_dict(flat=False)
-        corrected_query_string['query'] = [corrections['corrected_query']]
-        corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
-    elif corrections['type'] == 'showing_results_for':
-        no_autocorrect_query_string = request.args.to_dict(flat=False)
-        no_autocorrect_query_string['autocorrect'] = ['0']
-        no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
-        corrections['original_query_url'] = no_autocorrect_query_url
-
-    return flask.render_template('search.html',
-        header_playlist_names = local_playlist.get_playlist_names(),
-        query = query,
-        estimated_results = search_info['estimated_results'],
-        estimated_pages = search_info['estimated_pages'],
-        corrections = search_info['corrections'],
-        results = search_info['items'],
-        parameters_dictionary = request.args,
-    )
+    videos = get_videos_from_search(info)
+    channels = get_channels_from_search(info)
+
+    results = {
+        "videos": videos,
+        "channels": channels
+    }
+    return results
+
+def get_channels_from_search(search):
+    results = []
+    search = search[1]['response']
+    primaryContents = search['contents']['twoColumnSearchResultsRenderer']['primaryContents']
+    contents = primaryContents['sectionListRenderer']['contents']
+    for content in contents:
+        try:
+            items = content['itemSectionRenderer']['contents']
+        except:
+            continue
+        for item in items:
+            try:
+                item['channelRenderer']
+                channel = get_channel_renderer_item_info(item['channelRenderer'])
+                results.append(channel)
+            except KeyError:
+                continue
+    return results
+
+def get_channel_renderer_item_info(item):
+    try:
+        suscribers = item['subscriberCountText']['simpleText'].split(" ")[0]
+    except:
+        suscribers = "?"
+
+    try:
+        description = utils.get_description_snippet_text(item['descriptionSnippet']['runs'])
+    except KeyError:
+        description = ""
+
+    try:
+        channel = {
+            "channelId": item['channelId'],
+            "username": item['title']['simpleText'],
+            "thumbnail": "https:{}".format(item['thumbnail']['thumbnails'][0]['url'].replace("/", "~")),
+            "description": Markup(str(description)),
+            "suscribers": suscribers,
+            "videos": item['videoCountText']['runs'][0]['text']
+        }
+    except KeyError:
+        channel = {
+            "channelId": item['channelId'],
+            "username": item['title']['simpleText'],
+            "avatar": item['thumbnail']['thumbnails'][0]['url'],
+            "suscribers": suscribers
+        }
+    return channel
+
+def get_videos_from_search(search):
+    latest = []
+    results = []
+    search = search[1]['response']
+    primaryContents = search['contents']['twoColumnSearchResultsRenderer']['primaryContents']
+    contents = primaryContents['sectionListRenderer']['contents']
+    for content in contents:
+        try:
+            items = content['itemSectionRenderer']['contents']
+        except:
+            continue
+        for item in items:
+            try:
+                item['videoRenderer']
+                video = get_video_renderer_item_info(item['videoRenderer'])
+                results.append(video)
+            except KeyError:
+                continue
+
+    # Sometimes Youtube will return an empty query. Try again.
+    return results
+
+def get_video_renderer_item_info(item):
+    published = ""
+    views = ""
+    isLive = False
+    isUpcoming = False
+
+    thumbnailOverlays = item['thumbnailOverlays']
+    try:
+        if 'UPCOMING' in str(thumbnailOverlays):
+            start_time = item['upcomingEventData']['startTime']
+            isUpcoming = True
+            views = "-"
+            published = "Scheduled"
+    except KeyError:
+        isUpcoming = False
+
+    try:
+        if 'LIVE' in str(thumbnailOverlays):
+            isLive = True
+            try:
+                views = item['viewCountText']['simpleText']
+            except:
+                views = "Live"
+            try:
+                duration = item['lengthText']['simpleText']
+            except:
+                duration = "-"
+            if published != "Scheduled":
+                try:
+                    published = item['publishedTimeText']['simpleText']
+                except KeyError:
+                    published = "None"
+    except:
+        isUpcoming = False
+        isLive = False
+
+    if not isUpcoming and not isLive:
+        views = item['viewCountText']['simpleText']
+        published = item['publishedTimeText']['simpleText']
+        duration = item['lengthText']['simpleText']
+
+    video = {
+        'videoTitle':item['title']['runs'][0]['text'],
+        'description':Markup(str(utils.get_description_snippet_text(item['descriptionSnippet']['runs']))),
+        'views':views,
+        'timeStamp':published,
+        'duration':duration,
+        'channelName':item['ownerText']['runs'][0]['text'],
+        'authorUrl':"/channel/{}".format(item['ownerText']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId']),
+        'channelId':item['ownerText']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'],
+        'id':item['videoId'],
+        'videoUrl':"/watch?v={}".format(item['videoId']),
+        'isLive':isLive,
+        'isUpcoming':isUpcoming,
+        'videoThumb':item['thumbnail']['thumbnails'][0]['url']
+    }
+    return video
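
`search_by_terms` replaces the old `get_search_json` route helper: instead of rendering a template it returns a plain dict of video and channel results, which `ytsearch()` in app/routes.py then rewrites for proxying. A minimal usage sketch (query and filter values are illustrative; the filters dict mirrors the time/type/duration keys the old route collected):

```python
# Sketch: first page of results, autocorrect on, relevance sort (0), no filters.
from youtube.search import search_by_terms

results = search_by_terms('privacy frontends', page=1, autocorrect=1, sort=0,
                          filters={'time': 0, 'type': 0, 'duration': 0})
for v in results['videos'][:5]:
    print(v['videoTitle'], v['views'], v['videoUrl'])
```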

View File

@ -1,9 +1,13 @@
import gzip import gzip
import requests
from bs4 import BeautifulSoup
from youtube import yt_data_extract from youtube import yt_data_extract
try: try:
import brotli import brotli
have_brotli = True have_brotli = True
except ImportError: except ImportError:
have_brotli = False have_brotli = False
@ -15,7 +19,7 @@ import json
import gevent import gevent
import gevent.queue import gevent.queue
import gevent.lock import gevent.lock
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
# The trouble with the requests library: It ships its own certificate bundle via certifi # The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates # instead of using the system certificate store, meaning self-signed certificates
# configured by the user will not work. Some draconian networks block TLS unless a corporate # configured by the user will not work. Some draconian networks block TLS unless a corporate
@ -51,13 +55,12 @@ import urllib3.contrib.socks
URL_ORIGIN = "/https://www.youtube.com" URL_ORIGIN = "/https://www.youtube.com"
connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED') connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
def get_pool(use_tor):
return connection_pool
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
'''Separate cookiejars for receiving and sending''' '''Separate cookiejars for receiving and sending'''
def __init__(self, cookiejar_send=None, cookiejar_receive=None): def __init__(self, cookiejar_send=None, cookiejar_receive=None):
self.cookiejar_send = cookiejar_send self.cookiejar_send = cookiejar_send
self.cookiejar_receive = cookiejar_receive self.cookiejar_receive = cookiejar_receive
@ -75,6 +78,7 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
https_request = http_request https_request = http_request
https_response = http_response https_response = http_response
class FetchError(Exception): class FetchError(Exception):
def __init__(self, code, reason='', ip=None): def __init__(self, code, reason='', ip=None):
Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason) Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
@ -82,6 +86,7 @@ class FetchError(Exception):
self.reason = reason self.reason = reason
self.ip = ip self.ip = ip
def decode_content(content, encoding_header): def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',') encodings = encoding_header.replace(' ', '').split(',')
for encoding in reversed(encodings): for encoding in reversed(encodings):
@ -93,6 +98,68 @@ def decode_content(content, encoding_header):
content = gzip.decompress(content) content = gzip.decompress(content)
return content return content
def bypass_captcha(session, response, url, cookies):
print("vvv COOKIES DICT vvv")
inputs = {}
html = BeautifulSoup(str(response.text), "lxml")
# If there's a captcha and we need to solve it...
if html.body.find('div', attrs={'class': 'g-recaptcha'}):
# Get the captcha form
form = html.body.find('form', attrs={"action": "/das_captcha"})
# Set up form inputs for request
for _input in form.find_all('input'):
try:
print(_input["name"] + " -> " + _input["value"])
inputs[_input["name"]] = _input["value"]
except KeyError:
continue
print("\n vvv Form inputs created vvv ")
print(inputs)
# Get CAPTCHA keys
site_key = html.body.find('div', attrs={'class': 'g-recaptcha'})['data-sitekey']
s_value = html.body.find('input', attrs={'name': 'session_token'})['value']
# Get anti-captcha API key from config
config = json.load(open('yotter-config.json'))
# Generate anti-captcha request payload
body = {'clientKey': config['anticaptcha']}
task = {'type': "NoCaptchaTaskProxyless",
'websiteURL': url,
'websiteKey': site_key,
'recaptchaDataSValue': s_value}
body['task'] = task
# Create the task.
response = requests.post("https://api.anti-captcha.com/createTask", json=body).json()
task_id = response["taskId"]
print("Task was created: {}. Waiting...".format(task_id))
# Wait until task is completed
body = {"clientKey": config['anticaptcha'], "taskId": task_id}
response = requests.post("https://api.anti-captcha.com/getTaskResult", json=body).json()
ready = response["status"] == "ready"
while not ready:
print(response['status'])
response = requests.post("https://api.anti-captcha.com/getTaskResult", json=body).json()
ready = response["status"] == "ready"
inputs['g-recaptcha-response'] = response['solution']['gRecaptchaResponse']
print(response)
# Print POST request headers
yt_rq = requests.post("https://youtube.com/das_captcha", data=inputs,
headers={"Content-Type": "application/x-www-form-urlencoded",
"Accept-Language": "en-US,en;q=0.5",
"User-Agent":'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0',
"Referer": "https://www.youtube.com/das_captcha",
"Origin": "https://www.youtube.com"}, cookies=session.cookies).headers
print(yt_rq['Cookie'])
def fetch_url_response(url, headers=(), timeout=15, data=None, def fetch_url_response(url, headers=(), timeout=15, data=None,
cookiejar_send=None, cookiejar_receive=None, cookiejar_send=None, cookiejar_receive=None,
use_tor=True, max_redirects=None): use_tor=True, max_redirects=None):
@ -105,7 +172,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
When both are set to the same object, cookies will be sent from the object, When both are set to the same object, cookies will be sent from the object,
and response cookies will be merged into it. and response cookies will be merged into it.
''' '''
headers = dict(headers) # Note: Calling dict() on a dict will make a copy headers = dict(headers) # Note: Calling dict() on a dict will make a copy
if have_brotli: if have_brotli:
headers['Accept-Encoding'] = 'gzip, br' headers['Accept-Encoding'] = 'gzip, br'
else: else:
@ -124,32 +191,47 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
elif not isinstance(data, bytes): elif not isinstance(data, bytes):
data = urllib.parse.urlencode(data).encode('ascii') data = urllib.parse.urlencode(data).encode('ascii')
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
req = urllib.request.Request(url, data=data, headers=headers) req = urllib.request.Request(url, data=data, headers=headers)
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send,
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) cookiejar_receive=cookiejar_receive)
opener = urllib.request.build_opener(cookie_processor) opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout) response = opener.open(req, timeout=timeout)
cleanup_func = (lambda r: None) cleanup_func = (lambda r: None)
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
# default: Retry.DEFAULT = Retry(3) # default: Retry.DEFAULT = Retry(3)
# (in connectionpool.py in urllib3) # (in connectionpool.py in urllib3)
# According to the documentation for urlopen, a redirect counts as a # According to the documentation for urlopen, a redirect counts as a
# retry. So there are 3 redirects max by default. # retry. So there are 3 redirects max by default.
session = requests.Session()
print("Starting python GET request to "+url+"...")
response = session.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0', "Accept-Language": "en-US,en;q=0.5"})
# Strings that appear when there's a Captcha.
string_de = "Fülle das folgende Feld aus, um YouTube weiter zu nutzen."
string_en = "To continue with your YouTube experience, please fill out the form below."
# If there's a captcha, bypass it.
if string_de in response.text or string_en in response.text:
bypass_captcha(session, response, url, session.cookies)
return "Captcha", "Captcha"
if max_redirects: if max_redirects:
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects) retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
else: else:
retries = urllib3.Retry(3) retries = urllib3.Retry(3)
pool = get_pool(use_tor)
pool = connection_pool
response = pool.request(method, url, headers=headers, response = pool.request(method, url, headers=headers,
timeout=timeout, preload_content=False, timeout=timeout, preload_content=False,
decode_content=False, retries=retries) decode_content=False, retries=retries)
cleanup_func = (lambda r: r.release_conn()) cleanup_func = (lambda r: r.release_conn())
return response, cleanup_func return response, cleanup_func
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
cookiejar_send=None, cookiejar_receive=None, use_tor=True, cookiejar_send=None, cookiejar_receive=None, use_tor=True,
debug_name=None): debug_name=None):
@ -159,18 +241,20 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
url, headers, timeout=timeout, url, headers, timeout=timeout,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor) use_tor=use_tor)
response_time = time.time() print(response)
if response == "Captcha":
return "Captcha"
response_time = time.time()
content = response.read() content = response.read()
read_finish = time.time() read_finish = time.time()
cleanup_func(response) # release_connection for urllib3 cleanup_func(response) # release_connection for urllib3
if (response.status == 429 if (response.status == 429
and content.startswith(b'<!DOCTYPE') and content.startswith(b'<!DOCTYPE')
and b'Our systems have detected unusual traffic' in content): and b'Our systems have detected unusual traffic' in content):
ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
content) content)
ip = ip.group(1).decode('ascii') if ip else None ip = ip.group(1).decode('ascii') if ip else None
raise FetchError('429', reason=response.reason, ip=ip) raise FetchError('429', reason=response.reason, ip=ip)
@ -178,12 +262,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
raise FetchError(str(response.status), reason=response.reason, ip=None) raise FetchError(str(response.status), reason=response.reason, ip=None)
if report_text: if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:',
round(read_finish - response_time, 3))
content = decode_content(content, response.getheader('Content-Encoding', default='identity')) content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
return content return content
 def head(url, use_tor=False, report_text=None, max_redirects=10):
-    pool = get_pool(use_tor)
+    pool = connection_pool
     start_time = time.time()

     # default: Retry.DEFAULT = Retry(3)
@@ -191,24 +277,21 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
     # According to the documentation for urlopen, a redirect counts as a retry
     # So there are 3 redirects max by default. Let's change that
     # to 10 since googlevideo redirects a lot.
-    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
+    retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects,
                             raise_on_redirect=False)
     headers = {'User-Agent': 'Python-urllib'}
     response = pool.request('HEAD', url, headers=headers, retries=retries)
     if report_text:
-        print(report_text, ' Latency:', round(time.time() - start_time,3))
+        print(report_text, ' Latency:', round(time.time() - start_time, 3))
     return response
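With raise_on_redirect=False and the enlarged retry budget, head() follows up to ten redirects itself and returns the final urllib3 response. Hypothetical usage (the URL is a placeholder):

    resp = head('https://i.ytimg.com/vi/VIDEO_ID/mqdefault.jpg',
                report_text='probed thumbnail')
    print(resp.status)  # e.g. 200 if the thumbnail exists, 404 otherwise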
 mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
 mobile_ua = (('User-Agent', mobile_user_agent),)
 desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
 desktop_ua = (('User-Agent', desktop_user_agent),)

 class RateLimitedQueue(gevent.queue.Queue):
     ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
@@ -225,9 +308,8 @@ class RateLimitedQueue(gevent.queue.Queue):
         self.empty_start = 0
         gevent.queue.Queue.__init__(self)

     def get(self):
         self.lock.acquire() # blocks if another greenlet currently has the lock
         if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
             gevent.sleep(self.waiting_period)
             self.count_since_last_wait = 0
@@ -243,7 +325,7 @@ class RateLimitedQueue(gevent.queue.Queue):
             self.currently_empty = True
             self.empty_start = time.monotonic()
         item = gevent.queue.Queue.get(self) # blocks when nothing left

         if self.currently_empty:
             if time.monotonic() - self.empty_start >= self.waiting_period:
@@ -257,7 +339,6 @@ class RateLimitedQueue(gevent.queue.Queue):
         return item
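The constructor sits outside these hunks, so the burst parameters are assumed to follow the docstring defaults (30 at first, then 10 per 5-second window). A usage sketch under that assumption:

    import gevent

    queue = RateLimitedQueue()  # assumed: defaults as described in the docstring

    def worker():
        while True:
            vid = queue.get()  # blocks during the waiting_period between bursts
            download_thumbnail('/tmp/thumbs', vid)

    for vid in ('VIDEO_ID_1', 'VIDEO_ID_2'):
        queue.put(vid)
    gevent.spawn(worker)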
 def download_thumbnail(save_directory, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(save_directory, video_id + ".jpg")
@@ -269,26 +350,23 @@ def download_thumbnail(save_directory, video_id):
     try:
         f = open(save_location, 'wb')
     except FileNotFoundError:
-        os.makedirs(save_directory, exist_ok = True)
+        os.makedirs(save_directory, exist_ok=True)
         f = open(save_location, 'wb')
     f.write(thumbnail)
     f.close()
     return True

 def download_thumbnails(save_directory, ids):
     if not isinstance(ids, (list, tuple)):
         ids = list(ids)
     # only do 5 at a time
     # do the n where n is divisible by 5
     i = -1
-    for i in range(0, int(len(ids)/5) - 1 ):
-        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
+    for i in range(0, int(len(ids) / 5) - 1):
+        gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5, i * 5 + 5)])
     # do the remainders (< 5)
-    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
+    gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i * 5 + 5, len(ids))])
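The index arithmetic above is fragile: with 12 ids it runs one batch of 5 and then schedules the remaining 7 at once. An equivalent, clearer chunking sketch (not part of this diff):

    def download_thumbnails_chunked(save_directory, ids, chunk_size=5):
        # schedule at most chunk_size downloads per gevent.joinall batch
        for start in range(0, len(ids), chunk_size):
            gevent.joinall([gevent.spawn(download_thumbnail, save_directory, vid)
                            for vid in ids[start:start + chunk_size]])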
 def dict_add(*dicts):
@@ -296,6 +374,7 @@ def dict_add(*dicts):
         dicts[0].update(dictionary)
     return dicts[0]

 def video_id(url):
     url_parts = urllib.parse.urlparse(url)
     return urllib.parse.parse_qs(url_parts.query)['v'][0]
@@ -304,11 +383,12 @@ def video_id(url):
 # default, sddefault, mqdefault, hqdefault, hq720
 def get_thumbnail_url(video_id):
     return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"

 def seconds_to_timestamp(seconds):
     seconds = int(seconds)
-    hours, seconds = divmod(seconds,3600)
-    minutes, seconds = divmod(seconds,60)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
     if hours != 0:
         timestamp = str(hours) + ":"
         timestamp += str(minutes).zfill(2) # zfill pads with zeros
@@ -319,31 +399,32 @@ def seconds_to_timestamp(seconds):
     return timestamp
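Expected behaviour, inferred from the visible lines (the middle of the function falls outside the hunk, so the zero-hours branch is an assumption):

    seconds_to_timestamp(3725)  # '1:02:05' -- hours, then zero-padded minutes
    seconds_to_timestamp(125)   # '2:05'    -- hours omitted when zero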
 def update_query_string(query_string, items):
     parameters = urllib.parse.parse_qs(query_string)
     parameters.update(items)
     return urllib.parse.urlencode(parameters, doseq=True)
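Since urllib.parse.parse_qs maps each key to a list, replacement values should be lists as well; doseq=True then flattens them back into the query string. For example:

    update_query_string('v=abc123&t=10', {'t': ['42']})
    # -> 'v=abc123&t=42'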
 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
         lambda m: chr(int(m.group(1), base=16)), s)
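This undoes literal \UXXXXXXXX escapes left behind in YouTube's JSON. For example:

    uppercase_escape('Nice \\U0001f60a')  # 'Nice 😊'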
 def prefix_url(url):
     if url is None:
         return None
     url = url.lstrip('/') # some urls have // before them, which has a special meaning
     return '/' + url

 def left_remove(string, substring):
     '''removes substring from the start of string, if present'''
     if string.startswith(substring):
         return string[len(substring):]
     return string
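Unlike str.lstrip, which strips a character set, left_remove removes one exact prefix:

    left_remove('https://example.com', 'https://')  # 'example.com'
    left_remove('example.com', 'https://')          # 'example.com' (unchanged)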
 def concat_or_none(*strings):
     '''Concatenates strings. Returns None if any of the arguments are None'''
     result = ''
@@ -365,6 +446,7 @@ def prefix_urls(item):
     except KeyError:
         pass

 def add_extra_html_info(item):
     if item['type'] == 'video':
         item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
@@ -383,6 +465,7 @@ def add_extra_html_info(item):
     elif item['type'] == 'channel':
         item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None

 def parse_info_prepare_for_html(renderer, additional_info={}):
     item = yt_data_extract.extract_item_info(renderer, additional_info)
     prefix_urls(item)
@@ -390,8 +473,8 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
     return item

 def check_gevent_exceptions(*tasks):
     for task in tasks:
         if task.exception:
             raise task.exception
View File
@@ -8,11 +8,11 @@ from youtube import util, yt_data_extract
 def get_video_sources(info, tor_bypass=False):
     video_sources = []
-    max_resolution = "720"
+    max_resolution = 1080
     for fmt in info['formats']:
         if not all(fmt[attr] for attr in ('quality', 'width', 'ext', 'url')):
             continue
-        if fmt['acodec'] and fmt['vcodec'] and fmt['height'] <= max_resolution:
+        if fmt['acodec'] and fmt['vcodec'] and (fmt['height'] <= max_resolution):
             video_sources.append({
                 'src': fmt['url'],
                 'type': 'video/' + fmt['ext'],
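Switching max_resolution from the string "720" to the integer 1080 does more than raise the cap: fmt['height'] is presumably an int, and in Python 3 ordering comparisons between int and str raise a TypeError, so the old comparison could not have worked as written:

    720 <= "720"  # TypeError: '<=' not supported between instances of 'int' and 'str'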
@@ -123,6 +123,24 @@ def get_subtitle_sources(info):
     return sources

+def decrypt_signatures(info):
+    '''return error string, or False if no errors'''
+    if not yt_data_extract.requires_decryption(info):
+        return False
+    if not info['player_name']:
+        return 'Could not find player name'
+    if not info['base_js']:
+        return 'Failed to find base.js'
+    player_name = info['player_name']
+    base_js = util.fetch_url(info['base_js'], debug_name='base.js', report_text='Fetched player ' + player_name)
+    base_js = base_js.decode('utf-8')
+    err = yt_data_extract.extract_decryption_function(info, base_js)
+    if err:
+        return err
+    err = yt_data_extract.decrypt_signatures(info)
+    return err
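The new helper keeps Yotter's error-string convention (return a message, or False when nothing went wrong) rather than raising. A sketch of how that convention composes, mirroring the extract_info change further down:

    err = decrypt_signatures(info)
    if err:
        info['playability_error'] = 'Error decrypting url signatures: ' + err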
 def get_ordered_music_list_attributes(music_list):
     # get the set of attributes which are used by at least 1 track
@@ -146,14 +164,18 @@ headers = (
     ('X-YouTube-Client-Version', '2.20180830'),
 ) + util.mobile_ua

 def extract_info(video_id, use_invidious, playlist_id=None, index=None):
-    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
-    # videos
+    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial videos
     url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
     if playlist_id:
         url += '&list=' + playlist_id
     if index:
         url += '&index=' + index
     polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
+    # If there's a captcha... Return word Captcha
+    if polymer_json == 'Captcha':
+        return 'Captcha'
     polymer_json = polymer_json.decode('utf-8')
     # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
     try:
@@ -173,6 +195,12 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
         url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
         video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
         yt_data_extract.update_with_age_restricted_info(info, video_info_page)

+    # signature decryption
+    decryption_error = decrypt_signatures(info)
+    if decryption_error:
+        decryption_error = 'Error decrypting url signatures: ' + decryption_error
+        info['playability_error'] = decryption_error

     # check if urls ready (non-live format) in former livestream
     # urls not ready if all of them have no filesize
     if info['was_live']:
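extract_info propagates the same 'Captcha' sentinel one level up, so any route that calls it must bail out before treating the result as a dict. A hypothetical Flask-side guard (the message is invented):

    info = ytwatch.extract_info(video_id, use_invidious=False)
    if info == 'Captcha':
        flash('YouTube served a captcha; please try again in a few minutes.')
        return redirect(request.referrer)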