Use Youtube-dlc for video data extraction

This commit is contained in:
pluja 2020-10-30 16:16:09 +01:00
parent 016bcd4775
commit 78d6044dd2
3 changed files with 211 additions and 144 deletions

View File

@ -29,7 +29,7 @@ from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, Channe
from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow
from youtube import comments, utils, channel as ytch, search as yts from youtube import comments, utils, channel as ytch, search as yts
from youtube import watch as ytwatch from youtube import watch as ytwatch
from youtube import video as ytvid
######################################### #########################################
######################################### #########################################
@ -467,50 +467,40 @@ def get_live_urls(urls):
best_urls.append(url) best_urls.append(url)
return best_urls return best_urls
@app.route('/watch', methods=['GET']) @app.route('/watch', methods=['GET'])
@login_required @login_required
def watch(): def watch():
id = request.args.get('v', None) id = request.args.get('v', None)
info = ytwatch.extract_info(id, False, playlist_id=None, index=None) info = ytvid.get_info(id)
if info['error'] == False:
for format in info['formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
for format in info['audio_formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
vsources = ytwatch.get_video_sources(info, False) # Markup description
# Retry 3 times if no sources are available. try:
retry = 3 info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>"))).replace('www.youtube.com', config['serverName']).replace('youtube.com', config['serverName']).replace("/join","")
while retry != 0 and len(vsources) == 0: except AttributeError or TypeError:
vsources = ytwatch.get_video_sources(info, False) print(info['description'])
retry -= 1
for source in vsources: # Get comments
hostName = urllib.parse.urlparse(source['src']).netloc if not info['is_live']:
source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments)
# Parse video formats if videocomments is not None:
for v_format in info['formats']: videocomments.sort(key=lambda x: x['likes'], reverse=True)
hostName = urllib.parse.urlparse(v_format['url']).netloc else:
v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName videocomments=False
if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
v_format['audio_valid'] = True return render_template("video.html", info=info, title=info['title'], config=config,
videocomments=videocomments)
# Markup description
try: return render_template("video.html", info=info, title='Scheduled Video', config=config)
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
except AttributeError or TypeError:
print(info['description'])
# Get comments
videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments)
if videocomments is not None:
videocomments.sort(key=lambda x: x['likes'], reverse=True)
# Calculate rating %
if info['like_count']+info['dislike_count']>0:
info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4]
else:
info['rating'] = 50.0
return render_template("video.html", info=info, title='{}'.format(info['title']), config=config,
videocomments=videocomments, vsources=vsources)
def markupString(string): def markupString(string):
@ -881,7 +871,7 @@ def getFeed(urls):
newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text
newPost["timeStamp"] = date_time_str newPost["timeStamp"] = date_time_str
newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})) newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})).replace("\n", "<br>")
if post.find('div', attrs={'class': 'retweet-header'}): if post.find('div', attrs={'class': 'retweet-header'}):
newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={ newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={

View File

@ -4,124 +4,124 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<div style="width: 80%;" class="ui container"> <div style="width: 80%;" class="ui container">
{% if info.error != None or info.playability_error != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">ERROR WITH VIDEO</h4>
</div>
</div>
{% elif info.playability_status != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">SCHEDULED VIDEO</h4>
<h5 class="ui header">{{video.premieres}}</h5>
</div>
</div>
{% elif info.live %}
<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source
src="#"
type="application/x-mpegURL">
</video-js>
</div>
<div class="ui center aligned text container">
<div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
<h4 class="ui header">FEATURE AVAILABLE SOON</h4>
<h5 class="ui header">Livestreams are under developent and still not supported on Yotter.</h5>
</div>
</div>
{%else%}
<div class="video-js-responsive-container vjs-hd">
<video-js autofocus class="video-js vjs-default-skin"
data-setup='{ "playbackRates": [0.5, 0.75, 1, 1.25,1.5, 1.75, 2] }'
width="1080"
controls
buffered
preload="none">
{% if config.isInstance %}
{% for source in vsources %}
<source src="{{source.src}}" type="{{source.type}}">
{% endfor %}
{% endif %}
</video-js>
</div>
{%endif%}
<div class="ui segments"> {% if info.error == True %}
<div class="ui center aligned text container">
<div class="ui segment"> <div class="ui segment">
<h2 class="ui header break-word">{{info.title}}</h2> <h4 class="ui header"><i class="calendar icon"></i> SCHEDULED VIDEO</h4>
<h5 class="ui header">This video is scheduled and is not supported by Yotter.</h5>
</div> </div>
<div class="ui horizontal segments"> </div>
<div class="center aligned ui segment"> {% else %}
<a href="{{ url_for('channel', id=info.author_id)}}"> {% if info.start_time != None %}
<i class="user icon"></i> <b>{{info.author}}</b> {% elif info.is_live != None %}
</a> <!--<div class="video-js-responsive-container vjs-hd">
</div> <video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<div class="center aligned ui segment"> <source
<div class="ui mini statistic"> src="#"
<div class="value"> type="application/x-mpegURL">
<i class="grey eye icon"></i> <b>{{info.view_count}}</b> </video-js>
</div> </div>-->
<div class="label"> <div class="ui center aligned text container">
views <div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
<h4 class="ui header">FEATURE AVAILABLE SOON</h4>
<h5 class="ui header">Livestreams are under developent and still not supported on Yotter.</h5>
</div> </div>
</div> </div>
</div> {%else%}
<div class="center aligned ui segment"> <div class="video-js-responsive-container vjs-hd">
{% if info.rating | int > 49 %} <video-js autofocus class="video-js vjs-default-skin"
data-setup='{ "playbackRates": [0.5, 0.75, 1, 1.25,1.5, 1.75, 2] }'
width="1080"
controls
buffered
preload="none">
{% if config.isInstance %}
{% for source in info.formats %}
<source src="{{source.url}}" type="video/{{source.ext}}">
{% endfor %}
{% endif %}
</video-js>
</div>
{%endif%}
<div class="ui segments">
<div class="ui segment">
<h2 class="ui header break-word">{{info.title}}</h2>
</div>
<div class="ui horizontal segments">
<div class="center aligned ui segment">
<a href="{{ url_for('channel', id=info.uploader_id)}}">
<i class="user icon"></i> <b>{{info.uploader}}</b>
</a>
<div class="label">
<i class="user icon"></i>{{info.subscriber_count}}
</div>
</div>
<div class="center aligned ui segment">
<div class="ui mini statistic"> <div class="ui mini statistic">
<div class="value"> <div class="value">
<i class="green thumbs up icon"></i> <b>{{info.rating}}%</b> <i class="grey eye icon"></i> <b>{{info.view_count}}</b>
</div> </div>
<div class="label"> <div class="label">
Total: {{info.like_count+info.dislike_count}} votes views
</div> </div>
</div> </div>
{% else %} </div>
<div class="ui mini statistic"> <div class="center aligned ui segment">
<div class="value"> {% if info.average_rating | int > 2.5 %}
<i class="red thumbs down icon"></i> <b>{{info.rating}}%</b> <div class="ui mini statistic">
</div> <div class="value">
<div class="label"> <i class="green thumbs up icon"></i> <b>{{info.average_rating}}/5</b>
Total: {{info.like_count+info.dislike_count}} votes </div>
</div> <div class="label">
</div> Total: {{info.total_likes}} votes
{% endif %} </div>
</div> </div>
</div> {% else %}
<div class="ui mini statistic">
<div class="value">
<i class="red thumbs down icon"></i> <b>{{info.average_rating}}/5</b>
</div>
<div class="label">
Total: {{info.total_likes}} votes
</div>
</div>
{% endif %}
</div>
</div>
<div class="ui raised center aligned segment break-word"> <div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b>Audio Only</b></p> <p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls> <audio controls>
{% for format in info.formats %} {% for format in info.audio_formats %}
{% if format.audio_valid %}
<source src="{{format.url}}"> <source src="{{format.url}}">
{%endif%} {%endfor%}
{%endfor%} No audio available.
No audio available. </audio>
</audio> </div>
<div class="ui raised segment break-word">
<p>{{info.description}}</p>
</div>
</div> </div>
<div class="ui raised segment break-word"> {%if videocomments%}
<p>{{info.description}}</p> <div class="ui comments">
<h3 class="ui dividing header">Comments</h3>
{% for comment in videocomments %}
{% include '_video_comment.html' %}
{% endfor %}
</div> </div>
</div> {%endif%}
<script src="{{ url_for('static',filename='video.min.js') }}"></script>
<div class="ui comments"> {% if info.live %}
<h3 class="ui dividing header">Comments</h3> <script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
{% for comment in videocomments %} <script>
{% include '_video_comment.html' %} var player = videojs('live');
{% endfor %} player.play();
</div> </script>
{% endif %}
<script src="{{ url_for('static',filename='video.min.js') }}"></script> {%endif%}
{% if info.live %}
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
<script>
var player = videojs('live');
player.play();
</script>
{% endif %}
{% endblock %} {% endblock %}

77
youtube/video.py Normal file
View File

@ -0,0 +1,77 @@
from youtube_dlc import YoutubeDL
import json
# Options handed to the shared YoutubeDL instance: metadata only, no console
# output, no media download.
# NOTE(review): 'ignoreerrors' presumably makes extract_info() return None
# instead of raising on extractor errors; get_info() checks for None. Confirm
# against the youtube-dlc documentation.
options = {
    'ignoreerrors': True,
    'quiet': True,
    'skip_download': True,
}

# A single extractor instance is created at import time and reused by
# get_info() for every request.
ydl = YoutubeDL(options)
ydl.add_default_info_extractors()

# Load the instance configuration. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('yotter-config.json') as config_file:
    config = json.load(config_file)
def get_info(url):
    """Extract metadata for one video and return it as a plain dict.

    Args:
        url: Value passed straight to ``ydl.extract_info`` (the ``/watch``
            route passes the video id here).

    Returns:
        A dict that always contains the key ``'error'``. When extraction
        raised or produced ``None``, the dict is ``{'error': True}`` and
        nothing else. Otherwise ``'error'`` is ``False`` and the dict also
        holds the fields copied from the extractor result, plus
        ``'total_likes'``, a truncated ``'average_rating'`` string and the
        ``'formats'`` / ``'audio_formats'`` lists produced by
        ``get_video_formats``.

    Raises:
        KeyError: If the extractor result lacks one of the copied fields.
    """
    video = {'error': False}

    try:
        info = ydl.extract_info(url, download=False)
    except Exception:
        # The previous bare ``except:`` left ``info`` unbound, so the check
        # below raised UnboundLocalError instead of reporting the failure.
        info = None

    if info is None:
        video['error'] = True
        return video

    # Fields copied verbatim, in the order the template has always seen them.
    for key in ('uploader', 'uploader_id', 'channel_id', 'upload_date',
                'title', 'thumbnails', 'description', 'categories',
                'subtitles', 'duration', 'view_count'):
        video[key] = info[key]

    # The extractor may report missing counters as None; normalise to 0.
    for key in ('like_count', 'dislike_count'):
        video[key] = 0 if info[key] is None else info[key]

    # Sum the normalised counters: using the raw values made int(None) raise
    # TypeError whenever either counter was missing.
    video['total_likes'] = int(video['dislike_count']) + int(video['like_count'])

    # Keep at most four characters of the rating (e.g. "4.87") for display.
    video['average_rating'] = str(info['average_rating'])[0:4]

    video['formats'] = get_video_formats(info['formats'])
    video['audio_formats'] = get_video_formats(info['formats'], audio=True)

    for key in ('is_live', 'start_time', 'end_time', 'series',
                'subscriber_count'):
        video[key] = info[key]

    return video
def get_video_formats(formats, audio=False):
    """Split extractor formats into muxed streams and audio-only streams.

    Args:
        formats: Iterable of format dicts; each must provide ``'vcodec'`` and
            ``'acodec'``, and ``'format_note'`` when both codecs are present.
        audio: When truthy, return the audio-only formats instead of the
            combined video+audio ones.

    Returns:
        A list of the matching format dicts in their original order. Muxed
        formats whose ``'format_note'`` is ``'144p'`` are left out, and
        video-only formats are never returned.
    """
    muxed = []
    audio_only = []

    for entry in formats:
        has_video = entry['vcodec'] != 'none'
        has_audio = entry['acodec'] != 'none'

        if has_video and has_audio:
            # Combined stream: keep everything except the lowest quality.
            if entry['format_note'] != '144p':
                muxed.append(entry)
        elif has_audio:
            # No video track, only audio.
            audio_only.append(entry)
        # Anything else is video-only and is ignored.

    return audio_only if audio else muxed