Use Youtube-dlc for video data extraction

This commit is contained in:
pluja 2020-10-30 16:16:09 +01:00
parent 016bcd4775
commit 78d6044dd2
3 changed files with 211 additions and 144 deletions

View File

@ -29,7 +29,7 @@ from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, Channe
from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow
from youtube import comments, utils, channel as ytch, search as yts from youtube import comments, utils, channel as ytch, search as yts
from youtube import watch as ytwatch from youtube import watch as ytwatch
from youtube import video as ytvid
######################################### #########################################
######################################### #########################################
@ -467,50 +467,40 @@ def get_live_urls(urls):
best_urls.append(url) best_urls.append(url)
return best_urls return best_urls
@app.route('/watch', methods=['GET']) @app.route('/watch', methods=['GET'])
@login_required @login_required
def watch(): def watch():
id = request.args.get('v', None) id = request.args.get('v', None)
info = ytwatch.extract_info(id, False, playlist_id=None, index=None) info = ytvid.get_info(id)
vsources = ytwatch.get_video_sources(info, False) if info['error'] == False:
# Retry 3 times if no sources are available. for format in info['formats']:
retry = 3 hostName = urllib.parse.urlparse(format['url']).netloc
while retry != 0 and len(vsources) == 0: format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
vsources = ytwatch.get_video_sources(info, False)
retry -= 1
for source in vsources: for format in info['audio_formats']:
hostName = urllib.parse.urlparse(source['src']).netloc hostName = urllib.parse.urlparse(format['url']).netloc
source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Parse video formats
for v_format in info['formats']:
hostName = urllib.parse.urlparse(v_format['url']).netloc
v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
v_format['audio_valid'] = True
# Markup description # Markup description
try: try:
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>"))) info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>"))).replace('www.youtube.com', config['serverName']).replace('youtube.com', config['serverName']).replace("/join","")
except AttributeError or TypeError: except AttributeError or TypeError:
print(info['description']) print(info['description'])
# Get comments # Get comments
if not info['is_live']:
videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='') videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments) videocomments = utils.post_process_comments_info(videocomments)
if videocomments is not None: if videocomments is not None:
videocomments.sort(key=lambda x: x['likes'], reverse=True) videocomments.sort(key=lambda x: x['likes'], reverse=True)
# Calculate rating %
if info['like_count']+info['dislike_count']>0:
info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4]
else: else:
info['rating'] = 50.0 videocomments=False
return render_template("video.html", info=info, title='{}'.format(info['title']), config=config,
videocomments=videocomments, vsources=vsources) return render_template("video.html", info=info, title=info['title'], config=config,
videocomments=videocomments)
return render_template("video.html", info=info, title='Scheduled Video', config=config)
def markupString(string): def markupString(string):
@ -881,7 +871,7 @@ def getFeed(urls):
newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text
newPost["timeStamp"] = date_time_str newPost["timeStamp"] = date_time_str
newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})) newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})).replace("\n", "<br>")
if post.find('div', attrs={'class': 'retweet-header'}): if post.find('div', attrs={'class': 'retweet-header'}):
newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={ newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={

View File

@ -4,27 +4,24 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<div style="width: 80%;" class="ui container"> <div style="width: 80%;" class="ui container">
{% if info.error != None or info.playability_error != None %}
{% if info.error == True %}
<div class="ui center aligned text container"> <div class="ui center aligned text container">
<div class="ui segment"> <div class="ui segment">
<h4 class="ui header">ERROR WITH VIDEO</h4> <h4 class="ui header"><i class="calendar icon"></i> SCHEDULED VIDEO</h4>
<h5 class="ui header">This video is scheduled and is not supported by Yotter.</h5>
</div> </div>
</div> </div>
{% elif info.playability_status != None %} {% else %}
<div class="ui center aligned text container"> {% if info.start_time != None %}
<div class="ui segment"> {% elif info.is_live != None %}
<h4 class="ui header">SCHEDULED VIDEO</h4> <!--<div class="video-js-responsive-container vjs-hd">
<h5 class="ui header">{{video.premieres}}</h5>
</div>
</div>
{% elif info.live %}
<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls> <video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source <source
src="#" src="#"
type="application/x-mpegURL"> type="application/x-mpegURL">
</video-js> </video-js>
</div> </div>-->
<div class="ui center aligned text container"> <div class="ui center aligned text container">
<div class="ui segment"> <div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3> <h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
@ -41,8 +38,8 @@
buffered buffered
preload="none"> preload="none">
{% if config.isInstance %} {% if config.isInstance %}
{% for source in vsources %} {% for source in info.formats %}
<source src="{{source.src}}" type="{{source.type}}"> <source src="{{source.url}}" type="video/{{source.ext}}">
{% endfor %} {% endfor %}
{% endif %} {% endif %}
</video-js> </video-js>
@ -55,9 +52,12 @@
</div> </div>
<div class="ui horizontal segments"> <div class="ui horizontal segments">
<div class="center aligned ui segment"> <div class="center aligned ui segment">
<a href="{{ url_for('channel', id=info.author_id)}}"> <a href="{{ url_for('channel', id=info.uploader_id)}}">
<i class="user icon"></i> <b>{{info.author}}</b> <i class="user icon"></i> <b>{{info.uploader}}</b>
</a> </a>
<div class="label">
<i class="user icon"></i>{{info.subscriber_count}}
</div>
</div> </div>
<div class="center aligned ui segment"> <div class="center aligned ui segment">
<div class="ui mini statistic"> <div class="ui mini statistic">
@ -70,22 +70,22 @@
</div> </div>
</div> </div>
<div class="center aligned ui segment"> <div class="center aligned ui segment">
{% if info.rating | int > 49 %} {% if info.average_rating | int > 2.5 %}
<div class="ui mini statistic"> <div class="ui mini statistic">
<div class="value"> <div class="value">
<i class="green thumbs up icon"></i> <b>{{info.rating}}%</b> <i class="green thumbs up icon"></i> <b>{{info.average_rating}}/5</b>
</div> </div>
<div class="label"> <div class="label">
Total: {{info.like_count+info.dislike_count}} votes Total: {{info.total_likes}} votes
</div> </div>
</div> </div>
{% else %} {% else %}
<div class="ui mini statistic"> <div class="ui mini statistic">
<div class="value"> <div class="value">
<i class="red thumbs down icon"></i> <b>{{info.rating}}%</b> <i class="red thumbs down icon"></i> <b>{{info.average_rating}}/5</b>
</div> </div>
<div class="label"> <div class="label">
Total: {{info.like_count+info.dislike_count}} votes Total: {{info.total_likes}} votes
</div> </div>
</div> </div>
{% endif %} {% endif %}
@ -95,10 +95,8 @@
<div class="ui raised center aligned segment break-word"> <div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b>Audio Only</b></p> <p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls> <audio controls>
{% for format in info.formats %} {% for format in info.audio_formats %}
{% if format.audio_valid %}
<source src="{{format.url}}"> <source src="{{format.url}}">
{%endif%}
{%endfor%} {%endfor%}
No audio available. No audio available.
</audio> </audio>
@ -109,19 +107,21 @@
</div> </div>
</div> </div>
{%if videocomments%}
<div class="ui comments"> <div class="ui comments">
<h3 class="ui dividing header">Comments</h3> <h3 class="ui dividing header">Comments</h3>
{% for comment in videocomments %} {% for comment in videocomments %}
{% include '_video_comment.html' %} {% include '_video_comment.html' %}
{% endfor %} {% endfor %}
</div> </div>
{%endif%}
<script src="{{ url_for('static',filename='video.min.js') }}"></script> <script src="{{ url_for('static',filename='video.min.js') }}"></script>
{% if info.live %} {% if info.live %}
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script> <script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
<script> <script>
var player = videojs('live'); var player = videojs('live');
player.play(); player.play();
</script> </script>
{% endif %} {% endif %}
{%endif%}
{% endblock %} {% endblock %}

77
youtube/video.py Normal file
View File

@ -0,0 +1,77 @@
from youtube_dlc import YoutubeDL
import json
# Options handed to the shared YoutubeDL instance. 'skip_download' together
# with download=False in get_info() means only metadata is fetched;
# 'ignoreerrors' is why get_info() also has to cope with a None result.
options = {
    'ignoreerrors': True,
    'quiet': True,
    'skip_download': True,
}

# One module-level extractor, reused by every get_info() call.
ydl = YoutubeDL(options)
ydl.add_default_info_extractors()

# Load the Yotter configuration. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('yotter-config.json') as config_file:
    config = json.load(config_file)
def get_info(url):
    """Extract video metadata for *url* through the module-level ``ydl``.

    Returns:
        A dict that always contains an ``'error'`` key.

        * When extraction raises or yields ``None``, ``'error'`` is True and
          no other key is set.
        * Otherwise ``'error'`` is False and the dict carries the copied
          metadata fields, ``'like_count'`` / ``'dislike_count'`` (``None``
          normalised to 0), their sum in ``'total_likes'``, the first four
          characters of the stringified ``'average_rating'``, and the
          ``'formats'`` / ``'audio_formats'`` lists produced by
          ``get_video_formats``.
    """
    video = {'error': False}

    try:
        info = ydl.extract_info(url, download=False)
    except Exception:
        # Bind `info` on failure too; otherwise the None check below would
        # raise NameError instead of reporting the error through the flag.
        info = None

    if info is None:
        video['error'] = True
        return video

    # Fields are copied with .get() so that one the extractor did not supply
    # shows up as None rather than aborting the whole request with KeyError.
    for key in ('uploader', 'uploader_id', 'channel_id', 'upload_date',
                'title', 'thumbnails', 'description', 'categories',
                'subtitles', 'duration', 'view_count'):
        video[key] = info.get(key)

    likes = info.get('like_count')
    dislikes = info.get('dislike_count')
    video['like_count'] = 0 if likes is None else likes
    video['dislike_count'] = 0 if dislikes is None else dislikes
    # Sum the normalised counters: the raw values may be None, and
    # int(None) raises TypeError.
    video['total_likes'] = (int(video['dislike_count'])
                            + int(video['like_count']))

    video['average_rating'] = str(info.get('average_rating'))[0:4]

    all_formats = info.get('formats') or []
    video['formats'] = get_video_formats(all_formats)
    video['audio_formats'] = get_video_formats(all_formats, audio=True)

    for key in ('is_live', 'start_time', 'end_time', 'series',
                'subscriber_count'):
        video[key] = info.get(key)

    return video
def get_video_formats(formats, audio=False):
    """Select the usable entries from a list of format dicts.

    Args:
        formats: Iterable of format dicts, inspected through their
            'vcodec', 'acodec' and 'format_note' keys. ``None`` is treated
            as an empty list.
        audio: When True, return the audio-only formats; otherwise return
            the formats that carry both video and audio.

    Returns:
        A new list with the matching format dicts in input order. Combined
        formats whose 'format_note' is '144p' are left out, and video-only
        formats are never returned.
    """
    selected = []
    for fmt in formats or []:
        # A codec counts as absent only when it is the string 'none'; a
        # missing key is treated as "stream present" instead of raising
        # KeyError.
        has_video = fmt.get('vcodec') != 'none'
        has_audio = fmt.get('acodec') != 'none'

        if not has_audio:
            # Video-only entries are of no use to either caller mode.
            continue

        if audio:
            if not has_video:
                selected.append(fmt)
        elif has_video and fmt.get('format_note') != '144p':
            selected.append(fmt)
    return selected