Use Youtube-dlc for video data extraction

This commit is contained in:
pluja 2020-10-30 16:16:09 +01:00
parent 016bcd4775
commit 78d6044dd2
3 changed files with 211 additions and 144 deletions

View File

@ -29,7 +29,7 @@ from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, Channe
from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow
from youtube import comments, utils, channel as ytch, search as yts
from youtube import watch as ytwatch
from youtube import video as ytvid
#########################################
#########################################
@ -467,50 +467,40 @@ def get_live_urls(urls):
best_urls.append(url)
return best_urls
# Flask view for GET /watch (login required): reads the video id from the `v`
# query parameter, loads the video info and renders the "video.html" template.
# NOTE(review): this span comes from a rendered commit diff, so lines of the
# previous and the new implementation appear together (e.g. the two consecutive
# `info = ...` assignments and several render_template calls) — confirm which
# lines belong to the final version before relying on the flow below.
@app.route('/watch', methods=['GET'])
@login_required
def watch():
id = request.args.get('v', None)
info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
info = ytvid.get_info(id)
vsources = ytwatch.get_video_sources(info, False)
# Retry 3 times if no sources are available.
retry = 3
while retry != 0 and len(vsources) == 0:
vsources = ytwatch.get_video_sources(info, False)
retry -= 1
if info['error'] == False:
# Rewrite every format URL as a host-relative path and carry the original
# host name in an extra `host` query parameter.
for format in info['formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Same host-relative rewrite, applied to the `src` of each video source.
for source in vsources:
hostName = urllib.parse.urlparse(source['src']).netloc
source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Same host-relative rewrite, applied to the audio-only formats.
for format in info['audio_formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Parse video formats
for v_format in info['formats']:
hostName = urllib.parse.urlparse(v_format['url']).netloc
v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Flag formats that have an audio bitrate but no video codec.
if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
v_format['audio_valid'] = True
# Markup description
# Newlines become <br>, links are linkified, and youtube.com host names are
# replaced with this instance's configured server name.
try:
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>"))).replace('www.youtube.com', config['serverName']).replace('youtube.com', config['serverName']).replace("/join","")
# NOTE(review): `AttributeError or TypeError` evaluates to `AttributeError`,
# so TypeError is NOT caught here; `except (AttributeError, TypeError):`
# would catch both.
except AttributeError or TypeError:
print(info['description'])
# Markup description
try:
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
# NOTE(review): same `or` issue as above — only AttributeError is caught.
except AttributeError or TypeError:
print(info['description'])
# Get comments
# Comments are only requested when the video is not flagged as live.
if not info['is_live']:
videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments)
if videocomments is not None:
# Most-liked comments first.
videocomments.sort(key=lambda x: x['likes'], reverse=True)
else:
videocomments=False
# Get comments
videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments)
if videocomments is not None:
# Most-liked comments first.
videocomments.sort(key=lambda x: x['likes'], reverse=True)
return render_template("video.html", info=info, title=info['title'], config=config,
videocomments=videocomments)
# Calculate rating %
# Share of likes among all votes, kept as the first 4 characters of the
# percentage string; falls back to 50.0 when there are no votes.
if info['like_count']+info['dislike_count']>0:
info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4]
else:
info['rating'] = 50.0
return render_template("video.html", info=info, title='{}'.format(info['title']), config=config,
videocomments=videocomments, vsources=vsources)
# Rendered without comments under a fixed 'Scheduled Video' title.
return render_template("video.html", info=info, title='Scheduled Video', config=config)
def markupString(string):
@ -881,7 +871,7 @@ def getFeed(urls):
newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text
newPost["timeStamp"] = date_time_str
newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'}))
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})).replace("\n", "<br>")
if post.find('div', attrs={'class': 'retweet-header'}):
newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={

View File

@ -4,124 +4,124 @@
{% extends "base.html" %}
{% block content %}
<div style="width: 80%;" class="ui container">
{% if info.error != None or info.playability_error != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">ERROR WITH VIDEO</h4>
</div>
</div>
{% elif info.playability_status != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">SCHEDULED VIDEO</h4>
<h5 class="ui header">{{video.premieres}}</h5>
</div>
</div>
{% elif info.live %}
<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source
src="#"
type="application/x-mpegURL">
</video-js>
</div>
<div class="ui center aligned text container">
<div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
<h4 class="ui header">FEATURE AVAILABLE SOON</h4>
<h5 class="ui header">Livestreams are under development and still not supported on Yotter.</h5>
</div>
</div>
{%else%}
<div class="video-js-responsive-container vjs-hd">
<video-js autofocus class="video-js vjs-default-skin"
data-setup='{ "playbackRates": [0.5, 0.75, 1, 1.25,1.5, 1.75, 2] }'
width="1080"
controls
buffered
preload="none">
{% if config.isInstance %}
{% for source in vsources %}
<source src="{{source.src}}" type="{{source.type}}">
{% endfor %}
{% endif %}
</video-js>
</div>
{%endif%}
<div class="ui segments">
{% if info.error == True %}
<div class="ui center aligned text container">
<div class="ui segment">
<h2 class="ui header break-word">{{info.title}}</h2>
<h4 class="ui header"><i class="calendar icon"></i> SCHEDULED VIDEO</h4>
<h5 class="ui header">This video is scheduled and is not supported by Yotter.</h5>
</div>
<div class="ui horizontal segments">
<div class="center aligned ui segment">
<a href="{{ url_for('channel', id=info.author_id)}}">
<i class="user icon"></i> <b>{{info.author}}</b>
</a>
</div>
<div class="center aligned ui segment">
<div class="ui mini statistic">
<div class="value">
<i class="grey eye icon"></i> <b>{{info.view_count}}</b>
</div>
<div class="label">
views
</div>
{% else %}
{% if info.start_time != None %}
{% elif info.is_live != None %}
<!--<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source
src="#"
type="application/x-mpegURL">
</video-js>
</div>-->
<div class="ui center aligned text container">
<div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
<h4 class="ui header">FEATURE AVAILABLE SOON</h4>
<h5 class="ui header">Livestreams are under development and still not supported on Yotter.</h5>
</div>
</div>
</div>
<div class="center aligned ui segment">
{% if info.rating | int > 49 %}
{%else%}
<div class="video-js-responsive-container vjs-hd">
<video-js autofocus class="video-js vjs-default-skin"
data-setup='{ "playbackRates": [0.5, 0.75, 1, 1.25,1.5, 1.75, 2] }'
width="1080"
controls
buffered
preload="none">
{% if config.isInstance %}
{% for source in info.formats %}
<source src="{{source.url}}" type="video/{{source.ext}}">
{% endfor %}
{% endif %}
</video-js>
</div>
{%endif%}
<div class="ui segments">
<div class="ui segment">
<h2 class="ui header break-word">{{info.title}}</h2>
</div>
<div class="ui horizontal segments">
<div class="center aligned ui segment">
<a href="{{ url_for('channel', id=info.uploader_id)}}">
<i class="user icon"></i> <b>{{info.uploader}}</b>
</a>
<div class="label">
<i class="user icon"></i>{{info.subscriber_count}}
</div>
</div>
<div class="center aligned ui segment">
<div class="ui mini statistic">
<div class="value">
<i class="green thumbs up icon"></i> <b>{{info.rating}}%</b>
<i class="grey eye icon"></i> <b>{{info.view_count}}</b>
</div>
<div class="label">
Total: {{info.like_count+info.dislike_count}} votes
views
</div>
</div>
{% else %}
<div class="ui mini statistic">
<div class="value">
<i class="red thumbs down icon"></i> <b>{{info.rating}}%</b>
</div>
<div class="label">
Total: {{info.like_count+info.dislike_count}} votes
</div>
</div>
{% endif %}
</div>
</div>
</div>
<div class="center aligned ui segment">
{% if info.average_rating | int > 2.5 %}
<div class="ui mini statistic">
<div class="value">
<i class="green thumbs up icon"></i> <b>{{info.average_rating}}/5</b>
</div>
<div class="label">
Total: {{info.total_likes}} votes
</div>
</div>
{% else %}
<div class="ui mini statistic">
<div class="value">
<i class="red thumbs down icon"></i> <b>{{info.average_rating}}/5</b>
</div>
<div class="label">
Total: {{info.total_likes}} votes
</div>
</div>
{% endif %}
</div>
</div>
<div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls>
{% for format in info.formats %}
{% if format.audio_valid %}
<div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls>
{% for format in info.audio_formats %}
<source src="{{format.url}}">
{%endif%}
{%endfor%}
No audio available.
</audio>
{%endfor%}
No audio available.
</audio>
</div>
<div class="ui raised segment break-word">
<p>{{info.description}}</p>
</div>
</div>
<div class="ui raised segment break-word">
<p>{{info.description}}</p>
{%if videocomments%}
<div class="ui comments">
<h3 class="ui dividing header">Comments</h3>
{% for comment in videocomments %}
{% include '_video_comment.html' %}
{% endfor %}
</div>
</div>
<div class="ui comments">
<h3 class="ui dividing header">Comments</h3>
{% for comment in videocomments %}
{% include '_video_comment.html' %}
{% endfor %}
</div>
<script src="{{ url_for('static',filename='video.min.js') }}"></script>
{% if info.live %}
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
<script>
var player = videojs('live');
player.play();
</script>
{% endif %}
{%endif%}
<script src="{{ url_for('static',filename='video.min.js') }}"></script>
{% if info.live %}
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
<script>
var player = videojs('live');
player.play();
</script>
{% endif %}
{%endif%}
{% endblock %}

77
youtube/video.py Normal file
View File

@ -0,0 +1,77 @@
from youtube_dlc import YoutubeDL
import json
# Options for the module-wide YoutubeDL instance. This module only extracts
# metadata, so downloading is skipped and console output is silenced.
# NOTE(review): 'ignoreerrors' presumably makes extraction failures come back
# as a None result rather than an exception — confirm against youtube-dlc docs.
options = {
    'ignoreerrors': True,
    'quiet': True,
    'skip_download': True
}

# Single extractor instance created at import time and reused by get_info().
ydl = YoutubeDL(options)
ydl.add_default_info_extractors()

# Load the instance configuration. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('yotter-config.json') as config_file:
    config = json.load(config_file)
def get_info(url):
    """Extract a video's metadata and flatten it into a plain dict.

    Args:
        url: Video URL or id, passed unchanged to ``ydl.extract_info``.

    Returns:
        A dict that always contains ``'error'``. When extraction raised or
        returned nothing, the dict is ``{'error': True}`` and has no other
        keys. Otherwise ``'error'`` is False and the dict carries the
        extractor's metadata fields plus:

        * ``like_count`` / ``dislike_count`` -- the extractor's counts, with
          missing or None values replaced by 0;
        * ``total_likes`` -- sum of the two normalised counts;
        * ``average_rating`` -- first four characters of the rating's string
          form;
        * ``formats`` / ``audio_formats`` -- the extractor's formats filtered
          by ``get_video_formats``.

        Metadata fields the extractor did not provide are set to None.
    """
    video = {'error': False}

    try:
        info = ydl.extract_info(url, download=False)
    except Exception:
        # Treat any extractor failure as "no info". `info` must still be
        # bound here, otherwise the check below raises UnboundLocalError.
        info = None

    if info is None:
        video['error'] = True
        return video

    # Fields copied through unchanged. `.get` is used because the extractor
    # does not guarantee that every one of these keys is present.
    passthrough_keys = (
        'uploader', 'uploader_id', 'channel_id', 'upload_date', 'title',
        'thumbnails', 'description', 'categories', 'subtitles', 'duration',
        'view_count', 'is_live', 'start_time', 'end_time', 'series',
        'subscriber_count',
    )
    for key in passthrough_keys:
        video[key] = info.get(key)

    # Counts may be missing or None (e.g. ratings disabled); normalise to 0.
    video['like_count'] = info.get('like_count') or 0
    video['dislike_count'] = info.get('dislike_count') or 0
    # Sum the normalised counts: int(None) on the raw values would raise.
    video['total_likes'] = int(video['dislike_count']) + int(video['like_count'])
    video['average_rating'] = str(info.get('average_rating'))[0:4]

    raw_formats = info.get('formats') or []
    video['formats'] = get_video_formats(raw_formats)
    video['audio_formats'] = get_video_formats(raw_formats, audio=True)
    return video
def get_video_formats(formats, audio=False):
    """Filter extractor format dicts down to the playable ones.

    Args:
        formats: Iterable of format dicts; ``None`` is treated as empty.
            Each dict is inspected for ``'vcodec'``, ``'acodec'`` and
            ``'format_note'``. A codec counts as absent only when its value
            is the string ``'none'``; a missing key is treated as present.
        audio: When False (default) return formats that carry both video and
            audio, excluding those whose ``format_note`` is ``'144p'``. When
            True return audio-only formats instead.

    Returns:
        A new list with the selected format dicts, in their original order.
        Video-only formats are never returned.
    """
    selected = []
    for fmt in formats or []:
        has_video = fmt.get('vcodec') != 'none'
        has_audio = fmt.get('acodec') != 'none'
        if not has_audio:
            # Video-only (or empty) stream: not usable for either list.
            continue
        if audio:
            if not has_video:
                selected.append(fmt)
        elif has_video and fmt.get('format_note') != '144p':
            selected.append(fmt)
    return selected