Use Youtube-dlc for video data extraction

This commit is contained in:
pluja 2020-10-30 16:16:09 +01:00
parent 016bcd4775
commit 78d6044dd2
3 changed files with 211 additions and 144 deletions

View File

@ -29,7 +29,7 @@ from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, Channe
from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow
from youtube import comments, utils, channel as ytch, search as yts
from youtube import watch as ytwatch
from youtube import video as ytvid
#########################################
#########################################
@ -467,50 +467,40 @@ def get_live_urls(urls):
best_urls.append(url)
return best_urls
@app.route('/watch', methods=['GET'])
@login_required
def watch():
# NOTE(review): this span reads as a diff hunk whose +/- markers and
# indentation were stripped: superseded and replacement statements sit next
# to each other (two `info = ...` assignments, three `return` lines). Treat
# it as a before/after listing, not as one runnable body.
# The video identifier comes from the `v` query parameter (None if absent).
id = request.args.get('v', None)
info = ytwatch.extract_info(id, False, playlist_id=None, index=None)
info = ytvid.get_info(id)
vsources = ytwatch.get_video_sources(info, False)
# Retry 3 times if no sources are available.
retry = 3
while retry != 0 and len(vsources) == 0:
vsources = ytwatch.get_video_sources(info, False)
retry -= 1
if info['error'] == False:
# Rewrite each format URL: drop the "https://<host>" prefix and carry the
# host in a "&host=" query parameter instead.
# NOTE(review): presumably so playback is routed through this server -
# confirm against the route that consumes `host`.
for format in info['formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Same host rewrite, applied to the `src` of each video source.
for source in vsources:
hostName = urllib.parse.urlparse(source['src']).netloc
source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Parse video formats
for v_format in info['formats']:
hostName = urllib.parse.urlparse(v_format['url']).netloc
v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Flag entries that have an audio bitrate but no video codec.
if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None:
v_format['audio_valid'] = True
# Same host rewrite, applied to the audio-only formats.
for format in info['audio_formats']:
hostName = urllib.parse.urlparse(format['url']).netloc
format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName
# Markup description
try:
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>")))
info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "<br>"))).replace('www.youtube.com', config['serverName']).replace('youtube.com', config['serverName']).replace("/join","")
# NOTE(review): `AttributeError or TypeError` evaluates to `AttributeError`,
# so a TypeError is NOT caught here; the tuple form is
# `except (AttributeError, TypeError):`.
except AttributeError or TypeError:
print(info['description'])
# Get comments
# Comments are only requested when the video is not flagged as live.
if not info['is_live']:
videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='')
videocomments = utils.post_process_comments_info(videocomments)
if videocomments is not None:
# Most-liked comments first.
videocomments.sort(key=lambda x: x['likes'], reverse=True)
# Calculate rating %
# Likes as a percentage of all votes, kept as the first 4 characters of
# the string form; 50.0 is used when there are no votes.
if info['like_count']+info['dislike_count']>0:
info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4]
else:
info['rating'] = 50.0
return render_template("video.html", info=info, title='{}'.format(info['title']), config=config,
videocomments=videocomments, vsources=vsources)
videocomments=False
return render_template("video.html", info=info, title=info['title'], config=config,
videocomments=videocomments)
# NOTE(review): looks like the fallback render for the case where
# info['error'] is set - confirm against the indented original.
return render_template("video.html", info=info, title='Scheduled Video', config=config)
def markupString(string):
@ -881,7 +871,7 @@ def getFeed(urls):
newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text
newPost["timeStamp"] = date_time_str
newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'}))
newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})).replace("\n", "<br>")
if post.find('div', attrs={'class': 'retweet-header'}):
newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={

View File

@ -4,27 +4,24 @@
{% extends "base.html" %}
{% block content %}
<div style="width: 80%;" class="ui container">
{% if info.error != None or info.playability_error != None %}
{% if info.error == True %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">ERROR WITH VIDEO</h4>
<h4 class="ui header"><i class="calendar icon"></i> SCHEDULED VIDEO</h4>
<h5 class="ui header">This video is scheduled and is not supported by Yotter.</h5>
</div>
</div>
{% elif info.playability_status != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">SCHEDULED VIDEO</h4>
<h5 class="ui header">{{video.premieres}}</h5>
</div>
</div>
{% elif info.live %}
<div class="video-js-responsive-container vjs-hd">
{% else %}
{% if info.start_time != None %}
{% elif info.is_live != None %}
<!--<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source
src="#"
type="application/x-mpegURL">
</video-js>
</div>
</div>-->
<div class="ui center aligned text container">
<div class="ui segment">
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
@ -41,8 +38,8 @@
buffered
preload="none">
{% if config.isInstance %}
{% for source in vsources %}
<source src="{{source.src}}" type="{{source.type}}">
{% for source in info.formats %}
<source src="{{source.url}}" type="video/{{source.ext}}">
{% endfor %}
{% endif %}
</video-js>
@ -55,9 +52,12 @@
</div>
<div class="ui horizontal segments">
<div class="center aligned ui segment">
<a href="{{ url_for('channel', id=info.author_id)}}">
<i class="user icon"></i> <b>{{info.author}}</b>
<a href="{{ url_for('channel', id=info.uploader_id)}}">
<i class="user icon"></i> <b>{{info.uploader}}</b>
</a>
<div class="label">
<i class="user icon"></i>{{info.subscriber_count}}
</div>
</div>
<div class="center aligned ui segment">
<div class="ui mini statistic">
@ -70,22 +70,22 @@
</div>
</div>
<div class="center aligned ui segment">
{% if info.rating | int > 49 %}
{% if info.average_rating | int > 2.5 %}
<div class="ui mini statistic">
<div class="value">
<i class="green thumbs up icon"></i> <b>{{info.rating}}%</b>
<i class="green thumbs up icon"></i> <b>{{info.average_rating}}/5</b>
</div>
<div class="label">
Total: {{info.like_count+info.dislike_count}} votes
Total: {{info.total_likes}} votes
</div>
</div>
{% else %}
<div class="ui mini statistic">
<div class="value">
<i class="red thumbs down icon"></i> <b>{{info.rating}}%</b>
<i class="red thumbs down icon"></i> <b>{{info.average_rating}}/5</b>
</div>
<div class="label">
Total: {{info.like_count+info.dislike_count}} votes
Total: {{info.total_likes}} votes
</div>
</div>
{% endif %}
@ -95,10 +95,8 @@
<div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls>
{% for format in info.formats %}
{% if format.audio_valid %}
{% for format in info.audio_formats %}
<source src="{{format.url}}">
{%endif%}
{%endfor%}
No audio available.
</audio>
@ -109,13 +107,14 @@
</div>
</div>
{%if videocomments%}
<div class="ui comments">
<h3 class="ui dividing header">Comments</h3>
{% for comment in videocomments %}
{% include '_video_comment.html' %}
{% endfor %}
</div>
{%endif%}
<script src="{{ url_for('static',filename='video.min.js') }}"></script>
{% if info.live %}
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
@ -124,4 +123,5 @@
player.play();
</script>
{% endif %}
{%endif%}
{% endblock %}

77
youtube/video.py Normal file
View File

@ -0,0 +1,77 @@
from youtube_dlc import YoutubeDL
import json
# Options handed to the shared YoutubeDL instance: extraction errors are
# ignored rather than raised, console output is silenced, and no media is
# downloaded.
options = {
    'ignoreerrors': True,
    'quiet': True,
    'skip_download': True
}

# One extractor instance, created at import time and reused by get_info().
ydl = YoutubeDL(options)
ydl.add_default_info_extractors()

# Load the instance configuration; the context manager guarantees the file
# handle is closed once the JSON has been parsed.
with open('yotter-config.json') as _config_file:
    config = json.load(_config_file)
def get_info(url):
    """Extract video metadata through the shared ``ydl`` instance.

    Args:
        url: Value passed unchanged to ``ydl.extract_info``.

    Returns:
        A dict that always contains an ``'error'`` key. If extraction raised
        or returned ``None`` the dict is ``{'error': True}``. Otherwise
        ``'error'`` is ``False`` and the dict also holds the metadata fields
        copied from the extractor result, the like/dislike counts with
        ``None`` replaced by 0, their sum as ``'total_likes'``, the first
        four characters of the average rating as ``'average_rating'``, and
        the ``'formats'`` / ``'audio_formats'`` lists produced by
        ``get_video_formats``.
    """
    video = {'error': False}

    # Keep `info` bound even when extraction raises, so the check below
    # reports the failure through the 'error' flag instead of raising itself.
    try:
        info = ydl.extract_info(url, download=False)
    except Exception:
        info = None

    if info is None:
        video['error'] = True
        return video

    # Fields copied verbatim from the extractor result.
    for key in ('uploader', 'uploader_id', 'channel_id', 'upload_date',
                'title', 'thumbnails', 'description', 'categories',
                'subtitles', 'duration', 'view_count'):
        video[key] = info[key]

    # Counts may come back as None; normalise them to 0 before any arithmetic.
    for key in ('like_count', 'dislike_count'):
        video[key] = 0 if info[key] is None else info[key]

    # Sum the normalised counts, not the raw ones, so a None count cannot
    # reach int().
    video['total_likes'] = int(video['dislike_count']) + int(video['like_count'])
    video['average_rating'] = str(info['average_rating'])[0:4]

    video['formats'] = get_video_formats(info['formats'])
    video['audio_formats'] = get_video_formats(info['formats'], audio=True)

    for key in ('is_live', 'start_time', 'end_time', 'series',
                'subscriber_count'):
        video[key] = info[key]

    return video
def get_video_formats(formats, audio=False):
    """Select the playable formats from an extractor format list.

    Args:
        formats: Iterable of format dicts. The ``'vcodec'``, ``'acodec'``
            and ``'format_note'`` keys are read when present; a codec value
            of ``'none'`` marks that stream as absent. ``None`` is treated
            as an empty list.
        audio: When true, return the audio-only formats instead of the
            combined video+audio ones.

    Returns:
        A new list, in input order, of either
        * formats carrying both video and audio, excluding those whose
          ``'format_note'`` is ``'144p'`` (``audio=False``), or
        * formats carrying audio but no video (``audio=True``).
        Video-only formats and formats with neither stream are never
        returned.
    """
    selected = []
    for fmt in formats or []:
        # .get() keeps a format without these optional keys from raising;
        # a missing codec entry is not 'none', so that stream counts as present.
        has_video = fmt.get('vcodec') != 'none'
        has_audio = fmt.get('acodec') != 'none'

        if not has_audio:
            # Video-only, or no usable stream at all.
            continue

        if audio:
            if not has_video:
                selected.append(fmt)
        elif has_video and fmt.get('format_note') != '144p':
            selected.append(fmt)
    return selected