From 78d6044dd2ec9d944fc152781bb60cbef3c7c570 Mon Sep 17 00:00:00 2001 From: pluja Date: Fri, 30 Oct 2020 16:16:09 +0100 Subject: [PATCH] Use Youtube-dlc for video data extraction --- app/routes.py | 70 ++++++------- app/templates/video.html | 208 +++++++++++++++++++-------------------- youtube/video.py | 77 +++++++++++++++ 3 files changed, 211 insertions(+), 144 deletions(-) create mode 100644 youtube/video.py diff --git a/app/routes.py b/app/routes.py index 005bd2f..71969c8 100644 --- a/app/routes.py +++ b/app/routes.py @@ -29,7 +29,7 @@ from app.forms import LoginForm, RegistrationForm, EmptyForm, SearchForm, Channe from app.models import User, twitterPost, ytPost, Post, youtubeFollow, twitterFollow from youtube import comments, utils, channel as ytch, search as yts from youtube import watch as ytwatch - +from youtube import video as ytvid ######################################### ######################################### @@ -467,50 +467,40 @@ def get_live_urls(urls): best_urls.append(url) return best_urls - @app.route('/watch', methods=['GET']) @login_required def watch(): id = request.args.get('v', None) - info = ytwatch.extract_info(id, False, playlist_id=None, index=None) + info = ytvid.get_info(id) + + if info['error'] == False: + for format in info['formats']: + hostName = urllib.parse.urlparse(format['url']).netloc + format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName + + for format in info['audio_formats']: + hostName = urllib.parse.urlparse(format['url']).netloc + format['url'] = format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName - vsources = ytwatch.get_video_sources(info, False) - # Retry 3 times if no sources are available. - retry = 3 - while retry != 0 and len(vsources) == 0: - vsources = ytwatch.get_video_sources(info, False) - retry -= 1 + # Markup description + try: + info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "
"))).replace('www.youtube.com', config['serverName']).replace('youtube.com', config['serverName']).replace("/join","") + except AttributeError or TypeError: + print(info['description']) - for source in vsources: - hostName = urllib.parse.urlparse(source['src']).netloc - source['src'] = source['src'].replace("https://{}".format(hostName), "") + "&host=" + hostName - - # Parse video formats - for v_format in info['formats']: - hostName = urllib.parse.urlparse(v_format['url']).netloc - v_format['url'] = v_format['url'].replace("https://{}".format(hostName), "") + "&host=" + hostName - if v_format['audio_bitrate'] is not None and v_format['vcodec'] is None: - v_format['audio_valid'] = True - - # Markup description - try: - info['description'] = Markup(bleach.linkify(info['description'].replace("\n", "
"))) - except AttributeError or TypeError: - print(info['description']) - - # Get comments - videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='') - videocomments = utils.post_process_comments_info(videocomments) - if videocomments is not None: - videocomments.sort(key=lambda x: x['likes'], reverse=True) - - # Calculate rating % - if info['like_count']+info['dislike_count']>0: - info['rating'] = str((info['like_count'] / (info['like_count'] + info['dislike_count'])) * 100)[0:4] - else: - info['rating'] = 50.0 - return render_template("video.html", info=info, title='{}'.format(info['title']), config=config, - videocomments=videocomments, vsources=vsources) + # Get comments + if not info['is_live']: + videocomments = comments.video_comments(id, sort=0, offset=0, lc='', secret_key='') + videocomments = utils.post_process_comments_info(videocomments) + if videocomments is not None: + videocomments.sort(key=lambda x: x['likes'], reverse=True) + else: + videocomments=False + + return render_template("video.html", info=info, title=info['title'], config=config, + videocomments=videocomments) + + return render_template("video.html", info=info, title='Scheduled Video', config=config) def markupString(string): @@ -881,7 +871,7 @@ def getFeed(urls): newPost["twitterName"] = post.find('a', attrs={'class': 'fullname'}).text newPost["timeStamp"] = date_time_str newPost["date"] = post.find('span', attrs={'class': 'tweet-date'}).find('a').text - newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})) + newPost["content"] = Markup(post.find('div', attrs={'class': 'tweet-content'})).replace("\n", "
") if post.find('div', attrs={'class': 'retweet-header'}): newPost["username"] = post.find('div', attrs={'class': 'retweet-header'}).find('div', attrs={ diff --git a/app/templates/video.html b/app/templates/video.html index e846577..fd30320 100644 --- a/app/templates/video.html +++ b/app/templates/video.html @@ -4,124 +4,124 @@ {% extends "base.html" %} {% block content %}
- {% if info.error != None or info.playability_error != None %} -
-
-

ERROR WITH VIDEO

-
-
- {% elif info.playability_status != None %} -
-
-

SCHEDULED VIDEO

-
{{video.premieres}}
-
-
- {% elif info.live %} -
- - - -
-
-
-

LIVESTREAM VIDEO

-

FEATURE AVAILABLE SOON

-
Livestreams are under developent and still not supported on Yotter.
-
-
- {%else%} -
- - {% if config.isInstance %} - {% for source in vsources %} - - {% endfor %} - {% endif %} - -
- {%endif%} -
+ {% if info.error == True %} +
-

{{info.title}}

+

SCHEDULED VIDEO

+
This video is scheduled and is not supported by Yotter.
-
- -
-
-
- {{info.view_count}} -
-
- views +
+ {% else %} + {% if info.start_time != None %} + {% elif info.is_live != None %} + +
+
+

LIVESTREAM VIDEO

+

FEATURE AVAILABLE SOON

+
Livestreams are under developent and still not supported on Yotter.
-
-
- {% if info.rating | int > 49 %} + {%else%} +
+ + {% if config.isInstance %} + {% for source in info.formats %} + + {% endfor %} + {% endif %} + +
+ {%endif%} + +
+
+

{{info.title}}

+
+
+
+ + {{info.uploader}} + +
+ {{info.subscriber_count}} +
+
+
- {{info.rating}}% + {{info.view_count}}
- Total: {{info.like_count+info.dislike_count}} votes + views
- {% else %} -
-
- {{info.rating}}% -
-
- Total: {{info.like_count+info.dislike_count}} votes -
-
- {% endif %} -
-
+
+
+ {% if info.average_rating | int > 2.5 %} +
+
+ {{info.average_rating}}/5 +
+
+ Total: {{info.total_likes}} votes +
+
+ {% else %} +
+
+ {{info.average_rating}}/5 +
+
+ Total: {{info.total_likes}} votes +
+
+ {% endif %} +
+
-
-

Audio Only

-
-
-

{{info.description}}

+ {%if videocomments%} +
+

Comments

+ {% for comment in videocomments %} + {% include '_video_comment.html' %} + {% endfor %}
-
- -
-

Comments

- {% for comment in videocomments %} - {% include '_video_comment.html' %} - {% endfor %} -
- - -{% if info.live %} - - -{% endif %} + {%endif%} + + {% if info.live %} + + + {% endif %} + {%endif%} {% endblock %} \ No newline at end of file diff --git a/youtube/video.py b/youtube/video.py new file mode 100644 index 0000000..5108193 --- /dev/null +++ b/youtube/video.py @@ -0,0 +1,77 @@ +from youtube_dlc import YoutubeDL +import json +options = { + 'ignoreerrors': True, + 'quiet': True, + 'skip_download': True +} +ydl = YoutubeDL(options) +ydl.add_default_info_extractors() +config = json.load(open('yotter-config.json')) + +def get_info(url): + video = {} + video['error'] = False + + try: + info = ydl.extract_info(url, download=False) + except: + video['error'] = True + + if info == None: + video['error'] = True + if not video['error'] and info is not None: + video['uploader'] = info['uploader'] + video['uploader_id'] = info['uploader_id'] + video['channel_id'] = info['channel_id'] + video['upload_date'] = info['upload_date'] + video['title'] = info['title'] + video['thumbnails'] = info['thumbnails'] + video['description'] = info['description'] + video['categories'] = info['categories'] + video['subtitles'] = info['subtitles'] + video['duration'] = info['duration'] + video['view_count'] = info['view_count'] + + if(info['like_count'] == None): + video['like_count'] = 0 + else: + video['like_count'] = info['like_count'] + + if(info['dislike_count'] == None): + video['dislike_count'] = 0 + else: + video['dislike_count'] = info['dislike_count'] + + video['total_likes'] = int(info['dislike_count']) + int(info['like_count']) + video['average_rating'] = str(info['average_rating'])[0:4] + video['formats'] = get_video_formats(info['formats']) + video['audio_formats'] = get_video_formats(info['formats'], audio=True) + video['is_live'] = info['is_live'] + video['start_time'] = info['start_time'] + video['end_time'] = info['end_time'] + video['series'] = info['series'] + video['subscriber_count'] = info['subscriber_count'] + return video + +def get_video_formats(formats, audio=False): + best_formats = [] + audio_formats = [] + for format in formats: + if format['vcodec'] != 'none' and format['acodec'] != 'none': + # Video and Audio + if format['format_note'] == '144p': + continue + else: + best_formats.append(format) + elif format['vcodec'] == 'none' and format['acodec'] != 'none': + # Audio only + audio_formats.append(format) + else: + # Video only + continue + + if audio: + return audio_formats + else: + return best_formats \ No newline at end of file