Merge branch 'dev-indep' of https://github.com/ytorg/yotter into dev-indep

commit 7a71b6914a

app/routes.py (575 lines changed; file diff suppressed because it is too large)
@@ -1,7 +1,7 @@
 <div class="comment">
 <a class="avatar" style="width: 32px; height: 32px;"><img src="{{ comment.thumbnail }}"></a>
 <div class="content">
-{% if comment.authorIsChannelOwner %}
+{% if comment.author == info.author %}
 
 <a class="author" style="color: red;" href="{{comment.channel}}"><i class="red user circle icon"></i>{{comment.author}}</a>
 {% else %}
@@ -22,9 +22,6 @@
 <i class="thumbs up icon"></i>
 {{comment.likes}}
 </div>
-{%if comment.creatorHeart != false%}
-<i class="small red heart icon"></i><img class="ui circular image" style="width: 15px; height: 15px;" src="{{comment.creatorHeart}}">
-{% endif %}
 </div>
 </div>
 </div>
@@ -4,30 +4,30 @@
 {% extends "base.html" %}
 {% block content %}
 <div class="ui text container">
-{% if video.nginxUrl == "#" %}
+{% if info.error != None or info.playability_error != None %}
 <div class="ui center aligned text container">
 <div class="ui segment">
 <h4 class="ui header">ERROR WITH VIDEO</h4>
 </div>
 </div>
-{% elif video.isUpcoming %}
+{% elif info.playability_status != None %}
 <div class="ui center aligned text container">
 <div class="ui segment">
 <h4 class="ui header">SCHEDULED VIDEO</h4>
 <h5 class="ui header">{{video.premieres}}</h5>
 </div>
 </div>
-{% elif video.isLive %}
+{% elif info.live %}
 <div class="video-js-responsive-container vjs-hd">
-<video-js id=live width="1080" class="video-js vjs-default-skin" controls buffered>
+<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
 <source
-src="{{urls[0]['url']}}"
+src="#"
 type="application/x-mpegURL">
 </video-js>
 </div>
 <div class="ui center aligned text container">
 <div class="ui segment">
-<h3 class="ui header">LIVESTREAM VIDEO</h3>
+<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
 <h4 class="ui header">FEATURE AVAILABLE SOON</h4>
 <h5 class="ui header">Livestreams are under developent and still not supported on Yotter.</h5>
 </div>
@@ -41,11 +41,11 @@
 buffered
 preload="none">
 {% if config.nginxVideoStream %}
-{% for url in urls %}
-<source src="{{url.url}}" type="video/{{url.ext}}">
+{% for format in info.formats %}
+{% if format.video_valid %}
+<source src="{{format.url}}" type="video/{{format.ext}}">
+{% endif %}
 {% endfor %}
-{% else %}
-<source src="{{url_for('stream', url=video.videoUrl.replace('/', 'YotterSlash'))}}" type="video/mp4">
 {% endif %}
 </video>
 </div>
@@ -53,55 +53,54 @@
 
 <div class="ui segments">
 <div class="ui segment">
-<h2 class="ui header break-word">{{video.title}}</h2>
+<h2 class="ui header break-word">{{info.title}}</h2>
 </div>
 <div class="ui horizontal segments">
 <div class="center aligned ui segment">
-<a href="{{ url_for('channel', id=video.channelId)}}">
-{%if video.author.__len__() > 8%}
-<i class="user icon"></i> {{video.author[0:10]+'...'}}
-{%else%}
-<i class="user icon"></i> {{video.author}}
-{%endif%}
+<a href="{{ url_for('channel', id=info.author_id)}}">
+<i class="user icon"></i> {{info.author}}
 </a>
 </div>
 <div class="center aligned ui segment">
-<h4 class="ui header"><i class="grey eye icon"></i>{{video.viewCount}}</h4>
+<h4 class="ui header"><i class="grey eye icon"></i>{{info.view_count}}</h4>
 </div>
 <div class="center aligned ui segment">
-{% if video.averageRating | int > 49 %}
-<h4 class="ui header"><i class="green thumbs up icon"></i> {{video.averageRating[0:4]}}%</h4>
+{% if info.rating | int > 49 %}
+<h4 class="ui header"><i class="green thumbs up icon"></i> {{info.rating}}%</h4>
 {% else %}
-<h4 class="ui header"><i class="red thumbs down icon"></i> {{video.averageRating[0:4]}}%</h4>
+<h4 class="ui header"><i class="red thumbs down icon"></i> {{info.rating}}%</h4>
 {% endif %}
 </div>
 </div>
 
 <div class="ui raised center aligned segment break-word">
-<p><i class="grey music icon"></i><b><a href="{{video.nginxAudioUrl}}">Play Only Audio</a></b></p>
+<p><i class="grey music icon"></i><b>Audio Only</b></p>
 <audio controls>
-<source src="{{video.nginxAudioUrl}}">
-Your browser does not support the audio element.
+{% for format in info.formats %}
+{% if format.audio_valid %}
+<source src="{{format.url}}">
+{%endif%}
+{%endfor%}
+No audio available.
 </audio>
 </div>
 
 <div class="ui raised segment break-word">
-<p>{{video.description}}</p>
+<p>{{info.description}}</p>
 </div>
 </div>
 
-{% if comments != False %}
 <div class="ui comments">
 <h3 class="ui dividing header">Comments</h3>
-{% for comment in video.comments %}
+{% for comment in videocomments %}
 {% include '_video_comment.html' %}
 {% endfor %}
 </div>
-{%endif%}
 
 <script src="{{ url_for('static',filename='video.min.js') }}"></script>
-<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
-{% if video.isLive %}
+{% if info.live %}
+<p>Active</p>
+<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
 <script>
 var player = videojs('live');
 player.play();
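The reworked watch template above filters info.formats on format.video_valid and format.audio_valid. Those flags are set by the route code (in the suppressed app/routes.py diff), so they are not visible here. Purely as an illustrative sketch, and assuming yt-dlp-style format dictionaries with 'vcodec'/'acodec'/'url' keys (an assumption, not something this commit shows), the flags could be derived roughly like this:

# Hypothetical helper (not part of this commit): tag each format dict so the
# template can pick playable video and audio sources.
def label_formats(formats):
    for fmt in formats:
        has_url = bool(fmt.get('url'))
        fmt['video_valid'] = has_url and fmt.get('vcodec', 'none') != 'none'
        fmt['audio_valid'] = has_url and fmt.get('acodec', 'none') != 'none'
    return formats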
youtube/channel.py (new file, 281 lines)
@@ -0,0 +1,281 @@
import base64
from youtube import util, yt_data_extract, local_playlist, subscriptions
from youtube import yt_app

import urllib
import json
from string import Template
import youtube.proto as proto
import html
import math
import gevent
import re
import cachetools.func
import traceback

import flask
from flask import request

headers_desktop = (
    ('Accept', '*/*'),
    ('Accept-Language', 'en-US,en;q=0.5'),
    ('X-YouTube-Client-Name', '1'),
    ('X-YouTube-Client-Version', '2.20180830'),
) + util.desktop_ua
headers_mobile = (
    ('Accept', '*/*'),
    ('Accept-Language', 'en-US,en;q=0.5'),
    ('X-YouTube-Client-Name', '2'),
    ('X-YouTube-Client-Version', '2.20180830'),
) + util.mobile_ua
real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)

# SORT:
# videos:
#    Popular - 1
#    Oldest - 2
#    Newest - 3
# playlists:
#    Oldest - 2
#    Newest - 3
#    Last video added - 4

# view:
# grid: 0 or 1
# list: 2
def channel_ctoken_v3(channel_id, page, sort, tab, view=1):
    # page > 1 doesn't work when sorting by oldest
    offset = 30*(int(page) - 1)
    page_token = proto.string(61, proto.unpadded_b64encode(
        proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
    ))

    tab = proto.string(2, tab )
    sort = proto.uint(3, int(sort))

    shelf_view = proto.uint(4, 0)
    view = proto.uint(6, int(view))
    continuation_info = proto.string(3,
        proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
    )

    channel_id = proto.string(2, channel_id )
    pointless_nest = proto.string(80226972, channel_id + continuation_info)

    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
    # see https://github.com/iv-org/invidious/issues/1319#issuecomment-671732646
    # page > 1 doesn't work when sorting by oldest
    offset = 30*(int(page) - 1)
    schema_number = {
        3: 6307666885028338688,
        2: 17254859483345278706,
        1: 16570086088270825023,
    }[int(sort)]
    page_token = proto.string(61, proto.unpadded_b64encode(proto.string(1,
        proto.uint(1, schema_number) + proto.string(2,
            proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
        )
    )))

    tab = proto.string(2, tab )
    sort = proto.uint(3, int(sort))
    #page = proto.string(15, str(page) )

    shelf_view = proto.uint(4, 0)
    view = proto.uint(6, int(view))
    continuation_info = proto.string(3,
        proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
    )

    channel_id = proto.string(2, channel_id )
    pointless_nest = proto.string(80226972, channel_id + continuation_info)

    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
    tab = proto.string(2, tab )
    sort = proto.uint(3, int(sort))
    page = proto.string(15, str(page) )
    # example with shelves in videos tab: https://www.youtube.com/channel/UCNL1ZadSjHpjm4q9j2sVtOA/videos
    shelf_view = proto.uint(4, 0)
    view = proto.uint(6, int(view))
    continuation_info = proto.string(3, proto.percent_b64encode(tab + view + sort + shelf_view + page + proto.uint(23, 0)) )

    channel_id = proto.string(2, channel_id )
    pointless_nest = proto.string(80226972, channel_id + continuation_info)

    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
    message = 'Got channel tab' if print_status else None

    if int(sort) == 2 and int(page) > 1:
        ctoken = channel_ctoken_v1(channel_id, page, sort, tab, view)
        ctoken = ctoken.replace('=', '%3D')
        url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab
            + '?action_continuation=1&continuation=' + ctoken
            + '&pbj=1')
        content = util.fetch_url(url, headers_desktop + real_cookie,
            debug_name='channel_tab', report_text=message)
    else:
        ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
        ctoken = ctoken.replace('=', '%3D')
        url = 'https://www.youtube.com/browse_ajax?ctoken=' + ctoken
        content = util.fetch_url(url,
            headers_desktop + generic_cookie,
            debug_name='channel_tab', report_text=message)

    return content

# cache entries expire after 30 minutes
@cachetools.func.ttl_cache(maxsize=128, ttl=30*60)
def get_number_of_videos_channel(channel_id):
    if channel_id is None:
        return 1000

    # Uploads playlist
    playlist_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'

    try:
        response = util.fetch_url(url, headers_mobile,
            debug_name='number_of_videos', report_text='Got number of videos')
    except urllib.error.HTTPError as e:
        traceback.print_exc()
        print("Couldn't retrieve number of videos")
        return 1000

    response = response.decode('utf-8')

    # match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
    match = re.search(r'"numVideosText".*?([,\d]+)', response)
    if match:
        return int(match.group(1).replace(',',''))
    else:
        return 0

channel_id_re = re.compile(r'videos\.xml\?channel_id=([a-zA-Z0-9_-]{24})"')
@cachetools.func.lru_cache(maxsize=128)
def get_channel_id(base_url):
    # method that gives the smallest possible response at ~4 kb
    # needs to be as fast as possible
    base_url = base_url.replace('https://www', 'https://m') # avoid redirect
    response = util.fetch_url(base_url + '/about?pbj=1', headers_mobile,
        debug_name='get_channel_id', report_text='Got channel id').decode('utf-8')
    match = channel_id_re.search(response)
    if match:
        return match.group(1)
    return None

def get_number_of_videos_general(base_url):
    return get_number_of_videos_channel(get_channel_id(base_url))

def get_channel_search_json(channel_id, query, page):
    params = proto.string(2, 'search') + proto.string(15, str(page))
    params = proto.percent_b64encode(params)
    ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, headers_desktop, debug_name='channel_search')

    return polymer_json


def post_process_channel_info(info):
    info['avatar'] = util.prefix_url(info['avatar'])
    info['channel_url'] = util.prefix_url(info['channel_url'])
    for item in info['items']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)


playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}

# youtube.com/[channel_id]/[tab]
# youtube.com/user/[username]/[tab]
# youtube.com/c/[custom]/[tab]
# youtube.com/[custom]/[tab]
def get_channel_page_general_url(base_url, tab, request, channel_id=None):

    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos' and channel_id:
        tasks = (
            gevent.spawn(get_number_of_videos_channel, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos_general, base_url),
            gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', headers_desktop, debug_name='gen_channel_videos')
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
    elif tab == 'search' and channel_id:
        polymer_json = get_channel_search_json(channel_id, query, page_number)
    elif tab == 'search':
        url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
        polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)


    info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab)
    if info['error'] is not None:
        return flask.render_template('error.html', error_message = info['error'])

    post_process_channel_info(info)
    if tab == 'videos':
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/30)
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query
        info['header_playlist_names'] = local_playlist.get_playlist_names()
        info['page_number'] = page_number
    info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])

    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )

@yt_app.route('/channel/<channel_id>/')
@yt_app.route('/channel/<channel_id>/<tab>')
def get_channel_page(channel_id, tab='videos'):
    return get_channel_page_general_url('https://www.youtube.com/channel/' + channel_id, tab, request, channel_id)

@yt_app.route('/user/<username>/')
@yt_app.route('/user/<username>/<tab>')
def get_user_page(username, tab='videos'):
    return get_channel_page_general_url('https://www.youtube.com/user/' + username, tab, request)

@yt_app.route('/c/<custom>/')
@yt_app.route('/c/<custom>/<tab>')
def get_custom_c_page(custom, tab='videos'):
    return get_channel_page_general_url('https://www.youtube.com/c/' + custom, tab, request)

@yt_app.route('/<custom>')
@yt_app.route('/<custom>/<tab>')
def get_toplevel_custom_page(custom, tab='videos'):
    return get_channel_page_general_url('https://www.youtube.com/' + custom, tab, request)
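For context, a minimal sketch of how the helpers above fit together: channel_ctoken_v3 builds the continuation token that get_channel_tab sends to the browse_ajax endpoint. The channel id below is a placeholder, not a real channel.

# Illustrative only; 'UCxxxxxxxxxxxxxxxxxxxxxx' is a placeholder channel id.
ctoken = channel_ctoken_v3('UCxxxxxxxxxxxxxxxxxxxxxx', page=2, sort=3, tab='videos')
# get_channel_tab('UCxxxxxxxxxxxxxxxxxxxxxx', page='2') builds the same token and
# fetches https://www.youtube.com/browse_ajax?ctoken=... with it, returning raw JSON.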
youtube/comments.py (new file, 145 lines)
@@ -0,0 +1,145 @@
import base64
import json

from youtube import proto, util, yt_data_extract
from youtube.util import concat_or_none


# Here's what I know about the secret key (starting with ASJN_i)
# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
#   -Video id
#   -Offset
#   -Sort
# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
# *The encoded data is not valid protobuf
# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since
# *The ASJN has no relation with any of the data in the response it came from

def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
    video_id = proto.as_bytes(video_id)
    secret_key = proto.as_bytes(secret_key)


    page_info = proto.string(4,video_id) + proto.uint(6, sort)
    offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
    if secret_key:
        offset_information = proto.string(1, secret_key) + offset_information

    page_params = proto.string(2, video_id)
    if lc:
        page_params += proto.string(6, proto.percent_b64encode(proto.string(15, lc)))

    result = proto.nested(2, page_params) + proto.uint(3,6) + proto.nested(6, offset_information)
    return base64.urlsafe_b64encode(result).decode('ascii')

def comment_replies_ctoken(video_id, comment_id, max_results=500):

    params = proto.string(2, comment_id) + proto.uint(9, max_results)
    params = proto.nested(3, params)

    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params)
    return base64.urlsafe_b64encode(result).decode('ascii')


mobile_headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'X-YouTube-Client-Name': '2',
    'X-YouTube-Client-Version': '2.20180823',
}
def request_comments(ctoken, replies=False):
    if replies: # let's make it use different urls for no reason despite all the data being encoded
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for i in range(0,8):    # don't retry more than 8 times
        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments')
        if content[0:4] == b")]}'":     # random closing characters included at beginning of response for some reason
            content = content[4:]
        elif content[0:10] == b'\n<!DOCTYPE':   # occasionally returns html instead of json for no reason
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break

    polymer_json = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return polymer_json


def single_comment_ctoken(video_id, comment_id):
    page_params = proto.string(2, video_id) + proto.string(6, proto.percent_b64encode(proto.string(15, comment_id)))

    result = proto.nested(2, page_params) + proto.uint(3,6)
    return base64.urlsafe_b64encode(result).decode('ascii')


def post_process_comments_info(comments_info):
    for comment in comments_info['comments']:
        comment['author_url'] = concat_or_none(
            util.URL_ORIGIN, comment['author_url'])
        comment['author_avatar'] = concat_or_none(
            '/', comment['author_avatar'])

        comment['permalink'] = concat_or_none(util.URL_ORIGIN, '/watch?v=',
            comments_info['video_id'], '&lc=', comment['id'])

        reply_count = comment['reply_count']
        if reply_count == 0:
            comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
                '/post_comment?parent_id=', comment['id'],
                '&video_id=', comments_info['video_id'])
        else:
            comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
                '/comments?parent_id=', comment['id'],
                '&video_id=', comments_info['video_id'])

        if reply_count == 0:
            comment['view_replies_text'] = 'Reply'
        elif reply_count == 1:
            comment['view_replies_text'] = '1 reply'
        else:
            comment['view_replies_text'] = str(reply_count) + ' replies'


        if comment['like_count'] == 1:
            comment['likes_text'] = '1 like'
        else:
            comment['likes_text'] = str(comment['like_count']) + ' likes'


    if comments_info['ctoken']:
        comments_info['more_comments_url'] = concat_or_none(util.URL_ORIGIN,
            '/comments?ctoken=', comments_info['ctoken'])

    comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)

    if not comments_info['is_replies']:
        comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'


    comments_info['video_url'] = concat_or_none(util.URL_ORIGIN,
        '/watch?v=', comments_info['video_id'])
    comments_info['video_thumbnail'] = concat_or_none('/i.ytimg.com/vi/',
        comments_info['video_id'], '/mqdefault.jpg')


def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
    comments_info = yt_data_extract.extract_comments_info(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key)))
    post_process_comments_info(comments_info)

    post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id
    other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
    other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top')
    comments_info['comment_links'] = [('Post comment', post_comment_url), (other_sort_text, other_sort_url)]

    return comments_info

    return {}
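A short sketch of how the ctoken helpers above are typically combined: make_comment_ctoken produces the token, request_comments fetches the polymer JSON, and video_comments wraps both plus post-processing. The video id below is only a placeholder.

# Illustrative only; 'dQw4w9WgXcQ' stands in for a real video id.
token = make_comment_ctoken('dQw4w9WgXcQ', sort=0, offset=0)
polymer_json = request_comments(token)          # raw comment JSON from m.youtube.com
# or, end to end:
# info = video_comments('dQw4w9WgXcQ', sort=0)  # extracted and post-processed dict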
youtube/opensearch.xml (new file, 11 lines)
@@ -0,0 +1,11 @@
<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
<ShortName>Youtube local</ShortName>
<Description>no CIA shit in the background</Description>
<InputEncoding>UTF-8</InputEncoding>
<Image width="16" height="16">data:image/x-icon;base64,AAABAAEAEBAAAAEACAAlAgAAFgAAAIlQTkcNChoKAAAADUlIRFIAAAAQAAAAEAgGAAAAH/P/YQAAAexJREFUOI2lkzFPmlEUhp/73fshtCUCRtvQkJoKMrDQJvoHnBzUhc3EH0DUQf+As6tujo4M6mTiIDp0kGiMTRojTRNSW6o12iD4YYXv3g7Qr4O0ScM7npz7vOe+J0fk83lDF7K6eQygwkdHhI+P0bYNxmBXq5RmZui5vGQgn0f7fKi7O4oLC1gPD48BP9JpnpRKJFZXcQMB3m1u4vr9NHp76d/bo39/n4/z84ROThBa4/r91OJxMKb9BSn5mskAIOt1eq6uEFpjVyrEcjk+T0+TXlzkbTZLuFDAur9/nIFRipuREQCe7+zgBgK8mZvj/fIylVTKa/6UzXKbSnnuHkA0GnwbH/cA0a0takND3IyOEiwWAXBiMYTWjzLwtvB9bAyAwMUF8ZUVPiwtYTWbHqA6PIxoNv8OMLbN3eBga9TZWYQxaKX+AJJJhOv+AyAlT0slAG6TSX5n8+zszJugkzxA4PzcK9YSCQCk42DXaq1aGwqgfT5ebG9jpMQyUjKwu8vrtbWWqxC83NjAd31NsO2uleJnX58HCJ6eEjk8BGNQAA+RCOXJScpTU2AMwnUxlkXk4ACA+2iUSKGArNeRjkMsl6M8MYHQGtHpmIxSvFpfRzoORinQGqvZBCEwQoAxfMlkaIRCnQH/o66v8Re19MavaDNLfgAAAABJRU5ErkJggg==</Image>

<Url type="text/html" method="GET" template="http://localhost:$port_number/youtube.com/search">
  <Param name="query" value="{searchTerms}"/>
</Url>
<SearchForm>http://localhost:$port_number/youtube.com/search</SearchForm>
</SearchPlugin>
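The $port_number placeholders in this file look like string.Template-style variables, presumably substituted with the configured port when the plugin is served. How the project actually performs that substitution is not shown in this commit; a minimal sketch under that assumption:

# Assumption: $port_number is filled in via string.Template; the port value is illustrative.
from string import Template

with open('youtube/opensearch.xml') as f:
    xml = Template(f.read()).substitute(port_number=80)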
youtube/playlist.py (new file, 123 lines)
@@ -0,0 +1,123 @@
from youtube import util, yt_data_extract, proto, local_playlist
from youtube import yt_app

import base64
import urllib
import json
import string
import gevent
import math
from flask import request
import flask


def playlist_ctoken(playlist_id, offset):

    offset = proto.uint(1, offset)
    # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
    offset = b'PT:' + proto.unpadded_b64encode(offset)
    offset = proto.string(15, offset)

    continuation_info = proto.string( 3, proto.percent_b64encode(offset) )

    playlist_id = proto.string(2, 'VL' + playlist_id )
    pointless_nest = proto.string(80226972, playlist_id + continuation_info)

    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

# initial request types:
#   polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
#   ajax json:    https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418

# continuation request types:
#   polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
#   ajax json:    https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]


headers_1 = (
    ('Accept', '*/*'),
    ('Accept-Language', 'en-US,en;q=0.5'),
    ('X-YouTube-Client-Name', '2'),
    ('X-YouTube-Client-Version', '2.20180614'),
)

def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
    content = json.loads(util.uppercase_escape(content.decode('utf-8')))

    return content


#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
def get_videos(playlist_id, page):

    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
    headers = {
        'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180508',
    }

    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')

    info = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return info


@yt_app.route('/playlist')
def get_playlist_page():
    if 'list' not in request.args:
        abort(400)

    playlist_id = request.args.get('list')
    page = request.args.get('page', '1')

    if page == '1':
        first_page_json = playlist_first_page(playlist_id)
        this_page_json = first_page_json
    else:
        tasks = (
            gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ),
            gevent.spawn(get_videos, playlist_id, page)
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        first_page_json, this_page_json = tasks[0].value, tasks[1].value

    info = yt_data_extract.extract_playlist_info(this_page_json)
    if info['error']:
        return flask.render_template('error.html', error_message = info['error'])

    if page != '1':
        info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)

    util.prefix_urls(info['metadata'])
    for item in info.get('items', ()):
        util.prefix_urls(item)
        util.add_extra_html_info(item)
        if 'id' in item:
            item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg'

        item['url'] += '&list=' + playlist_id
        if item['index']:
            item['url'] += '&index=' + str(item['index'])

    video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count')
    if video_count is None:
        video_count = 40

    return flask.render_template('playlist.html',
        header_playlist_names = local_playlist.get_playlist_names(),
        video_list = info.get('items', []),
        num_pages = math.ceil(video_count/20),
        parameters_dictionary = request.args,

        **info['metadata']
    ).encode('utf-8')
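As a quick illustration of the continuation logic above: page N of a playlist is fetched with a ctoken that encodes offset (N-1)*20. The playlist id below is a placeholder.

# Illustrative only; 'PLxxxxxxxxxxxxxxxx' is a placeholder playlist id.
token = playlist_ctoken('PLxxxxxxxxxxxxxxxx', offset=40)   # offset 40 corresponds to page 3
# get_videos('PLxxxxxxxxxxxxxxxx', page=3) builds the same token internally
# and requests https://m.youtube.com/playlist?ctoken=...&pbj=1 with it.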
youtube/proto.py (new file, 129 lines)
@@ -0,0 +1,129 @@
from math import ceil
import base64
import io

def byte(n):
    return bytes((n,))


def varint_encode(offset):
    '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
    The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
    aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
    1ccccccc 1bbbbbbb 0aaaaaaa

    This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
    See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
    needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
    encoded_bytes = bytearray(needed_bytes)
    for i in range(0, needed_bytes - 1):
        encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
        offset = offset >> 7
    encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte

    return bytes(encoded_bytes)


def varint_decode(encoded):
    decoded = 0
    for i, byte in enumerate(encoded):
        decoded |= (byte & 127) << 7*i

        if not (byte & 128):
            break
    return decoded


def string(field_number, data):
    data = as_bytes(data)
    return _proto_field(2, field_number, varint_encode(len(data)) + data)
nested = string

def uint(field_number, value):
    return _proto_field(0, field_number, varint_encode(value))


def _proto_field(wire_type, field_number, data):
    ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
    return varint_encode( (field_number << 3) | wire_type) + data


def percent_b64encode(data):
    return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')


def unpadded_b64encode(data):
    return base64.urlsafe_b64encode(data).replace(b'=', b'')

def as_bytes(value):
    if isinstance(value, str):
        return value.encode('utf-8')
    return value


def read_varint(data):
    result = 0
    i = 0
    while True:
        try:
            byte = data.read(1)[0]
        except IndexError:
            if i == 0:
                raise EOFError()
            raise Exception('Unterminated varint starting at ' + str(data.tell() - i))
        result |= (byte & 127) << 7*i
        if not byte & 128:
            break

        i += 1
    return result


def read_group(data, end_sequence):
    start = data.tell()
    index = data.original.find(end_sequence, start)
    if index == -1:
        raise Exception('Unterminated group')
    data.seek(index + len(end_sequence))
    return data.original[start:index]

def read_protobuf(data):
    data_original = data
    data = io.BytesIO(data)
    data.original = data_original
    while True:
        try:
            tag = read_varint(data)
        except EOFError:
            break
        wire_type = tag & 7
        field_number = tag >> 3

        if wire_type == 0:
            value = read_varint(data)
        elif wire_type == 1:
            value = data.read(8)
        elif wire_type == 2:
            length = read_varint(data)
            value = data.read(length)
        elif wire_type == 3:
            end_bytes = encode_varint((field_number << 3) | 4)
            value = read_group(data, end_bytes)
        elif wire_type == 5:
            value = data.read(4)
        else:
            raise Exception("Unknown wire type: " + str(wire_type) + ", Tag: " + bytes_to_hex(succinct_encode(tag)) + ", at position " + str(data.tell()))
        yield (wire_type, field_number, value)

def parse(data):
    return {field_number: value for _, field_number, value in read_protobuf(data)}

def b64_to_bytes(data):
    if isinstance(data, bytes):
        data = data.decode('ascii')
    data = data.replace("%3D", "=")
    return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4) )
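A small round-trip example of the varint and field helpers above, showing that the encoding matches the protobuf wire format (the values are chosen arbitrarily):

# Illustrative round trip using the helpers defined above.
assert varint_encode(300) == b'\xac\x02'          # 300 -> two bytes, low 7 bits first
assert varint_decode(b'\xac\x02') == 300
msg = string(2, 'UC1234') + uint(3, 1)            # field 2: length-delimited, field 3: varint
assert parse(msg) == {2: b'UC1234', 3: 1}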
youtube/search.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import base64
import json
import urllib

import flask
from flask import request
from werkzeug.exceptions import abort

from youtube import util, yt_data_extract, proto
from youtube import yt_app

# Sort: 1
#    Upload date: 2
#    View count: 3
#    Rating: 1
#    Relevance: 0
# Offset: 9
# Filters: 2
#    Upload date: 1
#    Type: 2
#    Duration: 3


features = {
    '4k': 14,
    'hd': 4,
    'hdr': 25,
    'subtitles': 5,
    'creative_commons': 6,
    '3d': 7,
    'live': 8,
    'purchased': 9,
    '360': 15,
    'location': 23,
}

def page_number_to_sp_parameter(page, autocorrect, sort, filters):
    offset = (int(page) - 1)*20    # 20 results per page
    autocorrect = proto.nested(8, proto.uint(1, 1 - int(autocorrect) ))
    filters_enc = proto.nested(2, proto.uint(1, filters['time']) + proto.uint(2, filters['type']) + proto.uint(3, filters['duration']))
    result = proto.uint(1, sort) + filters_enc + autocorrect + proto.uint(9, offset) + proto.string(61, b'')
    return base64.urlsafe_b64encode(result).decode('ascii')

def get_search_json(query, page, autocorrect, sort, filters):
    url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
    headers = {
        'Host': 'www.youtube.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20180418',
    }
    url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
    content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
    info = json.loads(content)
    return info


@yt_app.route('/search')
def get_search_page():
    if len(request.args) == 0:
        return flask.render_template('base.html', title="Search")

    if 'query' not in request.args:
        abort(400)

    query = request.args.get("query")
    page = request.args.get("page", "1")
    autocorrect = int(request.args.get("autocorrect", "1"))
    sort = int(request.args.get("sort", "0"))
    filters = {}
    filters['time'] = int(request.args.get("time", "0"))
    filters['type'] = int(request.args.get("type", "0"))
    filters['duration'] = int(request.args.get("duration", "0"))
    polymer_json = get_search_json(query, page, autocorrect, sort, filters)

    search_info = yt_data_extract.extract_search_info(polymer_json)
    if search_info['error']:
        return flask.render_template('error.html', error_message = search_info['error'])

    for extract_item_info in search_info['items']:
        util.prefix_urls(extract_item_info)
        util.add_extra_html_info(extract_item_info)

    corrections = search_info['corrections']
    if corrections['type'] == 'did_you_mean':
        corrected_query_string = request.args.to_dict(flat=False)
        corrected_query_string['query'] = [corrections['corrected_query']]
        corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
    elif corrections['type'] == 'showing_results_for':
        no_autocorrect_query_string = request.args.to_dict(flat=False)
        no_autocorrect_query_string['autocorrect'] = ['0']
        no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
        corrections['original_query_url'] = no_autocorrect_query_url

    return flask.render_template('search.html',
        header_playlist_names = local_playlist.get_playlist_names(),
        query = query,
        estimated_results = search_info['estimated_results'],
        estimated_pages = search_info['estimated_pages'],
        corrections = search_info['corrections'],
        results = search_info['items'],
        parameters_dictionary = request.args,
    )
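For reference, a sketch of the sp parameter construction used above: page 2 with autocorrect on, relevance sort, and no filters would be encoded like this (this simply mirrors what get_search_json does internally).

# Illustrative call to the helper defined above.
sp = page_number_to_sp_parameter('2', autocorrect=1, sort=0,
                                 filters={'time': 0, 'type': 0, 'duration': 0})
# The resulting base64 string is appended to the search URL as &sp=...,
# with '=' escaped to %3D.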
397
youtube/util.py
Normal file
397
youtube/util.py
Normal file
@ -0,0 +1,397 @@
|
|||||||
|
import gzip
|
||||||
|
|
||||||
|
from youtube import yt_data_extract
|
||||||
|
|
||||||
|
try:
|
||||||
|
import brotli
|
||||||
|
have_brotli = True
|
||||||
|
except ImportError:
|
||||||
|
have_brotli = False
|
||||||
|
import urllib.parse
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import gevent
|
||||||
|
import gevent.queue
|
||||||
|
import gevent.lock
|
||||||
|
|
||||||
|
# The trouble with the requests library: It ships its own certificate bundle via certifi
|
||||||
|
# instead of using the system certificate store, meaning self-signed certificates
|
||||||
|
# configured by the user will not work. Some draconian networks block TLS unless a corporate
|
||||||
|
# certificate is installed on the system. Additionally, some users install a self signed cert
|
||||||
|
# in order to use programs to modify or monitor requests made by programs on the system.
|
||||||
|
|
||||||
|
# Finally, certificates expire and need to be updated, or are sometimes revoked. Sometimes
|
||||||
|
# certificate authorites go rogue and need to be untrusted. Since we are going through Tor exit nodes,
|
||||||
|
# this becomes all the more important. A rogue CA could issue a fake certificate for accounts.google.com, and a
|
||||||
|
# malicious exit node could use this to decrypt traffic when logging in and retrieve passwords. Examples:
|
||||||
|
# https://www.engadget.com/2015/10/29/google-warns-symantec-over-certificates/
|
||||||
|
# https://nakedsecurity.sophos.com/2013/12/09/serious-security-google-finds-fake-but-trusted-ssl-certificates-for-its-domains-made-in-france/
|
||||||
|
|
||||||
|
# In the requests documentation it says:
|
||||||
|
# "Before version 2.16, Requests bundled a set of root CAs that it trusted, sourced from the Mozilla trust store.
|
||||||
|
# The certificates were only updated once for each Requests version. When certifi was not installed,
|
||||||
|
# this led to extremely out-of-date certificate bundles when using significantly older versions of Requests.
|
||||||
|
# For the sake of security we recommend upgrading certifi frequently!"
|
||||||
|
# (http://docs.python-requests.org/en/master/user/advanced/#ca-certificates)
|
||||||
|
|
||||||
|
# Expecting users to remember to manually update certifi on Linux isn't reasonable in my view.
|
||||||
|
# On windows, this is even worse since I am distributing all dependencies. This program is not
|
||||||
|
# updated frequently, and using requests would lead to outdated certificates. Certificates
|
||||||
|
# should be updated with OS updates, instead of thousands of developers of different programs
|
||||||
|
# being expected to do this correctly 100% of the time.
|
||||||
|
|
||||||
|
# There is hope that this might be fixed eventually:
|
||||||
|
# https://github.com/kennethreitz/requests/issues/2966
|
||||||
|
|
||||||
|
# Until then, I will use a mix of urllib3 and urllib.
|
||||||
|
import urllib3
|
||||||
|
import urllib3.contrib.socks
|
||||||
|
|
||||||
|
URL_ORIGIN = "/https://www.youtube.com"
|
||||||
|
|
||||||
|
connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
|
||||||
|
|
||||||
|
def get_pool(use_tor):
|
||||||
|
return connection_pool
|
||||||
|
|
||||||
|
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
|
||||||
|
'''Separate cookiejars for receiving and sending'''
|
||||||
|
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
|
||||||
|
self.cookiejar_send = cookiejar_send
|
||||||
|
self.cookiejar_receive = cookiejar_receive
|
||||||
|
|
||||||
|
def http_request(self, request):
|
||||||
|
if self.cookiejar_send is not None:
|
||||||
|
self.cookiejar_send.add_cookie_header(request)
|
||||||
|
return request
|
||||||
|
|
||||||
|
def http_response(self, request, response):
|
||||||
|
if self.cookiejar_receive is not None:
|
||||||
|
self.cookiejar_receive.extract_cookies(response, request)
|
||||||
|
return response
|
||||||
|
|
||||||
|
https_request = http_request
|
||||||
|
https_response = http_response
|
||||||
|
|
||||||
|
class FetchError(Exception):
|
||||||
|
def __init__(self, code, reason='', ip=None):
|
||||||
|
Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
|
||||||
|
self.code = code
|
||||||
|
self.reason = reason
|
||||||
|
self.ip = ip
|
||||||
|
|
||||||
|
def decode_content(content, encoding_header):
|
||||||
|
encodings = encoding_header.replace(' ', '').split(',')
|
||||||
|
for encoding in reversed(encodings):
|
||||||
|
if encoding == 'identity':
|
||||||
|
continue
|
||||||
|
if encoding == 'br':
|
||||||
|
content = brotli.decompress(content)
|
||||||
|
elif encoding == 'gzip':
|
||||||
|
content = gzip.decompress(content)
|
||||||
|
return content
|
||||||
|
|
||||||
|
def fetch_url_response(url, headers=(), timeout=15, data=None,
|
||||||
|
cookiejar_send=None, cookiejar_receive=None,
|
||||||
|
use_tor=True, max_redirects=None):
|
||||||
|
'''
|
||||||
|
returns response, cleanup_function
|
||||||
|
When cookiejar_send is set to a CookieJar object,
|
||||||
|
those cookies will be sent in the request (but cookies in response will not be merged into it)
|
||||||
|
When cookiejar_receive is set to a CookieJar object,
|
||||||
|
cookies received in the response will be merged into the object (nothing will be sent from it)
|
||||||
|
When both are set to the same object, cookies will be sent from the object,
|
||||||
|
and response cookies will be merged into it.
|
||||||
|
'''
|
||||||
|
headers = dict(headers) # Note: Calling dict() on a dict will make a copy
|
||||||
|
if have_brotli:
|
||||||
|
headers['Accept-Encoding'] = 'gzip, br'
|
||||||
|
else:
|
||||||
|
headers['Accept-Encoding'] = 'gzip'
|
||||||
|
|
||||||
|
# prevent python version being leaked by urllib if User-Agent isn't provided
|
||||||
|
# (urllib will use ex. Python-urllib/3.6 otherwise)
|
||||||
|
if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
|
||||||
|
headers['User-Agent'] = 'Python-urllib'
|
||||||
|
|
||||||
|
method = "GET"
|
||||||
|
if data is not None:
|
||||||
|
method = "POST"
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = data.encode('ascii')
|
||||||
|
elif not isinstance(data, bytes):
|
||||||
|
data = urllib.parse.urlencode(data).encode('ascii')
|
||||||
|
|
||||||
|
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
|
||||||
|
req = urllib.request.Request(url, data=data, headers=headers)
|
||||||
|
|
||||||
|
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
|
||||||
|
opener = urllib.request.build_opener(cookie_processor)
|
||||||
|
|
||||||
|
response = opener.open(req, timeout=timeout)
|
||||||
|
cleanup_func = (lambda r: None)
|
||||||
|
|
||||||
|
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
|
||||||
|
# default: Retry.DEFAULT = Retry(3)
|
||||||
|
# (in connectionpool.py in urllib3)
|
||||||
|
# According to the documentation for urlopen, a redirect counts as a
|
||||||
|
# retry. So there are 3 redirects max by default.
|
||||||
|
if max_redirects:
|
||||||
|
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
|
||||||
|
else:
|
||||||
|
retries = urllib3.Retry(3)
|
||||||
|
pool = get_pool(use_tor)
|
||||||
|
response = pool.request(method, url, headers=headers,
|
||||||
|
timeout=timeout, preload_content=False,
|
||||||
|
decode_content=False, retries=retries)
|
||||||
|
cleanup_func = (lambda r: r.release_conn())
|
||||||
|
|
||||||
|
return response, cleanup_func
|
||||||
|
|
||||||
|
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
||||||
|
cookiejar_send=None, cookiejar_receive=None, use_tor=True,
|
||||||
|
debug_name=None):
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
response, cleanup_func = fetch_url_response(
|
||||||
|
url, headers, timeout=timeout,
|
||||||
|
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
|
||||||
|
use_tor=use_tor)
|
||||||
|
response_time = time.time()
|
||||||
|
|
||||||
|
content = response.read()
|
||||||
|
read_finish = time.time()
|
||||||
|
|
||||||
|
cleanup_func(response) # release_connection for urllib3
|
||||||
|
|
||||||
|
if (response.status == 429
|
||||||
|
and content.startswith(b'<!DOCTYPE')
|
||||||
|
and b'Our systems have detected unusual traffic' in content):
|
||||||
|
ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
|
||||||
|
content)
|
||||||
|
ip = ip.group(1).decode('ascii') if ip else None
|
||||||
|
raise FetchError('429', reason=response.reason, ip=ip)
|
||||||
|
|
||||||
|
elif response.status >= 400:
|
||||||
|
raise FetchError(str(response.status), reason=response.reason, ip=None)
|
||||||
|
|
||||||
|
if report_text:
|
||||||
|
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
|
||||||
|
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
|
||||||
|
return content
|
||||||
|
|
||||||
|
def head(url, use_tor=False, report_text=None, max_redirects=10):
|
||||||
|
pool = get_pool(use_tor)
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
# default: Retry.DEFAULT = Retry(3)
|
||||||
|
# (in connectionpool.py in urllib3)
|
||||||
|
# According to the documentation for urlopen, a redirect counts as a retry
|
||||||
|
# So there are 3 redirects max by default. Let's change that
|
||||||
|
# to 10 since googlevideo redirects a lot.
|
||||||
|
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
|
||||||
|
raise_on_redirect=False)
|
||||||
|
headers = {'User-Agent': 'Python-urllib'}
|
||||||
|
response = pool.request('HEAD', url, headers=headers, retries=retries)
|
||||||
|
if report_text:
|
||||||
|
print(report_text, ' Latency:', round(time.time() - start_time,3))
|
||||||
|
return response
|
||||||
|
|
||||||
|
mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
|
||||||
|
mobile_ua = (('User-Agent', mobile_user_agent),)
|
||||||
|
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
|
||||||
|
desktop_ua = (('User-Agent', desktop_user_agent),)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimitedQueue(gevent.queue.Queue):
|
||||||
|
''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
|
||||||
|
|
||||||
|
def __init__(self, initial_burst=30, waiting_period=5, subsequent_bursts=10):
|
||||||
|
self.initial_burst = initial_burst
|
||||||
|
self.waiting_period = waiting_period
|
||||||
|
self.subsequent_bursts = subsequent_bursts
|
||||||
|
|
||||||
|
self.count_since_last_wait = 0
|
||||||
|
self.surpassed_initial = False
|
||||||
|
|
||||||
|
self.lock = gevent.lock.BoundedSemaphore(1)
|
||||||
|
self.currently_empty = False
|
||||||
|
self.empty_start = 0
|
||||||
|
gevent.queue.Queue.__init__(self)
|
||||||
|
|
||||||
|
|
||||||
|
def get(self):
|
||||||
|
self.lock.acquire() # blocks if another greenlet currently has the lock
|
||||||
|
if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
|
||||||
|
gevent.sleep(self.waiting_period)
|
||||||
|
self.count_since_last_wait = 0
|
||||||
|
|
||||||
|
elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial:
|
||||||
|
self.surpassed_initial = True
|
||||||
|
gevent.sleep(self.waiting_period)
|
||||||
|
self.count_since_last_wait = 0
|
||||||
|
|
||||||
|
self.count_since_last_wait += 1
|
||||||
|
|
||||||
|
if not self.currently_empty and self.empty():
|
||||||
|
self.currently_empty = True
|
||||||
|
self.empty_start = time.monotonic()
|
||||||
|
|
||||||
|
item = gevent.queue.Queue.get(self) # blocks when nothing left
|
||||||
|
|
||||||
|
if self.currently_empty:
|
||||||
|
if time.monotonic() - self.empty_start >= self.waiting_period:
|
||||||
|
self.count_since_last_wait = 0
|
||||||
|
self.surpassed_initial = False
|
||||||
|
|
||||||
|
self.currently_empty = False
|
||||||
|
|
||||||
|
self.lock.release()
|
||||||
|
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
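A minimal usage sketch of RateLimitedQueue under the burst/wait behaviour described in its docstring; the worker function, the three-greenlet pool and the 100-item workload are hypothetical and not part of Yotter:

import gevent

def worker(queue, results):
    # Each call to queue.get() goes through the rate limiter above;
    # a None item is used here as a hypothetical stop sentinel.
    while True:
        item = queue.get()
        if item is None:
            return
        results.append(item * 2)  # stand-in for the real network request

queue = RateLimitedQueue()  # 30 immediately, then 10 per 5-second window
results = []
workers = [gevent.spawn(worker, queue, results) for _ in range(3)]
for task in range(100):
    queue.put(task)
for _ in workers:
    queue.put(None)  # one sentinel per worker
gevent.joinall(workers)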
def download_thumbnail(save_directory, video_id):
|
||||||
|
url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
|
||||||
|
save_location = os.path.join(save_directory, video_id + ".jpg")
|
||||||
|
try:
|
||||||
|
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
print("Failed to download thumbnail for " + video_id + ": " + str(e))
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
f = open(save_location, 'wb')
|
||||||
|
except FileNotFoundError:
|
||||||
|
os.makedirs(save_directory, exist_ok = True)
|
||||||
|
f = open(save_location, 'wb')
|
||||||
|
f.write(thumbnail)
|
||||||
|
f.close()
|
||||||
|
return True
|
||||||
|
|
||||||
|
def download_thumbnails(save_directory, ids):
|
||||||
|
if not isinstance(ids, (list, tuple)):
|
||||||
|
ids = list(ids)
|
||||||
|
# only do 5 at a time
|
||||||
|
# first handle the part of the list that fits into full batches of 5
|
||||||
|
i = -1
|
||||||
|
for i in range(0, int(len(ids)/5) - 1 ):
|
||||||
|
gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
|
||||||
|
# do the remainders (< 5)
|
||||||
|
gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def dict_add(*dicts):
|
||||||
|
for dictionary in dicts[1:]:
|
||||||
|
dicts[0].update(dictionary)
|
||||||
|
return dicts[0]
|
||||||
|
|
||||||
|
def video_id(url):
|
||||||
|
url_parts = urllib.parse.urlparse(url)
|
||||||
|
return urllib.parse.parse_qs(url_parts.query)['v'][0]
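For illustration, video_id() simply pulls the v parameter out of a watch URL; the URL below is a made-up example:

assert video_id('https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=43') == 'dQw4w9WgXcQ'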
|
||||||
|
|
||||||
|
|
||||||
|
# default, sddefault, mqdefault, hqdefault, hq720
|
||||||
|
def get_thumbnail_url(video_id):
|
||||||
|
return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
|
||||||
|
|
||||||
|
def seconds_to_timestamp(seconds):
|
||||||
|
seconds = int(seconds)
|
||||||
|
hours, seconds = divmod(seconds,3600)
|
||||||
|
minutes, seconds = divmod(seconds,60)
|
||||||
|
if hours != 0:
|
||||||
|
timestamp = str(hours) + ":"
|
||||||
|
timestamp += str(minutes).zfill(2) # zfill pads with zeros
|
||||||
|
else:
|
||||||
|
timestamp = str(minutes)
|
||||||
|
|
||||||
|
timestamp += ":" + str(seconds).zfill(2)
|
||||||
|
return timestamp
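A few hand-worked examples of the expected output (computed from the logic above, not taken from the source):

assert seconds_to_timestamp(59) == '0:59'
assert seconds_to_timestamp(75) == '1:15'
assert seconds_to_timestamp(3601) == '1:00:01'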
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def update_query_string(query_string, items):
|
||||||
|
parameters = urllib.parse.parse_qs(query_string)
|
||||||
|
parameters.update(items)
|
||||||
|
return urllib.parse.urlencode(parameters, doseq=True)
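A hedged example of how update_query_string rewrites an existing query string; parameter order follows the insertion order of the parsed dict:

# Replace the t parameter while keeping v intact.
assert update_query_string('v=abc123&t=10', {'t': ['42']}) == 'v=abc123&t=42'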
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase_escape(s):
|
||||||
|
return re.sub(
|
||||||
|
r'\\U([0-9a-fA-F]{8})',
|
||||||
|
lambda m: chr(int(m.group(1), base=16)), s)
|
||||||
|
|
||||||
|
def prefix_url(url):
|
||||||
|
if url is None:
|
||||||
|
return None
|
||||||
|
url = url.lstrip('/') # some urls have // before them, which has a special meaning
|
||||||
|
return '/' + url
|
||||||
|
|
||||||
|
def left_remove(string, substring):
|
||||||
|
'''removes substring from the start of string, if present'''
|
||||||
|
if string.startswith(substring):
|
||||||
|
return string[len(substring):]
|
||||||
|
return string
|
||||||
|
|
||||||
|
def concat_or_none(*strings):
|
||||||
|
'''Concatenates strings. Returns None if any of the arguments are None'''
|
||||||
|
result = ''
|
||||||
|
for string in strings:
|
||||||
|
if string is None:
|
||||||
|
return None
|
||||||
|
result += string
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_urls(item):
|
||||||
|
try:
|
||||||
|
item['thumbnail'] = prefix_url(item['thumbnail'])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
item['author_url'] = prefix_url(item['author_url'])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def add_extra_html_info(item):
|
||||||
|
if item['type'] == 'video':
|
||||||
|
item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
|
||||||
|
|
||||||
|
video_info = {}
|
||||||
|
for key in ('id', 'title', 'author', 'duration'):
|
||||||
|
try:
|
||||||
|
video_info[key] = item[key]
|
||||||
|
except KeyError:
|
||||||
|
video_info[key] = ''
|
||||||
|
|
||||||
|
item['video_info'] = json.dumps(video_info)
|
||||||
|
|
||||||
|
elif item['type'] == 'playlist':
|
||||||
|
item['url'] = (URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
|
||||||
|
elif item['type'] == 'channel':
|
||||||
|
item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
|
||||||
|
|
||||||
|
def parse_info_prepare_for_html(renderer, additional_info={}):
|
||||||
|
item = yt_data_extract.extract_item_info(renderer, additional_info)
|
||||||
|
prefix_urls(item)
|
||||||
|
add_extra_html_info(item)
|
||||||
|
|
||||||
|
return item
|
||||||
|
|
||||||
|
def check_gevent_exceptions(*tasks):
|
||||||
|
for task in tasks:
|
||||||
|
if task.exception:
|
||||||
|
raise task.exception
|
||||||
|
|
61 youtube/utils.py (new file)
@@ -0,0 +1,61 @@
|
|||||||
|
import urllib
|
||||||
|
from flask import Markup
|
||||||
|
import bleach
|
||||||
|
def get_description_snippet_text(ds):
|
||||||
|
string = ""
|
||||||
|
for t in ds:
|
||||||
|
try:
|
||||||
|
if t['bold']:
|
||||||
|
text = "<b>"+t['text']+"</b>"
|
||||||
|
else:
|
||||||
|
text = t['text']
|
||||||
|
except KeyError:
|
||||||
|
text = t['text']
|
||||||
|
string = string + text
|
||||||
|
return string
|
||||||
|
|
||||||
|
|
||||||
|
def concat_texts(strings):
|
||||||
|
'''Concatenates strings. Returns None if any of the arguments are None'''
|
||||||
|
result = ''
|
||||||
|
for string in strings:
|
||||||
|
if string['text'] is None:
|
||||||
|
return None
|
||||||
|
result += string['text']
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_comment(raw_comment):
|
||||||
|
cmnt = {}
|
||||||
|
imgHostName = urllib.parse.urlparse(raw_comment['author_avatar'][1:]).netloc
|
||||||
|
cmnt['author'] = raw_comment['author']
|
||||||
|
cmnt['thumbnail'] = raw_comment['author_avatar'].replace("https://{}".format(imgHostName),"")[1:] + "?host=" + imgHostName
|
||||||
|
|
||||||
|
print(cmnt['thumbnail'])
|
||||||
|
cmnt['channel'] = raw_comment['author_url']
|
||||||
|
cmnt['text'] = Markup(bleach.linkify(concat_texts(raw_comment['text']).replace("\n", "<br>")))
|
||||||
|
cmnt['date'] = raw_comment['time_published']
|
||||||
|
|
||||||
|
try:
|
||||||
|
cmnt['creatorHeart'] = raw_comment['creatorHeart']['creatorHeartRenderer']['creatorThumbnail']['thumbnails'][0][
|
||||||
|
'url']
|
||||||
|
except (KeyError, IndexError, TypeError):
|
||||||
|
cmnt['creatorHeart'] = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
cmnt['likes'] = raw_comment['like_count']
|
||||||
|
except KeyError:
|
||||||
|
cmnt['likes'] = 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
cmnt['replies'] = raw_comment['reply_count']
|
||||||
|
except KeyError:
|
||||||
|
cmnt['replies'] = 0
|
||||||
|
return cmnt
|
||||||
|
|
||||||
|
|
||||||
|
def post_process_comments_info(comments_info):
|
||||||
|
comments = []
|
||||||
|
for comment in comments_info['comments']:
|
||||||
|
comments.append(parse_comment(comment))
|
||||||
|
return comments
|
246 youtube/watch.py (new file)
@@ -0,0 +1,246 @@
|
|||||||
|
import json
|
||||||
|
import math
|
||||||
|
import traceback
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
from youtube import util, yt_data_extract
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_sources(info, tor_bypass=False):
|
||||||
|
video_sources = []
|
||||||
|
max_resolution = 720  # keep as an int so the comparison with fmt['height'] below works
|
||||||
|
for fmt in info['formats']:
|
||||||
|
if not all(fmt[attr] for attr in ('quality', 'width', 'ext', 'url')):
|
||||||
|
continue
|
||||||
|
if fmt['acodec'] and fmt['vcodec'] and fmt['height'] <= max_resolution:
|
||||||
|
video_sources.append({
|
||||||
|
'src': fmt['url'],
|
||||||
|
'type': 'video/' + fmt['ext'],
|
||||||
|
'quality': fmt['quality'],
|
||||||
|
'height': fmt['height'],
|
||||||
|
'width': fmt['width'],
|
||||||
|
})
|
||||||
|
|
||||||
|
#### order the videos sources so the preferred resolution is first ###
|
||||||
|
|
||||||
|
video_sources.sort(key=lambda source: source['quality'], reverse=True)
|
||||||
|
|
||||||
|
return video_sources
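For reference, each entry in the returned list is a plain dict shaped roughly like the sketch below; the values are illustrative only and not taken from a real video:

example_source = {
    'src': 'https://example.googlevideo.com/videoplayback?itag=22',  # direct stream URL
    'type': 'video/mp4',
    'quality': 'hd720',   # whatever the extractor reports for the format
    'height': 720,
    'width': 1280,
}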
|
||||||
|
|
||||||
|
def make_caption_src(info, lang, auto=False, trans_lang=None):
|
||||||
|
label = lang
|
||||||
|
if auto:
|
||||||
|
label += ' (Automatic)'
|
||||||
|
if trans_lang:
|
||||||
|
label += ' -> ' + trans_lang
|
||||||
|
return {
|
||||||
|
'url': '/' + yt_data_extract.get_caption_url(info, lang, 'vtt', auto, trans_lang),
|
||||||
|
'label': label,
|
||||||
|
'srclang': trans_lang[0:2] if trans_lang else lang[0:2],
|
||||||
|
'on': False,
|
||||||
|
}
|
||||||
|
|
||||||
|
def lang_in(lang, sequence):
|
||||||
|
'''Tests if the language is in sequence, with e.g. en and en-US considered the same'''
|
||||||
|
if lang is None:
|
||||||
|
return False
|
||||||
|
lang = lang[0:2]
|
||||||
|
return lang in (l[0:2] for l in sequence)
|
||||||
|
|
||||||
|
def lang_eq(lang1, lang2):
|
||||||
|
'''Tests if two iso 639-1 codes are equal, with en and en-US considered the same.
|
||||||
|
Just because the codes are equal does not mean the dialects are mutually intelligible, but this will have to do for now without a complex language model'''
|
||||||
|
if lang1 is None or lang2 is None:
|
||||||
|
return False
|
||||||
|
return lang1[0:2] == lang2[0:2]
|
||||||
|
|
||||||
|
def equiv_lang_in(lang, sequence):
|
||||||
|
'''Extracts a language in sequence which is equivalent to lang.
|
||||||
|
e.g. if lang is en, extracts en-GB from sequence.
|
||||||
|
Necessary because if only a specific variant like en-GB is available, can't ask Youtube for simply en. Need to get the available variant.'''
|
||||||
|
lang = lang[0:2]
|
||||||
|
for l in sequence:
|
||||||
|
if l[0:2] == lang:
|
||||||
|
return l
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_subtitle_sources(info):
|
||||||
|
'''Returns these sources, ordered from least to most intelligible:
|
||||||
|
native_video_lang (Automatic)
|
||||||
|
foreign_langs (Manual)
|
||||||
|
native_video_lang (Automatic) -> pref_lang
|
||||||
|
foreign_langs (Manual) -> pref_lang
|
||||||
|
native_video_lang (Manual) -> pref_lang
|
||||||
|
pref_lang (Automatic)
|
||||||
|
pref_lang (Manual)'''
|
||||||
|
sources = []
|
||||||
|
pref_lang = 'en'
|
||||||
|
native_video_lang = None
|
||||||
|
if info['automatic_caption_languages']:
|
||||||
|
native_video_lang = info['automatic_caption_languages'][0]
|
||||||
|
|
||||||
|
highest_fidelity_is_manual = False
|
||||||
|
|
||||||
|
# Sources are added in very specific order outlined above
|
||||||
|
# More intelligible sources are put further down to avoid browser bug when there are too many languages
|
||||||
|
# (in firefox, it is impossible to select a language near the top of the list because it is cut off)
|
||||||
|
|
||||||
|
# native_video_lang (Automatic)
|
||||||
|
if native_video_lang and not lang_eq(native_video_lang, pref_lang):
|
||||||
|
sources.append(make_caption_src(info, native_video_lang, auto=True))
|
||||||
|
|
||||||
|
# foreign_langs (Manual)
|
||||||
|
for lang in info['manual_caption_languages']:
|
||||||
|
if not lang_eq(lang, pref_lang):
|
||||||
|
sources.append(make_caption_src(info, lang))
|
||||||
|
|
||||||
|
if (lang_in(pref_lang, info['translation_languages'])
|
||||||
|
and not lang_in(pref_lang, info['automatic_caption_languages'])
|
||||||
|
and not lang_in(pref_lang, info['manual_caption_languages'])):
|
||||||
|
# native_video_lang (Automatic) -> pref_lang
|
||||||
|
if native_video_lang and not lang_eq(pref_lang, native_video_lang):
|
||||||
|
sources.append(make_caption_src(info, native_video_lang, auto=True, trans_lang=pref_lang))
|
||||||
|
|
||||||
|
# foreign_langs (Manual) -> pref_lang
|
||||||
|
for lang in info['manual_caption_languages']:
|
||||||
|
if not lang_eq(lang, native_video_lang) and not lang_eq(lang, pref_lang):
|
||||||
|
sources.append(make_caption_src(info, lang, trans_lang=pref_lang))
|
||||||
|
|
||||||
|
# native_video_lang (Manual) -> pref_lang
|
||||||
|
if lang_in(native_video_lang, info['manual_caption_languages']):
|
||||||
|
sources.append(make_caption_src(info, native_video_lang, trans_lang=pref_lang))
|
||||||
|
|
||||||
|
# pref_lang (Automatic)
|
||||||
|
if lang_in(pref_lang, info['automatic_caption_languages']):
|
||||||
|
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['automatic_caption_languages']), auto=True))
|
||||||
|
|
||||||
|
# pref_lang (Manual)
|
||||||
|
if lang_in(pref_lang, info['manual_caption_languages']):
|
||||||
|
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['manual_caption_languages'])))
|
||||||
|
highest_fidelity_is_manual = True
|
||||||
|
if len(sources) == 0:
|
||||||
|
assert len(info['automatic_caption_languages']) == 0 and len(info['manual_caption_languages']) == 0
|
||||||
|
|
||||||
|
return sources
|
||||||
|
|
||||||
|
|
||||||
|
def get_ordered_music_list_attributes(music_list):
|
||||||
|
# get the set of attributes which are used by at least 1 track
|
||||||
|
# so there isn't an empty, extraneous album column which no tracks use, for example
|
||||||
|
used_attributes = set()
|
||||||
|
for track in music_list:
|
||||||
|
used_attributes = used_attributes | track.keys()
|
||||||
|
|
||||||
|
# now put them in the right order
|
||||||
|
ordered_attributes = []
|
||||||
|
for attribute in ('Artist', 'Title', 'Album'):
|
||||||
|
if attribute.lower() in used_attributes:
|
||||||
|
ordered_attributes.append(attribute)
|
||||||
|
|
||||||
|
return ordered_attributes
|
||||||
|
|
||||||
|
headers = (
|
||||||
|
('Accept', '*/*'),
|
||||||
|
('Accept-Language', 'en-US,en;q=0.5'),
|
||||||
|
('X-YouTube-Client-Name', '2'),
|
||||||
|
('X-YouTube-Client-Version', '2.20180830'),
|
||||||
|
) + util.mobile_ua
|
||||||
|
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
|
||||||
|
# bpctr=9999999999 will bypass are-you-sure dialogs for controversial
|
||||||
|
# videos
|
||||||
|
url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
|
||||||
|
if playlist_id:
|
||||||
|
url += '&list=' + playlist_id
|
||||||
|
if index:
|
||||||
|
url += '&index=' + index
|
||||||
|
polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
|
||||||
|
polymer_json = polymer_json.decode('utf-8')
|
||||||
|
# TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
|
||||||
|
try:
|
||||||
|
polymer_json = json.loads(polymer_json)
|
||||||
|
except json.decoder.JSONDecodeError:
|
||||||
|
traceback.print_exc()
|
||||||
|
return {'error': 'Failed to parse json response'}
|
||||||
|
info = yt_data_extract.extract_watch_info(polymer_json)
|
||||||
|
|
||||||
|
# age restriction bypass
|
||||||
|
if info['age_restricted']:
|
||||||
|
print('Fetching age restriction bypass page')
|
||||||
|
data = {
|
||||||
|
'video_id': video_id,
|
||||||
|
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||||
|
}
|
||||||
|
url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
|
||||||
|
video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
|
||||||
|
yt_data_extract.update_with_age_restricted_info(info, video_info_page)
|
||||||
|
# check if urls ready (non-live format) in former livestream
|
||||||
|
# urls not ready if all of them have no filesize
|
||||||
|
if info['was_live']:
|
||||||
|
info['urls_ready'] = False
|
||||||
|
for fmt in info['formats']:
|
||||||
|
if fmt['file_size'] is not None:
|
||||||
|
info['urls_ready'] = True
|
||||||
|
else:
|
||||||
|
info['urls_ready'] = True
|
||||||
|
|
||||||
|
# livestream urls
|
||||||
|
# sometimes only the livestream urls work soon after the livestream is over
|
||||||
|
if (info['hls_manifest_url']
|
||||||
|
and (info['live'] or not info['formats'] or not info['urls_ready'])
|
||||||
|
):
|
||||||
|
manifest = util.fetch_url(info['hls_manifest_url'],
|
||||||
|
debug_name='hls_manifest.m3u8',
|
||||||
|
report_text='Fetched hls manifest'
|
||||||
|
).decode('utf-8')
|
||||||
|
|
||||||
|
info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest)
|
||||||
|
if not err:
|
||||||
|
info['playability_error'] = None
|
||||||
|
for fmt in info['hls_formats']:
|
||||||
|
fmt['video_quality'] = video_quality_string(fmt)
|
||||||
|
else:
|
||||||
|
info['hls_formats'] = []
|
||||||
|
|
||||||
|
# check for 403. Unnecessary for tor video routing b/c ip address is same
|
||||||
|
info['invidious_used'] = False
|
||||||
|
info['invidious_reload_button'] = False
|
||||||
|
info['tor_bypass_used'] = False
|
||||||
|
return info
|
||||||
|
|
||||||
|
def video_quality_string(format):
|
||||||
|
if format['vcodec']:
|
||||||
|
result = str(format['width'] or '?') + 'x' + str(format['height'] or '?')
|
||||||
|
if format['fps']:
|
||||||
|
result += ' ' + str(format['fps']) + 'fps'
|
||||||
|
return result
|
||||||
|
elif format['acodec']:
|
||||||
|
return 'audio only'
|
||||||
|
|
||||||
|
return '?'
|
||||||
|
|
||||||
|
def audio_quality_string(format):
|
||||||
|
if format['acodec']:
|
||||||
|
result = str(format['audio_bitrate'] or '?') + 'k'
|
||||||
|
if format['audio_sample_rate']:
|
||||||
|
result += ' ' + str(format['audio_sample_rate']) + ' Hz'
|
||||||
|
return result
|
||||||
|
elif format['vcodec']:
|
||||||
|
return 'video only'
|
||||||
|
|
||||||
|
return '?'
|
||||||
|
|
||||||
|
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py
|
||||||
|
def format_bytes(bytes):
|
||||||
|
if bytes is None:
|
||||||
|
return 'N/A'
|
||||||
|
if type(bytes) is str:
|
||||||
|
bytes = float(bytes)
|
||||||
|
if bytes == 0.0:
|
||||||
|
exponent = 0
|
||||||
|
else:
|
||||||
|
exponent = int(math.log(bytes, 1024.0))
|
||||||
|
suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
|
||||||
|
converted = float(bytes) / float(1024 ** exponent)
|
||||||
|
return '%.2f%s' % (converted, suffix)
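Some hand-checked examples of format_bytes (worked from the formula above, not taken from the source):

assert format_bytes(None) == 'N/A'
assert format_bytes(0) == '0.00B'
assert format_bytes(1536) == '1.50KiB'
assert format_bytes('1048576') == '1.00MiB'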
|
||||||
|
|
||||||
|
|
12 youtube/yt_data_extract/__init__.py (new file)
@@ -0,0 +1,12 @@
|
|||||||
|
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||||
|
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||||
|
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||||
|
extract_date, extract_item_info, extract_items, extract_response)
|
||||||
|
|
||||||
|
from .everything_else import (extract_channel_info, extract_search_info,
|
||||||
|
extract_playlist_metadata, extract_playlist_info, extract_comments_info)
|
||||||
|
|
||||||
|
from .watch_extraction import (extract_watch_info, get_caption_url,
|
||||||
|
update_with_age_restricted_info, requires_decryption,
|
||||||
|
extract_decryption_function, decrypt_signatures, _formats,
|
||||||
|
update_format_with_type_info, extract_hls_formats)
|
470 youtube/yt_data_extract/common.py (new file)
@@ -0,0 +1,470 @@
|
|||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
import collections
|
||||||
|
|
||||||
|
def get(object, key, default=None, types=()):
|
||||||
|
'''Like dict.get(), but returns default if the result doesn't match one of the types.
|
||||||
|
Also works for indexing lists.'''
|
||||||
|
try:
|
||||||
|
result = object[key]
|
||||||
|
except (TypeError, IndexError, KeyError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
if not types or isinstance(result, types):
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
return default
|
||||||
|
|
||||||
|
def multi_get(object, *keys, default=None, types=()):
|
||||||
|
'''Like get, but try other keys if the first fails'''
|
||||||
|
for key in keys:
|
||||||
|
try:
|
||||||
|
result = object[key]
|
||||||
|
except (TypeError, IndexError, KeyError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if not types or isinstance(result, types):
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def deep_get(object, *keys, default=None, types=()):
|
||||||
|
'''Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices.
|
||||||
|
Last argument is the default value to use in case of any IndexErrors or KeyErrors.
|
||||||
|
If types is given and the result doesn't match one of those types, default is returned'''
|
||||||
|
try:
|
||||||
|
for key in keys:
|
||||||
|
object = object[key]
|
||||||
|
except (TypeError, IndexError, KeyError):
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
if not types or isinstance(object, types):
|
||||||
|
return object
|
||||||
|
else:
|
||||||
|
return default
|
||||||
|
|
||||||
|
def multi_deep_get(object, *key_sequences, default=None, types=()):
|
||||||
|
'''Like deep_get, but can try different key sequences in case one fails.
|
||||||
|
Return default if all of them fail. key_sequences is a list of lists'''
|
||||||
|
for key_sequence in key_sequences:
|
||||||
|
_object = object
|
||||||
|
try:
|
||||||
|
for key in key_sequence:
|
||||||
|
_object = _object[key]
|
||||||
|
except (TypeError, IndexError, KeyError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if not types or isinstance(_object, types):
|
||||||
|
return _object
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
return default
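An illustrative sketch of how these accessors behave on a nested structure; the sample dict below is made up for the example and is not real YouTube JSON:

sample = {'header': {'title': {'runs': [{'text': 'Example'}]}}, 'items': [10, 20]}

assert deep_get(sample, 'header', 'title', 'runs', 0, 'text') == 'Example'
assert deep_get(sample, 'header', 'missing', default='n/a') == 'n/a'
assert get(sample['items'], 5) is None            # out-of-range index -> default
assert multi_deep_get(
    sample,
    ['header', 'subtitle', 'runs', 0, 'text'],    # fails, falls through
    ['header', 'title', 'runs', 0, 'text'],       # succeeds
) == 'Example'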
|
||||||
|
|
||||||
|
def liberal_update(obj, key, value):
|
||||||
|
'''Updates obj[key] with value as long as value is not None.
|
||||||
|
Ensures obj[key] will at least get a value of None, however'''
|
||||||
|
if (value is not None) or (key not in obj):
|
||||||
|
obj[key] = value
|
||||||
|
|
||||||
|
def conservative_update(obj, key, value):
|
||||||
|
'''Only updates obj if it doesn't have key or obj[key] is None'''
|
||||||
|
if obj.get(key) is None:
|
||||||
|
obj[key] = value
|
||||||
|
|
||||||
|
def concat_or_none(*strings):
|
||||||
|
'''Concatenates strings. Returns None if any of the arguments are None'''
|
||||||
|
result = ''
|
||||||
|
for string in strings:
|
||||||
|
if string is None:
|
||||||
|
return None
|
||||||
|
result += string
|
||||||
|
return result
|
||||||
|
|
||||||
|
def remove_redirect(url):
|
||||||
|
if url is None:
|
||||||
|
return None
|
||||||
|
if re.fullmatch(r'(((https?:)?//)?(www.)?youtube.com)?/redirect\?.*', url) is not None: # youtube puts these on external links to do tracking
|
||||||
|
query_string = url[url.find('?')+1: ]
|
||||||
|
return urllib.parse.parse_qs(query_string)['q'][0]
|
||||||
|
return url
|
||||||
|
|
||||||
|
youtube_url_re = re.compile(r'^(?:(?:(?:https?:)?//)?(?:www\.)?youtube\.com)?(/.*)$')
|
||||||
|
def normalize_url(url):
|
||||||
|
if url is None:
|
||||||
|
return None
|
||||||
|
match = youtube_url_re.fullmatch(url)
|
||||||
|
if match is None:
|
||||||
|
raise Exception()
|
||||||
|
|
||||||
|
return 'https://www.youtube.com' + match.group(1)
|
||||||
|
|
||||||
|
def _recover_urls(runs):
|
||||||
|
for run in runs:
|
||||||
|
url = deep_get(run, 'navigationEndpoint', 'urlEndpoint', 'url')
|
||||||
|
text = run.get('text', '')
|
||||||
|
# second condition is necessary because youtube makes other things into urls, such as hashtags, which we want to keep as text
|
||||||
|
if url is not None and (text.startswith('http://') or text.startswith('https://')):
|
||||||
|
url = remove_redirect(url)
|
||||||
|
run['url'] = url
|
||||||
|
run['text'] = url # youtube truncates the url text, use actual url instead
|
||||||
|
|
||||||
|
def extract_str(node, default=None, recover_urls=False):
|
||||||
|
'''default is the value returned if the extraction fails. If recover_urls is true, will attempt to fix Youtube's truncation of url text (most prominently seen in descriptions)'''
|
||||||
|
if isinstance(node, str):
|
||||||
|
return node
|
||||||
|
|
||||||
|
try:
|
||||||
|
return node['simpleText']
|
||||||
|
except (KeyError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
if isinstance(node, dict) and 'runs' in node:
|
||||||
|
if recover_urls:
|
||||||
|
_recover_urls(node['runs'])
|
||||||
|
return ''.join(text_run.get('text', '') for text_run in node['runs'])
|
||||||
|
|
||||||
|
return default
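Small examples of the three node shapes extract_str accepts (plain string, simpleText node, runs node); the sample nodes are made up:

assert extract_str('already a string') == 'already a string'
assert extract_str({'simpleText': 'Plain title'}) == 'Plain title'
assert extract_str({'runs': [{'text': 'Part 1 '}, {'text': 'Part 2'}]}) == 'Part 1 Part 2'
assert extract_str(None, default='') == ''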
|
||||||
|
|
||||||
|
def extract_formatted_text(node):
|
||||||
|
if not node:
|
||||||
|
return []
|
||||||
|
if 'runs' in node:
|
||||||
|
_recover_urls(node['runs'])
|
||||||
|
return node['runs']
|
||||||
|
elif 'simpleText' in node:
|
||||||
|
return [{'text': node['simpleText']}]
|
||||||
|
return []
|
||||||
|
|
||||||
|
def extract_int(string, default=None):
|
||||||
|
if isinstance(string, int):
|
||||||
|
return string
|
||||||
|
if not isinstance(string, str):
|
||||||
|
string = extract_str(string)
|
||||||
|
if not string:
|
||||||
|
return default
|
||||||
|
match = re.search(r'\b(\d+)\b', string.replace(',', ''))
|
||||||
|
if match is None:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return int(match.group(1))
|
||||||
|
except ValueError:
|
||||||
|
return default
|
||||||
|
|
||||||
|
def extract_approx_int(string):
|
||||||
|
'''e.g. "15.1M" from "15.1M subscribers"'''
|
||||||
|
if not isinstance(string, str):
|
||||||
|
string = extract_str(string)
|
||||||
|
if not string:
|
||||||
|
return None
|
||||||
|
match = re.search(r'\b(\d+(?:\.\d+)?[KMBTkmbt]?)\b', string.replace(',', ''))
|
||||||
|
if match is None:
|
||||||
|
return None
|
||||||
|
return match.group(1)
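Hand-worked examples of the two numeric extractors; the inputs are made up, modelled on typical YouTube count strings:

assert extract_int('1,234,567 views') == 1234567
assert extract_int({'simpleText': '42 likes'}) == 42
assert extract_int('no digits here', default=0) == 0
assert extract_approx_int('15.1M subscribers') == '15.1M'
assert extract_approx_int('No views') is None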
|
||||||
|
|
||||||
|
MONTH_ABBREVIATIONS = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'}
|
||||||
|
def extract_date(date_text):
|
||||||
|
'''Input: "Mar 9, 2019". Output: "2019-3-9"'''
|
||||||
|
if not isinstance(date_text, str):
|
||||||
|
date_text = extract_str(date_text)
|
||||||
|
if date_text is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
date_text = date_text.replace(',', '').lower()
|
||||||
|
parts = date_text.split()
|
||||||
|
if len(parts) >= 3:
|
||||||
|
month, day, year = parts[-3:]
|
||||||
|
month = MONTH_ABBREVIATIONS.get(month[0:3]) # slicing in case they start writing out the full month name
|
||||||
|
if month and (re.fullmatch(r'\d\d?', day) is not None) and (re.fullmatch(r'\d{4}', year) is not None):
|
||||||
|
return year + '-' + month + '-' + day
|
||||||
|
return None
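Examples of extract_date against strings in the format the docstring describes; the second input is made up:

assert extract_date('Mar 9, 2019') == '2019-3-9'
assert extract_date({'simpleText': 'Streamed live on Dec 25, 2020'}) == '2020-12-25'
assert extract_date('yesterday') is None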
|
||||||
|
|
||||||
|
def check_missing_keys(object, *key_sequences):
|
||||||
|
for key_sequence in key_sequences:
|
||||||
|
_object = object
|
||||||
|
try:
|
||||||
|
for key in key_sequence:
|
||||||
|
_object = _object[key]
|
||||||
|
except (KeyError, IndexError, TypeError):
|
||||||
|
return 'Could not find ' + key
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_item_info(item, additional_info={}):
|
||||||
|
if not item:
|
||||||
|
return {'error': 'No item given'}
|
||||||
|
|
||||||
|
type = get(list(item.keys()), 0)
|
||||||
|
if not type:
|
||||||
|
return {'error': 'Could not find type'}
|
||||||
|
item = item[type]
|
||||||
|
|
||||||
|
info = {'error': None}
|
||||||
|
if type in ('itemSectionRenderer', 'compactAutoplayRenderer'):
|
||||||
|
return extract_item_info(deep_get(item, 'contents', 0), additional_info)
|
||||||
|
|
||||||
|
if type in ('movieRenderer', 'clarificationRenderer'):
|
||||||
|
info['type'] = 'unsupported'
|
||||||
|
return info
|
||||||
|
|
||||||
|
info.update(additional_info)
|
||||||
|
|
||||||
|
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
|
||||||
|
# camelCase split, https://stackoverflow.com/a/37697078
|
||||||
|
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
|
||||||
|
if len(type_parts) < 2:
|
||||||
|
info['type'] = 'unsupported'
|
||||||
|
return info  # return the dict so callers can rely on a consistent return type
|
||||||
|
primary_type = type_parts[-2]
|
||||||
|
if primary_type == 'video':
|
||||||
|
info['type'] = 'video'
|
||||||
|
elif primary_type in ('playlist', 'radio', 'show'):
|
||||||
|
info['type'] = 'playlist'
|
||||||
|
elif primary_type == 'channel':
|
||||||
|
info['type'] = 'channel'
|
||||||
|
elif type == 'videoWithContextRenderer': # stupid exception
|
||||||
|
info['type'] = 'video'
|
||||||
|
primary_type = 'video'
|
||||||
|
else:
|
||||||
|
info['type'] = 'unsupported'
|
||||||
|
|
||||||
|
# videoWithContextRenderer changes it to 'headline' just to be annoying
|
||||||
|
info['title'] = extract_str(multi_get(item, 'title', 'headline'))
|
||||||
|
if primary_type != 'channel':
|
||||||
|
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
|
||||||
|
info['author_id'] = extract_str(multi_deep_get(item,
|
||||||
|
['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||||
|
['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||||
|
['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId']
|
||||||
|
))
|
||||||
|
info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
|
||||||
|
info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText'))
|
||||||
|
info['thumbnail'] = multi_deep_get(item,
|
||||||
|
['thumbnail', 'thumbnails', 0, 'url'], # videos
|
||||||
|
['thumbnails', 0, 'thumbnails', 0, 'url'], # playlists
|
||||||
|
['thumbnailRenderer', 'showCustomThumbnailRenderer', 'thumbnail', 'thumbnails', 0, 'url'], # shows
|
||||||
|
)
|
||||||
|
|
||||||
|
info['badges'] = []
|
||||||
|
for badge_node in multi_get(item, 'badges', 'ownerBadges', default=()):
|
||||||
|
badge = deep_get(badge_node, 'metadataBadgeRenderer', 'label')
|
||||||
|
if badge:
|
||||||
|
info['badges'].append(badge)
|
||||||
|
|
||||||
|
if primary_type in ('video', 'playlist'):
|
||||||
|
info['time_published'] = None
|
||||||
|
timestamp = re.search(r'(\d+ \w+ ago)',
|
||||||
|
extract_str(item.get('publishedTimeText'), default=''))
|
||||||
|
if timestamp:
|
||||||
|
info['time_published'] = timestamp.group(1)
|
||||||
|
|
||||||
|
if primary_type == 'video':
|
||||||
|
info['id'] = item.get('videoId')
|
||||||
|
info['view_count'] = extract_int(item.get('viewCountText'))
|
||||||
|
|
||||||
|
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
|
||||||
|
accessibility_label = multi_deep_get(item,
|
||||||
|
['title', 'accessibility', 'accessibilityData', 'label'],
|
||||||
|
['headline', 'accessibility', 'accessibilityData', 'label'],
|
||||||
|
default='')
|
||||||
|
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
|
||||||
|
if timestamp:
|
||||||
|
conservative_update(info, 'time_published', timestamp.group(1))
|
||||||
|
view_count = re.search(r'(\d+) views', accessibility_label.replace(',', ''))
|
||||||
|
if view_count:
|
||||||
|
conservative_update(info, 'view_count', int(view_count.group(1)))
|
||||||
|
|
||||||
|
if info['view_count']:
|
||||||
|
info['approx_view_count'] = '{:,}'.format(info['view_count'])
|
||||||
|
else:
|
||||||
|
info['approx_view_count'] = extract_approx_int(item.get('shortViewCountText'))
|
||||||
|
|
||||||
|
# handle case where it is "No views"
|
||||||
|
if not info['approx_view_count']:
|
||||||
|
if ('No views' in item.get('shortViewCountText', '')
|
||||||
|
or 'no views' in accessibility_label.lower()):
|
||||||
|
info['view_count'] = 0
|
||||||
|
info['approx_view_count'] = '0'
|
||||||
|
|
||||||
|
info['duration'] = extract_str(item.get('lengthText'))
|
||||||
|
|
||||||
|
# if it's an item in a playlist, get its index
|
||||||
|
if 'index' in item: # url has wrong index on playlist page
|
||||||
|
info['index'] = extract_int(item.get('index'))
|
||||||
|
elif 'indexText' in item:
|
||||||
|
# Current item in playlist has ▶ instead of the actual index, must
|
||||||
|
# dig into url
|
||||||
|
match = re.search(r'index=(\d+)', deep_get(item,
|
||||||
|
'navigationEndpoint', 'commandMetadata', 'webCommandMetadata',
|
||||||
|
'url', default=''))
|
||||||
|
if match is None: # worth a try then
|
||||||
|
info['index'] = extract_int(item.get('indexText'))
|
||||||
|
else:
|
||||||
|
info['index'] = int(match.group(1))
|
||||||
|
else:
|
||||||
|
info['index'] = None
|
||||||
|
|
||||||
|
elif primary_type in ('playlist', 'radio'):
|
||||||
|
info['id'] = item.get('playlistId')
|
||||||
|
info['video_count'] = extract_int(item.get('videoCount'))
|
||||||
|
elif primary_type == 'channel':
|
||||||
|
info['id'] = item.get('channelId')
|
||||||
|
info['approx_subscriber_count'] = extract_approx_int(item.get('subscriberCountText'))
|
||||||
|
elif primary_type == 'show':
|
||||||
|
info['id'] = deep_get(item, 'navigationEndpoint', 'watchEndpoint', 'playlistId')
|
||||||
|
|
||||||
|
if primary_type in ('playlist', 'channel'):
|
||||||
|
conservative_update(info, 'video_count', extract_int(item.get('videoCountText')))
|
||||||
|
|
||||||
|
for overlay in item.get('thumbnailOverlays', []):
|
||||||
|
conservative_update(info, 'duration', extract_str(deep_get(
|
||||||
|
overlay, 'thumbnailOverlayTimeStatusRenderer', 'text'
|
||||||
|
)))
|
||||||
|
# show renderers don't have videoCountText
|
||||||
|
conservative_update(info, 'video_count', extract_int(deep_get(
|
||||||
|
overlay, 'thumbnailOverlayBottomPanelRenderer', 'text'
|
||||||
|
)))
|
||||||
|
return info
|
||||||
|
|
||||||
|
def extract_response(polymer_json):
|
||||||
|
'''return response, error'''
|
||||||
|
response = multi_deep_get(polymer_json, [1, 'response'], ['response'])
|
||||||
|
if response is None:
|
||||||
|
return None, 'Failed to extract response'
|
||||||
|
else:
|
||||||
|
return response, None
|
||||||
|
|
||||||
|
|
||||||
|
_item_types = {
|
||||||
|
'movieRenderer',
|
||||||
|
'didYouMeanRenderer',
|
||||||
|
'showingResultsForRenderer',
|
||||||
|
|
||||||
|
'videoRenderer',
|
||||||
|
'compactVideoRenderer',
|
||||||
|
'compactAutoplayRenderer',
|
||||||
|
'videoWithContextRenderer',
|
||||||
|
'gridVideoRenderer',
|
||||||
|
'playlistVideoRenderer',
|
||||||
|
|
||||||
|
'playlistRenderer',
|
||||||
|
'compactPlaylistRenderer',
|
||||||
|
'gridPlaylistRenderer',
|
||||||
|
|
||||||
|
'radioRenderer',
|
||||||
|
'compactRadioRenderer',
|
||||||
|
'gridRadioRenderer',
|
||||||
|
|
||||||
|
'showRenderer',
|
||||||
|
'compactShowRenderer',
|
||||||
|
'gridShowRenderer',
|
||||||
|
|
||||||
|
|
||||||
|
'channelRenderer',
|
||||||
|
'compactChannelRenderer',
|
||||||
|
'gridChannelRenderer',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _traverse_browse_renderer(renderer):
|
||||||
|
for tab in get(renderer, 'tabs', ()):
|
||||||
|
tab_renderer = multi_get(tab, 'tabRenderer', 'expandableTabRenderer')
|
||||||
|
if tab_renderer is None:
|
||||||
|
continue
|
||||||
|
if tab_renderer.get('selected', False):
|
||||||
|
return get(tab_renderer, 'content', {})
|
||||||
|
print('Could not find tab with content')
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _traverse_standard_list(renderer):
|
||||||
|
renderer_list = multi_get(renderer, 'contents', 'items', default=())
|
||||||
|
continuation = deep_get(renderer, 'continuations', 0, 'nextContinuationData', 'continuation')
|
||||||
|
return renderer_list, continuation
|
||||||
|
|
||||||
|
# these renderers contain one inside them
|
||||||
|
nested_renderer_dispatch = {
|
||||||
|
'singleColumnBrowseResultsRenderer': _traverse_browse_renderer,
|
||||||
|
'twoColumnBrowseResultsRenderer': _traverse_browse_renderer,
|
||||||
|
'twoColumnSearchResultsRenderer': lambda renderer: get(renderer, 'primaryContents', {}),
|
||||||
|
}
|
||||||
|
|
||||||
|
# these renderers contain a list of renderers inside them
|
||||||
|
nested_renderer_list_dispatch = {
|
||||||
|
'sectionListRenderer': _traverse_standard_list,
|
||||||
|
'itemSectionRenderer': _traverse_standard_list,
|
||||||
|
'gridRenderer': _traverse_standard_list,
|
||||||
|
'playlistVideoListRenderer': _traverse_standard_list,
|
||||||
|
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
||||||
|
}
|
||||||
|
def get_nested_renderer_list_function(key):
|
||||||
|
if key in nested_renderer_list_dispatch:
|
||||||
|
return nested_renderer_list_dispatch[key]
|
||||||
|
elif key.endswith('Continuation'):
|
||||||
|
return _traverse_standard_list
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_items_from_renderer(renderer, item_types=_item_types):
|
||||||
|
ctoken = None
|
||||||
|
items = []
|
||||||
|
|
||||||
|
iter_stack = collections.deque()
|
||||||
|
current_iter = iter(())
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# mode 1: get a new renderer by iterating.
|
||||||
|
# goes down the stack for an iterator if one has been exhausted
|
||||||
|
if not renderer:
|
||||||
|
try:
|
||||||
|
renderer = current_iter.__next__()
|
||||||
|
except StopIteration:
|
||||||
|
try:
|
||||||
|
current_iter = iter_stack.pop()
|
||||||
|
except IndexError:
|
||||||
|
return items, ctoken
|
||||||
|
# Get new renderer or check that the one we got is good before
|
||||||
|
# proceeding to mode 2
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
# mode 2: dig into the current renderer
|
||||||
|
key, value = list(renderer.items())[0]
|
||||||
|
|
||||||
|
# the renderer is an item
|
||||||
|
if key in item_types:
|
||||||
|
items.append(renderer)
|
||||||
|
|
||||||
|
# has a list in it, add it to the iter stack
|
||||||
|
elif get_nested_renderer_list_function(key):
|
||||||
|
renderer_list, cont = get_nested_renderer_list_function(key)(value)
|
||||||
|
if renderer_list:
|
||||||
|
iter_stack.append(current_iter)
|
||||||
|
current_iter = iter(renderer_list)
|
||||||
|
if cont:
|
||||||
|
ctoken = cont
|
||||||
|
|
||||||
|
# new renderer nested inside this one
|
||||||
|
elif key in nested_renderer_dispatch:
|
||||||
|
renderer = nested_renderer_dispatch[key](value)
|
||||||
|
continue # don't reset renderer to None
|
||||||
|
|
||||||
|
renderer = None
|
||||||
|
|
||||||
|
def extract_items(response, item_types=_item_types):
|
||||||
|
'''return items, ctoken'''
|
||||||
|
if 'continuationContents' in response:
|
||||||
|
# sometimes there's another, empty, junk [something]Continuation key
|
||||||
|
# find real one
|
||||||
|
for key, renderer_cont in get(response,
|
||||||
|
'continuationContents', {}).items():
|
||||||
|
# e.g. commentSectionContinuation, playlistVideoListContinuation
|
||||||
|
if key.endswith('Continuation'):
|
||||||
|
items, cont = extract_items_from_renderer({key: renderer_cont},
|
||||||
|
item_types=item_types)
|
||||||
|
if items:
|
||||||
|
return items, cont
|
||||||
|
return [], None
|
||||||
|
elif 'contents' in response:
|
||||||
|
renderer = get(response, 'contents', {})
|
||||||
|
return extract_items_from_renderer(renderer, item_types=item_types)
|
||||||
|
else:
|
||||||
|
return [], None
|
281 youtube/yt_data_extract/everything_else.py (new file)
@@ -0,0 +1,281 @@
|
|||||||
|
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||||
|
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||||
|
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||||
|
extract_date, check_missing_keys, extract_item_info, extract_items,
|
||||||
|
extract_response)
|
||||||
|
from youtube import proto
|
||||||
|
|
||||||
|
import re
|
||||||
|
import urllib
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
|
def extract_channel_info(polymer_json, tab):
|
||||||
|
response, err = extract_response(polymer_json)
|
||||||
|
if err:
|
||||||
|
return {'error': err}
|
||||||
|
|
||||||
|
|
||||||
|
metadata = deep_get(response, 'metadata', 'channelMetadataRenderer',
|
||||||
|
default={})
|
||||||
|
if not metadata:
|
||||||
|
metadata = deep_get(response, 'microformat', 'microformatDataRenderer',
|
||||||
|
default={})
|
||||||
|
|
||||||
|
# channel doesn't exist or was terminated
|
||||||
|
# example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org
|
||||||
|
if not metadata:
|
||||||
|
if response.get('alerts'):
|
||||||
|
error_string = ' '.join(
|
||||||
|
extract_str(deep_get(alert, 'alertRenderer', 'text'), default='')
|
||||||
|
for alert in response['alerts']
|
||||||
|
)
|
||||||
|
if not error_string:
|
||||||
|
error_string = 'Failed to extract error'
|
||||||
|
return {'error': error_string}
|
||||||
|
elif deep_get(response, 'responseContext', 'errors'):
|
||||||
|
for error in response['responseContext']['errors'].get('error', []):
|
||||||
|
if error.get('code') == 'INVALID_VALUE' and error.get('location') == 'browse_id':
|
||||||
|
return {'error': 'This channel does not exist'}
|
||||||
|
return {'error': 'Failure getting metadata'}
|
||||||
|
|
||||||
|
info = {'error': None}
|
||||||
|
info['current_tab'] = tab
|
||||||
|
|
||||||
|
info['approx_subscriber_count'] = extract_approx_int(deep_get(response,
|
||||||
|
'header', 'c4TabbedHeaderRenderer', 'subscriberCountText'))
|
||||||
|
|
||||||
|
# stuff from microformat (info given by youtube for every page on channel)
|
||||||
|
info['short_description'] = metadata.get('description')
|
||||||
|
if info['short_description'] and len(info['short_description']) > 730:
|
||||||
|
info['short_description'] = info['short_description'][0:730] + '...'
|
||||||
|
info['channel_name'] = metadata.get('title')
|
||||||
|
info['avatar'] = multi_deep_get(metadata,
|
||||||
|
['avatar', 'thumbnails', 0, 'url'],
|
||||||
|
['thumbnail', 'thumbnails', 0, 'url'],
|
||||||
|
)
|
||||||
|
channel_url = multi_get(metadata, 'urlCanonical', 'channelUrl')
|
||||||
|
if channel_url:
|
||||||
|
channel_id = get(channel_url.rstrip('/').split('/'), -1)
|
||||||
|
info['channel_id'] = channel_id
|
||||||
|
else:
|
||||||
|
info['channel_id'] = metadata.get('externalId')
|
||||||
|
if info['channel_id']:
|
||||||
|
info['channel_url'] = 'https://www.youtube.com/channel/' + info['channel_id']
|
||||||
|
else:
|
||||||
|
info['channel_url'] = None
|
||||||
|
|
||||||
|
# get items
|
||||||
|
info['items'] = []
|
||||||
|
|
||||||
|
# empty channel
|
||||||
|
if 'contents' not in response and 'continuationContents' not in response:
|
||||||
|
return info
|
||||||
|
|
||||||
|
if tab in ('videos', 'playlists', 'search'):
|
||||||
|
items, ctoken = extract_items(response)
|
||||||
|
additional_info = {'author': info['channel_name'], 'author_url': info['channel_url']}
|
||||||
|
info['items'] = [extract_item_info(renderer, additional_info) for renderer in items]
|
||||||
|
if tab == 'search':
|
||||||
|
info['is_last_page'] = (ctoken is None)
|
||||||
|
elif tab == 'about':
|
||||||
|
items, _ = extract_items(response, item_types={'channelAboutFullMetadataRenderer'})
|
||||||
|
if not items:
|
||||||
|
info['error'] = 'Could not find channelAboutFullMetadataRenderer'
|
||||||
|
return info
|
||||||
|
channel_metadata = items[0]['channelAboutFullMetadataRenderer']
|
||||||
|
|
||||||
|
info['links'] = []
|
||||||
|
for link_json in channel_metadata.get('primaryLinks', ()):
|
||||||
|
url = remove_redirect(deep_get(link_json, 'navigationEndpoint', 'urlEndpoint', 'url'))
|
||||||
|
text = extract_str(link_json.get('title'))
|
||||||
|
info['links'].append( (text, url) )
|
||||||
|
|
||||||
|
info['date_joined'] = extract_date(channel_metadata.get('joinedDateText'))
|
||||||
|
info['view_count'] = extract_int(channel_metadata.get('viewCountText'))
|
||||||
|
info['description'] = extract_str(channel_metadata.get('description'), default='')
|
||||||
|
else:
|
||||||
|
raise NotImplementedError('Unknown or unsupported channel tab: ' + tab)
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
def extract_search_info(polymer_json):
|
||||||
|
response, err = extract_response(polymer_json)
|
||||||
|
if err:
|
||||||
|
return {'error': err}
|
||||||
|
info = {'error': None}
|
||||||
|
info['estimated_results'] = int(response['estimatedResults'])
|
||||||
|
info['estimated_pages'] = ceil(info['estimated_results']/20)
|
||||||
|
|
||||||
|
|
||||||
|
results, _ = extract_items(response)
|
||||||
|
|
||||||
|
|
||||||
|
info['items'] = []
|
||||||
|
info['corrections'] = {'type': None}
|
||||||
|
for renderer in results:
|
||||||
|
type = list(renderer.keys())[0]
|
||||||
|
if type == 'shelfRenderer':
|
||||||
|
continue
|
||||||
|
if type == 'didYouMeanRenderer':
|
||||||
|
renderer = renderer[type]
|
||||||
|
|
||||||
|
info['corrections'] = {
|
||||||
|
'type': 'did_you_mean',
|
||||||
|
'corrected_query': renderer['correctedQueryEndpoint']['searchEndpoint']['query'],
|
||||||
|
'corrected_query_text': renderer['correctedQuery']['runs'],
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
if type == 'showingResultsForRenderer':
|
||||||
|
renderer = renderer[type]
|
||||||
|
|
||||||
|
info['corrections'] = {
|
||||||
|
'type': 'showing_results_for',
|
||||||
|
'corrected_query_text': renderer['correctedQuery']['runs'],
|
||||||
|
'original_query_text': renderer['originalQuery']['simpleText'],
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
|
||||||
|
i_info = extract_item_info(renderer)
|
||||||
|
if i_info.get('type') != 'unsupported':
|
||||||
|
info['items'].append(i_info)
|
||||||
|
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
def extract_playlist_metadata(polymer_json):
|
||||||
|
response, err = extract_response(polymer_json)
|
||||||
|
if err:
|
||||||
|
return {'error': err}
|
||||||
|
|
||||||
|
metadata = {'error': None}
|
||||||
|
header = deep_get(response, 'header', 'playlistHeaderRenderer', default={})
|
||||||
|
metadata['title'] = extract_str(header.get('title'))
|
||||||
|
|
||||||
|
metadata['first_video_id'] = deep_get(header, 'playEndpoint', 'watchEndpoint', 'videoId')
|
||||||
|
first_id = re.search(r'([a-zA-Z0-9_\-]{11})', deep_get(header,
|
||||||
|
'thumbnail', 'thumbnails', 0, 'url', default=''))
|
||||||
|
if first_id:
|
||||||
|
conservative_update(metadata, 'first_video_id', first_id.group(1))
|
||||||
|
if metadata['first_video_id'] is None:
|
||||||
|
metadata['thumbnail'] = None
|
||||||
|
else:
|
||||||
|
metadata['thumbnail'] = 'https://i.ytimg.com/vi/' + metadata['first_video_id'] + '/mqdefault.jpg'
|
||||||
|
|
||||||
|
metadata['video_count'] = extract_int(header.get('numVideosText'))
|
||||||
|
metadata['description'] = extract_str(header.get('descriptionText'), default='')
|
||||||
|
metadata['author'] = extract_str(header.get('ownerText'))
|
||||||
|
metadata['author_id'] = multi_deep_get(header,
|
||||||
|
['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||||
|
['ownerEndpoint', 'browseEndpoint', 'browseId'])
|
||||||
|
if metadata['author_id']:
|
||||||
|
metadata['author_url'] = 'https://www.youtube.com/channel/' + metadata['author_id']
|
||||||
|
else:
|
||||||
|
metadata['author_url'] = None
|
||||||
|
metadata['view_count'] = extract_int(header.get('viewCountText'))
|
||||||
|
metadata['like_count'] = extract_int(header.get('likesCountWithoutLikeText'))
|
||||||
|
for stat in header.get('stats', ()):
|
||||||
|
text = extract_str(stat)
|
||||||
|
if 'videos' in text:
|
||||||
|
conservative_update(metadata, 'video_count', extract_int(text))
|
||||||
|
elif 'views' in text:
|
||||||
|
conservative_update(metadata, 'view_count', extract_int(text))
|
||||||
|
elif 'updated' in text:
|
||||||
|
metadata['time_published'] = extract_date(text)
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
def extract_playlist_info(polymer_json):
|
||||||
|
response, err = extract_response(polymer_json)
|
||||||
|
if err:
|
||||||
|
return {'error': err}
|
||||||
|
info = {'error': None}
|
||||||
|
first_page = 'continuationContents' not in response
|
||||||
|
video_list, _ = extract_items(response)
|
||||||
|
|
||||||
|
info['items'] = [extract_item_info(renderer) for renderer in video_list]
|
||||||
|
|
||||||
|
if first_page:
|
||||||
|
info['metadata'] = extract_playlist_metadata(polymer_json)
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
def _ctoken_metadata(ctoken):
|
||||||
|
result = dict()
|
||||||
|
params = proto.parse(proto.b64_to_bytes(ctoken))
|
||||||
|
result['video_id'] = proto.parse(params[2])[2].decode('ascii')
|
||||||
|
|
||||||
|
offset_information = proto.parse(params[6])
|
||||||
|
result['offset'] = offset_information.get(5, 0)
|
||||||
|
|
||||||
|
result['is_replies'] = False
|
||||||
|
if (3 in offset_information) and (2 in proto.parse(offset_information[3])):
|
||||||
|
result['is_replies'] = True
|
||||||
|
result['sort'] = None
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
result['sort'] = proto.parse(offset_information[4])[6]
|
||||||
|
except KeyError:
|
||||||
|
result['sort'] = 0
|
||||||
|
return result
|
||||||
|
|
||||||
|
def extract_comments_info(polymer_json):
|
||||||
|
response, err = extract_response(polymer_json)
|
||||||
|
if err:
|
||||||
|
return {'error': err}
|
||||||
|
info = {'error': None}
|
||||||
|
|
||||||
|
url = multi_deep_get(polymer_json, [1, 'url'], ['url'])
|
||||||
|
if url:
|
||||||
|
ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
|
||||||
|
metadata = _ctoken_metadata(ctoken)
|
||||||
|
else:
|
||||||
|
metadata = {}
|
||||||
|
info['video_id'] = metadata.get('video_id')
|
||||||
|
info['offset'] = metadata.get('offset')
|
||||||
|
info['is_replies'] = metadata.get('is_replies')
|
||||||
|
info['sort'] = metadata.get('sort')
|
||||||
|
info['video_title'] = None
|
||||||
|
|
||||||
|
comments, ctoken = extract_items(response,
|
||||||
|
item_types={'commentThreadRenderer', 'commentRenderer'})
|
||||||
|
info['comments'] = []
|
||||||
|
info['ctoken'] = ctoken
|
||||||
|
for comment in comments:
|
||||||
|
comment_info = {}
|
||||||
|
|
||||||
|
if 'commentThreadRenderer' in comment: # top level comments
|
||||||
|
conservative_update(info, 'is_replies', False)
|
||||||
|
comment_thread = comment['commentThreadRenderer']
|
||||||
|
info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
|
||||||
|
if 'replies' not in comment_thread:
|
||||||
|
comment_info['reply_count'] = 0
|
||||||
|
else:
|
||||||
|
comment_info['reply_count'] = extract_int(deep_get(comment_thread,
|
||||||
|
'replies', 'commentRepliesRenderer', 'moreText'
|
||||||
|
), default=1) # With 1 reply, the text reads "View reply"
|
||||||
|
comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
|
||||||
|
elif 'commentRenderer' in comment: # replies
|
||||||
|
comment_info['reply_count'] = 0 # replyCount, below, not present for replies even if the reply has further replies to it
|
||||||
|
conservative_update(info, 'is_replies', True)
|
||||||
|
comment_renderer = comment['commentRenderer']
|
||||||
|
else:
|
||||||
|
comment_renderer = {}
|
||||||
|
|
||||||
|
# These 3 are sometimes absent, likely because the channel was deleted
|
||||||
|
comment_info['author'] = extract_str(comment_renderer.get('authorText'))
|
||||||
|
comment_info['author_url'] = deep_get(comment_renderer,
|
||||||
|
'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
|
||||||
|
comment_info['author_id'] = deep_get(comment_renderer,
|
||||||
|
'authorEndpoint', 'browseEndpoint', 'browseId')
|
||||||
|
|
||||||
|
comment_info['author_avatar'] = deep_get(comment_renderer,
|
||||||
|
'authorThumbnail', 'thumbnails', 0, 'url')
|
||||||
|
comment_info['id'] = comment_renderer.get('commentId')
|
||||||
|
comment_info['text'] = extract_formatted_text(comment_renderer.get('contentText'))
|
||||||
|
comment_info['time_published'] = extract_str(comment_renderer.get('publishedTimeText'))
|
||||||
|
comment_info['like_count'] = comment_renderer.get('likeCount')
|
||||||
|
liberal_update(comment_info, 'reply_count', comment_renderer.get('replyCount'))
|
||||||
|
|
||||||
|
info['comments'].append(comment_info)
|
||||||
|
|
||||||
|
return info
|
689 youtube/yt_data_extract/watch_extraction.py (new file)
@@ -0,0 +1,689 @@
|
|||||||
|
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||||
|
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||||
|
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||||
|
extract_date, check_missing_keys, extract_item_info, extract_items,
|
||||||
|
extract_response, concat_or_none)
|
||||||
|
|
||||||
|
import json
|
||||||
|
import urllib.parse
|
||||||
|
import traceback
|
||||||
|
import re
|
||||||
|
|
||||||
|
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
|
||||||
|
_formats = {
|
||||||
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||||
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||||
|
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), audio_bitrate varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'h264'},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'vcodec': 'av01.0.05M.08'},
    '395': {'vcodec': 'av01.0.05M.08'},
    '396': {'vcodec': 'av01.0.05M.08'},
    '397': {'vcodec': 'av01.0.05M.08'},
}

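# Editor's sketch (not part of the original module): a minimal helper showing how the
# hardcoded itag table above is typically consulted. _extract_formats() further down does
# the same merge via conservative_update(); the itag value 22 here is just an example.
def _describe_itag(itag):
    """Return the hardcoded container/codec info for an itag, or an empty dict."""
    return _formats.get(str(itag), {})

# _describe_itag(22) would return roughly
# {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'}
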
def _extract_metadata_row_info(video_renderer_info):
    # extract category and music list
    info = {
        'category': None,
        'music_list': [],
    }

    current_song = {}
    for row in deep_get(video_renderer_info, 'metadataRowContainer', 'metadataRowContainerRenderer', 'rows', default=[]):
        row_title = extract_str(deep_get(row, 'metadataRowRenderer', 'title'), default='')
        row_content = extract_str(deep_get(row, 'metadataRowRenderer', 'contents', 0))
        if row_title == 'Category':
            info['category'] = row_content
        elif row_title in ('Song', 'Music'):
            if current_song:
                info['music_list'].append(current_song)
            current_song = {'title': row_content}
        elif row_title == 'Artist':
            current_song['artist'] = row_content
        elif row_title == 'Album':
            current_song['album'] = row_content
        elif row_title == 'Writers':
            current_song['writers'] = row_content
        elif row_title.startswith('Licensed'):
            current_song['licensor'] = row_content
    if current_song:
        info['music_list'].append(current_song)

    return info

def _extract_watch_info_mobile(top_level):
    info = {}
    microformat = deep_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={})

    family_safe = microformat.get('isFamilySafe')
    if family_safe is None:
        info['age_restricted'] = None
    else:
        info['age_restricted'] = not family_safe
    info['allowed_countries'] = microformat.get('availableCountries', [])
    info['time_published'] = microformat.get('publishDate')

    response = top_level.get('response', {})

    # this renderer has the stuff visible on the page
    # check for playlist
    items, _ = extract_items(response,
        item_types={'singleColumnWatchNextResults'})
    if items:
        watch_next_results = items[0]['singleColumnWatchNextResults']
        playlist = deep_get(watch_next_results, 'playlist', 'playlist')
        if playlist is None:
            info['playlist'] = None
        else:
            info['playlist'] = {}
            info['playlist']['title'] = playlist.get('title')
            info['playlist']['author'] = extract_str(multi_get(playlist,
                'ownerName', 'longBylineText', 'shortBylineText', 'ownerText'))
            author_id = deep_get(playlist, 'longBylineText', 'runs', 0,
                'navigationEndpoint', 'browseEndpoint', 'browseId')
            info['playlist']['author_id'] = author_id
            if author_id:
                info['playlist']['author_url'] = concat_or_none(
                    'https://www.youtube.com/channel/', author_id)
            info['playlist']['id'] = playlist.get('playlistId')
            info['playlist']['url'] = concat_or_none(
                'https://www.youtube.com/playlist?list=',
                info['playlist']['id'])
            info['playlist']['video_count'] = playlist.get('totalVideos')
            info['playlist']['current_index'] = playlist.get('currentIndex')
            info['playlist']['items'] = [
                extract_item_info(i) for i in playlist.get('contents', ())]
    else:
        info['playlist'] = None

    # Holds the visible video info. It is inside singleColumnWatchNextResults
    # but use our convenience function instead
    items, _ = extract_items(response, item_types={'slimVideoMetadataRenderer'})
    if items:
        video_info = items[0]['slimVideoMetadataRenderer']
    else:
        print('Failed to extract video metadata')
        video_info = {}

    info.update(_extract_metadata_row_info(video_info))
    info['description'] = extract_str(video_info.get('description'), recover_urls=True)
    info['view_count'] = extract_int(extract_str(video_info.get('expandedSubtitle')))
    info['author'] = extract_str(deep_get(video_info, 'owner', 'slimOwnerRenderer', 'title'))
    info['author_id'] = deep_get(video_info, 'owner', 'slimOwnerRenderer', 'navigationEndpoint', 'browseEndpoint', 'browseId')
    info['title'] = extract_str(video_info.get('title'))
    info['live'] = 'watching' in extract_str(video_info.get('expandedSubtitle'), default='')
    info['unlisted'] = False
    for badge in video_info.get('badges', []):
        if deep_get(badge, 'metadataBadgeRenderer', 'label') == 'Unlisted':
            info['unlisted'] = True
    info['like_count'] = None
    info['dislike_count'] = None
    if not info['time_published']:
        info['time_published'] = extract_date(extract_str(video_info.get('dateText', None)))
    for button in video_info.get('buttons', ()):
        button_renderer = button.get('slimMetadataToggleButtonRenderer', {})

        # all the digits can be found in the accessibility data
        count = extract_int(deep_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText', 'accessibility', 'accessibilityData', 'label'))

        # this count doesn't have all the digits, it's like 53K for instance
        dumb_count = extract_int(extract_str(deep_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText')))

        # the accessibility text will be "No likes" or "No dislikes" or something like that, but dumb count will be 0
        if dumb_count == 0:
            count = 0

        if 'isLike' in button_renderer:
            info['like_count'] = count
        elif 'isDislike' in button_renderer:
            info['dislike_count'] = count

    # comment section info
    items, _ = extract_items(response, item_types={
        'commentSectionRenderer', 'commentsEntryPointHeaderRenderer'})
    if items:
        header_type = list(items[0])[0]
        comment_info = items[0][header_type]
        # This seems to be some kind of A/B test being done on mobile, where
        # this is present instead of the normal commentSectionRenderer. It can
        # be seen here:
        # https://www.androidpolice.com/2019/10/31/google-youtube-app-comment-section-below-videos/
        # https://www.youtube.com/watch?v=bR5Q-wD-6qo
        if header_type == 'commentsEntryPointHeaderRenderer':
            comment_count_text = extract_str(comment_info.get('headerText'))
        else:
            comment_count_text = extract_str(deep_get(comment_info,
                'header', 'commentSectionHeaderRenderer', 'countText'))
        if comment_count_text == 'Comments':  # just this with no number, means 0 comments
            info['comment_count'] = 0
        else:
            info['comment_count'] = extract_int(comment_count_text)
        info['comments_disabled'] = False
    else:  # no comment section present means comments are disabled
        info['comment_count'] = 0
        info['comments_disabled'] = True

    # check for limited state
    items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
    if items:
        info['limited_state'] = True
    else:
        info['limited_state'] = False

    # related videos
    related, _ = extract_items(response)
    info['related_videos'] = [extract_item_info(renderer) for renderer in related]

    return info

def _extract_watch_info_desktop(top_level):
    info = {
        'comment_count': None,
        'comments_disabled': None,
        'allowed_countries': [],
        'limited_state': None,
        'playlist': None,
    }

    video_info = {}
    for renderer in deep_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', default=()):
        if renderer and list(renderer.keys())[0] in ('videoPrimaryInfoRenderer', 'videoSecondaryInfoRenderer'):
            video_info.update(list(renderer.values())[0])

    info.update(_extract_metadata_row_info(video_info))
    info['description'] = extract_str(video_info.get('description', None), recover_urls=True)
    info['time_published'] = extract_date(extract_str(video_info.get('dateText', None)))

    likes_dislikes = deep_get(video_info, 'sentimentBar', 'sentimentBarRenderer', 'tooltip', default='').split('/')
    if len(likes_dislikes) == 2:
        info['like_count'] = extract_int(likes_dislikes[0])
        info['dislike_count'] = extract_int(likes_dislikes[1])
    else:
        info['like_count'] = None
        info['dislike_count'] = None

    info['title'] = extract_str(video_info.get('title', None))
    info['author'] = extract_str(deep_get(video_info, 'owner', 'videoOwnerRenderer', 'title'))
    info['author_id'] = deep_get(video_info, 'owner', 'videoOwnerRenderer', 'navigationEndpoint', 'browseEndpoint', 'browseId')
    info['view_count'] = extract_int(extract_str(deep_get(video_info, 'viewCount', 'videoViewCountRenderer', 'viewCount')))

    related = deep_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results', default=[])
    info['related_videos'] = [extract_item_info(renderer) for renderer in related]

    return info

def update_format_with_codec_info(fmt, codec):
    if (codec.startswith('av')
            or codec in ('vp9', 'vp8', 'vp8.0', 'h263', 'h264', 'mp4v')):
        if codec == 'vp8.0':
            codec = 'vp8'
        conservative_update(fmt, 'vcodec', codec)
    elif (codec.startswith('mp4a')
            or codec in ('opus', 'mp3', 'aac', 'dtse', 'ec-3', 'vorbis')):
        conservative_update(fmt, 'acodec', codec)
    else:
        print('Warning: unrecognized codec: ' + codec)

fmt_type_re = re.compile(
    r'(text|audio|video)/([\w0-9]+); codecs="([\w0-9\.]+(?:, [\w0-9\.]+)*)"')
def update_format_with_type_info(fmt, yt_fmt):
    # 'type' for invidious api format
    mime_type = multi_get(yt_fmt, 'mimeType', 'type')
    if mime_type is None:
        return
    match = re.fullmatch(fmt_type_re, mime_type)

    type, fmt['ext'], codecs = match.groups()
    codecs = codecs.split(', ')
    for codec in codecs:
        update_format_with_codec_info(fmt, codec)
    if type == 'audio':
        assert len(codecs) == 1

def _extract_formats(info, player_response):
    streaming_data = player_response.get('streamingData', {})
    yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])

    info['formats'] = []
    # because we may retry the extract_formats with a different player_response
    # so keep what we have
    conservative_update(info, 'hls_manifest_url',
        streaming_data.get('hlsManifestUrl'))
    conservative_update(info, 'dash_manifest_url',
        streaming_data.get('dash_manifest_url'))

    for yt_fmt in yt_formats:
        itag = yt_fmt.get('itag')

        fmt = {}
        fmt['itag'] = itag
        fmt['ext'] = None
        fmt['audio_bitrate'] = None
        fmt['acodec'] = None
        fmt['vcodec'] = None
        fmt['width'] = yt_fmt.get('width')
        fmt['height'] = yt_fmt.get('height')
        fmt['file_size'] = yt_fmt.get('contentLength')
        fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate')
        fmt['fps'] = yt_fmt.get('fps')
        update_format_with_type_info(fmt, yt_fmt)
        cipher = dict(urllib.parse.parse_qsl(multi_get(yt_fmt,
            'cipher', 'signatureCipher', default='')))
        if cipher:
            fmt['url'] = cipher.get('url')
        else:
            fmt['url'] = yt_fmt.get('url')
        fmt['s'] = cipher.get('s')
        fmt['sp'] = cipher.get('sp')

        # update with information from big table
        hardcoded_itag_info = _formats.get(str(itag), {})
        for key, value in hardcoded_itag_info.items():
            conservative_update(fmt, key, value)  # prefer info from Youtube
        fmt['quality'] = hardcoded_itag_info.get('height')

        info['formats'].append(fmt)

    # get ip address
    if info['formats']:
        query_string = (info['formats'][0].get('url') or '?').split('?')[1]
        info['ip_address'] = deep_get(
            urllib.parse.parse_qs(query_string), 'ip', 0)
    else:
        info['ip_address'] = None

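# Editor's sketch (not part of the original module): after _extract_formats() has run,
# every entry in info['formats'] is a plain dict, so callers can filter it however they
# like. A minimal example that keeps only progressive (audio + video) streams:
def _progressive_formats(info):
    """Return the formats that carry both an audio and a video codec."""
    return [f for f in info.get('formats', [])
            if f.get('acodec') and f.get('vcodec')]
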
hls_regex = re.compile(r'[\w_-]+=(?:"[^"]+"|[^",]+),')
def extract_hls_formats(hls_manifest):
    '''returns hls_formats, err'''
    hls_formats = []
    try:
        lines = hls_manifest.splitlines()
        i = 0
        while i < len(lines):
            if lines[i].startswith('#EXT-X-STREAM-INF'):
                fmt = {'acodec': None, 'vcodec': None, 'height': None,
                    'width': None, 'fps': None, 'audio_bitrate': None,
                    'itag': None, 'file_size': None,
                    'audio_sample_rate': None, 'url': None}
                properties = lines[i].split(':')[1]
                properties += ','  # make regex work for last key-value pair

                for pair in hls_regex.findall(properties):
                    key, value = pair.rstrip(',').split('=')
                    if key == 'CODECS':
                        for codec in value.strip('"').split(','):
                            update_format_with_codec_info(fmt, codec)
                    elif key == 'RESOLUTION':
                        fmt['width'], fmt['height'] = map(int, value.split('x'))
                        fmt['resolution'] = value
                    elif key == 'FRAME-RATE':
                        fmt['fps'] = int(value)
                i += 1
                fmt['url'] = lines[i]
                assert fmt['url'].startswith('http')
                fmt['ext'] = 'm3u8'
                hls_formats.append(fmt)
            i += 1
    except Exception as e:
        traceback.print_exc()
        return [], str(e)
    return hls_formats, None

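# Editor's sketch (not part of the original module): extract_hls_formats() takes the raw
# text of a master .m3u8 playlist. The two-line manifest below is a made-up minimal example.
_example_manifest = (
    '#EXT-X-STREAM-INF:BANDWIDTH=1200000,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=30\n'
    'https://example.com/stream_720p.m3u8\n'
)
# extract_hls_formats(_example_manifest) would return roughly
# ([{'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'width': 1280, 'height': 720,
#    'fps': 30, 'ext': 'm3u8', 'url': 'https://example.com/stream_720p.m3u8', ...}], None)
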
def _extract_playability_error(info, player_response, error_prefix=''):
    if info['formats']:
        info['playability_status'] = None
        info['playability_error'] = None
        return

    playability_status = deep_get(player_response, 'playabilityStatus', 'status', default=None)
    info['playability_status'] = playability_status

    playability_reason = extract_str(multi_deep_get(player_response,
        ['playabilityStatus', 'reason'],
        ['playabilityStatus', 'errorScreen', 'playerErrorMessageRenderer', 'reason'],
        default='Could not find playability error')
    )

    if playability_status not in (None, 'OK'):
        info['playability_error'] = error_prefix + playability_reason
    elif not info['playability_error']:  # do not override
        info['playability_error'] = error_prefix + 'Unknown playability error'

SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
def extract_watch_info(polymer_json):
    info = {'playability_error': None, 'error': None}

    if isinstance(polymer_json, dict):
        top_level = polymer_json
    elif isinstance(polymer_json, (list, tuple)):
        top_level = {}
        for page_part in polymer_json:
            if not isinstance(page_part, dict):
                return {'error': 'Invalid page part'}
            top_level.update(page_part)
    else:
        return {'error': 'Invalid top level polymer data'}

    error = check_missing_keys(top_level,
        ['player', 'args'],
        ['player', 'assets', 'js'],
        ['playerResponse'],
    )
    if error:
        info['playability_error'] = error

    player_response = top_level.get('playerResponse', {})

    # usually, only the embedded one has the urls
    player_args = deep_get(top_level, 'player', 'args', default={})
    if 'player_response' in player_args:
        embedded_player_response = json.loads(player_args['player_response'])
    else:
        embedded_player_response = {}

    # captions
    info['automatic_caption_languages'] = []
    info['manual_caption_languages'] = []
    info['_manual_caption_language_names'] = {}  # language name written in that language, needed in some cases to create the url
    info['translation_languages'] = []
    captions_info = player_response.get('captions', {})
    info['_captions_base_url'] = normalize_url(deep_get(captions_info, 'playerCaptionsRenderer', 'baseUrl'))
    for caption_track in deep_get(captions_info, 'playerCaptionsTracklistRenderer', 'captionTracks', default=()):
        lang_code = caption_track.get('languageCode')
        if not lang_code:
            continue
        if caption_track.get('kind') == 'asr':
            info['automatic_caption_languages'].append(lang_code)
        else:
            info['manual_caption_languages'].append(lang_code)
        base_url = caption_track.get('baseUrl', '')
        lang_name = deep_get(urllib.parse.parse_qs(urllib.parse.urlparse(base_url).query), 'name', 0)
        if lang_name:
            info['_manual_caption_language_names'][lang_code] = lang_name

    for translation_lang_info in deep_get(captions_info, 'playerCaptionsTracklistRenderer', 'translationLanguages', default=()):
        lang_code = translation_lang_info.get('languageCode')
        if lang_code:
            info['translation_languages'].append(lang_code)
        if translation_lang_info.get('isTranslatable') == False:
            print('WARNING: Found non-translatable caption language')

    # formats
    _extract_formats(info, embedded_player_response)
    if not info['formats']:
        _extract_formats(info, player_response)

    # playability errors
    _extract_playability_error(info, player_response)

    # check age-restriction
    info['age_restricted'] = (info['playability_status'] == 'LOGIN_REQUIRED' and info['playability_error'] and ' age' in info['playability_error'])

    # base_js (for decryption of signatures)
    info['base_js'] = deep_get(top_level, 'player', 'assets', 'js')
    if info['base_js']:
        info['base_js'] = normalize_url(info['base_js'])
        # must uniquely identify url
        info['player_name'] = urllib.parse.urlparse(info['base_js']).path
    else:
        info['player_name'] = None

    # extract stuff from visible parts of page
    mobile = 'singleColumnWatchNextResults' in deep_get(top_level, 'response', 'contents', default={})
    if mobile:
        info.update(_extract_watch_info_mobile(top_level))
    else:
        info.update(_extract_watch_info_desktop(top_level))

    # stuff from videoDetails. Use liberal_update to prioritize info from videoDetails over existing info
    vd = deep_get(top_level, 'playerResponse', 'videoDetails', default={})
    liberal_update(info, 'title', extract_str(vd.get('title')))
    liberal_update(info, 'duration', extract_int(vd.get('lengthSeconds')))
    liberal_update(info, 'view_count', extract_int(vd.get('viewCount')))
    # videos with no description have a blank string
    liberal_update(info, 'description', vd.get('shortDescription'))
    liberal_update(info, 'id', vd.get('videoId'))
    liberal_update(info, 'author', vd.get('author'))
    liberal_update(info, 'author_id', vd.get('channelId'))
    info['was_live'] = vd.get('isLiveContent')
    conservative_update(info, 'unlisted', not vd.get('isCrawlable', True))  # isCrawlable is false on limited state videos even if they aren't unlisted
    liberal_update(info, 'tags', vd.get('keywords', []))

    # fallback stuff from microformat
    mf = deep_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={})
    conservative_update(info, 'title', extract_str(mf.get('title')))
    conservative_update(info, 'duration', extract_int(mf.get('lengthSeconds')))
    # this gives the view count for limited state videos
    conservative_update(info, 'view_count', extract_int(mf.get('viewCount')))
    conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
    conservative_update(info, 'author', mf.get('ownerChannelName'))
    conservative_update(info, 'author_id', mf.get('externalChannelId'))
    conservative_update(info, 'live', deep_get(mf, 'liveBroadcastDetails',
        'isLiveNow'))
    liberal_update(info, 'unlisted', mf.get('isUnlisted'))
    liberal_update(info, 'category', mf.get('category'))
    liberal_update(info, 'time_published', mf.get('publishDate'))
    liberal_update(info, 'time_uploaded', mf.get('uploadDate'))

    # other stuff
    info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
    return info

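# Editor's sketch (not part of the original module): typical use of extract_watch_info().
# Fetching the watch page's polymer/pbj JSON is up to the caller (the request details are
# outside this module); polymer_json below is assumed to already hold the parsed response.
def _summarize_watch_info(polymer_json):
    """Minimal example of consuming extract_watch_info()'s result."""
    info = extract_watch_info(polymer_json)
    if info.get('error') or info.get('playability_error'):
        return None
    playable = [f for f in info['formats'] if f.get('url')]
    return {
        'title': info['title'],
        'author': info['author'],
        'formats_available': len(playable),
    }
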
def get_caption_url(info, language, format, automatic=False, translation_language=None):
    '''Gets the url for captions with the given language and format. If automatic is True, get the automatic captions for that language. If translation_language is given, translate the captions from `language` to `translation_language`. If automatic is true and translation_language is given, the automatic captions will be translated.'''
    url = info['_captions_base_url']
    url += '&lang=' + language
    url += '&fmt=' + format
    if automatic:
        url += '&kind=asr'
    elif language in info['_manual_caption_language_names']:
        url += '&name=' + urllib.parse.quote(info['_manual_caption_language_names'][language], safe='')

    if translation_language:
        url += '&tlang=' + translation_language
    return url

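# Editor's sketch (not part of the original module): building a caption URL once
# extract_watch_info() has filled in the caption fields. 'en' and 'vtt' are example values;
# SUBTITLE_FORMATS above lists the formats this code expects YouTube to serve.
def _english_vtt_captions(info):
    """Return a .vtt caption URL for English, preferring manual over automatic captions."""
    if 'en' in info['manual_caption_languages']:
        return get_caption_url(info, 'en', 'vtt', automatic=False)
    if 'en' in info['automatic_caption_languages']:
        return get_caption_url(info, 'en', 'vtt', automatic=True)
    return None
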
def update_with_age_restricted_info(info, video_info_page):
    ERROR_PREFIX = 'Error bypassing age-restriction: '

    video_info = urllib.parse.parse_qs(video_info_page)
    player_response = deep_get(video_info, 'player_response', 0)
    if player_response is None:
        info['playability_error'] = ERROR_PREFIX + 'Could not find player_response in video_info_page'
        return
    try:
        player_response = json.loads(player_response)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        info['playability_error'] = ERROR_PREFIX + 'Failed to parse json response'
        return

    _extract_formats(info, player_response)
    _extract_playability_error(info, player_response, error_prefix=ERROR_PREFIX)

def requires_decryption(info):
    return ('formats' in info) and info['formats'] and info['formats'][0]['s']

# adapted from youtube-dl and invidious:
# https://github.com/omarroth/invidious/blob/master/src/invidious/helpers/signatures.cr
decrypt_function_re = re.compile(r'function\(a\)\{(a=a\.split\(""\)[^\}{]+)return a\.join\(""\)\}')
op_with_arg_re = re.compile(r'[^\.]+\.([^\(]+)\(a,(\d+)\)')
def extract_decryption_function(info, base_js):
    '''Insert decryption function into info. Return error string if not successful.
    Decryption function is a list of list[2] of numbers.
    It is advisable to cache the decryption function (uniquely identified by info['player_name']) so base.js (1 MB) doesn't need to be redownloaded each time'''
    info['decryption_function'] = None
    decrypt_function_match = decrypt_function_re.search(base_js)
    if decrypt_function_match is None:
        return 'Could not find decryption function in base.js'

    function_body = decrypt_function_match.group(1).split(';')[1:-1]
    if not function_body:
        return 'Empty decryption function body'

    var_name = get(function_body[0].split('.'), 0)
    if var_name is None:
        return 'Could not find var_name'

    var_body_match = re.search(r'var ' + re.escape(var_name) + r'=\{(.*?)\};', base_js, flags=re.DOTALL)
    if var_body_match is None:
        return 'Could not find var_body'

    operations = var_body_match.group(1).replace('\n', '').split('},')
    if not operations:
        return 'Did not find any definitions in var_body'
    operations[-1] = operations[-1][:-1]  # remove the trailing '}' since we split by '},' on the others
    operation_definitions = {}
    for op in operations:
        colon_index = op.find(':')
        opening_brace_index = op.find('{')

        if colon_index == -1 or opening_brace_index == -1:
            return 'Could not parse operation'
        op_name = op[:colon_index]
        op_body = op[opening_brace_index+1:]
        if op_body == 'a.reverse()':
            operation_definitions[op_name] = 0
        elif op_body == 'a.splice(0,b)':
            operation_definitions[op_name] = 1
        elif op_body.startswith('var c=a[0]'):
            operation_definitions[op_name] = 2
        else:
            return 'Unknown op_body: ' + op_body

    decryption_function = []
    for op_with_arg in function_body:
        match = op_with_arg_re.fullmatch(op_with_arg)
        if match is None:
            return 'Could not parse operation with arg'
        op_name = match.group(1)
        if op_name not in operation_definitions:
            return 'Unknown op_name: ' + op_name
        op_argument = match.group(2)
        decryption_function.append([operation_definitions[op_name], int(op_argument)])

    info['decryption_function'] = decryption_function
    return False

def _operation_2(a, b):
    c = a[0]
    a[0] = a[b % len(a)]
    a[b % len(a)] = c

def decrypt_signatures(info):
    '''Applies info['decryption_function'] to decrypt all the signatures. Return err.'''
    if not info.get('decryption_function'):
        return 'decryption_function not in info'
    for format in info['formats']:
        if not format['s'] or not format['sp'] or not format['url']:
            print('Warning: s, sp, or url not in format')
            continue

        a = list(format['s'])
        for op, argument in info['decryption_function']:
            if op == 0:
                a.reverse()
            elif op == 1:
                a = a[argument:]
            else:
                _operation_2(a, argument)

        signature = ''.join(a)
        format['url'] += '&' + format['sp'] + '=' + signature
    return False

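# Editor's sketch (not part of the original module): how the decryption helpers above fit
# together. Fetching base.js and caching the extracted function (keyed by
# info['player_name']) is left to the caller; base_js_text is assumed to already hold the
# downloaded player javascript.
def _prepare_format_urls(info, base_js_text):
    """Decrypt signed format URLs in place. Returns an error string, or False on success."""
    if not requires_decryption(info):
        return False  # urls are already usable
    err = extract_decryption_function(info, base_js_text)
    if err:
        return err
    return decrypt_signatures(info)
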
@@ -1,4 +1,4 @@
-from youtube_data import proto, utils
+from youtube_data import proto
 from flask import Markup as mk
 import requests
 import base64
@@ -1,130 +0,0 @@
from youtube_data import proto
import json
import base64
import urllib
import requests
import re
import bleach
from flask import Markup

URL_ORIGIN = "/https://www.youtube.com"


def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
    video_id = proto.as_bytes(video_id)
    secret_key = proto.as_bytes(secret_key)

    page_info = proto.string(4, video_id) + proto.uint(6, sort)
    offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
    if secret_key:
        offset_information = proto.string(1, secret_key) + offset_information

    page_params = proto.string(2, video_id)
    if lc:
        page_params += proto.string(6, proto.percent_b64encode(proto.string(15, lc)))

    result = proto.nested(2, page_params) + proto.uint(3, 6) + proto.nested(6, offset_information)
    return base64.urlsafe_b64encode(result).decode('ascii')


def comment_replies_ctoken(video_id, comment_id, max_results=500):
    params = proto.string(2, comment_id) + proto.uint(9, max_results)
    params = proto.nested(3, params)

    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3, 6) + proto.nested(6, params)
    return base64.urlsafe_b64encode(result).decode('ascii')


mobile_headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'X-YouTube-Client-Name': '2',
    'X-YouTube-Client-Version': '2.20180823',
}
def request_comments(ctoken, replies=False):
    if replies:  # let's make it use different urls for no reason despite all the data being encoded
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for i in range(0, 8):  # don't retry more than 8 times
        content = requests.get(url, headers=mobile_headers).text
        # content is text, so compare against str prefixes (not bytes)
        if content[0:4] == ")]}'":  # random closing characters included at beginning of response for some reason
            content = content[4:]
        elif content[0:10] == '\n<!DOCTYPE':  # occasionally returns html instead of json for no reason
            content = ''
            print("got <!DOCTYPE>, retrying")
            continue
        break

    polymer_json = json.loads(content)
    return polymer_json


def single_comment_ctoken(video_id, comment_id):
    page_params = proto.string(2, video_id) + proto.string(6, proto.percent_b64encode(proto.string(15, comment_id)))

    result = proto.nested(2, page_params) + proto.uint(3, 6)
    return base64.urlsafe_b64encode(result).decode('ascii')


def concat_texts(strings):
    '''Concatenates strings. Returns None if any of the arguments are None'''
    result = ''
    for string in strings:
        if string['text'] is None:
            return None
        result += string['text']
    return result


def parse_comment(raw_comment):
    cmnt = {}
    raw_comment = raw_comment['commentThreadRenderer']['comment']['commentRenderer']
    imgHostName = urllib.parse.urlparse(raw_comment['authorThumbnail']['thumbnails'][0]['url']).netloc
    cmnt['author'] = raw_comment['authorText']['runs'][0]['text']
    cmnt['thumbnail'] = raw_comment['authorThumbnail']['thumbnails'][0]['url'].replace("https://{}".format(imgHostName), "")+"?host="+imgHostName
    cmnt['channel'] = raw_comment['authorEndpoint']['commandMetadata']['webCommandMetadata']['url']
    cmnt['text'] = Markup(bleach.linkify(concat_texts(raw_comment['contentText']['runs']).replace("\n", "<br>")))
    cmnt['date'] = raw_comment['publishedTimeText']['runs'][0]['text']

    try:
        cmnt['creatorHeart'] = raw_comment['creatorHeart']['creatorHeartRenderer']['creatorThumbnail']['thumbnails'][0]['url']
    except:
        cmnt['creatorHeart'] = False

    try:
        cmnt['likes'] = raw_comment['likeCount']
    except:
        cmnt['likes'] = 0

    try:
        cmnt['replies'] = raw_comment['replyCount']
    except:
        cmnt['replies'] = 0

    cmnt['authorIsChannelOwner'] = raw_comment['authorIsChannelOwner']
    try:
        cmnt['pinned'] = raw_comment['pinnedCommentBadge']
        cmnt['pinned'] = True
    except:
        cmnt['pinned'] = False
    return cmnt


def post_process_comments_info(comments_info):
    comments = []
    for comment in comments_info[1]['response']['continuationContents']['commentSectionContinuation']['items']:
        comments.append(parse_comment(comment))
    return comments


def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
    comments_info = request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))
    comments_info = post_process_comments_info(comments_info)
    return comments_info

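# Editor's sketch (not part of the original, now-deleted module): the intended call chain
# of the helpers above. The video id is a placeholder; a real ctoken only makes sense for
# an existing video, and request_comments() performs a network request.
#
#     ctoken = make_comment_ctoken('dQw4w9WgXcQ', sort=0, offset=0)
#     polymer_json = request_comments(ctoken)
#     comments = post_process_comments_info(polymer_json)   # same as video_comments(...)
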
@@ -1,5 +1,5 @@
-from youtube_data import proto, utils
-from bs4 import BeautifulSoup as bs
+from youtube_data import proto
+from youtube import utils
 from flask import Markup
 import urllib.parse
 import requests
@@ -1,12 +0,0 @@
def get_description_snippet_text(ds):
    string = ""
    for t in ds:
        try:
            if t['bold']:
                text = "<b>"+t['text']+"</b>"
            else:
                text = t['text']
        except:
            text = t['text']
        string = string + text
    return string
@@ -1,281 +0,0 @@
from bs4 import BeautifulSoup as bs
from urllib.parse import unquote
from youtube_dl import YoutubeDL
import urllib.parse
import requests
import json

# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
_formats = {
    # (itag table identical to the _formats dict reproduced earlier in this diff; omitted here to avoid repeating it)
}

def get_renderer_key(renderer, key):
    for k in renderer:
        if key in k:
            return k[key]


def get_video_primary_info(datad, datai):
    contents = datai["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoPrimaryInfoRenderer")
    details = datad['videoDetails']

    # Check if is Livestream
    if details.get('isLive') and details['lengthSeconds'] == '0':
        isLive = True
    else:
        isLive = False

    # Check if is a Scheduled video
    if details.get('isUpcoming') == True:
        isUpcoming = True
        views = "Scheduled video"
        premieres = item['dateText']['simpleText']
        audioURL = False
    else:
        isUpcoming = False
        premieres = False
        views = details['viewCount']

    ydl = YoutubeDL()

    if isUpcoming == False:
        data = ydl.extract_info(details['videoId'], False)
        while not data['formats']:
            data = ydl.extract_info(details['videoId'], False)
        formats = data['formats']

        ## Get audio
        audio_urls = []
        for f in data['formats']:
            for fid in _formats:
                if f['format_id'] == fid:
                    try:
                        if 'audio' in _formats[fid]['format_note']:
                            aurl = f['url']
                            fnote = _formats[fid]['format_note']
                            bitrate = _formats[fid]['audio_bitrate']
                            audio_inf = {
                                "url": aurl,
                                "id": fnote,
                                "btr": bitrate
                            }
                            audio_urls.append(audio_inf)
                    except:
                        continue
        if not isLive:
            audioURL = audio_urls[-1]['url']
        else:
            audioURL = False
    else:  # If it is a scheduled video
        audio_urls = False
        formats = False
    try:
        primaryInfo = {
            "id": details['videoId'],
            "title": details['title'],
            "description": details['shortDescription'],
            "views": views,
            "duration": details['lengthSeconds'],
            "date": item['dateText']['simpleText'],
            "rating": details['averageRating'],
            "author": details['author'],
            "isPrivate": details['isPrivate'],
            "isLive": isLive,
            "isUpcoming": isUpcoming,
            "url": url,  # NOTE: 'url' is not defined in this scope; the resulting NameError falls through to the except branch below
            "allowRatings": details['allowRatings'],
            "urls": formats,
            "thumbnail": details['thumbnail']['thumbnails'][0]['url'],
            "audio": audioURL,
            "premieres": premieres
        }
    except:
        # If error take only most common items
        primaryInfo = {
            "id": details['videoId'],
            "title": details['title'],
            "description": details['shortDescription'],
            "views": details['viewCount'],
            "duration": details['lengthSeconds'],
            "date": item['dateText']['simpleText'],
            "rating": details['averageRating'],
            "author": details['author'],
            "isPrivate": False,
            "isLive": isLive,
            "isUpcoming": isUpcoming,
            "allowRatings": True,
            "urls": formats,
            "thumbnail": details['thumbnail']['thumbnails'][0]['url'],
            "audio": audioURL,
            "premieres": premieres
        }
    return primaryInfo


def get_video_owner_info(data):
    contents = data["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoSecondaryInfoRenderer")
    ownerItem = item['owner']['videoOwnerRenderer']

    try:
        sC = ownerItem['subscriberCountText']['runs'][0]['text']
    except:
        sC = "Unknown"
    ownerInfo = {
        "thumbnail": ownerItem['thumbnail']['thumbnails'][0]['url'],
        "username": ownerItem['title']['runs'][0]['text'],
        "id": ownerItem['title']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'],
        "suscriberCount": sC
    }
    return ownerInfo


def get_video_info(id):
    headers = {"Accept-Language": "en-US,en;q=0.5"}
    encoded_search = urllib.parse.quote(id)
    BASE_URL = "https://youtube.com"

    url = f"{BASE_URL}/watch?v={encoded_search}"
    response = requests.get(url, headers=headers).text

    while 'window["ytInitialData"]' not in response:
        response = requests.get(url, headers=headers).text

    start = (
        response.index('window["ytInitialData"]')
        + len('window["ytInitialData"]')
        + 3
    )

    start2 = (
        response.index('window["ytInitialPlayerResponse"]')
        + len('window["ytInitialPlayerResponse"]') + 3
    )

    end1 = response.index("};", start) + 1
    end2 = response.index("};", start2) + 1
    jsonIni = response[start:end1]
    dataInitial = json.loads(jsonIni)

    try:
        jsonDet = response[start2:end2]
        dataDetails = json.loads(jsonDet)
    except:
        response = requests.get(url, headers=headers).json()
        jsonDet = response[start2:end2]
        dataDetails = json.loads(jsonDet)

    # title, views, date
    videoInfo = get_video_primary_info(dataDetails, dataInitial)
    ownerInfo = get_video_owner_info(dataInitial)

    '''soup = bs(response, "html.parser")
    soup = str(str(soup.find("div", attrs={"id":"player-wrap"}).find_all("script")).split("ytplayer.config =")[1]).split("url")
    for url in soup:
        if "googlevideo" in url:
            print(unquote(url.replace("\\", "")))'''
    info = {"video": videoInfo, "owner": ownerInfo}
    return info
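# Editor's sketch (not part of the original, now-deleted module): how a caller might have
# used this helper before it was removed. The video id is a placeholder and the call
# performs network requests.
#
#     data = get_video_info('dQw4w9WgXcQ')
#     title = data['video']['title']
#     channel = data['owner']['username']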