Merge branch 'dev-indep' of https://github.com/ytorg/yotter into dev-indep
Commit 7a71b6914a

app/routes.py (575 lines changed)
File diff suppressed because it is too large
@@ -1,7 +1,7 @@
<div class="comment">
<a class="avatar" style="width: 32px; height: 32px;"><img src="{{ comment.thumbnail }}"></a>
<div class="content">
{% if comment.authorIsChannelOwner %}
{% if comment.author == info.author %}
<a class="author" style="color: red;" href="{{comment.channel}}"><i class="red user circle icon"></i>{{comment.author}}</a>
{% else %}
@@ -22,9 +22,6 @@
<i class="thumbs up icon"></i>
{{comment.likes}}
</div>
{%if comment.creatorHeart != false%}
<i class="small red heart icon"></i><img class="ui circular image" style="width: 15px; height: 15px;" src="{{comment.creatorHeart}}">
{% endif %}
</div>
</div>
</div>
@@ -4,30 +4,30 @@
{% extends "base.html" %}
{% block content %}
<div class="ui text container">
{% if video.nginxUrl == "#" %}
{% if info.error != None or info.playability_error != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">ERROR WITH VIDEO</h4>
</div>
</div>
{% elif video.isUpcoming %}
{% elif info.playability_status != None %}
<div class="ui center aligned text container">
<div class="ui segment">
<h4 class="ui header">SCHEDULED VIDEO</h4>
<h5 class="ui header">{{video.premieres}}</h5>
</div>
</div>
{% elif video.isLive %}
{% elif info.live %}
<div class="video-js-responsive-container vjs-hd">
<video-js id=live width="1080" class="video-js vjs-default-skin" controls buffered>
<video-js id=live width="1080" class="video-js vjs-default-skin" controls>
<source
src="{{urls[0]['url']}}"
src="#"
type="application/x-mpegURL">
</video-js>
</div>
<div class="ui center aligned text container">
<div class="ui segment">
<h3 class="ui header">LIVESTREAM VIDEO</h3>
<h3 class="ui header"><i class="red small circle icon"></i> LIVESTREAM VIDEO</h3>
<h4 class="ui header">FEATURE AVAILABLE SOON</h4>
<h5 class="ui header">Livestreams are under development and still not supported on Yotter.</h5>
</div>
@@ -41,11 +41,11 @@
buffered
preload="none">
{% if config.nginxVideoStream %}
{% for url in urls %}
<source src="{{url.url}}" type="video/{{url.ext}}">
{% for format in info.formats %}
{% if format.video_valid %}
<source src="{{format.url}}" type="video/{{format.ext}}">
{% endif %}
{% endfor %}
{% else %}
<source src="{{url_for('stream', url=video.videoUrl.replace('/', 'YotterSlash'))}}" type="video/mp4">
{% endif %}
</video>
</div>
@@ -53,55 +53,54 @@

<div class="ui segments">
<div class="ui segment">
<h2 class="ui header break-word">{{video.title}}</h2>
<h2 class="ui header break-word">{{info.title}}</h2>
</div>
<div class="ui horizontal segments">
<div class="center aligned ui segment">
<a href="{{ url_for('channel', id=video.channelId)}}">
{%if video.author.__len__() > 8%}
<i class="user icon"></i> {{video.author[0:10]+'...'}}
{%else%}
<i class="user icon"></i> {{video.author}}
{%endif%}
<a href="{{ url_for('channel', id=info.author_id)}}">
<i class="user icon"></i> {{info.author}}
</a>
</div>
<div class="center aligned ui segment">
<h4 class="ui header"><i class="grey eye icon"></i>{{video.viewCount}}</h4>
<h4 class="ui header"><i class="grey eye icon"></i>{{info.view_count}}</h4>
</div>
<div class="center aligned ui segment">
{% if video.averageRating | int > 49 %}
<h4 class="ui header"><i class="green thumbs up icon"></i> {{video.averageRating[0:4]}}%</h4>
{% if info.rating | int > 49 %}
<h4 class="ui header"><i class="green thumbs up icon"></i> {{info.rating}}%</h4>
{% else %}
<h4 class="ui header"><i class="red thumbs down icon"></i> {{video.averageRating[0:4]}}%</h4>
<h4 class="ui header"><i class="red thumbs down icon"></i> {{info.rating}}%</h4>
{% endif %}
</div>
</div>

<div class="ui raised center aligned segment break-word">
<p><i class="grey music icon"></i><b><a href="{{video.nginxAudioUrl}}">Play Only Audio</a></b></p>
<p><i class="grey music icon"></i><b>Audio Only</b></p>
<audio controls>
<source src="{{video.nginxAudioUrl}}">
Your browser does not support the audio element.
{% for format in info.formats %}
{% if format.audio_valid %}
<source src="{{format.url}}">
{%endif%}
{%endfor%}
No audio available.
</audio>
</div>

<div class="ui raised segment break-word">
<p>{{video.description}}</p>
<p>{{info.description}}</p>
</div>
</div>

{% if comments != False %}
<div class="ui comments">
<h3 class="ui dividing header">Comments</h3>
{% for comment in video.comments %}
{% for comment in videocomments %}
{% include '_video_comment.html' %}
{% endfor %}
</div>
{%endif%}

<script src="{{ url_for('static',filename='video.min.js') }}"></script>
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
{% if video.isLive %}
{% if info.live %}
<p>Active</p>
<script src="{{ url_for('static',filename='videojs-http-streaming.min.js')}}"></script>
<script>
var player = videojs('live');
player.play();
youtube/channel.py (new file, 281 lines)
@@ -0,0 +1,281 @@
|
||||
import base64
|
||||
from youtube import util, yt_data_extract, local_playlist, subscriptions
|
||||
from youtube import yt_app
|
||||
|
||||
import urllib
|
||||
import json
|
||||
from string import Template
|
||||
import youtube.proto as proto
|
||||
import html
|
||||
import math
|
||||
import gevent
|
||||
import re
|
||||
import cachetools.func
|
||||
import traceback
|
||||
|
||||
import flask
|
||||
from flask import request
|
||||
|
||||
headers_desktop = (
|
||||
('Accept', '*/*'),
|
||||
('Accept-Language', 'en-US,en;q=0.5'),
|
||||
('X-YouTube-Client-Name', '1'),
|
||||
('X-YouTube-Client-Version', '2.20180830'),
|
||||
) + util.desktop_ua
|
||||
headers_mobile = (
|
||||
('Accept', '*/*'),
|
||||
('Accept-Language', 'en-US,en;q=0.5'),
|
||||
('X-YouTube-Client-Name', '2'),
|
||||
('X-YouTube-Client-Version', '2.20180830'),
|
||||
) + util.mobile_ua
|
||||
real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
|
||||
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
|
||||
|
||||
# SORT:
|
||||
# videos:
|
||||
# Popular - 1
|
||||
# Oldest - 2
|
||||
# Newest - 3
|
||||
# playlists:
|
||||
# Oldest - 2
|
||||
# Newest - 3
|
||||
# Last video added - 4
|
||||
|
||||
# view:
|
||||
# grid: 0 or 1
|
||||
# list: 2
|
||||
def channel_ctoken_v3(channel_id, page, sort, tab, view=1):
|
||||
# page > 1 doesn't work when sorting by oldest
|
||||
offset = 30*(int(page) - 1)
|
||||
page_token = proto.string(61, proto.unpadded_b64encode(
|
||||
proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
|
||||
))
|
||||
|
||||
tab = proto.string(2, tab )
|
||||
sort = proto.uint(3, int(sort))
|
||||
|
||||
shelf_view = proto.uint(4, 0)
|
||||
view = proto.uint(6, int(view))
|
||||
continuation_info = proto.string(3,
|
||||
proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
|
||||
)
|
||||
|
||||
channel_id = proto.string(2, channel_id )
|
||||
pointless_nest = proto.string(80226972, channel_id + continuation_info)
|
||||
|
||||
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
|
||||
|
||||
def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
|
||||
# see https://github.com/iv-org/invidious/issues/1319#issuecomment-671732646
|
||||
# page > 1 doesn't work when sorting by oldest
|
||||
offset = 30*(int(page) - 1)
|
||||
schema_number = {
|
||||
3: 6307666885028338688,
|
||||
2: 17254859483345278706,
|
||||
1: 16570086088270825023,
|
||||
}[int(sort)]
|
||||
page_token = proto.string(61, proto.unpadded_b64encode(proto.string(1,
|
||||
proto.uint(1, schema_number) + proto.string(2,
|
||||
proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
|
||||
)
|
||||
)))
|
||||
|
||||
tab = proto.string(2, tab )
|
||||
sort = proto.uint(3, int(sort))
|
||||
#page = proto.string(15, str(page) )
|
||||
|
||||
shelf_view = proto.uint(4, 0)
|
||||
view = proto.uint(6, int(view))
|
||||
continuation_info = proto.string(3,
|
||||
proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
|
||||
)
|
||||
|
||||
channel_id = proto.string(2, channel_id )
|
||||
pointless_nest = proto.string(80226972, channel_id + continuation_info)
|
||||
|
||||
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
|
||||
|
||||
def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
|
||||
tab = proto.string(2, tab )
|
||||
sort = proto.uint(3, int(sort))
|
||||
page = proto.string(15, str(page) )
|
||||
# example with shelves in videos tab: https://www.youtube.com/channel/UCNL1ZadSjHpjm4q9j2sVtOA/videos
|
||||
shelf_view = proto.uint(4, 0)
|
||||
view = proto.uint(6, int(view))
|
||||
continuation_info = proto.string(3, proto.percent_b64encode(tab + view + sort + shelf_view + page + proto.uint(23, 0)) )
|
||||
|
||||
channel_id = proto.string(2, channel_id )
|
||||
pointless_nest = proto.string(80226972, channel_id + continuation_info)
|
||||
|
||||
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
|
||||
|
||||
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
|
||||
message = 'Got channel tab' if print_status else None
|
||||
|
||||
if int(sort) == 2 and int(page) > 1:
|
||||
ctoken = channel_ctoken_v1(channel_id, page, sort, tab, view)
|
||||
ctoken = ctoken.replace('=', '%3D')
|
||||
url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab
|
||||
+ '?action_continuation=1&continuation=' + ctoken
|
||||
+ '&pbj=1')
|
||||
content = util.fetch_url(url, headers_desktop + real_cookie,
|
||||
debug_name='channel_tab', report_text=message)
|
||||
else:
|
||||
ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
|
||||
ctoken = ctoken.replace('=', '%3D')
|
||||
url = 'https://www.youtube.com/browse_ajax?ctoken=' + ctoken
|
||||
content = util.fetch_url(url,
|
||||
headers_desktop + generic_cookie,
|
||||
debug_name='channel_tab', report_text=message)
|
||||
|
||||
return content
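# Illustrative usage sketch (channel id below is a placeholder):
#   raw = get_channel_tab('UCXXXXXXXXXXXXXXXXXXXXXX', page='2', sort=3, tab='videos')
#   polymer_json = json.loads(raw)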
|
||||
|
||||
# cache entries expire after 30 minutes
|
||||
@cachetools.func.ttl_cache(maxsize=128, ttl=30*60)
|
||||
def get_number_of_videos_channel(channel_id):
|
||||
if channel_id is None:
|
||||
return 1000
|
||||
|
||||
# Uploads playlist
|
||||
playlist_id = 'UU' + channel_id[2:]
|
||||
url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
|
||||
|
||||
try:
|
||||
response = util.fetch_url(url, headers_mobile,
|
||||
debug_name='number_of_videos', report_text='Got number of videos')
|
||||
except urllib.error.HTTPError as e:
|
||||
traceback.print_exc()
|
||||
print("Couldn't retrieve number of videos")
|
||||
return 1000
|
||||
|
||||
response = response.decode('utf-8')
|
||||
|
||||
# match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
|
||||
match = re.search(r'"numVideosText".*?([,\d]+)', response)
|
||||
if match:
|
||||
return int(match.group(1).replace(',',''))
|
||||
else:
|
||||
return 0
|
||||
|
||||
channel_id_re = re.compile(r'videos\.xml\?channel_id=([a-zA-Z0-9_-]{24})"')
|
||||
@cachetools.func.lru_cache(maxsize=128)
|
||||
def get_channel_id(base_url):
|
||||
# method that gives the smallest possible response at ~4 kb
|
||||
# needs to be as fast as possible
|
||||
base_url = base_url.replace('https://www', 'https://m') # avoid redirect
|
||||
response = util.fetch_url(base_url + '/about?pbj=1', headers_mobile,
|
||||
debug_name='get_channel_id', report_text='Got channel id').decode('utf-8')
|
||||
match = channel_id_re.search(response)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
def get_number_of_videos_general(base_url):
|
||||
return get_number_of_videos_channel(get_channel_id(base_url))
|
||||
|
||||
def get_channel_search_json(channel_id, query, page):
|
||||
params = proto.string(2, 'search') + proto.string(15, str(page))
|
||||
params = proto.percent_b64encode(params)
|
||||
ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
|
||||
ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
|
||||
|
||||
polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, headers_desktop, debug_name='channel_search')
|
||||
|
||||
return polymer_json
|
||||
|
||||
|
||||
def post_process_channel_info(info):
|
||||
info['avatar'] = util.prefix_url(info['avatar'])
|
||||
info['channel_url'] = util.prefix_url(info['channel_url'])
|
||||
for item in info['items']:
|
||||
util.prefix_urls(item)
|
||||
util.add_extra_html_info(item)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
|
||||
|
||||
# youtube.com/[channel_id]/[tab]
|
||||
# youtube.com/user/[username]/[tab]
|
||||
# youtube.com/c/[custom]/[tab]
|
||||
# youtube.com/[custom]/[tab]
|
||||
def get_channel_page_general_url(base_url, tab, request, channel_id=None):
|
||||
|
||||
page_number = int(request.args.get('page', 1))
|
||||
sort = request.args.get('sort', '3')
|
||||
view = request.args.get('view', '1')
|
||||
query = request.args.get('query', '')
|
||||
|
||||
if tab == 'videos' and channel_id:
|
||||
tasks = (
|
||||
gevent.spawn(get_number_of_videos_channel, channel_id),
|
||||
gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
|
||||
)
|
||||
gevent.joinall(tasks)
|
||||
util.check_gevent_exceptions(*tasks)
|
||||
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
|
||||
elif tab == 'videos':
|
||||
tasks = (
|
||||
gevent.spawn(get_number_of_videos_general, base_url),
|
||||
gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', headers_desktop, debug_name='gen_channel_videos')
|
||||
)
|
||||
gevent.joinall(tasks)
|
||||
util.check_gevent_exceptions(*tasks)
|
||||
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
|
||||
elif tab == 'about':
|
||||
polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
|
||||
elif tab == 'playlists':
|
||||
polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
|
||||
elif tab == 'search' and channel_id:
|
||||
polymer_json = get_channel_search_json(channel_id, query, page_number)
|
||||
elif tab == 'search':
|
||||
url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
|
||||
polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
|
||||
else:
|
||||
flask.abort(404, 'Unknown channel tab: ' + tab)
|
||||
|
||||
|
||||
info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab)
|
||||
if info['error'] is not None:
|
||||
return flask.render_template('error.html', error_message = info['error'])
|
||||
|
||||
post_process_channel_info(info)
|
||||
if tab == 'videos':
|
||||
info['number_of_videos'] = number_of_videos
|
||||
info['number_of_pages'] = math.ceil(number_of_videos/30)
|
||||
info['header_playlist_names'] = local_playlist.get_playlist_names()
|
||||
if tab in ('videos', 'playlists'):
|
||||
info['current_sort'] = sort
|
||||
elif tab == 'search':
|
||||
info['search_box_value'] = query
|
||||
info['header_playlist_names'] = local_playlist.get_playlist_names()
|
||||
info['page_number'] = page_number
|
||||
info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])
|
||||
|
||||
return flask.render_template('channel.html',
|
||||
parameters_dictionary = request.args,
|
||||
**info
|
||||
)
|
||||
|
||||
@yt_app.route('/channel/<channel_id>/')
|
||||
@yt_app.route('/channel/<channel_id>/<tab>')
|
||||
def get_channel_page(channel_id, tab='videos'):
|
||||
return get_channel_page_general_url('https://www.youtube.com/channel/' + channel_id, tab, request, channel_id)
|
||||
|
||||
@yt_app.route('/user/<username>/')
|
||||
@yt_app.route('/user/<username>/<tab>')
|
||||
def get_user_page(username, tab='videos'):
|
||||
return get_channel_page_general_url('https://www.youtube.com/user/' + username, tab, request)
|
||||
|
||||
@yt_app.route('/c/<custom>/')
|
||||
@yt_app.route('/c/<custom>/<tab>')
|
||||
def get_custom_c_page(custom, tab='videos'):
|
||||
return get_channel_page_general_url('https://www.youtube.com/c/' + custom, tab, request)
|
||||
|
||||
@yt_app.route('/<custom>')
|
||||
@yt_app.route('/<custom>/<tab>')
|
||||
def get_toplevel_custom_page(custom, tab='videos'):
|
||||
return get_channel_page_general_url('https://www.youtube.com/' + custom, tab, request)
|
||||
|
youtube/comments.py (new file, 145 lines)
@@ -0,0 +1,145 @@
|
||||
import base64
|
||||
import json
|
||||
|
||||
from youtube import proto, util, yt_data_extract
|
||||
from youtube.util import concat_or_none
|
||||
|
||||
|
||||
# Here's what I know about the secret key (starting with ASJN_i)
|
||||
# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
|
||||
# -Video id
|
||||
# -Offset
|
||||
# -Sort
|
||||
# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
|
||||
# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
|
||||
# *The encoded data is not valid protobuf
|
||||
# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
|
||||
# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
|
||||
# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since
|
||||
# *The ASJN has no relation with any of the data in the response it came from
|
||||
|
||||
def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
|
||||
video_id = proto.as_bytes(video_id)
|
||||
secret_key = proto.as_bytes(secret_key)
|
||||
|
||||
|
||||
page_info = proto.string(4,video_id) + proto.uint(6, sort)
|
||||
offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
|
||||
if secret_key:
|
||||
offset_information = proto.string(1, secret_key) + offset_information
|
||||
|
||||
page_params = proto.string(2, video_id)
|
||||
if lc:
|
||||
page_params += proto.string(6, proto.percent_b64encode(proto.string(15, lc)))
|
||||
|
||||
result = proto.nested(2, page_params) + proto.uint(3,6) + proto.nested(6, offset_information)
|
||||
return base64.urlsafe_b64encode(result).decode('ascii')
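# Illustrative sketch (video id is a placeholder): the second page of top comments
# uses an offset of 20, e.g.
#   ctoken = make_comment_ctoken('XXXXXXXXXXX', sort=0, offset=20)
# which is then passed to request_comments() below.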
|
||||
|
||||
def comment_replies_ctoken(video_id, comment_id, max_results=500):
|
||||
|
||||
params = proto.string(2, comment_id) + proto.uint(9, max_results)
|
||||
params = proto.nested(3, params)
|
||||
|
||||
result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params)
|
||||
return base64.urlsafe_b64encode(result).decode('ascii')
|
||||
|
||||
|
||||
|
||||
mobile_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'X-YouTube-Client-Name': '2',
|
||||
'X-YouTube-Client-Version': '2.20180823',
|
||||
}
|
||||
def request_comments(ctoken, replies=False):
|
||||
if replies: # let's make it use different urls for no reason despite all the data being encoded
|
||||
base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
|
||||
else:
|
||||
base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
|
||||
url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
|
||||
|
||||
for i in range(0,8): # don't retry more than 8 times
|
||||
content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments')
|
||||
if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
|
||||
content = content[4:]
|
||||
elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
|
||||
content = b''
|
||||
print("got <!DOCTYPE>, retrying")
|
||||
continue
|
||||
break
|
||||
|
||||
polymer_json = json.loads(util.uppercase_escape(content.decode('utf-8')))
|
||||
return polymer_json
|
||||
|
||||
|
||||
def single_comment_ctoken(video_id, comment_id):
|
||||
page_params = proto.string(2, video_id) + proto.string(6, proto.percent_b64encode(proto.string(15, comment_id)))
|
||||
|
||||
result = proto.nested(2, page_params) + proto.uint(3,6)
|
||||
return base64.urlsafe_b64encode(result).decode('ascii')
|
||||
|
||||
|
||||
|
||||
def post_process_comments_info(comments_info):
|
||||
for comment in comments_info['comments']:
|
||||
comment['author_url'] = concat_or_none(
|
||||
util.URL_ORIGIN, comment['author_url'])
|
||||
comment['author_avatar'] = concat_or_none(
|
||||
'/', comment['author_avatar'])
|
||||
|
||||
comment['permalink'] = concat_or_none(util.URL_ORIGIN, '/watch?v=',
|
||||
comments_info['video_id'], '&lc=', comment['id'])
|
||||
|
||||
reply_count = comment['reply_count']
|
||||
if reply_count == 0:
|
||||
comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
|
||||
'/post_comment?parent_id=', comment['id'],
|
||||
'&video_id=', comments_info['video_id'])
|
||||
else:
|
||||
comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
|
||||
'/comments?parent_id=', comment['id'],
|
||||
'&video_id=', comments_info['video_id'])
|
||||
|
||||
if reply_count == 0:
|
||||
comment['view_replies_text'] = 'Reply'
|
||||
elif reply_count == 1:
|
||||
comment['view_replies_text'] = '1 reply'
|
||||
else:
|
||||
comment['view_replies_text'] = str(reply_count) + ' replies'
|
||||
|
||||
|
||||
if comment['like_count'] == 1:
|
||||
comment['likes_text'] = '1 like'
|
||||
else:
|
||||
comment['likes_text'] = str(comment['like_count']) + ' likes'
|
||||
|
||||
|
||||
if comments_info['ctoken']:
|
||||
comments_info['more_comments_url'] = concat_or_none(util.URL_ORIGIN,
|
||||
'/comments?ctoken=', comments_info['ctoken'])
|
||||
|
||||
comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)
|
||||
|
||||
if not comments_info['is_replies']:
|
||||
comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
|
||||
|
||||
|
||||
comments_info['video_url'] = concat_or_none(util.URL_ORIGIN,
|
||||
'/watch?v=', comments_info['video_id'])
|
||||
comments_info['video_thumbnail'] = concat_or_none('/i.ytimg.com/vi/',
|
||||
comments_info['video_id'], '/mqdefault.jpg')
|
||||
|
||||
|
||||
def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
|
||||
comments_info = yt_data_extract.extract_comments_info(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key)))
|
||||
post_process_comments_info(comments_info)
|
||||
|
||||
post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id
|
||||
other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
|
||||
other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top')
|
||||
comments_info['comment_links'] = [('Post comment', post_comment_url), (other_sort_text, other_sort_url)]
|
||||
|
||||
return comments_info
|
||||
|
||||
return {}
|
youtube/opensearch.xml (new file, 11 lines)
@@ -0,0 +1,11 @@
|
||||
<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
|
||||
<ShortName>Youtube local</ShortName>
|
||||
<Description>no CIA shit in the background</Description>
|
||||
<InputEncoding>UTF-8</InputEncoding>
|
||||
<Image width="16" height="16">data:image/x-icon;base64,AAABAAEAEBAAAAEACAAlAgAAFgAAAIlQTkcNChoKAAAADUlIRFIAAAAQAAAAEAgGAAAAH/P/YQAAAexJREFUOI2lkzFPmlEUhp/73fshtCUCRtvQkJoKMrDQJvoHnBzUhc3EH0DUQf+As6tujo4M6mTiIDp0kGiMTRojTRNSW6o12iD4YYXv3g7Qr4O0ScM7npz7vOe+J0fk83lDF7K6eQygwkdHhI+P0bYNxmBXq5RmZui5vGQgn0f7fKi7O4oLC1gPD48BP9JpnpRKJFZXcQMB3m1u4vr9NHp76d/bo39/n4/z84ROThBa4/r91OJxMKb9BSn5mskAIOt1eq6uEFpjVyrEcjk+T0+TXlzkbTZLuFDAur9/nIFRipuREQCe7+zgBgK8mZvj/fIylVTKa/6UzXKbSnnuHkA0GnwbH/cA0a0takND3IyOEiwWAXBiMYTWjzLwtvB9bAyAwMUF8ZUVPiwtYTWbHqA6PIxoNv8OMLbN3eBga9TZWYQxaKX+AJJJhOv+AyAlT0slAG6TSX5n8+zszJugkzxA4PzcK9YSCQCk42DXaq1aGwqgfT5ebG9jpMQyUjKwu8vrtbWWqxC83NjAd31NsO2uleJnX58HCJ6eEjk8BGNQAA+RCOXJScpTU2AMwnUxlkXk4ACA+2iUSKGArNeRjkMsl6M8MYHQGtHpmIxSvFpfRzoORinQGqvZBCEwQoAxfMlkaIRCnQH/o66v8Re19MavaDNLfgAAAABJRU5ErkJggg==</Image>
|
||||
|
||||
<Url type="text/html" method="GET" template="http://localhost:$port_number/youtube.com/search">
|
||||
<Param name="query" value="{searchTerms}"/>
|
||||
</Url>
|
||||
<SearchForm>http://localhost:$port_number/youtube.com/search</SearchForm>
|
||||
</SearchPlugin>
|
youtube/playlist.py (new file, 123 lines)
@@ -0,0 +1,123 @@
|
||||
from youtube import util, yt_data_extract, proto, local_playlist
|
||||
from youtube import yt_app
|
||||
|
||||
import base64
|
||||
import urllib
|
||||
import json
|
||||
import string
|
||||
import gevent
|
||||
import math
|
||||
from flask import request
|
||||
import flask
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def playlist_ctoken(playlist_id, offset):
|
||||
|
||||
offset = proto.uint(1, offset)
|
||||
# this is just obfuscation as far as I can tell. It doesn't even follow protobuf
|
||||
offset = b'PT:' + proto.unpadded_b64encode(offset)
|
||||
offset = proto.string(15, offset)
|
||||
|
||||
continuation_info = proto.string( 3, proto.percent_b64encode(offset) )
|
||||
|
||||
playlist_id = proto.string(2, 'VL' + playlist_id )
|
||||
pointless_nest = proto.string(80226972, playlist_id + continuation_info)
|
||||
|
||||
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
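# Illustrative sketch (playlist id is a placeholder): page n of a playlist uses an
# offset of 20*(n - 1), so page 3 would be
#   ctoken = playlist_ctoken('PLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', 40)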
|
||||
|
||||
# initial request types:
|
||||
# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
|
||||
# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
|
||||
|
||||
|
||||
# continuation request types:
|
||||
# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
|
||||
# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
|
||||
|
||||
|
||||
headers_1 = (
|
||||
('Accept', '*/*'),
|
||||
('Accept-Language', 'en-US,en;q=0.5'),
|
||||
('X-YouTube-Client-Name', '2'),
|
||||
('X-YouTube-Client-Version', '2.20180614'),
|
||||
)
|
||||
|
||||
def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
|
||||
url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
|
||||
content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
|
||||
content = json.loads(util.uppercase_escape(content.decode('utf-8')))
|
||||
|
||||
return content
|
||||
|
||||
|
||||
#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
|
||||
def get_videos(playlist_id, page):
|
||||
|
||||
url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
|
||||
headers = {
|
||||
'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'X-YouTube-Client-Name': '2',
|
||||
'X-YouTube-Client-Version': '2.20180508',
|
||||
}
|
||||
|
||||
content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')
|
||||
|
||||
info = json.loads(util.uppercase_escape(content.decode('utf-8')))
|
||||
return info
|
||||
|
||||
|
||||
@yt_app.route('/playlist')
|
||||
def get_playlist_page():
|
||||
if 'list' not in request.args:
|
||||
flask.abort(400)
|
||||
|
||||
playlist_id = request.args.get('list')
|
||||
page = request.args.get('page', '1')
|
||||
|
||||
if page == '1':
|
||||
first_page_json = playlist_first_page(playlist_id)
|
||||
this_page_json = first_page_json
|
||||
else:
|
||||
tasks = (
|
||||
gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ),
|
||||
gevent.spawn(get_videos, playlist_id, page)
|
||||
)
|
||||
gevent.joinall(tasks)
|
||||
util.check_gevent_exceptions(*tasks)
|
||||
first_page_json, this_page_json = tasks[0].value, tasks[1].value
|
||||
|
||||
info = yt_data_extract.extract_playlist_info(this_page_json)
|
||||
if info['error']:
|
||||
return flask.render_template('error.html', error_message = info['error'])
|
||||
|
||||
if page != '1':
|
||||
info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)
|
||||
|
||||
util.prefix_urls(info['metadata'])
|
||||
for item in info.get('items', ()):
|
||||
util.prefix_urls(item)
|
||||
util.add_extra_html_info(item)
|
||||
if 'id' in item:
|
||||
item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg'
|
||||
|
||||
item['url'] += '&list=' + playlist_id
|
||||
if item['index']:
|
||||
item['url'] += '&index=' + str(item['index'])
|
||||
|
||||
video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count')
|
||||
if video_count is None:
|
||||
video_count = 40
|
||||
|
||||
return flask.render_template('playlist.html',
|
||||
header_playlist_names = local_playlist.get_playlist_names(),
|
||||
video_list = info.get('items', []),
|
||||
num_pages = math.ceil(video_count/20),
|
||||
parameters_dictionary = request.args,
|
||||
|
||||
**info['metadata']
|
||||
).encode('utf-8')
|
youtube/proto.py (new file, 129 lines)
@@ -0,0 +1,129 @@
|
||||
from math import ceil
|
||||
import base64
|
||||
import io
|
||||
|
||||
def byte(n):
|
||||
return bytes((n,))
|
||||
|
||||
|
||||
def varint_encode(offset):
|
||||
'''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 if this is the last one.
|
||||
The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
|
||||
aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
|
||||
1ccccccc 1bbbbbbb 0aaaaaaa
|
||||
|
||||
This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
|
||||
See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
|
||||
needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
|
||||
encoded_bytes = bytearray(needed_bytes)
|
||||
for i in range(0, needed_bytes - 1):
|
||||
encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
|
||||
offset = offset >> 7
|
||||
encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
|
||||
|
||||
return bytes(encoded_bytes)
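# Worked example: 300 in binary splits into the 7-bit groups 0000010 and 0101100;
# emitted little-endian with continuation bits this gives
#   varint_encode(300) == b'\xac\x02'
#   varint_encode(0)   == b'\x00'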
|
||||
|
||||
|
||||
def varint_decode(encoded):
|
||||
decoded = 0
|
||||
for i, byte in enumerate(encoded):
|
||||
decoded |= (byte & 127) << 7*i
|
||||
|
||||
if not (byte & 128):
|
||||
break
|
||||
return decoded
|
||||
|
||||
|
||||
def string(field_number, data):
|
||||
data = as_bytes(data)
|
||||
return _proto_field(2, field_number, varint_encode(len(data)) + data)
|
||||
nested = string
|
||||
|
||||
def uint(field_number, value):
|
||||
return _proto_field(0, field_number, varint_encode(value))
|
||||
|
||||
|
||||
|
||||
|
||||
def _proto_field(wire_type, field_number, data):
|
||||
''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
|
||||
return varint_encode( (field_number << 3) | wire_type) + data
|
||||
|
||||
|
||||
|
||||
def percent_b64encode(data):
|
||||
return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
|
||||
|
||||
|
||||
def unpadded_b64encode(data):
|
||||
return base64.urlsafe_b64encode(data).replace(b'=', b'')
|
||||
|
||||
def as_bytes(value):
|
||||
if isinstance(value, str):
|
||||
return value.encode('utf-8')
|
||||
return value
|
||||
|
||||
|
||||
def read_varint(data):
|
||||
result = 0
|
||||
i = 0
|
||||
while True:
|
||||
try:
|
||||
byte = data.read(1)[0]
|
||||
except IndexError:
|
||||
if i == 0:
|
||||
raise EOFError()
|
||||
raise Exception('Unterminated varint starting at ' + str(data.tell() - i))
|
||||
result |= (byte & 127) << 7*i
|
||||
if not byte & 128:
|
||||
break
|
||||
|
||||
i += 1
|
||||
return result
|
||||
|
||||
|
||||
def read_group(data, end_sequence):
|
||||
start = data.tell()
|
||||
index = data.original.find(end_sequence, start)
|
||||
if index == -1:
|
||||
raise Exception('Unterminated group')
|
||||
data.seek(index + len(end_sequence))
|
||||
return data.original[start:index]
|
||||
|
||||
def read_protobuf(data):
|
||||
data_original = data
|
||||
data = io.BytesIO(data)
|
||||
data.original = data_original
|
||||
while True:
|
||||
try:
|
||||
tag = read_varint(data)
|
||||
except EOFError:
|
||||
break
|
||||
wire_type = tag & 7
|
||||
field_number = tag >> 3
|
||||
|
||||
if wire_type == 0:
|
||||
value = read_varint(data)
|
||||
elif wire_type == 1:
|
||||
value = data.read(8)
|
||||
elif wire_type == 2:
|
||||
length = read_varint(data)
|
||||
value = data.read(length)
|
||||
elif wire_type == 3:
|
||||
end_bytes = varint_encode((field_number << 3) | 4)
|
||||
value = read_group(data, end_bytes)
|
||||
elif wire_type == 5:
|
||||
value = data.read(4)
|
||||
else:
|
||||
raise Exception("Unknown wire type: " + str(wire_type) + ", Tag: " + bytes_to_hex(succinct_encode(tag)) + ", at position " + str(data.tell()))
|
||||
yield (wire_type, field_number, value)
|
||||
|
||||
def parse(data):
|
||||
return {field_number: value for _, field_number, value in read_protobuf(data)}
|
||||
|
||||
def b64_to_bytes(data):
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode('ascii')
|
||||
data = data.replace("%3D", "=")
|
||||
return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4) )
|
||||
|
youtube/search.py (new file, 105 lines)
@@ -0,0 +1,105 @@
|
||||
import base64
|
||||
import json
|
||||
import urllib
|
||||
|
||||
import flask
|
||||
from flask import request
|
||||
from werkzeug.exceptions import abort
|
||||
|
||||
from youtube import util, yt_data_extract, proto, local_playlist
|
||||
from youtube import yt_app
|
||||
|
||||
# Sort: 1
|
||||
# Upload date: 2
|
||||
# View count: 3
|
||||
# Rating: 1
|
||||
# Relevance: 0
|
||||
# Offset: 9
|
||||
# Filters: 2
|
||||
# Upload date: 1
|
||||
# Type: 2
|
||||
# Duration: 3
|
||||
|
||||
|
||||
features = {
|
||||
'4k': 14,
|
||||
'hd': 4,
|
||||
'hdr': 25,
|
||||
'subtitles': 5,
|
||||
'creative_commons': 6,
|
||||
'3d': 7,
|
||||
'live': 8,
|
||||
'purchased': 9,
|
||||
'360': 15,
|
||||
'location': 23,
|
||||
}
|
||||
|
||||
def page_number_to_sp_parameter(page, autocorrect, sort, filters):
|
||||
offset = (int(page) - 1)*20 # 20 results per page
|
||||
autocorrect = proto.nested(8, proto.uint(1, 1 - int(autocorrect) ))
|
||||
filters_enc = proto.nested(2, proto.uint(1, filters['time']) + proto.uint(2, filters['type']) + proto.uint(3, filters['duration']))
|
||||
result = proto.uint(1, sort) + filters_enc + autocorrect + proto.uint(9, offset) + proto.string(61, b'')
|
||||
return base64.urlsafe_b64encode(result).decode('ascii')
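# Illustrative sketch: the default search (page 1, autocorrect on, relevance sort,
# no filters) corresponds to
#   sp = page_number_to_sp_parameter(1, 1, 0, {'time': 0, 'type': 0, 'duration': 0})
# which get_search_json() below appends to the results URL as &sp=...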
|
||||
|
||||
def get_search_json(query, page, autocorrect, sort, filters):
|
||||
url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
|
||||
headers = {
|
||||
'Host': 'www.youtube.com',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'X-YouTube-Client-Name': '1',
|
||||
'X-YouTube-Client-Version': '2.20180418',
|
||||
}
|
||||
url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
|
||||
content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
|
||||
info = json.loads(content)
|
||||
return info
|
||||
|
||||
|
||||
@yt_app.route('/search')
|
||||
def get_search_page():
|
||||
if len(request.args) == 0:
|
||||
return flask.render_template('base.html', title="Search")
|
||||
|
||||
if 'query' not in request.args:
|
||||
abort(400)
|
||||
|
||||
query = request.args.get("query")
|
||||
page = request.args.get("page", "1")
|
||||
autocorrect = int(request.args.get("autocorrect", "1"))
|
||||
sort = int(request.args.get("sort", "0"))
|
||||
filters = {}
|
||||
filters['time'] = int(request.args.get("time", "0"))
|
||||
filters['type'] = int(request.args.get("type", "0"))
|
||||
filters['duration'] = int(request.args.get("duration", "0"))
|
||||
polymer_json = get_search_json(query, page, autocorrect, sort, filters)
|
||||
|
||||
search_info = yt_data_extract.extract_search_info(polymer_json)
|
||||
if search_info['error']:
|
||||
return flask.render_template('error.html', error_message = search_info['error'])
|
||||
|
||||
for extract_item_info in search_info['items']:
|
||||
util.prefix_urls(extract_item_info)
|
||||
util.add_extra_html_info(extract_item_info)
|
||||
|
||||
corrections = search_info['corrections']
|
||||
if corrections['type'] == 'did_you_mean':
|
||||
corrected_query_string = request.args.to_dict(flat=False)
|
||||
corrected_query_string['query'] = [corrections['corrected_query']]
|
||||
corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
|
||||
elif corrections['type'] == 'showing_results_for':
|
||||
no_autocorrect_query_string = request.args.to_dict(flat=False)
|
||||
no_autocorrect_query_string['autocorrect'] = ['0']
|
||||
no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
|
||||
corrections['original_query_url'] = no_autocorrect_query_url
|
||||
|
||||
return flask.render_template('search.html',
|
||||
header_playlist_names = local_playlist.get_playlist_names(),
|
||||
query = query,
|
||||
estimated_results = search_info['estimated_results'],
|
||||
estimated_pages = search_info['estimated_pages'],
|
||||
corrections = search_info['corrections'],
|
||||
results = search_info['items'],
|
||||
parameters_dictionary = request.args,
|
||||
)
|
youtube/util.py (new file, 397 lines)
@@ -0,0 +1,397 @@
|
||||
import gzip
|
||||
|
||||
from youtube import yt_data_extract
|
||||
|
||||
try:
|
||||
import brotli
|
||||
have_brotli = True
|
||||
except ImportError:
|
||||
have_brotli = False
|
||||
import urllib.parse
|
||||
import re
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
import gevent
|
||||
import gevent.queue
|
||||
import gevent.lock
|
||||
|
||||
# The trouble with the requests library: It ships its own certificate bundle via certifi
|
||||
# instead of using the system certificate store, meaning self-signed certificates
|
||||
# configured by the user will not work. Some draconian networks block TLS unless a corporate
|
||||
# certificate is installed on the system. Additionally, some users install a self signed cert
|
||||
# in order to use programs to modify or monitor requests made by programs on the system.
|
||||
|
||||
# Finally, certificates expire and need to be updated, or are sometimes revoked. Sometimes
|
||||
# certificate authorites go rogue and need to be untrusted. Since we are going through Tor exit nodes,
|
||||
# this becomes all the more important. A rogue CA could issue a fake certificate for accounts.google.com, and a
|
||||
# malicious exit node could use this to decrypt traffic when logging in and retrieve passwords. Examples:
|
||||
# https://www.engadget.com/2015/10/29/google-warns-symantec-over-certificates/
|
||||
# https://nakedsecurity.sophos.com/2013/12/09/serious-security-google-finds-fake-but-trusted-ssl-certificates-for-its-domains-made-in-france/
|
||||
|
||||
# In the requests documentation it says:
|
||||
# "Before version 2.16, Requests bundled a set of root CAs that it trusted, sourced from the Mozilla trust store.
|
||||
# The certificates were only updated once for each Requests version. When certifi was not installed,
|
||||
# this led to extremely out-of-date certificate bundles when using significantly older versions of Requests.
|
||||
# For the sake of security we recommend upgrading certifi frequently!"
|
||||
# (http://docs.python-requests.org/en/master/user/advanced/#ca-certificates)
|
||||
|
||||
# Expecting users to remember to manually update certifi on Linux isn't reasonable in my view.
|
||||
# On windows, this is even worse since I am distributing all dependencies. This program is not
|
||||
# updated frequently, and using requests would lead to outdated certificates. Certificates
|
||||
# should be updated with OS updates, instead of thousands of developers of different programs
|
||||
# being expected to do this correctly 100% of the time.
|
||||
|
||||
# There is hope that this might be fixed eventually:
|
||||
# https://github.com/kennethreitz/requests/issues/2966
|
||||
|
||||
# Until then, I will use a mix of urllib3 and urllib.
|
||||
import urllib3
|
||||
import urllib3.contrib.socks
|
||||
|
||||
URL_ORIGIN = "/https://www.youtube.com"
|
||||
|
||||
connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
|
||||
|
||||
def get_pool(use_tor):
|
||||
return connection_pool
|
||||
|
||||
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
|
||||
'''Separate cookiejars for receiving and sending'''
|
||||
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
|
||||
self.cookiejar_send = cookiejar_send
|
||||
self.cookiejar_receive = cookiejar_receive
|
||||
|
||||
def http_request(self, request):
|
||||
if self.cookiejar_send is not None:
|
||||
self.cookiejar_send.add_cookie_header(request)
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
if self.cookiejar_receive is not None:
|
||||
self.cookiejar_receive.extract_cookies(response, request)
|
||||
return response
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
class FetchError(Exception):
|
||||
def __init__(self, code, reason='', ip=None):
|
||||
Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
|
||||
self.code = code
|
||||
self.reason = reason
|
||||
self.ip = ip
|
||||
|
||||
def decode_content(content, encoding_header):
|
||||
encodings = encoding_header.replace(' ', '').split(',')
|
||||
for encoding in reversed(encodings):
|
||||
if encoding == 'identity':
|
||||
continue
|
||||
if encoding == 'br':
|
||||
content = brotli.decompress(content)
|
||||
elif encoding == 'gzip':
|
||||
content = gzip.decompress(content)
|
||||
return content
|
||||
|
||||
def fetch_url_response(url, headers=(), timeout=15, data=None,
|
||||
cookiejar_send=None, cookiejar_receive=None,
|
||||
use_tor=True, max_redirects=None):
|
||||
'''
|
||||
returns response, cleanup_function
|
||||
When cookiejar_send is set to a CookieJar object,
|
||||
those cookies will be sent in the request (but cookies in response will not be merged into it)
|
||||
When cookiejar_receive is set to a CookieJar object,
|
||||
cookies received in the response will be merged into the object (nothing will be sent from it)
|
||||
When both are set to the same object, cookies will be sent from the object,
|
||||
and response cookies will be merged into it.
|
||||
'''
|
||||
headers = dict(headers) # Note: Calling dict() on a dict will make a copy
|
||||
if have_brotli:
|
||||
headers['Accept-Encoding'] = 'gzip, br'
|
||||
else:
|
||||
headers['Accept-Encoding'] = 'gzip'
|
||||
|
||||
# prevent python version being leaked by urllib if User-Agent isn't provided
|
||||
# (urllib will use ex. Python-urllib/3.6 otherwise)
|
||||
if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
|
||||
headers['User-Agent'] = 'Python-urllib'
|
||||
|
||||
method = "GET"
|
||||
if data is not None:
|
||||
method = "POST"
|
||||
if isinstance(data, str):
|
||||
data = data.encode('ascii')
|
||||
elif not isinstance(data, bytes):
|
||||
data = urllib.parse.urlencode(data).encode('ascii')
|
||||
|
||||
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
|
||||
req = urllib.request.Request(url, data=data, headers=headers)
|
||||
|
||||
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
|
||||
opener = urllib.request.build_opener(cookie_processor)
|
||||
|
||||
response = opener.open(req, timeout=timeout)
|
||||
cleanup_func = (lambda r: None)
|
||||
|
||||
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
|
||||
# default: Retry.DEFAULT = Retry(3)
|
||||
# (in connectionpool.py in urllib3)
|
||||
# According to the documentation for urlopen, a redirect counts as a
|
||||
# retry. So there are 3 redirects max by default.
|
||||
if max_redirects:
|
||||
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
|
||||
else:
|
||||
retries = urllib3.Retry(3)
|
||||
pool = get_pool(use_tor)
|
||||
response = pool.request(method, url, headers=headers,
|
||||
timeout=timeout, preload_content=False,
|
||||
decode_content=False, retries=retries)
|
||||
cleanup_func = (lambda r: r.release_conn())
|
||||
|
||||
return response, cleanup_func
|
||||
|
||||
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
||||
cookiejar_send=None, cookiejar_receive=None, use_tor=True,
|
||||
debug_name=None):
|
||||
start_time = time.time()
|
||||
|
||||
response, cleanup_func = fetch_url_response(
|
||||
url, headers, timeout=timeout,
|
||||
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
|
||||
use_tor=use_tor)
|
||||
response_time = time.time()
|
||||
|
||||
content = response.read()
|
||||
read_finish = time.time()
|
||||
|
||||
cleanup_func(response) # release_connection for urllib3
|
||||
|
||||
if (response.status == 429
|
||||
and content.startswith(b'<!DOCTYPE')
|
||||
and b'Our systems have detected unusual traffic' in content):
|
||||
ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
|
||||
content)
|
||||
ip = ip.group(1).decode('ascii') if ip else None
|
||||
raise FetchError('429', reason=response.reason, ip=ip)
|
||||
|
||||
elif response.status >= 400:
|
||||
raise FetchError(str(response.status), reason=response.reason, ip=None)
|
||||
|
||||
if report_text:
|
||||
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
|
||||
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
|
||||
return content
|
||||
|
||||
def head(url, use_tor=False, report_text=None, max_redirects=10):
|
||||
pool = get_pool(use_tor)
|
||||
start_time = time.time()
|
||||
|
||||
# default: Retry.DEFAULT = Retry(3)
|
||||
# (in connectionpool.py in urllib3)
|
||||
# According to the documentation for urlopen, a redirect counts as a retry
|
||||
# So there are 3 redirects max by default. Let's change that
|
||||
# to 10 since googlevideo redirects a lot.
|
||||
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
|
||||
raise_on_redirect=False)
|
||||
headers = {'User-Agent': 'Python-urllib'}
|
||||
response = pool.request('HEAD', url, headers=headers, retries=retries)
|
||||
if report_text:
|
||||
print(report_text, ' Latency:', round(time.time() - start_time,3))
|
||||
return response
|
||||
|
||||
mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
|
||||
mobile_ua = (('User-Agent', mobile_user_agent),)
|
||||
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
|
||||
desktop_ua = (('User-Agent', desktop_user_agent),)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class RateLimitedQueue(gevent.queue.Queue):
|
||||
''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. '''
|
||||
|
||||
def __init__(self, initial_burst=30, waiting_period=5, subsequent_bursts=10):
|
||||
self.initial_burst = initial_burst
|
||||
self.waiting_period = waiting_period
|
||||
self.subsequent_bursts = subsequent_bursts
|
||||
|
||||
self.count_since_last_wait = 0
|
||||
self.surpassed_initial = False
|
||||
|
||||
self.lock = gevent.lock.BoundedSemaphore(1)
|
||||
self.currently_empty = False
|
||||
self.empty_start = 0
|
||||
gevent.queue.Queue.__init__(self)
|
||||
|
||||
|
||||
def get(self):
|
||||
self.lock.acquire() # blocks if another greenlet currently has the lock
|
||||
if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
|
||||
gevent.sleep(self.waiting_period)
|
||||
self.count_since_last_wait = 0
|
||||
|
||||
elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial:
|
||||
self.surpassed_initial = True
|
||||
gevent.sleep(self.waiting_period)
|
||||
self.count_since_last_wait = 0
|
||||
|
||||
self.count_since_last_wait += 1
|
||||
|
||||
if not self.currently_empty and self.empty():
|
||||
self.currently_empty = True
|
||||
self.empty_start = time.monotonic()
|
||||
|
||||
item = gevent.queue.Queue.get(self) # blocks when nothing left
|
||||
|
||||
if self.currently_empty:
|
||||
if time.monotonic() - self.empty_start >= self.waiting_period:
|
||||
self.count_since_last_wait = 0
|
||||
self.surpassed_initial = False
|
||||
|
||||
self.currently_empty = False
|
||||
|
||||
self.lock.release()
|
||||
|
||||
return item
|
||||
|
||||
|
||||
|
||||
def download_thumbnail(save_directory, video_id):
|
||||
url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
|
||||
save_location = os.path.join(save_directory, video_id + ".jpg")
|
||||
try:
|
||||
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
|
||||
except urllib.error.HTTPError as e:
|
||||
print("Failed to download thumbnail for " + video_id + ": " + str(e))
|
||||
return False
|
||||
try:
|
||||
f = open(save_location, 'wb')
|
||||
except FileNotFoundError:
|
||||
os.makedirs(save_directory, exist_ok = True)
|
||||
f = open(save_location, 'wb')
|
||||
f.write(thumbnail)
|
||||
f.close()
|
||||
return True
|
||||
|
||||
def download_thumbnails(save_directory, ids):
|
||||
if not isinstance(ids, (list, tuple)):
|
||||
ids = list(ids)
|
||||
# only do 5 at a time
|
||||
# do the n where n is divisible by 5
|
||||
i = -1
|
||||
for i in range(0, int(len(ids)/5) - 1 ):
|
||||
gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)])
|
||||
# do the remainders (< 5)
|
||||
gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def dict_add(*dicts):
|
||||
for dictionary in dicts[1:]:
|
||||
dicts[0].update(dictionary)
|
||||
return dicts[0]
|
||||
|
||||
def video_id(url):
|
||||
url_parts = urllib.parse.urlparse(url)
|
||||
return urllib.parse.parse_qs(url_parts.query)['v'][0]
|
||||
|
||||
|
||||
# default, sddefault, mqdefault, hqdefault, hq720
|
||||
def get_thumbnail_url(video_id):
|
||||
return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
|
||||
|
||||
def seconds_to_timestamp(seconds):
|
||||
seconds = int(seconds)
|
||||
hours, seconds = divmod(seconds,3600)
|
||||
minutes, seconds = divmod(seconds,60)
|
||||
if hours != 0:
|
||||
timestamp = str(hours) + ":"
|
||||
timestamp += str(minutes).zfill(2) # zfill pads with zeros
|
||||
else:
|
||||
timestamp = str(minutes)
|
||||
|
||||
timestamp += ":" + str(seconds).zfill(2)
|
||||
return timestamp
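# Worked examples:
#   seconds_to_timestamp(65)   == '1:05'
#   seconds_to_timestamp(3725) == '1:02:05'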
|
||||
|
||||
|
||||
|
||||
def update_query_string(query_string, items):
|
||||
parameters = urllib.parse.parse_qs(query_string)
|
||||
parameters.update(items)
|
||||
return urllib.parse.urlencode(parameters, doseq=True)
|
||||
|
||||
|
||||
|
||||
def uppercase_escape(s):
|
||||
return re.sub(
|
||||
r'\\U([0-9a-fA-F]{8})',
|
||||
lambda m: chr(int(m.group(1), base=16)), s)
|
||||
|
||||
def prefix_url(url):
|
||||
if url is None:
|
||||
return None
|
||||
url = url.lstrip('/') # some urls have // before them, which has a special meaning
|
||||
return '/' + url
|
||||
|
||||
def left_remove(string, substring):
|
||||
'''removes substring from the start of string, if present'''
|
||||
if string.startswith(substring):
|
||||
return string[len(substring):]
|
||||
return string
|
||||
|
||||
def concat_or_none(*strings):
|
||||
'''Concatenates strings. Returns None if any of the arguments are None'''
|
||||
result = ''
|
||||
for string in strings:
|
||||
if string is None:
|
||||
return None
|
||||
result += string
|
||||
return result
|
||||
|
||||
|
||||
def prefix_urls(item):
|
||||
try:
|
||||
item['thumbnail'] = prefix_url(item['thumbnail'])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
item['author_url'] = prefix_url(item['author_url'])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def add_extra_html_info(item):
|
||||
if item['type'] == 'video':
|
||||
item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
|
||||
|
||||
video_info = {}
|
||||
for key in ('id', 'title', 'author', 'duration'):
|
||||
try:
|
||||
video_info[key] = item[key]
|
||||
except KeyError:
|
||||
video_info[key] = ''
|
||||
|
||||
item['video_info'] = json.dumps(video_info)
|
||||
|
||||
elif item['type'] == 'playlist':
|
||||
item['url'] = (URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
|
||||
elif item['type'] == 'channel':
|
||||
item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
|
||||
|
||||
def parse_info_prepare_for_html(renderer, additional_info={}):
|
||||
item = yt_data_extract.extract_item_info(renderer, additional_info)
|
||||
prefix_urls(item)
|
||||
add_extra_html_info(item)
|
||||
|
||||
return item
|
||||
|
||||
def check_gevent_exceptions(*tasks):
|
||||
for task in tasks:
|
||||
if task.exception:
|
||||
raise task.exception
|
||||
|
youtube/utils.py (new file, 61 lines)
@@ -0,0 +1,61 @@
import urllib.parse

from flask import Markup
import bleach


def get_description_snippet_text(ds):
    string = ""
    for t in ds:
        try:
            if t['bold']:
                text = "<b>" + t['text'] + "</b>"
            else:
                text = t['text']
        except KeyError:
            text = t['text']
        string = string + text
    return string


def concat_texts(strings):
    '''Concatenates the 'text' fields of a list of text runs. Returns None if any of them is None'''
    result = ''
    for string in strings:
        if string['text'] is None:
            return None
        result += string['text']
    return result


def parse_comment(raw_comment):
    cmnt = {}
    imgHostName = urllib.parse.urlparse(raw_comment['author_avatar'][1:]).netloc
    cmnt['author'] = raw_comment['author']
    cmnt['thumbnail'] = raw_comment['author_avatar'].replace("https://{}".format(imgHostName), "")[1:] + "?host=" + imgHostName

    cmnt['channel'] = raw_comment['author_url']
    cmnt['text'] = Markup(bleach.linkify(concat_texts(raw_comment['text']).replace("\n", "<br>")))
    cmnt['date'] = raw_comment['time_published']

    try:
        cmnt['creatorHeart'] = raw_comment['creatorHeart']['creatorHeartRenderer']['creatorThumbnail']['thumbnails'][0]['url']
    except (KeyError, IndexError, TypeError):
        cmnt['creatorHeart'] = False

    try:
        cmnt['likes'] = raw_comment['like_count']
    except KeyError:
        cmnt['likes'] = 0

    try:
        cmnt['replies'] = raw_comment['reply_count']
    except KeyError:
        cmnt['replies'] = 0
    return cmnt


def post_process_comments_info(comments_info):
    comments = []
    for comment in comments_info['comments']:
        comments.append(parse_comment(comment))
    return comments
246  youtube/watch.py  Normal file
@ -0,0 +1,246 @@
|
||||
import json
|
||||
import math
|
||||
import traceback
|
||||
import urllib
|
||||
|
||||
from youtube import util, yt_data_extract
|
||||
|
||||
|
||||
def get_video_sources(info, tor_bypass=False):
|
||||
video_sources = []
|
||||
max_resolution = 720  # compared against fmt['height'] (an int), so this must be numeric
|
||||
for fmt in info['formats']:
|
||||
if not all(fmt[attr] for attr in ('quality', 'width', 'ext', 'url')):
|
||||
continue
|
||||
if fmt['acodec'] and fmt['vcodec'] and fmt['height'] <= max_resolution:
|
||||
video_sources.append({
|
||||
'src': fmt['url'],
|
||||
'type': 'video/' + fmt['ext'],
|
||||
'quality': fmt['quality'],
|
||||
'height': fmt['height'],
|
||||
'width': fmt['width'],
|
||||
})
|
||||
|
||||
#### order the video sources so the preferred resolution is first ###
|
||||
|
||||
video_sources.sort(key=lambda source: source['quality'], reverse=True)
|
||||
|
||||
return video_sources
|
||||
|
||||
def make_caption_src(info, lang, auto=False, trans_lang=None):
|
||||
label = lang
|
||||
if auto:
|
||||
label += ' (Automatic)'
|
||||
if trans_lang:
|
||||
label += ' -> ' + trans_lang
|
||||
return {
|
||||
'url': '/' + yt_data_extract.get_caption_url(info, lang, 'vtt', auto, trans_lang),
|
||||
'label': label,
|
||||
'srclang': trans_lang[0:2] if trans_lang else lang[0:2],
|
||||
'on': False,
|
||||
}
|
||||
|
||||
def lang_in(lang, sequence):
|
||||
'''Tests if the language is in sequence, with e.g. en and en-US considered the same'''
|
||||
if lang is None:
|
||||
return False
|
||||
lang = lang[0:2]
|
||||
return lang in (l[0:2] for l in sequence)
|
||||
|
||||
def lang_eq(lang1, lang2):
|
||||
'''Tests if two iso 639-1 codes are equal, with en and en-US considered the same.
|
||||
Just because the codes are equal does not mean the dialects are mutually intelligible, but this will have to do for now without a complex language model'''
|
||||
if lang1 is None or lang2 is None:
|
||||
return False
|
||||
return lang1[0:2] == lang2[0:2]
|
||||
|
||||
def equiv_lang_in(lang, sequence):
|
||||
'''Extracts a language in sequence which is equivalent to lang.
|
||||
e.g. if lang is en, extracts en-GB from sequence.
|
||||
Necessary because, if only a specific variant like en-GB is available, we can't ask YouTube for plain en; we need to request the available variant.'''
|
||||
lang = lang[0:2]
|
||||
for l in sequence:
|
||||
if l[0:2] == lang:
|
||||
return l
|
||||
return None
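# Illustrative behaviour of the language helpers above (assumed example values, not from the original file):
#   lang_eq('en', 'en-US')               -> True
#   lang_in('en-GB', ['fr', 'en'])       -> True
#   equiv_lang_in('en', ['fr', 'en-GB']) -> 'en-GB'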
|
||||
|
||||
def get_subtitle_sources(info):
|
||||
'''Returns these sources, ordered from least to most intelligible:
|
||||
native_video_lang (Automatic)
|
||||
foreign_langs (Manual)
|
||||
native_video_lang (Automatic) -> pref_lang
|
||||
foreign_langs (Manual) -> pref_lang
|
||||
native_video_lang (Manual) -> pref_lang
|
||||
pref_lang (Automatic)
|
||||
pref_lang (Manual)'''
|
||||
sources = []
|
||||
pref_lang = 'en'
|
||||
native_video_lang = None
|
||||
if info['automatic_caption_languages']:
|
||||
native_video_lang = info['automatic_caption_languages'][0]
|
||||
|
||||
highest_fidelity_is_manual = False
|
||||
|
||||
# Sources are added in very specific order outlined above
|
||||
# More intelligible sources are put further down to avoid browser bug when there are too many languages
|
||||
# (in firefox, it is impossible to select a language near the top of the list because it is cut off)
|
||||
|
||||
# native_video_lang (Automatic)
|
||||
if native_video_lang and not lang_eq(native_video_lang, pref_lang):
|
||||
sources.append(make_caption_src(info, native_video_lang, auto=True))
|
||||
|
||||
# foreign_langs (Manual)
|
||||
for lang in info['manual_caption_languages']:
|
||||
if not lang_eq(lang, pref_lang):
|
||||
sources.append(make_caption_src(info, lang))
|
||||
|
||||
if (lang_in(pref_lang, info['translation_languages'])
|
||||
and not lang_in(pref_lang, info['automatic_caption_languages'])
|
||||
and not lang_in(pref_lang, info['manual_caption_languages'])):
|
||||
# native_video_lang (Automatic) -> pref_lang
|
||||
if native_video_lang and not lang_eq(pref_lang, native_video_lang):
|
||||
sources.append(make_caption_src(info, native_video_lang, auto=True, trans_lang=pref_lang))
|
||||
|
||||
# foreign_langs (Manual) -> pref_lang
|
||||
for lang in info['manual_caption_languages']:
|
||||
if not lang_eq(lang, native_video_lang) and not lang_eq(lang, pref_lang):
|
||||
sources.append(make_caption_src(info, lang, trans_lang=pref_lang))
|
||||
|
||||
# native_video_lang (Manual) -> pref_lang
|
||||
if lang_in(native_video_lang, info['manual_caption_languages']):
|
||||
sources.append(make_caption_src(info, native_video_lang, trans_lang=pref_lang))
|
||||
|
||||
# pref_lang (Automatic)
|
||||
if lang_in(pref_lang, info['automatic_caption_languages']):
|
||||
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['automatic_caption_languages']), auto=True))
|
||||
|
||||
# pref_lang (Manual)
|
||||
if lang_in(pref_lang, info['manual_caption_languages']):
|
||||
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['manual_caption_languages'])))
|
||||
highest_fidelity_is_manual = True
|
||||
if len(sources) == 0:
|
||||
assert len(info['automatic_caption_languages']) == 0 and len(info['manual_caption_languages']) == 0
|
||||
|
||||
return sources
|
||||
|
||||
|
||||
def get_ordered_music_list_attributes(music_list):
|
||||
# get the set of attributes which are used by at least 1 track
|
||||
# so there isn't an empty, extraneous album column which no tracks use, for example
|
||||
used_attributes = set()
|
||||
for track in music_list:
|
||||
used_attributes = used_attributes | track.keys()
|
||||
|
||||
# now put them in the right order
|
||||
ordered_attributes = []
|
||||
for attribute in ('Artist', 'Title', 'Album'):
|
||||
if attribute.lower() in used_attributes:
|
||||
ordered_attributes.append(attribute)
|
||||
|
||||
return ordered_attributes
|
||||
|
||||
headers = (
|
||||
('Accept', '*/*'),
|
||||
('Accept-Language', 'en-US,en;q=0.5'),
|
||||
('X-YouTube-Client-Name', '2'),
|
||||
('X-YouTube-Client-Version', '2.20180830'),
|
||||
) + util.mobile_ua
|
||||
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
|
||||
# bpctr=9999999999 will bypass are-you-sure dialogs for controversial
|
||||
# videos
|
||||
url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
|
||||
if playlist_id:
|
||||
url += '&list=' + playlist_id
|
||||
if index:
|
||||
url += '&index=' + index
|
||||
polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
|
||||
polymer_json = polymer_json.decode('utf-8')
|
||||
# TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
|
||||
try:
|
||||
polymer_json = json.loads(polymer_json)
|
||||
except json.decoder.JSONDecodeError:
|
||||
traceback.print_exc()
|
||||
return {'error': 'Failed to parse json response'}
|
||||
info = yt_data_extract.extract_watch_info(polymer_json)
|
||||
|
||||
# age restriction bypass
|
||||
if info['age_restricted']:
|
||||
print('Fetching age restriction bypass page')
|
||||
data = {
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
}
|
||||
url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
|
||||
video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
|
||||
yt_data_extract.update_with_age_restricted_info(info, video_info_page)
|
||||
# check if urls ready (non-live format) in former livestream
|
||||
# urls not ready if all of them have no filesize
|
||||
if info['was_live']:
|
||||
info['urls_ready'] = False
|
||||
for fmt in info['formats']:
|
||||
if fmt['file_size'] is not None:
|
||||
info['urls_ready'] = True
|
||||
else:
|
||||
info['urls_ready'] = True
|
||||
|
||||
# livestream urls
|
||||
# sometimes only the livestream urls work soon after the livestream is over
|
||||
if (info['hls_manifest_url']
|
||||
and (info['live'] or not info['formats'] or not info['urls_ready'])
|
||||
):
|
||||
manifest = util.fetch_url(info['hls_manifest_url'],
|
||||
debug_name='hls_manifest.m3u8',
|
||||
report_text='Fetched hls manifest'
|
||||
).decode('utf-8')
|
||||
|
||||
info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest)
|
||||
if not err:
|
||||
info['playability_error'] = None
|
||||
for fmt in info['hls_formats']:
|
||||
fmt['video_quality'] = video_quality_string(fmt)
|
||||
else:
|
||||
info['hls_formats'] = []
|
||||
|
||||
# check for 403. Unnecessary for tor video routing b/c ip address is same
|
||||
info['invidious_used'] = False
|
||||
info['invidious_reload_button'] = False
|
||||
info['tor_bypass_used'] = False
|
||||
return info
|
||||
|
||||
def video_quality_string(format):
|
||||
if format['vcodec']:
|
||||
result = str(format['width'] or '?') + 'x' + str(format['height'] or '?')
|
||||
if format['fps']:
|
||||
result += ' ' + str(format['fps']) + 'fps'
|
||||
return result
|
||||
elif format['acodec']:
|
||||
return 'audio only'
|
||||
|
||||
return '?'
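# Rough sketch of the expected output, using assumed format dicts (not from the original file):
#   video_quality_string({'vcodec': 'h264', 'width': 1280, 'height': 720, 'fps': 30, 'acodec': 'aac'})
#       -> '1280x720 30fps'
#   video_quality_string({'vcodec': None, 'acodec': 'opus'}) -> 'audio only'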
|
||||
|
||||
def audio_quality_string(format):
|
||||
if format['acodec']:
|
||||
result = str(format['audio_bitrate'] or '?') + 'k'
|
||||
if format['audio_sample_rate']:
|
||||
result += ' ' + str(format['audio_sample_rate']) + ' Hz'
|
||||
return result
|
||||
elif format['vcodec']:
|
||||
return 'video only'
|
||||
|
||||
return '?'
|
||||
|
||||
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py
|
||||
def format_bytes(bytes):
|
||||
if bytes is None:
|
||||
return 'N/A'
|
||||
if type(bytes) is str:
|
||||
bytes = float(bytes)
|
||||
if bytes == 0.0:
|
||||
exponent = 0
|
||||
else:
|
||||
exponent = int(math.log(bytes, 1024.0))
|
||||
suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
|
||||
converted = float(bytes) / float(1024 ** exponent)
|
||||
return '%.2f%s' % (converted, suffix)
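# Example outputs (assumed inputs):
#   format_bytes(None)      -> 'N/A'
#   format_bytes(1536)      -> '1.50KiB'
#   format_bytes('2097152') -> '2.00MiB'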
|
||||
|
||||
|
12  youtube/yt_data_extract/__init__.py  Normal file
@ -0,0 +1,12 @@
from .common import (get, multi_get, deep_get, multi_deep_get,
    liberal_update, conservative_update, remove_redirect, normalize_url,
    extract_str, extract_formatted_text, extract_int, extract_approx_int,
    extract_date, extract_item_info, extract_items, extract_response)

from .everything_else import (extract_channel_info, extract_search_info,
    extract_playlist_metadata, extract_playlist_info, extract_comments_info)

from .watch_extraction import (extract_watch_info, get_caption_url,
    update_with_age_restricted_info, requires_decryption,
    extract_decryption_function, decrypt_signatures, _formats,
    update_format_with_type_info, extract_hls_formats)
470  youtube/yt_data_extract/common.py  Normal file
@ -0,0 +1,470 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
import collections
|
||||
|
||||
def get(object, key, default=None, types=()):
|
||||
'''Like dict.get(), but returns default if the result doesn't match one of the types.
|
||||
Also works for indexing lists.'''
|
||||
try:
|
||||
result = object[key]
|
||||
except (TypeError, IndexError, KeyError):
|
||||
return default
|
||||
|
||||
if not types or isinstance(result, types):
|
||||
return result
|
||||
else:
|
||||
return default
|
||||
|
||||
def multi_get(object, *keys, default=None, types=()):
|
||||
'''Like get, but try other keys if the first fails'''
|
||||
for key in keys:
|
||||
try:
|
||||
result = object[key]
|
||||
except (TypeError, IndexError, KeyError):
|
||||
pass
|
||||
else:
|
||||
if not types or isinstance(result, types):
|
||||
return result
|
||||
else:
|
||||
continue
|
||||
return default
|
||||
|
||||
|
||||
def deep_get(object, *keys, default=None, types=()):
|
||||
'''Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices.
|
||||
Last argument is the default value to use in case of any IndexErrors or KeyErrors.
|
||||
If types is given and the result doesn't match one of those types, default is returned'''
|
||||
try:
|
||||
for key in keys:
|
||||
object = object[key]
|
||||
except (TypeError, IndexError, KeyError):
|
||||
return default
|
||||
else:
|
||||
if not types or isinstance(object, types):
|
||||
return object
|
||||
else:
|
||||
return default
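# Illustrative usage (assumed values, not part of the original module):
#   deep_get({'a': {'b': [10, 20]}}, 'a', 'b', 1)               -> 20
#   deep_get({'a': {'b': [10, 20]}}, 'a', 'missing', default=0) -> 0
#   deep_get({'a': 5}, 'a', types=str, default='?')             -> '?'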
|
||||
|
||||
def multi_deep_get(object, *key_sequences, default=None, types=()):
|
||||
'''Like deep_get, but can try different key sequences in case one fails.
|
||||
Return default if all of them fail. key_sequences is a list of lists'''
|
||||
for key_sequence in key_sequences:
|
||||
_object = object
|
||||
try:
|
||||
for key in key_sequence:
|
||||
_object = _object[key]
|
||||
except (TypeError, IndexError, KeyError):
|
||||
pass
|
||||
else:
|
||||
if not types or isinstance(_object, types):
|
||||
return _object
|
||||
else:
|
||||
continue
|
||||
return default
|
||||
|
||||
def liberal_update(obj, key, value):
|
||||
'''Updates obj[key] with value as long as value is not None.
|
||||
Ensures obj[key] will at least get a value of None, however'''
|
||||
if (value is not None) or (key not in obj):
|
||||
obj[key] = value
|
||||
|
||||
def conservative_update(obj, key, value):
|
||||
'''Only updates obj if it doesn't have key or obj[key] is None'''
|
||||
if obj.get(key) is None:
|
||||
obj[key] = value
|
||||
|
||||
def concat_or_none(*strings):
|
||||
'''Concatenates strings. Returns None if any of the arguments are None'''
|
||||
result = ''
|
||||
for string in strings:
|
||||
if string is None:
|
||||
return None
|
||||
result += string
|
||||
return result
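# e.g. (assumed values): concat_or_none('a', 'b') -> 'ab'; concat_or_none('a', None) -> None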
|
||||
|
||||
def remove_redirect(url):
|
||||
if url is None:
|
||||
return None
|
||||
if re.fullmatch(r'(((https?:)?//)?(www.)?youtube.com)?/redirect\?.*', url) is not None: # youtube puts these on external links to do tracking
|
||||
query_string = url[url.find('?')+1: ]
|
||||
return urllib.parse.parse_qs(query_string)['q'][0]
|
||||
return url
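# Illustrative (assumed url): a youtube tracking redirect is unwrapped to its target,
#   remove_redirect('https://www.youtube.com/redirect?q=https%3A%2F%2Fexample.com') -> 'https://example.com'
# while any other url is returned unchanged.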
|
||||
|
||||
youtube_url_re = re.compile(r'^(?:(?:(?:https?:)?//)?(?:www\.)?youtube\.com)?(/.*)$')
|
||||
def normalize_url(url):
|
||||
if url is None:
|
||||
return None
|
||||
match = youtube_url_re.fullmatch(url)
|
||||
if match is None:
|
||||
raise Exception()
|
||||
|
||||
return 'https://www.youtube.com' + match.group(1)
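# e.g. (assumed inputs):
#   normalize_url('//www.youtube.com/watch?v=abc') -> 'https://www.youtube.com/watch?v=abc'
#   normalize_url('/watch?v=abc')                  -> 'https://www.youtube.com/watch?v=abc'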
|
||||
|
||||
def _recover_urls(runs):
|
||||
for run in runs:
|
||||
url = deep_get(run, 'navigationEndpoint', 'urlEndpoint', 'url')
|
||||
text = run.get('text', '')
|
||||
# second condition is necessary because youtube makes other things into urls, such as hashtags, which we want to keep as text
|
||||
if url is not None and (text.startswith('http://') or text.startswith('https://')):
|
||||
url = remove_redirect(url)
|
||||
run['url'] = url
|
||||
run['text'] = url # youtube truncates the url text, use actual url instead
|
||||
|
||||
def extract_str(node, default=None, recover_urls=False):
|
||||
'''default is the value returned if the extraction fails. If recover_urls is true, will attempt to fix Youtube's truncation of url text (most prominently seen in descriptions)'''
|
||||
if isinstance(node, str):
|
||||
return node
|
||||
|
||||
try:
|
||||
return node['simpleText']
|
||||
except (KeyError, TypeError):
|
||||
pass
|
||||
|
||||
if isinstance(node, dict) and 'runs' in node:
|
||||
if recover_urls:
|
||||
_recover_urls(node['runs'])
|
||||
return ''.join(text_run.get('text', '') for text_run in node['runs'])
|
||||
|
||||
return default
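# Illustrative node shapes (assumed values):
#   extract_str({'simpleText': 'hi'})                      -> 'hi'
#   extract_str({'runs': [{'text': 'a'}, {'text': 'b'}]})  -> 'ab'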
|
||||
|
||||
def extract_formatted_text(node):
|
||||
if not node:
|
||||
return []
|
||||
if 'runs' in node:
|
||||
_recover_urls(node['runs'])
|
||||
return node['runs']
|
||||
elif 'simpleText' in node:
|
||||
return [{'text': node['simpleText']}]
|
||||
return []
|
||||
|
||||
def extract_int(string, default=None):
|
||||
if isinstance(string, int):
|
||||
return string
|
||||
if not isinstance(string, str):
|
||||
string = extract_str(string)
|
||||
if not string:
|
||||
return default
|
||||
match = re.search(r'\b(\d+)\b', string.replace(',', ''))
|
||||
if match is None:
|
||||
return default
|
||||
try:
|
||||
return int(match.group(1))
|
||||
except ValueError:
|
||||
return default
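# e.g. (assumed inputs): extract_int('1,234,567 views') -> 1234567
#      extract_int({'simpleText': '42 likes'})          -> 42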
|
||||
|
||||
def extract_approx_int(string):
|
||||
'''e.g. "15.1M" from "15.1M subscribers"'''
|
||||
if not isinstance(string, str):
|
||||
string = extract_str(string)
|
||||
if not string:
|
||||
return None
|
||||
match = re.search(r'\b(\d+(?:\.\d+)?[KMBTkmbt]?)\b', string.replace(',', ''))
|
||||
if match is None:
|
||||
return None
|
||||
return match.group(1)
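# e.g. extract_approx_int('15.1M subscribers') -> '15.1M' (assumed input)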
|
||||
|
||||
MONTH_ABBREVIATIONS = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'}
|
||||
def extract_date(date_text):
|
||||
'''Input: "Mar 9, 2019". Output: "2019-3-9"'''
|
||||
if not isinstance(date_text, str):
|
||||
date_text = extract_str(date_text)
|
||||
if date_text is None:
|
||||
return None
|
||||
|
||||
date_text = date_text.replace(',', '').lower()
|
||||
parts = date_text.split()
|
||||
if len(parts) >= 3:
|
||||
month, day, year = parts[-3:]
|
||||
month = MONTH_ABBREVIATIONS.get(month[0:3]) # slicing in case they start writing out the full month name
|
||||
if month and (re.fullmatch(r'\d\d?', day) is not None) and (re.fullmatch(r'\d{4}', year) is not None):
|
||||
return year + '-' + month + '-' + day
|
||||
return None
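# e.g. extract_date('Mar 9, 2019') -> '2019-3-9'; text that doesn't fit this pattern -> None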
|
||||
|
||||
def check_missing_keys(object, *key_sequences):
|
||||
for key_sequence in key_sequences:
|
||||
_object = object
|
||||
try:
|
||||
for key in key_sequence:
|
||||
_object = _object[key]
|
||||
except (KeyError, IndexError, TypeError):
|
||||
return 'Could not find ' + key
|
||||
|
||||
return None
|
||||
|
||||
def extract_item_info(item, additional_info={}):
|
||||
if not item:
|
||||
return {'error': 'No item given'}
|
||||
|
||||
type = get(list(item.keys()), 0)
|
||||
if not type:
|
||||
return {'error': 'Could not find type'}
|
||||
item = item[type]
|
||||
|
||||
info = {'error': None}
|
||||
if type in ('itemSectionRenderer', 'compactAutoplayRenderer'):
|
||||
return extract_item_info(deep_get(item, 'contents', 0), additional_info)
|
||||
|
||||
if type in ('movieRenderer', 'clarificationRenderer'):
|
||||
info['type'] = 'unsupported'
|
||||
return info
|
||||
|
||||
info.update(additional_info)
|
||||
|
||||
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
|
||||
# camelCase split, https://stackoverflow.com/a/37697078
|
||||
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
|
||||
if len(type_parts) < 2:
|
||||
info['type'] = 'unsupported'
|
||||
return info
|
||||
primary_type = type_parts[-2]
|
||||
if primary_type == 'video':
|
||||
info['type'] = 'video'
|
||||
elif primary_type in ('playlist', 'radio', 'show'):
|
||||
info['type'] = 'playlist'
|
||||
elif primary_type == 'channel':
|
||||
info['type'] = 'channel'
|
||||
elif type == 'videoWithContextRenderer': # stupid exception
|
||||
info['type'] = 'video'
|
||||
primary_type = 'video'
|
||||
else:
|
||||
info['type'] = 'unsupported'
|
||||
|
||||
# videoWithContextRenderer changes it to 'headline' just to be annoying
|
||||
info['title'] = extract_str(multi_get(item, 'title', 'headline'))
|
||||
if primary_type != 'channel':
|
||||
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
|
||||
info['author_id'] = extract_str(multi_deep_get(item,
|
||||
['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||
['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||
['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId']
|
||||
))
|
||||
info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
|
||||
info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText'))
|
||||
info['thumbnail'] = multi_deep_get(item,
|
||||
['thumbnail', 'thumbnails', 0, 'url'], # videos
|
||||
['thumbnails', 0, 'thumbnails', 0, 'url'], # playlists
|
||||
['thumbnailRenderer', 'showCustomThumbnailRenderer', 'thumbnail', 'thumbnails', 0, 'url'], # shows
|
||||
)
|
||||
|
||||
info['badges'] = []
|
||||
for badge_node in multi_get(item, 'badges', 'ownerBadges', default=()):
|
||||
badge = deep_get(badge_node, 'metadataBadgeRenderer', 'label')
|
||||
if badge:
|
||||
info['badges'].append(badge)
|
||||
|
||||
if primary_type in ('video', 'playlist'):
|
||||
info['time_published'] = None
|
||||
timestamp = re.search(r'(\d+ \w+ ago)',
|
||||
extract_str(item.get('publishedTimeText'), default=''))
|
||||
if timestamp:
|
||||
info['time_published'] = timestamp.group(1)
|
||||
|
||||
if primary_type == 'video':
|
||||
info['id'] = item.get('videoId')
|
||||
info['view_count'] = extract_int(item.get('viewCountText'))
|
||||
|
||||
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
|
||||
accessibility_label = multi_deep_get(item,
|
||||
['title', 'accessibility', 'accessibilityData', 'label'],
|
||||
['headline', 'accessibility', 'accessibilityData', 'label'],
|
||||
default='')
|
||||
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
|
||||
if timestamp:
|
||||
conservative_update(info, 'time_published', timestamp.group(1))
|
||||
view_count = re.search(r'(\d+) views', accessibility_label.replace(',', ''))
|
||||
if view_count:
|
||||
conservative_update(info, 'view_count', int(view_count.group(1)))
|
||||
|
||||
if info['view_count']:
|
||||
info['approx_view_count'] = '{:,}'.format(info['view_count'])
|
||||
else:
|
||||
info['approx_view_count'] = extract_approx_int(item.get('shortViewCountText'))
|
||||
|
||||
# handle case where it is "No views"
|
||||
if not info['approx_view_count']:
|
||||
if ('No views' in item.get('shortViewCountText', '')
|
||||
or 'no views' in accessibility_label.lower()):
|
||||
info['view_count'] = 0
|
||||
info['approx_view_count'] = '0'
|
||||
|
||||
info['duration'] = extract_str(item.get('lengthText'))
|
||||
|
||||
# if it's an item in a playlist, get its index
|
||||
if 'index' in item: # url has wrong index on playlist page
|
||||
info['index'] = extract_int(item.get('index'))
|
||||
elif 'indexText' in item:
|
||||
# Current item in playlist has ▶ instead of the actual index, must
|
||||
# dig into url
|
||||
match = re.search(r'index=(\d+)', deep_get(item,
|
||||
'navigationEndpoint', 'commandMetadata', 'webCommandMetadata',
|
||||
'url', default=''))
|
||||
if match is None: # worth a try then
|
||||
info['index'] = extract_int(item.get('indexText'))
|
||||
else:
|
||||
info['index'] = int(match.group(1))
|
||||
else:
|
||||
info['index'] = None
|
||||
|
||||
elif primary_type in ('playlist', 'radio'):
|
||||
info['id'] = item.get('playlistId')
|
||||
info['video_count'] = extract_int(item.get('videoCount'))
|
||||
elif primary_type == 'channel':
|
||||
info['id'] = item.get('channelId')
|
||||
info['approx_subscriber_count'] = extract_approx_int(item.get('subscriberCountText'))
|
||||
elif primary_type == 'show':
|
||||
info['id'] = deep_get(item, 'navigationEndpoint', 'watchEndpoint', 'playlistId')
|
||||
|
||||
if primary_type in ('playlist', 'channel'):
|
||||
conservative_update(info, 'video_count', extract_int(item.get('videoCountText')))
|
||||
|
||||
for overlay in item.get('thumbnailOverlays', []):
|
||||
conservative_update(info, 'duration', extract_str(deep_get(
|
||||
overlay, 'thumbnailOverlayTimeStatusRenderer', 'text'
|
||||
)))
|
||||
# show renderers don't have videoCountText
|
||||
conservative_update(info, 'video_count', extract_int(deep_get(
|
||||
overlay, 'thumbnailOverlayBottomPanelRenderer', 'text'
|
||||
)))
|
||||
return info
|
||||
|
||||
def extract_response(polymer_json):
|
||||
'''return response, error'''
|
||||
response = multi_deep_get(polymer_json, [1, 'response'], ['response'])
|
||||
if response is None:
|
||||
return None, 'Failed to extract response'
|
||||
else:
|
||||
return response, None
|
||||
|
||||
|
||||
_item_types = {
|
||||
'movieRenderer',
|
||||
'didYouMeanRenderer',
|
||||
'showingResultsForRenderer',
|
||||
|
||||
'videoRenderer',
|
||||
'compactVideoRenderer',
|
||||
'compactAutoplayRenderer',
|
||||
'videoWithContextRenderer',
|
||||
'gridVideoRenderer',
|
||||
'playlistVideoRenderer',
|
||||
|
||||
'playlistRenderer',
|
||||
'compactPlaylistRenderer',
|
||||
'gridPlaylistRenderer',
|
||||
|
||||
'radioRenderer',
|
||||
'compactRadioRenderer',
|
||||
'gridRadioRenderer',
|
||||
|
||||
'showRenderer',
|
||||
'compactShowRenderer',
|
||||
'gridShowRenderer',
|
||||
|
||||
|
||||
'channelRenderer',
|
||||
'compactChannelRenderer',
|
||||
'gridChannelRenderer',
|
||||
}
|
||||
|
||||
def _traverse_browse_renderer(renderer):
|
||||
for tab in get(renderer, 'tabs', ()):
|
||||
tab_renderer = multi_get(tab, 'tabRenderer', 'expandableTabRenderer')
|
||||
if tab_renderer is None:
|
||||
continue
|
||||
if tab_renderer.get('selected', False):
|
||||
return get(tab_renderer, 'content', {})
|
||||
print('Could not find tab with content')
|
||||
return {}
|
||||
|
||||
def _traverse_standard_list(renderer):
|
||||
renderer_list = multi_get(renderer, 'contents', 'items', default=())
|
||||
continuation = deep_get(renderer, 'continuations', 0, 'nextContinuationData', 'continuation')
|
||||
return renderer_list, continuation
|
||||
|
||||
# these renderers contain a single renderer inside them
|
||||
nested_renderer_dispatch = {
|
||||
'singleColumnBrowseResultsRenderer': _traverse_browse_renderer,
|
||||
'twoColumnBrowseResultsRenderer': _traverse_browse_renderer,
|
||||
'twoColumnSearchResultsRenderer': lambda renderer: get(renderer, 'primaryContents', {}),
|
||||
}
|
||||
|
||||
# these renderers contain a list of renderers inside them
|
||||
nested_renderer_list_dispatch = {
|
||||
'sectionListRenderer': _traverse_standard_list,
|
||||
'itemSectionRenderer': _traverse_standard_list,
|
||||
'gridRenderer': _traverse_standard_list,
|
||||
'playlistVideoListRenderer': _traverse_standard_list,
|
||||
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
||||
}
|
||||
def get_nested_renderer_list_function(key):
|
||||
if key in nested_renderer_list_dispatch:
|
||||
return nested_renderer_list_dispatch[key]
|
||||
elif key.endswith('Continuation'):
|
||||
return _traverse_standard_list
|
||||
return None
|
||||
|
||||
def extract_items_from_renderer(renderer, item_types=_item_types):
|
||||
ctoken = None
|
||||
items = []
|
||||
|
||||
iter_stack = collections.deque()
|
||||
current_iter = iter(())
|
||||
|
||||
while True:
|
||||
# mode 1: get a new renderer by iterating.
|
||||
# goes down the stack for an iterator if one has been exhausted
|
||||
if not renderer:
|
||||
try:
|
||||
renderer = current_iter.__next__()
|
||||
except StopIteration:
|
||||
try:
|
||||
current_iter = iter_stack.pop()
|
||||
except IndexError:
|
||||
return items, ctoken
|
||||
# Get new renderer or check that the one we got is good before
|
||||
# proceeding to mode 2
|
||||
continue
|
||||
|
||||
|
||||
# mode 2: dig into the current renderer
|
||||
key, value = list(renderer.items())[0]
|
||||
|
||||
# the renderer is an item
|
||||
if key in item_types:
|
||||
items.append(renderer)
|
||||
|
||||
# has a list in it, add it to the iter stack
|
||||
elif get_nested_renderer_list_function(key):
|
||||
renderer_list, cont = get_nested_renderer_list_function(key)(value)
|
||||
if renderer_list:
|
||||
iter_stack.append(current_iter)
|
||||
current_iter = iter(renderer_list)
|
||||
if cont:
|
||||
ctoken = cont
|
||||
|
||||
# new renderer nested inside this one
|
||||
elif key in nested_renderer_dispatch:
|
||||
renderer = nested_renderer_dispatch[key](value)
|
||||
continue # don't reset renderer to None
|
||||
|
||||
renderer = None
|
||||
|
||||
def extract_items(response, item_types=_item_types):
|
||||
'''return items, ctoken'''
|
||||
if 'continuationContents' in response:
|
||||
# sometimes there's another, empty, junk [something]Continuation key
|
||||
# find real one
|
||||
for key, renderer_cont in get(response,
|
||||
'continuationContents', {}).items():
|
||||
# e.g. commentSectionContinuation, playlistVideoListContinuation
|
||||
if key.endswith('Continuation'):
|
||||
items, cont = extract_items_from_renderer({key: renderer_cont},
|
||||
item_types=item_types)
|
||||
if items:
|
||||
return items, cont
|
||||
return [], None
|
||||
elif 'contents' in response:
|
||||
renderer = get(response, 'contents', {})
|
||||
return extract_items_from_renderer(renderer, item_types=item_types)
|
||||
else:
|
||||
return [], None
|
281  youtube/yt_data_extract/everything_else.py  Normal file
@ -0,0 +1,281 @@
|
||||
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||
extract_date, check_missing_keys, extract_item_info, extract_items,
|
||||
extract_response)
|
||||
from youtube import proto
|
||||
|
||||
import re
|
||||
import urllib
|
||||
from math import ceil
|
||||
|
||||
def extract_channel_info(polymer_json, tab):
|
||||
response, err = extract_response(polymer_json)
|
||||
if err:
|
||||
return {'error': err}
|
||||
|
||||
|
||||
metadata = deep_get(response, 'metadata', 'channelMetadataRenderer',
|
||||
default={})
|
||||
if not metadata:
|
||||
metadata = deep_get(response, 'microformat', 'microformatDataRenderer',
|
||||
default={})
|
||||
|
||||
# channel doesn't exist or was terminated
|
||||
# example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org
|
||||
if not metadata:
|
||||
if response.get('alerts'):
|
||||
error_string = ' '.join(
|
||||
extract_str(deep_get(alert, 'alertRenderer', 'text'), default='')
|
||||
for alert in response['alerts']
|
||||
)
|
||||
if not error_string:
|
||||
error_string = 'Failed to extract error'
|
||||
return {'error': error_string}
|
||||
elif deep_get(response, 'responseContext', 'errors'):
|
||||
for error in response['responseContext']['errors'].get('error', []):
|
||||
if error.get('code') == 'INVALID_VALUE' and error.get('location') == 'browse_id':
|
||||
return {'error': 'This channel does not exist'}
|
||||
return {'error': 'Failure getting metadata'}
|
||||
|
||||
info = {'error': None}
|
||||
info['current_tab'] = tab
|
||||
|
||||
info['approx_subscriber_count'] = extract_approx_int(deep_get(response,
|
||||
'header', 'c4TabbedHeaderRenderer', 'subscriberCountText'))
|
||||
|
||||
# stuff from microformat (info given by youtube for every page on channel)
|
||||
info['short_description'] = metadata.get('description')
|
||||
if info['short_description'] and len(info['short_description']) > 730:
|
||||
info['short_description'] = info['short_description'][0:730] + '...'
|
||||
info['channel_name'] = metadata.get('title')
|
||||
info['avatar'] = multi_deep_get(metadata,
|
||||
['avatar', 'thumbnails', 0, 'url'],
|
||||
['thumbnail', 'thumbnails', 0, 'url'],
|
||||
)
|
||||
channel_url = multi_get(metadata, 'urlCanonical', 'channelUrl')
|
||||
if channel_url:
|
||||
channel_id = get(channel_url.rstrip('/').split('/'), -1)
|
||||
info['channel_id'] = channel_id
|
||||
else:
|
||||
info['channel_id'] = metadata.get('externalId')
|
||||
if info['channel_id']:
|
||||
info['channel_url'] = 'https://www.youtube.com/channel/' + info['channel_id']
|
||||
else:
|
||||
info['channel_url'] = None
|
||||
|
||||
# get items
|
||||
info['items'] = []
|
||||
|
||||
# empty channel
|
||||
if 'contents' not in response and 'continuationContents' not in response:
|
||||
return info
|
||||
|
||||
if tab in ('videos', 'playlists', 'search'):
|
||||
items, ctoken = extract_items(response)
|
||||
additional_info = {'author': info['channel_name'], 'author_url': info['channel_url']}
|
||||
info['items'] = [extract_item_info(renderer, additional_info) for renderer in items]
|
||||
if tab == 'search':
|
||||
info['is_last_page'] = (ctoken is None)
|
||||
elif tab == 'about':
|
||||
items, _ = extract_items(response, item_types={'channelAboutFullMetadataRenderer'})
|
||||
if not items:
|
||||
info['error'] = 'Could not find channelAboutFullMetadataRenderer'
|
||||
return info
|
||||
channel_metadata = items[0]['channelAboutFullMetadataRenderer']
|
||||
|
||||
info['links'] = []
|
||||
for link_json in channel_metadata.get('primaryLinks', ()):
|
||||
url = remove_redirect(deep_get(link_json, 'navigationEndpoint', 'urlEndpoint', 'url'))
|
||||
text = extract_str(link_json.get('title'))
|
||||
info['links'].append( (text, url) )
|
||||
|
||||
info['date_joined'] = extract_date(channel_metadata.get('joinedDateText'))
|
||||
info['view_count'] = extract_int(channel_metadata.get('viewCountText'))
|
||||
info['description'] = extract_str(channel_metadata.get('description'), default='')
|
||||
else:
|
||||
raise NotImplementedError('Unknown or unsupported channel tab: ' + tab)
|
||||
|
||||
return info
|
||||
|
||||
def extract_search_info(polymer_json):
|
||||
response, err = extract_response(polymer_json)
|
||||
if err:
|
||||
return {'error': err}
|
||||
info = {'error': None}
|
||||
info['estimated_results'] = int(response['estimatedResults'])
|
||||
info['estimated_pages'] = ceil(info['estimated_results']/20)
|
||||
|
||||
|
||||
results, _ = extract_items(response)
|
||||
|
||||
|
||||
info['items'] = []
|
||||
info['corrections'] = {'type': None}
|
||||
for renderer in results:
|
||||
type = list(renderer.keys())[0]
|
||||
if type == 'shelfRenderer':
|
||||
continue
|
||||
if type == 'didYouMeanRenderer':
|
||||
renderer = renderer[type]
|
||||
|
||||
info['corrections'] = {
|
||||
'type': 'did_you_mean',
|
||||
'corrected_query': renderer['correctedQueryEndpoint']['searchEndpoint']['query'],
|
||||
'corrected_query_text': renderer['correctedQuery']['runs'],
|
||||
}
|
||||
continue
|
||||
if type == 'showingResultsForRenderer':
|
||||
renderer = renderer[type]
|
||||
|
||||
info['corrections'] = {
|
||||
'type': 'showing_results_for',
|
||||
'corrected_query_text': renderer['correctedQuery']['runs'],
|
||||
'original_query_text': renderer['originalQuery']['simpleText'],
|
||||
}
|
||||
continue
|
||||
|
||||
i_info = extract_item_info(renderer)
|
||||
if i_info.get('type') != 'unsupported':
|
||||
info['items'].append(i_info)
|
||||
|
||||
|
||||
return info
|
||||
|
||||
def extract_playlist_metadata(polymer_json):
|
||||
response, err = extract_response(polymer_json)
|
||||
if err:
|
||||
return {'error': err}
|
||||
|
||||
metadata = {'error': None}
|
||||
header = deep_get(response, 'header', 'playlistHeaderRenderer', default={})
|
||||
metadata['title'] = extract_str(header.get('title'))
|
||||
|
||||
metadata['first_video_id'] = deep_get(header, 'playEndpoint', 'watchEndpoint', 'videoId')
|
||||
first_id = re.search(r'([a-z_\-]{11})', deep_get(header,
|
||||
'thumbnail', 'thumbnails', 0, 'url', default=''))
|
||||
if first_id:
|
||||
conservative_update(metadata, 'first_video_id', first_id.group(1))
|
||||
if metadata['first_video_id'] is None:
|
||||
metadata['thumbnail'] = None
|
||||
else:
|
||||
metadata['thumbnail'] = 'https://i.ytimg.com/vi/' + metadata['first_video_id'] + '/mqdefault.jpg'
|
||||
|
||||
metadata['video_count'] = extract_int(header.get('numVideosText'))
|
||||
metadata['description'] = extract_str(header.get('descriptionText'), default='')
|
||||
metadata['author'] = extract_str(header.get('ownerText'))
|
||||
metadata['author_id'] = multi_deep_get(header,
|
||||
['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
|
||||
['ownerEndpoint', 'browseEndpoint', 'browseId'])
|
||||
if metadata['author_id']:
|
||||
metadata['author_url'] = 'https://www.youtube.com/channel/' + metadata['author_id']
|
||||
else:
|
||||
metadata['author_url'] = None
|
||||
metadata['view_count'] = extract_int(header.get('viewCountText'))
|
||||
metadata['like_count'] = extract_int(header.get('likesCountWithoutLikeText'))
|
||||
for stat in header.get('stats', ()):
|
||||
text = extract_str(stat)
|
||||
if 'videos' in text:
|
||||
conservative_update(metadata, 'video_count', extract_int(text))
|
||||
elif 'views' in text:
|
||||
conservative_update(metadata, 'view_count', extract_int(text))
|
||||
elif 'updated' in text:
|
||||
metadata['time_published'] = extract_date(text)
|
||||
|
||||
return metadata
|
||||
|
||||
def extract_playlist_info(polymer_json):
|
||||
response, err = extract_response(polymer_json)
|
||||
if err:
|
||||
return {'error': err}
|
||||
info = {'error': None}
|
||||
first_page = 'continuationContents' not in response
|
||||
video_list, _ = extract_items(response)
|
||||
|
||||
info['items'] = [extract_item_info(renderer) for renderer in video_list]
|
||||
|
||||
if first_page:
|
||||
info['metadata'] = extract_playlist_metadata(polymer_json)
|
||||
|
||||
return info
|
||||
|
||||
def _ctoken_metadata(ctoken):
|
||||
result = dict()
|
||||
params = proto.parse(proto.b64_to_bytes(ctoken))
|
||||
result['video_id'] = proto.parse(params[2])[2].decode('ascii')
|
||||
|
||||
offset_information = proto.parse(params[6])
|
||||
result['offset'] = offset_information.get(5, 0)
|
||||
|
||||
result['is_replies'] = False
|
||||
if (3 in offset_information) and (2 in proto.parse(offset_information[3])):
|
||||
result['is_replies'] = True
|
||||
result['sort'] = None
|
||||
else:
|
||||
try:
|
||||
result['sort'] = proto.parse(offset_information[4])[6]
|
||||
except KeyError:
|
||||
result['sort'] = 0
|
||||
return result
|
||||
|
||||
def extract_comments_info(polymer_json):
|
||||
response, err = extract_response(polymer_json)
|
||||
if err:
|
||||
return {'error': err}
|
||||
info = {'error': None}
|
||||
|
||||
url = multi_deep_get(polymer_json, [1, 'url'], ['url'])
|
||||
if url:
|
||||
ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
|
||||
metadata = _ctoken_metadata(ctoken)
|
||||
else:
|
||||
metadata = {}
|
||||
info['video_id'] = metadata.get('video_id')
|
||||
info['offset'] = metadata.get('offset')
|
||||
info['is_replies'] = metadata.get('is_replies')
|
||||
info['sort'] = metadata.get('sort')
|
||||
info['video_title'] = None
|
||||
|
||||
comments, ctoken = extract_items(response,
|
||||
item_types={'commentThreadRenderer', 'commentRenderer'})
|
||||
info['comments'] = []
|
||||
info['ctoken'] = ctoken
|
||||
for comment in comments:
|
||||
comment_info = {}
|
||||
|
||||
if 'commentThreadRenderer' in comment: # top level comments
|
||||
conservative_update(info, 'is_replies', False)
|
||||
comment_thread = comment['commentThreadRenderer']
|
||||
info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
|
||||
if 'replies' not in comment_thread:
|
||||
comment_info['reply_count'] = 0
|
||||
else:
|
||||
comment_info['reply_count'] = extract_int(deep_get(comment_thread,
|
||||
'replies', 'commentRepliesRenderer', 'moreText'
|
||||
), default=1) # With 1 reply, the text reads "View reply"
|
||||
comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
|
||||
elif 'commentRenderer' in comment: # replies
|
||||
comment_info['reply_count'] = 0 # replyCount, below, not present for replies even if the reply has further replies to it
|
||||
conservative_update(info, 'is_replies', True)
|
||||
comment_renderer = comment['commentRenderer']
|
||||
else:
|
||||
comment_renderer = {}
|
||||
|
||||
# These 3 are sometimes absent, likely because the channel was deleted
|
||||
comment_info['author'] = extract_str(comment_renderer.get('authorText'))
|
||||
comment_info['author_url'] = deep_get(comment_renderer,
|
||||
'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
|
||||
comment_info['author_id'] = deep_get(comment_renderer,
|
||||
'authorEndpoint', 'browseEndpoint', 'browseId')
|
||||
|
||||
comment_info['author_avatar'] = deep_get(comment_renderer,
|
||||
'authorThumbnail', 'thumbnails', 0, 'url')
|
||||
comment_info['id'] = comment_renderer.get('commentId')
|
||||
comment_info['text'] = extract_formatted_text(comment_renderer.get('contentText'))
|
||||
comment_info['time_published'] = extract_str(comment_renderer.get('publishedTimeText'))
|
||||
comment_info['like_count'] = comment_renderer.get('likeCount')
|
||||
liberal_update(comment_info, 'reply_count', comment_renderer.get('replyCount'))
|
||||
|
||||
info['comments'].append(comment_info)
|
||||
|
||||
return info
|
689  youtube/yt_data_extract/watch_extraction.py  Normal file
@ -0,0 +1,689 @@
|
||||
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||
extract_date, check_missing_keys, extract_item_info, extract_items,
|
||||
extract_response, concat_or_none)
|
||||
|
||||
import json
|
||||
import urllib.parse
|
||||
import traceback
|
||||
import re
|
||||
|
||||
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'mp4v'},
|
||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 96, 'vcodec': 'h264'},
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), audio_bitrate varies as well
|
||||
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
|
||||
|
||||
# 3D videos
|
||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||
|
||||
# Apple HTTP Live Streaming
|
||||
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'h264'},
|
||||
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 48, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 128, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 256, 'container': 'm4a_dash'},
|
||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
||||
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 128},
|
||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 256},
|
||||
|
||||
# Dash webm audio with opus inside
|
||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 50},
|
||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 70},
|
||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 160},
|
||||
|
||||
# RTMP (unnamed)
|
||||
'_rtmp': {'protocol': 'rtmp'},
|
||||
|
||||
# av01 video only formats sometimes served with "unknown" codecs
|
||||
'394': {'vcodec': 'av01.0.05M.08'},
|
||||
'395': {'vcodec': 'av01.0.05M.08'},
|
||||
'396': {'vcodec': 'av01.0.05M.08'},
|
||||
'397': {'vcodec': 'av01.0.05M.08'},
|
||||
}
|
||||
|
||||
def _extract_metadata_row_info(video_renderer_info):
|
||||
# extract category and music list
|
||||
info = {
|
||||
'category': None,
|
||||
'music_list': [],
|
||||
}
|
||||
|
||||
current_song = {}
|
||||
for row in deep_get(video_renderer_info, 'metadataRowContainer', 'metadataRowContainerRenderer', 'rows', default=[]):
|
||||
row_title = extract_str(deep_get(row, 'metadataRowRenderer', 'title'), default='')
|
||||
row_content = extract_str(deep_get(row, 'metadataRowRenderer', 'contents', 0))
|
||||
if row_title == 'Category':
|
||||
info['category'] = row_content
|
||||
elif row_title in ('Song', 'Music'):
|
||||
if current_song:
|
||||
info['music_list'].append(current_song)
|
||||
current_song = {'title': row_content}
|
||||
elif row_title == 'Artist':
|
||||
current_song['artist'] = row_content
|
||||
elif row_title == 'Album':
|
||||
current_song['album'] = row_content
|
||||
elif row_title == 'Writers':
|
||||
current_song['writers'] = row_content
|
||||
elif row_title.startswith('Licensed'):
|
||||
current_song['licensor'] = row_content
|
||||
if current_song:
|
||||
info['music_list'].append(current_song)
|
||||
|
||||
return info
|
||||
|
||||
def _extract_watch_info_mobile(top_level):
|
||||
info = {}
|
||||
microformat = deep_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={})
|
||||
|
||||
family_safe = microformat.get('isFamilySafe')
|
||||
if family_safe is None:
|
||||
info['age_restricted'] = None
|
||||
else:
|
||||
info['age_restricted'] = not family_safe
|
||||
info['allowed_countries'] = microformat.get('availableCountries', [])
|
||||
info['time_published'] = microformat.get('publishDate')
|
||||
|
||||
response = top_level.get('response', {})
|
||||
|
||||
# this renderer has the stuff visible on the page
|
||||
# check for playlist
|
||||
items, _ = extract_items(response,
|
||||
item_types={'singleColumnWatchNextResults'})
|
||||
if items:
|
||||
watch_next_results = items[0]['singleColumnWatchNextResults']
|
||||
playlist = deep_get(watch_next_results, 'playlist', 'playlist')
|
||||
if playlist is None:
|
||||
info['playlist'] = None
|
||||
else:
|
||||
info['playlist'] = {}
|
||||
info['playlist']['title'] = playlist.get('title')
|
||||
info['playlist']['author'] = extract_str(multi_get(playlist,
|
||||
'ownerName', 'longBylineText', 'shortBylineText', 'ownerText'))
|
||||
author_id = deep_get(playlist, 'longBylineText', 'runs', 0,
|
||||
'navigationEndpoint', 'browseEndpoint', 'browseId')
|
||||
info['playlist']['author_id'] = author_id
|
||||
if author_id:
|
||||
info['playlist']['author_url'] = concat_or_none(
|
||||
'https://www.youtube.com/channel/', author_id)
|
||||
info['playlist']['id'] = playlist.get('playlistId')
|
||||
info['playlist']['url'] = concat_or_none(
|
||||
'https://www.youtube.com/playlist?list=',
|
||||
info['playlist']['id'])
|
||||
info['playlist']['video_count'] = playlist.get('totalVideos')
|
||||
info['playlist']['current_index'] = playlist.get('currentIndex')
|
||||
info['playlist']['items'] = [
|
||||
extract_item_info(i) for i in playlist.get('contents', ())]
|
||||
else:
|
||||
info['playlist'] = None
|
||||
|
||||
    # Holds the visible video info. It is inside singleColumnWatchNextResults
    # but use our convenience function instead
    items, _ = extract_items(response, item_types={'slimVideoMetadataRenderer'})
    if items:
        video_info = items[0]['slimVideoMetadataRenderer']
    else:
        print('Failed to extract video metadata')
        video_info = {}

    info.update(_extract_metadata_row_info(video_info))
    info['description'] = extract_str(video_info.get('description'), recover_urls=True)
    info['view_count'] = extract_int(extract_str(video_info.get('expandedSubtitle')))
    info['author'] = extract_str(deep_get(video_info, 'owner', 'slimOwnerRenderer', 'title'))
    info['author_id'] = deep_get(video_info, 'owner', 'slimOwnerRenderer', 'navigationEndpoint', 'browseEndpoint', 'browseId')
    info['title'] = extract_str(video_info.get('title'))
    info['live'] = 'watching' in extract_str(video_info.get('expandedSubtitle'), default='')
    info['unlisted'] = False
    for badge in video_info.get('badges', []):
        if deep_get(badge, 'metadataBadgeRenderer', 'label') == 'Unlisted':
            info['unlisted'] = True
    info['like_count'] = None
    info['dislike_count'] = None
    if not info['time_published']:
        info['time_published'] = extract_date(extract_str(video_info.get('dateText', None)))
    for button in video_info.get('buttons', ()):
        button_renderer = button.get('slimMetadataToggleButtonRenderer', {})

        # all the digits can be found in the accessibility data
        count = extract_int(deep_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText', 'accessibility', 'accessibilityData', 'label'))

        # this count doesn't have all the digits, it's like 53K for instance
        dumb_count = extract_int(extract_str(deep_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText')))

        # the accessibility text will be "No likes" or "No dislikes" or something like that, but dumb count will be 0
        if dumb_count == 0:
            count = 0

        if 'isLike' in button_renderer:
            info['like_count'] = count
        elif 'isDislike' in button_renderer:
            info['dislike_count'] = count

    # comment section info
    items, _ = extract_items(response, item_types={
        'commentSectionRenderer', 'commentsEntryPointHeaderRenderer'})
    if items:
        header_type = list(items[0])[0]
        comment_info = items[0][header_type]
        # This seems to be some kind of A/B test being done on mobile, where
        # this is present instead of the normal commentSectionRenderer. It can
        # be seen here:
        # https://www.androidpolice.com/2019/10/31/google-youtube-app-comment-section-below-videos/
        # https://www.youtube.com/watch?v=bR5Q-wD-6qo
        if header_type == 'commentsEntryPointHeaderRenderer':
            comment_count_text = extract_str(comment_info.get('headerText'))
        else:
            comment_count_text = extract_str(deep_get(comment_info,
                'header', 'commentSectionHeaderRenderer', 'countText'))
        if comment_count_text == 'Comments': # just this with no number, means 0 comments
            info['comment_count'] = 0
        else:
            info['comment_count'] = extract_int(comment_count_text)
        info['comments_disabled'] = False
    else: # no comment section present means comments are disabled
        info['comment_count'] = 0
        info['comments_disabled'] = True

    # check for limited state
    items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
    if items:
        info['limited_state'] = True
    else:
        info['limited_state'] = False

    # related videos
    related, _ = extract_items(response)
    info['related_videos'] = [extract_item_info(renderer) for renderer in related]

    return info

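# Illustrative sketch (not part of the original module): why both counts are read above.
# The accessibility label keeps every digit while the visible defaultText is abbreviated,
# so the abbreviated value is only trusted when it is exactly 0. Values are made up.
def _example_count(text):
    # rough stand-in for extract_int: pull the first run of digits/commas out of a label
    import re
    m = re.search(r'[\d,]+', text or '')
    return int(m.group().replace(',', '')) if m else None

assert _example_count('53,861 likes') == 53861  # full count from the accessibility label
assert _example_count('53K') == 53              # abbreviated count, only useful for detecting 0
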
def _extract_watch_info_desktop(top_level):
    info = {
        'comment_count': None,
        'comments_disabled': None,
        'allowed_countries': [],
        'limited_state': None,
        'playlist': None,
    }

    video_info = {}
    for renderer in deep_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', default=()):
        if renderer and list(renderer.keys())[0] in ('videoPrimaryInfoRenderer', 'videoSecondaryInfoRenderer'):
            video_info.update(list(renderer.values())[0])

    info.update(_extract_metadata_row_info(video_info))
    info['description'] = extract_str(video_info.get('description', None), recover_urls=True)
    info['time_published'] = extract_date(extract_str(video_info.get('dateText', None)))

    likes_dislikes = deep_get(video_info, 'sentimentBar', 'sentimentBarRenderer', 'tooltip', default='').split('/')
    if len(likes_dislikes) == 2:
        info['like_count'] = extract_int(likes_dislikes[0])
        info['dislike_count'] = extract_int(likes_dislikes[1])
    else:
        info['like_count'] = None
        info['dislike_count'] = None

    info['title'] = extract_str(video_info.get('title', None))
    info['author'] = extract_str(deep_get(video_info, 'owner', 'videoOwnerRenderer', 'title'))
    info['author_id'] = deep_get(video_info, 'owner', 'videoOwnerRenderer', 'navigationEndpoint', 'browseEndpoint', 'browseId')
    info['view_count'] = extract_int(extract_str(deep_get(video_info, 'viewCount', 'videoViewCountRenderer', 'viewCount')))

    related = deep_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results', default=[])
    info['related_videos'] = [extract_item_info(renderer) for renderer in related]

    return info

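# Illustrative sketch (made-up numbers): the desktop sentiment bar tooltip is a single
# "likes / dislikes" string, which is why it is split on '/' above.
_example_tooltip = '12,345 / 678'
_example_likes, _example_dislikes = [int(part.strip().replace(',', '')) for part in _example_tooltip.split('/')]
assert (_example_likes, _example_dislikes) == (12345, 678)
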
def update_format_with_codec_info(fmt, codec):
    if (codec.startswith('av')
            or codec in ('vp9', 'vp8', 'vp8.0', 'h263', 'h264', 'mp4v')):
        if codec == 'vp8.0':
            codec = 'vp8'
        conservative_update(fmt, 'vcodec', codec)
    elif (codec.startswith('mp4a')
            or codec in ('opus', 'mp3', 'aac', 'dtse', 'ec-3', 'vorbis')):
        conservative_update(fmt, 'acodec', codec)
    else:
        print('Warning: unrecognized codec: ' + codec)

fmt_type_re = re.compile(
    r'(text|audio|video)/([\w0-9]+); codecs="([\w0-9\.]+(?:, [\w0-9\.]+)*)"')
def update_format_with_type_info(fmt, yt_fmt):
    # 'type' for invidious api format
    mime_type = multi_get(yt_fmt, 'mimeType', 'type')
    if mime_type is None:
        return
    match = re.fullmatch(fmt_type_re, mime_type)

    type, fmt['ext'], codecs = match.groups()
    codecs = codecs.split(', ')
    for codec in codecs:
        update_format_with_codec_info(fmt, codec)
    if type == 'audio':
        assert len(codecs) == 1

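# Illustrative sketch (made-up format entry): how fmt_type_re splits a typical mimeType.
# 'avc1.*' is then filed under vcodec and 'mp4a.*' under acodec by update_format_with_codec_info.
_example_mime = 'video/mp4; codecs="avc1.4d401f, mp4a.40.2"'
_example_match = re.fullmatch(fmt_type_re, _example_mime)
assert _example_match.groups() == ('video', 'mp4', 'avc1.4d401f, mp4a.40.2')
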
def _extract_formats(info, player_response):
    streaming_data = player_response.get('streamingData', {})
    yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])

    info['formats'] = []
    # because we may retry the extract_formats with a different player_response
    # so keep what we have
    conservative_update(info, 'hls_manifest_url',
        streaming_data.get('hlsManifestUrl'))
    conservative_update(info, 'dash_manifest_url',
        streaming_data.get('dash_manifest_url'))

    for yt_fmt in yt_formats:
        itag = yt_fmt.get('itag')

        fmt = {}
        fmt['itag'] = itag
        fmt['ext'] = None
        fmt['audio_bitrate'] = None
        fmt['acodec'] = None
        fmt['vcodec'] = None
        fmt['width'] = yt_fmt.get('width')
        fmt['height'] = yt_fmt.get('height')
        fmt['file_size'] = yt_fmt.get('contentLength')
        fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate')
        fmt['fps'] = yt_fmt.get('fps')
        update_format_with_type_info(fmt, yt_fmt)
        cipher = dict(urllib.parse.parse_qsl(multi_get(yt_fmt,
            'cipher', 'signatureCipher', default='')))
        if cipher:
            fmt['url'] = cipher.get('url')
        else:
            fmt['url'] = yt_fmt.get('url')
        fmt['s'] = cipher.get('s')
        fmt['sp'] = cipher.get('sp')

        # update with information from big table
        hardcoded_itag_info = _formats.get(str(itag), {})
        for key, value in hardcoded_itag_info.items():
            conservative_update(fmt, key, value) # prefer info from Youtube
        fmt['quality'] = hardcoded_itag_info.get('height')

        info['formats'].append(fmt)

    # get ip address
    if info['formats']:
        query_string = (info['formats'][0].get('url') or '?').split('?')[1]
        info['ip_address'] = deep_get(
            urllib.parse.parse_qs(query_string), 'ip', 0)
    else:
        info['ip_address'] = None

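# Illustrative sketch (made-up values): what a 'signatureCipher' query string decomposes
# into. fmt['url'] comes from the 'url' field, while 's' and 'sp' are kept so the
# signature can be decrypted and appended later by decrypt_signatures().
_example_cipher = dict(urllib.parse.parse_qsl(
    's=ABCDEF&sp=sig&url=https%3A%2F%2Fexample.googlevideo.com%2Fvideoplayback%3Fitag%3D22'))
assert _example_cipher['sp'] == 'sig'
assert _example_cipher['url'].startswith('https://example.googlevideo.com/')
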
hls_regex = re.compile(r'[\w_-]+=(?:"[^"]+"|[^",]+),')
def extract_hls_formats(hls_manifest):
    '''returns hls_formats, err'''
    hls_formats = []
    try:
        lines = hls_manifest.splitlines()
        i = 0
        while i < len(lines):
            if lines[i].startswith('#EXT-X-STREAM-INF'):
                fmt = {'acodec': None, 'vcodec': None, 'height': None,
                       'width': None, 'fps': None, 'audio_bitrate': None,
                       'itag': None, 'file_size': None,
                       'audio_sample_rate': None, 'url': None}
                properties = lines[i].split(':')[1]
                properties += ',' # make regex work for last key-value pair

                for pair in hls_regex.findall(properties):
                    key, value = pair.rstrip(',').split('=')
                    if key == 'CODECS':
                        for codec in value.strip('"').split(','):
                            update_format_with_codec_info(fmt, codec)
                    elif key == 'RESOLUTION':
                        fmt['width'], fmt['height'] = map(int, value.split('x'))
                        fmt['resolution'] = value
                    elif key == 'FRAME-RATE':
                        fmt['fps'] = int(value)
                i += 1
                fmt['url'] = lines[i]
                assert fmt['url'].startswith('http')
                fmt['ext'] = 'm3u8'
                hls_formats.append(fmt)
            i += 1
    except Exception as e:
        traceback.print_exc()
        return [], str(e)
    return hls_formats, None

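# Illustrative sketch (made-up manifest fragment): extract_hls_formats() pulls one format
# per #EXT-X-STREAM-INF line, taking the stream URL from the following line.
_example_manifest = (
    '#EXTM3U\n'
    '#EXT-X-STREAM-INF:BANDWIDTH=1000000,RESOLUTION=1280x720,FRAME-RATE=30,CODECS="avc1.4d401f,mp4a.40.2"\n'
    'https://example.com/720p/index.m3u8\n'
)
_example_hls_formats, _example_hls_err = extract_hls_formats(_example_manifest)
assert _example_hls_err is None
assert _example_hls_formats[0]['width'] == 1280 and _example_hls_formats[0]['fps'] == 30
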
def _extract_playability_error(info, player_response, error_prefix=''):
    if info['formats']:
        info['playability_status'] = None
        info['playability_error'] = None
        return

    playability_status = deep_get(player_response, 'playabilityStatus', 'status', default=None)
    info['playability_status'] = playability_status

    playability_reason = extract_str(multi_deep_get(player_response,
        ['playabilityStatus', 'reason'],
        ['playabilityStatus', 'errorScreen', 'playerErrorMessageRenderer', 'reason'],
        default='Could not find playability error')
    )

    if playability_status not in (None, 'OK'):
        info['playability_error'] = error_prefix + playability_reason
    elif not info['playability_error']: # do not override
        info['playability_error'] = error_prefix + 'Unknown playability error'

SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
def extract_watch_info(polymer_json):
    info = {'playability_error': None, 'error': None}

    if isinstance(polymer_json, dict):
        top_level = polymer_json
    elif isinstance(polymer_json, (list, tuple)):
        top_level = {}
        for page_part in polymer_json:
            if not isinstance(page_part, dict):
                return {'error': 'Invalid page part'}
            top_level.update(page_part)
    else:
        return {'error': 'Invalid top level polymer data'}

    error = check_missing_keys(top_level,
        ['player', 'args'],
        ['player', 'assets', 'js'],
        ['playerResponse'],
    )
    if error:
        info['playability_error'] = error

    player_response = top_level.get('playerResponse', {})

    # usually, only the embedded one has the urls
    player_args = deep_get(top_level, 'player', 'args', default={})
    if 'player_response' in player_args:
        embedded_player_response = json.loads(player_args['player_response'])
    else:
        embedded_player_response = {}

    # captions
    info['automatic_caption_languages'] = []
    info['manual_caption_languages'] = []
    info['_manual_caption_language_names'] = {} # language name written in that language, needed in some cases to create the url
    info['translation_languages'] = []
    captions_info = player_response.get('captions', {})
    info['_captions_base_url'] = normalize_url(deep_get(captions_info, 'playerCaptionsRenderer', 'baseUrl'))
    for caption_track in deep_get(captions_info, 'playerCaptionsTracklistRenderer', 'captionTracks', default=()):
        lang_code = caption_track.get('languageCode')
        if not lang_code:
            continue
        if caption_track.get('kind') == 'asr':
            info['automatic_caption_languages'].append(lang_code)
        else:
            info['manual_caption_languages'].append(lang_code)
        base_url = caption_track.get('baseUrl', '')
        lang_name = deep_get(urllib.parse.parse_qs(urllib.parse.urlparse(base_url).query), 'name', 0)
        if lang_name:
            info['_manual_caption_language_names'][lang_code] = lang_name

    for translation_lang_info in deep_get(captions_info, 'playerCaptionsTracklistRenderer', 'translationLanguages', default=()):
        lang_code = translation_lang_info.get('languageCode')
        if lang_code:
            info['translation_languages'].append(lang_code)
        if translation_lang_info.get('isTranslatable') == False:
            print('WARNING: Found non-translatable caption language')

    # formats
    _extract_formats(info, embedded_player_response)
    if not info['formats']:
        _extract_formats(info, player_response)

    # playability errors
    _extract_playability_error(info, player_response)

    # check age-restriction
    info['age_restricted'] = (info['playability_status'] == 'LOGIN_REQUIRED' and info['playability_error'] and ' age' in info['playability_error'])

    # base_js (for decryption of signatures)
    info['base_js'] = deep_get(top_level, 'player', 'assets', 'js')
    if info['base_js']:
        info['base_js'] = normalize_url(info['base_js'])
        # must uniquely identify url
        info['player_name'] = urllib.parse.urlparse(info['base_js']).path
    else:
        info['player_name'] = None

    # extract stuff from visible parts of page
    mobile = 'singleColumnWatchNextResults' in deep_get(top_level, 'response', 'contents', default={})
    if mobile:
        info.update(_extract_watch_info_mobile(top_level))
    else:
        info.update(_extract_watch_info_desktop(top_level))

    # stuff from videoDetails. Use liberal_update to prioritize info from videoDetails over existing info
    vd = deep_get(top_level, 'playerResponse', 'videoDetails', default={})
    liberal_update(info, 'title', extract_str(vd.get('title')))
    liberal_update(info, 'duration', extract_int(vd.get('lengthSeconds')))
    liberal_update(info, 'view_count', extract_int(vd.get('viewCount')))
    # videos with no description have a blank string
    liberal_update(info, 'description', vd.get('shortDescription'))
    liberal_update(info, 'id', vd.get('videoId'))
    liberal_update(info, 'author', vd.get('author'))
    liberal_update(info, 'author_id', vd.get('channelId'))
    info['was_live'] = vd.get('isLiveContent')
    conservative_update(info, 'unlisted', not vd.get('isCrawlable', True)) #isCrawlable is false on limited state videos even if they aren't unlisted
    liberal_update(info, 'tags', vd.get('keywords', []))

    # fallback stuff from microformat
    mf = deep_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={})
    conservative_update(info, 'title', extract_str(mf.get('title')))
    conservative_update(info, 'duration', extract_int(mf.get('lengthSeconds')))
    # this gives the view count for limited state videos
    conservative_update(info, 'view_count', extract_int(mf.get('viewCount')))
    conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
    conservative_update(info, 'author', mf.get('ownerChannelName'))
    conservative_update(info, 'author_id', mf.get('externalChannelId'))
    conservative_update(info, 'live', deep_get(mf, 'liveBroadcastDetails',
        'isLiveNow'))
    liberal_update(info, 'unlisted', mf.get('isUnlisted'))
    liberal_update(info, 'category', mf.get('category'))
    liberal_update(info, 'time_published', mf.get('publishDate'))
    liberal_update(info, 'time_uploaded', mf.get('uploadDate'))

    # other stuff
    info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
    return info

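# Illustrative sketch of the expected calling sequence; `fetch` and the exact watch-page
# URL parameters are assumptions for illustration, not part of this module:
#
#   polymer_json = json.loads(fetch('https://m.youtube.com/watch?v=VIDEO_ID&pbj=1'))
#   info = extract_watch_info(polymer_json)
#   if requires_decryption(info):
#       err = extract_decryption_function(info, fetch(info['base_js']))
#       if not err:
#           decrypt_signatures(info)
#   usable_formats = [f for f in info['formats'] if f['url']]
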
def get_caption_url(info, language, format, automatic=False, translation_language=None):
    '''Gets the url for captions with the given language and format. If automatic is True, get the automatic captions for that language. If translation_language is given, translate the captions from `language` to `translation_language`. If automatic is true and translation_language is given, the automatic captions will be translated.'''
    url = info['_captions_base_url']
    url += '&lang=' + language
    url += '&fmt=' + format
    if automatic:
        url += '&kind=asr'
    elif language in info['_manual_caption_language_names']:
        url += '&name=' + urllib.parse.quote(info['_manual_caption_language_names'][language], safe='')

    if translation_language:
        url += '&tlang=' + translation_language
    return url

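# Illustrative sketch: with an `info` dict returned by extract_watch_info for a video
# that actually has captions, a vtt URL for English auto-generated captions would be
#   get_caption_url(info, 'en', 'vtt', automatic=True)
# and a Spanish translation of the English track would be
#   get_caption_url(info, 'en', 'vtt', translation_language='es')
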
def update_with_age_restricted_info(info, video_info_page):
    ERROR_PREFIX = 'Error bypassing age-restriction: '

    video_info = urllib.parse.parse_qs(video_info_page)
    player_response = deep_get(video_info, 'player_response', 0)
    if player_response is None:
        info['playability_error'] = ERROR_PREFIX + 'Could not find player_response in video_info_page'
        return
    try:
        player_response = json.loads(player_response)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        info['playability_error'] = ERROR_PREFIX + 'Failed to parse json response'
        return

    _extract_formats(info, player_response)
    _extract_playability_error(info, player_response, error_prefix=ERROR_PREFIX)

def requires_decryption(info):
    return ('formats' in info) and info['formats'] and info['formats'][0]['s']

# adapted from youtube-dl and invidious:
# https://github.com/omarroth/invidious/blob/master/src/invidious/helpers/signatures.cr
decrypt_function_re = re.compile(r'function\(a\)\{(a=a\.split\(""\)[^\}{]+)return a\.join\(""\)\}')
op_with_arg_re = re.compile(r'[^\.]+\.([^\(]+)\(a,(\d+)\)')
def extract_decryption_function(info, base_js):
    '''Insert decryption function into info. Return error string if not successful.
    Decryption function is a list of list[2] of numbers.
    It is advisable to cache the decryption function (uniquely identified by info['player_name']) so base.js (1 MB) doesn't need to be redownloaded each time'''
    info['decryption_function'] = None
    decrypt_function_match = decrypt_function_re.search(base_js)
    if decrypt_function_match is None:
        return 'Could not find decryption function in base.js'

    function_body = decrypt_function_match.group(1).split(';')[1:-1]
    if not function_body:
        return 'Empty decryption function body'

    var_name = get(function_body[0].split('.'), 0)
    if var_name is None:
        return 'Could not find var_name'

    var_body_match = re.search(r'var ' + re.escape(var_name) + r'=\{(.*?)\};', base_js, flags=re.DOTALL)
    if var_body_match is None:
        return 'Could not find var_body'

    operations = var_body_match.group(1).replace('\n', '').split('},')
    if not operations:
        return 'Did not find any definitions in var_body'
    operations[-1] = operations[-1][:-1] # remove the trailing '}' since we split by '},' on the others
    operation_definitions = {}
    for op in operations:
        colon_index = op.find(':')
        opening_brace_index = op.find('{')

        if colon_index == -1 or opening_brace_index == -1:
            return 'Could not parse operation'
        op_name = op[:colon_index]
        op_body = op[opening_brace_index+1:]
        if op_body == 'a.reverse()':
            operation_definitions[op_name] = 0
        elif op_body == 'a.splice(0,b)':
            operation_definitions[op_name] = 1
        elif op_body.startswith('var c=a[0]'):
            operation_definitions[op_name] = 2
        else:
            return 'Unknown op_body: ' + op_body

    decryption_function = []
    for op_with_arg in function_body:
        match = op_with_arg_re.fullmatch(op_with_arg)
        if match is None:
            return 'Could not parse operation with arg'
        op_name = match.group(1)
        if op_name not in operation_definitions:
            return 'Unknown op_name: ' + op_name
        op_argument = match.group(2)
        decryption_function.append([operation_definitions[op_name], int(op_argument)])

    info['decryption_function'] = decryption_function
    return False

def _operation_2(a, b):
    c = a[0]
    a[0] = a[b % len(a)]
    a[b % len(a)] = c

def decrypt_signatures(info):
    '''Applies info['decryption_function'] to decrypt all the signatures. Return err.'''
    if not info.get('decryption_function'):
        return 'decryption_function not in info'
    for format in info['formats']:
        if not format['s'] or not format['sp'] or not format['url']:
            print('Warning: s, sp, or url not in format')
            continue

        a = list(format['s'])
        for op, argument in info['decryption_function']:
            if op == 0:
                a.reverse()
            elif op == 1:
                a = a[argument:]
            else:
                _operation_2(a, argument)

        signature = ''.join(a)
        format['url'] += '&' + format['sp'] + '=' + signature
    return False
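# Illustrative sketch (made-up decryption function): [[0, 0], [1, 3], [2, 5]] means
# reverse, splice off the first 3 characters, then swap index 0 with index 5.
_example_sig = list('abcdefghij')
for _op, _arg in [[0, 0], [1, 3], [2, 5]]:
    if _op == 0:
        _example_sig.reverse()
    elif _op == 1:
        _example_sig = _example_sig[_arg:]
    else:
        _operation_2(_example_sig, _arg)
assert ''.join(_example_sig) == 'bfedcga'
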
@ -1,4 +1,4 @@
from youtube_data import proto, utils
from youtube_data import proto
from flask import Markup as mk
import requests
import base64

@ -1,130 +0,0 @@
from youtube_data import proto
import json
import base64
import urllib
import requests
import re
import bleach
from flask import Markup

URL_ORIGIN = "/https://www.youtube.com"

def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
    video_id = proto.as_bytes(video_id)
    secret_key = proto.as_bytes(secret_key)


    page_info = proto.string(4,video_id) + proto.uint(6, sort)
    offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
    if secret_key:
        offset_information = proto.string(1, secret_key) + offset_information

    page_params = proto.string(2, video_id)
    if lc:
        page_params += proto.string(6, proto.percent_b64encode(proto.string(15, lc)))

    result = proto.nested(2, page_params) + proto.uint(3,6) + proto.nested(6, offset_information)
    return base64.urlsafe_b64encode(result).decode('ascii')

def comment_replies_ctoken(video_id, comment_id, max_results=500):

    params = proto.string(2, comment_id) + proto.uint(9, max_results)
    params = proto.nested(3, params)

    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params)
    return base64.urlsafe_b64encode(result).decode('ascii')

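# Illustrative sketch (sample video id, made-up comment id): the ctokens are plain
# protobuf blobs, so they can be built offline. sort=1 requests newest-first comments.
_example_ctoken = make_comment_ctoken('dQw4w9WgXcQ', sort=1, offset=0)
_example_replies_ctoken = comment_replies_ctoken('dQw4w9WgXcQ', 'UgzExampleCommentId')
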
mobile_headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'X-YouTube-Client-Name': '2',
    'X-YouTube-Client-Version': '2.20180823',
}
def request_comments(ctoken, replies=False):
    if replies: # let's make it use different urls for no reason despite all the data being encoded
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for i in range(0, 8): # don't retry more than 8 times
        content = requests.get(url, headers=mobile_headers).content # bytes, matching the byte prefixes below
        if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
            content = content[4:]
        elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break

    polymer_json = json.loads(content)
    return polymer_json

def single_comment_ctoken(video_id, comment_id):
    page_params = proto.string(2, video_id) + proto.string(6, proto.percent_b64encode(proto.string(15, comment_id)))

    result = proto.nested(2, page_params) + proto.uint(3,6)
    return base64.urlsafe_b64encode(result).decode('ascii')


def concat_texts(strings):
    '''Concatenates strings. Returns None if any of the arguments are None'''
    result = ''
    for string in strings:
        if string['text'] is None:
            return None
        result += string['text']
    return result

def parse_comment(raw_comment):
    cmnt = {}
    print(raw_comment)
    raw_comment = raw_comment['commentThreadRenderer']['comment']['commentRenderer']
    imgHostName = urllib.parse.urlparse(raw_comment['authorThumbnail']['thumbnails'][0]['url']).netloc
    cmnt['author'] = raw_comment['authorText']['runs'][0]['text']
    cmnt['thumbnail'] = raw_comment['authorThumbnail']['thumbnails'][0]['url'].replace("https://{}".format(imgHostName), "")+"?host="+imgHostName
    cmnt['channel'] = raw_comment['authorEndpoint']['commandMetadata']['webCommandMetadata']['url']
    cmnt['text'] = Markup(bleach.linkify(concat_texts(raw_comment['contentText']['runs']).replace("\n", "<br>")))
    cmnt['date'] = raw_comment['publishedTimeText']['runs'][0]['text']

    try:
        cmnt['creatorHeart'] = raw_comment['creatorHeart']['creatorHeartRenderer']['creatorThumbnail']['thumbnails'][0]['url']
    except:
        cmnt['creatorHeart'] = False

    try:
        cmnt['likes'] = raw_comment['likeCount']
    except:
        cmnt['likes'] = 0

    try:
        cmnt['replies'] = raw_comment['replyCount']
    except:
        cmnt['replies'] = 0

    cmnt['authorIsChannelOwner'] = raw_comment['authorIsChannelOwner']
    try:
        cmnt['pinned'] = raw_comment['pinnedCommentBadge']
        cmnt['pinned'] = True
    except:
        cmnt['pinned'] = False
    return cmnt

def post_process_comments_info(comments_info):
    comments = []
    for comment in comments_info[1]['response']['continuationContents']['commentSectionContinuation']['items']:
        comments.append(parse_comment(comment))
    return comments


def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
    comments_info = request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))
    comments_info = post_process_comments_info(comments_info)
    return comments_info

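# Illustrative sketch (sample id): end-to-end comment fetch. This performs a network
# request, so it is left as a comment.
#   comments = video_comments('dQw4w9WgXcQ', sort=0, offset=0)
#   Each entry is a dict with 'author', 'text', 'date', 'likes', 'replies',
#   'thumbnail', 'channel', 'pinned' and 'authorIsChannelOwner' keys.
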
@ -1,5 +1,5 @@
from youtube_data import proto, utils
from bs4 import BeautifulSoup as bs
from youtube_data import proto
from youtube import utils
from flask import Markup
import urllib.parse
import requests

@ -1,12 +0,0 @@
def get_description_snippet_text(ds):
    string = ""
    for t in ds:
        try:
            if t['bold']:
                text = "<b>"+t['text']+"</b>"
            else:
                text = t['text']
        except:
            text = t['text']
        string = string + text
    return string
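# Illustrative sketch (made-up runs): bold runs are wrapped in <b> tags, everything else
# is passed through unchanged.
assert get_description_snippet_text(
    [{'text': 'official ', 'bold': False}, {'text': 'video', 'bold': True}]
) == 'official <b>video</b>'
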
@ -1,281 +0,0 @@
from bs4 import BeautifulSoup as bs
from urllib.parse import unquote
from youtube_dl import YoutubeDL
import urllib.parse
import requests
import json

# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), audio_bitrate varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'h264'},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'vcodec': 'av01.0.05M.08'},
    '395': {'vcodec': 'av01.0.05M.08'},
    '396': {'vcodec': 'av01.0.05M.08'},
    '397': {'vcodec': 'av01.0.05M.08'},
}

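# Illustrative sketch: the table above supplies the static properties youtube-dl knows
# for each itag; e.g. itag 22 is a 1280x720 mp4 with h264 video and 192 kbps aac audio.
assert _formats['22']['height'] == 720 and _formats['22']['vcodec'] == 'h264'
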
def get_renderer_key(renderer, key):
    for k in renderer:
        if key in k:
            return k[key]

def get_video_primary_info(datad, datai):
    contents = datai["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoPrimaryInfoRenderer")
    details = datad['videoDetails']

    # Check if is Livestream
    if details.get('isLive') and details['lengthSeconds'] == '0':
        isLive = True
    else:
        isLive = False

    # Check if is a Scheduled video
    if details.get('isUpcoming') == True:
        isUpcoming = True
        views = "Scheduled video"
        premieres = item['dateText']['simpleText']
        audioURL = False
    else:
        isUpcoming = False
        premieres = False
        views = details['viewCount']

    ydl = YoutubeDL()

    if isUpcoming == False:
        data = ydl.extract_info(details['videoId'], False)
        while not data['formats']:
            data = ydl.extract_info(details['videoId'], False)
        formats = data['formats']

        ## Get audio
        audio_urls = []
        for f in data['formats']:
            for fid in _formats:
                if f['format_id'] == fid:
                    try:
                        if 'audio' in _formats[fid]['format_note']:
                            aurl = f['url']
                            fnote = _formats[fid]['format_note']
                            bitrate = _formats[fid]['audio_bitrate']
                            audio_inf = {
                                "url":aurl,
                                "id":fnote,
                                "btr": bitrate
                            }
                            audio_urls.append(audio_inf)
                    except:
                        continue
        if not isLive:
            audioURL = audio_urls[-1]['url']
        else:
            audioURL = False
    else: # If it is a scheduled video
        audio_urls = False
        formats = False
    try:
        primaryInfo = {
            "id": details['videoId'],
            "title": details['title'],
            "description": details['shortDescription'],
            "views": views,
            "duration": details['lengthSeconds'],
            "date": item['dateText']['simpleText'],
            "rating": details['averageRating'],
            "author": details['author'],
            "isPrivate": details['isPrivate'],
            "isLive": isLive,
            "isUpcoming": isUpcoming,
            "url": "https://youtube.com/watch?v=" + details['videoId'],
            "allowRatings": details['allowRatings'],
            "urls":formats,
            "thumbnail": details['thumbnail']['thumbnails'][0]['url'],
            "audio": audioURL,
            "premieres": premieres
        }
    except:
        # If error take only most common items
        primaryInfo = {
            "id": details['videoId'],
            "title": details['title'],
            "description": details['shortDescription'],
            "views": details['viewCount'],
            "duration": details['lengthSeconds'],
            "date": item['dateText']['simpleText'],
            "rating": details['averageRating'],
            "author": details['author'],
            "isPrivate":False,
            "isLive":isLive,
            "isUpcoming":isUpcoming,
            "allowRatings":True,
            "urls":formats,
            "thumbnail": details['thumbnail']['thumbnails'][0]['url'],
            "audio": audioURL,
            "premieres": premieres
        }
    return primaryInfo

def get_video_owner_info(data):
    contents = data["contents"]["twoColumnWatchNextResults"]['results']['results']['contents']
    item = get_renderer_key(contents, "videoSecondaryInfoRenderer")
    ownerItem = item['owner']['videoOwnerRenderer']

    try:
        sC = ownerItem['subscriberCountText']['runs'][0]['text']
    except:
        sC = "Unknown"
    ownerInfo = {
        "thumbnail": ownerItem['thumbnail']['thumbnails'][0]['url'],
        "username": ownerItem['title']['runs'][0]['text'],
        "id": ownerItem['title']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'],
        "suscriberCount":sC
    }
    return ownerInfo

def get_video_info(id):
    headers = {"Accept-Language": "en-US,en;q=0.5"}
    encoded_search = urllib.parse.quote(id)
    BASE_URL = "https://youtube.com"

    url = f"{BASE_URL}/watch?v={encoded_search}"
    response = requests.get(url, headers=headers).text

    while 'window["ytInitialData"]' not in response:
        response = requests.get(url, headers=headers).text

    start = (
        response.index('window["ytInitialData"]')
        + len('window["ytInitialData"]')
        + 3
    )

    start2 = (
        response.index('window["ytInitialPlayerResponse"]')
        + len('window["ytInitialPlayerResponse"]') + 3
    )

    end1 = response.index("};", start) + 1
    end2 = response.index("};", start2) + 1
    jsonIni = response[start:end1]
    dataInitial = json.loads(jsonIni)

    try:
        jsonDet = response[start2:end2]
        dataDetails = json.loads(jsonDet)
    except:
        # retry with a fresh copy of the page and recompute the slice bounds
        response = requests.get(url, headers=headers).text
        start2 = (
            response.index('window["ytInitialPlayerResponse"]')
            + len('window["ytInitialPlayerResponse"]') + 3
        )
        end2 = response.index("};", start2) + 1
        jsonDet = response[start2:end2]
        dataDetails = json.loads(jsonDet)


    #title, views, date
    videoInfo = get_video_primary_info(dataDetails, dataInitial)
    ownerInfo = get_video_owner_info(dataInitial)

    '''soup = bs(response, "html.parser")
    soup = str(str(soup.find("div", attrs={"id":"player-wrap"}).find_all("script")).split("ytplayer.config =")[1]).split("url")
    for url in soup:
        if "googlevideo" in url:
            print(unquote(url.replace("\\", "")))'''
    info = {"video":videoInfo, "owner":ownerInfo}
    return info
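# Illustrative sketch (sample id): the assembled result keeps video and owner data apart.
# This performs network requests, so it is left as a comment.
#   info = get_video_info('dQw4w9WgXcQ')
#   info['video']['title'], info['video']['urls'], info['video']['audio'],
#   info['owner']['username'], info['owner']['suscriberCount'], ...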