Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
66d404765e
Bump chardet from 3.0.4 to 4.0.0
Bumps [chardet](https://github.com/chardet/chardet) from 3.0.4 to 4.0.0.
- [Release notes](https://github.com/chardet/chardet/releases)
- [Commits](https://github.com/chardet/chardet/compare/3.0.4...4.0.0)

Signed-off-by: dependabot[bot] <support@github.com>
2021-02-03 07:38:24 +00:00
20 changed files with 88 additions and 331 deletions

View File

@ -8,4 +8,3 @@ docker-compose.yml
LICENSE
*.md
dockerhash.txt
app/static

View File

@ -11,20 +11,20 @@ jobs:
cpython-build-docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2.3.4
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Set up QEMU
uses: docker/setup-qemu-action@v1.2.0
uses: docker/setup-qemu-action@v1
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1.5.1
uses: docker/setup-buildx-action@v1
with:
version: latest
- name: Login to DockerHub
uses: docker/login-action@v1.10.0
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
@ -33,14 +33,14 @@ jobs:
- name: Write the current version to a file
run: "{ git describe --tags --abbrev=0 & date +\"%d-%m-%y\" & git rev-list HEAD --max-count=1 --abbrev-commit;} > version.txt"
- name: cache docker cache
uses: actions/cache@v2.1.6
uses: actions/cache@v2.1.3
with:
path: ${{ github.workspace }}/cache
key: ${{ runner.os }}-docker-cpython-${{ hashFiles('**/requirements.txt') }}-${{ hashFiles('**/dockerhash.txt') }}
restore-keys: |
${{ runner.os }}-docker-cpython-
- name: Build and push
uses: docker/build-push-action@v2.6.1
uses: docker/build-push-action@v2
with:
context: .
file: ./Dockerfile
@ -52,20 +52,20 @@ jobs:
pypy-build-docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2.3.4
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Set up QEMU
uses: docker/setup-qemu-action@v1.2.0
uses: docker/setup-qemu-action@v1
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1.5.1
uses: docker/setup-buildx-action@v1
with:
version: latest
- name: Login to DockerHub
uses: docker/login-action@v1.10.0
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
@ -74,14 +74,14 @@ jobs:
- name: Write the current version to a file
run: "{ git describe --tags --abbrev=0 & date +\"%d-%m-%y\" & git rev-list HEAD --max-count=1 --abbrev-commit;} > version.txt"
- name: cache docker cache
uses: actions/cache@v2.1.6
uses: actions/cache@v2.1.3
with:
path: ${{ github.workspace }}/cache
key: ${{ runner.os }}-docker-pypy-${{ hashFiles('**/requirements.txt') }}-${{ hashFiles('**/dockerhash.txt') }}
restore-keys: |
${{ runner.os }}-docker-pypy-
- name: Build and push
uses: docker/build-push-action@v2.6.1
uses: docker/build-push-action@v2
with:
context: .
file: ./pypy.Dockerfile
@ -90,44 +90,3 @@ jobs:
tags: ytorg/yotter:pypy
cache-from: type=local,src=cache
cache-to: type=local,dest=cache
nginx-build-docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2.3.4
with:
fetch-depth: 0
- name: Set up QEMU
uses: docker/setup-qemu-action@v1.2.0
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1.5.1
with:
version: latest
- name: Login to DockerHub
uses: docker/login-action@v1.10.0
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Get hash of latest image
run: docker pull nginx:mainline-alpine && docker inspect --format='{{index .RepoDigests 0}}' nginx:mainline-alpine > dockerhash.txt
- name: Write the current version to a file
run: "{ git describe --tags --abbrev=0 & date +\"%d-%m-%y\" & git rev-list HEAD --max-count=1 --abbrev-commit;} > version.txt"
- name: cache docker cache
uses: actions/cache@v2.1.6
with:
path: ${{ github.workspace }}/cache
key: ${{ runner.os }}-docker-nginx-${{ hashFiles('**/dockerhash.txt') }}
restore-keys: |
${{ runner.os }}-docker-nginx-
- name: Build and push
uses: docker/build-push-action@v2.6.1
with:
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ytorg/nginx:latest
cache-from: type=local,src=cache
cache-to: type=local,dest=cache

View File

@ -8,7 +8,7 @@ WORKDIR /usr/src/app
COPY ./requirements.txt /usr/src/app
# Build Dependencies
RUN apk --no-cache add gcc musl-dev libffi-dev openssl-dev libxml2-dev libxslt-dev file llvm-dev make g++ cargo rust
RUN apk --no-cache add gcc musl-dev libffi-dev openssl-dev libxml2-dev libxslt-dev file llvm-dev make g++
# Python Dependencies
RUN pip install --no-cache-dir --prefix=/install wheel cryptography gunicorn pymysql

View File

@ -1,12 +1,9 @@
## This project is no longer maintained. Visit [this repo](https://github.com/TeamPiped/Piped) for an alternative.
<p align="center"> <img width="700" src="app/static/img/banner.png"> </img></p>
<p align="center">
<a href="https://www.gnu.org/licenses/gpl-3.0"><img alt="License: GPL v3" src="https://img.shields.io/badge/License-AGPLv3-blue.svg"></img></a>
<a href="https://github.com/pluja/Yotter"><img alt="Development state" src="https://img.shields.io/badge/State-Beta-blue.svg"></img></a>
<a href="https://github.com/pluja/Yotter/pulls"><img alt="Pull Requests Welcome" src="https://img.shields.io/badge/PRs-Welcome-green.svg"></img></a>
<a href="https://git.kavin.rocks/kavin/Yotter"><img alt="Mirror 1" src="https://img.shields.io/badge/Mirror1-git.kavin.rocks-teal"></img></a>
<a href="https://84.38.177.154/libremirrors/ytorg/Yotter"><img alt="Mirror 2" src="https://img.shields.io/badge/Mirror2-git.rip-purple"></img></a>
<a href="https://git.rip/libremirrors/ytorg/Yotter"><img alt="Mirror" src="https://img.shields.io/badge/Mirror-git.rip-purple"></img></a>
</p>
Yotter allows you to follow and gather all the content from your favorite Twitter and YouTube accounts in a *beautiful* feed so you can stay up to date without compromising your privacy at all. Yotter is written with Python and Flask and uses Semantic-UI as its CSS framework.
@ -16,7 +13,6 @@ Yotter is possible thanks to several open-source projects that are listed on the
# Index:
* [Why](#why)
* [Features](#features)
* [Roadmap](#roadmap)
* [FAQ](#FAQ)
* [Privacy and Security](#privacy)
* [Public instances](#public-instances)
@ -34,7 +30,7 @@ With the *particular* data about you, they can get money from the highest bidder
Further more, they don't care about **what you in particular watch**, this is only sold to the highest bidder who then may or may not do the harm. What they care more about is **what people watch** this is the important data and the one that allows to manipulate, bias, censor, etc.
So we need platforms and spaces where we can freely watch and listen content without these watchful eyes upon us. Ideally, everyone would use a free (as in freedom) and decentralized platform like [Peertube](https://joinpeertube.org/), [Odysee](https://odysee.com/), [Mastodon](https://joinmastodon.org/) or [Pleroma](https://pleroma.social/) but things are not like this. The main multimedia content factory is Youtube and the microblogging king is Twitter. So we will do whatever is possible to be able to watch and read the content and avoid the surveillance that seeks us these days. We will resist.
So we need platforms and spaces where we can freely watch and listen content without these watchful eyes upon us. Ideally, everyone would use a free (as in freedom) and decentralized platform like [Peertube](https://joinpeertube.org/), [LBRY](https://lbry.tv/), [Mastodon](https://joinmastodon.org/) or [Pleroma](https://pleroma.social/) but things are not like this. The main multimedia content factory is Youtube and the microblogging king is Twitter. So we will do whatever is possible to be able to watch and read the content and avoid the surveillance that seeks us these days. We will resist.
# Features:
- [x] No Ads.
@ -53,24 +49,11 @@ So we need platforms and spaces where we can freely watch and listen content wit
*Video player is VideoJS, which uses JavaScript. But if JavaScript is disabled Yotter still works perfectly and uses the default HTML video player.
### Roadmap
The following features are planned to be implemented in the near future:
* [ ] Improve performance and efficiency
#### Youtube specific:
* [ ] Subtitles
* [ ] > 720p Quality
* [ ] Save youtube videos
* [ ] Support for live streams
#### Twitter specific:
* [ ] Translations
# FAQ
### What's the difference between this and Invidious?
At first I started working on this project as a solution for following Twitter accounts (a thing that can't be done with Nitter) and getting a Twitter-like feed. Weeks later the leader of Invidious, Omar Roth, announced that he was stepping away from the project. As an Invidious active user, this made me think that a new alternative was needed for the community and also an alternative with an easier programming language for most people (as Invidious is written in Crystal). So I started developing a '*written-in-python Invidious alternative*' and it went quite well.
At first I started working on this project as a solution for following Twitter accounts (a thing that can't be done with Nitter) and getting a Twitter-like feed. Weeks later the leader of Invidious, Omar Roth, announced that he was stepping away from the project. As an Invidious active user, this made me think that a new alternative was needed for the community and also an alternative with an easier programmin language for most people (as Invidious is written in Crystal). So I started developing a '*written-in-python Invidious alternative*' and it went quite well.
I hope that this project can prosper, gain contributors, new instances and create a good community around it.
I hope that this project can prosperate, gain contributors, new instances and create a good community around it.
### Why do I have to register to use Yotter?
@ -147,14 +130,24 @@ These are projects that either make Yotter possible as an **essential part** of
* [Video.js](https://videojs.com/)
* [Invidious](https://github.com/iv-org/invidious)
# [Donate](https://github.com/pluja/pluja/blob/main/SUPPORT.md)
[Click here to see donation options](https://github.com/pluja/pluja/blob/main/SUPPORT.md)
# Donate
This project is completely free and Open Source and will always be.
Donations are used to mantain the [yotter.xyz](https://yotter.xyz/) public instance. [This is the server](https://www.netcup.eu/bestellen/produkt.php?produkt=2598) that I have rented for now.
#### Crypto:
##### Preferred
- **Bitcoin**: `bc1q5y3g907ju0pt40me7dr9fy5uhfhfgfv9k3kh3z`
- **Monero**: `48nQGAXaC6eFK2Wo7SVVyF9xL333gDHjzdmRL3XETEqbU3w4CcKjjHVUZPU4W3dg1oJL8be3iGtUAQsgV88dzbS7QNpZjC2`
##### Others:
- **Ethereum**: `0x6cf0B1C3354c255F1a876f4833A1BBE82D887Ad6`
- **Litecoin**: `MHjnpYHwu4y4AeQvVBDv52T8V6BzVxmiNZ`
- **ZCash**: `t1a6smU9a6dxGfZWcX9bCRzE31qsaF27FsD`
#### Fiat:
- <a href="https://liberapay.com/pluja/donate"><img alt="Donate using Liberapay" src="https://liberapay.com/assets/widgets/donate.svg"></a>
## Screenshots
#### Twitter / Tweets / Profiles
<p align="center"> <img width="720" src="https://i.imgur.com/tA15ciH.png"> </img></p>

View File

@ -370,7 +370,7 @@ def followYoutubeChannel(channelId):
try:
try:
if not current_user.is_following_yt(channelId):
channelData = ytch.get_channel_tab(channelId, tab='about')
channelData = ytch.get_channel_tab_info(channelId, tab='about')
if channelData == False:
return False
follow = youtubeFollow()
@ -429,7 +429,8 @@ def channel(id):
if sort is None:
sort = 3
data = ytch.get_channel_tab(id, page, sort)
data = ytch.get_channel_tab_info(id, page, sort)
for video in data['items']:
if config['isInstance']:
hostName = urllib.parse.urlparse(video['thumbnail'][1:]).netloc

View File

@ -32,14 +32,14 @@
<form action="{{ url_for('ytfollow', channelId=data.channel_id) }}" method="post">
<button type="submit" value="Submit" class="ui red button">
<i class="user icon"></i>
Subscribe
Suscribe
</button>
</form>
{% else %}
<form action="{{ url_for('ytunfollow', channelId=data.channel_id) }}" method="post">
<button type="submit" value="Submit" class="ui red active button">
<i class="user icon"></i>
Unsubscribe
Unsuscribe
</button>
</form>
{%endif%}

View File

@ -41,9 +41,9 @@
<div class="text container ui">
<div class="ui warning message">
<div class="header">
{{config.admin_message_title|safe}}
{{config.admin_message_title}}
</div>
{{config.admin_message|safe}}
{{config.admin_message}}
</div>
</div>
{% endif %}

View File

@ -6,7 +6,7 @@
{{ form.hidden_tag() }}
<p>
{{ form.username.label }}<br>
{{ form.username(size=32, autofocus=true) }}<br>
{{ form.username(size=32) }}<br>
{% for error in form.username.errors %}
<span style="color: red;">[{{ error }}]</span>
{% endfor %}

View File

@ -4,7 +4,7 @@
<div class="ui center aligned text container">
<form action="{{url_for('ytsearch', _method='GET')}}">
<div class="ui search">
<input class="prompt" name="q" type="text" placeholder="Search..." autofocus>
<input class="prompt" name="q" type="text" placeholder="Search...">
<select name="s" id="sort">
<option value="0">Relevance</option>
<option value="3">Views</option>

View File

@ -12,18 +12,6 @@ services:
- mysql:/var/lib/mysql
healthcheck:
test: ["CMD", "mysqladmin", "ping", "--silent"]
nginx:
image: ytorg/nginx:latest
restart: unless-stopped
environment:
HOSTNAME: 'changeme.example.com'
HTTP_PORT: 8080
YOTTER_ADDRESS: 'http://yotter:5000'
YTPROXY_ADDRESS: 'http://unix:/var/run/ytproxy/http-proxy.sock'
ports:
- "127.0.0.1:8080:8080"
volumes:
- "/var/run/ytproxy:/app/socket/"
ytproxy:
image: 1337kavin/ytproxy:latest
restart: unless-stopped
@ -43,10 +31,6 @@ services:
volumes:
- migrations:/usr/src/app/migrations
- ./yotter-config.json:/usr/src/app/yotter-config.json
healthcheck:
test: ["CMD", "wget" ,"--no-verbose", "--tries=1", "--spider", "http://localhost:5000"]
interval: 1m
timeout: 3s
volumes:
mysql:
migrations:

View File

@ -1,12 +0,0 @@
FROM nginx:mainline-alpine
WORKDIR /var/www
COPY ./app/static ./static
COPY ./nginx.conf.tmpl /nginx.conf.tmpl
ENV HOSTNAME= \
HTTP_PORT=80 \
YOTTER_ADDRESS=http://127.0.0.1:5000 \
YTPROXY_ADDRESS=http://unix:/var/run/ytproxy/http-proxy.sock
CMD ["/bin/sh", "-c", "envsubst '${HOSTNAME} ${HTTP_PORT} ${YOTTER_ADDRESS} ${YTPROXY_ADDRESS}' < /nginx.conf.tmpl > /etc/nginx/conf.d/default.conf && nginx -g 'daemon off;'"]

View File

@ -1,10 +0,0 @@
.circleci
.git
.github
.gitignore
cache
Dockerfile
docker-compose.yml
LICENSE
*.md
dockerhash.txt

View File

@ -1,30 +0,0 @@
server {
listen ${HTTP_PORT};
server_name ${HOSTNAME};
access_log off;
location / {
proxy_pass ${YOTTER_ADDRESS};
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_set_header Connection "";
}
location /static/ {
root /var/www;
sendfile on;
aio threads=default;
}
location ~ (^/videoplayback$|/videoplayback/|/vi/|/a/|/ytc|/vi_webp/|/sb/) {
proxy_pass ${YTPROXY_ADDRESS};
add_header Access-Control-Allow-Origin *;
sendfile on;
tcp_nopush on;
aio_write on;
aio threads=default;
directio 512;
proxy_http_version 1.1;
proxy_set_header Connection "";
}
}

View File

@ -9,15 +9,9 @@ COPY ./requirements.txt /usr/src/app
# Build Dependencies
RUN apt-get update \
&& apt-get install -yq build-essential libssl-dev libffi-dev libxml2-dev libxslt-dev zlib1g-dev curl \
&& apt-get install -yq build-essential libssl-dev libffi-dev libxml2-dev libxslt-dev zlib1g-dev \
&& rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/*
# install rust toolchain
RUN curl https://sh.rustup.rs -sSf | \
sh -s -- --default-toolchain stable -y
ENV PATH=/root/.cargo/bin:$PATH
# Python Dependencies
RUN pip install --no-warn-script-location --ignore-installed --no-cache-dir --prefix=/install wheel cryptography gunicorn pymysql
RUN pip install --no-warn-script-location --ignore-installed --no-cache-dir --prefix=/install -r requirements.txt

View File

@ -3,7 +3,7 @@ beautifulsoup4==4.9.3
bleach==3.3.0
cachetools==4.2.0
certifi==2020.12.5
chardet==3.0.4
chardet==4.0.0
click==7.1.2
feedparser==6.0.2
Flask==1.1.2
@ -16,8 +16,8 @@ gevent==20.9.0
greenlet==0.4.17
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.3
lxml>=4.6.3
Jinja2==2.11.2
lxml==4.6.2
Mako==1.1.3
MarkupSafe==1.1.1
numerize==0.12
@ -34,7 +34,7 @@ six==1.15.0
socks==0
soupsieve==2.0.1
SQLAlchemy==1.3.22
urllib3==1.26.5
urllib3==1.26.2
webencodings==0.5.1
Werkzeug==1.0.1
WTForms==2.3.3

View File

@ -1,7 +1,7 @@
{
"serverName": "yotter.xyz",
"nitterInstance": "https://nitter.mastodont.cat/",
"maxInstanceUsers": 200,
"nitterInstance": "https://nitter.net/",
"maxInstanceUsers": 120,
"serverLocation": "Germany",
"restrictPublicUsage":true,
"isInstance":true,

View File

@ -105,36 +105,25 @@ def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
ctoken=None, print_status=True):
def get_channel_tab_info(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
message = 'Got channel tab' if print_status else None
if not ctoken:
if int(sort) == 2 and int(page) > 1:
ctoken = channel_ctoken_v1(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D')
url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab
+ '?action_continuation=1&continuation=' + ctoken
+ '&pbj=1')
content = util.fetch_url(url, headers_desktop + real_cookie,
debug_name='channel_tab', report_text=message)
else:
ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D')
url = 'https://www.youtube.com/browse_ajax?ctoken=' + ctoken
content = util.fetch_url(url,
headers_desktop + generic_cookie,
debug_name='channel_tab', report_text=message)
# Not sure what the purpose of the key is or whether it will change
# For now it seems to be constant for the API endpoint, not dependent
# on the browsing session or channel
key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
data = {
'context': {
'client': {
'hl': 'en',
'gl': 'US',
'clientName': 'WEB',
'clientVersion': '2.20180830',
},
},
'continuation': ctoken,
}
content_type_header = (('Content-Type', 'application/json'),)
content = util.fetch_url(
url, headers_desktop + content_type_header,
data=json.dumps(data), debug_name='channel_tab', report_text=message)
info = yt_data_extract.extract_channel_info(json.loads(content), tab)
if info['error'] is not None:
return False
@ -185,31 +174,12 @@ def get_number_of_videos_general(base_url):
return get_number_of_videos_channel(get_channel_id(base_url))
def get_channel_search_json(channel_id, query, page):
offset = proto.unpadded_b64encode(proto.uint(3, (page-1)*30))
params = proto.string(2, 'search') + proto.string(15, offset)
params = proto.string(2, 'search') + proto.string(15, str(page))
params = proto.percent_b64encode(params)
ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
data = {
'context': {
'client': {
'hl': 'en',
'gl': 'US',
'clientName': 'WEB',
'clientVersion': '2.20180830',
},
},
'continuation': ctoken,
}
content_type_header = (('Content-Type', 'application/json'),)
polymer_json = util.fetch_url(
url, headers_desktop + content_type_header,
data=json.dumps(data), debug_name='channel_search')
polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, headers_desktop, debug_name='channel_search')
return polymer_json

View File

@ -120,9 +120,9 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
if data is not None:
method = "POST"
if isinstance(data, str):
data = data.encode('utf-8')
data = data.encode('ascii')
elif not isinstance(data, bytes):
data = urllib.parse.urlencode(data).encode('utf-8')
data = urllib.parse.urlencode(data).encode('ascii')
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
req = urllib.request.Request(url, data=data, headers=headers)
@ -143,7 +143,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
else:
retries = urllib3.Retry(3)
pool = get_pool(use_tor)
response = pool.request(method, url, headers=headers, body=data,
response = pool.request(method, url, headers=headers,
timeout=timeout, preload_content=False,
decode_content=False, retries=retries)
cleanup_func = (lambda r: r.release_conn())
@ -156,7 +156,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
start_time = time.time()
response, cleanup_func = fetch_url_response(
url, headers, timeout=timeout, data=data,
url, headers, timeout=timeout,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor)
response_time = time.time()

View File

@ -329,11 +329,6 @@ def extract_item_info(item, additional_info={}):
def extract_response(polymer_json):
'''return response, error'''
# /youtubei/v1/browse endpoint returns response directly
if isinstance(polymer_json, dict) and 'responseContext' in polymer_json:
# this is the response
return polymer_json, None
response = multi_deep_get(polymer_json, [1, 'response'], ['response'])
if response is None:
return None, 'Failed to extract response'

View File

@ -177,6 +177,7 @@ def _extract_watch_info_mobile(top_level):
author_id = deep_get(playlist, 'longBylineText', 'runs', 0,
'navigationEndpoint', 'browseEndpoint', 'browseId')
info['playlist']['author_id'] = author_id
if author_id:
info['playlist']['author_url'] = concat_or_none(
'https://www.youtube.com/channel/', author_id)
info['playlist']['id'] = playlist.get('playlistId')
@ -446,8 +447,7 @@ def _extract_playability_error(info, player_response, error_prefix=''):
SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
def extract_watch_info(polymer_json):
info = {'playability_error': None, 'error': None,
'player_response_missing': None}
info = {'playability_error': None, 'error': None}
if isinstance(polymer_json, dict):
top_level = polymer_json
@ -509,10 +509,6 @@ def extract_watch_info(polymer_json):
if not info['formats']:
_extract_formats(info, player_response)
# see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
info['player_urls_missing'] = (
not info['formats'] and not embedded_player_response)
# playability errors
_extract_playability_error(info, player_response)
@ -569,84 +565,6 @@ def extract_watch_info(polymer_json):
info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
return info
single_char_codes = {
'n': '\n',
'\\': '\\',
'"': '"',
"'": "'",
'b': '\b',
'f': '\f',
'n': '\n',
'r': '\r',
't': '\t',
'v': '\x0b',
'0': '\x00',
'\n': '', # backslash followed by literal newline joins lines
}
def js_escape_replace(match):
r'''Resolves javascript string escape sequences such as \x..'''
# some js-strings in the watch page html include them for no reason
# https://mathiasbynens.be/notes/javascript-escapes
escaped_sequence = match.group(1)
if escaped_sequence[0] in ('x', 'u'):
return chr(int(escaped_sequence[1:], base=16))
# In javascript, if it's not one of those escape codes, it's just the
# literal character. e.g., "\a" = "a"
return single_char_codes.get(escaped_sequence, escaped_sequence)
# works but complicated and unsafe:
#PLAYER_RESPONSE_RE = re.compile(r'<script[^>]*?>[^<]*?var ytInitialPlayerResponse = ({(?:"(?:[^"\\]|\\.)*?"|[^"])+?});')
# Because there are sometimes additional statements after the json object
# so we just capture all of those until end of script and tell json decoder
# to ignore extra stuff after the json object
PLAYER_RESPONSE_RE = re.compile(r'<script[^>]*?>[^<]*?var ytInitialPlayerResponse = ({.*?)</script>')
INITIAL_DATA_RE = re.compile(r"<script[^>]*?>var ytInitialData = '(.+?[^\\])';")
BASE_JS_RE = re.compile(r'jsUrl":\s*"([\w\-\./]+?/base.js)"')
JS_STRING_ESCAPE_RE = re.compile(r'\\([^xu]|x..|u....)')
def extract_watch_info_from_html(watch_html):
base_js_match = BASE_JS_RE.search(watch_html)
player_response_match = PLAYER_RESPONSE_RE.search(watch_html)
initial_data_match = INITIAL_DATA_RE.search(watch_html)
if base_js_match is not None:
base_js_url = base_js_match.group(1)
else:
base_js_url = None
if player_response_match is not None:
decoder = json.JSONDecoder()
# this will make it ignore extra stuff after end of object
player_response = decoder.raw_decode(player_response_match.group(1))[0]
else:
return {'error': 'Could not find ytInitialPlayerResponse'}
player_response = None
if initial_data_match is not None:
initial_data = initial_data_match.group(1)
initial_data = JS_STRING_ESCAPE_RE.sub(js_escape_replace, initial_data)
initial_data = json.loads(initial_data)
else:
print('extract_watch_info_from_html: failed to find initialData')
initial_data = None
# imitate old format expected by extract_watch_info
fake_polymer_json = {
'player': {
'args': {},
'assets': {
'js': base_js_url
}
},
'playerResponse': player_response,
'response': initial_data,
}
return extract_watch_info(fake_polymer_json)
def get_caption_url(info, language, format, automatic=False, translation_language=None):
'''Gets the url for captions with the given language and format. If automatic is True, get the automatic captions for that language. If translation_language is given, translate the captions from `language` to `translation_language`. If automatic is true and translation_language is given, the automatic captions will be translated.'''
url = info['_captions_base_url']
@ -662,8 +580,7 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
return url
def update_with_age_restricted_info(info, video_info_page):
'''Inserts urls from 'player_response' in get_video_info page'''
ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: '
ERROR_PREFIX = 'Error bypassing age-restriction: '
video_info = urllib.parse.parse_qs(video_info_page)
player_response = deep_get(video_info, 'player_response', 0)
@ -686,9 +603,7 @@ def requires_decryption(info):
# adapted from youtube-dl and invidious:
# https://github.com/omarroth/invidious/blob/master/src/invidious/helpers/signatures.cr
decrypt_function_re = re.compile(r'function\(a\)\{(a=a\.split\(""\)[^\}{]+)return a\.join\(""\)\}')
# gives us e.g. rt, .xK, 5 from rt.xK(a,5) or rt, ["xK"], 5 from rt["xK"](a,5)
# (var, operation, argument)
var_op_arg_re = re.compile(r'(\w+)(\.\w+|\["[^"]+"\])\(a,(\d+)\)')
op_with_arg_re = re.compile(r'[^\.]+\.([^\(]+)\(a,(\d+)\)')
def extract_decryption_function(info, base_js):
'''Insert decryption function into info. Return error string if not successful.
Decryption function is a list of list[2] of numbers.
@ -702,11 +617,10 @@ def extract_decryption_function(info, base_js):
if not function_body:
return 'Empty decryption function body'
var_with_operation_match = var_op_arg_re.fullmatch(function_body[0])
if var_with_operation_match is None:
var_name = get(function_body[0].split('.'), 0)
if var_name is None:
return 'Could not find var_name'
var_name = var_with_operation_match.group(1)
var_body_match = re.search(r'var ' + re.escape(var_name) + r'=\{(.*?)\};', base_js, flags=re.DOTALL)
if var_body_match is None:
return 'Could not find var_body'
@ -735,13 +649,13 @@ def extract_decryption_function(info, base_js):
decryption_function = []
for op_with_arg in function_body:
match = var_op_arg_re.fullmatch(op_with_arg)
match = op_with_arg_re.fullmatch(op_with_arg)
if match is None:
return 'Could not parse operation with arg'
op_name = match.group(2).strip('[].')
op_name = match.group(1)
if op_name not in operation_definitions:
return 'Unknown op_name: ' + str(op_name)
op_argument = match.group(3)
return 'Unknown op_name: ' + op_name
op_argument = match.group(2)
decryption_function.append([operation_definitions[op_name], int(op_argument)])
info['decryption_function'] = decryption_function