Merge pull request #261 from TeamNewPipe/yt_new

Update YouTube to material version
This commit is contained in:
Tobias Groza 2020-02-25 21:57:18 +01:00 committed by GitHub
commit 8838e2d136
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 871 additions and 1036 deletions

View File

@ -1,11 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -17,11 +17,18 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
/* /*
* Created by Christian Schabesberger on 25.07.16. * Created by Christian Schabesberger on 25.07.16.
@ -49,6 +56,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
private Document doc; private Document doc;
private JsonObject initialData;
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -59,11 +67,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
final Response response = downloader.get(channelUrl, getExtractorLocalization()); final Response response = downloader.get(channelUrl, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
} }
@Override @Override
public String getNextPageUrl() throws ExtractionException { public String getNextPageUrl() throws ExtractionException {
return getNextPageUrlFrom(doc); return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
} }
@Nonnull @Nonnull
@ -80,15 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getId() throws ParsingException { public String getId() throws ParsingException {
try { try {
return doc.select("meta[itemprop=\"channelId\"]").first().attr("content"); return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId");
} catch (Exception ignored) {}
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
try {
Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
return element.attr("data-channel-external-id");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel id", e); throw new ParsingException("Could not get channel id", e);
} }
@ -98,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
return doc.select("meta[property=\"og:title\"]").first().attr("content"); return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel name", e); throw new ParsingException("Could not get channel name", e);
} }
@ -107,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getAvatarUrl() throws ParsingException { public String getAvatarUrl() throws ParsingException {
try { try {
return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src"); return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get avatar", e); throw new ParsingException("Could not get avatar", e);
} }
@ -116,13 +119,27 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getBannerUrl() throws ParsingException { public String getBannerUrl() throws ParsingException {
try { try {
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first(); String url = null;
String cssContent = el.html(); try {
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent); url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
return null;
}
// the first characters of the banner URLs are different for each channel and some are not even valid URLs
if (url.startsWith("//")) {
url = url.substring(2);
}
if (url.startsWith(HTTP)) {
url = Utils.replaceHttpWithHttps(url);
} else if (!url.startsWith(HTTPS)) {
url = HTTPS + url;
}
return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url; return url;
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get Banner", e); throw new ParsingException("Could not get banner", e);
} }
} }
@ -137,12 +154,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() throws ParsingException {
final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); if (subscriberInfo != null) {
if (el != null) {
String elTitle = el.attr("title");
try { try {
return Utils.mixedNumberWordToLong(elTitle); return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text"));
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e); throw new ParsingException("Could not get subscriber count", e);
} }
@ -155,7 +170,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
try { try {
return doc.select("meta[name=\"description\"]").first().attr("content"); return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel description", e); throw new ParsingException("Could not get channel description", e);
} }
@ -165,8 +180,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException { public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
collectStreamsFrom(collector, ul); JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl()); return new InfoItemsPage<>(collector, getNextPageUrl());
} }
@ -181,106 +198,98 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
fetchPage(); fetchPage();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonObject ajaxJson; JsonArray ajaxJson;
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
try { try {
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); // Use the hardcoded client version first to get JSON with a structure we know
ajaxJson = JsonParser.object().from(response); headers.put("X-YouTube-Client-Version",
} catch (JsonParserException pe) { Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
throw new ParsingException("Could not parse json data for next streams", pe); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
} }
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl); JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
collectStreamsFrom(collector, ajaxHtml.select("body").first()); .getObject("continuationContents").getObject("sectionListContinuation");
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl)); collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
} }
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
throws ParsingException { private String getNextPageUrlFrom(JsonArray continuations) {
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html"); if (continuations == null) {
if (!loadMoreHtmlDataRaw.isEmpty()) {
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
} else {
return ""; return "";
} }
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
String continuation = nextContinuationData.getString("continuation");
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
+ "&itct=" + clickTrackingParams;
} }
private String getNextPageUrlFrom(Document d) throws ParsingException { private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if (button != null) {
return button.attr("abs:data-uix-load-more-href");
} else {
// Sometimes channels are simply so small, they don't have a more streams/videos
return "";
}
} catch (Exception e) {
throw new ParsingException("Could not get next page url", e);
}
}
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
collector.reset(); collector.reset();
final String uploaderName = getName(); final String uploaderName = getName();
final String uploaderUrl = getUrl(); final String uploaderUrl = getUrl();
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (final Element li : element.children()) { for (Object video : videos) {
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) { JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { .getArray("contents").getObject(0);
if (videoInfo.getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) {
@Override @Override
public String getUrl() throws ParsingException { public String getUploaderName() {
try {
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.text();
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
return uploaderName; return uploaderName;
} }
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() {
return uploaderUrl; return uploaderUrl;
} }
@Override
public String getThumbnailUrl() throws ParsingException {
try {
String url;
Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
}); });
} }
} }
} }
private JsonObject getVideoTab() throws ParsingException {
JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs");
JsonObject videoTab = null;
for (Object tab : tabs) {
if (((JsonObject) tab).getObject("tabRenderer") != null) {
if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
videoTab = ((JsonObject) tab).getObject("tabRenderer");
break;
}
}
}
if (videoTab == null) {
throw new ParsingException("Could not find Videos tab");
}
return videoTab;
}
} }

View File

@ -1,12 +1,14 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element; import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.util.regex.Matcher; import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import java.util.regex.Pattern; import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
/* /*
* Created by Christian Schabesberger on 12.02.17. * Created by Christian Schabesberger on 12.02.17.
@ -29,87 +31,75 @@ import java.util.regex.Pattern;
*/ */
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor { public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
private final Element el; private JsonObject channelInfoItem;
public YoutubeChannelInfoItemExtractor(Element el) { public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) {
this.el = el; this.channelInfoItem = channelInfoItem;
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first() try {
.select("img").first(); String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
if (url.startsWith("//")) {
String url = img.attr("abs:src"); url = url.substring(2);
}
if (url.contains("gif")) { if (url.startsWith(HTTP)) {
url = img.attr("abs:data-thumb"); url = Utils.replaceHttpWithHttps(url);
} else if (!url.startsWith(HTTPS)) {
url = HTTPS + url;
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
} }
return url;
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
return el.select("a[class*=\"yt-uix-tile-link\"]").first() try {
.text(); return channelInfoItem.getObject("title").getString("simpleText");
} catch (Exception e) {
throw new ParsingException("Could not get name", e);
}
} }
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first() String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work?
.attr("abs:data-href"); return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
if (match.matches()) {
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
}
} catch(Exception ignored) {}
// fallback method for channels without "Subscribe" button (or just in case yt changes things)
// provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail
try {
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel url", e); throw new ParsingException("Could not get url", e);
} }
} }
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() throws ParsingException {
final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first(); try {
if (subsEl != null) { String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0];
try { return Utils.mixedNumberWordToLong(subscribers);
return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text())); } catch (Exception e) {
} catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e);
throw new ParsingException("Could not get subscriber count", e);
}
} else {
// If the element is null, the channel have the subscriber count disabled
return -1;
} }
} }
@Override @Override
public long getStreamCount() throws ParsingException { public long getStreamCount() throws ParsingException {
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first(); try {
if (metaEl == null) { return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText")
return 0; .getArray("runs").getObject(0).getString("text")));
} else { } catch (Exception e) {
return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text())); throw new ParsingException("Could not get stream count", e);
} }
} }
@Override @Override
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first(); try {
if (desEl == null) { return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text");
return ""; } catch (Exception e) {
} else { throw new ParsingException("Could not get description", e);
return desEl.text();
} }
} }
} }

View File

@ -1,34 +1,39 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
@SuppressWarnings("WeakerAccess") @SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor { public class YoutubePlaylistExtractor extends PlaylistExtractor {
private Document doc; private Document doc;
private JsonObject initialData;
private JsonObject uploaderInfo;
private JsonObject playlistInfo;
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -39,18 +44,61 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
final String url = getUrl(); final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
uploaderInfo = getUploaderInfo();
playlistInfo = getPlaylistInfo();
}
private JsonObject getUploaderInfo() throws ParsingException {
JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items");
try {
JsonObject uploaderInfo = items.getObject(1).getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner").getObject("videoOwnerRenderer");
if (uploaderInfo != null) {
return uploaderInfo;
}
} catch (Exception ignored) {}
// we might want to create a loop here instead of using duplicated code
try {
JsonObject uploaderInfo = items.getObject(items.size()).getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner").getObject("videoOwnerRenderer");
if (uploaderInfo != null) {
return uploaderInfo;
}
} catch (Exception e) {
throw new ParsingException("Could not get uploader info", e);
}
throw new ParsingException("Could not get uploader info");
}
private JsonObject getPlaylistInfo() throws ParsingException {
try {
return initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items")
.getObject(0).getObject("playlistSidebarPrimaryInfoRenderer");
} catch (Exception e) {
throw new ParsingException("Could not get PlaylistInfo", e);
}
} }
@Override @Override
public String getNextPageUrl() throws ExtractionException { public String getNextPageUrl() {
return getNextPageUrlFrom(doc); return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("playlistVideoListRenderer").getArray("continuations"));
} }
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text");
if (name != null) return name;
} catch (Exception ignored) {}
try {
return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist name", e); throw new ParsingException("Could not get playlist name", e);
} }
@ -59,7 +107,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer")
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
try {
return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail", e); throw new ParsingException("Could not get playlist thumbnail", e);
} }
@ -75,8 +128,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
try { try {
return YoutubeChannelExtractor.CHANNEL_URL_BASE + return YoutubeChannelExtractor.CHANNEL_URL_BASE +
doc.select("button[class*=\"yt-uix-subscription-button\"]") uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
.first().attr("data-channel-external-id");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader url", e); throw new ParsingException("Could not get playlist uploader url", e);
} }
@ -85,7 +137,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
try { try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name", e); throw new ParsingException("Could not get playlist uploader name", e);
} }
@ -94,7 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
try { try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar", e); throw new ParsingException("Could not get playlist uploader avatar", e);
} }
@ -102,33 +154,26 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public long getStreamCount() throws ParsingException { public long getStreamCount() throws ParsingException {
String input;
try { try {
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text(); String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text");
} catch (IndexOutOfBoundsException e) { return Long.parseLong(Utils.removeNonDigitCharacters(viewsText));
} catch (Exception e) {
throw new ParsingException("Could not get video count from playlist", e); throw new ParsingException("Could not get video count from playlist", e);
} }
try {
return Long.parseLong(Utils.removeNonDigitCharacters(input));
} catch (NumberFormatException e) {
// When there's no videos in a playlist, there's no number in the "innerHtml",
// all characters that is not a number is removed, so we try to parse a empty string
if (!input.isEmpty()) {
return 0;
} else {
throw new ParsingException("Could not handle input: " + input, e);
}
}
} }
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException { public InfoItemsPage<StreamInfoItem> getInitialPage() {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
collectStreamsFrom(collector, tbody); JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("playlistVideoListRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl()); return new InfoItemsPage<>(collector, getNextPageUrl());
} }
@ -139,156 +184,67 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
} }
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonObject pageJson; JsonArray ajaxJson;
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
try { try {
final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); // Use the hardcoded client version first to get JSON with a structure we know
pageJson = JsonParser.object().from(responseBody); headers.put("X-YouTube-Client-Version",
} catch (JsonParserException pe) { Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
throw new ParsingException("Could not parse ajax json", pe); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
} }
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">" JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
+ pageJson.getString("content_html") .getObject("continuationContents").getObject("playlistVideoListContinuation");
+ "</tbody></table>", pageUrl);
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first()); collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl)); return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
} }
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl) private String getNextPageUrlFrom(JsonArray continuations) {
throws ParsingException { if (continuations == null) {
String nextPageHtml = pageJson.getString("load_more_widget_html");
if (!nextPageHtml.isEmpty()) {
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
} else {
return ""; return "";
} }
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
String continuation = nextContinuationData.getString("continuation");
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
+ "&itct=" + clickTrackingParams;
} }
private String getNextPageUrlFrom(Document d) throws ParsingException { private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if (button != null) {
return button.attr("abs:data-uix-load-more-href");
} else {
// Sometimes playlists are simply so small, they don't have a more streams/videos
return "";
}
} catch (Exception e) {
throw new ParsingException("could not get next streams' url", e);
}
}
private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) {
collector.reset(); collector.reset();
if (element == null) {
return;
}
final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (final Element li : element.children()) { for (Object video : videos) {
if (isDeletedItem(li)) { if (((JsonObject) video).getObject("playlistVideoRenderer") != null) {
continue; collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {
@Override
public long getViewCount() {
return -1;
}
});
} }
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
public Element uploaderLink;
@Override
public boolean isAd() {
return false;
}
@Override
public String getUrl() throws ParsingException {
try {
return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
return li.attr("data-title");
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public long getDuration() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
Element first = li.select("div[class=\"timestamp\"] span").first();
if (first == null) {
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
// because in other cases, those videos don't even show up
return -1;
}
return YoutubeParsingHelper.parseDurationString(first.text());
} catch (Exception e) {
throw new ParsingException("Could not get duration" + getUrl(), e);
}
}
private Element getUploaderLink() {
// should always be present since we filter deleted items
if (uploaderLink == null) {
uploaderLink = li.select("div[class=pl-video-owner] a").first();
}
return uploaderLink;
}
@Override
public String getUploaderName() throws ParsingException {
return getUploaderLink().text();
}
@Override
public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the from "/user/..."
return getUploaderLink().attr("abs:href");
}
@Override
public String getTextualUploadDate() throws ParsingException {
return "";
}
@Override
public long getViewCount() throws ParsingException {
return -1;
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg";
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
} }
} }
/**
* Check if the playlist item is deleted
*
* @param li the list item
* @return true if the item is deleted
*/
private boolean isDeletedItem(Element li) {
return li.select("div[class=pl-video-owner] a").isEmpty();
}
} }

View File

@ -1,97 +1,63 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element; import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor { public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor {
private final Element el; private JsonObject playlistInfoItem;
public YoutubePlaylistInfoItemExtractor(Element el) { public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) {
this.el = el; this.playlistInfoItem = playlistInfoItem;
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
String url;
try { try {
Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first() return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails")
.select("img").first(); .getObject(0).getString("url");
url = te.attr("abs:src");
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Failed to extract playlist thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
return url;
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
String name;
try { try {
final Element title = el.select("[class=\"yt-lockup-title\"]").first() return playlistInfoItem.getObject("title").getString("simpleText");
.select("a").first();
name = title == null ? "" : title.text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Failed to extract playlist name", e); throw new ParsingException("Could not get name", e);
} }
return name;
} }
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
final Element a = el.select("div[class=\"yt-lockup-meta\"]") String id = playlistInfoItem.getString("playlistId");
.select("ul[class=\"yt-lockup-meta-info\"]") return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id);
.select("li").select("a").first();
if (a != null) {
return a.attr("abs:href");
}
// this is for yt premium playlists
return el.select("h3[class=\"yt-lockup-title\"").first()
.select("a").first()
.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Failed to extract playlist url", e); throw new ParsingException("Could not get url", e);
} }
} }
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
String name;
try { try {
final Element div = el.select("div[class=\"yt-lockup-byline\"]").first() return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text");
.select("a").first();
name = div.text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Failed to extract playlist uploader", e); throw new ParsingException("Could not get uploader name", e);
} }
return name;
} }
@Override @Override
public long getStreamCount() throws ParsingException { public long getStreamCount() throws ParsingException {
try { try {
final Element count = el.select("span[class=\"formatted-video-count-label\"]").first() return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount")));
.select("b").first();
return count == null ? 0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text()));
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Failed to extract playlist stream count", e); throw new ParsingException("Could not get stream count", e);
} }
} }
} }

View File

@ -1,8 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.Jsoup; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -14,13 +17,14 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
/* /*
* Created by Christian Schabesberger on 22.07.2018 * Created by Christian Schabesberger on 22.07.2018
@ -45,6 +49,7 @@ import java.net.URL;
public class YoutubeSearchExtractor extends SearchExtractor { public class YoutubeSearchExtractor extends SearchExtractor {
private Document doc; private Document doc;
private JsonObject initialData;
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -55,6 +60,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
final String url = getUrl(); final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
} }
@Nonnull @Nonnull
@ -65,80 +71,109 @@ public class YoutubeSearchExtractor extends SearchExtractor {
@Override @Override
public String getSearchSuggestion() { public String getSearchSuggestion() {
final Element el = doc.select("div[class*=\"spell-correction\"]").first(); JsonObject showingResultsForRenderer = initialData.getObject("contents")
if (el != null) { .getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
return el.select("a").first().text(); .getObject("sectionListRenderer").getArray("contents").getObject(0)
} else { .getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("showingResultsForRenderer");
if (showingResultsForRenderer == null) {
return ""; return "";
} else {
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
.getObject(0).getString("text");
} }
} }
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
return new InfoItemsPage<>(collectItems(doc), getNextPageUrl()); InfoItemsSearchCollector collector = getInfoItemSearchCollector();
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl());
} }
@Override @Override
public String getNextPageUrl() throws ExtractionException { public String getNextPageUrl() throws ExtractionException {
return getUrl() + "&page=" + 2; return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
} }
@Override @Override
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); if (pageUrl == null || pageUrl.isEmpty()) {
doc = Jsoup.parse(response, pageUrl); throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
}
return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
}
private String getNextPageUrlFromCurrentUrl(String currentUrl)
throws MalformedURLException, UnsupportedEncodingException {
final int pageNr = Integer.parseInt(
Parser.compatParseMap(
new URL(currentUrl)
.getQuery())
.get("page"));
return currentUrl.replace("&page=" + pageNr,
"&page=" + Integer.toString(pageNr + 1));
}
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
InfoItemsSearchCollector collector = getInfoItemSearchCollector(); InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset(); JsonArray ajaxJson;
Element list = doc.select("ol[class=\"item-section\"]").first(); Map<String, List<String>> headers = new HashMap<>();
final TimeAgoParser timeAgoParser = getTimeAgoParser(); headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
for (Element item : list.children()) { try {
/* First we need to determine which kind of item we are working with. // Use the hardcoded client version first to get JSON with a structure we know
Youtube depicts five different kinds of items on its search result page. These are headers.put("X-YouTube-Client-Version",
regular videos, playlists, channels, two types of video suggestions, and a "no video Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
found" item. Since we only want videos, we need to filter out all the others. final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
An example for this can be seen here: if (response.length() < 50) { // ensure to have a valid response
https://www.youtube.com/results?search_query=asdf&page=1 throw new ParsingException("Could not parse json data for next streams");
}
We already applied a filter to the url, so we don't need to care about channels and ajaxJson = JsonParser.array().from(response);
playlists now. } catch (Exception e) {
*/ try {
headers.put("X-YouTube-Client-Version",
Element el; Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) { if (response.length() < 50) { // ensure to have a valid response
throw new NothingFoundException(el.text()); throw new ParsingException("Could not parse json data for next streams");
}
// video item type ajaxJson = JsonParser.array().from(response);
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) { } catch (JsonParserException ignored) {
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser)); throw new ParsingException("Could not parse json data for next streams", e);
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(el));
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
item.select(".yt-pl-icon-mix").isEmpty()) {
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
} }
} }
return collector; JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("itemSectionContinuation");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
}
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException {
collector.reset();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (Object item : videos) {
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
}
}
}
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
if (continuations == null) {
return "";
}
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
String continuation = nextContinuationData.getString("continuation");
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
+ "&itct=" + clickTrackingParams;
} }
} }

View File

@ -3,11 +3,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
@ -15,7 +13,6 @@ import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Request;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -23,23 +20,41 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.Frameset;
import org.schabi.newpipe.extractor.stream.Stream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.URLDecoder;
import java.net.URL; import java.nio.charset.StandardCharsets;
import java.util.*; import java.text.SimpleDateFormat;
import java.util.regex.Matcher; import java.util.ArrayList;
import java.util.regex.Pattern; import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/* /*
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
@ -62,8 +77,6 @@ import java.util.regex.Pattern;
*/ */
public class YoutubeStreamExtractor extends StreamExtractor { public class YoutubeStreamExtractor extends StreamExtractor {
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
/*////////////////////////////////////////////////////////////////////////// /*//////////////////////////////////////////////////////////////////////////
// Exceptions // Exceptions
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
@ -74,12 +87,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
public class SubtitlesException extends ContentNotAvailableException {
SubtitlesException(String message, Throwable cause) {
super(message, cause);
}
}
/*//////////////////////////////////////////////////////////////////////////*/ /*//////////////////////////////////////////////////////////////////////////*/
private Document doc; private Document doc;
@ -88,6 +95,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull @Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>(); private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonObject playerResponse; private JsonObject playerResponse;
private JsonObject initialData;
@Nonnull @Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>(); private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@ -106,22 +114,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
assertPageFetched(); assertPageFetched();
String title = null;
try { try {
return playerResponse.getObject("videoDetails").getString("title"); title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
} catch (Exception e) { if (title == null) {
// fallback HTML method
String name = null;
try { try {
name = doc.select("meta[name=title]").attr(CONTENT); title = playerResponse.getObject("videoDetails").getString("title");
} catch (Exception ignored) { } catch (Exception ignored) {}
}
if (name == null) {
throw new ParsingException("Could not get name", e);
}
return name;
} }
if (title != null) return title;
throw new ParsingException("Could not get name");
} }
@Override @Override
@ -131,19 +134,33 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
try { try {
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); // return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
} catch (Exception e) { } catch (Exception ignored) {}
String uploadDate = null;
try {
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
} catch (Exception ignored) {
}
if (uploadDate == null) { try {
throw new ParsingException("Could not get upload date", e); if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) {
String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10);
try { // Premiered 20 hours ago
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
Calendar parsedTime = timeAgoParser.parse(time).date();
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
} catch (Exception ignored) {}
try { // Premiered Premiered Feb 21, 2020
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
} catch (Exception ignored) {}
} }
return uploadDate; } catch (Exception ignored) {}
}
try {
// TODO this parses English formatted dates only, we need a better approach to parse the textual date
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText"));
return new SimpleDateFormat("yyyy-MM-dd").format(d);
} catch (Exception ignored) {}
throw new ParsingException("Could not get upload date");
} }
@Override @Override
@ -167,15 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return thumbnails.getObject(thumbnails.size() - 1).getString("url"); return thumbnails.getObject(thumbnails.size() - 1).getString("url");
} catch (Exception e) { } catch (Exception e) {
String url = null; throw new ParsingException("Could not get thumbnail url");
try {
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch (Exception ignored) {}
if (url == null) {
throw new ParsingException("Could not get thumbnail url", e);
}
return url;
} }
} }
@ -184,88 +193,65 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public Description getDescription() throws ParsingException { public Description getDescription() throws ParsingException {
assertPageFetched(); assertPageFetched();
// description with more info on links
try { try {
// first try to get html-formatted description boolean htmlConversionRequired = false;
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML); JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs");
} catch (Exception e) { StringBuilder descriptionBuilder = new StringBuilder(descriptions.size());
try { for (Object textObjectHolder : descriptions) {
// fallback to raw non-html description JsonObject textHolder = (JsonObject) textObjectHolder;
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); String text = textHolder.getString("text");
} catch (Exception ignored) { if (textHolder.getObject("navigationEndpoint") != null) {
throw new ParsingException("Could not get the description", e); // The text is a link. Get the URL it points to and generate a HTML link of it
} if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) {
} String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url");
} if (internUrl.startsWith("/redirect?")) {
// q parameter can be the first parameter
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;" internUrl = internUrl.substring(10);
// :00 is NOT recognized as a timestamp in description or comments. String[] params = internUrl.split("&");
// 0:00 is recognized in both description and comments. for (String param : params) {
// https://www.youtube.com/watch?v=4cccfDXu1vA if (param.split("=")[0].equals("q")) {
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile( String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name());
"seekTo\\(" if (url != null && !url.isEmpty()) {
+ "(?:(\\d+)\\*3600\\+)?" // hours? descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
+ "(\\d+)\\*60\\+" // minutes htmlConversionRequired = true;
+ "(\\d+)" // seconds } else {
+ "\\)"); descriptionBuilder.append(text);
}
@SafeVarargs break;
private static <T> T coalesce(T... args) { }
for (T arg : args) { }
if (arg != null) return arg; } else if (internUrl.startsWith("http")) {
} descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
throw new IllegalArgumentException("all arguments to coalesce() were null"); htmlConversionRequired = true;
} }
continue;
private String parseHtmlAndGetFullLinks(String descriptionHtml) }
throws MalformedURLException, UnsupportedEncodingException, ParsingException { continue;
final Document description = Jsoup.parse(descriptionHtml, getUrl()); }
for (Element a : description.select("a")) { if (text != null) {
final String rawUrl = a.attr("abs:href"); descriptionBuilder.append(text);
final URL redirectLink = new URL(rawUrl);
final Matcher onClickTimestamp;
final String queryString;
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
.find()) {
a.removeAttr("onclick");
String hours = coalesce(onClickTimestamp.group(1), "0");
String minutes = onClickTimestamp.group(2);
String seconds = onClickTimestamp.group(3);
int timestamp = 0;
timestamp += Integer.parseInt(hours) * 3600;
timestamp += Integer.parseInt(minutes) * 60;
timestamp += Integer.parseInt(seconds);
String setTimestamp = "&t=" + timestamp;
// Even after clicking https://youtu.be/...?t=6,
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
a.attr("href", getUrl() + setTimestamp);
} else if ((queryString = redirectLink.getQuery()) != null) {
// if the query string is null we are not dealing with a redirect link,
// so we don't need to override it.
final String link =
Parser.compatParseMap(queryString).get("q");
if (link != null) {
// if link is null the a tag is a hashtag.
// They refer to the youtube search. We do not handle them.
a.text(link);
a.attr("href", link);
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
a.text(redirectLink.toString());
a.attr("href", redirectLink.toString());
} }
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
a.text(redirectLink.toString());
a.attr("href", redirectLink.toString());
} }
String description = descriptionBuilder.toString();
if (!description.isEmpty()) {
if (htmlConversionRequired) {
description = description.replaceAll("\\n", "<br>");
description = description.replaceAll(" ", " &nbsp;");
return new Description(description, Description.HTML);
}
return new Description(description, Description.PLAIN_TEXT);
}
} catch (Exception ignored) { }
// raw non-html description
try {
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
} catch (Exception ignored) {
throw new ParsingException("Could not get description");
} }
return description.select("body").first().html();
} }
@Override @Override
@ -318,68 +304,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public long getViewCount() throws ParsingException { public long getViewCount() throws ParsingException {
assertPageFetched(); assertPageFetched();
String views = null;
try { try {
if (getStreamType().equals(StreamType.LIVE_STREAM)) { views = getVideoPrimaryInfoRenderer().getObject("viewCount")
return getLiveStreamWatchingCount(); .getObject("videoViewCountRenderer").getObject("viewCount")
} else { .getArray("runs").getObject(0).getString("text");
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); } catch (Exception ignored) {}
} if (views == null) {
} catch (Exception e) {
try { try {
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); views = getVideoPrimaryInfoRenderer().getObject("viewCount")
} catch (Exception ignored) { .getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
throw new ParsingException("Could not get view count", e); } catch (Exception ignored) {}
}
} }
} if (views == null) {
try {
private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException { views = playerResponse.getObject("videoDetails").getString("viewCount");
// https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key= } catch (Exception ignored) {}
String innerTubeKey = null, clientVersion = null;
if (playerArgs != null && !playerArgs.isEmpty()) {
innerTubeKey = playerArgs.getString("innertube_api_key");
clientVersion = playerArgs.getString("innertube_context_client_version");
} else if (!videoInfoPage.isEmpty()) {
innerTubeKey = videoInfoPage.get("innertube_api_key");
clientVersion = videoInfoPage.get("innertube_context_client_version");
} }
if (views != null) return Long.parseLong(Utils.removeNonDigitCharacters(views));
if (innerTubeKey == null || innerTubeKey.isEmpty()) { throw new ParsingException("Could not get view count");
throw new ExtractionException("Couldn't get innerTube key");
}
if (clientVersion == null || clientVersion.isEmpty()) {
throw new ExtractionException("Couldn't get innerTube client version");
}
final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey;
final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" +
",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8");
final Response response = getDownloader().execute(Request.newBuilder()
.post(metadataUrl, dataBody)
.addHeader("Content-Type", "application/json")
.build());
final JsonObject jsonObject = JsonParser.object().from(response.responseBody());
for (Object actionEntry : jsonObject.getArray("actions")) {
if (!(actionEntry instanceof JsonObject)) continue;
final JsonObject entry = (JsonObject) actionEntry;
final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null);
if (updateViewershipAction == null) continue;
final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs");
if (viewCountRuns.isEmpty()) continue;
final JsonObject textObject = viewCountRuns.getObject(0);
if (!textObject.has("text")) {
throw new ExtractionException("Response don't have \"text\" element");
}
return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text")));
}
throw new ExtractionException("Could not find correct results in response");
} }
@Override @Override
@ -387,9 +330,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched(); assertPageFetched();
String likesString = ""; String likesString = "";
try { try {
Element button = doc.select("button.like-button-renderer-like-button").first();
try { try {
likesString = button.select("span.yt-uix-button-content").first().text(); likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[0];
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -410,9 +353,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched(); assertPageFetched();
String dislikesString = ""; String dislikesString = "";
try { try {
Element button = doc.select("button.like-button-renderer-dislike-button").first();
try { try {
dislikesString = button.select("span.yt-uix-button-content").first().text(); dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[1];
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -432,40 +375,36 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
String uploaderId = null;
try { try {
return "https://www.youtube.com/channel/" + uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
playerResponse.getObject("videoDetails").getString("channelId"); .getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
} catch (Exception e) { } catch (Exception ignored) {}
String uploaderUrl = null; if (uploaderId == null) {
try { try {
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children() uploaderId = playerResponse.getObject("videoDetails").getString("channelId");
.select("a").first().attr("abs:href");
} catch (Exception ignored) {} } catch (Exception ignored) {}
if (uploaderUrl == null) {
throw new ParsingException("Could not get channel link", e);
}
return uploaderUrl;
} }
if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId;
throw new ParsingException("Could not get uploader url");
} }
@Nonnull @Nonnull
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
assertPageFetched(); assertPageFetched();
String uploaderName = null;
try { try {
return playerResponse.getObject("videoDetails").getString("author"); uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
} catch (Exception e) { .getObject("title").getArray("runs").getObject(0).getString("text");
String name = null; } catch (Exception ignored) {}
if (uploaderName == null) {
try { try {
name = doc.select("div.yt-user-info").first().text(); uploaderName = playerResponse.getObject("videoDetails").getString("author");
} catch (Exception ignored) {} } catch (Exception ignored) {}
if (name == null) {
throw new ParsingException("Could not get uploader name");
}
return name;
} }
if (uploaderName != null) return uploaderName;
throw new ParsingException("Could not get uploader name");
} }
@Nonnull @Nonnull
@ -475,12 +414,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String uploaderAvatarUrl = null; String uploaderAvatarUrl = null;
try { try {
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first() uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults")
.select("img").first() .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer")
.attr("abs:data-thumb"); .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail")
} catch (Exception e) {//todo: add fallback method .getArray("thumbnails").getObject(0).getString("url");
throw new ParsingException("Could not get uploader avatar url", e); if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) {
} return uploaderAvatarUrl;
}
} catch (Exception ignored) {}
try {
uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
if (uploaderAvatarUrl == null) { if (uploaderAvatarUrl == null) {
throw new ParsingException("Could not get uploader avatar url"); throw new ParsingException("Could not get uploader avatar url");
@ -594,13 +540,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
@Nonnull @Nonnull
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException { public List<SubtitlesStream> getSubtitlesDefault() {
return getSubtitles(MediaFormat.TTML); return getSubtitles(MediaFormat.TTML);
} }
@Override @Override
@Nonnull @Nonnull
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOException, ExtractionException { public List<SubtitlesStream> getSubtitles(final MediaFormat format) {
assertPageFetched(); assertPageFetched();
List<SubtitlesStream> subtitles = new ArrayList<>(); List<SubtitlesStream> subtitles = new ArrayList<>();
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) { for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
@ -624,18 +570,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Override @Override
public StreamInfoItem getNextStream() throws IOException, ExtractionException { public StreamInfoItem getNextStream() throws ExtractionException {
assertPageFetched(); assertPageFetched();
if (isAgeRestricted) {
return null;
}
try { try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results")
.getObject(0).getObject("compactAutoplayRenderer").getArray("contents")
.getObject(0).getObject("compactVideoRenderer");
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Elements watch = doc.select("div[class=\"watch-sidebar-section\"]"); collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
if (watch.size() < 1) {
return null;// prevent the snackbar notification "report error" on age-restricted videos
}
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser));
return collector.getItems().get(0); return collector.getItems().get(0);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get next video", e); throw new ParsingException("Could not get next video", e);
@ -643,20 +591,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Override @Override
public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException { public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
assertPageFetched(); assertPageFetched();
if (isAgeRestricted) {
return null;
}
try { try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results");
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
Element ul = doc.select("ul[id=\"watch-related\"]").first(); for (Object ul : results) {
if (ul != null) { final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
for (Element li : ul.children()) {
// first check if we have a playlist. If so leave them out if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
if (li.select("a[class*=\"content-link\"]").first() != null) {
collector.commit(extractVideoPreviewInfo(li, timeAgoParser));
}
}
} }
return collector; return collector;
} catch (Exception e) { } catch (Exception e) {
@ -736,6 +686,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
isAgeRestricted = false; isAgeRestricted = false;
} }
playerResponse = getPlayerResponse(); playerResponse = getPlayerResponse();
initialData = YoutubeParsingHelper.getInitialData(pageHtml);
if (decryptionCode.isEmpty()) { if (decryptionCode.isEmpty()) {
decryptionCode = loadDecryptionCode(playerUrl); decryptionCode = loadDecryptionCode(playerUrl);
@ -752,12 +703,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return JsonParser.object().from(ytPlayerConfigRaw); return JsonParser.object().from(ytPlayerConfigRaw);
} catch (Parser.RegexException e) { } catch (Parser.RegexException e) {
String errorReason = getErrorMessage(); String errorReason = getErrorMessage();
switch (errorReason) { if (errorReason.isEmpty()) {
case "": throw new ContentNotAvailableException("Content not available: player config empty", e);
throw new ContentNotAvailableException("Content not available: player config empty", e);
default:
throw new ContentNotAvailableException("Content not available", e);
} }
throw new ContentNotAvailableException("Content not available", e);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not parse yt player config", e); throw new ParsingException("Could not parse yt player config", e);
} }
@ -912,7 +861,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Nonnull @Nonnull
private List<SubtitlesInfo> getAvailableSubtitlesInfo() throws SubtitlesException { private List<SubtitlesInfo> getAvailableSubtitlesInfo() {
// If the video is age restricted getPlayerConfig will fail // If the video is age restricted getPlayerConfig will fail
if (isAgeRestricted) return Collections.emptyList(); if (isAgeRestricted) return Collections.emptyList();
@ -926,7 +875,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject()); final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject());
final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray()); final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray());
// todo: use this to apply auto translation to different language from a source language // todo: use this to apply auto translation to different language from a source language
final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); // final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
// This check is necessary since there may be cases where subtitles metadata do not contain caption track info // This check is necessary since there may be cases where subtitles metadata do not contain caption track info
// e.g. https://www.youtube.com/watch?v=-Vpwatutnko // e.g. https://www.youtube.com/watch?v=-Vpwatutnko
@ -983,6 +932,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Utils // Utils
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("results").getObject("results").getArray("contents");
JsonObject videoPrimaryInfoRenderer = null;
for (Object content : contents) {
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
break;
}
}
if (videoPrimaryInfoRenderer == null) {
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
}
return videoPrimaryInfoRenderer;
}
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("results").getObject("results").getArray("contents");
JsonObject videoSecondaryInfoRenderer = null;
for (Object content : contents) {
if (((JsonObject) content).getObject("videoSecondaryInfoRenderer") != null) {
videoSecondaryInfoRenderer = ((JsonObject) content).getObject("videoSecondaryInfoRenderer");
break;
}
}
if (videoSecondaryInfoRenderer == null) {
throw new ParsingException("Could not find videoSecondaryInfoRenderer");
}
return videoSecondaryInfoRenderer;
}
@Nonnull @Nonnull
private static String getVideoInfoUrl(final String id, final String sts) { private static String getVideoInfoUrl(final String id, final String sts) {
return "https://www.youtube.com/get_video_info?" + "video_id=" + id + return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
@ -1026,60 +1013,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return urlAndItags; return urlAndItags;
} }
/**
* Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) {
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
return li.select("a.content-link").first().attr("abs:href");
}
@Override
public String getName() throws ParsingException {
//todo: check NullPointerException causing
return li.select("span.title").first().text();
//this page causes the NullPointerException, after finding it by searching for "tjvg":
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
}
@Override
public String getUploaderName() throws ParsingException {
return li.select("span[class*=\"attribution\"").first()
.select("span").first().text();
}
@Override
public String getUploaderUrl() throws ParsingException {
return ""; // The uploader is not linked
}
@Override
public String getTextualUploadDate() throws ParsingException {
return "";
}
@Override
public String getThumbnailUrl() throws ParsingException {
Element img = li.select("img").first();
String thumbnailUrl = img.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we caught such an item.
if (thumbnailUrl.contains(".gif")) {
thumbnailUrl = img.attr("data-thumb");
}
if (thumbnailUrl.startsWith("//")) {
thumbnailUrl = HTTPS + thumbnailUrl;
}
return thumbnailUrl;
}
};
}
@Nonnull @Nonnull
@Override @Override
public List<Frameset> getFrames() throws ExtractionException { public List<Frameset> getFrames() throws ExtractionException {
@ -1137,40 +1070,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
@Nonnull
@Override @Override
public String getHost() throws ParsingException { public String getHost() {
return "";
}
@Nonnull
@Override
public String getPrivacy() {
return "";
}
@Nonnull
@Override
public String getCategory() {
return "";
}
@Nonnull
@Override
public String getLicence() {
return ""; return "";
} }
@Override @Override
public String getPrivacy() throws ParsingException { public Locale getLanguageInfo() {
return "";
}
@Override
public String getCategory() throws ParsingException {
return "";
}
@Override
public String getLicence() throws ParsingException {
return "";
}
@Override
public Locale getLanguageInfo() throws ParsingException {
return null; return null;
} }
@Nonnull @Nonnull
@Override @Override
public List<String> getTags() throws ParsingException { public List<String> getTags() {
return new ArrayList<>(); return new ArrayList<>();
} }
@Nonnull @Nonnull
@Override @Override
public String getSupportInfo() throws ParsingException { public String getSupportInfo() {
return ""; return "";
} }
} }

View File

@ -1,19 +1,19 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element; import com.grack.nanojson.JsonArray;
import org.jsoup.select.Elements; import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/* /*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org> * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -35,263 +35,190 @@ import java.util.Date;
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private final Element item; private JsonObject videoInfo;
private final TimeAgoParser timeAgoParser; private final TimeAgoParser timeAgoParser;
private String cachedUploadDate;
/** /**
* Creates an extractor of StreamInfoItems from a YouTube page. * Creates an extractor of StreamInfoItems from a YouTube page.
* *
* @param item The page element * @param videoInfoItem The JSON page element
* @param timeAgoParser A parser of the textual dates or {@code null}. * @param timeAgoParser A parser of the textual dates or {@code null}.
*/ */
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) { public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
this.item = item; this.videoInfo = videoInfoItem;
this.timeAgoParser = timeAgoParser; this.timeAgoParser = timeAgoParser;
} }
@Override @Override
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() {
if (isLiveStream(item)) { try {
return StreamType.LIVE_STREAM; if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
} else { return StreamType.LIVE_STREAM;
return StreamType.VIDEO_STREAM; }
} } catch (Exception ignored) {}
return StreamType.VIDEO_STREAM;
} }
@Override @Override
public boolean isAd() throws ParsingException { public boolean isAd() throws ParsingException {
return !item.select("span[class*=\"icon-not-available\"]").isEmpty() return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]");
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|| isPremiumVideo();
}
private boolean isPremiumVideo() {
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
if (premiumSpan == null) return false;
// if this span has text it most likely says ("Free Video") so we can play this
if (premiumSpan.hasText()) return false;
return true;
} }
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); String videoId = videoInfo.getString("videoId");
Element dl = el.select("h3").first().select("a").first(); return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
return dl.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e); throw new ParsingException("Could not get url", e);
} }
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
String name = null;
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); name = videoInfo.getObject("title").getString("simpleText");
Element dl = el.select("h3").first().select("a").first(); } catch (Exception ignored) {}
return dl.text(); if (name == null) {
} catch (Exception e) { try {
throw new ParsingException("Could not get title", e); name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
} }
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get name");
} }
@Override @Override
public long getDuration() throws ParsingException { public long getDuration() throws ParsingException {
try { try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1; if (getStreamType() == StreamType.LIVE_STREAM) return -1;
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
final Element duration = item.select("span[class*=\"video-time\"]").first();
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get Duration: " + getUrl(), e); throw new ParsingException("Could not get duration", e);
} }
} }
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
String name = null;
try { try {
return item.select("div[class=\"yt-lockup-byline\"]").first() name = videoInfo.getObject("longBylineText").getArray("runs")
.select("a").first() .getObject(0).getString("text");
.text(); } catch (Exception ignored) {}
} catch (Exception e) { if (name == null) {
throw new ParsingException("Could not get uploader", e); try {
name = videoInfo.getObject("ownerText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception ignored) {}
} }
if (name == null) {
try {
name = videoInfo.getObject("shortBylineText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception ignored) {}
}
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get uploader name");
} }
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the from "/user/..."
try { try {
String id = null;
try { try {
return item.select("div[class=\"yt-lockup-byline\"]").first() id = videoInfo.getObject("longBylineText").getArray("runs")
.select("a").first() .getObject(0).getObject("navigationEndpoint")
.attr("abs:href"); .getObject("browseEndpoint").getString("browseId");
} catch (Exception e){} } catch (Exception ignored) {}
if (id == null) {
// try this if the first didn't work try {
return item.select("span[class=\"title\"") id = videoInfo.getObject("ownerText").getArray("runs")
.text().split(" - ")[0]; .getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
}
if (id == null) {
try {
id = videoInfo.getObject("shortBylineText").getArray("runs")
.getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
}
if (id == null || id.isEmpty()) {
throw new IllegalArgumentException("is empty");
}
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
} catch (Exception e) { } catch (Exception e) {
System.out.println(item.html()); throw new ParsingException("Could not get uploader url");
throw new ParsingException("Could not get uploader url", e);
} }
} }
@Nullable @Nullable
@Override @Override
public String getTextualUploadDate() throws ParsingException { public String getTextualUploadDate() {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (cachedUploadDate != null) {
return cachedUploadDate;
}
try { try {
if (isVideoReminder()) { return videoInfo.getObject("publishedTimeText").getString("simpleText");
final Calendar calendar = getDateFromReminder();
if (calendar != null) {
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
.format(calendar.getTime());
}
}
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
if (meta == null) return "";
final Elements li = meta.select("li");
if (li.isEmpty()) return "";
return cachedUploadDate = li.first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get upload date", e); // upload date is not always available, e.g. in playlists
return null;
} }
} }
@Nullable @Nullable
@Override @Override
public DateWrapper getUploadDate() throws ParsingException { public DateWrapper getUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (isVideoReminder()) {
return new DateWrapper(getDateFromReminder());
}
String textualUploadDate = getTextualUploadDate(); String textualUploadDate = getTextualUploadDate();
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) { if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
return timeAgoParser.parse(textualUploadDate); try {
} else { return timeAgoParser.parse(textualUploadDate);
return null; } catch (ParsingException e) {
throw new ParsingException("Could not get upload date", e);
}
} }
return null;
} }
@Override @Override
public long getViewCount() throws ParsingException { public long getViewCount() throws ParsingException {
String input;
final Element spanViewCount = item.select("span.view-count").first();
if (spanViewCount != null) {
input = spanViewCount.text();
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
Element meta = item.select("ul.yt-lockup-meta-info").first();
if (meta == null) return 0;
final Elements li = meta.select("li");
if (li.isEmpty()) return 0;
input = li.first().text();
} else {
try {
Element meta = item.select("div.yt-lockup-meta").first();
if (meta == null) return -1;
// This case can happen if google releases a special video
if (meta.select("li").size() < 2) return -1;
input = meta.select("li").get(1).text();
} catch (IndexOutOfBoundsException e) {
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
}
}
if (input == null) {
throw new ParsingException("Input is null");
}
try { try {
if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) {
return Long.parseLong(Utils.removeNonDigitCharacters(input)); return -1;
} catch (NumberFormatException e) {
// if this happens the video probably has no views
if (!input.isEmpty()) {
return 0;
} }
String viewCount;
throw new ParsingException("Could not handle input: " + input, e); if (getStreamType() == StreamType.LIVE_STREAM) {
viewCount = videoInfo.getObject("viewCountText")
.getArray("runs").getObject(0).getString("text");
} else {
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
}
if (viewCount.equals("Recommended for you")) return -1;
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (Exception e) {
throw new ParsingException("Could not get view count", e);
} }
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
String url; // TODO: Don't simply get the first item, but look at all thumbnails and their resolution
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first() return videoInfo.getObject("thumbnail").getArray("thumbnails")
.select("img").first(); .getObject(0).getString("url");
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
private boolean isPremium() {
private boolean isVideoReminder() { try {
return !item.select("span.yt-uix-livereminder").isEmpty(); JsonArray badges = videoInfo.getArray("badges");
} for (Object badge : badges) {
if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) {
private Calendar getDateFromReminder() throws ParsingException { return true;
final Element timeFuture = item.select("span.yt-badge.localized-date").first(); }
if (timeFuture == null) {
throw new ParsingException("Span timeFuture is null");
}
final String timestamp = timeFuture.attr("data-timestamp");
if (!timestamp.isEmpty()) {
try {
final Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
return calendar;
} catch (Exception e) {
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
} }
} } catch (Exception ignored) {}
return false;
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
}
/**
* Generic method that checks if the element contains any clues that it's a livestream item
*/
protected static boolean isLiveStream(Element item) {
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
} }
} }

View File

@ -20,9 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
import org.jsoup.nodes.Document; import com.grack.nanojson.JsonArray;
import org.jsoup.nodes.Element; import com.grack.nanojson.JsonObject;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
@ -35,12 +35,12 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import javax.annotation.Nonnull;
import java.io.IOException; import java.io.IOException;
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> { import javax.annotation.Nonnull;
private Document doc; public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
private JsonObject initialData;
public YoutubeTrendingExtractor(StreamingService service, public YoutubeTrendingExtractor(StreamingService service,
ListLinkHandler linkHandler, ListLinkHandler linkHandler,
@ -54,7 +54,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
"?gl=" + getExtractorContentCountry().getCountryCode(); "?gl=" + getExtractorContentCountry().getCountryCode();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
} }
@Override @Override
@ -70,99 +70,36 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
String name;
try { try {
Element a = doc.select("a[href*=\"/feed/trending\"]").first(); name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title")
Element span = a.select("span[class*=\"display-name\"]").first(); .getArray("runs").getObject(0).getString("text");
Element nameSpan = span.select("span").first();
return nameSpan.text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get Trending name", e); throw new ParsingException("Could not get Trending name", e);
} }
if (name != null && !name.isEmpty()) {
return name;
}
throw new ParsingException("Could not get Trending name");
} }
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException { public InfoItemsPage<StreamInfoItem> getInitialPage() {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]"); JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer")
.getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content")
.getObject("expandedShelfContentsRenderer").getArray("items");
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (Element ul : uls) { for (Object ul : firstPageElements) {
for (final Element li : ul.children()) { final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer");
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first(); collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
try {
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
Element dl = el.select("h3").first().select("a").first();
return dl.text();
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getUploaderUrl() throws ParsingException {
try {
String link = getUploaderLink().attr("abs:href");
if (link.isEmpty()) {
throw new IllegalArgumentException("is empty");
}
return link;
} catch (Exception e) {
throw new ParsingException("Could not get Uploader name");
}
}
private Element getUploaderLink() {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the from "/user/..."
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
return uploaderEl.select("a").first();
}
@Override
public String getUploaderName() throws ParsingException {
try {
return getUploaderLink().text();
} catch (Exception e) {
throw new ParsingException("Could not get Uploader name");
}
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
String url;
Element te = li.select("span[class=\"yt-thumb-simple\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
}
} }
return new InfoItemsPage<>(collector, getNextPageUrl()); return new InfoItemsPage<>(collector, getNextPageUrl());
} }
} }

View File

@ -1,11 +1,16 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler; package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser;
import java.net.URL; import java.net.URL;
import java.text.ParseException; import java.text.ParseException;
@ -38,6 +43,8 @@ public class YoutubeParsingHelper {
private YoutubeParsingHelper() { private YoutubeParsingHelper() {
} }
public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user="; private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
@ -143,4 +150,68 @@ public class YoutubeParsingHelper {
uploadDate.setTime(date); uploadDate.setTime(date);
return uploadDate; return uploadDate;
} }
public static JsonObject getInitialData(String html) throws ParsingException {
try {
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
return JsonParser.object().from(initialData);
} catch (JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e);
}
}
/**
* Get the client version from a page
* @param initialData
* @param html The page HTML
* @return
* @throws ParsingException
*/
public static String getClientVersion(JsonObject initialData, String html) throws ParsingException {
if (initialData == null) initialData = getInitialData(html);
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
String shortClientVersion = null;
// try to get version from initial data first
for (Object service : serviceTrackingParams) {
JsonObject s = (JsonObject) service;
if (s.getString("service").equals("CSI")) {
JsonArray params = s.getArray("params");
for (Object param: params) {
JsonObject p = (JsonObject) param;
String key = p.getString("key");
if (key != null && key.equals("cver")) {
return p.getString("value");
}
}
} else if (s.getString("service").equals("ECATCHER")) {
// fallback to get a shortened client version which does not contain the last do digits
JsonArray params = s.getArray("params");
for (Object param: params) {
JsonObject p = (JsonObject) param;
String key = p.getString("key");
if (key != null && key.equals("client.version")) {
shortClientVersion = p.getString("value");
}
}
}
}
String clientVersion;
String[] patterns = {
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
"client.version=([0-9\\.]+)"
};
for (String pattern: patterns) {
try {
clientVersion = Parser.matchGroup1(pattern, html);
if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion;
} catch (Exception ignored) {}
}
if (shortClientVersion != null) return shortClientVersion;
throw new ParsingException("Could not get client version");
}
} }

View File

@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException { public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
try { try {
final String url = "https://www.youtube.com/results" final String url = "https://www.youtube.com/results"
+ "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8); + "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
if (contentFilters.size() > 0) { if (contentFilters.size() > 0) {
switch (contentFilters.get(0)) { switch (contentFilters.get(0)) {
case VIDEOS: return url + "&sp=EgIQAVAU"; case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
case CHANNELS: return url + "&sp=EgIQAlAU"; case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
case PLAYLISTS: return url + "&sp=EgIQA1AU"; case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
case ALL: case ALL:
default: default:
} }

View File

@ -10,6 +10,9 @@ import java.util.List;
public class Utils { public class Utils {
public static final String HTTP = "http://";
public static final String HTTPS = "https://";
private Utils() { private Utils() {
//no instance //no instance
} }
@ -83,9 +86,6 @@ public class Utils {
} }
} }
private static final String HTTP = "http://";
private static final String HTTPS = "https://";
public static String replaceHttpWithHttps(final String url) { public static String replaceHttpWithHttps(final String url) {
if (url == null) return null; if (url == null) return null;

View File

@ -20,7 +20,7 @@ import java.util.Map;
public class DownloaderTestImpl extends Downloader { public class DownloaderTestImpl extends Downloader {
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0";
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
private static DownloaderTestImpl instance = null; private static DownloaderTestImpl instance = null;

View File

@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest {
@Test @Test
public void testDescription() throws Exception { public void testDescription() throws Exception {
assertTrue("What it actually was: " + extractor.getDescription(), assertTrue("What it actually was: " + extractor.getDescription(),
extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:")); extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? Tweet me:"));
} }
@Test @Test

View File

@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
import java.util.regex.Pattern;
import static java.util.Arrays.asList; import static java.util.Arrays.asList;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.ServiceList.YouTube;
@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto
@Test @Test
public void testGetSecondPageUrl() throws Exception { public void testGetSecondPageUrl() throws Exception {
assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl()); // check that ctoken, continuation and itct are longer than 5 characters
Pattern pattern = Pattern.compile(
"https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1"
+ "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?"
);
assertTrue(pattern.matcher(extractor.getNextPageUrl()).find());
} }
@Ignore @Ignore

View File

@ -28,13 +28,13 @@ public class YoutubeSearchQHTest {
@Test @Test
public void testWithContentfilter() throws Exception { public void testWithContentfilter() throws Exception {
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory() assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl()); .fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory() assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl()); .fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory() assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl()); .fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory() assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl()); .fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl());
} }