mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-04-29 00:10:35 +05:30
Merge pull request #261 from TeamNewPipe/yt_new
Update YouTube to material version
This commit is contained in:
commit
8838e2d136
@ -1,11 +1,11 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
import com.grack.nanojson.JsonParserException;
|
import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
@ -17,11 +17,18 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
|||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 25.07.16.
|
* Created by Christian Schabesberger on 25.07.16.
|
||||||
@ -49,6 +56,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
|
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
private JsonObject initialData;
|
||||||
|
|
||||||
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
||||||
super(service, linkHandler);
|
super(service, linkHandler);
|
||||||
@ -59,11 +67,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
||||||
final Response response = downloader.get(channelUrl, getExtractorLocalization());
|
final Response response = downloader.get(channelUrl, getExtractorLocalization());
|
||||||
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
|
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
|
||||||
|
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getNextPageUrl() throws ExtractionException {
|
public String getNextPageUrl() throws ExtractionException {
|
||||||
return getNextPageUrlFrom(doc);
|
return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@ -80,15 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getId() throws ParsingException {
|
public String getId() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("meta[itemprop=\"channelId\"]").first().attr("content");
|
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId");
|
||||||
} catch (Exception ignored) {}
|
|
||||||
|
|
||||||
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
|
|
||||||
try {
|
|
||||||
Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
|
|
||||||
if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
|
|
||||||
|
|
||||||
return element.attr("data-channel-external-id");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get channel id", e);
|
throw new ParsingException("Could not get channel id", e);
|
||||||
}
|
}
|
||||||
@ -98,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("meta[property=\"og:title\"]").first().attr("content");
|
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get channel name", e);
|
throw new ParsingException("Could not get channel name", e);
|
||||||
}
|
}
|
||||||
@ -107,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getAvatarUrl() throws ParsingException {
|
public String getAvatarUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
|
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar")
|
||||||
|
.getArray("thumbnails").getObject(0).getString("url");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get avatar", e);
|
throw new ParsingException("Could not get avatar", e);
|
||||||
}
|
}
|
||||||
@ -116,13 +119,27 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getBannerUrl() throws ParsingException {
|
public String getBannerUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
|
String url = null;
|
||||||
String cssContent = el.html();
|
try {
|
||||||
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
|
url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner")
|
||||||
|
.getArray("thumbnails").getObject(0).getString("url");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// the first characters of the banner URLs are different for each channel and some are not even valid URLs
|
||||||
|
if (url.startsWith("//")) {
|
||||||
|
url = url.substring(2);
|
||||||
|
}
|
||||||
|
if (url.startsWith(HTTP)) {
|
||||||
|
url = Utils.replaceHttpWithHttps(url);
|
||||||
|
} else if (!url.startsWith(HTTPS)) {
|
||||||
|
url = HTTPS + url;
|
||||||
|
}
|
||||||
|
|
||||||
return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
|
return url;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get Banner", e);
|
throw new ParsingException("Could not get banner", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,12 +154,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSubscriberCount() throws ParsingException {
|
public long getSubscriberCount() throws ParsingException {
|
||||||
|
final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
|
||||||
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
|
if (subscriberInfo != null) {
|
||||||
if (el != null) {
|
|
||||||
String elTitle = el.attr("title");
|
|
||||||
try {
|
try {
|
||||||
return Utils.mixedNumberWordToLong(elTitle);
|
return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text"));
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
throw new ParsingException("Could not get subscriber count", e);
|
throw new ParsingException("Could not get subscriber count", e);
|
||||||
}
|
}
|
||||||
@ -155,7 +170,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getDescription() throws ParsingException {
|
public String getDescription() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("meta[name=\"description\"]").first().attr("content");
|
return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get channel description", e);
|
throw new ParsingException("Could not get channel description", e);
|
||||||
}
|
}
|
||||||
@ -165,8 +180,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
|
|
||||||
collectStreamsFrom(collector, ul);
|
JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
|
||||||
|
collectStreamsFrom(collector, videos);
|
||||||
|
|
||||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,106 +198,98 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||||||
fetchPage();
|
fetchPage();
|
||||||
|
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
JsonObject ajaxJson;
|
JsonArray ajaxJson;
|
||||||
|
|
||||||
|
Map<String, List<String>> headers = new HashMap<>();
|
||||||
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||||
try {
|
try {
|
||||||
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
// Use the hardcoded client version first to get JSON with a structure we know
|
||||||
ajaxJson = JsonParser.object().from(response);
|
headers.put("X-YouTube-Client-Version",
|
||||||
} catch (JsonParserException pe) {
|
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||||
throw new ParsingException("Could not parse json data for next streams", pe);
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
|
ajaxJson = JsonParser.array().from(response);
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
headers.put("X-YouTube-Client-Version",
|
||||||
|
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||||
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
|
ajaxJson = JsonParser.array().from(response);
|
||||||
|
} catch (JsonParserException ignored) {
|
||||||
|
throw new ParsingException("Could not parse json data for next streams", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
|
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
|
||||||
collectStreamsFrom(collector, ajaxHtml.select("body").first());
|
.getObject("continuationContents").getObject("sectionListContinuation");
|
||||||
|
|
||||||
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
|
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
|
||||||
|
|
||||||
|
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
|
|
||||||
throws ParsingException {
|
private String getNextPageUrlFrom(JsonArray continuations) {
|
||||||
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
|
if (continuations == null) {
|
||||||
if (!loadMoreHtmlDataRaw.isEmpty()) {
|
|
||||||
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
|
|
||||||
} else {
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||||
|
String continuation = nextContinuationData.getString("continuation");
|
||||||
|
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||||
|
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
|
||||||
|
+ "&itct=" + clickTrackingParams;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
|
||||||
try {
|
|
||||||
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
|
||||||
if (button != null) {
|
|
||||||
return button.attr("abs:data-uix-load-more-href");
|
|
||||||
} else {
|
|
||||||
// Sometimes channels are simply so small, they don't have a more streams/videos
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get next page url", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
|
|
||||||
collector.reset();
|
collector.reset();
|
||||||
|
|
||||||
final String uploaderName = getName();
|
final String uploaderName = getName();
|
||||||
final String uploaderUrl = getUrl();
|
final String uploaderUrl = getUrl();
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
for (final Element li : element.children()) {
|
for (Object video : videos) {
|
||||||
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
.getArray("contents").getObject(0);
|
||||||
|
if (videoInfo.getObject("videoRenderer") != null) {
|
||||||
|
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) {
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUploaderName() {
|
||||||
try {
|
|
||||||
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
|
||||||
return dl.attr("abs:href");
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get web page url for the video", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getName() throws ParsingException {
|
|
||||||
try {
|
|
||||||
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
|
||||||
return dl.text();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get title", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderName() throws ParsingException {
|
|
||||||
return uploaderName;
|
return uploaderName;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() {
|
||||||
return uploaderUrl;
|
return uploaderUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
String url;
|
|
||||||
Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
|
|
||||||
.select("img").first();
|
|
||||||
url = te.attr("abs:src");
|
|
||||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
|
||||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
|
||||||
// to use that if we've caught such an item.
|
|
||||||
if (url.contains(".gif")) {
|
|
||||||
url = te.attr("abs:data-thumb");
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private JsonObject getVideoTab() throws ParsingException {
|
||||||
|
JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||||
|
.getArray("tabs");
|
||||||
|
JsonObject videoTab = null;
|
||||||
|
|
||||||
|
for (Object tab : tabs) {
|
||||||
|
if (((JsonObject) tab).getObject("tabRenderer") != null) {
|
||||||
|
if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
|
||||||
|
videoTab = ((JsonObject) tab).getObject("tabRenderer");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (videoTab == null) {
|
||||||
|
throw new ParsingException("Could not find Videos tab");
|
||||||
|
}
|
||||||
|
|
||||||
|
return videoTab;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
|
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import java.util.regex.Matcher;
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
||||||
import java.util.regex.Pattern;
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 12.02.17.
|
* Created by Christian Schabesberger on 12.02.17.
|
||||||
@ -29,87 +31,75 @@ import java.util.regex.Pattern;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
|
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
|
||||||
private final Element el;
|
private JsonObject channelInfoItem;
|
||||||
|
|
||||||
public YoutubeChannelInfoItemExtractor(Element el) {
|
public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) {
|
||||||
this.el = el;
|
this.channelInfoItem = channelInfoItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first()
|
try {
|
||||||
.select("img").first();
|
String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||||
|
if (url.startsWith("//")) {
|
||||||
String url = img.attr("abs:src");
|
url = url.substring(2);
|
||||||
|
}
|
||||||
if (url.contains("gif")) {
|
if (url.startsWith(HTTP)) {
|
||||||
url = img.attr("abs:data-thumb");
|
url = Utils.replaceHttpWithHttps(url);
|
||||||
|
} else if (!url.startsWith(HTTPS)) {
|
||||||
|
url = HTTPS + url;
|
||||||
|
}
|
||||||
|
return url;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
}
|
}
|
||||||
return url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
|
try {
|
||||||
.text();
|
return channelInfoItem.getObject("title").getString("simpleText");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get name", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
|
String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work?
|
||||||
.attr("abs:data-href");
|
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||||
|
|
||||||
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
|
|
||||||
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
|
|
||||||
|
|
||||||
if (match.matches()) {
|
|
||||||
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
|
|
||||||
}
|
|
||||||
} catch(Exception ignored) {}
|
|
||||||
|
|
||||||
// fallback method for channels without "Subscribe" button (or just in case yt changes things)
|
|
||||||
// provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail
|
|
||||||
try {
|
|
||||||
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
|
|
||||||
.attr("abs:href");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get channel url", e);
|
throw new ParsingException("Could not get url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSubscriberCount() throws ParsingException {
|
public long getSubscriberCount() throws ParsingException {
|
||||||
final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first();
|
try {
|
||||||
if (subsEl != null) {
|
String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0];
|
||||||
try {
|
return Utils.mixedNumberWordToLong(subscribers);
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text()));
|
} catch (Exception e) {
|
||||||
} catch (NumberFormatException e) {
|
throw new ParsingException("Could not get subscriber count", e);
|
||||||
throw new ParsingException("Could not get subscriber count", e);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// If the element is null, the channel have the subscriber count disabled
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getStreamCount() throws ParsingException {
|
public long getStreamCount() throws ParsingException {
|
||||||
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first();
|
try {
|
||||||
if (metaEl == null) {
|
return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText")
|
||||||
return 0;
|
.getArray("runs").getObject(0).getString("text")));
|
||||||
} else {
|
} catch (Exception e) {
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text()));
|
throw new ParsingException("Could not get stream count", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getDescription() throws ParsingException {
|
public String getDescription() throws ParsingException {
|
||||||
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first();
|
try {
|
||||||
if (desEl == null) {
|
return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text");
|
||||||
return "";
|
} catch (Exception e) {
|
||||||
} else {
|
throw new ParsingException("Could not get description", e);
|
||||||
return desEl.text();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,34 +1,39 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
import com.grack.nanojson.JsonParserException;
|
import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.downloader.Response;
|
import org.schabi.newpipe.extractor.downloader.Response;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
|
||||||
@SuppressWarnings("WeakerAccess")
|
@SuppressWarnings("WeakerAccess")
|
||||||
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
private JsonObject initialData;
|
||||||
|
private JsonObject uploaderInfo;
|
||||||
|
private JsonObject playlistInfo;
|
||||||
|
|
||||||
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
||||||
super(service, linkHandler);
|
super(service, linkHandler);
|
||||||
@ -39,18 +44,61 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
final String url = getUrl();
|
final String url = getUrl();
|
||||||
final Response response = downloader.get(url, getExtractorLocalization());
|
final Response response = downloader.get(url, getExtractorLocalization());
|
||||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||||
|
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||||
|
uploaderInfo = getUploaderInfo();
|
||||||
|
playlistInfo = getPlaylistInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
private JsonObject getUploaderInfo() throws ParsingException {
|
||||||
|
JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items");
|
||||||
|
try {
|
||||||
|
JsonObject uploaderInfo = items.getObject(1).getObject("playlistSidebarSecondaryInfoRenderer")
|
||||||
|
.getObject("videoOwner").getObject("videoOwnerRenderer");
|
||||||
|
if (uploaderInfo != null) {
|
||||||
|
return uploaderInfo;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
// we might want to create a loop here instead of using duplicated code
|
||||||
|
try {
|
||||||
|
JsonObject uploaderInfo = items.getObject(items.size()).getObject("playlistSidebarSecondaryInfoRenderer")
|
||||||
|
.getObject("videoOwner").getObject("videoOwnerRenderer");
|
||||||
|
if (uploaderInfo != null) {
|
||||||
|
return uploaderInfo;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get uploader info", e);
|
||||||
|
}
|
||||||
|
throw new ParsingException("Could not get uploader info");
|
||||||
|
}
|
||||||
|
|
||||||
|
private JsonObject getPlaylistInfo() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items")
|
||||||
|
.getObject(0).getObject("playlistSidebarPrimaryInfoRenderer");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get PlaylistInfo", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getNextPageUrl() throws ExtractionException {
|
public String getNextPageUrl() {
|
||||||
return getNextPageUrlFrom(doc);
|
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||||
|
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||||
|
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||||
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||||
|
.getObject("playlistVideoListRenderer").getArray("continuations"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
|
String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
|
if (name != null) return name;
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
try {
|
||||||
|
return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get playlist name", e);
|
throw new ParsingException("Could not get playlist name", e);
|
||||||
}
|
}
|
||||||
@ -59,7 +107,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
|
return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer")
|
||||||
|
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
try {
|
||||||
|
return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail")
|
||||||
|
.getArray("thumbnails").getObject(0).getString("url");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get playlist thumbnail", e);
|
throw new ParsingException("Could not get playlist thumbnail", e);
|
||||||
}
|
}
|
||||||
@ -75,8 +128,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
|
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
|
||||||
doc.select("button[class*=\"yt-uix-subscription-button\"]")
|
uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
|
||||||
.first().attr("data-channel-external-id");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get playlist uploader url", e);
|
throw new ParsingException("Could not get playlist uploader url", e);
|
||||||
}
|
}
|
||||||
@ -85,7 +137,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
|
return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get playlist uploader name", e);
|
throw new ParsingException("Could not get playlist uploader name", e);
|
||||||
}
|
}
|
||||||
@ -94,7 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
|
return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get playlist uploader avatar", e);
|
throw new ParsingException("Could not get playlist uploader avatar", e);
|
||||||
}
|
}
|
||||||
@ -102,33 +154,26 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getStreamCount() throws ParsingException {
|
public long getStreamCount() throws ParsingException {
|
||||||
String input;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
|
String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text");
|
||||||
} catch (IndexOutOfBoundsException e) {
|
return Long.parseLong(Utils.removeNonDigitCharacters(viewsText));
|
||||||
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get video count from playlist", e);
|
throw new ParsingException("Could not get video count from playlist", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
// When there's no videos in a playlist, there's no number in the "innerHtml",
|
|
||||||
// all characters that is not a number is removed, so we try to parse a empty string
|
|
||||||
if (!input.isEmpty()) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
throw new ParsingException("Could not handle input: " + input, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
public InfoItemsPage<StreamInfoItem> getInitialPage() {
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
|
|
||||||
collectStreamsFrom(collector, tbody);
|
JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||||
|
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||||
|
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||||
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||||
|
.getObject("playlistVideoListRenderer").getArray("contents");
|
||||||
|
|
||||||
|
collectStreamsFrom(collector, videos);
|
||||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,156 +184,67 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
JsonObject pageJson;
|
JsonArray ajaxJson;
|
||||||
|
|
||||||
|
Map<String, List<String>> headers = new HashMap<>();
|
||||||
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||||
try {
|
try {
|
||||||
final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
// Use the hardcoded client version first to get JSON with a structure we know
|
||||||
pageJson = JsonParser.object().from(responseBody);
|
headers.put("X-YouTube-Client-Version",
|
||||||
} catch (JsonParserException pe) {
|
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||||
throw new ParsingException("Could not parse ajax json", pe);
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
|
ajaxJson = JsonParser.array().from(response);
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
headers.put("X-YouTube-Client-Version",
|
||||||
|
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||||
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
|
ajaxJson = JsonParser.array().from(response);
|
||||||
|
} catch (JsonParserException ignored) {
|
||||||
|
throw new ParsingException("Could not parse json data for next streams", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
|
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
|
||||||
+ pageJson.getString("content_html")
|
.getObject("continuationContents").getObject("playlistVideoListContinuation");
|
||||||
+ "</tbody></table>", pageUrl);
|
|
||||||
|
|
||||||
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
|
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
|
||||||
|
|
||||||
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
|
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
|
private String getNextPageUrlFrom(JsonArray continuations) {
|
||||||
throws ParsingException {
|
if (continuations == null) {
|
||||||
String nextPageHtml = pageJson.getString("load_more_widget_html");
|
|
||||||
if (!nextPageHtml.isEmpty()) {
|
|
||||||
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
|
|
||||||
} else {
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||||
|
String continuation = nextContinuationData.getString("continuation");
|
||||||
|
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||||
|
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
|
||||||
|
+ "&itct=" + clickTrackingParams;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) {
|
||||||
try {
|
|
||||||
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
|
||||||
if (button != null) {
|
|
||||||
return button.attr("abs:data-uix-load-more-href");
|
|
||||||
} else {
|
|
||||||
// Sometimes playlists are simply so small, they don't have a more streams/videos
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("could not get next streams' url", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) {
|
|
||||||
collector.reset();
|
collector.reset();
|
||||||
|
|
||||||
if (element == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
|
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
for (final Element li : element.children()) {
|
for (Object video : videos) {
|
||||||
if (isDeletedItem(li)) {
|
if (((JsonObject) video).getObject("playlistVideoRenderer") != null) {
|
||||||
continue;
|
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {
|
||||||
|
@Override
|
||||||
|
public long getViewCount() {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
|
||||||
public Element uploaderLink;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isAd() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get web page url for the video", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getName() throws ParsingException {
|
|
||||||
try {
|
|
||||||
return li.attr("data-title");
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get title", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getDuration() throws ParsingException {
|
|
||||||
try {
|
|
||||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
|
||||||
|
|
||||||
Element first = li.select("div[class=\"timestamp\"] span").first();
|
|
||||||
if (first == null) {
|
|
||||||
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
|
|
||||||
// because in other cases, those videos don't even show up
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return YoutubeParsingHelper.parseDurationString(first.text());
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get duration" + getUrl(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private Element getUploaderLink() {
|
|
||||||
// should always be present since we filter deleted items
|
|
||||||
if (uploaderLink == null) {
|
|
||||||
uploaderLink = li.select("div[class=pl-video-owner] a").first();
|
|
||||||
}
|
|
||||||
return uploaderLink;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderName() throws ParsingException {
|
|
||||||
return getUploaderLink().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderUrl() throws ParsingException {
|
|
||||||
// this url is not always in the form "/channel/..."
|
|
||||||
// sometimes Youtube provides urls in the from "/user/..."
|
|
||||||
return getUploaderLink().attr("abs:href");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getViewCount() throws ParsingException {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg";
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if the playlist item is deleted
|
|
||||||
*
|
|
||||||
* @param li the list item
|
|
||||||
* @return true if the item is deleted
|
|
||||||
*/
|
|
||||||
private boolean isDeletedItem(Element li) {
|
|
||||||
return li.select("div[class=pl-video-owner] a").isEmpty();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,97 +1,63 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor;
|
import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor {
|
public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor {
|
||||||
private final Element el;
|
private JsonObject playlistInfoItem;
|
||||||
|
|
||||||
public YoutubePlaylistInfoItemExtractor(Element el) {
|
public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) {
|
||||||
this.el = el;
|
this.playlistInfoItem = playlistInfoItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
String url;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first()
|
return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails")
|
||||||
.select("img").first();
|
.getObject(0).getString("url");
|
||||||
url = te.attr("abs:src");
|
|
||||||
|
|
||||||
if (url.contains(".gif")) {
|
|
||||||
url = te.attr("abs:data-thumb");
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Failed to extract playlist thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
String name;
|
|
||||||
try {
|
try {
|
||||||
final Element title = el.select("[class=\"yt-lockup-title\"]").first()
|
return playlistInfoItem.getObject("title").getString("simpleText");
|
||||||
.select("a").first();
|
|
||||||
|
|
||||||
name = title == null ? "" : title.text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Failed to extract playlist name", e);
|
throw new ParsingException("Could not get name", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final Element a = el.select("div[class=\"yt-lockup-meta\"]")
|
String id = playlistInfoItem.getString("playlistId");
|
||||||
.select("ul[class=\"yt-lockup-meta-info\"]")
|
return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id);
|
||||||
.select("li").select("a").first();
|
|
||||||
|
|
||||||
if (a != null) {
|
|
||||||
return a.attr("abs:href");
|
|
||||||
}
|
|
||||||
|
|
||||||
// this is for yt premium playlists
|
|
||||||
return el.select("h3[class=\"yt-lockup-title\"").first()
|
|
||||||
.select("a").first()
|
|
||||||
.attr("abs:href");
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Failed to extract playlist url", e);
|
throw new ParsingException("Could not get url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
String name;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final Element div = el.select("div[class=\"yt-lockup-byline\"]").first()
|
return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text");
|
||||||
.select("a").first();
|
|
||||||
|
|
||||||
name = div.text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Failed to extract playlist uploader", e);
|
throw new ParsingException("Could not get uploader name", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getStreamCount() throws ParsingException {
|
public long getStreamCount() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final Element count = el.select("span[class=\"formatted-video-count-label\"]").first()
|
return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount")));
|
||||||
.select("b").first();
|
|
||||||
|
|
||||||
return count == null ? 0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text()));
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Failed to extract playlist stream count", e);
|
throw new ParsingException("Could not get stream count", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import org.jsoup.Jsoup;
|
import com.grack.nanojson.JsonArray;
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
import com.grack.nanojson.JsonParser;
|
||||||
|
import com.grack.nanojson.JsonParserException;
|
||||||
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.schabi.newpipe.extractor.InfoItem;
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
@ -14,13 +17,14 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
|||||||
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 22.07.2018
|
* Created by Christian Schabesberger on 22.07.2018
|
||||||
@ -45,6 +49,7 @@ import java.net.URL;
|
|||||||
public class YoutubeSearchExtractor extends SearchExtractor {
|
public class YoutubeSearchExtractor extends SearchExtractor {
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
private JsonObject initialData;
|
||||||
|
|
||||||
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
|
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
|
||||||
super(service, linkHandler);
|
super(service, linkHandler);
|
||||||
@ -55,6 +60,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||||||
final String url = getUrl();
|
final String url = getUrl();
|
||||||
final Response response = downloader.get(url, getExtractorLocalization());
|
final Response response = downloader.get(url, getExtractorLocalization());
|
||||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||||
|
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@ -65,80 +71,109 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getSearchSuggestion() {
|
public String getSearchSuggestion() {
|
||||||
final Element el = doc.select("div[class*=\"spell-correction\"]").first();
|
JsonObject showingResultsForRenderer = initialData.getObject("contents")
|
||||||
if (el != null) {
|
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
|
||||||
return el.select("a").first().text();
|
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||||
} else {
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||||
|
.getObject("showingResultsForRenderer");
|
||||||
|
if (showingResultsForRenderer == null) {
|
||||||
return "";
|
return "";
|
||||||
|
} else {
|
||||||
|
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
|
||||||
|
.getObject(0).getString("text");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
|
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
|
||||||
return new InfoItemsPage<>(collectItems(doc), getNextPageUrl());
|
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||||
|
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
||||||
|
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
||||||
|
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
|
||||||
|
|
||||||
|
collectStreamsFrom(collector, videos);
|
||||||
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getNextPageUrl() throws ExtractionException {
|
public String getNextPageUrl() throws ExtractionException {
|
||||||
return getUrl() + "&page=" + 2;
|
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
||||||
|
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
||||||
|
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||||
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
if (pageUrl == null || pageUrl.isEmpty()) {
|
||||||
doc = Jsoup.parse(response, pageUrl);
|
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
||||||
|
}
|
||||||
|
|
||||||
return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getNextPageUrlFromCurrentUrl(String currentUrl)
|
|
||||||
throws MalformedURLException, UnsupportedEncodingException {
|
|
||||||
final int pageNr = Integer.parseInt(
|
|
||||||
Parser.compatParseMap(
|
|
||||||
new URL(currentUrl)
|
|
||||||
.getQuery())
|
|
||||||
.get("page"));
|
|
||||||
|
|
||||||
return currentUrl.replace("&page=" + pageNr,
|
|
||||||
"&page=" + Integer.toString(pageNr + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
|
|
||||||
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||||
collector.reset();
|
JsonArray ajaxJson;
|
||||||
|
|
||||||
Element list = doc.select("ol[class=\"item-section\"]").first();
|
Map<String, List<String>> headers = new HashMap<>();
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||||
|
|
||||||
for (Element item : list.children()) {
|
try {
|
||||||
/* First we need to determine which kind of item we are working with.
|
// Use the hardcoded client version first to get JSON with a structure we know
|
||||||
Youtube depicts five different kinds of items on its search result page. These are
|
headers.put("X-YouTube-Client-Version",
|
||||||
regular videos, playlists, channels, two types of video suggestions, and a "no video
|
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||||
found" item. Since we only want videos, we need to filter out all the others.
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
An example for this can be seen here:
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
https://www.youtube.com/results?search_query=asdf&page=1
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
We already applied a filter to the url, so we don't need to care about channels and
|
ajaxJson = JsonParser.array().from(response);
|
||||||
playlists now.
|
} catch (Exception e) {
|
||||||
*/
|
try {
|
||||||
|
headers.put("X-YouTube-Client-Version",
|
||||||
Element el;
|
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||||
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||||
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
|
if (response.length() < 50) { // ensure to have a valid response
|
||||||
throw new NothingFoundException(el.text());
|
throw new ParsingException("Could not parse json data for next streams");
|
||||||
|
}
|
||||||
// video item type
|
ajaxJson = JsonParser.array().from(response);
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
} catch (JsonParserException ignored) {
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
|
throw new ParsingException("Could not parse json data for next streams", e);
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
|
||||||
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
|
||||||
item.select(".yt-pl-icon-mix").isEmpty()) {
|
|
||||||
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return collector;
|
JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
|
||||||
|
.getObject("continuationContents").getObject("itemSectionContinuation");
|
||||||
|
|
||||||
|
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
|
||||||
|
|
||||||
|
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException {
|
||||||
|
collector.reset();
|
||||||
|
|
||||||
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
|
for (Object item : videos) {
|
||||||
|
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
|
||||||
|
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
|
||||||
|
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
|
||||||
|
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
|
||||||
|
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
|
||||||
|
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
|
||||||
|
collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
|
||||||
|
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
|
||||||
|
collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
|
||||||
|
if (continuations == null) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||||
|
String continuation = nextContinuationData.getString("continuation");
|
||||||
|
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||||
|
return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
|
||||||
|
+ "&itct=" + clickTrackingParams;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,11 +3,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||||||
import com.grack.nanojson.JsonArray;
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
import com.grack.nanojson.JsonParserException;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
import org.mozilla.javascript.Context;
|
import org.mozilla.javascript.Context;
|
||||||
import org.mozilla.javascript.Function;
|
import org.mozilla.javascript.Function;
|
||||||
import org.mozilla.javascript.ScriptableObject;
|
import org.mozilla.javascript.ScriptableObject;
|
||||||
@ -15,7 +13,6 @@ import org.schabi.newpipe.extractor.MediaFormat;
|
|||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.downloader.Request;
|
|
||||||
import org.schabi.newpipe.extractor.downloader.Response;
|
import org.schabi.newpipe.extractor.downloader.Response;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
@ -23,23 +20,41 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
|
import org.schabi.newpipe.extractor.localization.Localization;
|
||||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
|
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.*;
|
import org.schabi.newpipe.extractor.stream.AudioStream;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Frameset;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Stream;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
|
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
|
||||||
|
import org.schabi.newpipe.extractor.stream.VideoStream;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.URLDecoder;
|
||||||
import java.net.URL;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.*;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.regex.Matcher;
|
import java.util.ArrayList;
|
||||||
import java.util.regex.Pattern;
|
import java.util.Calendar;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 06.08.15.
|
* Created by Christian Schabesberger on 06.08.15.
|
||||||
@ -62,8 +77,6 @@ import java.util.regex.Pattern;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
public class YoutubeStreamExtractor extends StreamExtractor {
|
public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
|
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////
|
/*//////////////////////////////////////////////////////////////////////////
|
||||||
// Exceptions
|
// Exceptions
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
@ -74,12 +87,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public class SubtitlesException extends ContentNotAvailableException {
|
|
||||||
SubtitlesException(String message, Throwable cause) {
|
|
||||||
super(message, cause);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////*/
|
/*//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
@ -88,6 +95,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Nonnull
|
@Nonnull
|
||||||
private final Map<String, String> videoInfoPage = new HashMap<>();
|
private final Map<String, String> videoInfoPage = new HashMap<>();
|
||||||
private JsonObject playerResponse;
|
private JsonObject playerResponse;
|
||||||
|
private JsonObject initialData;
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
|
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
|
||||||
@ -106,22 +114,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String title = null;
|
||||||
try {
|
try {
|
||||||
return playerResponse.getObject("videoDetails").getString("title");
|
title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
} catch (Exception e) {
|
if (title == null) {
|
||||||
// fallback HTML method
|
|
||||||
String name = null;
|
|
||||||
try {
|
try {
|
||||||
name = doc.select("meta[name=title]").attr(CONTENT);
|
title = playerResponse.getObject("videoDetails").getString("title");
|
||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {}
|
||||||
}
|
|
||||||
|
|
||||||
if (name == null) {
|
|
||||||
throw new ParsingException("Could not get name", e);
|
|
||||||
}
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
if (title != null) return title;
|
||||||
|
throw new ParsingException("Could not get name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -131,19 +134,33 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
// return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {}
|
||||||
String uploadDate = null;
|
|
||||||
try {
|
|
||||||
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uploadDate == null) {
|
try {
|
||||||
throw new ParsingException("Could not get upload date", e);
|
if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) {
|
||||||
|
String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10);
|
||||||
|
|
||||||
|
try { // Premiered 20 hours ago
|
||||||
|
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
|
||||||
|
Calendar parsedTime = timeAgoParser.parse(time).date();
|
||||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
try { // Premiered Feb 21, 2020
|
||||||
|
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
|
||||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
|
||||||
|
} catch (Exception ignored) {}
|
||||||
}
|
}
|
||||||
return uploadDate;
|
} catch (Exception ignored) {}
|
||||||
}
|
|
||||||
|
try {
|
||||||
|
// TODO this parses English formatted dates only, we need a better approach to parse the textual date
|
||||||
|
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
|
||||||
|
getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText"));
|
||||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(d);
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
throw new ParsingException("Could not get upload date");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -167,15 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String url = null;
|
throw new ParsingException("Could not get thumbnail url");
|
||||||
try {
|
|
||||||
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
|
||||||
} catch (Exception ignored) {}
|
|
||||||
|
|
||||||
if (url == null) {
|
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -184,88 +193,65 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public Description getDescription() throws ParsingException {
|
public Description getDescription() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
// description with more info on links
|
||||||
try {
|
try {
|
||||||
// first try to get html-formatted description
|
boolean htmlConversionRequired = false;
|
||||||
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML);
|
JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs");
|
||||||
} catch (Exception e) {
|
StringBuilder descriptionBuilder = new StringBuilder(descriptions.size());
|
||||||
try {
|
for (Object textObjectHolder : descriptions) {
|
||||||
// fallback to raw non-html description
|
JsonObject textHolder = (JsonObject) textObjectHolder;
|
||||||
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
String text = textHolder.getString("text");
|
||||||
} catch (Exception ignored) {
|
if (textHolder.getObject("navigationEndpoint") != null) {
|
||||||
throw new ParsingException("Could not get the description", e);
|
// The text is a link. Get the URL it points to and generate an HTML link from it
|
||||||
}
|
if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) {
|
||||||
}
|
String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url");
|
||||||
}
|
if (internUrl.startsWith("/redirect?")) {
|
||||||
|
// q parameter can be the first parameter
|
||||||
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
|
internUrl = internUrl.substring(10);
|
||||||
// :00 is NOT recognized as a timestamp in description or comments.
|
String[] params = internUrl.split("&");
|
||||||
// 0:00 is recognized in both description and comments.
|
for (String param : params) {
|
||||||
// https://www.youtube.com/watch?v=4cccfDXu1vA
|
if (param.split("=")[0].equals("q")) {
|
||||||
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
|
String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name());
|
||||||
"seekTo\\("
|
if (url != null && !url.isEmpty()) {
|
||||||
+ "(?:(\\d+)\\*3600\\+)?" // hours?
|
descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
|
||||||
+ "(\\d+)\\*60\\+" // minutes
|
htmlConversionRequired = true;
|
||||||
+ "(\\d+)" // seconds
|
} else {
|
||||||
+ "\\)");
|
descriptionBuilder.append(text);
|
||||||
|
}
|
||||||
@SafeVarargs
|
break;
|
||||||
private static <T> T coalesce(T... args) {
|
}
|
||||||
for (T arg : args) {
|
}
|
||||||
if (arg != null) return arg;
|
} else if (internUrl.startsWith("http")) {
|
||||||
}
|
descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
|
||||||
throw new IllegalArgumentException("all arguments to coalesce() were null");
|
htmlConversionRequired = true;
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
private String parseHtmlAndGetFullLinks(String descriptionHtml)
|
}
|
||||||
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
|
continue;
|
||||||
final Document description = Jsoup.parse(descriptionHtml, getUrl());
|
}
|
||||||
for (Element a : description.select("a")) {
|
if (text != null) {
|
||||||
final String rawUrl = a.attr("abs:href");
|
descriptionBuilder.append(text);
|
||||||
final URL redirectLink = new URL(rawUrl);
|
|
||||||
|
|
||||||
final Matcher onClickTimestamp;
|
|
||||||
final String queryString;
|
|
||||||
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
|
|
||||||
.find()) {
|
|
||||||
a.removeAttr("onclick");
|
|
||||||
|
|
||||||
String hours = coalesce(onClickTimestamp.group(1), "0");
|
|
||||||
String minutes = onClickTimestamp.group(2);
|
|
||||||
String seconds = onClickTimestamp.group(3);
|
|
||||||
|
|
||||||
int timestamp = 0;
|
|
||||||
timestamp += Integer.parseInt(hours) * 3600;
|
|
||||||
timestamp += Integer.parseInt(minutes) * 60;
|
|
||||||
timestamp += Integer.parseInt(seconds);
|
|
||||||
|
|
||||||
String setTimestamp = "&t=" + timestamp;
|
|
||||||
|
|
||||||
// Even after clicking https://youtu.be/...?t=6,
|
|
||||||
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
|
|
||||||
a.attr("href", getUrl() + setTimestamp);
|
|
||||||
|
|
||||||
} else if ((queryString = redirectLink.getQuery()) != null) {
|
|
||||||
// if the query string is null we are not dealing with a redirect link,
|
|
||||||
// so we don't need to override it.
|
|
||||||
final String link =
|
|
||||||
Parser.compatParseMap(queryString).get("q");
|
|
||||||
|
|
||||||
if (link != null) {
|
|
||||||
// if link is null the a tag is a hashtag.
|
|
||||||
// They refer to the youtube search. We do not handle them.
|
|
||||||
a.text(link);
|
|
||||||
a.attr("href", link);
|
|
||||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
|
||||||
a.text(redirectLink.toString());
|
|
||||||
a.attr("href", redirectLink.toString());
|
|
||||||
}
|
}
|
||||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
|
||||||
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
|
|
||||||
a.text(redirectLink.toString());
|
|
||||||
a.attr("href", redirectLink.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String description = descriptionBuilder.toString();
|
||||||
|
|
||||||
|
if (!description.isEmpty()) {
|
||||||
|
if (htmlConversionRequired) {
|
||||||
|
description = description.replaceAll("\\n", "<br>");
|
||||||
|
description = description.replaceAll(" ", " ");
|
||||||
|
return new Description(description, Description.HTML);
|
||||||
|
}
|
||||||
|
return new Description(description, Description.PLAIN_TEXT);
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) { }
|
||||||
|
|
||||||
|
// raw non-html description
|
||||||
|
try {
|
||||||
|
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
throw new ParsingException("Could not get description");
|
||||||
}
|
}
|
||||||
return description.select("body").first().html();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -318,68 +304,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public long getViewCount() throws ParsingException {
|
public long getViewCount() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String views = null;
|
||||||
try {
|
try {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||||
return getLiveStreamWatchingCount();
|
.getObject("videoViewCountRenderer").getObject("viewCount")
|
||||||
} else {
|
.getArray("runs").getObject(0).getString("text");
|
||||||
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
|
} catch (Exception ignored) {}
|
||||||
}
|
if (views == null) {
|
||||||
} catch (Exception e) {
|
|
||||||
try {
|
try {
|
||||||
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||||
} catch (Exception ignored) {
|
.getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
|
||||||
throw new ParsingException("Could not get view count", e);
|
} catch (Exception ignored) {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
if (views == null) {
|
||||||
|
try {
|
||||||
private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException {
|
views = playerResponse.getObject("videoDetails").getString("viewCount");
|
||||||
// https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=
|
} catch (Exception ignored) {}
|
||||||
String innerTubeKey = null, clientVersion = null;
|
|
||||||
if (playerArgs != null && !playerArgs.isEmpty()) {
|
|
||||||
innerTubeKey = playerArgs.getString("innertube_api_key");
|
|
||||||
clientVersion = playerArgs.getString("innertube_context_client_version");
|
|
||||||
} else if (!videoInfoPage.isEmpty()) {
|
|
||||||
innerTubeKey = videoInfoPage.get("innertube_api_key");
|
|
||||||
clientVersion = videoInfoPage.get("innertube_context_client_version");
|
|
||||||
}
|
}
|
||||||
|
if (views != null) return Long.parseLong(Utils.removeNonDigitCharacters(views));
|
||||||
if (innerTubeKey == null || innerTubeKey.isEmpty()) {
|
throw new ParsingException("Could not get view count");
|
||||||
throw new ExtractionException("Couldn't get innerTube key");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (clientVersion == null || clientVersion.isEmpty()) {
|
|
||||||
throw new ExtractionException("Couldn't get innerTube client version");
|
|
||||||
}
|
|
||||||
|
|
||||||
final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey;
|
|
||||||
final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" +
|
|
||||||
",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8");
|
|
||||||
final Response response = getDownloader().execute(Request.newBuilder()
|
|
||||||
.post(metadataUrl, dataBody)
|
|
||||||
.addHeader("Content-Type", "application/json")
|
|
||||||
.build());
|
|
||||||
final JsonObject jsonObject = JsonParser.object().from(response.responseBody());
|
|
||||||
|
|
||||||
for (Object actionEntry : jsonObject.getArray("actions")) {
|
|
||||||
if (!(actionEntry instanceof JsonObject)) continue;
|
|
||||||
final JsonObject entry = (JsonObject) actionEntry;
|
|
||||||
|
|
||||||
final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null);
|
|
||||||
if (updateViewershipAction == null) continue;
|
|
||||||
|
|
||||||
final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs");
|
|
||||||
if (viewCountRuns.isEmpty()) continue;
|
|
||||||
|
|
||||||
final JsonObject textObject = viewCountRuns.getObject(0);
|
|
||||||
if (!textObject.has("text")) {
|
|
||||||
throw new ExtractionException("Response don't have \"text\" element");
|
|
||||||
}
|
|
||||||
|
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text")));
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new ExtractionException("Could not find correct results in response");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -387,9 +330,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String likesString = "";
|
String likesString = "";
|
||||||
try {
|
try {
|
||||||
Element button = doc.select("button.like-button-renderer-like-button").first();
|
|
||||||
try {
|
try {
|
||||||
likesString = button.select("span.yt-uix-button-content").first().text();
|
likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
|
||||||
|
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[0];
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore ratings must be disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
@ -410,9 +353,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String dislikesString = "";
|
String dislikesString = "";
|
||||||
try {
|
try {
|
||||||
Element button = doc.select("button.like-button-renderer-dislike-button").first();
|
|
||||||
try {
|
try {
|
||||||
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
|
||||||
|
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[1];
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore ratings must be disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
@ -432,40 +375,36 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String uploaderId = null;
|
||||||
try {
|
try {
|
||||||
return "https://www.youtube.com/channel/" +
|
uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||||
playerResponse.getObject("videoDetails").getString("channelId");
|
.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {}
|
||||||
String uploaderUrl = null;
|
if (uploaderId == null) {
|
||||||
try {
|
try {
|
||||||
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
|
uploaderId = playerResponse.getObject("videoDetails").getString("channelId");
|
||||||
.select("a").first().attr("abs:href");
|
|
||||||
} catch (Exception ignored) {}
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
if (uploaderUrl == null) {
|
|
||||||
throw new ParsingException("Could not get channel link", e);
|
|
||||||
}
|
|
||||||
return uploaderUrl;
|
|
||||||
}
|
}
|
||||||
|
if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId;
|
||||||
|
throw new ParsingException("Could not get uploader url");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String uploaderName = null;
|
||||||
try {
|
try {
|
||||||
return playerResponse.getObject("videoDetails").getString("author");
|
uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||||
} catch (Exception e) {
|
.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
String name = null;
|
} catch (Exception ignored) {}
|
||||||
|
if (uploaderName == null) {
|
||||||
try {
|
try {
|
||||||
name = doc.select("div.yt-user-info").first().text();
|
uploaderName = playerResponse.getObject("videoDetails").getString("author");
|
||||||
} catch (Exception ignored) {}
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
if (name == null) {
|
|
||||||
throw new ParsingException("Could not get uploader name");
|
|
||||||
}
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
if (uploaderName != null) return uploaderName;
|
||||||
|
throw new ParsingException("Could not get uploader name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@ -475,12 +414,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
|
|
||||||
String uploaderAvatarUrl = null;
|
String uploaderAvatarUrl = null;
|
||||||
try {
|
try {
|
||||||
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
|
uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults")
|
||||||
.select("img").first()
|
.getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer")
|
||||||
.attr("abs:data-thumb");
|
.getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail")
|
||||||
} catch (Exception e) {//todo: add fallback method
|
.getArray("thumbnails").getObject(0).getString("url");
|
||||||
throw new ParsingException("Could not get uploader avatar url", e);
|
if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) {
|
||||||
}
|
return uploaderAvatarUrl;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
try {
|
||||||
|
uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||||
|
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
if (uploaderAvatarUrl == null) {
|
if (uploaderAvatarUrl == null) {
|
||||||
throw new ParsingException("Could not get uploader avatar url");
|
throw new ParsingException("Could not get uploader avatar url");
|
||||||
@ -594,13 +540,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
|
public List<SubtitlesStream> getSubtitlesDefault() {
|
||||||
return getSubtitles(MediaFormat.TTML);
|
return getSubtitles(MediaFormat.TTML);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOException, ExtractionException {
|
public List<SubtitlesStream> getSubtitles(final MediaFormat format) {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
List<SubtitlesStream> subtitles = new ArrayList<>();
|
List<SubtitlesStream> subtitles = new ArrayList<>();
|
||||||
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
|
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
|
||||||
@ -624,18 +570,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StreamInfoItem getNextStream() throws IOException, ExtractionException {
|
public StreamInfoItem getNextStream() throws ExtractionException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
if (isAgeRestricted) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||||
|
.getObject("secondaryResults").getObject("secondaryResults").getArray("results")
|
||||||
|
.getObject(0).getObject("compactAutoplayRenderer").getArray("contents")
|
||||||
|
.getObject(0).getObject("compactVideoRenderer");
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
|
|
||||||
Elements watch = doc.select("div[class=\"watch-sidebar-section\"]");
|
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||||
if (watch.size() < 1) {
|
|
||||||
return null;// prevent the snackbar notification "report error" on age-restricted videos
|
|
||||||
}
|
|
||||||
|
|
||||||
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser));
|
|
||||||
return collector.getItems().get(0);
|
return collector.getItems().get(0);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get next video", e);
|
throw new ParsingException("Could not get next video", e);
|
||||||
@ -643,20 +591,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException {
|
public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
if (isAgeRestricted) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
|
JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||||
|
.getObject("secondaryResults").getObject("secondaryResults").getArray("results");
|
||||||
|
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
Element ul = doc.select("ul[id=\"watch-related\"]").first();
|
for (Object ul : results) {
|
||||||
if (ul != null) {
|
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
|
||||||
for (Element li : ul.children()) {
|
|
||||||
// first check if we have a playlist. If so leave them out
|
if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||||
if (li.select("a[class*=\"content-link\"]").first() != null) {
|
|
||||||
collector.commit(extractVideoPreviewInfo(li, timeAgoParser));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return collector;
|
return collector;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@ -736,6 +686,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
isAgeRestricted = false;
|
isAgeRestricted = false;
|
||||||
}
|
}
|
||||||
playerResponse = getPlayerResponse();
|
playerResponse = getPlayerResponse();
|
||||||
|
initialData = YoutubeParsingHelper.getInitialData(pageHtml);
|
||||||
|
|
||||||
if (decryptionCode.isEmpty()) {
|
if (decryptionCode.isEmpty()) {
|
||||||
decryptionCode = loadDecryptionCode(playerUrl);
|
decryptionCode = loadDecryptionCode(playerUrl);
|
||||||
@ -752,12 +703,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return JsonParser.object().from(ytPlayerConfigRaw);
|
return JsonParser.object().from(ytPlayerConfigRaw);
|
||||||
} catch (Parser.RegexException e) {
|
} catch (Parser.RegexException e) {
|
||||||
String errorReason = getErrorMessage();
|
String errorReason = getErrorMessage();
|
||||||
switch (errorReason) {
|
if (errorReason.isEmpty()) {
|
||||||
case "":
|
throw new ContentNotAvailableException("Content not available: player config empty", e);
|
||||||
throw new ContentNotAvailableException("Content not available: player config empty", e);
|
|
||||||
default:
|
|
||||||
throw new ContentNotAvailableException("Content not available", e);
|
|
||||||
}
|
}
|
||||||
|
throw new ContentNotAvailableException("Content not available", e);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not parse yt player config", e);
|
throw new ParsingException("Could not parse yt player config", e);
|
||||||
}
|
}
|
||||||
@ -912,7 +861,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private List<SubtitlesInfo> getAvailableSubtitlesInfo() throws SubtitlesException {
|
private List<SubtitlesInfo> getAvailableSubtitlesInfo() {
|
||||||
// If the video is age restricted getPlayerConfig will fail
|
// If the video is age restricted getPlayerConfig will fail
|
||||||
if (isAgeRestricted) return Collections.emptyList();
|
if (isAgeRestricted) return Collections.emptyList();
|
||||||
|
|
||||||
@ -926,7 +875,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject());
|
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject());
|
||||||
final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray());
|
final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray());
|
||||||
// todo: use this to apply auto translation to different language from a source language
|
// todo: use this to apply auto translation to different language from a source language
|
||||||
final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
|
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
|
||||||
|
|
||||||
// This check is necessary since there may be cases where subtitles metadata do not contain caption track info
|
// This check is necessary since there may be cases where subtitles metadata do not contain caption track info
|
||||||
// e.g. https://www.youtube.com/watch?v=-Vpwatutnko
|
// e.g. https://www.youtube.com/watch?v=-Vpwatutnko
|
||||||
@ -983,6 +932,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
// Utils
|
// Utils
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
|
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
||||||
|
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||||
|
.getObject("results").getObject("results").getArray("contents");
|
||||||
|
JsonObject videoPrimaryInfoRenderer = null;
|
||||||
|
|
||||||
|
for (Object content : contents) {
|
||||||
|
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
|
||||||
|
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (videoPrimaryInfoRenderer == null) {
|
||||||
|
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
||||||
|
}
|
||||||
|
|
||||||
|
return videoPrimaryInfoRenderer;
|
||||||
|
}
|
||||||
|
|
||||||
|
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
|
||||||
|
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||||
|
.getObject("results").getObject("results").getArray("contents");
|
||||||
|
JsonObject videoSecondaryInfoRenderer = null;
|
||||||
|
|
||||||
|
for (Object content : contents) {
|
||||||
|
if (((JsonObject) content).getObject("videoSecondaryInfoRenderer") != null) {
|
||||||
|
videoSecondaryInfoRenderer = ((JsonObject) content).getObject("videoSecondaryInfoRenderer");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (videoSecondaryInfoRenderer == null) {
|
||||||
|
throw new ParsingException("Could not find videoSecondaryInfoRenderer");
|
||||||
|
}
|
||||||
|
|
||||||
|
return videoSecondaryInfoRenderer;
|
||||||
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private static String getVideoInfoUrl(final String id, final String sts) {
|
private static String getVideoInfoUrl(final String id, final String sts) {
|
||||||
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
||||||
@ -1026,60 +1013,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return urlAndItags;
|
return urlAndItags;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides information about links to other videos on the video page, such as related videos.
|
|
||||||
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
|
||||||
*/
|
|
||||||
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) {
|
|
||||||
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUrl() throws ParsingException {
|
|
||||||
return li.select("a.content-link").first().attr("abs:href");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getName() throws ParsingException {
|
|
||||||
//todo: check NullPointerException causing
|
|
||||||
return li.select("span.title").first().text();
|
|
||||||
//this page causes the NullPointerException, after finding it by searching for "tjvg":
|
|
||||||
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderName() throws ParsingException {
|
|
||||||
return li.select("span[class*=\"attribution\"").first()
|
|
||||||
.select("span").first().text();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderUrl() throws ParsingException {
|
|
||||||
return ""; // The uploader is not linked
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
|
||||||
Element img = li.select("img").first();
|
|
||||||
String thumbnailUrl = img.attr("abs:src");
|
|
||||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
|
||||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
|
||||||
// to use that if we caught such an item.
|
|
||||||
if (thumbnailUrl.contains(".gif")) {
|
|
||||||
thumbnailUrl = img.attr("data-thumb");
|
|
||||||
}
|
|
||||||
if (thumbnailUrl.startsWith("//")) {
|
|
||||||
thumbnailUrl = HTTPS + thumbnailUrl;
|
|
||||||
}
|
|
||||||
return thumbnailUrl;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public List<Frameset> getFrames() throws ExtractionException {
|
public List<Frameset> getFrames() throws ExtractionException {
|
||||||
@ -1137,40 +1070,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getHost() throws ParsingException {
|
public String getHost() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
@Override
|
||||||
|
public String getPrivacy() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
@Override
|
||||||
|
public String getCategory() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
@Override
|
||||||
|
public String getLicence() {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getPrivacy() throws ParsingException {
|
public Locale getLanguageInfo() {
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getCategory() throws ParsingException {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getLicence() throws ParsingException {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Locale getLanguageInfo() throws ParsingException {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public List<String> getTags() throws ParsingException {
|
public List<String> getTags() {
|
||||||
return new ArrayList<>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getSupportInfo() throws ParsingException {
|
public String getSupportInfo() {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,19 +1,19 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import com.grack.nanojson.JsonArray;
|
||||||
import org.jsoup.select.Elements;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Calendar;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||||
@ -35,263 +35,190 @@ import java.util.Date;
|
|||||||
|
|
||||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
private final Element item;
|
private JsonObject videoInfo;
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
private String cachedUploadDate;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an extractor of StreamInfoItems from a YouTube page.
|
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||||
*
|
*
|
||||||
* @param item The page element
|
* @param videoInfoItem The JSON page element
|
||||||
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||||
*/
|
*/
|
||||||
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
|
public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
|
||||||
this.item = item;
|
this.videoInfo = videoInfoItem;
|
||||||
this.timeAgoParser = timeAgoParser;
|
this.timeAgoParser = timeAgoParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StreamType getStreamType() throws ParsingException {
|
public StreamType getStreamType() {
|
||||||
if (isLiveStream(item)) {
|
try {
|
||||||
return StreamType.LIVE_STREAM;
|
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
|
||||||
} else {
|
return StreamType.LIVE_STREAM;
|
||||||
return StreamType.VIDEO_STREAM;
|
}
|
||||||
}
|
} catch (Exception ignored) {}
|
||||||
|
return StreamType.VIDEO_STREAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isAd() throws ParsingException {
|
public boolean isAd() throws ParsingException {
|
||||||
return !item.select("span[class*=\"icon-not-available\"]").isEmpty()
|
return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]");
|
||||||
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|
|
||||||
|| isPremiumVideo();
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isPremiumVideo() {
|
|
||||||
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
|
|
||||||
if (premiumSpan == null) return false;
|
|
||||||
|
|
||||||
// if this span has text it most likely says ("Free Video") so we can play this
|
|
||||||
if (premiumSpan.hasText()) return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
String videoId = videoInfo.getString("videoId");
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
||||||
return dl.attr("abs:href");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get web page url for the video", e);
|
throw new ParsingException("Could not get url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
|
String name = null;
|
||||||
try {
|
try {
|
||||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
name = videoInfo.getObject("title").getString("simpleText");
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
} catch (Exception ignored) {}
|
||||||
return dl.text();
|
if (name == null) {
|
||||||
} catch (Exception e) {
|
try {
|
||||||
throw new ParsingException("Could not get title", e);
|
name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
}
|
}
|
||||||
|
if (name != null && !name.isEmpty()) return name;
|
||||||
|
throw new ParsingException("Could not get name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getDuration() throws ParsingException {
|
public long getDuration() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
||||||
|
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
|
||||||
final Element duration = item.select("span[class*=\"video-time\"]").first();
|
|
||||||
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
|
|
||||||
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
|
|
||||||
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get Duration: " + getUrl(), e);
|
throw new ParsingException("Could not get duration", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
|
String name = null;
|
||||||
try {
|
try {
|
||||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
name = videoInfo.getObject("longBylineText").getArray("runs")
|
||||||
.select("a").first()
|
.getObject(0).getString("text");
|
||||||
.text();
|
} catch (Exception ignored) {}
|
||||||
} catch (Exception e) {
|
if (name == null) {
|
||||||
throw new ParsingException("Could not get uploader", e);
|
try {
|
||||||
|
name = videoInfo.getObject("ownerText").getArray("runs")
|
||||||
|
.getObject(0).getString("text");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
}
|
}
|
||||||
|
if (name == null) {
|
||||||
|
try {
|
||||||
|
name = videoInfo.getObject("shortBylineText").getArray("runs")
|
||||||
|
.getObject(0).getString("text");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
if (name != null && !name.isEmpty()) return name;
|
||||||
|
throw new ParsingException("Could not get uploader name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
// this url is not always in the form "/channel/..."
|
|
||||||
// sometimes Youtube provides urls in the from "/user/..."
|
|
||||||
try {
|
try {
|
||||||
|
String id = null;
|
||||||
try {
|
try {
|
||||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
id = videoInfo.getObject("longBylineText").getArray("runs")
|
||||||
.select("a").first()
|
.getObject(0).getObject("navigationEndpoint")
|
||||||
.attr("abs:href");
|
.getObject("browseEndpoint").getString("browseId");
|
||||||
} catch (Exception e){}
|
} catch (Exception ignored) {}
|
||||||
|
if (id == null) {
|
||||||
// try this if the first didn't work
|
try {
|
||||||
return item.select("span[class=\"title\"")
|
id = videoInfo.getObject("ownerText").getArray("runs")
|
||||||
.text().split(" - ")[0];
|
.getObject(0).getObject("navigationEndpoint")
|
||||||
|
.getObject("browseEndpoint").getString("browseId");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
if (id == null) {
|
||||||
|
try {
|
||||||
|
id = videoInfo.getObject("shortBylineText").getArray("runs")
|
||||||
|
.getObject(0).getObject("navigationEndpoint")
|
||||||
|
.getObject("browseEndpoint").getString("browseId");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
if (id == null || id.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("is empty");
|
||||||
|
}
|
||||||
|
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.out.println(item.html());
|
throw new ParsingException("Could not get uploader url");
|
||||||
throw new ParsingException("Could not get uploader url", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
@Override
|
@Override
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
public String getTextualUploadDate() {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cachedUploadDate != null) {
|
|
||||||
return cachedUploadDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (isVideoReminder()) {
|
return videoInfo.getObject("publishedTimeText").getString("simpleText");
|
||||||
final Calendar calendar = getDateFromReminder();
|
|
||||||
if (calendar != null) {
|
|
||||||
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
|
|
||||||
.format(calendar.getTime());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
|
||||||
if (meta == null) return "";
|
|
||||||
|
|
||||||
final Elements li = meta.select("li");
|
|
||||||
if (li.isEmpty()) return "";
|
|
||||||
|
|
||||||
return cachedUploadDate = li.first().text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get upload date", e);
|
// upload date is not always available, e.g. in playlists
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
@Override
|
@Override
|
||||||
public DateWrapper getUploadDate() throws ParsingException {
|
public DateWrapper getUploadDate() throws ParsingException {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isVideoReminder()) {
|
|
||||||
return new DateWrapper(getDateFromReminder());
|
|
||||||
}
|
|
||||||
|
|
||||||
String textualUploadDate = getTextualUploadDate();
|
String textualUploadDate = getTextualUploadDate();
|
||||||
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
||||||
return timeAgoParser.parse(textualUploadDate);
|
try {
|
||||||
} else {
|
return timeAgoParser.parse(textualUploadDate);
|
||||||
return null;
|
} catch (ParsingException e) {
|
||||||
|
throw new ParsingException("Could not get upload date", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getViewCount() throws ParsingException {
|
public long getViewCount() throws ParsingException {
|
||||||
String input;
|
|
||||||
|
|
||||||
final Element spanViewCount = item.select("span.view-count").first();
|
|
||||||
if (spanViewCount != null) {
|
|
||||||
input = spanViewCount.text();
|
|
||||||
|
|
||||||
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
|
||||||
Element meta = item.select("ul.yt-lockup-meta-info").first();
|
|
||||||
if (meta == null) return 0;
|
|
||||||
|
|
||||||
final Elements li = meta.select("li");
|
|
||||||
if (li.isEmpty()) return 0;
|
|
||||||
|
|
||||||
input = li.first().text();
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
Element meta = item.select("div.yt-lockup-meta").first();
|
|
||||||
if (meta == null) return -1;
|
|
||||||
|
|
||||||
// This case can happen if google releases a special video
|
|
||||||
if (meta.select("li").size() < 2) return -1;
|
|
||||||
|
|
||||||
input = meta.select("li").get(1).text();
|
|
||||||
} catch (IndexOutOfBoundsException e) {
|
|
||||||
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (input == null) {
|
|
||||||
throw new ParsingException("Input is null");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) {
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
return -1;
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
// if this happens the video probably has no views
|
|
||||||
if (!input.isEmpty()) {
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
String viewCount;
|
||||||
throw new ParsingException("Could not handle input: " + input, e);
|
if (getStreamType() == StreamType.LIVE_STREAM) {
|
||||||
|
viewCount = videoInfo.getObject("viewCountText")
|
||||||
|
.getArray("runs").getObject(0).getString("text");
|
||||||
|
} else {
|
||||||
|
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
|
||||||
|
}
|
||||||
|
if (viewCount.equals("Recommended for you")) return -1;
|
||||||
|
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get view count", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
String url;
|
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
|
||||||
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
|
return videoInfo.getObject("thumbnail").getArray("thumbnails")
|
||||||
.select("img").first();
|
.getObject(0).getString("url");
|
||||||
url = te.attr("abs:src");
|
|
||||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
|
||||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
|
||||||
// to use that if we've caught such an item.
|
|
||||||
if (url.contains(".gif")) {
|
|
||||||
url = te.attr("abs:data-thumb");
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isPremium() {
|
||||||
private boolean isVideoReminder() {
|
try {
|
||||||
return !item.select("span.yt-uix-livereminder").isEmpty();
|
JsonArray badges = videoInfo.getArray("badges");
|
||||||
}
|
for (Object badge : badges) {
|
||||||
|
if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) {
|
||||||
private Calendar getDateFromReminder() throws ParsingException {
|
return true;
|
||||||
final Element timeFuture = item.select("span.yt-badge.localized-date").first();
|
}
|
||||||
|
|
||||||
if (timeFuture == null) {
|
|
||||||
throw new ParsingException("Span timeFuture is null");
|
|
||||||
}
|
|
||||||
|
|
||||||
final String timestamp = timeFuture.attr("data-timestamp");
|
|
||||||
if (!timestamp.isEmpty()) {
|
|
||||||
try {
|
|
||||||
final Calendar calendar = Calendar.getInstance();
|
|
||||||
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
|
|
||||||
return calendar;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
|
|
||||||
}
|
}
|
||||||
}
|
} catch (Exception ignored) {}
|
||||||
|
return false;
|
||||||
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generic method that checks if the element contains any clues that it's a livestream item
|
|
||||||
*/
|
|
||||||
protected static boolean isLiveStream(Element item) {
|
|
||||||
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|
|
||||||
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,9 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.jsoup.nodes.Document;
|
import com.grack.nanojson.JsonArray;
|
||||||
import org.jsoup.nodes.Element;
|
import com.grack.nanojson.JsonObject;
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.downloader.Response;
|
import org.schabi.newpipe.extractor.downloader.Response;
|
||||||
@ -35,12 +35,12 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
|||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
import javax.annotation.Nonnull;
|
||||||
|
|
||||||
private Document doc;
|
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||||
|
private JsonObject initialData;
|
||||||
|
|
||||||
public YoutubeTrendingExtractor(StreamingService service,
|
public YoutubeTrendingExtractor(StreamingService service,
|
||||||
ListLinkHandler linkHandler,
|
ListLinkHandler linkHandler,
|
||||||
@ -54,7 +54,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
|||||||
"?gl=" + getExtractorContentCountry().getCountryCode();
|
"?gl=" + getExtractorContentCountry().getCountryCode();
|
||||||
|
|
||||||
final Response response = downloader.get(url, getExtractorLocalization());
|
final Response response = downloader.get(url, getExtractorLocalization());
|
||||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -70,99 +70,36 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
|||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
|
String name;
|
||||||
try {
|
try {
|
||||||
Element a = doc.select("a[href*=\"/feed/trending\"]").first();
|
name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title")
|
||||||
Element span = a.select("span[class*=\"display-name\"]").first();
|
.getArray("runs").getObject(0).getString("text");
|
||||||
Element nameSpan = span.select("span").first();
|
|
||||||
return nameSpan.text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get Trending name", e);
|
throw new ParsingException("Could not get Trending name", e);
|
||||||
}
|
}
|
||||||
|
if (name != null && !name.isEmpty()) {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
throw new ParsingException("Could not get Trending name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
|
public InfoItemsPage<StreamInfoItem> getInitialPage() {
|
||||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||||
Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]");
|
JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||||
|
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||||
|
.getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer")
|
||||||
|
.getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content")
|
||||||
|
.getObject("expandedShelfContentsRenderer").getArray("items");
|
||||||
|
|
||||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
for (Element ul : uls) {
|
for (Object ul : firstPageElements) {
|
||||||
for (final Element li : ul.children()) {
|
final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer");
|
||||||
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
|
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
|
||||||
@Override
|
|
||||||
public String getUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
|
||||||
return dl.attr("abs:href");
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get web page url for the video", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getName() throws ParsingException {
|
|
||||||
try {
|
|
||||||
Element dl = el.select("h3").first().select("a").first();
|
|
||||||
return dl.text();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get web page url for the video", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
String link = getUploaderLink().attr("abs:href");
|
|
||||||
if (link.isEmpty()) {
|
|
||||||
throw new IllegalArgumentException("is empty");
|
|
||||||
}
|
|
||||||
return link;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get Uploader name");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Element getUploaderLink() {
|
|
||||||
// this url is not always in the form "/channel/..."
|
|
||||||
// sometimes Youtube provides urls in the from "/user/..."
|
|
||||||
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
|
|
||||||
return uploaderEl.select("a").first();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getUploaderName() throws ParsingException {
|
|
||||||
try {
|
|
||||||
return getUploaderLink().text();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get Uploader name");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
|
||||||
try {
|
|
||||||
String url;
|
|
||||||
Element te = li.select("span[class=\"yt-thumb-simple\"]").first()
|
|
||||||
.select("img").first();
|
|
||||||
url = te.attr("abs:src");
|
|
||||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
|
||||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
|
||||||
// to use that if we've caught such an item.
|
|
||||||
if (url.contains(".gif")) {
|
|
||||||
url = te.attr("abs:data-thumb");
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,16 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||||
|
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonArray;
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
import com.grack.nanojson.JsonParser;
|
||||||
|
import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.schabi.newpipe.extractor.downloader.Response;
|
import org.schabi.newpipe.extractor.downloader.Response;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
@ -38,6 +43,8 @@ public class YoutubeParsingHelper {
|
|||||||
private YoutubeParsingHelper() {
|
private YoutubeParsingHelper() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
|
||||||
|
|
||||||
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||||
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
||||||
|
|
||||||
@ -143,4 +150,68 @@ public class YoutubeParsingHelper {
|
|||||||
uploadDate.setTime(date);
|
uploadDate.setTime(date);
|
||||||
return uploadDate;
|
return uploadDate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static JsonObject getInitialData(String html) throws ParsingException {
|
||||||
|
try {
|
||||||
|
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
|
||||||
|
return JsonParser.object().from(initialData);
|
||||||
|
} catch (JsonParserException | Parser.RegexException e) {
|
||||||
|
throw new ParsingException("Could not get ytInitialData", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the client version from a page
|
||||||
|
* @param initialData
|
||||||
|
* @param html The page HTML
|
||||||
|
* @return
|
||||||
|
* @throws ParsingException
|
||||||
|
*/
|
||||||
|
public static String getClientVersion(JsonObject initialData, String html) throws ParsingException {
|
||||||
|
if (initialData == null) initialData = getInitialData(html);
|
||||||
|
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
|
||||||
|
String shortClientVersion = null;
|
||||||
|
|
||||||
|
// try to get version from initial data first
|
||||||
|
for (Object service : serviceTrackingParams) {
|
||||||
|
JsonObject s = (JsonObject) service;
|
||||||
|
if (s.getString("service").equals("CSI")) {
|
||||||
|
JsonArray params = s.getArray("params");
|
||||||
|
for (Object param: params) {
|
||||||
|
JsonObject p = (JsonObject) param;
|
||||||
|
String key = p.getString("key");
|
||||||
|
if (key != null && key.equals("cver")) {
|
||||||
|
return p.getString("value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (s.getString("service").equals("ECATCHER")) {
|
||||||
|
// fallback to get a shortened client version which does not contain the last do digits
|
||||||
|
JsonArray params = s.getArray("params");
|
||||||
|
for (Object param: params) {
|
||||||
|
JsonObject p = (JsonObject) param;
|
||||||
|
String key = p.getString("key");
|
||||||
|
if (key != null && key.equals("client.version")) {
|
||||||
|
shortClientVersion = p.getString("value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String clientVersion;
|
||||||
|
String[] patterns = {
|
||||||
|
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
||||||
|
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
||||||
|
"client.version=([0-9\\.]+)"
|
||||||
|
};
|
||||||
|
for (String pattern: patterns) {
|
||||||
|
try {
|
||||||
|
clientVersion = Parser.matchGroup1(pattern, html);
|
||||||
|
if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion;
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shortClientVersion != null) return shortClientVersion;
|
||||||
|
|
||||||
|
throw new ParsingException("Could not get client version");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
|
|||||||
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
|
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final String url = "https://www.youtube.com/results"
|
final String url = "https://www.youtube.com/results"
|
||||||
+ "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
|
+ "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
|
||||||
|
|
||||||
if (contentFilters.size() > 0) {
|
if (contentFilters.size() > 0) {
|
||||||
switch (contentFilters.get(0)) {
|
switch (contentFilters.get(0)) {
|
||||||
case VIDEOS: return url + "&sp=EgIQAVAU";
|
case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
|
||||||
case CHANNELS: return url + "&sp=EgIQAlAU";
|
case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
|
||||||
case PLAYLISTS: return url + "&sp=EgIQA1AU";
|
case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
|
||||||
case ALL:
|
case ALL:
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,9 @@ import java.util.List;
|
|||||||
|
|
||||||
public class Utils {
|
public class Utils {
|
||||||
|
|
||||||
|
public static final String HTTP = "http://";
|
||||||
|
public static final String HTTPS = "https://";
|
||||||
|
|
||||||
private Utils() {
|
private Utils() {
|
||||||
//no instance
|
//no instance
|
||||||
}
|
}
|
||||||
@ -83,9 +86,6 @@ public class Utils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final String HTTP = "http://";
|
|
||||||
private static final String HTTPS = "https://";
|
|
||||||
|
|
||||||
public static String replaceHttpWithHttps(final String url) {
|
public static String replaceHttpWithHttps(final String url) {
|
||||||
if (url == null) return null;
|
if (url == null) return null;
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ import java.util.Map;
|
|||||||
|
|
||||||
public class DownloaderTestImpl extends Downloader {
|
public class DownloaderTestImpl extends Downloader {
|
||||||
|
|
||||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0";
|
||||||
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
|
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
|
||||||
|
|
||||||
private static DownloaderTestImpl instance = null;
|
private static DownloaderTestImpl instance = null;
|
||||||
|
@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDescription() throws Exception {
|
public void testDescription() throws Exception {
|
||||||
assertTrue("What it actually was: " + extractor.getDescription(),
|
assertTrue("What it actually was: " + extractor.getDescription(),
|
||||||
extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:"));
|
extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? Tweet me:"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
|
|||||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static java.util.Arrays.asList;
|
import static java.util.Arrays.asList;
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||||
@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetSecondPageUrl() throws Exception {
|
public void testGetSecondPageUrl() throws Exception {
|
||||||
assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl());
|
// check that ctoken, continuation and itct are longer than 5 characters
|
||||||
|
Pattern pattern = Pattern.compile(
|
||||||
|
"https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1"
|
||||||
|
+ "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?"
|
||||||
|
);
|
||||||
|
assertTrue(pattern.matcher(extractor.getNextPageUrl()).find());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ignore
|
@Ignore
|
||||||
|
@ -28,13 +28,13 @@ public class YoutubeSearchQHTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWithContentfilter() throws Exception {
|
public void testWithContentfilter() throws Exception {
|
||||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory()
|
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory()
|
||||||
.fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl());
|
.fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl());
|
||||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory()
|
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory()
|
||||||
.fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl());
|
.fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl());
|
||||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory()
|
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory()
|
||||||
.fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl());
|
.fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl());
|
||||||
assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory()
|
assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory()
|
||||||
.fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl());
|
.fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user