mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-04-27 23:40:36 +05:30
Reimplement some methods in YoutubeStreamExtractor
This commit is contained in:
parent
02b59903fa
commit
f13c0288cc
@ -4,7 +4,6 @@ import com.grack.nanojson.JsonArray;
|
|||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.mozilla.javascript.Context;
|
import org.mozilla.javascript.Context;
|
||||||
@ -39,8 +38,6 @@ import org.schabi.newpipe.extractor.utils.Utils;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -48,8 +45,6 @@ import java.util.LinkedHashMap;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
@ -75,8 +70,6 @@ import javax.annotation.Nullable;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
public class YoutubeStreamExtractor extends StreamExtractor {
|
public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
|
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////
|
/*//////////////////////////////////////////////////////////////////////////
|
||||||
// Exceptions
|
// Exceptions
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
@ -87,12 +80,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public class SubtitlesException extends ContentNotAvailableException {
|
|
||||||
SubtitlesException(String message, Throwable cause) {
|
|
||||||
super(message, cause);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////*/
|
/*//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
@ -120,22 +107,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String title = null;
|
||||||
try {
|
try {
|
||||||
return playerResponse.getObject("videoDetails").getString("title");
|
title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
} catch (Exception e) {
|
if (title == null) {
|
||||||
// fallback HTML method
|
|
||||||
String name = null;
|
|
||||||
try {
|
try {
|
||||||
name = doc.select("meta[name=title]").attr(CONTENT);
|
title = playerResponse.getObject("videoDetails").getString("title");
|
||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {}
|
||||||
}
|
|
||||||
|
|
||||||
if (name == null) {
|
|
||||||
throw new ParsingException("Could not get name", e);
|
|
||||||
}
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
if (title != null) return title;
|
||||||
|
throw new ParsingException("Could not get name");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -144,19 +126,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: try videoPrimaryInfoRenderer.dateText.simpleText
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String uploadDate = null;
|
throw new ParsingException("Could not get upload date");
|
||||||
try {
|
|
||||||
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uploadDate == null) {
|
|
||||||
throw new ParsingException("Could not get upload date", e);
|
|
||||||
}
|
|
||||||
return uploadDate;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,15 +156,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String url = null;
|
throw new ParsingException("Could not get thumbnail url");
|
||||||
try {
|
|
||||||
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
|
||||||
} catch (Exception ignored) {}
|
|
||||||
|
|
||||||
if (url == null) {
|
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -198,93 +165,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public Description getDescription() throws ParsingException {
|
public Description getDescription() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
// TODO: Parse videoSecondaryInfoRenderer.description
|
||||||
try {
|
try {
|
||||||
// first try to get html-formatted description
|
// raw non-html description
|
||||||
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML);
|
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {
|
||||||
try {
|
throw new ParsingException("Could not get the description");
|
||||||
// fallback to raw non-html description
|
|
||||||
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
throw new ParsingException("Could not get the description", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
|
|
||||||
// :00 is NOT recognized as a timestamp in description or comments.
|
|
||||||
// 0:00 is recognized in both description and comments.
|
|
||||||
// https://www.youtube.com/watch?v=4cccfDXu1vA
|
|
||||||
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
|
|
||||||
"seekTo\\("
|
|
||||||
+ "(?:(\\d+)\\*3600\\+)?" // hours?
|
|
||||||
+ "(\\d+)\\*60\\+" // minutes
|
|
||||||
+ "(\\d+)" // seconds
|
|
||||||
+ "\\)");
|
|
||||||
|
|
||||||
@SafeVarargs
|
|
||||||
private static <T> T coalesce(T... args) {
|
|
||||||
for (T arg : args) {
|
|
||||||
if (arg != null) return arg;
|
|
||||||
}
|
|
||||||
throw new IllegalArgumentException("all arguments to coalesce() were null");
|
|
||||||
}
|
|
||||||
|
|
||||||
private String parseHtmlAndGetFullLinks(String descriptionHtml)
|
|
||||||
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
|
|
||||||
final Document description = Jsoup.parse(descriptionHtml, getUrl());
|
|
||||||
for (Element a : description.select("a")) {
|
|
||||||
final String rawUrl = a.attr("abs:href");
|
|
||||||
final URL redirectLink = new URL(rawUrl);
|
|
||||||
|
|
||||||
final Matcher onClickTimestamp;
|
|
||||||
final String queryString;
|
|
||||||
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
|
|
||||||
.find()) {
|
|
||||||
a.removeAttr("onclick");
|
|
||||||
|
|
||||||
String hours = coalesce(onClickTimestamp.group(1), "0");
|
|
||||||
String minutes = onClickTimestamp.group(2);
|
|
||||||
String seconds = onClickTimestamp.group(3);
|
|
||||||
|
|
||||||
int timestamp = 0;
|
|
||||||
timestamp += Integer.parseInt(hours) * 3600;
|
|
||||||
timestamp += Integer.parseInt(minutes) * 60;
|
|
||||||
timestamp += Integer.parseInt(seconds);
|
|
||||||
|
|
||||||
String setTimestamp = "&t=" + timestamp;
|
|
||||||
|
|
||||||
// Even after clicking https://youtu.be/...?t=6,
|
|
||||||
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
|
|
||||||
a.attr("href", getUrl() + setTimestamp);
|
|
||||||
|
|
||||||
} else if ((queryString = redirectLink.getQuery()) != null) {
|
|
||||||
// if the query string is null we are not dealing with a redirect link,
|
|
||||||
// so we don't need to override it.
|
|
||||||
final String link =
|
|
||||||
Parser.compatParseMap(queryString).get("q");
|
|
||||||
|
|
||||||
if (link != null) {
|
|
||||||
// if link is null the a tag is a hashtag.
|
|
||||||
// They refer to the youtube search. We do not handle them.
|
|
||||||
a.text(link);
|
|
||||||
a.attr("href", link);
|
|
||||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
|
||||||
a.text(redirectLink.toString());
|
|
||||||
a.attr("href", redirectLink.toString());
|
|
||||||
}
|
|
||||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
|
||||||
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
|
|
||||||
a.text(redirectLink.toString());
|
|
||||||
a.attr("href", redirectLink.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return description.select("body").first().html();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getAgeLimit() throws ParsingException {
|
public int getAgeLimit() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
// TODO: Find new way to get age limit
|
||||||
if (!isAgeRestricted) {
|
if (!isAgeRestricted) {
|
||||||
return NO_AGE_LIMIT;
|
return NO_AGE_LIMIT;
|
||||||
}
|
}
|
||||||
@ -332,54 +225,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public long getViewCount() throws ParsingException {
|
public long getViewCount() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
String views = null;
|
||||||
try {
|
try {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||||
// The array index is variable, therefore we loop throw the complete array.
|
.getObject("videoViewCountRenderer").getObject("viewCount")
|
||||||
// videoPrimaryInfoRenderer is often stored at index 1
|
.getArray("runs").getObject(0).getString("text");
|
||||||
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
} catch (Exception ignored) {}
|
||||||
.getObject("results").getObject("results").getArray("contents");
|
if (views == null) {
|
||||||
for (Object c : contents) {
|
|
||||||
try {
|
|
||||||
// this gets current view count, but there is also an overall view count which is stored here:
|
|
||||||
// contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results[0]
|
|
||||||
// .compactAutoplayRenderer.contents[0].compactVideoRenderer.viewCountText.simpleText
|
|
||||||
String views = ((JsonObject) c).getObject("videoPrimaryInfoRenderer")
|
|
||||||
.getObject("viewCount").getObject("videoViewCountRenderer").getObject("viewCount")
|
|
||||||
.getArray("runs").getObject(0).getString("text");
|
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(views));
|
|
||||||
} catch (Exception ignored) {}
|
|
||||||
}
|
|
||||||
throw new ParsingException("Could not get view count from live stream");
|
|
||||||
|
|
||||||
} else {
|
|
||||||
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
try {
|
try {
|
||||||
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||||
} catch (Exception ignored) {
|
.getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
|
||||||
throw new ParsingException("Could not get view count", e);
|
} catch (Exception ignored) {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
if (views == null) {
|
||||||
|
try {
|
||||||
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
views = playerResponse.getObject("videoDetails").getString("viewCount");
|
||||||
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
} catch (Exception ignored) {}
|
||||||
.getObject("results").getObject("results").getArray("contents");
|
|
||||||
JsonObject videoPrimaryInfoRenderer = null;
|
|
||||||
|
|
||||||
for (Object content : contents) {
|
|
||||||
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
|
|
||||||
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (views != null) return Long.parseLong(views);
|
||||||
if (videoPrimaryInfoRenderer == null) {
|
throw new ParsingException("Could not get view count");
|
||||||
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
|
||||||
}
|
|
||||||
|
|
||||||
return videoPrimaryInfoRenderer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -993,6 +857,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
// Utils
|
// Utils
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
|
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
||||||
|
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||||
|
.getObject("results").getObject("results").getArray("contents");
|
||||||
|
JsonObject videoPrimaryInfoRenderer = null;
|
||||||
|
|
||||||
|
for (Object content : contents) {
|
||||||
|
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
|
||||||
|
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (videoPrimaryInfoRenderer == null) {
|
||||||
|
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
||||||
|
}
|
||||||
|
|
||||||
|
return videoPrimaryInfoRenderer;
|
||||||
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private static String getVideoInfoUrl(final String id, final String sts) {
|
private static String getVideoInfoUrl(final String id, final String sts) {
|
||||||
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
||||||
|
Loading…
x
Reference in New Issue
Block a user