mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-01-07 10:00:34 +05:30
Use playerResponse more often in the YouTube stream extractor
This improves performance and should make the extractor more reliable, since it gets its info from a stable JSON structure that shouldn't be subject to many changes. The HTML fallback methods have been kept. In case of an error, the thrown exception contains the data about the playerResponse failure, which should be clearer than an NPE caused by HTML tags that were not found.
This commit is contained in:
parent
d83787a5ca
commit
1ed89aad3e
@ -106,20 +106,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String name = getStringFromMetaData("title");
|
try {
|
||||||
if(name == null) {
|
return playerResponse.getObject("videoDetails").getString("title");
|
||||||
// Fallback to HTML method
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
// fallback HTML method
|
||||||
|
String name = null;
|
||||||
try {
|
try {
|
||||||
name = doc.select("meta[name=title]").attr(CONTENT);
|
name = doc.select("meta[name=title]").attr(CONTENT);
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {}
|
||||||
throw new ParsingException("Could not get the title", e);
|
|
||||||
}
|
if (name == null) {
|
||||||
}
|
throw new ParsingException("Could not get name", e);
|
||||||
if(name == null || name.isEmpty()) {
|
|
||||||
throw new ParsingException("Could not get the title");
|
|
||||||
}
|
}
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
@ -128,10 +130,18 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||||
} catch (Exception e) {//todo: add fallback method
|
} catch (Exception e) {
|
||||||
|
String uploadDate = null;
|
||||||
|
try {
|
||||||
|
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (uploadDate == null) {
|
||||||
throw new ParsingException("Could not get upload date", e);
|
throw new ParsingException("Could not get upload date", e);
|
||||||
}
|
}
|
||||||
|
return uploadDate;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -149,24 +159,23 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
// Try to get high resolution thumbnail first, if it fails, use low res from the player instead
|
|
||||||
try {
|
try {
|
||||||
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails");
|
||||||
} catch (Exception ignored) {
|
// the last thumbnail is the one with the highest resolution
|
||||||
// Try other method...
|
return thumbnails.getObject(thumbnails.size()-1).getString("url");
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url");
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
// Try other method...
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return videoInfoPage.get("thumbnail_url");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
String url = null;
|
||||||
|
try {
|
||||||
|
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (url == null) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
}
|
}
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@ -174,11 +183,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
public String getDescription() throws ParsingException {
|
public String getDescription() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
|
// first try to get html-formatted description
|
||||||
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
// fallback to raw non-html description
|
||||||
|
return playerResponse.getObject("videoDetails").getString("shortDescription");
|
||||||
|
} catch (Exception ignored) {
|
||||||
throw new ParsingException("Could not get the description", e);
|
throw new ParsingException("Could not get the description", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
|
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
|
||||||
// :00 is NOT recognized as a timestamp in description or comments.
|
// :00 is NOT recognized as a timestamp in description or comments.
|
||||||
@ -269,25 +284,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
public long getLength() throws ParsingException {
|
public long getLength() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
// try getting duration from playerargs
|
|
||||||
try {
|
|
||||||
String durationMs = playerResponse
|
|
||||||
.getObject("streamingData")
|
|
||||||
.getArray("formats")
|
|
||||||
.getObject(0)
|
|
||||||
.getString("approxDurationMs");
|
|
||||||
return Long.parseLong(durationMs)/1000;
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
//try getting value from age gated video
|
|
||||||
try {
|
try {
|
||||||
String duration = playerResponse
|
String duration = playerResponse
|
||||||
.getObject("videoDetails")
|
.getObject("videoDetails")
|
||||||
.getString("lengthSeconds");
|
.getString("lengthSeconds");
|
||||||
return Long.parseLong(duration);
|
return Long.parseLong(duration);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Every methode to get the duration has failed: ", e);
|
try {
|
||||||
|
String durationMs = playerResponse
|
||||||
|
.getObject("streamingData")
|
||||||
|
.getArray("formats")
|
||||||
|
.getObject(0)
|
||||||
|
.getString("approxDurationMs");
|
||||||
|
return Math.round(Long.parseLong(durationMs)/1000.0f);
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
throw new ParsingException("Could not get duration", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,11 +319,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
try {
|
try {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||||
return getLiveStreamWatchingCount();
|
return getLiveStreamWatchingCount();
|
||||||
|
} else {
|
||||||
|
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
|
||||||
}
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
||||||
} catch (Exception e) {//todo: find fallback method
|
} catch (Exception ignored) {
|
||||||
throw new ParsingException("Could not get number of views", e);
|
throw new ParsingException("Could not get view count", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -373,7 +389,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
try {
|
try {
|
||||||
likesString = button.select("span.yt-uix-button-content").first().text();
|
likesString = button.select("span.yt-uix-button-content").first().text();
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore likes/dislikes are disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
|
throw new ParsingException("Ratings are enabled even though the like button is missing", e);
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
|
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
|
||||||
@ -393,7 +412,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
try {
|
try {
|
||||||
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore likes/dislikes are disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
|
throw new ParsingException("Ratings are enabled even though the dislike button is missing", e);
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
|
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
|
||||||
@ -409,60 +431,59 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
return doc.select("div[class=\"yt-user-info\"]").first().children()
|
return "https://www.youtube.com/channel/" +
|
||||||
.select("a").first().attr("abs:href");
|
playerResponse.getObject("videoDetails").getString("channelId");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
String uploaderUrl = null;
|
||||||
|
try {
|
||||||
|
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
|
||||||
|
.select("a").first().attr("abs:href");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (uploaderUrl == null) {
|
||||||
throw new ParsingException("Could not get channel link", e);
|
throw new ParsingException("Could not get channel link", e);
|
||||||
}
|
}
|
||||||
|
return uploaderUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Nullable
|
|
||||||
private String getStringFromMetaData(String field) {
|
|
||||||
assertPageFetched();
|
|
||||||
String value = null;
|
|
||||||
if(playerArgs != null) {
|
|
||||||
// This can not fail
|
|
||||||
value = playerArgs.getString(field);
|
|
||||||
}
|
|
||||||
if(value == null) {
|
|
||||||
// This can not fail too
|
|
||||||
value = videoInfoPage.get(field);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String name = getStringFromMetaData("author");
|
|
||||||
|
|
||||||
if(name == null) {
|
|
||||||
try {
|
try {
|
||||||
// Fallback to HTML method
|
return playerResponse.getObject("videoDetails").getString("author");
|
||||||
name = doc.select("div.yt-user-info").first().text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get uploader name", e);
|
String name = null;
|
||||||
}
|
try {
|
||||||
}
|
name = doc.select("div.yt-user-info").first().text();
|
||||||
if(name == null || name.isEmpty()) {
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (name == null) {
|
||||||
throw new ParsingException("Could not get uploader name");
|
throw new ParsingException("Could not get uploader name");
|
||||||
}
|
}
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
|
String uploaderAvatarUrl = null;
|
||||||
try {
|
try {
|
||||||
return doc.select("a[class*=\"yt-user-photo\"]").first()
|
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
|
||||||
.select("img").first()
|
.select("img").first()
|
||||||
.attr("abs:data-thumb");
|
.attr("abs:data-thumb");
|
||||||
} catch (Exception e) {//todo: add fallback method
|
} catch (Exception e) {//todo: add fallback method
|
||||||
throw new ParsingException("Could not get uploader thumbnail URL.", e);
|
throw new ParsingException("Could not get uploader avatar url", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (uploaderAvatarUrl == null) {
|
||||||
|
throw new ParsingException("Could not get uploader avatar url");
|
||||||
|
}
|
||||||
|
return uploaderAvatarUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@ -590,12 +611,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
public StreamType getStreamType() throws ParsingException {
|
public StreamType getStreamType() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") ||
|
if (!playerResponse.getObject("streamingData").has(FORMATS) ||
|
||||||
(!playerResponse.getObject("streamingData").has(FORMATS)))) {
|
(playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) {
|
||||||
return StreamType.LIVE_STREAM;
|
return StreamType.LIVE_STREAM;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get hls manifest url", e);
|
throw new ParsingException("Could not get stream type", e);
|
||||||
}
|
}
|
||||||
return StreamType.VIDEO_STREAM;
|
return StreamType.VIDEO_STREAM;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user