From 365b0329f3dfb0dba257d1399624b5e9a4a56822 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Thu, 27 Feb 2020 17:39:23 +0100 Subject: [PATCH] Implement getTextFromObject() function --- .../extractors/YoutubeChannelExtractor.java | 25 +++- .../YoutubeChannelInfoItemExtractor.java | 17 +-- .../extractors/YoutubePlaylistExtractor.java | 28 +++- .../YoutubePlaylistInfoItemExtractor.java | 6 +- .../extractors/YoutubeSearchExtractor.java | 9 +- .../extractors/YoutubeStreamExtractor.java | 132 +++++------------- .../YoutubeStreamInfoItemExtractor.java | 64 +++------ .../extractors/YoutubeTrendingExtractor.java | 5 +- .../linkHandler/YoutubeParsingHelper.java | 68 +++++++++ .../youtube/YoutubePlaylistExtractorTest.java | 2 +- .../YoutubeStreamExtractorDefaultTest.java | 1 - 11 files changed, 185 insertions(+), 172 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 2d2fe917e..4b52bee0b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -25,6 +25,7 @@ import java.util.Map; import javax.annotation.Nonnull; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.utils.Utils.HTTP; import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; @@ -124,8 +125,20 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getAvatarUrl() throws ParsingException { try { - return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar") + String url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar") 
.getArray("thumbnails").getObject(0).getString("url"); + + // the first characters of the avatar URLs are different for each channel and some are not even valid URLs + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } + + return url; } catch (Exception e) { throw new ParsingException("Could not get avatar", e); } @@ -172,7 +185,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); if (subscriberInfo != null) { try { - return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text")); + return Utils.mixedNumberWordToLong(getTextFromObject(subscriberInfo)); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } @@ -301,10 +314,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { } try { - if (videoTab.getObject("content").getObject("sectionListRenderer").getArray("contents") - .getObject(0).getObject("itemSectionRenderer").getArray("contents") - .getObject(0).getObject("messageRenderer").getObject("text").getArray("runs") - .getObject(0).getString("text").equals("This channel has no videos.")) + if (getTextFromObject(videoTab.getObject("content").getObject("sectionListRenderer") + .getArray("contents").getObject(0).getObject("itemSectionRenderer") + .getArray("contents").getObject(0).getObject("messageRenderer") + .getObject("text")).equals("This channel has no videos.")) return null; } catch (Exception ignored) {} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index 
518d8ea04..e82c4377d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -1,6 +1,5 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; @@ -8,6 +7,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.utils.Utils.HTTP; import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; @@ -59,7 +59,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public String getName() throws ParsingException { try { - return channelInfoItem.getObject("title").getString("simpleText"); + return getTextFromObject(channelInfoItem.getObject("title")); } catch (Exception e) { throw new ParsingException("Could not get name", e); } @@ -68,7 +68,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public String getUrl() throws ParsingException { try { - String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work? 
+ String id = "channel/" + channelInfoItem.getString("channelId"); return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { throw new ParsingException("Could not get url", e); @@ -78,7 +78,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public long getSubscriberCount() throws ParsingException { try { - String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0]; + String subscribers = getTextFromObject(channelInfoItem.getObject("subscriberCountText")); return Utils.mixedNumberWordToLong(subscribers); } catch (Exception e) { throw new ParsingException("Could not get subscriber count", e); @@ -88,8 +88,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public long getStreamCount() throws ParsingException { try { - return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText") - .getArray("runs").getObject(0).getString("text"))); + return Long.parseLong(Utils.removeNonDigitCharacters(getTextFromObject(channelInfoItem.getObject("videoCountText")))); } catch (Exception e) { throw new ParsingException("Could not get stream count", e); } @@ -98,11 +97,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public String getDescription() throws ParsingException { try { - StringBuilder description = new StringBuilder(); - JsonArray descriptionArray = channelInfoItem.getObject("descriptionSnippet").getArray("runs"); - for (Object descriptionPart : descriptionArray) - description.append(((JsonObject) descriptionPart).getString("text")); - return description.toString(); + return getTextFromObject(channelInfoItem.getObject("descriptionSnippet")); } catch (Exception e) { throw new ParsingException("Could not get description", e); } diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 240fa537f..e32b42ef1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -25,6 +25,11 @@ import java.util.Map; import javax.annotation.Nonnull; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getUrlFromNavigationEndpoint; +import static org.schabi.newpipe.extractor.utils.Utils.HTTP; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; + @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { private JsonObject initialData; @@ -104,7 +109,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getName() throws ParsingException { try { - String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + String name = getTextFromObject(playlistInfo.getObject("title")); if (name != null) return name; } catch (Exception ignored) {} try { @@ -137,8 +142,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderUrl() throws ParsingException { try { - return YoutubeChannelExtractor.CHANNEL_URL_BASE + - getUploaderInfo().getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); + return getUrlFromNavigationEndpoint(getUploaderInfo().getObject("navigationEndpoint")); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader url", e); } @@ -147,7 +151,7 @@ public class 
YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderName() throws ParsingException { try { - return getUploaderInfo().getObject("title").getArray("runs").getObject(0).getString("text"); + return getTextFromObject(getUploaderInfo().getObject("title")); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader name", e); } @@ -156,7 +160,19 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderAvatarUrl() throws ParsingException { try { - return getUploaderInfo().getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + String url = getUploaderInfo().getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + + // the first characters of the avatar URLs are different for each channel and some are not even valid URLs + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } + + return url; } catch (Exception e) { throw new ParsingException("Could not get playlist uploader avatar", e); } @@ -165,7 +181,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public long getStreamCount() throws ParsingException { try { - String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text"); + String viewsText = getTextFromObject(getPlaylistInfo().getArray("stats").getObject(0)); return Long.parseLong(Utils.removeNonDigitCharacters(viewsText)); } catch (Exception e) { throw new ParsingException("Could not get video count from playlist", e); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java index 358fa2e69..03bf9b228 
100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java @@ -7,6 +7,8 @@ import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; + public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor { private JsonObject playlistInfoItem; @@ -27,7 +29,7 @@ public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtract @Override public String getName() throws ParsingException { try { - return playlistInfoItem.getObject("title").getString("simpleText"); + return getTextFromObject(playlistInfoItem.getObject("title")); } catch (Exception e) { throw new ParsingException("Could not get name", e); } @@ -46,7 +48,7 @@ public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtract @Override public String getUploaderName() throws ParsingException { try { - return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text"); + return getTextFromObject(playlistInfoItem.getObject("longBylineText")); } catch (Exception e) { throw new ParsingException("Could not get uploader name", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 7ec0e5cc2..bdc46d196 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -24,6 +24,8 @@ import java.util.Map; import javax.annotation.Nonnull; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; + /* * Created by Christian Schabesberger on 22.07.2018 * @@ -91,8 +93,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { if (showingResultsForRenderer == null) { return ""; } else { - return showingResultsForRenderer.getObject("correctedQuery").getArray("runs") - .getObject(0).getString("text"); + return getTextFromObject(showingResultsForRenderer.getObject("correctedQuery")); } } @@ -155,8 +156,8 @@ public class YoutubeSearchExtractor extends SearchExtractor { for (Object item : videos) { if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) { - throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer") - .getObject("bodyText").getArray("runs").getObject(0).getString("text")); + throw new NothingFoundException(getTextFromObject(((JsonObject) item) + .getObject("backgroundPromoRenderer").getObject("bodyText"))); } else if (((JsonObject) item).getObject("videoRenderer") != null) { collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser)); } else if (((JsonObject) item).getObject("channelRenderer") != null) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index d6a3c943e..e985d1b98 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -40,8 +40,6 @@ import 
org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; @@ -56,6 +54,10 @@ import java.util.Map; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getUrlFromNavigationEndpoint; +import static org.schabi.newpipe.extractor.utils.Utils.HTTP; + /* * Created by Christian Schabesberger on 06.08.15. * @@ -114,11 +116,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String title = null; try { - StringBuilder titleBuilder = new StringBuilder(); - JsonArray titleArray = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs"); - for (Object titlePart : titleArray) - titleBuilder.append(((JsonObject) titlePart).getString("text")); - title = titleBuilder.toString(); + title = getTextFromObject(getVideoPrimaryInfoRenderer().getObject("title")); } catch (Exception ignored) {} if (title == null) { try { @@ -146,8 +144,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { } catch (Exception ignored) {} try { - if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) { - String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10); + if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")).startsWith("Premiered")) { + String time = getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")).substring(10); try { // Premiered 20 hours ago TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); @@ -165,7 +163,7 
@@ public class YoutubeStreamExtractor extends StreamExtractor { try { // TODO this parses English formatted dates only, we need a better approach to parse the textual date Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse( - getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText")); + getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))); return new SimpleDateFormat("yyyy-MM-dd").format(d); } catch (Exception ignored) {} throw new ParsingException("Could not get upload date"); @@ -203,73 +201,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); // description with more info on links try { - boolean htmlConversionRequired = false; - JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs"); - StringBuilder descriptionBuilder = new StringBuilder(descriptions.size()); - for (Object textObjectHolder : descriptions) { - JsonObject textHolder = (JsonObject) textObjectHolder; - String text = textHolder.getString("text"); - if (textHolder.getObject("navigationEndpoint") != null) { - // The text is a link. 
Get the URL it points to and generate a HTML link of it - if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) { - String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); - if (internUrl.startsWith("/redirect?")) { - // q parameter can be the first parameter - internUrl = internUrl.substring(10); - String[] params = internUrl.split("&"); - for (String param : params) { - if (param.split("=")[0].equals("q")) { - String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); - if (url != null && !url.isEmpty()) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } else { - descriptionBuilder.append(text); - } - break; - } - } - } else if (internUrl.startsWith("http")) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } - } else if (textHolder.getObject("navigationEndpoint").getObject("browseEndpoint") != null) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } else if (textHolder.getObject("navigationEndpoint").getObject("watchEndpoint") != null) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } - continue; - } - if (text != null) { - descriptionBuilder.append(text); - } - } - - String description = descriptionBuilder.toString(); - - if (!description.isEmpty()) { - if (htmlConversionRequired) { - description = description.replaceAll("\\n", "
"); - description = description.replaceAll(" ", "  "); - return new Description(description, Description.HTML); - } - return new Description(description, Description.PLAIN_TEXT); - } + String description = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("description"), true); + return new Description(description, Description.HTML); } catch (Exception ignored) { } // raw non-html description @@ -329,16 +262,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String views = null; try { - views = getVideoPrimaryInfoRenderer().getObject("viewCount") - .getObject("videoViewCountRenderer").getObject("viewCount") - .getArray("runs").getObject(0).getString("text"); + views = getTextFromObject(getVideoPrimaryInfoRenderer().getObject("viewCount") + .getObject("videoViewCountRenderer").getObject("viewCount")); } catch (Exception ignored) {} - if (views == null) { - try { - views = getVideoPrimaryInfoRenderer().getObject("viewCount") - .getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText"); - } catch (Exception ignored) {} - } if (views == null) { try { views = playerResponse.getObject("videoDetails").getString("viewCount"); @@ -398,17 +324,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getUploaderUrl() throws ParsingException { assertPageFetched(); - String uploaderId = null; try { - uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") - .getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); + String uploaderUrl = getUrlFromNavigationEndpoint(getVideoSecondaryInfoRenderer() + .getObject("owner").getObject("videoOwnerRenderer").getObject("navigationEndpoint")); + if (uploaderUrl != null) return uploaderUrl; + } catch (Exception ignored) {} + try { + String uploaderId = playerResponse.getObject("videoDetails").getString("channelId"); + if (uploaderId != null) return 
"https://www.youtube.com/channel/" + uploaderId; } catch (Exception ignored) {} - if (uploaderId == null) { - try { - uploaderId = playerResponse.getObject("videoDetails").getString("channelId"); - } catch (Exception ignored) {} - } - if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId; throw new ParsingException("Could not get uploader url"); } @@ -418,8 +342,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String uploaderName = null; try { - uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") - .getObject("title").getArray("runs").getObject(0).getString("text"); + uploaderName = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("owner") + .getObject("videoOwnerRenderer").getObject("title")); } catch (Exception ignored) {} if (uploaderName == null) { try { @@ -435,8 +359,20 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getUploaderAvatarUrl() throws ParsingException { assertPageFetched(); try { - return getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + String url = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") .getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + + // the first characters of the avatar URLs are different for each channel and some are not even valid URLs + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } + + return url; } catch (Exception e) { throw new ParsingException("Could not get uploader avatar url", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java 
index 4e40034b1..d3cee8b01 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -6,7 +6,6 @@ import com.grack.nanojson.JsonObject; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; @@ -15,6 +14,9 @@ import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getUrlFromNavigationEndpoint; + /* * Copyright (C) Christian Schabesberger 2016 * YoutubeStreamInfoItemExtractor.java is part of NewPipe. 
@@ -76,15 +78,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public String getName() throws ParsingException { - String name = null; - try { - name = videoInfo.getObject("title").getString("simpleText"); - } catch (Exception ignored) {} - if (name == null) { - try { - name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text"); - } catch (Exception ignored) {} - } + String name = getTextFromObject(videoInfo.getObject("title")); if (name != null && !name.isEmpty()) return name; throw new ParsingException("Could not get name"); } @@ -94,14 +88,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { if (getStreamType() == StreamType.LIVE_STREAM) return -1; String duration = null; try { - duration = videoInfo.getObject("lengthText").getString("simpleText"); + duration = getTextFromObject(videoInfo.getObject("lengthText")); } catch (Exception ignored) {} if (duration == null) { try { for (Object thumbnailOverlay : videoInfo.getArray("thumbnailOverlays")) { if (((JsonObject) thumbnailOverlay).getObject("thumbnailOverlayTimeStatusRenderer") != null) { - duration = ((JsonObject) thumbnailOverlay).getObject("thumbnailOverlayTimeStatusRenderer") - .getObject("text").getString("simpleText"); + duration = getTextFromObject(((JsonObject) thumbnailOverlay) + .getObject("thumbnailOverlayTimeStatusRenderer").getObject("text")); } } } catch (Exception ignored) {} @@ -114,19 +108,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { public String getUploaderName() throws ParsingException { String name = null; try { - name = videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getString("text"); + name = getTextFromObject(videoInfo.getObject("longBylineText")); } catch (Exception ignored) {} if (name == null) { try { - name = videoInfo.getObject("ownerText").getArray("runs") - .getObject(0).getString("text"); + name = 
getTextFromObject(videoInfo.getObject("ownerText")); } catch (Exception ignored) {} } if (name == null) { try { - name = videoInfo.getObject("shortBylineText").getArray("runs") - .getObject(0).getString("text"); + name = getTextFromObject(videoInfo.getObject("shortBylineText")); } catch (Exception ignored) {} } if (name != null && !name.isEmpty()) return name; @@ -136,30 +127,27 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public String getUploaderUrl() throws ParsingException { try { - String id = null; + String url = null; try { - id = videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); + url = getUrlFromNavigationEndpoint(videoInfo.getObject("longBylineText") + .getArray("runs").getObject(0).getObject("navigationEndpoint")); } catch (Exception ignored) {} - if (id == null) { + if (url == null) { try { - id = videoInfo.getObject("ownerText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); + url = getUrlFromNavigationEndpoint(videoInfo.getObject("ownerText") + .getArray("runs").getObject(0).getObject("navigationEndpoint")); } catch (Exception ignored) {} } - if (id == null) { + if (url == null) { try { - id = videoInfo.getObject("shortBylineText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); + url = getUrlFromNavigationEndpoint(videoInfo.getObject("shortBylineText") + .getArray("runs").getObject(0).getObject("navigationEndpoint")); } catch (Exception ignored) {} } - if (id == null || id.isEmpty()) { + if (url == null || url.isEmpty()) { throw new IllegalArgumentException("is empty"); } - return YoutubeChannelLinkHandlerFactory.getInstance().getUrl("channel/" + id); + return url; } catch (Exception e) { throw new ParsingException("Could not get uploader url"); } @@ -169,7 
+157,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public String getTextualUploadDate() { try { - return videoInfo.getObject("publishedTimeText").getString("simpleText"); + return getTextFromObject(videoInfo.getObject("publishedTimeText")); } catch (Exception e) { // upload date is not always available, e.g. in playlists return null; @@ -196,13 +184,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) { return -1; } - String viewCount; - if (getStreamType() == StreamType.LIVE_STREAM) { - viewCount = videoInfo.getObject("viewCountText") - .getArray("runs").getObject(0).getString("text"); - } else { - viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); - } + String viewCount = getTextFromObject(videoInfo.getObject("viewCountText")); if (viewCount.equals("Recommended for you")) return -1; return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); } catch (Exception e) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index af1a91b38..25aaa4dbb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -44,6 +44,8 @@ import java.util.Map; import javax.annotation.Nonnull; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; + public class YoutubeTrendingExtractor extends KioskExtractor { private JsonObject initialData; @@ -93,8 +95,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { public String getName() throws ParsingException { String name; try { 
- name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title") - .getArray("runs").getObject(0).getString("text"); + name = getTextFromObject(initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title")); } catch (Exception e) { throw new ParsingException("Could not get Trending name", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 24edef600..63e204246 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -13,7 +13,10 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.utils.Parser; +import java.io.UnsupportedEncodingException; import java.net.URL; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; @@ -251,4 +254,69 @@ public class YoutubeParsingHelper { throw new ParsingException("Could not get client version"); } + + public static String getUrlFromNavigationEndpoint(JsonObject navigationEndpoint) { + if (navigationEndpoint.getObject("urlEndpoint") != null) { + String internUrl = navigationEndpoint.getObject("urlEndpoint").getString("url"); + if (internUrl.startsWith("/redirect?")) { + // q parameter can be the first parameter + internUrl = internUrl.substring(10); + String[] params = internUrl.split("&"); + for (String param : params) { + if (param.split("=")[0].equals("q")) { + String url; + try { + url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); + } catch 
(UnsupportedEncodingException e) { + return null; + } + return url; + } + } + } else if (internUrl.startsWith("http")) { + return internUrl; + } + } else if (navigationEndpoint.getObject("browseEndpoint") != null) { + return "https://www.youtube.com" + navigationEndpoint.getObject("browseEndpoint").getString("canonicalBaseUrl"); + } else if (navigationEndpoint.getObject("watchEndpoint") != null) { + StringBuilder url = new StringBuilder(); + url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint.getObject("watchEndpoint").getString("videoId")); + if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) + url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint").getString("playlistId")); + if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) + url.append("&t=").append(navigationEndpoint.getObject("watchEndpoint").getInt("startTimeSeconds")); + return url.toString(); + } + return null; + } + + public static String getTextFromObject(JsonObject textObject, boolean html) { + if (textObject.has("simpleText")) return textObject.getString("simpleText"); + + StringBuilder textBuilder = new StringBuilder(); + for (Object textPart : textObject.getArray("runs")) { + String text = ((JsonObject) textPart).getString("text"); + if (html && ((JsonObject) textPart).getObject("navigationEndpoint") != null) { + String url = getUrlFromNavigationEndpoint(((JsonObject) textPart).getObject("navigationEndpoint")); + if (url != null && !url.isEmpty()) { + textBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>"); + continue; + } + } + textBuilder.append(text); + } + + String text = textBuilder.toString(); + + if (html) { + text = text.replaceAll("\\n", "<br>"); + text = text.replaceAll("  ", " &nbsp;"); + } + + return text; + } + + public static String getTextFromObject(JsonObject textObject) { + return getTextFromObject(textObject, false); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java index d4de9175e..7c9112798 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java @@ -99,7 +99,7 @@ public class YoutubePlaylistExtractorTest { @Test public void testUploaderUrl() throws Exception { - assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl()); + assertEquals("https://www.youtube.com/user/andre0y0you", extractor.getUploaderUrl()); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index cb2b768df..d6cf3815f 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -245,7 +245,6 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetDescription() throws ParsingException { - System.out.println(extractor.getDescription().getContent()); assertNotNull(extractor.getDescription()); assertFalse(extractor.getDescription().getContent().isEmpty()); }