From 5b59a1a8c5c1de9ef3ac01aae9fa24c31db7921d Mon Sep 17 00:00:00 2001 From: Stypox Date: Thu, 7 Dec 2023 20:57:44 +0100 Subject: [PATCH] [YouTube] Move meta info extraction to separate file YoutubeParsingHelper was longer than 2000 lines which caused checkstyle issues --- .../youtube/YoutubeMetaInfoHelper.java | 200 ++++++++++++++++++ .../youtube/YoutubeParsingHelper.java | 176 +-------------- .../extractors/YoutubeSearchExtractor.java | 4 +- .../extractors/YoutubeStreamExtractor.java | 3 +- 4 files changed, 205 insertions(+), 178 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeMetaInfoHelper.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeMetaInfoHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeMetaInfoHelper.java new file mode 100644 index 000000000..c8410d057 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeMetaInfoHelper.java @@ -0,0 +1,200 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCachedUrlIfNeeded; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObjectOrThrow; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.isGoogleURL; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; +import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + +import org.schabi.newpipe.extractor.MetaInfo; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import 
org.schabi.newpipe.extractor.stream.Description; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import javax.annotation.Nonnull; + +public final class YoutubeMetaInfoHelper { + + private YoutubeMetaInfoHelper() { + } + + + @Nonnull + public static List getMetaInfo(@Nonnull final JsonArray contents) + throws ParsingException { + final List metaInfo = new ArrayList<>(); + for (final Object content : contents) { + final JsonObject resultObject = (JsonObject) content; + if (resultObject.has("itemSectionRenderer")) { + for (final Object sectionContentObject + : resultObject.getObject("itemSectionRenderer").getArray("contents")) { + + final JsonObject sectionContent = (JsonObject) sectionContentObject; + if (sectionContent.has("infoPanelContentRenderer")) { + metaInfo.add(getInfoPanelContent(sectionContent + .getObject("infoPanelContentRenderer"))); + } + if (sectionContent.has("clarificationRenderer")) { + metaInfo.add(getClarificationRenderer(sectionContent + .getObject("clarificationRenderer") + )); + } + if (sectionContent.has("emergencyOneboxRenderer")) { + getEmergencyOneboxRenderer( + sectionContent.getObject("emergencyOneboxRenderer"), + metaInfo::add + ); + } + } + } + } + return metaInfo; + } + + @Nonnull + private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer) + throws ParsingException { + final MetaInfo metaInfo = new MetaInfo(); + final StringBuilder sb = new StringBuilder(); + for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) { + if (sb.length() != 0) { + sb.append("<br>"); + } + sb.append(getTextFromObject((JsonObject) paragraph)); + } + metaInfo.setContent(new Description(sb.toString(), Description.HTML)); + if (infoPanelContentRenderer.has("sourceEndpoint")) { + final String metaInfoLinkUrl = getUrlFromNavigationEndpoint( + infoPanelContentRenderer.getObject("sourceEndpoint")); + try { + metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded( + metaInfoLinkUrl)))); + } catch (final NullPointerException | MalformedURLException e) { + throw new ParsingException("Could not get metadata info URL", e); + } + + final String metaInfoLinkText = getTextFromObject( + infoPanelContentRenderer.getObject("inlineSource")); + if (isNullOrEmpty(metaInfoLinkText)) { + throw new ParsingException("Could not get metadata info link text."); + } + metaInfo.addUrlText(metaInfoLinkText); + } + + return metaInfo; + } + + @Nonnull + private static MetaInfo getClarificationRenderer( + @Nonnull final JsonObject clarificationRenderer) throws ParsingException { + final MetaInfo metaInfo = new MetaInfo(); + + final String title = getTextFromObject(clarificationRenderer + .getObject("contentTitle")); + final String text = getTextFromObject(clarificationRenderer + .getObject("text")); + if (title == null || text == null) { + throw new ParsingException("Could not extract clarification renderer content"); + } + metaInfo.setTitle(title); + metaInfo.setContent(new Description(text, Description.PLAIN_TEXT)); + + if (clarificationRenderer.has("actionButton")) { + final JsonObject actionButton = clarificationRenderer.getObject("actionButton") + .getObject("buttonRenderer"); + try { + final String url = getUrlFromNavigationEndpoint(actionButton + .getObject("command")); + metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url)))); + } catch (final NullPointerException | MalformedURLException e) { + throw new ParsingException("Could not get metadata info URL", e); + } + + final String metaInfoLinkText = getTextFromObject( +
actionButton.getObject("text")); + if (isNullOrEmpty(metaInfoLinkText)) { + throw new ParsingException("Could not get metadata info link text."); + } + metaInfo.addUrlText(metaInfoLinkText); + } + + if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer + .has("secondarySource")) { + final String url = getUrlFromNavigationEndpoint(clarificationRenderer + .getObject("secondaryEndpoint")); + // Ignore Google URLs, because those point to a Google search about "Covid-19" + if (url != null && !isGoogleURL(url)) { + try { + metaInfo.addUrl(new URL(url)); + final String description = getTextFromObject(clarificationRenderer + .getObject("secondarySource")); + metaInfo.addUrlText(description == null ? url : description); + } catch (final MalformedURLException e) { + throw new ParsingException("Could not get metadata info secondary URL", e); + } + } + } + + return metaInfo; + } + + private static void getEmergencyOneboxRenderer( + @Nonnull final JsonObject emergencyOneboxRenderer, + final Consumer addMetaInfo + ) throws ParsingException { + final List supportRenderers = emergencyOneboxRenderer.values() + .stream() + .filter(o -> o instanceof JsonObject + && ((JsonObject) o).has("singleActionEmergencySupportRenderer")) + .map(o -> ((JsonObject) o).getObject("singleActionEmergencySupportRenderer")) + .collect(Collectors.toList()); + + if (supportRenderers.isEmpty()) { + throw new ParsingException("Could not extract any meta info from emergency renderer"); + } + + for (final JsonObject r : supportRenderers) { + final MetaInfo metaInfo = new MetaInfo(); + + // usually an encouragement like "We are with you" + final String title = getTextFromObjectOrThrow(r.getObject("title"), "title"); + // usually a phone number + final String action = getTextFromObjectOrThrow(r.getObject("actionText"), "action"); + // usually details about the phone number + final String details = getTextFromObjectOrThrow(r.getObject("detailsText"), "details"); + // usually the name of an 
association + final String urlText = getTextFromObjectOrThrow(r.getObject("navigationText"), + "urlText"); + + metaInfo.setTitle(title); + metaInfo.setContent(new Description(details + "\n" + action, Description.PLAIN_TEXT)); + metaInfo.addUrlText(urlText); + + // usually the webpage of the association + final String url = getUrlFromNavigationEndpoint(r.getObject("navigationEndpoint")); + if (url == null) { + throw new ParsingException("Could not extract emergency renderer url"); + } + + try { + metaInfo.addUrl(new URL(replaceHttpWithHttps(url))); + } catch (final MalformedURLException e) { + throw new ParsingException("Could not parse emergency renderer url", e); + } + + addMetaInfo.accept(metaInfo); + } + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index ca60293ea..7db34ba78 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -25,7 +25,6 @@ import static org.schabi.newpipe.extractor.utils.Utils.HTTP; import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; -import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonBuilder; @@ -37,7 +36,6 @@ import com.grack.nanojson.JsonWriter; import org.jsoup.nodes.Entities; import org.schabi.newpipe.extractor.Image; import org.schabi.newpipe.extractor.Image.ResolutionLevel; -import org.schabi.newpipe.extractor.MetaInfo; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException; import 
org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; @@ -48,7 +46,6 @@ import org.schabi.newpipe.extractor.localization.ContentCountry; import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.playlist.PlaylistInfo; import org.schabi.newpipe.extractor.stream.AudioTrackType; -import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator; @@ -63,16 +60,13 @@ import java.time.LocalDate; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.format.DateTimeParseException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Objects; import java.util.Optional; import java.util.Random; import java.util.Set; -import java.util.function.Consumer; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -264,7 +258,7 @@ public final class YoutubeParsingHelper { private static boolean consentAccepted = false; - private static boolean isGoogleURL(final String url) { + public static boolean isGoogleURL(final String url) { final String cachedUrl = extractCachedUrlIfNeeded(url); try { final URL u = new URL(cachedUrl); @@ -1660,174 +1654,6 @@ public final class YoutubeParsingHelper { } } - @Nonnull - public static List getMetaInfo(@Nonnull final JsonArray contents) - throws ParsingException { - final List metaInfo = new ArrayList<>(); - for (final Object content : contents) { - final JsonObject resultObject = (JsonObject) content; - if (resultObject.has("itemSectionRenderer")) { - for (final Object sectionContentObject - : resultObject.getObject("itemSectionRenderer").getArray("contents")) { - - final JsonObject sectionContent = (JsonObject) sectionContentObject; - if 
(sectionContent.has("infoPanelContentRenderer")) { - metaInfo.add(getInfoPanelContent(sectionContent - .getObject("infoPanelContentRenderer"))); - } - if (sectionContent.has("clarificationRenderer")) { - metaInfo.add(getClarificationRenderer(sectionContent - .getObject("clarificationRenderer") - )); - } - if (sectionContent.has("emergencyOneboxRenderer")) { - getEmergencyOneboxRenderer( - sectionContent.getObject("emergencyOneboxRenderer"), - metaInfo::add - ); - } - } - } - } - return metaInfo; - } - - @Nonnull - private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer) - throws ParsingException { - final MetaInfo metaInfo = new MetaInfo(); - final StringBuilder sb = new StringBuilder(); - for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) { - if (sb.length() != 0) { - sb.append("<br>"); - } - sb.append(YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph)); - } - metaInfo.setContent(new Description(sb.toString(), Description.HTML)); - if (infoPanelContentRenderer.has("sourceEndpoint")) { - final String metaInfoLinkUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint( - infoPanelContentRenderer.getObject("sourceEndpoint")); - try { - metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded( - metaInfoLinkUrl)))); - } catch (final NullPointerException | MalformedURLException e) { - throw new ParsingException("Could not get metadata info URL", e); - } - - final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject( - infoPanelContentRenderer.getObject("inlineSource")); - if (isNullOrEmpty(metaInfoLinkText)) { - throw new ParsingException("Could not get metadata info link text."); - } - metaInfo.addUrlText(metaInfoLinkText); - } - - return metaInfo; - } - - @Nonnull - private static MetaInfo getClarificationRenderer( - @Nonnull final JsonObject clarificationRenderer) throws ParsingException { - final MetaInfo metaInfo = new MetaInfo(); - - final String title = YoutubeParsingHelper.getTextFromObject(clarificationRenderer - .getObject("contentTitle")); - final String text = YoutubeParsingHelper.getTextFromObject(clarificationRenderer - .getObject("text")); - if (title == null || text == null) { - throw new ParsingException("Could not extract clarification renderer content"); - } - metaInfo.setTitle(title); - metaInfo.setContent(new Description(text, Description.PLAIN_TEXT)); - - if (clarificationRenderer.has("actionButton")) { - final JsonObject actionButton = clarificationRenderer.getObject("actionButton") - .getObject("buttonRenderer"); - try { - final String url = YoutubeParsingHelper.getUrlFromNavigationEndpoint(actionButton - .getObject("command")); - metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url)))); - } catch (final NullPointerException | MalformedURLException e) { - throw
new ParsingException("Could not get metadata info URL", e); - } - - final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject( - actionButton.getObject("text")); - if (isNullOrEmpty(metaInfoLinkText)) { - throw new ParsingException("Could not get metadata info link text."); - } - metaInfo.addUrlText(metaInfoLinkText); - } - - if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer - .has("secondarySource")) { - final String url = getUrlFromNavigationEndpoint(clarificationRenderer - .getObject("secondaryEndpoint")); - // Ignore Google URLs, because those point to a Google search about "Covid-19" - if (url != null && !isGoogleURL(url)) { - try { - metaInfo.addUrl(new URL(url)); - final String description = getTextFromObject(clarificationRenderer - .getObject("secondarySource")); - metaInfo.addUrlText(description == null ? url : description); - } catch (final MalformedURLException e) { - throw new ParsingException("Could not get metadata info secondary URL", e); - } - } - } - - return metaInfo; - } - - @Nonnull - private static void getEmergencyOneboxRenderer( - @Nonnull final JsonObject emergencyOneboxRenderer, - final Consumer addMetaInfo - ) throws ParsingException { - final List supportRenderers = emergencyOneboxRenderer.entrySet().stream() - .filter((a) -> a.getValue() instanceof JsonObject - && ((JsonObject) a.getValue()).has("singleActionEmergencySupportRenderer")) - .map((a) -> ((JsonObject) a.getValue()) - .getObject("singleActionEmergencySupportRenderer")) - .collect(Collectors.toList()); - - if (supportRenderers.isEmpty()) { - throw new ParsingException("Could not extract any meta info from emergency renderer"); - } - - for (final JsonObject r : supportRenderers) { - final MetaInfo metaInfo = new MetaInfo(); - - // usually an encouragement like "We are with you" - final String title = getTextFromObjectOrThrow(r.getObject("title"), "title"); - // usually a phone number - final String action = 
getTextFromObjectOrThrow(r.getObject("actionText"), "action"); - // usually details about the phone number - final String details = getTextFromObjectOrThrow(r.getObject("detailsText"), "details"); - // usually the name of an association - final String urlText = getTextFromObjectOrThrow(r.getObject("navigationText"), - "urlText"); - - metaInfo.setTitle(title); - metaInfo.setContent(new Description(details + "\n" + action, Description.PLAIN_TEXT)); - metaInfo.addUrlText(urlText); - - // usually the webpage of the association - final String url = getUrlFromNavigationEndpoint(r.getObject("navigationEndpoint")); - if (url == null) { - throw new ParsingException("Could not extract emergency renderer url"); - } - - try { - metaInfo.addUrl(new URL(replaceHttpWithHttps(url))); - } catch (final MalformedURLException e) { - throw new ParsingException("Could not parse emergency renderer url", e); - } - - addMetaInfo.accept(metaInfo); - } - } - /** * Sometimes, YouTube provides URLs which use Google's cache. 
They look like * {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 7af8bc706..90d5cab01 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -30,7 +30,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper; import org.schabi.newpipe.extractor.utils.JsonUtils; import java.io.IOException; @@ -151,7 +151,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Nonnull @Override public List getMetaInfo() throws ParsingException { - return YoutubeParsingHelper.getMetaInfo( + return YoutubeMetaInfoHelper.getMetaInfo( initialData.getObject("contents") .getObject("twoColumnSearchResultsRenderer") .getObject("primaryContents") diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 5f3a7b724..e77ea16b4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -67,6 +67,7 @@ import 
org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.YoutubeJavaScriptPlayerManager; +import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.AudioStream; @@ -1592,7 +1593,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull @Override public List getMetaInfo() throws ParsingException { - return YoutubeParsingHelper.getMetaInfo(nextResponse + return YoutubeMetaInfoHelper.getMetaInfo(nextResponse .getObject("contents") .getObject("twoColumnWatchNextResults") .getObject("results")