From df26badd4a7356c001a9c326ce199a5ee5b34f71 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Thu, 4 Apr 2024 19:36:31 +0200 Subject: [PATCH] [YouTube] Add common methods to get ID, name and age gate object of channels Also move duplicate strings into constants and support pageHeader channel header in user channels on YoutubeChannelHelper methods. --- .../youtube/YoutubeChannelHelper.java | 256 ++++++++++++++++-- 1 file changed, 229 insertions(+), 27 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java index dd338900c..74335017a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java @@ -4,16 +4,19 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonWriter; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.ContentCountry; import org.schabi.newpipe.extractor.localization.Localization; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Optional; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -21,6 +24,19 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; * Shared functions for extracting YouTube channel pages and tabs. */ public final class YoutubeChannelHelper { + + private static final String BROWSE_ENDPOINT = "browseEndpoint"; + private static final String BROWSE_ID = "browseId"; + private static final String CAROUSEL_HEADER_RENDERER = "carouselHeaderRenderer"; + private static final String C4_TABBED_HEADER_RENDERER = "c4TabbedHeaderRenderer"; + private static final String CONTENT = "content"; + private static final String CONTENTS = "contents"; + private static final String HEADER = "header"; + private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel"; + private static final String TAB_RENDERER = "tabRenderer"; + private static final String TITLE = "title"; + private static final String TOPIC_CHANNEL_DETAILS_RENDERER = "topicChannelDetailsRenderer"; + private YoutubeChannelHelper() { } @@ -64,8 +80,8 @@ public final class YoutubeChannelHelper { .getObject("webCommandMetadata") .getString("webPageType", ""); - final JsonObject browseEndpoint = endpoint.getObject("browseEndpoint"); - final String browseId = browseEndpoint.getString("browseId", ""); + final JsonObject browseEndpoint = endpoint.getObject(BROWSE_ENDPOINT); + final String browseId = browseEndpoint.getString(BROWSE_ID, ""); if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") @@ -140,7 +156,7 @@ public final class YoutubeChannelHelper { while (level < 3) { final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder( localization, country) - .value("browseId", id) + .value(BROWSE_ID, id) .value("params", parameters) .done()) .getBytes(StandardCharsets.UTF_8); @@ -159,8 +175,8 @@ public final class YoutubeChannelHelper { .getObject("webCommandMetadata") .getString("webPageType", ""); - final String browseId = endpoint.getObject("browseEndpoint") - .getString("browseId", ""); + final String browseId = endpoint.getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID, ""); if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") @@ -257,7 +273,7 @@ public final class YoutubeChannelHelper { * A {@code pageHeaderRenderer} channel header type. * *

- * This header returns only the channel's name and its avatar. + * This header returns only the channel's name and its avatar for system channels. *

*/ PAGE @@ -294,20 +310,20 @@ public final class YoutubeChannelHelper { @Nonnull public static Optional getChannelHeader( @Nonnull final JsonObject channelResponse) { - final JsonObject header = channelResponse.getObject("header"); + final JsonObject header = channelResponse.getObject(HEADER); - if (header.has("c4TabbedHeaderRenderer")) { - return Optional.of(header.getObject("c4TabbedHeaderRenderer")) + if (header.has(C4_TABBED_HEADER_RENDERER)) { + return Optional.of(header.getObject(C4_TABBED_HEADER_RENDERER)) .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED)); - } else if (header.has("carouselHeaderRenderer")) { - return header.getObject("carouselHeaderRenderer") - .getArray("contents") + } else if (header.has(CAROUSEL_HEADER_RENDERER)) { + return header.getObject(CAROUSEL_HEADER_RENDERER) + .getArray(CONTENTS) .stream() .filter(JsonObject.class::isInstance) .map(JsonObject.class::cast) - .filter(item -> item.has("topicChannelDetailsRenderer")) + .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER)) .findFirst() - .map(item -> item.getObject("topicChannelDetailsRenderer")) + .map(item -> item.getObject(TOPIC_CHANNEL_DETAILS_RENDERER)) .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL)); } else if (header.has("pageHeaderRenderer")) { return Optional.of(header.getObject("pageHeaderRenderer")) @@ -333,22 +349,208 @@ public final class YoutubeChannelHelper { * @return whether the channel is verified */ public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) { - // carouselHeaderRenderer and pageHeaderRenderer does not contain any verification - // badges - // Since they are only shown on YouTube internal channels or on channels of large - // organizations broadcasting live events, we can assume the channel to be verified - if (channelHeader.headerType == ChannelHeader.HeaderType.CAROUSEL - || channelHeader.headerType == ChannelHeader.HeaderType.PAGE) { - return true; + switch (channelHeader.headerType) { + // carouselHeaderRenderers do not contain any verification badges + // Since they are only shown on YouTube internal channels or on channels of large + // organizations broadcasting live events, we can assume the channel to be verified + case CAROUSEL: + return true; + case PAGE: + final JsonObject pageHeaderViewModel = channelHeader.json.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL); + + final boolean hasCircleOrMusicIcon = pageHeaderViewModel.getObject(TITLE) + .getObject("dynamicTextViewModel") + .getObject("text") + .getArray("attachmentRuns") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .anyMatch(attachmentRun -> attachmentRun.getObject("element") + .getObject("type") + .getObject("imageType") + .getObject("image") + .getArray("sources") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .anyMatch(source -> { + final String imageName = source.getObject("clientResource") + .getString("imageName"); + return "CHECK_CIRCLE_FILLED".equals(imageName) + || "MUSIC_FILLED".equals(imageName); + })); + if (!hasCircleOrMusicIcon && pageHeaderViewModel.getObject("image") + .has("contentPreviewImageViewModel")) { + // If a pageHeaderRenderer has no object in which a check verified may be + // contained and if it has a contentPreviewImageViewModel, it should mean + // that the header is coming from a system channel, which we can assume to + // be verified + return true; + } + + return hasCircleOrMusicIcon; + case INTERACTIVE_TABBED: + // If the header has an autoGenerated property, it should mean that the channel has + // been auto generated by YouTube: we can assume the channel to be verified in this + // case + return channelHeader.json.has("autoGenerated"); + default: + return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); + } + } + + /** + * Get the ID of a channel from its response. + * + *

+ * For {@link ChannelHeader.HeaderType#C4_TABBED c4TabbedHeaderRenderer} and + * {@link ChannelHeader.HeaderType#CAROUSEL carouselHeaderRenderer} channel headers, the ID is + * get from the header. + *

+ * + *

+ * For other headers or if it cannot be got, the ID from the {@code channelMetadataRenderer} + * in the channel response is used. + *

+ * + *

+ * If the ID cannot still be get, the fallback channel ID, if provided, will be used. + *

+ * + * @param header the channel header + * @param fallbackChannelId the fallback channel ID, which can be null + * @return the ID of the channel + * @throws ParsingException if the channel ID cannot be got from the channel header, the + * channel response and the fallback channel ID + */ + @Nonnull + public static String getChannelId( + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Nonnull final Optional header, + @Nonnull final JsonObject jsonResponse, + @Nullable final String fallbackChannelId) throws ParsingException { + if (header.isPresent()) { + final ChannelHeader channelHeader = header.get(); + switch (channelHeader.headerType) { + case C4_TABBED: + final String channelId = channelHeader.json.getObject(HEADER) + .getObject(C4_TABBED_HEADER_RENDERER) + .getString("channelId", ""); + if (!isNullOrEmpty(channelId)) { + return channelId; + } + final String navigationC4TabChannelId = channelHeader.json + .getObject("navigationEndpoint") + .getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID); + if (!isNullOrEmpty(navigationC4TabChannelId)) { + return navigationC4TabChannelId; + } + break; + case CAROUSEL: + final String navigationCarouselChannelId = channelHeader.json.getObject(HEADER) + .getObject(CAROUSEL_HEADER_RENDERER) + .getArray(CONTENTS) + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER)) + .findFirst() + .orElse(new JsonObject()) + .getObject(TOPIC_CHANNEL_DETAILS_RENDERER) + .getObject("navigationEndpoint") + .getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID); + if (!isNullOrEmpty(navigationCarouselChannelId)) { + return navigationCarouselChannelId; + } + break; + default: + break; + } } - if (channelHeader.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED) { - // If the header has an autoGenerated property, it should mean that the channel has - // been auto generated by YouTube: we can assume the channel to be verified in this - // case - return channelHeader.json.has("autoGenerated"); + final String externalChannelId = jsonResponse.getObject("metadata") + .getObject("channelMetadataRenderer") + .getString("externalChannelId"); + if (!isNullOrEmpty(externalChannelId)) { + return externalChannelId; } - return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); + if (!isNullOrEmpty(fallbackChannelId)) { + return fallbackChannelId; + } else { + throw new ParsingException("Could not get channel ID"); + } + } + + @Nonnull + public static String getChannelName(@SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Nonnull final Optional channelHeader, + @Nonnull final JsonObject jsonResponse, + @Nullable final JsonObject channelAgeGateRenderer) + throws ParsingException { + if (channelAgeGateRenderer != null) { + final String title = channelAgeGateRenderer.getString("channelTitle"); + if (isNullOrEmpty(title)) { + throw new ParsingException("Could not get channel name"); + } + return title; + } + + final String metadataRendererTitle = jsonResponse.getObject("metadata") + .getObject("channelMetadataRenderer") + .getString(TITLE); + if (!isNullOrEmpty(metadataRendererTitle)) { + return metadataRendererTitle; + } + + return channelHeader.map(header -> { + final JsonObject channelJson = header.json; + switch (header.headerType) { + case PAGE: + return channelJson.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL) + .getObject(TITLE) + .getObject("dynamicTextViewModel") + .getObject("text") + .getString(CONTENT, channelJson.getString("pageTitle")); + case CAROUSEL: + case INTERACTIVE_TABBED: + return getTextFromObject(channelJson.getObject(TITLE)); + case C4_TABBED: + default: + return channelJson.getString(TITLE); + } + }) + // The channel name from a microformatDataRenderer may be different from the one + // displayed, especially for auto-generated channels, depending on the language + // requested for the interface (hl parameter of InnerTube requests' payload) + .or(() -> Optional.ofNullable(jsonResponse.getObject("microformat") + .getObject("microformatDataRenderer") + .getString(TITLE))) + .orElseThrow(() -> new ParsingException("Could not get channel name")); + } + + @Nullable + public static JsonObject getChannelAgeGateRenderer(@Nonnull final JsonObject jsonResponse) { + return jsonResponse.getObject(CONTENTS) + .getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .flatMap(tab -> tab.getObject(TAB_RENDERER) + .getObject(CONTENT) + .getObject("sectionListRenderer") + .getArray(CONTENTS) + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast)) + .filter(content -> content.has("channelAgeGateRenderer")) + .map(content -> content.getObject("channelAgeGateRenderer")) + .findFirst() + .orElse(null); } }