From df26badd4a7356c001a9c326ce199a5ee5b34f71 Mon Sep 17 00:00:00 2001
From: AudricV <74829229+AudricV@users.noreply.github.com>
Date: Thu, 4 Apr 2024 19:36:31 +0200
Subject: [PATCH] [YouTube] Add common methods to get ID, name and age gate
object of channels
Also move duplicate strings into constants and support pageHeader channel
header in user channels on YoutubeChannelHelper methods.
---
.../youtube/YoutubeChannelHelper.java | 256 ++++++++++++++++--
1 file changed, 229 insertions(+), 27 deletions(-)
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java
index dd338900c..74335017a 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java
@@ -4,16 +4,19 @@ import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization;
import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
+import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
@@ -21,6 +24,19 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
* Shared functions for extracting YouTube channel pages and tabs.
*/
public final class YoutubeChannelHelper {
+
+ private static final String BROWSE_ENDPOINT = "browseEndpoint";
+ private static final String BROWSE_ID = "browseId";
+ private static final String CAROUSEL_HEADER_RENDERER = "carouselHeaderRenderer";
+ private static final String C4_TABBED_HEADER_RENDERER = "c4TabbedHeaderRenderer";
+ private static final String CONTENT = "content";
+ private static final String CONTENTS = "contents";
+ private static final String HEADER = "header";
+ private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel";
+ private static final String TAB_RENDERER = "tabRenderer";
+ private static final String TITLE = "title";
+ private static final String TOPIC_CHANNEL_DETAILS_RENDERER = "topicChannelDetailsRenderer";
+
private YoutubeChannelHelper() {
}
@@ -64,8 +80,8 @@ public final class YoutubeChannelHelper {
.getObject("webCommandMetadata")
.getString("webPageType", "");
- final JsonObject browseEndpoint = endpoint.getObject("browseEndpoint");
- final String browseId = browseEndpoint.getString("browseId", "");
+ final JsonObject browseEndpoint = endpoint.getObject(BROWSE_ENDPOINT);
+ final String browseId = browseEndpoint.getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
@@ -140,7 +156,7 @@ public final class YoutubeChannelHelper {
while (level < 3) {
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
localization, country)
- .value("browseId", id)
+ .value(BROWSE_ID, id)
.value("params", parameters)
.done())
.getBytes(StandardCharsets.UTF_8);
@@ -159,8 +175,8 @@ public final class YoutubeChannelHelper {
.getObject("webCommandMetadata")
.getString("webPageType", "");
- final String browseId = endpoint.getObject("browseEndpoint")
- .getString("browseId", "");
+ final String browseId = endpoint.getObject(BROWSE_ENDPOINT)
+ .getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
@@ -257,7 +273,7 @@ public final class YoutubeChannelHelper {
* A {@code pageHeaderRenderer} channel header type.
*
*
- * This header returns only the channel's name and its avatar.
+ * This header returns only the channel's name and its avatar for system channels.
*
*/
PAGE
@@ -294,20 +310,20 @@ public final class YoutubeChannelHelper {
@Nonnull
public static Optional getChannelHeader(
@Nonnull final JsonObject channelResponse) {
- final JsonObject header = channelResponse.getObject("header");
+ final JsonObject header = channelResponse.getObject(HEADER);
- if (header.has("c4TabbedHeaderRenderer")) {
- return Optional.of(header.getObject("c4TabbedHeaderRenderer"))
+ if (header.has(C4_TABBED_HEADER_RENDERER)) {
+ return Optional.of(header.getObject(C4_TABBED_HEADER_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED));
- } else if (header.has("carouselHeaderRenderer")) {
- return header.getObject("carouselHeaderRenderer")
- .getArray("contents")
+ } else if (header.has(CAROUSEL_HEADER_RENDERER)) {
+ return header.getObject(CAROUSEL_HEADER_RENDERER)
+ .getArray(CONTENTS)
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
- .filter(item -> item.has("topicChannelDetailsRenderer"))
+ .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER))
.findFirst()
- .map(item -> item.getObject("topicChannelDetailsRenderer"))
+ .map(item -> item.getObject(TOPIC_CHANNEL_DETAILS_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL));
} else if (header.has("pageHeaderRenderer")) {
return Optional.of(header.getObject("pageHeaderRenderer"))
@@ -333,22 +349,208 @@ public final class YoutubeChannelHelper {
* @return whether the channel is verified
*/
public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) {
- // carouselHeaderRenderer and pageHeaderRenderer does not contain any verification
- // badges
- // Since they are only shown on YouTube internal channels or on channels of large
- // organizations broadcasting live events, we can assume the channel to be verified
- if (channelHeader.headerType == ChannelHeader.HeaderType.CAROUSEL
- || channelHeader.headerType == ChannelHeader.HeaderType.PAGE) {
- return true;
+        switch (channelHeader.headerType) {
+            case CAROUSEL:
+                // A carouselHeaderRenderer holds no verification badge. As this header is only
+                // shown on YouTube internal channels or on channels of large organizations
+                // broadcasting live events, the channel is assumed to be verified
+                return true;
+            case PAGE:
+                final JsonObject viewModel = channelHeader.json.getObject(CONTENT)
+                        .getObject(PAGE_HEADER_VIEW_MODEL);
+
+                // A CHECK_CIRCLE_FILLED or MUSIC_FILLED image attached to the title text marks
+                // the channel as verified
+                final boolean hasVerifiedIcon = viewModel.getObject(TITLE)
+                        .getObject("dynamicTextViewModel")
+                        .getObject("text")
+                        .getArray("attachmentRuns")
+                        .stream()
+                        .filter(JsonObject.class::isInstance)
+                        .map(JsonObject.class::cast)
+                        .flatMap(run -> run.getObject("element")
+                                .getObject("type")
+                                .getObject("imageType")
+                                .getObject("image")
+                                .getArray("sources")
+                                .stream()
+                                .filter(JsonObject.class::isInstance)
+                                .map(JsonObject.class::cast))
+                        .map(source -> source.getObject("clientResource")
+                                .getString("imageName"))
+                        .anyMatch(imageName -> "CHECK_CIRCLE_FILLED".equals(imageName)
+                                || "MUSIC_FILLED".equals(imageName));
+                if (hasVerifiedIcon) {
+                    return true;
+                }
+
+                // If a pageHeaderRenderer has no object in which a verified check may be
+                // contained but has a contentPreviewImageViewModel, the header should be
+                // coming from a system channel, which we can assume to be verified
+                return viewModel.getObject("image")
+                        .has("contentPreviewImageViewModel");
+            case INTERACTIVE_TABBED:
+                // An autoGenerated property should mean that the channel has been auto
+                // generated by YouTube: the channel is assumed to be verified in this case
+                return channelHeader.json.has("autoGenerated");
+            case C4_TABBED:
+            default:
+                // Any other header: rely on the badge check of YoutubeParsingHelper
+                return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges"));
+        }
+    }
+
+    /**
+     * Get the ID of a channel from its header and its response.
+     *
+     * <p>
+     * For {@link ChannelHeader.HeaderType#C4_TABBED c4TabbedHeaderRenderer} and
+     * {@link ChannelHeader.HeaderType#CAROUSEL carouselHeaderRenderer} channel headers, the ID is
+     * obtained from the header.
+     * </p>
+     *
+     * <p>
+     * For other headers or if it cannot be obtained, the {@code externalChannelId} of the
+     * {@code channelMetadataRenderer} in the channel response is used.
+     * </p>
+     *
+     * <p>
+     * If the ID still cannot be obtained, the fallback channel ID, if provided, is used.
+     * </p>
+     *
+     * @param header            the channel header, which may be empty
+     * @param jsonResponse      the channel response
+     * @param fallbackChannelId the fallback channel ID, which can be null
+     * @return the ID of the channel
+     * @throws ParsingException if the channel ID cannot be obtained from the channel header, the
+     *                          channel response and the fallback channel ID
+     */
+    @Nonnull
+    public static String getChannelId(
+            @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+            @Nonnull final Optional<ChannelHeader> header,
+            @Nonnull final JsonObject jsonResponse,
+            @Nullable final String fallbackChannelId) throws ParsingException {
+        if (header.isPresent()) {
+            final ChannelHeader channelHeader = header.get();
+            // getChannelHeader wraps the header renderer itself (the c4TabbedHeaderRenderer or
+            // the topicChannelDetailsRenderer object), so the ID is looked up directly on
+            // ChannelHeader.json and not from the root of the channel response
+            final JsonObject headerJson = channelHeader.json;
+            switch (channelHeader.headerType) {
+                case C4_TABBED:
+                    final String channelId = headerJson.getString("channelId", "");
+                    if (!isNullOrEmpty(channelId)) {
+                        return channelId;
+                    }
+                    // Otherwise use the browse ID of the header's navigation endpoint
+                    final String navigationC4TabChannelId = headerJson
+                            .getObject("navigationEndpoint")
+                            .getObject(BROWSE_ENDPOINT)
+                            .getString(BROWSE_ID);
+                    if (!isNullOrEmpty(navigationC4TabChannelId)) {
+                        return navigationC4TabChannelId;
+                    }
+                    break;
+                case CAROUSEL:
+                    // For carousel headers, the ID is the browse ID of the navigation endpoint
+                    // of the topicChannelDetailsRenderer
+                    final String navigationCarouselChannelId = headerJson
+                            .getObject("navigationEndpoint")
+                            .getObject(BROWSE_ENDPOINT)
+                            .getString(BROWSE_ID);
+                    if (!isNullOrEmpty(navigationCarouselChannelId)) {
+                        return navigationCarouselChannelId;
+                    }
+                    break;
+                default:
+                    // For other header types, the ID is taken from the channel response below
+                    break;
+            }
}
- if (channelHeader.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED) {
- // If the header has an autoGenerated property, it should mean that the channel has
- // been auto generated by YouTube: we can assume the channel to be verified in this
- // case
- return channelHeader.json.has("autoGenerated");
+        final String externalChannelId = jsonResponse.getObject("metadata")
+                .getObject("channelMetadataRenderer")
+                .getString("externalChannelId");
+        if (!isNullOrEmpty(externalChannelId)) {
+            return externalChannelId;
}
- return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges"));
+        // Last resort: the ID supplied by the caller, if any
+        if (isNullOrEmpty(fallbackChannelId)) {
+            throw new ParsingException("Could not get channel ID");
+        }
+        return fallbackChannelId;
+    }
+
+    /**
+     * Get the name of a channel.
+     *
+     * <p>
+     * If an age gate renderer is given, only its {@code channelTitle} is used. Otherwise, the
+     * name comes from the first of the following sources which provides a non-empty one: the
+     * {@code channelMetadataRenderer} of the channel response, the channel header and the
+     * {@code microformatDataRenderer} of the channel response.
+     * </p>
+     *
+     * @param channelHeader          the channel header, which may be empty
+     * @param jsonResponse           the channel response
+     * @param channelAgeGateRenderer the age gate renderer of the channel, which can be null
+     * @return the name of the channel
+     * @throws ParsingException if the channel name cannot be obtained from these sources
+     */
+    @Nonnull
+    public static String getChannelName(@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+                                        @Nonnull final Optional<ChannelHeader> channelHeader,
+                                        @Nonnull final JsonObject jsonResponse,
+                                        @Nullable final JsonObject channelAgeGateRenderer)
+            throws ParsingException {
+        if (channelAgeGateRenderer != null) {
+            final String title = channelAgeGateRenderer.getString("channelTitle");
+            if (isNullOrEmpty(title)) {
+                throw new ParsingException("Could not get channel name");
+            }
+            return title;
+        }
+
+        final String metadataRendererTitle = jsonResponse.getObject("metadata")
+                .getObject("channelMetadataRenderer")
+                .getString(TITLE);
+        if (!isNullOrEmpty(metadataRendererTitle)) {
+            return metadataRendererTitle;
+        }
+
+        return channelHeader.map(header -> {
+                    final JsonObject channelJson = header.json;
+                    switch (header.headerType) {
+                        case PAGE:
+                            return channelJson.getObject(CONTENT)
+                                    .getObject(PAGE_HEADER_VIEW_MODEL)
+                                    .getObject(TITLE)
+                                    .getObject("dynamicTextViewModel")
+                                    .getObject("text")
+                                    .getString(CONTENT, channelJson.getString("pageTitle"));
+                        case CAROUSEL:
+                        case INTERACTIVE_TABBED:
+                            return getTextFromObject(channelJson.getObject(TITLE));
+                        case C4_TABBED:
+                        default:
+                            return channelJson.getString(TITLE);
+                    }
+                })
+                // Like for the other sources, an empty name is handled as a missing one, so the
+                // next source is tried instead of returning an empty channel name
+                .filter(name -> !isNullOrEmpty(name))
+                // The channel name from a microformatDataRenderer may be different from the one
+                // displayed, especially for auto-generated channels, depending on the language
+                // requested for the interface (hl parameter of InnerTube requests' payload)
+                .or(() -> Optional.ofNullable(jsonResponse.getObject("microformat")
+                        .getObject("microformatDataRenderer")
+                        .getString(TITLE)))
+                .filter(name -> !isNullOrEmpty(name))
+                .orElseThrow(() -> new ParsingException("Could not get channel name"));
+    }
+
+    /**
+     * Get the first {@code channelAgeGateRenderer} of a channel response.
+     *
+     * <p>
+     * The renderer is searched in the contents of the {@code sectionListRenderer} of every tab
+     * of the {@code twoColumnBrowseResultsRenderer} of the response.
+     * </p>
+     *
+     * @param jsonResponse the channel response
+     * @return the first {@code channelAgeGateRenderer} found, or {@code null} if there is none
+     */
+    @Nullable
+    public static JsonObject getChannelAgeGateRenderer(@Nonnull final JsonObject jsonResponse) {
+        final String ageGateKey = "channelAgeGateRenderer";
+        return jsonResponse.getObject(CONTENTS)
+                .getObject("twoColumnBrowseResultsRenderer")
+                .getArray("tabs")
+                .stream()
+                .filter(tab -> tab instanceof JsonObject)
+                .map(tab -> (JsonObject) tab)
+                // Contents of the section list of each tab
+                .map(tab -> tab.getObject(TAB_RENDERER)
+                        .getObject(CONTENT)
+                        .getObject("sectionListRenderer")
+                        .getArray(CONTENTS))
+                .flatMap(sectionContents -> sectionContents.stream())
+                .filter(item -> item instanceof JsonObject)
+                .map(item -> (JsonObject) item)
+                .filter(item -> item.has(ageGateKey))
+                .map(item -> item.getObject(ageGateKey))
+                .findFirst()
+                .orElse(null);
}
}