[YouTube] Add common methods to get ID, name and age gate object of channels

Also move duplicate strings into constants and support pageHeader channel
header in user channels on YoutubeChannelHelper methods.
This commit is contained in:
AudricV 2024-04-04 19:36:31 +02:00
parent 5a6da5f43e
commit df26badd4a
No known key found for this signature in database
GPG Key ID: DA92EC7905614198

View File

@ -4,16 +4,19 @@ import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter; import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.ContentCountry; import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.Localization;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Optional; import java.util.Optional;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
@ -21,6 +24,19 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
* Shared functions for extracting YouTube channel pages and tabs. * Shared functions for extracting YouTube channel pages and tabs.
*/ */
public final class YoutubeChannelHelper { public final class YoutubeChannelHelper {
private static final String BROWSE_ENDPOINT = "browseEndpoint";
private static final String BROWSE_ID = "browseId";
private static final String CAROUSEL_HEADER_RENDERER = "carouselHeaderRenderer";
private static final String C4_TABBED_HEADER_RENDERER = "c4TabbedHeaderRenderer";
private static final String CONTENT = "content";
private static final String CONTENTS = "contents";
private static final String HEADER = "header";
private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel";
private static final String TAB_RENDERER = "tabRenderer";
private static final String TITLE = "title";
private static final String TOPIC_CHANNEL_DETAILS_RENDERER = "topicChannelDetailsRenderer";
private YoutubeChannelHelper() { private YoutubeChannelHelper() {
} }
@ -64,8 +80,8 @@ public final class YoutubeChannelHelper {
.getObject("webCommandMetadata") .getObject("webCommandMetadata")
.getString("webPageType", ""); .getString("webPageType", "");
final JsonObject browseEndpoint = endpoint.getObject("browseEndpoint"); final JsonObject browseEndpoint = endpoint.getObject(BROWSE_ENDPOINT);
final String browseId = browseEndpoint.getString("browseId", ""); final String browseId = browseEndpoint.getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
@ -140,7 +156,7 @@ public final class YoutubeChannelHelper {
while (level < 3) { while (level < 3) {
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder( final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
localization, country) localization, country)
.value("browseId", id) .value(BROWSE_ID, id)
.value("params", parameters) .value("params", parameters)
.done()) .done())
.getBytes(StandardCharsets.UTF_8); .getBytes(StandardCharsets.UTF_8);
@ -159,8 +175,8 @@ public final class YoutubeChannelHelper {
.getObject("webCommandMetadata") .getObject("webCommandMetadata")
.getString("webPageType", ""); .getString("webPageType", "");
final String browseId = endpoint.getObject("browseEndpoint") final String browseId = endpoint.getObject(BROWSE_ENDPOINT)
.getString("browseId", ""); .getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
@ -257,7 +273,7 @@ public final class YoutubeChannelHelper {
* A {@code pageHeaderRenderer} channel header type. * A {@code pageHeaderRenderer} channel header type.
* *
* <p> * <p>
* This header returns only the channel's name and its avatar. * This header returns only the channel's name and its avatar for system channels.
* </p> * </p>
*/ */
PAGE PAGE
@ -294,20 +310,20 @@ public final class YoutubeChannelHelper {
@Nonnull @Nonnull
public static Optional<ChannelHeader> getChannelHeader( public static Optional<ChannelHeader> getChannelHeader(
@Nonnull final JsonObject channelResponse) { @Nonnull final JsonObject channelResponse) {
final JsonObject header = channelResponse.getObject("header"); final JsonObject header = channelResponse.getObject(HEADER);
if (header.has("c4TabbedHeaderRenderer")) { if (header.has(C4_TABBED_HEADER_RENDERER)) {
return Optional.of(header.getObject("c4TabbedHeaderRenderer")) return Optional.of(header.getObject(C4_TABBED_HEADER_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED)); .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED));
} else if (header.has("carouselHeaderRenderer")) { } else if (header.has(CAROUSEL_HEADER_RENDERER)) {
return header.getObject("carouselHeaderRenderer") return header.getObject(CAROUSEL_HEADER_RENDERER)
.getArray("contents") .getArray(CONTENTS)
.stream() .stream()
.filter(JsonObject.class::isInstance) .filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast) .map(JsonObject.class::cast)
.filter(item -> item.has("topicChannelDetailsRenderer")) .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER))
.findFirst() .findFirst()
.map(item -> item.getObject("topicChannelDetailsRenderer")) .map(item -> item.getObject(TOPIC_CHANNEL_DETAILS_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL)); .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL));
} else if (header.has("pageHeaderRenderer")) { } else if (header.has("pageHeaderRenderer")) {
return Optional.of(header.getObject("pageHeaderRenderer")) return Optional.of(header.getObject("pageHeaderRenderer"))
@ -333,22 +349,208 @@ public final class YoutubeChannelHelper {
* @return whether the channel is verified * @return whether the channel is verified
*/ */
public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) { public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) {
// carouselHeaderRenderer and pageHeaderRenderer does not contain any verification switch (channelHeader.headerType) {
// badges // carouselHeaderRenderers do not contain any verification badges
// Since they are only shown on YouTube internal channels or on channels of large // Since they are only shown on YouTube internal channels or on channels of large
// organizations broadcasting live events, we can assume the channel to be verified // organizations broadcasting live events, we can assume the channel to be verified
if (channelHeader.headerType == ChannelHeader.HeaderType.CAROUSEL case CAROUSEL:
|| channelHeader.headerType == ChannelHeader.HeaderType.PAGE) { return true;
case PAGE:
final JsonObject pageHeaderViewModel = channelHeader.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL);
final boolean hasCircleOrMusicIcon = pageHeaderViewModel.getObject(TITLE)
.getObject("dynamicTextViewModel")
.getObject("text")
.getArray("attachmentRuns")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.anyMatch(attachmentRun -> attachmentRun.getObject("element")
.getObject("type")
.getObject("imageType")
.getObject("image")
.getArray("sources")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.anyMatch(source -> {
final String imageName = source.getObject("clientResource")
.getString("imageName");
return "CHECK_CIRCLE_FILLED".equals(imageName)
|| "MUSIC_FILLED".equals(imageName);
}));
if (!hasCircleOrMusicIcon && pageHeaderViewModel.getObject("image")
.has("contentPreviewImageViewModel")) {
// If a pageHeaderRenderer has no object in which a check verified may be
// contained and if it has a contentPreviewImageViewModel, it should mean
// that the header is coming from a system channel, which we can assume to
// be verified
return true; return true;
} }
if (channelHeader.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED) { return hasCircleOrMusicIcon;
case INTERACTIVE_TABBED:
// If the header has an autoGenerated property, it should mean that the channel has // If the header has an autoGenerated property, it should mean that the channel has
// been auto generated by YouTube: we can assume the channel to be verified in this // been auto generated by YouTube: we can assume the channel to be verified in this
// case // case
return channelHeader.json.has("autoGenerated"); return channelHeader.json.has("autoGenerated");
} default:
return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges"));
} }
} }
/**
 * Get the ID of a channel from its header and its response.
 *
 * <p>
 * For {@link ChannelHeader.HeaderType#C4_TABBED c4TabbedHeaderRenderer} and
 * {@link ChannelHeader.HeaderType#CAROUSEL carouselHeaderRenderer} channel headers, the ID is
 * read from the header: from its {@code channelId} property (C4 tabbed headers only), then
 * from the browse ID of its {@code navigationEndpoint}.
 * </p>
 *
 * <p>
 * For other headers, or if the ID cannot be obtained from the header, the
 * {@code externalChannelId} of the {@code channelMetadataRenderer} in the channel response is
 * used.
 * </p>
 *
 * <p>
 * If the ID still cannot be obtained, the fallback channel ID, if provided, is used.
 * </p>
 *
 * @param header            the channel header
 * @param jsonResponse      the channel response, in which the
 *                          {@code channelMetadataRenderer} is looked up
 * @param fallbackChannelId the fallback channel ID, which can be null
 * @return the ID of the channel
 * @throws ParsingException if the channel ID cannot be obtained from the channel header, the
 *                          channel response and the fallback channel ID
 */
@Nonnull
public static String getChannelId(
        @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
        @Nonnull final Optional<ChannelHeader> header,
        @Nonnull final JsonObject jsonResponse,
        @Nullable final String fallbackChannelId) throws ParsingException {
    if (header.isPresent()) {
        final ChannelHeader channelHeader = header.get();
        // ChannelHeader.json is already the renderer object selected by getChannelHeader
        // (the c4TabbedHeaderRenderer or the topicChannelDetailsRenderer), so its properties
        // must be read directly instead of walking down from the response's header object
        switch (channelHeader.headerType) {
            case C4_TABBED:
                final String channelId = channelHeader.json.getString("channelId", "");
                if (!isNullOrEmpty(channelId)) {
                    return channelId;
                }

                final String navigationC4TabChannelId = channelHeader.json
                        .getObject("navigationEndpoint")
                        .getObject(BROWSE_ENDPOINT)
                        .getString(BROWSE_ID);
                if (!isNullOrEmpty(navigationC4TabChannelId)) {
                    return navigationC4TabChannelId;
                }
                break;
            case CAROUSEL:
                final String navigationCarouselChannelId = channelHeader.json
                        .getObject("navigationEndpoint")
                        .getObject(BROWSE_ENDPOINT)
                        .getString(BROWSE_ID);
                if (!isNullOrEmpty(navigationCarouselChannelId)) {
                    return navigationCarouselChannelId;
                }
                break;
            default:
                break;
        }
    }

    final String externalChannelId = jsonResponse.getObject("metadata")
            .getObject("channelMetadataRenderer")
            .getString("externalChannelId");
    if (!isNullOrEmpty(externalChannelId)) {
        return externalChannelId;
    }

    if (!isNullOrEmpty(fallbackChannelId)) {
        return fallbackChannelId;
    } else {
        throw new ParsingException("Could not get channel ID");
    }
}
/**
 * Get the name of a channel.
 *
 * <p>
 * When an age gate renderer is provided, only its {@code channelTitle} is considered. Otherwise
 * the name is looked up, in order, in the {@code channelMetadataRenderer} of the response, in
 * the channel header (if present) and finally in the {@code microformatDataRenderer} of the
 * response.
 * </p>
 *
 * @param channelHeader          the channel header, if any
 * @param jsonResponse           the channel response
 * @param channelAgeGateRenderer the age gate renderer of the channel, which can be null
 * @return the name of the channel
 * @throws ParsingException if no channel name could be found in any of the sources above
 */
@Nonnull
public static String getChannelName(@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
                                    @Nonnull final Optional<ChannelHeader> channelHeader,
                                    @Nonnull final JsonObject jsonResponse,
                                    @Nullable final JsonObject channelAgeGateRenderer)
        throws ParsingException {
    // Age-restricted channels: the age gate renderer is the only source used
    if (channelAgeGateRenderer != null) {
        final String ageGateTitle = channelAgeGateRenderer.getString("channelTitle");
        if (!isNullOrEmpty(ageGateTitle)) {
            return ageGateTitle;
        }
        throw new ParsingException("Could not get channel name");
    }

    final JsonObject metadataRenderer = jsonResponse.getObject("metadata")
            .getObject("channelMetadataRenderer");
    final String nameFromMetadata = metadataRenderer.getString(TITLE);
    if (!isNullOrEmpty(nameFromMetadata)) {
        return nameFromMetadata;
    }

    // Where the title lives in the header depends on the header type
    String nameFromHeader = null;
    if (channelHeader.isPresent()) {
        final ChannelHeader presentHeader = channelHeader.get();
        final JsonObject headerJson = presentHeader.json;
        switch (presentHeader.headerType) {
            case PAGE:
                nameFromHeader = headerJson.getObject(CONTENT)
                        .getObject(PAGE_HEADER_VIEW_MODEL)
                        .getObject(TITLE)
                        .getObject("dynamicTextViewModel")
                        .getObject("text")
                        .getString(CONTENT, headerJson.getString("pageTitle"));
                break;
            case CAROUSEL:
            case INTERACTIVE_TABBED:
                nameFromHeader = getTextFromObject(headerJson.getObject(TITLE));
                break;
            case C4_TABBED:
            default:
                nameFromHeader = headerJson.getString(TITLE);
                break;
        }
    }
    if (nameFromHeader != null) {
        return nameFromHeader;
    }

    // The channel name from a microformatDataRenderer may be different from the one
    // displayed, especially for auto-generated channels, depending on the language
    // requested for the interface (hl parameter of InnerTube requests' payload)
    final String nameFromMicroformat = jsonResponse.getObject("microformat")
            .getObject("microformatDataRenderer")
            .getString(TITLE);
    if (nameFromMicroformat == null) {
        throw new ParsingException("Could not get channel name");
    }
    return nameFromMicroformat;
}
/**
 * Get the {@code channelAgeGateRenderer} object of a channel response, if there is one.
 *
 * <p>
 * The tabs of the {@code twoColumnBrowseResultsRenderer} are walked in order and, for each
 * tab, the contents of its {@code sectionListRenderer}; the first content which has a
 * {@code channelAgeGateRenderer} property wins.
 * </p>
 *
 * @param jsonResponse the channel response
 * @return the first {@code channelAgeGateRenderer} object found, or {@code null} if the
 * response contains none
 */
@Nullable
public static JsonObject getChannelAgeGateRenderer(@Nonnull final JsonObject jsonResponse) {
    final JsonObject browseResults = jsonResponse.getObject(CONTENTS)
            .getObject("twoColumnBrowseResultsRenderer");

    for (final Object tab : browseResults.getArray("tabs")) {
        if (!(tab instanceof JsonObject)) {
            continue;
        }

        final JsonObject sectionList = ((JsonObject) tab).getObject(TAB_RENDERER)
                .getObject(CONTENT)
                .getObject("sectionListRenderer");
        for (final Object sectionContent : sectionList.getArray(CONTENTS)) {
            if (!(sectionContent instanceof JsonObject)) {
                continue;
            }

            final JsonObject candidate = (JsonObject) sectionContent;
            if (candidate.has("channelAgeGateRenderer")) {
                return candidate.getObject("channelAgeGateRenderer");
            }
        }
    }

    return null;
}
}