From f4931d8bbdb55b757a7605be5de88226c6eec04c Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Thu, 18 Jul 2024 19:35:59 +0200 Subject: [PATCH] [YouTube] Workaround 403s on streaming URLs of WEB client after some time These changes work around an anti-bot token, for which its requirement is A/B tested on the WEB client. In this test, streaming URLs of this client return HTTP errors 403 if the token is not provided after some time. It also allows to not fetch the JavaScript player for non-age restricted videos, reducing data usage. The TVHTML5 embed client is now only fetched in the case of age-restricted videos. The methods forceFetchAndroidClient and forceFetchIosClient of YoutubeStreamExtractor have been removed, as they are now not needed anymore. These changes also break the extraction of appropriate error states for private and deleted videos and invalid video IDs. --- .../youtube/YoutubeParsingHelper.java | 56 ++-- .../extractors/YoutubeStreamExtractor.java | 262 +++++++----------- 2 files changed, 140 insertions(+), 178 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index aa262d2fe..82f2d2f0d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -1301,31 +1301,49 @@ public final class YoutubeParsingHelper { } @Nonnull - public static byte[] createDesktopPlayerBody( + public static JsonObject getWebPlayerResponse( + @Nonnull final Localization localization, + @Nonnull final ContentCountry contentCountry, + @Nonnull final String videoId) throws IOException, ExtractionException { + final byte[] body = JsonWriter.string( + prepareDesktopJsonBuilder(localization, contentCountry) + .value(VIDEO_ID, videoId) + .value(CONTENT_CHECK_OK, true) + .value(RACY_CHECK_OK, true) + .done()) + .getBytes(StandardCharsets.UTF_8); + final String url = YOUTUBEI_V1_URL + "player" + "?" + DISABLE_PRETTY_PRINT_PARAMETER + + "&$fields=microformat,playabilityStatus,storyboards,videoDetails"; + + return JsonUtils.toJsonObject(getValidJsonResponseBody( + getDownloader().postWithContentTypeJson( + url, getYouTubeHeaders(), body, localization))); + } + + @Nonnull + public static byte[] createTvHtml5EmbedPlayerBody( @Nonnull final Localization localization, @Nonnull final ContentCountry contentCountry, @Nonnull final String videoId, @Nonnull final Integer sts, - final boolean isTvHtml5DesktopJsonBuilder, - @Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException { + @Nonnull final String contentPlaybackNonce) { // @formatter:off - return JsonWriter.string((isTvHtml5DesktopJsonBuilder - ? prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId) - : prepareDesktopJsonBuilder(localization, contentCountry)) - .object("playbackContext") - .object("contentPlaybackContext") - // Signature timestamp from the JavaScript base player is needed to get - // working obfuscated URLs - .value("signatureTimestamp", sts) - .value("referer", "https://www.youtube.com/watch?v=" + videoId) + return JsonWriter.string( + prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId) + .object("playbackContext") + .object("contentPlaybackContext") + // Signature timestamp from the JavaScript base player is needed to get + // working obfuscated URLs + .value("signatureTimestamp", sts) + .value("referer", "https://www.youtube.com/watch?v=" + videoId) + .end() .end() - .end() - .value(CPN, contentPlaybackNonce) - .value(VIDEO_ID, videoId) - .value(CONTENT_CHECK_OK, true) - .value(RACY_CHECK_OK, true) - .done()) - .getBytes(StandardCharsets.UTF_8); + .value(CPN, contentPlaybackNonce) + .value(VIDEO_ID, videoId) + .value(CONTENT_CHECK_OK, true) + .value(RACY_CHECK_OK, true) + .done()) + .getBytes(StandardCharsets.UTF_8); // @formatter:on } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 8298902da..84797217b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -27,7 +27,7 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CPN; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.RACY_CHECK_OK; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.VIDEO_ID; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.createDesktopPlayerBody; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.createTvHtml5EmbedPlayerBody; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateContentPlaybackNonce; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateTParameter; @@ -103,22 +103,21 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; public class YoutubeStreamExtractor extends StreamExtractor { - private static boolean isAndroidClientFetchForced = false; - private static boolean isIosClientFetchForced = false; private JsonObject playerResponse; private JsonObject nextResponse; @Nullable - private JsonObject html5StreamingData; + private JsonObject iosStreamingData; @Nullable private JsonObject androidStreamingData; @Nullable - private JsonObject iosStreamingData; + private JsonObject tvHtml5SimplyEmbedStreamingData; private JsonObject videoPrimaryInfoRenderer; private JsonObject videoSecondaryInfoRenderer; private JsonObject playerMicroFormatRenderer; + private JsonObject playerCaptionsTracklistRenderer; private int ageLimit = -1; private StreamType streamType; @@ -126,9 +125,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { // URLs (with the cpn parameter). // Also because a nonce should be unique, it should be different between clients used, so // three different strings are used. - private String html5Cpn; - private String androidCpn; private String iosCpn; + private String androidCpn; + private String tvHtml5SimplyEmbedCpn; public YoutubeStreamExtractor(final StreamingService service, final LinkHandler linkHandler) { super(service, linkHandler); @@ -322,7 +321,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { return Long.parseLong(duration); } catch (final Exception e) { return getDurationFromFirstAdaptiveFormat(Arrays.asList( - html5StreamingData, androidStreamingData, iosStreamingData)); + iosStreamingData, androidStreamingData, tvHtml5SimplyEmbedStreamingData)); } } @@ -584,7 +583,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { // Android client doesn't contain all available streams (mainly the WEBM ones) return getManifestUrl( "dash", - Arrays.asList(html5StreamingData, androidStreamingData)); + Arrays.asList(androidStreamingData, tvHtml5SimplyEmbedStreamingData)); } @Nonnull @@ -597,7 +596,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { // Also, on videos, non-iOS clients don't have an HLS manifest URL in their player response return getManifestUrl( "hls", - Arrays.asList(iosStreamingData, html5StreamingData, androidStreamingData)); + Arrays.asList( + iosStreamingData, androidStreamingData, tvHtml5SimplyEmbedStreamingData)); } @Nonnull @@ -669,9 +669,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { // We cannot store the subtitles list because the media format may change final List subtitlesToReturn = new ArrayList<>(); - final JsonObject renderer = playerResponse.getObject("captions") - .getObject("playerCaptionsTracklistRenderer"); - final JsonArray captionsArray = renderer.getArray("captionTracks"); + final JsonArray captionsArray = playerCaptionsTracklistRenderer.getArray("captionTracks"); // TODO: use this to apply auto translation to different language from a source language // final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages"); @@ -795,64 +793,70 @@ public class YoutubeStreamExtractor extends StreamExtractor { final Localization localization = getExtractorLocalization(); final ContentCountry contentCountry = getExtractorContentCountry(); - html5Cpn = generateContentPlaybackNonce(); - playerResponse = getJsonPostResponse(PLAYER, - createDesktopPlayerBody( - localization, - contentCountry, - videoId, - YoutubeJavaScriptPlayerManager.getSignatureTimestamp(videoId), - false, - html5Cpn), - localization); + final JsonObject webPlayerResponse = YoutubeParsingHelper.getWebPlayerResponse( + localization, contentCountry, videoId); - // Save the playerResponse from the player endpoint of the desktop internal API because - // there can be restrictions on the embedded player. - // E.g. if a video is age-restricted, the embedded player's playabilityStatus says that - // the video cannot be played outside of YouTube, but does not show the original message. - final JsonObject youtubePlayerResponse = playerResponse; - - if (playerResponse == null) { - throw new ExtractionException("Could not get playerResponse"); + if (isPlayerResponseNotValid(webPlayerResponse, videoId)) { + // Check the playability status, as private and deleted videos and invalid video IDs do + // not return the ID provided in the player response + // When the requested video is playable and a different video ID is returned, it has + // the OK playability status, meaning the ExtractionException after this check will be + // thrown + checkPlayabilityStatus( + webPlayerResponse, webPlayerResponse.getObject("playabilityStatus")); + throw new ExtractionException("Initial WEB player response is not valid"); } - final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus"); + // Save the webPlayerResponse into playerResponse in the case the video cannot be played, + // so some metadata can be retrieved + playerResponse = webPlayerResponse; - final boolean isAgeRestricted = playabilityStatus.getString("reason", "") + // Use the player response from the player endpoint of the desktop internal API because + // there can be restrictions on videos in the embedded player. + // E.g. if a video is age-restricted, the embedded player's playabilityStatus says that + // the video cannot be played outside of YouTube, but does not show the original message. + final JsonObject playabilityStatus = webPlayerResponse.getObject("playabilityStatus"); + + final boolean isAgeRestricted = "login_required".equalsIgnoreCase( + playabilityStatus.getString("status")) + && playabilityStatus.getString("reason", "") .contains("age"); setStreamType(); - if (!playerResponse.has(STREAMING_DATA)) { + if (isAgeRestricted) { + fetchTvHtml5EmbedJsonPlayer(contentCountry, localization, videoId); + + // If no streams can be fetched in the TVHTML5 simply embed client, the video should be + // age-restricted, therefore throw an AgeRestrictedContentException explicitly. + if (tvHtml5SimplyEmbedStreamingData == null) { + throw new AgeRestrictedContentException( + "This age-restricted video cannot be watched."); + } + + // Refresh the stream type because the stream type may be not properly known for + // age-restricted videos + setStreamType(); + } else { + checkPlayabilityStatus(webPlayerResponse, playabilityStatus); + + // Fetching successfully the iOS player is mandatory to get streams + fetchIosMobileJsonPlayer(contentCountry, localization, videoId); + try { - fetchTvHtml5EmbedJsonPlayer(contentCountry, localization, videoId); + fetchAndroidMobileJsonPlayer(contentCountry, localization, videoId); } catch (final Exception ignored) { + // Ignore exceptions related to ANDROID client fetch or parsing, as it is not + // compulsory to play contents } } - // Refresh the stream type because the stream type may be not properly known for - // age-restricted videos - setStreamType(); - - if (html5StreamingData == null && playerResponse.has(STREAMING_DATA)) { - html5StreamingData = playerResponse.getObject(STREAMING_DATA); - } - - if (html5StreamingData == null) { - checkPlayabilityStatus(youtubePlayerResponse, playabilityStatus); - } - - // The microformat JSON object of the content is not returned on the client we use to - // try to get streams of unavailable contents but is still returned on the WEB client, + // The microformat JSON object of the content is only returned on the WEB client, // so we need to store it instead of getting it directly from the playerResponse - playerMicroFormatRenderer = youtubePlayerResponse.getObject("microformat") + playerMicroFormatRenderer = webPlayerResponse.getObject("microformat") .getObject("playerMicroformatRenderer"); - if (isPlayerResponseNotValid(playerResponse, videoId)) { - throw new ExtractionException("Initial player response is not valid"); - } - final byte[] body = JsonWriter.string( prepareDesktopJsonBuilder(localization, contentCountry) .value(VIDEO_ID, videoId) @@ -861,29 +865,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { .done()) .getBytes(StandardCharsets.UTF_8); nextResponse = getJsonPostResponse(NEXT, body, localization); - - // streamType can only have LIVE_STREAM, POST_LIVE_STREAM and VIDEO_STREAM values (see - // setStreamType()), so this block will be run only for POST_LIVE_STREAM and VIDEO_STREAM - // values if fetching of the ANDROID client is not forced - if ((!isAgeRestricted && streamType != StreamType.LIVE_STREAM) - || isAndroidClientFetchForced) { - try { - fetchAndroidMobileJsonPlayer(contentCountry, localization, videoId); - } catch (final Exception ignored) { - // Ignore exceptions related to ANDROID client fetch or parsing, as it is not - // compulsory to play contents - } - } - - if ((!isAgeRestricted && streamType == StreamType.LIVE_STREAM) - || isIosClientFetchForced) { - try { - fetchIosMobileJsonPlayer(contentCountry, localization, videoId); - } catch (final Exception ignored) { - // Ignore exceptions related to IOS client fetch or parsing, as it is not - // compulsory to play contents - } - } } private void checkPlayabilityStatus(final JsonObject youtubePlayerResponse, @@ -901,17 +882,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { status = newPlayabilityStatus.getString("status"); final String reason = newPlayabilityStatus.getString("reason"); - if (status.equalsIgnoreCase("login_required")) { - if (reason == null) { - final String message = newPlayabilityStatus.getArray("messages").getString(0); - if (message != null && message.contains("private")) { - throw new PrivateContentException("This video is private."); - } - } else if (reason.contains("age")) { - // No streams can be fetched, therefore throw an AgeRestrictedContentException - // explicitly. - throw new AgeRestrictedContentException( - "This age-restricted video cannot be watched."); + if (status.equalsIgnoreCase("login_required") && reason == null) { + final String message = newPlayabilityStatus.getArray("messages").getString(0); + if (message != null && message.contains("private")) { + throw new PrivateContentException("This video is private."); } } @@ -936,10 +910,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (detailedErrorMessage != null && detailedErrorMessage.contains("country")) { throw new GeographicRestrictionException( "This video is not available in client's country."); - } else if (detailedErrorMessage != null) { - throw new ContentNotAvailableException(detailedErrorMessage); } else { - throw new ContentNotAvailableException(reason); + throw new ContentNotAvailableException( + Objects.requireNonNullElse(detailedErrorMessage, reason)); } } } @@ -958,29 +931,34 @@ public class YoutubeStreamExtractor extends StreamExtractor { androidCpn = generateContentPlaybackNonce(); final byte[] mobileBody = JsonWriter.string( prepareAndroidMobileJsonBuilder(localization, contentCountry) + .object("playerRequest") + .value(VIDEO_ID, videoId) + .end() + .value("disablePlayerResponse", false) .value(VIDEO_ID, videoId) .value(CPN, androidCpn) .value(CONTENT_CHECK_OK, true) .value(RACY_CHECK_OK, true) - // Workaround getting streaming URLs which return 403 HTTP response code by - // using some parameters for Android client requests - .value("params", "CgIIAQ%3D%3D") .done()) .getBytes(StandardCharsets.UTF_8); - final JsonObject androidPlayerResponse = getJsonAndroidPostResponse(PLAYER, - mobileBody, localization, "&t=" + generateTParameter() - + "&id=" + videoId); + final JsonObject androidPlayerResponse = getJsonAndroidPostResponse( + "reel/reel_item_watch", + mobileBody, + localization, + "&t=" + generateTParameter() + "&id=" + videoId + "&$fields=playerResponse"); - if (isPlayerResponseNotValid(androidPlayerResponse, videoId)) { + final JsonObject playerResponseObject = androidPlayerResponse.getObject("playerResponse"); + if (isPlayerResponseNotValid(playerResponseObject, videoId)) { return; } - final JsonObject streamingData = androidPlayerResponse.getObject(STREAMING_DATA); + final JsonObject streamingData = playerResponseObject.getObject(STREAMING_DATA); if (!isNullOrEmpty(streamingData)) { androidStreamingData = streamingData; - if (html5StreamingData == null) { - playerResponse = androidPlayerResponse; + if (isNullOrEmpty(playerCaptionsTracklistRenderer)) { + playerCaptionsTracklistRenderer = playerResponseObject.getObject("captions") + .getObject("playerCaptionsTracklistRenderer"); } } } @@ -1008,15 +986,14 @@ public class YoutubeStreamExtractor extends StreamExtractor { + "&id=" + videoId); if (isPlayerResponseNotValid(iosPlayerResponse, videoId)) { - return; + throw new ExtractionException("IOS player response is not valid"); } final JsonObject streamingData = iosPlayerResponse.getObject(STREAMING_DATA); if (!isNullOrEmpty(streamingData)) { iosStreamingData = streamingData; - if (html5StreamingData == null) { - playerResponse = iosPlayerResponse; - } + playerCaptionsTracklistRenderer = iosPlayerResponse.getObject("captions") + .getObject("playerCaptionsTracklistRenderer"); } } @@ -1033,26 +1010,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull final Localization localization, @Nonnull final String videoId) throws IOException, ExtractionException { - // Because a cpn is unique to each request, we need to generate it again - html5Cpn = generateContentPlaybackNonce(); + tvHtml5SimplyEmbedCpn = generateContentPlaybackNonce(); final JsonObject tvHtml5EmbedPlayerResponse = getJsonPostResponse(PLAYER, - createDesktopPlayerBody(localization, + createTvHtml5EmbedPlayerBody(localization, contentCountry, videoId, YoutubeJavaScriptPlayerManager.getSignatureTimestamp(videoId), - true, - html5Cpn), localization); + tvHtml5SimplyEmbedCpn), localization); if (isPlayerResponseNotValid(tvHtml5EmbedPlayerResponse, videoId)) { - return; + throw new ExtractionException("TVHTML5 embed player response is not valid"); } - final JsonObject streamingData = tvHtml5EmbedPlayerResponse.getObject( - STREAMING_DATA); + final JsonObject streamingData = tvHtml5EmbedPlayerResponse.getObject(STREAMING_DATA); if (!isNullOrEmpty(streamingData)) { playerResponse = tvHtml5EmbedPlayerResponse; - html5StreamingData = streamingData; + tvHtml5SimplyEmbedStreamingData = streamingData; + playerCaptionsTracklistRenderer = playerResponse.getObject("captions") + .getObject("playerCaptionsTracklistRenderer"); } } @@ -1144,14 +1120,20 @@ public class YoutubeStreamExtractor extends StreamExtractor { final List streamList = new ArrayList<>(); java.util.stream.Stream.of( - // Use the androidStreamingData object first because there is no n param and no - // signatureCiphers in streaming URLs of the Android client + /* + Use the iosStreamingData object first because there is no n param and no + signatureCiphers in streaming URLs of the iOS client + + The androidStreamingData is used as second way as it isn't used on livestreams, + it doesn't return all available streams, and the Android client extraction is + more likely to break + + As age-restricted videos are not common, use tvHtml5SimplyEmbedStreamingData + last, which will be the only one not empty for age-restricted content + */ + new Pair<>(iosStreamingData, iosCpn), new Pair<>(androidStreamingData, androidCpn), - new Pair<>(html5StreamingData, html5Cpn), - // Use the iosStreamingData object in the last position because most of the - // available streams can be extracted with the Android and web clients and also - // because the iOS client is only enabled by default on livestreams - new Pair<>(iosStreamingData, iosCpn) + new Pair<>(tvHtml5SimplyEmbedStreamingData, tvHtml5SimplyEmbedCpn) ) .flatMap(pair -> getStreamsFromStreamingDataKey(videoId, pair.getFirst(), streamingDataKey, itagTypeWanted, pair.getSecond())) @@ -1599,42 +1581,4 @@ public class YoutubeStreamExtractor extends StreamExtractor { .getObject("results") .getArray("contents")); } - - /** - * Enable or disable the fetch of the Android client for all stream types. - * - *

- * By default, the fetch of the Android client will be made only on videos, in order to reduce - * data usage, because available streams of the Android client will be almost equal to the ones - * available on the {@code WEB} client: you can get exclusively a 48kbps audio stream and a - * 3GPP very low stream (which is, most of times, a 144p8 stream). - *

- * - * @param forceFetchAndroidClientValue whether to always fetch the Android client and not only - * for videos - */ - public static void forceFetchAndroidClient(final boolean forceFetchAndroidClientValue) { - isAndroidClientFetchForced = forceFetchAndroidClientValue; - } - - /** - * Enable or disable the fetch of the iOS client for all stream types. - * - *

- * By default, the fetch of the iOS client will be made only on livestreams, in order to get an - * HLS manifest with separated audio and video which has also an higher replay time (up to one - * hour, depending of the content instead of 30 seconds with non-iOS clients). - *

- * - *

- * Enabling this option will allow you to get an HLS manifest also for regular videos, which - * contains resolutions up to 1080p60. - *

- * - * @param forceFetchIosClientValue whether to always fetch the iOS client and not only for - * livestreams - */ - public static void forceFetchIosClient(final boolean forceFetchIosClientValue) { - isIosClientFetchForced = forceFetchIosClientValue; - } }