From 4c987a530292bc4b5a43540efcdf4b262c19de0d Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 26 Jul 2020 10:01:03 +0200 Subject: [PATCH 1/3] Support YouTube's new continuations for search --- .../org/schabi/newpipe/extractor/Page.java | 20 +++- .../extractors/YoutubeSearchExtractor.java | 94 ++++++++++++++++--- 2 files changed, 98 insertions(+), 16 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java index 6b8b42477..e4faae778 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java @@ -8,35 +8,45 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; public class Page implements Serializable { private final String url; + private final String id; private final List ids; private final Map cookies; - public Page(final String url, final List ids, final Map cookies) { + public Page(final String url, final String id, final List ids, final Map cookies) { this.url = url; + this.id = id; this.ids = ids; this.cookies = cookies; } public Page(final String url) { - this(url, null, null); + this(url, null, null, null); + } + + public Page(final String url, final String id) { + this(url, id, null, null); } public Page(final String url, final Map cookies) { - this(url, null, cookies); + this(url, null, null, cookies); } public Page(final List ids) { - this(null, ids, null); + this(null, null, ids, null); } public Page(final List ids, final Map cookies) { - this(null, ids, cookies); + this(null, null, ids, cookies); } public String getUrl() { return url; } + public String getId() { + return id; + } + public List getIds() { return ids; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 
f02fc2bdf..df86b5d32 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -2,6 +2,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; +import com.grack.nanojson.JsonWriter; + import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.StreamingService; @@ -14,11 +18,18 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.utils.JsonUtils; -import javax.annotation.Nonnull; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getClientVersion; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; /* @@ -104,12 +115,16 @@ public class YoutubeSearchExtractor extends SearchExtractor { Page nextPage = null; - for (Object section : sections) { - final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer"); + for (final Object section : sections) { + if (((JsonObject) section).has("itemSectionRenderer")) { + final JsonObject itemSectionRenderer = ((JsonObject) 
section).getObject("itemSectionRenderer"); - collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); + collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); - nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations")); + nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations")); + } else if (((JsonObject) section).has("continuationItemRenderer")) { + nextPage = getNewNextPageFrom(((JsonObject) section).getObject("continuationItemRenderer")); + } } return new InfoItemsPage<>(collector, nextPage); @@ -122,15 +137,58 @@ public class YoutubeSearchExtractor extends SearchExtractor { } final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId()); - final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization()); - final JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") - .getObject("continuationContents").getObject("itemSectionContinuation"); + if (page.getId() == null) { + final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization()); - collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); - final JsonArray continuations = itemSectionRenderer.getArray("continuations"); + final JsonObject itemSectionContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("itemSectionContinuation"); - return new InfoItemsPage<>(collector, getNextPageFrom(continuations)); + collectStreamsFrom(collector, itemSectionContinuation.getArray("contents")); + final JsonArray continuations = itemSectionContinuation.getArray("continuations"); + + return new InfoItemsPage<>(collector, getNextPageFrom(continuations)); + } else { + // @formatter:off + final byte[] json = JsonWriter.string() + .object() + .object("context") + .object("client") + .value("hl", "en") + .value("gl", getExtractorContentCountry().getCountryCode()) + .value("clientName", "WEB") + 
.value("clientVersion", getClientVersion()) + .value("utcOffsetMinutes", 0) + .end() + .object("request").end() + .object("user").end() + .end() + .value("continuation", page.getId()) + .end().done().getBytes("UTF-8"); + // @formatter:on + + final Map> headers = new HashMap<>(); + headers.put("Origin", Collections.singletonList("https://www.youtube.com")); + headers.put("Referer", Collections.singletonList(this.getUrl())); + headers.put("Content-Type", Collections.singletonList("application/json")); + + final String responseBody = getValidJsonResponseBody(getDownloader().post(page.getUrl(), headers, json)); + + final JsonObject ajaxJson; + try { + ajaxJson = JsonParser.object().from(responseBody); + } catch (JsonParserException e) { + throw new ParsingException("Could not parse JSON", e); + } + + final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands") + .getObject(0).getObject("appendContinuationItemsAction").getArray("continuationItems"); + + final JsonArray contents = continuationItems.getObject(0).getObject("itemSectionRenderer").getArray("contents"); + collectStreamsFrom(collector, contents); + + return new InfoItemsPage<>(collector, getNewNextPageFrom(continuationItems.getObject(1).getObject("continuationItemRenderer"))); + } } private void collectStreamsFrom(final InfoItemsSearchCollector collector, final JsonArray videos) throws NothingFoundException, ParsingException { @@ -162,4 +220,18 @@ public class YoutubeSearchExtractor extends SearchExtractor { return new Page(getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation + "&itct=" + clickTrackingParams); } + + private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) { + if (isNullOrEmpty(continuationItemRenderer)) { + return null; + } + + final String token = continuationItemRenderer.getObject("continuationEndpoint") + .getObject("continuationCommand").getString("token"); + + // FIXME: Key needs to be extracted + final String url = 
"https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; + + return new Page(url, token); + } } From f345f667e2bd7306a1c54df1c7cf26d26707067a Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 26 Jul 2020 12:00:56 +0200 Subject: [PATCH 2/3] Extract YouTube's key --- .../youtube/YoutubeParsingHelper.java | 80 ++++++++++++------- .../extractors/YoutubeSearchExtractor.java | 8 +- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index b329be282..f9d896d86 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -64,6 +64,8 @@ public class YoutubeParsingHelper { private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00"; private static String clientVersion; + private static String key; + private static final String[] HARDCODED_YOUTUBE_MUSIC_KEYS = {"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "0.1"}; private static String[] youtubeMusicKeys; @@ -214,39 +216,31 @@ public class YoutubeParsingHelper { return response.length() > 50; // ensure to have a valid response } - /** - * Get the client version from a page - * @return - * @throws ParsingException - */ - public static String getClientVersion() throws IOException, ExtractionException { - if (!isNullOrEmpty(clientVersion)) return clientVersion; - if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION; - + private static void getClientVersionAndKey() throws IOException, ExtractionException { final String url = "https://www.youtube.com/results?search_query=test"; final String html = getDownloader().get(url).responseBody(); - JsonObject initialData = getInitialData(html); - JsonArray 
serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); + final JsonObject initialData = getInitialData(html); + final JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); String shortClientVersion = null; // try to get version from initial data first - for (Object service : serviceTrackingParams) { - JsonObject s = (JsonObject) service; + for (final Object service : serviceTrackingParams) { + final JsonObject s = (JsonObject) service; if (s.getString("service").equals("CSI")) { - JsonArray params = s.getArray("params"); - for (Object param : params) { - JsonObject p = (JsonObject) param; - String key = p.getString("key"); + final JsonArray params = s.getArray("params"); + for (final Object param : params) { + final JsonObject p = (JsonObject) param; + final String key = p.getString("key"); if (key != null && key.equals("cver")) { - return clientVersion = p.getString("value"); + clientVersion = p.getString("value"); } } } else if (s.getString("service").equals("ECATCHER")) { // fallback to get a shortened client version which does not contain the last two digits - JsonArray params = s.getArray("params"); - for (Object param : params) { - JsonObject p = (JsonObject) param; - String key = p.getString("key"); + final JsonArray params = s.getArray("params"); + for (final Object param : params) { + final JsonObject p = (JsonObject) param; + final String key = p.getString("key"); if (key != null && key.equals("client.version")) { shortClientVersion = p.getString("value"); } @@ -255,26 +249,54 @@ public class YoutubeParsingHelper { } String contextClientVersion; - String[] patterns = { + final String[] patterns = { "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", "innertube_context_client_version\":\"([0-9\\.]+?)\"", "client.version=([0-9\\.]+)" }; - for (String pattern : patterns) { + for (final String pattern : patterns) { try { contextClientVersion = 
Parser.matchGroup1(pattern, html); if (!isNullOrEmpty(contextClientVersion)) { - return clientVersion = contextClientVersion; + clientVersion = contextClientVersion; } - } catch (Exception ignored) { - } + } catch (Parser.RegexException ignored) { } } if (shortClientVersion != null) { - return clientVersion = shortClientVersion; + clientVersion = shortClientVersion; } - throw new ParsingException("Could not get client version"); + try { + key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html); + } catch (Parser.RegexException e) { + try { + key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html); + } catch (Parser.RegexException ignored) { } + } + } + + /** + * Get the client version + */ + public static String getClientVersion() throws IOException, ExtractionException { + if (!isNullOrEmpty(clientVersion)) return clientVersion; + if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION; + + getClientVersionAndKey(); + if (isNullOrEmpty(key)) throw new ParsingException("Could not extract client version"); + return clientVersion; + } + + /** + * Get the key + */ + public static String getKey() throws IOException, ExtractionException { + if (!isNullOrEmpty(key)) return key; + + getClientVersionAndKey(); + if (isNullOrEmpty(key)) throw new ParsingException("Could not extract key"); + return key; } public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, ReCaptchaException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index df86b5d32..6e7d41c48 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -28,6 +28,7 @@ 
import javax.annotation.Nonnull; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getClientVersion; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -107,7 +108,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Nonnull @Override - public InfoItemsPage getInitialPage() throws ExtractionException { + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId()); final JsonArray sections = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") @@ -221,7 +222,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { + "&itct=" + clickTrackingParams); } - private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) { + private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) throws IOException, ExtractionException { if (isNullOrEmpty(continuationItemRenderer)) { return null; } @@ -229,8 +230,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { final String token = continuationItemRenderer.getObject("continuationEndpoint") .getObject("continuationCommand").getString("token"); - // FIXME: Key needs to be extracted - final String url = "https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; + final String url = "https://www.youtube.com/youtubei/v1/search?key=" + getKey(); return new Page(url, token); } From 667dce034c1aa5a2378b2e4b5a1a9e2dc67c14f5 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 26 
Jul 2020 13:14:25 +0200 Subject: [PATCH 3/3] Only use fallback methods for clientVersion when clientVersion hasn't been successfully extracted yet --- .../extractor/services/youtube/YoutubeParsingHelper.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index f9d896d86..e124d0208 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -216,7 +216,7 @@ public class YoutubeParsingHelper { return response.length() > 50; // ensure to have a valid response } - private static void getClientVersionAndKey() throws IOException, ExtractionException { + private static void extractClientVersionAndKey() throws IOException, ExtractionException { final String url = "https://www.youtube.com/results?search_query=test"; final String html = getDownloader().get(url).responseBody(); final JsonObject initialData = getInitialData(html); @@ -259,11 +259,12 @@ public class YoutubeParsingHelper { contextClientVersion = Parser.matchGroup1(pattern, html); if (!isNullOrEmpty(contextClientVersion)) { clientVersion = contextClientVersion; + break; } } catch (Parser.RegexException ignored) { } } - if (shortClientVersion != null) { + if (isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) { clientVersion = shortClientVersion; } @@ -283,7 +284,7 @@ public class YoutubeParsingHelper { if (!isNullOrEmpty(clientVersion)) return clientVersion; if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION; - getClientVersionAndKey(); - if (isNullOrEmpty(key)) throw new ParsingException("Could not extract client version"); + extractClientVersionAndKey(); + if (isNullOrEmpty(clientVersion)) throw new ParsingException("Could not extract client version"); return clientVersion; } @@ -294,7
+295,7 @@ public class YoutubeParsingHelper { public static String getKey() throws IOException, ExtractionException { if (!isNullOrEmpty(key)) return key; - getClientVersionAndKey(); + extractClientVersionAndKey(); if (isNullOrEmpty(key)) throw new ParsingException("Could not extract key"); return key; }