Merge pull request #373 from wb9688/fix-yt-continuations

Support YouTube's new continuations
This commit is contained in:
Tobias Groza 2020-07-26 13:56:33 +02:00 committed by GitHub
commit 8627d01006
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 152 additions and 47 deletions

View File

@ -8,35 +8,45 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
public class Page implements Serializable { public class Page implements Serializable {
private final String url; private final String url;
private final String id;
private final List<String> ids; private final List<String> ids;
private final Map<String, String> cookies; private final Map<String, String> cookies;
public Page(final String url, final List<String> ids, final Map<String, String> cookies) { public Page(final String url, final String id, final List<String> ids, final Map<String, String> cookies) {
this.url = url; this.url = url;
this.id = id;
this.ids = ids; this.ids = ids;
this.cookies = cookies; this.cookies = cookies;
} }
public Page(final String url) { public Page(final String url) {
this(url, null, null); this(url, null, null, null);
}
/**
 * Creates a page described by a URL and a single id.
 * The list of ids and the cookies are left as {@code null}.
 *
 * @param url the URL of the page
 * @param id  the id of the page
 */
public Page(final String url, final String id) {
// Delegate to the four-argument constructor: (url, id, ids, cookies).
this(url, id, null, null);
} }
public Page(final String url, final Map<String, String> cookies) { public Page(final String url, final Map<String, String> cookies) {
this(url, null, cookies); this(url, null, null, cookies);
} }
public Page(final List<String> ids) { public Page(final List<String> ids) {
this(null, ids, null); this(null, null, ids, null);
} }
public Page(final List<String> ids, final Map<String, String> cookies) { public Page(final List<String> ids, final Map<String, String> cookies) {
this(null, ids, cookies); this(null, null, ids, cookies);
} }
public String getUrl() { public String getUrl() {
return url; return url;
} }
/**
 * Returns the id this page was constructed with.
 *
 * @return the id, or {@code null} if the page was created by a constructor that takes no id
 */
public String getId() {
    return this.id;
}
public List<String> getIds() { public List<String> getIds() {
return ids; return ids;
} }

View File

@ -64,6 +64,8 @@ public class YoutubeParsingHelper {
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00"; private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
private static String clientVersion; private static String clientVersion;
private static String key;
private static final String[] HARDCODED_YOUTUBE_MUSIC_KEYS = {"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "0.1"}; private static final String[] HARDCODED_YOUTUBE_MUSIC_KEYS = {"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "0.1"};
private static String[] youtubeMusicKeys; private static String[] youtubeMusicKeys;
@ -214,39 +216,31 @@ public class YoutubeParsingHelper {
return response.length() > 50; // ensure to have a valid response return response.length() > 50; // ensure to have a valid response
} }
/** private static void extractClientVersionAndKey() throws IOException, ExtractionException {
* Get the client version from a page
* @return
* @throws ParsingException
*/
public static String getClientVersion() throws IOException, ExtractionException {
if (!isNullOrEmpty(clientVersion)) return clientVersion;
if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION;
final String url = "https://www.youtube.com/results?search_query=test"; final String url = "https://www.youtube.com/results?search_query=test";
final String html = getDownloader().get(url).responseBody(); final String html = getDownloader().get(url).responseBody();
JsonObject initialData = getInitialData(html); final JsonObject initialData = getInitialData(html);
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); final JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
String shortClientVersion = null; String shortClientVersion = null;
// try to get version from initial data first // try to get version from initial data first
for (Object service : serviceTrackingParams) { for (final Object service : serviceTrackingParams) {
JsonObject s = (JsonObject) service; final JsonObject s = (JsonObject) service;
if (s.getString("service").equals("CSI")) { if (s.getString("service").equals("CSI")) {
JsonArray params = s.getArray("params"); final JsonArray params = s.getArray("params");
for (Object param : params) { for (final Object param : params) {
JsonObject p = (JsonObject) param; final JsonObject p = (JsonObject) param;
String key = p.getString("key"); final String key = p.getString("key");
if (key != null && key.equals("cver")) { if (key != null && key.equals("cver")) {
return clientVersion = p.getString("value"); clientVersion = p.getString("value");
} }
} }
} else if (s.getString("service").equals("ECATCHER")) { } else if (s.getString("service").equals("ECATCHER")) {
// fallback to get a shortened client version which does not contain the last two digits // fallback to get a shortened client version which does not contain the last two digits
JsonArray params = s.getArray("params"); final JsonArray params = s.getArray("params");
for (Object param : params) { for (final Object param : params) {
JsonObject p = (JsonObject) param; final JsonObject p = (JsonObject) param;
String key = p.getString("key"); final String key = p.getString("key");
if (key != null && key.equals("client.version")) { if (key != null && key.equals("client.version")) {
shortClientVersion = p.getString("value"); shortClientVersion = p.getString("value");
} }
@ -255,26 +249,55 @@ public class YoutubeParsingHelper {
} }
String contextClientVersion; String contextClientVersion;
String[] patterns = { final String[] patterns = {
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"", "innertube_context_client_version\":\"([0-9\\.]+?)\"",
"client.version=([0-9\\.]+)" "client.version=([0-9\\.]+)"
}; };
for (String pattern : patterns) { for (final String pattern : patterns) {
try { try {
contextClientVersion = Parser.matchGroup1(pattern, html); contextClientVersion = Parser.matchGroup1(pattern, html);
if (!isNullOrEmpty(contextClientVersion)) { if (!isNullOrEmpty(contextClientVersion)) {
return clientVersion = contextClientVersion; clientVersion = contextClientVersion;
break;
} }
} catch (Exception ignored) { } catch (Parser.RegexException ignored) { }
}
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
clientVersion = shortClientVersion;
}
try {
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
} catch (Parser.RegexException e) {
try {
key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html);
} catch (Parser.RegexException ignored) { }
} }
} }
if (shortClientVersion != null) { /**
return clientVersion = shortClientVersion; * Get the client version
*/
public static String getClientVersion() throws IOException, ExtractionException {
if (!isNullOrEmpty(clientVersion)) return clientVersion;
if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION;
extractClientVersionAndKey();
if (isNullOrEmpty(key)) throw new ParsingException("Could not extract client version");
return clientVersion;
} }
throw new ParsingException("Could not get client version"); /**
* Get the key
*/
public static String getKey() throws IOException, ExtractionException {
if (!isNullOrEmpty(key)) return key;
extractClientVersionAndKey();
if (isNullOrEmpty(key)) throw new ParsingException("Could not extract key");
return key;
} }
public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, ReCaptchaException { public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, ReCaptchaException {

View File

@ -2,6 +2,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
@ -14,11 +18,19 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import javax.annotation.Nonnull;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getClientVersion;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/* /*
@ -96,7 +108,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId()); final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId());
final JsonArray sections = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") final JsonArray sections = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
@ -104,12 +116,16 @@ public class YoutubeSearchExtractor extends SearchExtractor {
Page nextPage = null; Page nextPage = null;
for (Object section : sections) { for (final Object section : sections) {
if (((JsonObject) section).has("itemSectionRenderer")) {
final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer"); final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations")); nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations"));
} else if (((JsonObject) section).has("continuationItemRenderer")) {
nextPage = getNewNextPageFrom(((JsonObject) section).getObject("continuationItemRenderer"));
}
} }
return new InfoItemsPage<>(collector, nextPage); return new InfoItemsPage<>(collector, nextPage);
@ -122,15 +138,58 @@ public class YoutubeSearchExtractor extends SearchExtractor {
} }
final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId()); final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId());
if (page.getId() == null) {
final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization()); final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization());
final JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") final JsonObject itemSectionContinuation = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("itemSectionContinuation"); .getObject("continuationContents").getObject("itemSectionContinuation");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); collectStreamsFrom(collector, itemSectionContinuation.getArray("contents"));
final JsonArray continuations = itemSectionRenderer.getArray("continuations"); final JsonArray continuations = itemSectionContinuation.getArray("continuations");
return new InfoItemsPage<>(collector, getNextPageFrom(continuations)); return new InfoItemsPage<>(collector, getNextPageFrom(continuations));
} else {
// @formatter:off
final byte[] json = JsonWriter.string()
.object()
.object("context")
.object("client")
.value("hl", "en")
.value("gl", getExtractorContentCountry().getCountryCode())
.value("clientName", "WEB")
.value("clientVersion", getClientVersion())
.value("utcOffsetMinutes", 0)
.end()
.object("request").end()
.object("user").end()
.end()
.value("continuation", page.getId())
.end().done().getBytes("UTF-8");
// @formatter:on
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
headers.put("Referer", Collections.singletonList(this.getUrl()));
headers.put("Content-Type", Collections.singletonList("application/json"));
final String responseBody = getValidJsonResponseBody(getDownloader().post(page.getUrl(), headers, json));
final JsonObject ajaxJson;
try {
ajaxJson = JsonParser.object().from(responseBody);
} catch (JsonParserException e) {
throw new ParsingException("Could not parse JSON", e);
}
final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands")
.getObject(0).getObject("appendContinuationItemsAction").getArray("continuationItems");
final JsonArray contents = continuationItems.getObject(0).getObject("itemSectionRenderer").getArray("contents");
collectStreamsFrom(collector, contents);
return new InfoItemsPage<>(collector, getNewNextPageFrom(continuationItems.getObject(1).getObject("continuationItemRenderer")));
}
} }
private void collectStreamsFrom(final InfoItemsSearchCollector collector, final JsonArray videos) throws NothingFoundException, ParsingException { private void collectStreamsFrom(final InfoItemsSearchCollector collector, final JsonArray videos) throws NothingFoundException, ParsingException {
@ -162,4 +221,17 @@ public class YoutubeSearchExtractor extends SearchExtractor {
return new Page(getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation return new Page(getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
+ "&itct=" + clickTrackingParams); + "&itct=" + clickTrackingParams);
} }
/**
 * Builds the next {@link Page} from a {@code continuationItemRenderer} object.
 *
 * @param continuationItemRenderer the renderer holding the continuation token;
 *                                 may be {@code null} or empty
 * @return a page whose URL is the {@code youtubei/v1/search} endpoint including the key and
 *         whose id is the continuation token, or {@code null} if there is no renderer or
 *         no token
 * @throws IOException         if obtaining the key fails with an I/O error
 * @throws ExtractionException if the key cannot be extracted
 */
private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) throws IOException, ExtractionException {
    if (isNullOrEmpty(continuationItemRenderer)) {
        return null;
    }

    final String token = continuationItemRenderer.getObject("continuationEndpoint")
            .getObject("continuationCommand").getString("token");
    // Without a token there is nothing to continue from. A Page with a null id would also be
    // taken by getPage() for a URL-based continuation, so report "no next page" instead.
    if (isNullOrEmpty(token)) {
        return null;
    }

    final String url = "https://www.youtube.com/youtubei/v1/search?key=" + getKey();
    return new Page(url, token);
}
} }