diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java
index 5f83c82b3..f3526fcec 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java
@@ -62,6 +62,9 @@ public interface Downloader {
DownloadResponse head(String siteUrl) throws IOException, ReCaptchaException;
+ DownloadResponse get(String siteUrl, Localization localization)
+ throws IOException, ReCaptchaException;
+
DownloadResponse get(String siteUrl, DownloadRequest request)
throws IOException, ReCaptchaException;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
index 38722fa52..9641d3931 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
@@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
@@ -14,6 +15,7 @@ import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
@@ -60,8 +62,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
- String pageContent = downloader.download(channelUrl);
- doc = Jsoup.parse(pageContent, channelUrl);
+ final DownloadResponse response = downloader.get(channelUrl);
+ doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
}
@Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java
index 98a4c4023..4480b38af 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java
@@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@@ -35,8 +36,9 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
- String pageContent = downloader.download(getUrl());
- doc = Jsoup.parse(pageContent, getUrl());
+ final String url = getUrl();
+ final DownloadResponse response = downloader.get(url);
+ doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
}
@Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
index 709e5f577..0a954607f 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
@@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.StreamingService;
@@ -12,6 +13,7 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.utils.Localization;
+import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
@@ -52,13 +54,9 @@ public class YoutubeSearchExtractor extends SearchExtractor {
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
- final String site;
final String url = getUrl();
- //String url = builder.build().toString();
- //if we've been passed a valid language code, append it to the URL
- site = downloader.download(url, getLocalization());
-
- doc = Jsoup.parse(site, url);
+ final DownloadResponse response = downloader.get(url, getLocalization());
+ doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
}
@Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
index a8a30013f..fa866cd5b 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
+import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
@@ -536,7 +537,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
if (watch.size() < 1) {
return null;// prevent the snackbar notification "report error" on age-restricted videos
}
-
+
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first()));
return collector.getItems().get(0);
} catch (Exception e) {
@@ -611,23 +612,16 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private String pageHtml = null;
- private String getPageHtml(Downloader downloader) throws IOException, ExtractionException {
- final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS;
- if (pageHtml == null) {
- pageHtml = downloader.download(verifiedUrl);
- }
- return pageHtml;
- }
-
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
- final String pageContent = getPageHtml(downloader);
- doc = Jsoup.parse(pageContent, getUrl());
+ final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS;
+ final DownloadResponse response = downloader.get(verifiedUrl);
+ pageHtml = response.getResponseBody();
+ doc = YoutubeParsingHelper.parseAndCheckPage(verifiedUrl, response);
final String playerUrl;
// Check if the video is age restricted
- if (pageContent.contains(" {
url += "?gl=" + contentCountry;
}
- String pageContent = downloader.download(url);
- doc = Jsoup.parse(pageContent, url);
+ final DownloadResponse response = downloader.get(url);
+ doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
}
@Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java
index 4c3655340..120275caa 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java
@@ -1,7 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import java.net.URL;
@@ -30,6 +34,23 @@ public class YoutubeParsingHelper {
private YoutubeParsingHelper() {
}
+ // Selectors that parseAndCheckPage probes for to recognise a reCAPTCHA challenge page.
+ private static final String[] RECAPTCHA_DETECTION_SELECTORS = {
+ "form[action*=\"/das_captcha\"]",
+ "input[name*=\"action_recaptcha_verify\"]"
+ };
+
+ // Parses the response body with url as base URI; throws ReCaptchaException on the first selector that matches.
+ public static Document parseAndCheckPage(final String url, final DownloadResponse response) throws ReCaptchaException {
+ final Document page = Jsoup.parse(response.getResponseBody(), url);
+ for (final String selector : RECAPTCHA_DETECTION_SELECTORS) {
+ if (!page.select(selector).isEmpty()) {
+ throw new ReCaptchaException("reCAPTCHA challenge requested (detected with selector: \"" + selector + "\")", url);
+ }
+ }
+ return page;
+ }
+
public static boolean isYoutubeURL(URL url) {
String host = url.getHost();
return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com")
diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java
index 1a7536ac4..3091c74bb 100644
--- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java
+++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java
@@ -16,6 +16,8 @@ import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Localization;
+import static java.util.Collections.singletonList;
+
/*
* Created by Christian Schabesberger on 28.01.16.
*
@@ -194,6 +196,14 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
return new DownloadResponse(con.getResponseCode(), null, con.getHeaderFields());
}
+ @Override
+ public DownloadResponse get(String siteUrl, Localization localization) throws IOException, ReCaptchaException {
+ // Advertise the localization's language via Accept-Language, then reuse the request-based overload.
+ final Map<String, List<String>> requestHeaders = new HashMap<>();
+ requestHeaders.put("Accept-Language", singletonList(localization.getLanguage()));
+ return get(siteUrl, new DownloadRequest(null, requestHeaders));
+ }
+
@Override
public DownloadResponse get(String siteUrl, DownloadRequest request)
throws IOException, ReCaptchaException {