diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index 5f83c82b3..f3526fcec 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -62,6 +62,9 @@ public interface Downloader { DownloadResponse head(String siteUrl) throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl, Localization localization) + throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 38722fa52..9641d3931 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; @@ -14,6 +15,7 @@ import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import 
org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.DonationLinkHelper; @@ -60,8 +62,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; - String pageContent = downloader.download(channelUrl); - doc = Jsoup.parse(pageContent, channelUrl); + final DownloadResponse response = downloader.get(channelUrl); + doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 98a4c4023..4480b38af 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; @@ -35,8 +36,9 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { - String pageContent = downloader.download(getUrl()); - doc = Jsoup.parse(pageContent, getUrl()); + final String url = getUrl(); + final DownloadResponse response = downloader.get(url); + doc = YoutubeParsingHelper.parseAndCheckPage(url, response); } @Override diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 709e5f577..0a954607f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.StreamingService; @@ -12,6 +13,7 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.utils.Localization; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.utils.Parser; import javax.annotation.Nonnull; @@ -52,13 +54,9 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { - final String site; final String url = getUrl(); - //String url = builder.build().toString(); - //if we've been passed a valid language code, append it to the URL - site = downloader.download(url, getLocalization()); - - doc = Jsoup.parse(site, url); + final DownloadResponse response = downloader.get(url, getLocalization()); + doc = YoutubeParsingHelper.parseAndCheckPage(url, response); } @Override diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index a8a30013f..fa866cd5b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.services.youtube.ItagItem; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.Localization; import org.schabi.newpipe.extractor.utils.Parser; @@ -536,7 +537,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (watch.size() < 1) { return null;// prevent the snackbar notification "report error" on age-restricted videos } - + collector.commit(extractVideoPreviewInfo(watch.first().select("li").first())); return collector.getItems().get(0); } catch (Exception e) { @@ -611,23 +612,16 @@ public class YoutubeStreamExtractor extends StreamExtractor { private String pageHtml = null; - private String getPageHtml(Downloader downloader) throws IOException, ExtractionException { - final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS; - if (pageHtml == null) { - pageHtml = downloader.download(verifiedUrl); - } - return pageHtml; - } - @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { - final String pageContent = getPageHtml(downloader); - doc = Jsoup.parse(pageContent, getUrl()); + final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS; + final 
DownloadResponse response = downloader.get(verifiedUrl); + pageHtml = response.getResponseBody(); + doc = YoutubeParsingHelper.parseAndCheckPage(verifiedUrl, response); final String playerUrl; // Check if the video is age restricted - if (pageContent.contains(" { url += "?gl=" + contentCountry; } - String pageContent = downloader.download(url); - doc = Jsoup.parse(pageContent, url); + final DownloadResponse response = downloader.get(url); + doc = YoutubeParsingHelper.parseAndCheckPage(url, response); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 4c3655340..120275caa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -1,7 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import java.net.URL; @@ -30,6 +34,23 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } + private static final String[] RECAPTCHA_DETECTION_SELECTORS = { + "form[action*=\"/das_captcha\"]", + "input[name*=\"action_recaptcha_verify\"]" + }; + + public static Document parseAndCheckPage(final String url, final DownloadResponse response) throws ReCaptchaException { + final Document document = Jsoup.parse(response.getResponseBody(), url); + + for (String detectionSelector : RECAPTCHA_DETECTION_SELECTORS) { + if (!document.select(detectionSelector).isEmpty()) { + throw new ReCaptchaException("reCAPTCHA challenge requested (detected 
with selector: \"" + detectionSelector + "\")", url); + } + } + + return document; + } + public static boolean isYoutubeURL(URL url) { String host = url.getHost(); return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com") diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index 1a7536ac4..3091c74bb 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -16,6 +16,8 @@ import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.utils.Localization; +import static java.util.Collections.singletonList; + /* * Created by Christian Schabesberger on 28.01.16. * @@ -194,6 +196,14 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { return new DownloadResponse(con.getResponseCode(), null, con.getHeaderFields()); } + @Override + public DownloadResponse get(String siteUrl, Localization localization) throws IOException, ReCaptchaException { + final Map<String, List<String>> requestHeaders = new HashMap<>(); + requestHeaders.put("Accept-Language", singletonList(localization.getLanguage())); + + return get(siteUrl, new DownloadRequest(null, requestHeaders)); + } + @Override public DownloadResponse get(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException {