added http post method in downloader, formatting

This commit is contained in:
Ritvik Saraf 2018-09-02 05:06:26 +05:30
parent 8b8779b176
commit 95575756ee
8 changed files with 442 additions and 464 deletions

View File

@ -4,23 +4,21 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class DownloadResponse { public class DownloadResponse {
private final String responseBody; private final String responseBody;
private final Map<String, List<String>> responseHeaders; private final Map<String, List<String>> responseHeaders;
public DownloadResponse(String responseBody, Map<String, List<String>> headers) {
super();
this.responseBody = responseBody;
this.responseHeaders = headers;
}
public String getResponseBody() {
return responseBody;
}
public DownloadResponse(String responseBody, Map<String, List<String>> headers) { public Map<String, List<String>> getResponseHeaders() {
super(); return responseHeaders;
this.responseBody = responseBody; }
this.responseHeaders = headers;
}
public String getResponseBody() {
return responseBody;
}
public Map<String, List<String>> getResponseHeaders() {
return responseHeaders;
}
} }

View File

@ -28,41 +28,44 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
public interface Downloader { public interface Downloader {
/** /**
* Download the text file at the supplied URL as in download(String), but set * Download the text file at the supplied URL as in download(String), but set
* the HTTP header field "Accept-Language" to the supplied string. * the HTTP header field "Accept-Language" to the supplied string.
* *
* @param siteUrl the URL of the text file to return the contents of * @param siteUrl the URL of the text file to return the contents of
* @param language the language (usually a 2-character code) to set as the * @param language the language (usually a 2-character code) to set as the
* preferred language * preferred language
* @return the contents of the specified text file * @return the contents of the specified text file
* @throws IOException * @throws IOException
*/ */
String download(String siteUrl, String language) throws IOException, ReCaptchaException; String download(String siteUrl, String language) throws IOException, ReCaptchaException;
/** /**
* Download the text file at the supplied URL as in download(String), but set * Download the text file at the supplied URL as in download(String), but set
* the HTTP request header fields to the supplied custom properties. * the HTTP request header fields to the supplied custom properties.
* *
* @param siteUrl the URL of the text file to return the contents of * @param siteUrl the URL of the text file to return the contents of
* @param customProperties set request header properties * @param customProperties set request header properties
* @return the contents of the specified text file * @return the contents of the specified text file
* @throws IOException * @throws IOException
*/ */
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException; String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
/** /**
* Download (via HTTP) the text file located at the supplied URL, and return its * Download (via HTTP) the text file located at the supplied URL, and return its
* contents. Primarily intended for downloading web pages. * contents. Primarily intended for downloading web pages.
* *
* @param siteUrl the URL of the text file to download * @param siteUrl the URL of the text file to download
* @return the contents of the specified text file * @return the contents of the specified text file
* @throws IOException * @throws IOException
*/ */
String download(String siteUrl) throws IOException, ReCaptchaException; String download(String siteUrl) throws IOException, ReCaptchaException;
DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders) DownloadResponse get(String siteUrl, Map<String, List<String>> requestHeaders)
throws IOException, ReCaptchaException; throws IOException, ReCaptchaException;
DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException;
DownloadResponse post(String siteUrl, String requestBody, Map<String, List<String>> requestHeaders)
throws IOException, ReCaptchaException;
} }

View File

@ -4,19 +4,11 @@ import org.schabi.newpipe.extractor.InfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
public interface CommentsInfoItemExtractor extends InfoItemExtractor { public interface CommentsInfoItemExtractor extends InfoItemExtractor {
String getCommentId() throws ParsingException;
String getCommentId() throws ParsingException; String getCommentText() throws ParsingException;
String getAuthorName() throws ParsingException;
String getCommentText() throws ParsingException; String getAuthorThumbnail() throws ParsingException;
String getAuthorEndpoint() throws ParsingException;
String getAuthorName() throws ParsingException; String getPublishedTime() throws ParsingException;
Integer getLikeCount() throws ParsingException;
String getAuthorThumbnail() throws ParsingException;
String getAuthorEndpoint() throws ParsingException;
String getPublishedTime() throws ParsingException;
Integer getLikeCount() throws ParsingException;
} }

View File

@ -7,26 +7,6 @@ import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.InfoItemsCollector;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
/*
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* CommentsInfoItemsCollector.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoItem, CommentsInfoItemExtractor> { public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoItem, CommentsInfoItemExtractor> {
public CommentsInfoItemsCollector(int serviceId) { public CommentsInfoItemsCollector(int serviceId) {

View File

@ -1,20 +1,17 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import javax.net.ssl.HttpsURLConnection;
import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
@ -22,6 +19,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
@ -29,235 +27,222 @@ import com.fasterxml.jackson.databind.ObjectMapper;
public class YoutubeCommentsExtractor extends CommentsExtractor { public class YoutubeCommentsExtractor extends CommentsExtractor {
private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0";
private List<String> cookies; private List<String> cookies;
private String sessionToken; private String sessionToken;
private String commentsToken; private String commentsToken;
private ObjectMapper mapper = new ObjectMapper(); private ObjectMapper mapper = new ObjectMapper();
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
super(service, uiHandler); super(service, uiHandler);
// TODO Auto-generated constructor stub // TODO Auto-generated constructor stub
} }
@Override @Override
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
// initial page does not load any comments but is required to get session token // initial page does not load any comments but is required to get session token
// and cookies // and cookies
return getPage(getNextPageUrl()); return getPage(getNextPageUrl());
} }
@Override @Override
public String getNextPageUrl() throws IOException, ExtractionException { public String getNextPageUrl() throws IOException, ExtractionException {
return getNextPageUrl(commentsToken); return getNextPageUrl(commentsToken);
} }
private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException {
Optional<JsonNode> element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) Optional<JsonNode> element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation"))
.map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); .map(e -> e.get("continuations")).map(e -> e.findValue("continuation"));
if (element.isPresent()) { if (element.isPresent()) {
return getNextPageUrl(element.get().asText()); return getNextPageUrl(element.get().asText());
} else { } else {
// no more comments // no more comments
return ""; return "";
} }
} }
private String getNextPageUrl(String continuation) throws ParsingException { private String getNextPageUrl(String continuation) throws ParsingException {
Map<String, String> params = new HashMap<>(); Map<String, String> params = new HashMap<>();
params.put("action_get_comments", "1"); params.put("action_get_comments", "1");
params.put("pbj", "1"); params.put("pbj", "1");
params.put("ctoken", continuation); params.put("ctoken", continuation);
params.put("continuation", continuation); params.put("continuation", continuation);
try { try {
return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); return "https://www.youtube.com/comment_service_ajax?" + getDataString(params);
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new ParsingException("Could not get next page url", e); throw new ParsingException("Could not get next page url", e);
} }
} }
@Override @Override
public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException { public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
if (pageUrl == null || pageUrl.isEmpty()) { if (pageUrl == null || pageUrl.isEmpty()) {
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
} }
String ajaxResponse = makeAjaxRequest(pageUrl); String ajaxResponse = makeAjaxRequest(pageUrl);
JsonNode ajaxJson = mapper.readTree(ajaxResponse); JsonNode ajaxJson = mapper.readTree(ajaxResponse);
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
collectCommentsFrom(collector, ajaxJson, pageUrl); collectCommentsFrom(collector, ajaxJson, pageUrl);
return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson));
} }
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) {
List<JsonNode> comments = ajaxJson.findValues("commentRenderer"); List<JsonNode> comments = ajaxJson.findValues("commentRenderer");
comments.stream().forEach(c -> { comments.stream().forEach(c -> {
CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() {
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
return pageUrl; return pageUrl;
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
return c.get("authorText").get("simpleText").asText(); return c.get("authorText").get("simpleText").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getPublishedTime() throws ParsingException { public String getPublishedTime() throws ParsingException {
try { try {
return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); return c.get("publishedTimeText").get("runs").get(0).get("text").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public Integer getLikeCount() throws ParsingException { public Integer getLikeCount() throws ParsingException {
try { try {
return c.get("likeCount").intValue(); return c.get("likeCount").intValue();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getCommentText() throws ParsingException { public String getCommentText() throws ParsingException {
try { try {
if (null != c.get("contentText").get("simpleText")) { if (null != c.get("contentText").get("simpleText")) {
return c.get("contentText").get("simpleText").asText(); return c.get("contentText").get("simpleText").asText();
} else { } else {
return c.get("contentText").get("runs").get(0).get("text").asText(); return c.get("contentText").get("runs").get(0).get("text").asText();
} }
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getCommentId() throws ParsingException { public String getCommentId() throws ParsingException {
try { try {
return c.get("commentId").asText(); return c.get("commentId").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getAuthorThumbnail() throws ParsingException { public String getAuthorThumbnail() throws ParsingException {
try { try {
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getAuthorName() throws ParsingException { public String getAuthorName() throws ParsingException {
try { try {
return c.get("authorText").get("simpleText").asText(); return c.get("authorText").get("simpleText").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
@Override @Override
public String getAuthorEndpoint() throws ParsingException { public String getAuthorEndpoint() throws ParsingException {
try { try {
return "https://youtube.com" return "https://youtube.com"
+ c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
}; };
collector.commit(extractor); collector.commit(extractor);
}); });
} }
@Override @Override
public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { public void onFetchPage(Downloader downloader) throws IOException, ExtractionException {
DownloadResponse response = downloader.downloadWithHeaders(getUrl()); DownloadResponse response = downloader.get(getUrl());
String responseBody = response.getResponseBody(); String responseBody = response.getResponseBody();
cookies = response.getResponseHeaders().get("Set-Cookie"); cookies = response.getResponseHeaders().get("Set-Cookie");
sessionToken = findValue(responseBody, "XSRF_TOKEN"); sessionToken = findValue(responseBody, "XSRF_TOKEN");
commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
// TODO Auto-generated method stub // TODO Auto-generated method stub
return null; return null;
} }
private String makeAjaxRequest(String siteUrl) throws IOException { private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
StringBuilder postData = new StringBuilder(); StringBuilder postData = new StringBuilder();
postData.append(URLEncoder.encode("session_token", "UTF-8")); postData.append(URLEncoder.encode("session_token", "UTF-8"));
postData.append('='); postData.append('=');
postData.append(URLEncoder.encode(sessionToken, "UTF-8")); postData.append(URLEncoder.encode(sessionToken, "UTF-8"));
byte[] postDataBytes = postData.toString().getBytes("UTF-8");
URL url = new URL(siteUrl); Map<String, List<String>> requestHeaders = new HashMap<>();
HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded"));
con.setRequestMethod("POST"); requestHeaders.put("Accept", Arrays.asList("*/*"));
con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT));
con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815"));
con.setRequestProperty("Accept", "*/*"); requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1"));
con.setRequestProperty("User-Agent", USER_AGENT); requestHeaders.put("Cookie", cookies);
con.setRequestProperty("X-YouTube-Client-Version", "2.20180815");
con.setRequestProperty("X-YouTube-Client-Name", "1");
// set cookies
cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c));
con.setDoOutput(true);
con.getOutputStream().write(postDataBytes);
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); return NewPipe.getDownloader().post(siteUrl, postData.toString(), requestHeaders).getResponseBody();
StringBuilder sb = new StringBuilder(); }
String inputLine;
while ((inputLine = in.readLine()) != null) {
sb.append(inputLine);
}
return sb.toString();
}
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException { private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();
boolean first = true; boolean first = true;
for (Map.Entry<String, String> entry : params.entrySet()) { for (Map.Entry<String, String> entry : params.entrySet()) {
if (first) if (first)
first = false; first = false;
else else
result.append("&"); result.append("&");
result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); result.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
result.append("="); result.append("=");
result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); result.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
} }
return result.toString(); return result.toString();
} }
private String findValue(String doc, String key) { private String findValue(String doc, String key) {
int beginIndex = doc.indexOf(key) + key.length() + 4; int beginIndex = doc.indexOf(key) + key.length() + 4;
int endIndex = doc.indexOf("\"", beginIndex); int endIndex = doc.indexOf("\"", beginIndex);
return doc.substring(beginIndex, endIndex); return doc.substring(beginIndex, endIndex);
} }
} }

View File

@ -1,16 +1,5 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler; package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URI; import java.net.URI;
@ -18,25 +7,16 @@ import java.net.URISyntaxException;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.util.List; import java.util.List;
/* import org.jsoup.Jsoup;
* Created by Christian Schabesberger on 25.07.16. import org.jsoup.nodes.Document;
* import org.jsoup.nodes.Element;
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org> import org.schabi.newpipe.extractor.Downloader;
* YoutubeCommentsLinkHandlerFactory.java is part of NewPipe. import org.schabi.newpipe.extractor.NewPipe;
* import org.schabi.newpipe.extractor.exceptions.FoundAdException;
* NewPipe is free software: you can redistribute it and/or modify import org.schabi.newpipe.extractor.exceptions.ParsingException;
* it under the terms of the GNU General Public License as published by import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
* the Free Software Foundation, either version 3 of the License, or import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
* (at your option) any later version. import org.schabi.newpipe.extractor.utils.Parser;
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {

View File

@ -36,144 +36,184 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
public class Downloader implements org.schabi.newpipe.extractor.Downloader { public class Downloader implements org.schabi.newpipe.extractor.Downloader {
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
private static String mCookies = ""; private static String mCookies = "";
private static Downloader instance = null; private static Downloader instance = null;
private Downloader() { private Downloader() {
} }
public static Downloader getInstance() { public static Downloader getInstance() {
if (instance == null) { if (instance == null) {
synchronized (Downloader.class) { synchronized (Downloader.class) {
if (instance == null) { if (instance == null) {
instance = new Downloader(); instance = new Downloader();
} }
} }
} }
return instance; return instance;
} }
public static synchronized void setCookies(String cookies) { public static synchronized void setCookies(String cookies) {
Downloader.mCookies = cookies; Downloader.mCookies = cookies;
} }
public static synchronized String getCookies() { public static synchronized String getCookies() {
return Downloader.mCookies; return Downloader.mCookies;
} }
/** /**
* Download the text file at the supplied URL as in download(String), but set * Download the text file at the supplied URL as in download(String), but set
* the HTTP header field "Accept-Language" to the supplied string. * the HTTP header field "Accept-Language" to the supplied string.
* *
* @param siteUrl the URL of the text file to return the contents of * @param siteUrl the URL of the text file to return the contents of
* @param language the language (usually a 2-character code) to set as the * @param language the language (usually a 2-character code) to set as the
* preferred language * preferred language
* @return the contents of the specified text file * @return the contents of the specified text file
*/ */
public String download(String siteUrl, String language) throws IOException, ReCaptchaException { public String download(String siteUrl, String language) throws IOException, ReCaptchaException {
Map<String, String> requestProperties = new HashMap<>(); Map<String, String> requestProperties = new HashMap<>();
requestProperties.put("Accept-Language", language); requestProperties.put("Accept-Language", language);
return download(siteUrl, requestProperties); return download(siteUrl, requestProperties);
} }
/** /**
* Download the text file at the supplied URL as in download(String), but set * Download the text file at the supplied URL as in download(String), but set
* the HTTP request header fields to the supplied custom properties. * the HTTP request header fields to the supplied custom properties.
* *
* @param siteUrl the URL of the text file to return the contents of * @param siteUrl the URL of the text file to return the contents of
* @param customProperties set request header properties * @param customProperties set request header properties
* @return the contents of the specified text file * @return the contents of the specified text file
* @throws IOException * @throws IOException
*/ */
public String download(String siteUrl, Map<String, String> customProperties) public String download(String siteUrl, Map<String, String> customProperties)
throws IOException, ReCaptchaException { throws IOException, ReCaptchaException {
URL url = new URL(siteUrl); URL url = new URL(siteUrl);
HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
for (Map.Entry<String, String> pair : customProperties.entrySet()) { for (Map.Entry<String, String> pair : customProperties.entrySet()) {
con.setRequestProperty(pair.getKey(), pair.getValue()); con.setRequestProperty(pair.getKey(), pair.getValue());
} }
return dl(con); return dl(con);
} }
/** /**
* Common functionality between download(String url) and download(String url, * Common functionality between download(String url) and download(String url,
* String language) * String language)
*/ */
private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException {
StringBuilder response = new StringBuilder(); StringBuilder response = new StringBuilder();
BufferedReader in = null; BufferedReader in = null;
try { try {
con.setConnectTimeout(30 * 1000);// 30s
con.setReadTimeout(30 * 1000);// 30s
con.setRequestMethod("GET");
con.setRequestProperty("User-Agent", USER_AGENT);
if (getCookies().length() > 0) { con.setRequestMethod("GET");
con.addRequestProperty("Cookie", getCookies()); setDefaults(con);
}
in = new BufferedReader(new InputStreamReader(con.getInputStream())); in = new BufferedReader(new InputStreamReader(con.getInputStream()));
String inputLine; String inputLine;
while ((inputLine = in.readLine()) != null) { while ((inputLine = in.readLine()) != null) {
response.append(inputLine); response.append(inputLine);
} }
} catch (UnknownHostException uhe) {// thrown when there's no internet connection } catch (UnknownHostException uhe) {// thrown when there's no internet
throw new IOException("unknown host or no network", uhe); // connection
// Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); throw new IOException("unknown host or no network", uhe);
} catch (Exception e) { // Toast.makeText(getActivity(), uhe.getMessage(),
/* // Toast.LENGTH_LONG).show();
* HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge } catch (Exception e) {
* request See : https://github.com/rg3/youtube-dl/issues/5138 /*
*/ * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge
if (con.getResponseCode() == 429) { * request See : https://github.com/rg3/youtube-dl/issues/5138
throw new ReCaptchaException("reCaptcha Challenge requested"); */
} if (con.getResponseCode() == 429) {
throw new ReCaptchaException("reCaptcha Challenge requested");
}
throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e);
} finally { } finally {
if (in != null) { if (in != null) {
in.close(); in.close();
} }
} }
return response.toString(); return response.toString();
} }
/** private static void setDefaults(HttpsURLConnection con) {
* Download (via HTTP) the text file located at the supplied URL, and return its
* contents. Primarily intended for downloading web pages.
*
* @param siteUrl the URL of the text file to download
* @return the contents of the specified text file
*/
public String download(String siteUrl) throws IOException, ReCaptchaException {
URL url = new URL(siteUrl);
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
return dl(con);
}
@Override con.setConnectTimeout(30 * 1000);// 30s
public DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders) con.setReadTimeout(30 * 1000);// 30s
throws IOException, ReCaptchaException {
URL url = new URL(siteUrl);
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value));
}
String responseBody = dl(con);
return new DownloadResponse(responseBody, con.getHeaderFields());
}
@Override // set default user agent
public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { if (null == con.getRequestProperty("User-Agent")) {
URL url = new URL(siteUrl); con.setRequestProperty("User-Agent", USER_AGENT);
HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); }
String responseBody = dl(con);
return new DownloadResponse(responseBody, con.getHeaderFields()); // add default cookies
} if (getCookies().length() > 0) {
con.addRequestProperty("Cookie", getCookies());
}
}
/**
 * Fetch the text resource located at the supplied URL and return its
 * contents. Primarily intended for downloading web pages. The connection is
 * opened from the URL, cast to {@code HttpsURLConnection} and handed to
 * {@code dl}.
 *
 * @param siteUrl the URL of the text file to download
 * @return the contents of the specified text file
 */
public String download(String siteUrl) throws IOException, ReCaptchaException {
    final URL target = new URL(siteUrl);
    // alternative: NetCipher.getHttpsURLConnection(target)
    final HttpsURLConnection connection = (HttpsURLConnection) target.openConnection();
    return dl(connection);
}
/**
 * Download the resource at the given URL, sending the supplied request
 * headers, and return the body together with the response headers.
 *
 * @param siteUrl        the URL to request
 * @param requestHeaders header names mapped to the values to send; every
 *                       value of a name is added as its own request property
 * @return the response body and the response header fields
 */
@Override
public DownloadResponse get(String siteUrl, Map<String, List<String>> requestHeaders)
        throws IOException, ReCaptchaException {
    final HttpsURLConnection connection = (HttpsURLConnection) new URL(siteUrl).openConnection();

    for (Map.Entry<String, List<String>> header : requestHeaders.entrySet()) {
        for (String headerValue : header.getValue()) {
            connection.addRequestProperty(header.getKey(), headerValue);
        }
    }

    final String body = dl(connection);
    return new DownloadResponse(body, connection.getHeaderFields());
}
/**
 * Download the resource at the given URL without any caller-supplied request
 * headers and return the body together with the response headers.
 *
 * @param siteUrl the URL to request
 * @return the response body and the response header fields
 */
@Override
public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException {
    final HttpsURLConnection connection = (HttpsURLConnection) new URL(siteUrl).openConnection();
    final String body = dl(connection);
    return new DownloadResponse(body, connection.getHeaderFields());
}
/**
 * Send an HTTP POST request with the given body and headers and return the
 * response body together with the response headers.
 *
 * The request body is encoded as UTF-8. The response is read line by line and
 * the lines are concatenated without line separators.
 *
 * @param siteUrl        the URL to post to
 * @param requestBody    the text to send as the request body
 * @param requestHeaders header names mapped to the values to send; every
 *                       value of a name is added as its own request property
 * @return the response body and the response header fields
 * @throws IOException if the connection cannot be opened, written to or read
 */
@Override
public DownloadResponse post(String siteUrl, String requestBody, Map<String, List<String>> requestHeaders)
        throws IOException, ReCaptchaException {
    URL url = new URL(siteUrl);
    HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
    con.setRequestMethod("POST");
    for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
        pair.getValue().forEach(value -> con.addRequestProperty(pair.getKey(), value));
    }
    // set fields to default if not set already
    setDefaults(con);

    byte[] postDataBytes = requestBody.getBytes("UTF-8");
    con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length));
    con.setDoOutput(true);

    // close the request stream even when writing fails
    try (java.io.OutputStream out = con.getOutputStream()) {
        out.write(postDataBytes);
    }

    StringBuilder sb = new StringBuilder();
    // close the response reader even when reading fails
    try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()))) {
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            sb.append(inputLine);
        }
    }
    return new DownloadResponse(sb.toString(), con.getHeaderFields());
}
} }

View File

@ -16,31 +16,31 @@ import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsE
public class YoutubeCommentsExtractorTest { public class YoutubeCommentsExtractorTest {
private static YoutubeCommentsExtractor extractor; private static YoutubeCommentsExtractor extractor;
@BeforeClass @BeforeClass
public static void setUp() throws Exception { public static void setUp() throws Exception {
NewPipe.init(Downloader.getInstance()); NewPipe.init(Downloader.getInstance());
extractor = (YoutubeCommentsExtractor) YouTube extractor = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs");
extractor.fetchPage(); extractor.fetchPage();
} }
@Test @Test
public void testGetComments() throws IOException, ExtractionException { public void testGetComments() throws IOException, ExtractionException {
boolean result = false; boolean result = false;
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
result = findInComments(comments, "i should really be in the top comment.lol"); result = findInComments(comments, "i should really be in the top comment.lol");
while (comments.hasNextPage()) { while (comments.hasNextPage()) {
comments = extractor.getPage(comments.getNextPageUrl()); comments = extractor.getPage(comments.getNextPageUrl());
result = findInComments(comments, "i should really be in the top comment.lol"); result = findInComments(comments, "i should really be in the top comment.lol");
} }
assertTrue(result); assertTrue(result);
} }
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) { private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent();
} }
} }