mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-13 13:50:33 +05:30
added comments extractor
This commit is contained in:
parent
1e7bcfbd66
commit
53d3997904
@ -1,7 +1,7 @@
|
|||||||
allprojects {
|
allprojects {
|
||||||
apply plugin: 'java-library'
|
apply plugin: 'java-library'
|
||||||
sourceCompatibility = 1.7
|
sourceCompatibility = 1.8
|
||||||
targetCompatibility = 1.7
|
targetCompatibility = 1.8
|
||||||
|
|
||||||
version 'v0.13.0'
|
version 'v0.13.0'
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ dependencies {
|
|||||||
implementation 'org.mozilla:rhino:1.7.7.1'
|
implementation 'org.mozilla:rhino:1.7.7.1'
|
||||||
implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0'
|
implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0'
|
||||||
implementation 'org.nibor.autolink:autolink:0.8.0'
|
implementation 'org.nibor.autolink:autolink:0.8.0'
|
||||||
|
implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.5'
|
||||||
|
|
||||||
testImplementation 'junit:junit:4.12'
|
testImplementation 'junit:junit:4.12'
|
||||||
}
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
package org.schabi.newpipe.extractor;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
 * Value holder for the result of a download: the response body together with
 * the response headers that were returned alongside it.
 */
public class DownloadResponse {
    private final String responseBody;
    private final Map<String, List<String>> responseHeaders;

    /**
     * Creates a response holder. Both arguments are stored exactly as given;
     * the header map is kept by reference and is not copied.
     *
     * @param responseBody the body of the response
     * @param headers      the response headers, keyed by header name
     */
    public DownloadResponse(String responseBody, Map<String, List<String>> headers) {
        // no explicit super() needed: the implicit Object constructor call is enough
        this.responseBody = responseBody;
        this.responseHeaders = headers;
    }

    /**
     * @return the response body that was passed to the constructor
     */
    public String getResponseBody() {
        return responseBody;
    }

    /**
     * @return the header map that was passed to the constructor (same instance)
     */
    public Map<String, List<String>> getResponseHeaders() {
        return responseHeaders;
    }
}
|
@ -1,10 +1,11 @@
|
|||||||
package org.schabi.newpipe.extractor;
|
package org.schabi.newpipe.extractor;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 28.01.16.
|
* Created by Christian Schabesberger on 28.01.16.
|
||||||
*
|
*
|
||||||
@ -28,19 +29,20 @@ import java.util.Map;
|
|||||||
public interface Downloader {
|
public interface Downloader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download the text file at the supplied URL as in download(String),
|
* Download the text file at the supplied URL as in download(String), but set
|
||||||
* but set the HTTP header field "Accept-Language" to the supplied string.
|
* the HTTP header field "Accept-Language" to the supplied string.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to return the contents of
|
* @param siteUrl the URL of the text file to return the contents of
|
||||||
* @param language the language (usually a 2-character code) to set as the preferred language
|
* @param language the language (usually a 2-character code) to set as the
|
||||||
|
* preferred language
|
||||||
* @return the contents of the specified text file
|
* @return the contents of the specified text file
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
String download(String siteUrl, String language) throws IOException, ReCaptchaException;
|
String download(String siteUrl, String language) throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download the text file at the supplied URL as in download(String),
|
* Download the text file at the supplied URL as in download(String), but set
|
||||||
* but set the HTTP header field "Accept-Language" to the supplied string.
|
* the HTTP header field "Accept-Language" to the supplied string.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to return the contents of
|
* @param siteUrl the URL of the text file to return the contents of
|
||||||
* @param customProperties set request header properties
|
* @param customProperties set request header properties
|
||||||
@ -50,12 +52,17 @@ public interface Downloader {
|
|||||||
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
|
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download (via HTTP) the text file located at the supplied URL, and return its contents.
|
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||||
* Primarily intended for downloading web pages.
|
* contents. Primarily intended for downloading web pages.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to download
|
* @param siteUrl the URL of the text file to download
|
||||||
* @return the contents of the specified text file
|
* @return the contents of the specified text file
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
String download(String siteUrl) throws IOException, ReCaptchaException;
|
String download(String siteUrl) throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
|
DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||||
|
throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
|
DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException;
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,7 @@ public abstract class InfoItem implements Serializable {
|
|||||||
public enum InfoType {
|
public enum InfoType {
|
||||||
STREAM,
|
STREAM,
|
||||||
PLAYLIST,
|
PLAYLIST,
|
||||||
CHANNEL
|
CHANNEL,
|
||||||
|
COMMENT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,18 +1,24 @@
|
|||||||
package org.schabi.newpipe.extractor;
|
package org.schabi.newpipe.extractor;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
|
||||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
|
||||||
import org.schabi.newpipe.extractor.linkhandler.*;
|
|
||||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
|
||||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||||
|
|
||||||
public abstract class StreamingService {
|
public abstract class StreamingService {
|
||||||
public static class ServiceInfo {
|
public static class ServiceInfo {
|
||||||
private final String name;
|
private final String name;
|
||||||
@ -71,6 +77,7 @@ public abstract class StreamingService {
|
|||||||
public abstract ListLinkHandlerFactory getChannelLHFactory();
|
public abstract ListLinkHandlerFactory getChannelLHFactory();
|
||||||
public abstract ListLinkHandlerFactory getPlaylistLHFactory();
|
public abstract ListLinkHandlerFactory getPlaylistLHFactory();
|
||||||
public abstract SearchQueryHandlerFactory getSearchQHFactory();
|
public abstract SearchQueryHandlerFactory getSearchQHFactory();
|
||||||
|
public abstract ListLinkHandlerFactory getCommentsLHFactory();
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
@ -84,6 +91,7 @@ public abstract class StreamingService {
|
|||||||
public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException;
|
public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException;
|
||||||
public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException;
|
public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException;
|
||||||
public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException;
|
public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException;
|
||||||
|
public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException;
|
||||||
|
|
||||||
public SearchExtractor getSearchExtractor(String query, List<String> contentFilter, String sortFilter, String contentCountry) throws ExtractionException {
|
public SearchExtractor getSearchExtractor(String query, List<String> contentFilter, String sortFilter, String contentCountry) throws ExtractionException {
|
||||||
return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry);
|
return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry);
|
||||||
@ -113,6 +121,12 @@ public abstract class StreamingService {
|
|||||||
return getStreamExtractor(getStreamLHFactory().fromUrl(url));
|
return getStreamExtractor(getStreamLHFactory().fromUrl(url));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
|
||||||
|
return getCommentsExtractor(getCommentsLHFactory().fromUrl(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.ListExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
|
||||||
|
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
|
||||||
|
|
||||||
|
public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
||||||
|
super(service, uiHandler);
|
||||||
|
// TODO Auto-generated constructor stub
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.ListInfo;
|
||||||
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
|
||||||
|
public class CommentsInfo extends ListInfo<CommentsInfoItem>{
|
||||||
|
|
||||||
|
private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) {
|
||||||
|
super(serviceId, listUrlIdHandler, name);
|
||||||
|
// TODO Auto-generated constructor stub
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CommentsInfo getInfo(String url) throws IOException, ExtractionException {
|
||||||
|
return getInfo(NewPipe.getServiceByUrl(url), url);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,76 @@
|
|||||||
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
|
|
||||||
|
public class CommentsInfoItem extends InfoItem{
|
||||||
|
|
||||||
|
private String commentId;
|
||||||
|
private String commentText;
|
||||||
|
private String authorName;
|
||||||
|
private String authorThumbnail;
|
||||||
|
private String authorEndpoint;
|
||||||
|
private String publishedTime;
|
||||||
|
private Integer likeCount;
|
||||||
|
|
||||||
|
public CommentsInfoItem(int serviceId, String url, String name) {
|
||||||
|
super(InfoType.COMMENT, serviceId, url, name);
|
||||||
|
// TODO Auto-generated constructor stub
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCommentText() {
|
||||||
|
return commentText;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCommentText(String contentText) {
|
||||||
|
this.commentText = contentText;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthorName() {
|
||||||
|
return authorName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthorName(String authorName) {
|
||||||
|
this.authorName = authorName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthorThumbnail() {
|
||||||
|
return authorThumbnail;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthorThumbnail(String authorThumbnail) {
|
||||||
|
this.authorThumbnail = authorThumbnail;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthorEndpoint() {
|
||||||
|
return authorEndpoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthorEndpoint(String authorEndpoint) {
|
||||||
|
this.authorEndpoint = authorEndpoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublishedTime() {
|
||||||
|
return publishedTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublishedTime(String publishedTime) {
|
||||||
|
this.publishedTime = publishedTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getLikeCount() {
|
||||||
|
return likeCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLikeCount(Integer likeCount) {
|
||||||
|
this.likeCount = likeCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCommentId() {
|
||||||
|
return commentId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCommentId(String commentId) {
|
||||||
|
this.commentId = commentId;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
public interface CommentsInfoItemExtractor extends InfoItemExtractor {
|
||||||
|
|
||||||
|
String getCommentId() throws ParsingException;
|
||||||
|
|
||||||
|
String getCommentText() throws ParsingException;
|
||||||
|
|
||||||
|
String getAuthorName() throws ParsingException;
|
||||||
|
|
||||||
|
String getAuthorThumbnail() throws ParsingException;
|
||||||
|
|
||||||
|
String getAuthorEndpoint() throws ParsingException;
|
||||||
|
|
||||||
|
String getPublishedTime() throws ParsingException;
|
||||||
|
|
||||||
|
Integer getLikeCount() throws ParsingException;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,103 @@
|
|||||||
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
|
import org.schabi.newpipe.extractor.InfoItemsCollector;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Created by Christian Schabesberger on 28.02.16.
|
||||||
|
*
|
||||||
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||||
|
* CommentsInfoItemsCollector.java is part of NewPipe.
|
||||||
|
*
|
||||||
|
* NewPipe is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* NewPipe is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoItem, CommentsInfoItemExtractor> {
|
||||||
|
|
||||||
|
public CommentsInfoItemsCollector(int serviceId) {
|
||||||
|
super(serviceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException {
|
||||||
|
|
||||||
|
// important information
|
||||||
|
int serviceId = getServiceId();
|
||||||
|
String url = extractor.getUrl();
|
||||||
|
String name = extractor.getName();
|
||||||
|
|
||||||
|
CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name);
|
||||||
|
|
||||||
|
// optional information
|
||||||
|
try {
|
||||||
|
resultItem.setCommentId(extractor.getCommentId());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setCommentText(extractor.getCommentText());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setAuthorName(extractor.getAuthorName());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setPublishedTime(extractor.getPublishedTime());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setLikeCount(extractor.getLikeCount());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
return resultItem;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void commit(CommentsInfoItemExtractor extractor) {
|
||||||
|
try {
|
||||||
|
addItem(extract(extractor));
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<CommentsInfoItem> getCommentsInfoItemList() {
|
||||||
|
List<CommentsInfoItem> siiList = new Vector<>();
|
||||||
|
for (InfoItem ii : super.getItems()) {
|
||||||
|
if (ii instanceof CommentsInfoItem) {
|
||||||
|
siiList.add((CommentsInfoItem) ii);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return siiList;
|
||||||
|
}
|
||||||
|
}
|
@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud;
|
|||||||
import org.schabi.newpipe.extractor.*;
|
import org.schabi.newpipe.extractor.*;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.*;
|
import org.schabi.newpipe.extractor.linkhandler.*;
|
||||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
||||||
@ -98,4 +99,16 @@ public class SoundcloudService extends StreamingService {
|
|||||||
public SubscriptionExtractor getSubscriptionExtractor() {
|
public SubscriptionExtractor getSubscriptionExtractor() {
|
||||||
return new SoundcloudSubscriptionExtractor(this);
|
return new SoundcloudSubscriptionExtractor(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ListLinkHandlerFactory getCommentsLHFactory() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,21 +1,42 @@
|
|||||||
package org.schabi.newpipe.extractor.services.youtube;
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.*;
|
import static java.util.Arrays.asList;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.*;
|
import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO;
|
||||||
|
import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE;
|
||||||
|
import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
import org.schabi.newpipe.extractor.SuggestionExtractor;
|
||||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.extractors.*;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.*;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeTrendingLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||||
|
|
||||||
import static java.util.Arrays.asList;
|
|
||||||
import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 23.08.15.
|
* Created by Christian Schabesberger on 23.08.15.
|
||||||
@ -115,4 +136,14 @@ public class YoutubeService extends StreamingService {
|
|||||||
return new YoutubeSubscriptionExtractor(this);
|
return new YoutubeSubscriptionExtractor(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ListLinkHandlerFactory getCommentsLHFactory() {
|
||||||
|
return YoutubeCommentsLinkHandlerFactory.getInstance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
|
||||||
|
return new YoutubeCommentsExtractor(this, urlIdHandler);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,262 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
|
private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0";
|
||||||
|
|
||||||
|
private List<String> cookies;
|
||||||
|
private String sessionToken;
|
||||||
|
private String commentsToken;
|
||||||
|
|
||||||
|
private ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
/**
 * Passes both arguments straight through to the {@link CommentsExtractor} constructor.
 *
 * @param service   the streaming service this extractor belongs to
 * @param uiHandler the list link handler this extractor works on
 */
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
    super(service, uiHandler);
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||||
|
// initial page does not load any comments but is required to get session token
|
||||||
|
// and cookies
|
||||||
|
return getPage(getNextPageUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNextPageUrl() throws IOException, ExtractionException {
|
||||||
|
return getNextPageUrl(commentsToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException {
|
||||||
|
String continuation;
|
||||||
|
try {
|
||||||
|
continuation = ajaxJson.findValue("itemSectionContinuation").get("continuations").findValue("continuation")
|
||||||
|
.asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// no more comments
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return getNextPageUrl(continuation);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getNextPageUrl(String continuation) throws ParsingException {
|
||||||
|
Map<String, String> params = new HashMap<>();
|
||||||
|
params.put("action_get_comments", "1");
|
||||||
|
params.put("pbj", "1");
|
||||||
|
params.put("ctoken", continuation);
|
||||||
|
params.put("continuation", continuation);
|
||||||
|
try {
|
||||||
|
return "https://www.youtube.com/comment_service_ajax?" + getDataString(params);
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
throw new ParsingException("Could not get next page url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||||
|
if (pageUrl == null || pageUrl.isEmpty()) {
|
||||||
|
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
||||||
|
}
|
||||||
|
String ajaxResponse = makeAjaxRequest(pageUrl);
|
||||||
|
JsonNode ajaxJson = mapper.readTree(ajaxResponse);
|
||||||
|
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
|
collectCommentsFrom(collector, ajaxJson, pageUrl);
|
||||||
|
return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) {
|
||||||
|
List<JsonNode> comments = ajaxJson.findValues("commentRenderer");
|
||||||
|
comments.stream().forEach(c -> {
|
||||||
|
CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUrl() throws ParsingException {
|
||||||
|
return pageUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("authorText").get("simpleText").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPublishedTime() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("publishedTimeText").get("runs").get(0).get("text").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer getLikeCount() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("likeCount").intValue();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCommentText() throws ParsingException {
|
||||||
|
try {
|
||||||
|
if (null != c.get("contentText").get("simpleText")) {
|
||||||
|
return c.get("contentText").get("simpleText").asText();
|
||||||
|
} else {
|
||||||
|
return c.get("contentText").get("runs").get(0).get("text").asText();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCommentId() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("commentId").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAuthorThumbnail() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAuthorName() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return c.get("authorText").get("simpleText").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAuthorEndpoint() throws ParsingException {
|
||||||
|
try {
|
||||||
|
return "https://youtube.com"
|
||||||
|
+ c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
collector.commit(extractor);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFetchPage(Downloader downloader) throws IOException, ExtractionException {
|
||||||
|
DownloadResponse response = downloader.downloadWithHeaders(getUrl());
|
||||||
|
String responseBody = response.getResponseBody();
|
||||||
|
cookies = response.getResponseHeaders().get("Set-Cookie");
|
||||||
|
sessionToken = findValue(responseBody, "XSRF_TOKEN");
|
||||||
|
commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() throws ParsingException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String makeAjaxRequest(String siteUrl) throws IOException {
|
||||||
|
|
||||||
|
StringBuilder postData = new StringBuilder();
|
||||||
|
postData.append(URLEncoder.encode("session_token", "UTF-8"));
|
||||||
|
postData.append('=');
|
||||||
|
postData.append(URLEncoder.encode(sessionToken, "UTF-8"));
|
||||||
|
byte[] postDataBytes = postData.toString().getBytes("UTF-8");
|
||||||
|
|
||||||
|
URL url = new URL(siteUrl);
|
||||||
|
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||||
|
con.setRequestMethod("POST");
|
||||||
|
con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||||
|
con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length));
|
||||||
|
con.setRequestProperty("Accept", "*/*");
|
||||||
|
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||||
|
con.setRequestProperty("X-YouTube-Client-Version", "2.20180815");
|
||||||
|
con.setRequestProperty("X-YouTube-Client-Name", "1");
|
||||||
|
// set cookies
|
||||||
|
cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c));
|
||||||
|
con.setDoOutput(true);
|
||||||
|
con.getOutputStream().write(postDataBytes);
|
||||||
|
|
||||||
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"));
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
String inputLine;
|
||||||
|
while ((inputLine = in.readLine()) != null) {
|
||||||
|
sb.append(inputLine);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
boolean first = true;
|
||||||
|
for (Map.Entry<String, String> entry : params.entrySet()) {
|
||||||
|
if (first)
|
||||||
|
first = false;
|
||||||
|
else
|
||||||
|
result.append("&");
|
||||||
|
result.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
|
||||||
|
result.append("=");
|
||||||
|
result.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
|
||||||
|
}
|
||||||
|
return result.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String findValue(String doc, String key) {
|
||||||
|
int beginIndex = doc.indexOf(key) + key.length() + 4;
|
||||||
|
int endIndex = doc.indexOf("\"", beginIndex);
|
||||||
|
return doc.substring(beginIndex, endIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,183 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URLDecoder;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Created by Christian Schabesberger on 25.07.16.
|
||||||
|
*
|
||||||
|
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
|
||||||
|
* YoutubeCommentsLinkHandlerFactory.java is part of NewPipe.
|
||||||
|
*
|
||||||
|
* NewPipe is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* NewPipe is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||||
|
|
||||||
|
private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory();
|
||||||
|
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
|
||||||
|
|
||||||
|
public static YoutubeCommentsLinkHandlerFactory getInstance() {
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getId(String url) throws ParsingException, IllegalArgumentException {
|
||||||
|
if (url.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("The url parameter should not be empty");
|
||||||
|
}
|
||||||
|
|
||||||
|
String id;
|
||||||
|
String lowercaseUrl = url.toLowerCase();
|
||||||
|
if (lowercaseUrl.contains("youtube")) {
|
||||||
|
if (url.contains("attribution_link")) {
|
||||||
|
try {
|
||||||
|
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
|
||||||
|
String query = URLDecoder.decode(escapedQuery, "UTF-8");
|
||||||
|
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
|
||||||
|
} catch (UnsupportedEncodingException uee) {
|
||||||
|
throw new ParsingException("Could not parse attribution_link", uee);
|
||||||
|
}
|
||||||
|
} else if (lowercaseUrl.contains("youtube.com/shared?ci=")) {
|
||||||
|
return getRealIdFromSharedLink(url);
|
||||||
|
} else if (url.contains("vnd.youtube")) {
|
||||||
|
id = Parser.matchGroup1(ID_PATTERN, url);
|
||||||
|
} else if (url.contains("embed")) {
|
||||||
|
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
|
||||||
|
} else if (url.contains("googleads")) {
|
||||||
|
throw new FoundAdException("Error found add: " + url);
|
||||||
|
} else {
|
||||||
|
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
|
||||||
|
}
|
||||||
|
} else if (lowercaseUrl.contains("youtu.be")) {
|
||||||
|
if (url.contains("v=")) {
|
||||||
|
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
|
||||||
|
} else {
|
||||||
|
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
|
||||||
|
}
|
||||||
|
} else if(lowercaseUrl.contains("hooktube")) {
|
||||||
|
if(lowercaseUrl.contains("&v=")
|
||||||
|
|| lowercaseUrl.contains("?v=")) {
|
||||||
|
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
|
||||||
|
} else if (url.contains("/embed/")) {
|
||||||
|
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
|
||||||
|
} else if (url.contains("/v/")) {
|
||||||
|
id = Parser.matchGroup1("v/" + ID_PATTERN, url);
|
||||||
|
} else if (url.contains("/watch/")) {
|
||||||
|
id = Parser.matchGroup1("watch/" + ID_PATTERN, url);
|
||||||
|
} else {
|
||||||
|
throw new ParsingException("Error no suitable url: " + url);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new ParsingException("Error no suitable url: " + url);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (!id.isEmpty()) {
|
||||||
|
return id;
|
||||||
|
} else {
|
||||||
|
throw new ParsingException("Error could not parse url: " + url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the real url from a shared uri.
|
||||||
|
* <p>
|
||||||
|
* Shared URI's look like this:
|
||||||
|
* <pre>
|
||||||
|
* * https://www.youtube.com/shared?ci=PJICrTByb3E
|
||||||
|
* * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @param url The shared url
|
||||||
|
* @return the id of the stream
|
||||||
|
* @throws ParsingException
|
||||||
|
*/
|
||||||
|
private String getRealIdFromSharedLink(String url) throws ParsingException {
|
||||||
|
URI uri;
|
||||||
|
try {
|
||||||
|
uri = new URI(url);
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new ParsingException("Invalid shared link", e);
|
||||||
|
}
|
||||||
|
String sharedId = getSharedId(uri);
|
||||||
|
Downloader downloader = NewPipe.getDownloader();
|
||||||
|
String content;
|
||||||
|
try {
|
||||||
|
content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId);
|
||||||
|
} catch (IOException | ReCaptchaException e) {
|
||||||
|
throw new ParsingException("Unable to resolve shared link", e);
|
||||||
|
}
|
||||||
|
final Document document = Jsoup.parse(content);
|
||||||
|
|
||||||
|
final Element element = document.select("link[rel=\"canonical\"]").first();
|
||||||
|
final String urlWithRealId = (element != null)
|
||||||
|
? element.attr("abs:href")
|
||||||
|
: document.select("meta[property=\"og:url\"]").first()
|
||||||
|
.attr("abs:content");
|
||||||
|
|
||||||
|
String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId);
|
||||||
|
if (sharedId.equals(realId)) {
|
||||||
|
throw new ParsingException("Got same id for as shared info_id: " + sharedId);
|
||||||
|
}
|
||||||
|
return realId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getSharedId(URI uri) throws ParsingException {
|
||||||
|
if (!"/shared".equals(uri.getPath())) {
|
||||||
|
throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")");
|
||||||
|
}
|
||||||
|
return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean onAcceptUrl(final String url) throws FoundAdException {
|
||||||
|
final String lowercaseUrl = url.toLowerCase();
|
||||||
|
if (lowercaseUrl.contains("youtube")
|
||||||
|
|| lowercaseUrl.contains("youtu.be")
|
||||||
|
|| lowercaseUrl.contains("hooktube")) {
|
||||||
|
// bad programming I know
|
||||||
|
try {
|
||||||
|
getId(url);
|
||||||
|
return true;
|
||||||
|
} catch (FoundAdException fe) {
|
||||||
|
throw fe;
|
||||||
|
} catch (ParsingException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
|
||||||
|
return "https://www.youtube.com/watch?v=" + id;
|
||||||
|
}
|
||||||
|
}
|
@ -1,16 +1,18 @@
|
|||||||
package org.schabi.newpipe;
|
package org.schabi.newpipe;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|
||||||
|
|
||||||
import javax.net.ssl.HttpsURLConnection;
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 28.01.16.
|
* Created by Christian Schabesberger on 28.01.16.
|
||||||
@ -62,11 +64,12 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download the text file at the supplied URL as in download(String),
|
* Download the text file at the supplied URL as in download(String), but set
|
||||||
* but set the HTTP header field "Accept-Language" to the supplied string.
|
* the HTTP header field "Accept-Language" to the supplied string.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to return the contents of
|
* @param siteUrl the URL of the text file to return the contents of
|
||||||
* @param language the language (usually a 2-character code) to set as the preferred language
|
* @param language the language (usually a 2-character code) to set as the
|
||||||
|
* preferred language
|
||||||
* @return the contents of the specified text file
|
* @return the contents of the specified text file
|
||||||
*/
|
*/
|
||||||
public String download(String siteUrl, String language) throws IOException, ReCaptchaException {
|
public String download(String siteUrl, String language) throws IOException, ReCaptchaException {
|
||||||
@ -75,17 +78,17 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
return download(siteUrl, requestProperties);
|
return download(siteUrl, requestProperties);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download the text file at the supplied URL as in download(String),
|
* Download the text file at the supplied URL as in download(String), but set
|
||||||
* but set the HTTP header field "Accept-Language" to the supplied string.
|
* the HTTP header field "Accept-Language" to the supplied string.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to return the contents of
|
* @param siteUrl the URL of the text file to return the contents of
|
||||||
* @param customProperties set request header properties
|
* @param customProperties set request header properties
|
||||||
* @return the contents of the specified text file
|
* @return the contents of the specified text file
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException {
|
public String download(String siteUrl, Map<String, String> customProperties)
|
||||||
|
throws IOException, ReCaptchaException {
|
||||||
URL url = new URL(siteUrl);
|
URL url = new URL(siteUrl);
|
||||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||||
for (Map.Entry<String, String> pair : customProperties.entrySet()) {
|
for (Map.Entry<String, String> pair : customProperties.entrySet()) {
|
||||||
@ -95,7 +98,8 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common functionality between download(String url) and download(String url, String language)
|
* Common functionality between download(String url) and download(String url,
|
||||||
|
* String language)
|
||||||
*/
|
*/
|
||||||
private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException {
|
private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException {
|
||||||
StringBuilder response = new StringBuilder();
|
StringBuilder response = new StringBuilder();
|
||||||
@ -108,11 +112,10 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
con.setRequestProperty("User-Agent", USER_AGENT);
|
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||||
|
|
||||||
if (getCookies().length() > 0) {
|
if (getCookies().length() > 0) {
|
||||||
con.setRequestProperty("Cookie", getCookies());
|
con.addRequestProperty("Cookie", getCookies());
|
||||||
}
|
}
|
||||||
|
|
||||||
in = new BufferedReader(
|
in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
||||||
new InputStreamReader(con.getInputStream()));
|
|
||||||
String inputLine;
|
String inputLine;
|
||||||
|
|
||||||
while ((inputLine = in.readLine()) != null) {
|
while ((inputLine = in.readLine()) != null) {
|
||||||
@ -123,9 +126,8 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
// Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show();
|
// Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
/*
|
/*
|
||||||
* HTTP 429 == Too Many Request
|
* HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge
|
||||||
* Receive from Youtube.com = ReCaptcha challenge request
|
* request See : https://github.com/rg3/youtube-dl/issues/5138
|
||||||
* See : https://github.com/rg3/youtube-dl/issues/5138
|
|
||||||
*/
|
*/
|
||||||
if (con.getResponseCode() == 429) {
|
if (con.getResponseCode() == 429) {
|
||||||
throw new ReCaptchaException("reCaptcha Challenge requested");
|
throw new ReCaptchaException("reCaptcha Challenge requested");
|
||||||
@ -142,8 +144,8 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download (via HTTP) the text file located at the supplied URL, and return its contents.
|
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||||
* Primarily intended for downloading web pages.
|
* contents. Primarily intended for downloading web pages.
|
||||||
*
|
*
|
||||||
* @param siteUrl the URL of the text file to download
|
* @param siteUrl the URL of the text file to download
|
||||||
* @return the contents of the specified text file
|
* @return the contents of the specified text file
|
||||||
@ -154,4 +156,24 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
|||||||
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
||||||
return dl(con);
|
return dl(con);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||||
|
throws IOException, ReCaptchaException {
|
||||||
|
URL url = new URL(siteUrl);
|
||||||
|
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||||
|
for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
|
||||||
|
pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value));
|
||||||
|
}
|
||||||
|
String responseBody = dl(con);
|
||||||
|
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException {
|
||||||
|
URL url = new URL(siteUrl);
|
||||||
|
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||||
|
String responseBody = dl(con);
|
||||||
|
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,46 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.schabi.newpipe.Downloader;
|
||||||
|
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
|
||||||
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor;
|
||||||
|
|
||||||
|
public class YoutubeCommentsExtractorTest {
|
||||||
|
|
||||||
|
private static YoutubeCommentsExtractor extractor;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
NewPipe.init(Downloader.getInstance());
|
||||||
|
extractor = (YoutubeCommentsExtractor) YouTube
|
||||||
|
.getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs");
|
||||||
|
extractor.fetchPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetComments() throws IOException, ExtractionException {
|
||||||
|
boolean result = false;
|
||||||
|
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||||
|
|
||||||
|
while (comments.hasNextPage()) {
|
||||||
|
comments = extractor.getPage(comments.getNextPageUrl());
|
||||||
|
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
||||||
|
return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user