mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-14 22:30:33 +05:30
Merge branch 'dev'
This commit is contained in:
commit
c1e1ac1f57
@ -31,6 +31,7 @@ The following sites are currently supported:
|
||||
- YouTube
|
||||
- SoundCloud
|
||||
- MediaCCC
|
||||
- PeerTube (no P2P)
|
||||
|
||||
## License
|
||||
|
||||
|
@ -48,6 +48,7 @@ public class MediaCCCSearchExtractor extends SearchExtractor {
|
||||
@Override
|
||||
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||
InfoItemsSearchCollector searchItems = getInfoItemSearchCollector();
|
||||
searchItems.reset();
|
||||
|
||||
if(getLinkHandler().getContentFilters().contains(CONFERENCES)
|
||||
|| getLinkHandler().getContentFilters().contains(ALL)
|
||||
|
@ -31,7 +31,7 @@ import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
|
||||
|
||||
public class SoundcloudParsingHelper {
|
||||
private static final String HARDCODED_CLIENT_ID = "bkcJLoXNaiFlsLaKBQXOxO5FhW0NJVnu"; // Updated on 29/11/19
|
||||
private static final String HARDCODED_CLIENT_ID = "r5ELVSy3RkcjX7ilaL7n2v1Z8irA9SL8"; // Updated on 31/12/19
|
||||
private static String clientId;
|
||||
|
||||
private SoundcloudParsingHelper() {
|
||||
|
@ -76,6 +76,7 @@ public class SoundcloudSearchExtractor extends SearchExtractor {
|
||||
|
||||
private InfoItemsCollector<InfoItem, InfoItemExtractor> collectItems(JsonArray searchCollection) {
|
||||
final InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||
collector.reset();
|
||||
|
||||
for (Object result : searchCollection) {
|
||||
if (!(result instanceof JsonObject)) continue;
|
||||
|
@ -106,6 +106,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
||||
|
||||
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
|
||||
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||
collector.reset();
|
||||
|
||||
Element list = doc.select("ol[class=\"item-section\"]").first();
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
@ -662,7 +662,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
return errorReason != null ? errorReason.toString() : null;
|
||||
return errorReason != null ? errorReason.toString() : "";
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
@ -697,7 +697,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
|
||||
final String playerUrl;
|
||||
// Check if the video is age restricted
|
||||
if (!doc.select("meta[property=\"og:restrictions:age\"").isEmpty()) {
|
||||
if (!doc.select("meta[property=\"og:restrictions:age\"]").isEmpty()) {
|
||||
final EmbeddedInfo info = getEmbeddedInfo();
|
||||
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
|
||||
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
|
||||
|
@ -51,7 +51,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
throw new ParsingException("the URL given is not a Youtube-URL");
|
||||
}
|
||||
|
||||
if (!path.startsWith("/user/") && !path.startsWith("/channel/")) {
|
||||
if (!path.startsWith("/user/") && !path.startsWith("/channel/") && !path.startsWith("/c/")) {
|
||||
throw new ParsingException("the URL given is neither a channel nor an user");
|
||||
}
|
||||
|
||||
|
@ -1,18 +1,14 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.List;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
|
||||
private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory();
|
||||
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
|
||||
|
||||
public static YoutubeCommentsLinkHandlerFactory getInstance() {
|
||||
return instance;
|
||||
@ -24,78 +20,18 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getId(String url) throws ParsingException, IllegalArgumentException {
|
||||
if (url.isEmpty()) {
|
||||
throw new IllegalArgumentException("The url parameter should not be empty");
|
||||
}
|
||||
|
||||
String id;
|
||||
String lowercaseUrl = url.toLowerCase();
|
||||
if (lowercaseUrl.contains("youtube")) {
|
||||
if (url.contains("attribution_link")) {
|
||||
try {
|
||||
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
|
||||
String query = URLDecoder.decode(escapedQuery, "UTF-8");
|
||||
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new ParsingException("Could not parse attribution_link", uee);
|
||||
}
|
||||
} else if (url.contains("vnd.youtube")) {
|
||||
id = Parser.matchGroup1(ID_PATTERN, url);
|
||||
} else if (url.contains("embed")) {
|
||||
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
|
||||
} else if (url.contains("googleads")) {
|
||||
throw new FoundAdException("Error found add: " + url);
|
||||
} else {
|
||||
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
|
||||
}
|
||||
} else if (lowercaseUrl.contains("youtu.be")) {
|
||||
if (url.contains("v=")) {
|
||||
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
|
||||
} else {
|
||||
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
|
||||
}
|
||||
} else if(lowercaseUrl.contains("hooktube")) {
|
||||
if(lowercaseUrl.contains("&v=")
|
||||
|| lowercaseUrl.contains("?v=")) {
|
||||
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
|
||||
} else if (url.contains("/embed/")) {
|
||||
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
|
||||
} else if (url.contains("/v/")) {
|
||||
id = Parser.matchGroup1("v/" + ID_PATTERN, url);
|
||||
} else if (url.contains("/watch/")) {
|
||||
id = Parser.matchGroup1("watch/" + ID_PATTERN, url);
|
||||
} else {
|
||||
throw new ParsingException("Error no suitable url: " + url);
|
||||
}
|
||||
} else {
|
||||
throw new ParsingException("Error no suitable url: " + url);
|
||||
}
|
||||
|
||||
|
||||
if (!id.isEmpty()) {
|
||||
return id;
|
||||
} else {
|
||||
throw new ParsingException("Error could not parse url: " + url);
|
||||
}
|
||||
public String getId(String urlString) throws ParsingException, IllegalArgumentException {
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance().getId(urlString); // the comments handler needs the same ID as the stream handler, so delegate instead of duplicating the URL parsing
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onAcceptUrl(final String url) throws FoundAdException {
|
||||
final String lowercaseUrl = url.toLowerCase();
|
||||
if (lowercaseUrl.contains("youtube")
|
||||
|| lowercaseUrl.contains("youtu.be")
|
||||
|| lowercaseUrl.contains("hooktube")) {
|
||||
// Known shortcut: getId() is called only to see whether it throws, i.e. an exception is used as the URL validity check
|
||||
try {
|
||||
getId(url);
|
||||
return true;
|
||||
} catch (FoundAdException fe) {
|
||||
throw fe;
|
||||
} catch (ParsingException e) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
getId(url);
|
||||
return true;
|
||||
} catch (FoundAdException fe) {
|
||||
throw fe;
|
||||
} catch (ParsingException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ public class PeertubeChannelLinkHandlerFactoryTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acceptrUrlTest() throws ParsingException {
|
||||
public void acceptUrlTest() throws ParsingException {
|
||||
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/accounts/kranti@videos.squat.net"));
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,7 @@ public class PeertubeCommentsLinkHandlerFactoryTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acceptrUrlTest() throws ParsingException {
|
||||
public void acceptUrlTest() throws ParsingException {
|
||||
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/api/v1/videos/19319/comment-threads?start=0&count=10&sort=-createdAt"));
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,7 @@ public class PeertubePlaylistLinkHandlerFactoryTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acceptrUrlTest() throws ParsingException {
|
||||
public void acceptUrlTest() throws ParsingException {
|
||||
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/video-channels/b45e84fb-c47f-475b-94f2-718126154d33/videos"));
|
||||
}
|
||||
|
||||
|
@ -24,10 +24,12 @@ public class YoutubeChannelLinkHandlerFactoryTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acceptrUrlTest() throws ParsingException {
|
||||
public void acceptUrlTest() throws ParsingException {
|
||||
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh"));
|
||||
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos"));
|
||||
|
||||
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy"));
|
||||
|
||||
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA"));
|
||||
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1"));
|
||||
|
||||
@ -64,5 +66,8 @@ public class YoutubeChannelLinkHandlerFactoryTest {
|
||||
|
||||
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA").getId());
|
||||
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1").getId());
|
||||
|
||||
assertEquals("c/creatoracademy", linkHandler.fromUrl("https://www.youtube.com/c/creatoracademy").getId());
|
||||
assertEquals("c/YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/c/YouTubeCreators").getId());
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,6 @@ import org.jsoup.helper.StringUtil;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.DownloaderTestImpl;
|
||||
import org.schabi.newpipe.DownloaderTestImpl;
|
||||
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfo;
|
||||
@ -21,17 +20,32 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||
|
||||
public class YoutubeCommentsExtractorTest {
|
||||
|
||||
private static YoutubeCommentsExtractor extractor;
|
||||
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
|
||||
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
|
||||
private static final String urlInvidioush = "https://invidiou.sh/watch?v=D00Au7k3i6o";
|
||||
private static YoutubeCommentsExtractor extractorYT;
|
||||
private static YoutubeCommentsExtractor extractorInvidious;
|
||||
private static YoutubeCommentsExtractor extractorInvidioush;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||
extractor = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor("https://www.youtube.com/watch?v=D00Au7k3i6o");
|
||||
extractorYT = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor(urlYT);
|
||||
extractorInvidious = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor(urlInvidious);
|
||||
extractorInvidioush = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor(urlInvidioush);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetComments() throws IOException, ExtractionException {
|
||||
assertTrue(getCommentsHelper(extractorYT));
|
||||
assertTrue(getCommentsHelper(extractorInvidious));
|
||||
assertTrue(getCommentsHelper(extractorInvidioush));
|
||||
}
|
||||
|
||||
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
|
||||
boolean result;
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
result = findInComments(comments, "s1ck m3m3");
|
||||
@ -41,14 +55,20 @@ public class YoutubeCommentsExtractorTest {
|
||||
result = findInComments(comments, "s1ck m3m3");
|
||||
}
|
||||
|
||||
assertTrue(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
|
||||
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
|
||||
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
|
||||
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidioush));
|
||||
}
|
||||
|
||||
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
|
||||
boolean result = false;
|
||||
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=D00Au7k3i6o");
|
||||
assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName()));
|
||||
CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
|
||||
assertEquals("what the fuck am i doing with my life", commentsInfo.getName());
|
||||
result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
|
||||
|
||||
String nextPage = commentsInfo.getNextPageUrl();
|
||||
@ -57,16 +77,15 @@ public class YoutubeCommentsExtractorTest {
|
||||
result = findInComments(moreItems.getItems(), "s1ck m3m3");
|
||||
nextPage = moreItems.getNextPageUrl();
|
||||
}
|
||||
|
||||
assertTrue(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
|
||||
|
||||
DefaultTests.defaultTestListOfItems(YouTube.getServiceId(), comments.getItems(), comments.getErrors());
|
||||
for(CommentsInfoItem c: comments.getItems()) {
|
||||
for (CommentsInfoItem c : comments.getItems()) {
|
||||
assertFalse(StringUtil.isBlank(c.getAuthorEndpoint()));
|
||||
assertFalse(StringUtil.isBlank(c.getAuthorName()));
|
||||
assertFalse(StringUtil.isBlank(c.getAuthorThumbnail()));
|
||||
@ -86,8 +105,8 @@ public class YoutubeCommentsExtractorTest {
|
||||
}
|
||||
|
||||
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
||||
for(CommentsInfoItem c: comments) {
|
||||
if(c.getCommentText().contains(comment)) {
|
||||
for (CommentsInfoItem c : comments) {
|
||||
if (c.getCommentText().contains(comment)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,71 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.search;
|
||||
|
||||
import static java.util.Collections.singletonList;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.DownloaderTestImpl;
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.ListExtractor;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
|
||||
|
||||
public class YoutubeSearchPagingTest {
|
||||
private static ListExtractor.InfoItemsPage<InfoItem> page1;
|
||||
private static ListExtractor.InfoItemsPage<InfoItem> page2;
|
||||
private static Set<String> urlList1;
|
||||
private static Set<String> urlList2;
|
||||
private static int page1Size;
|
||||
private static int page2Size;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws Exception {
|
||||
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||
|
||||
YoutubeSearchExtractor extractor = (YoutubeSearchExtractor) YouTube.getSearchExtractor("cirque du soleil",
|
||||
singletonList(YoutubeSearchQueryHandlerFactory.VIDEOS), null);
|
||||
|
||||
extractor.fetchPage();
|
||||
page1 = extractor.getInitialPage();
|
||||
urlList1 = extractUrls(page1.getItems());
|
||||
assertTrue("failed to load search result page one: too few items", 15 < page1.getItems().size());
|
||||
page1Size = page1.getItems().size();
|
||||
assertEquals("duplicated items in search result on page one", page1Size, urlList1.size());
|
||||
|
||||
assertTrue("search result has no second page", page1.hasNextPage());
|
||||
assertNotNull("next page url is null", page1.getNextPageUrl());
|
||||
page2 = extractor.getPage(page1.getNextPageUrl());
|
||||
urlList2 = extractUrls(page2.getItems());
|
||||
page2Size = page2.getItems().size();
|
||||
}
|
||||
|
||||
private static Set<String> extractUrls(List<InfoItem> list) {
|
||||
Set<String> result = new HashSet<>();
|
||||
for (InfoItem item : list) {
|
||||
result.add(item.getUrl());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void secondPageUniqueVideos() {
|
||||
assertEquals("Second search result page has duplicated items", page2Size, urlList2.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noRepeatingVideosInPages() {
|
||||
Set<String> intersection = new HashSet<>(urlList2);
|
||||
intersection.retainAll(urlList1);
|
||||
assertEquals("Found the same item on first AND second search page", 0, intersection.size());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user