Merge pull request #237 from B0pol/fix_invidious_comments

Fix invidious comments
This commit is contained in:
Tobias Groza 2020-01-17 22:55:51 +01:00 committed by GitHub
commit 2ee558fbe7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 88 deletions

View File

@ -1,18 +1,14 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler; package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.List;
import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Parser;
import java.util.List;
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory(); private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory();
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
public static YoutubeCommentsLinkHandlerFactory getInstance() { public static YoutubeCommentsLinkHandlerFactory getInstance() {
return instance; return instance;
@ -24,78 +20,18 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
} }
@Override @Override
public String getId(String url) throws ParsingException, IllegalArgumentException { public String getId(String urlString) throws ParsingException, IllegalArgumentException {
if (url.isEmpty()) { return YoutubeStreamLinkHandlerFactory.getInstance().getId(urlString); //we need the same id, avoids duplicate code
throw new IllegalArgumentException("The url parameter should not be empty");
}
String id;
String lowercaseUrl = url.toLowerCase();
if (lowercaseUrl.contains("youtube")) {
if (url.contains("attribution_link")) {
try {
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
String query = URLDecoder.decode(escapedQuery, "UTF-8");
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
} catch (UnsupportedEncodingException uee) {
throw new ParsingException("Could not parse attribution_link", uee);
}
} else if (url.contains("vnd.youtube")) {
id = Parser.matchGroup1(ID_PATTERN, url);
} else if (url.contains("embed")) {
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
} else if (url.contains("googleads")) {
throw new FoundAdException("Error found add: " + url);
} else {
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
}
} else if (lowercaseUrl.contains("youtu.be")) {
if (url.contains("v=")) {
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
} else {
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
}
} else if(lowercaseUrl.contains("hooktube")) {
if(lowercaseUrl.contains("&v=")
|| lowercaseUrl.contains("?v=")) {
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
} else if (url.contains("/embed/")) {
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
} else if (url.contains("/v/")) {
id = Parser.matchGroup1("v/" + ID_PATTERN, url);
} else if (url.contains("/watch/")) {
id = Parser.matchGroup1("watch/" + ID_PATTERN, url);
} else {
throw new ParsingException("Error no suitable url: " + url);
}
} else {
throw new ParsingException("Error no suitable url: " + url);
}
if (!id.isEmpty()) {
return id;
} else {
throw new ParsingException("Error could not parse url: " + url);
}
} }
@Override @Override
public boolean onAcceptUrl(final String url) throws FoundAdException { public boolean onAcceptUrl(final String url) throws FoundAdException {
final String lowercaseUrl = url.toLowerCase(); try {
if (lowercaseUrl.contains("youtube") getId(url);
|| lowercaseUrl.contains("youtu.be") return true;
|| lowercaseUrl.contains("hooktube")) { } catch (FoundAdException fe) {
// bad programming I know throw fe;
try { } catch (ParsingException e) {
getId(url);
return true;
} catch (FoundAdException fe) {
throw fe;
} catch (ParsingException e) {
return false;
}
} else {
return false; return false;
} }
} }

View File

@ -4,7 +4,6 @@ import org.jsoup.helper.StringUtil;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.comments.CommentsInfo;
@ -21,17 +20,32 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube;
public class YoutubeCommentsExtractorTest { public class YoutubeCommentsExtractorTest {
private static YoutubeCommentsExtractor extractor; private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
private static final String urlInvidioush = "https://invidiou.sh/watch?v=D00Au7k3i6o";
private static YoutubeCommentsExtractor extractorYT;
private static YoutubeCommentsExtractor extractorInvidious;
private static YoutubeCommentsExtractor extractorInvidioush;
@BeforeClass @BeforeClass
public static void setUp() throws Exception { public static void setUp() throws Exception {
NewPipe.init(DownloaderTestImpl.getInstance()); NewPipe.init(DownloaderTestImpl.getInstance());
extractor = (YoutubeCommentsExtractor) YouTube extractorYT = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor("https://www.youtube.com/watch?v=D00Au7k3i6o"); .getCommentsExtractor(urlYT);
extractorInvidious = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlInvidious);
extractorInvidioush = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlInvidioush);
} }
@Test @Test
public void testGetComments() throws IOException, ExtractionException { public void testGetComments() throws IOException, ExtractionException {
assertTrue(getCommentsHelper(extractorYT));
assertTrue(getCommentsHelper(extractorInvidious));
assertTrue(getCommentsHelper(extractorInvidioush));
}
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
boolean result; boolean result;
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
result = findInComments(comments, "s1ck m3m3"); result = findInComments(comments, "s1ck m3m3");
@ -41,14 +55,20 @@ public class YoutubeCommentsExtractorTest {
result = findInComments(comments, "s1ck m3m3"); result = findInComments(comments, "s1ck m3m3");
} }
assertTrue(result); return result;
} }
@Test @Test
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidioush));
}
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
boolean result = false; boolean result = false;
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=D00Au7k3i6o"); CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName())); assertEquals("what the fuck am i doing with my life", commentsInfo.getName());
result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3"); result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
String nextPage = commentsInfo.getNextPageUrl(); String nextPage = commentsInfo.getNextPageUrl();
@ -57,16 +77,15 @@ public class YoutubeCommentsExtractorTest {
result = findInComments(moreItems.getItems(), "s1ck m3m3"); result = findInComments(moreItems.getItems(), "s1ck m3m3");
nextPage = moreItems.getNextPageUrl(); nextPage = moreItems.getNextPageUrl();
} }
return result;
assertTrue(result);
} }
@Test @Test
public void testGetCommentsAllData() throws IOException, ExtractionException { public void testGetCommentsAllData() throws IOException, ExtractionException {
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
DefaultTests.defaultTestListOfItems(YouTube.getServiceId(), comments.getItems(), comments.getErrors()); DefaultTests.defaultTestListOfItems(YouTube.getServiceId(), comments.getItems(), comments.getErrors());
for(CommentsInfoItem c: comments.getItems()) { for (CommentsInfoItem c : comments.getItems()) {
assertFalse(StringUtil.isBlank(c.getAuthorEndpoint())); assertFalse(StringUtil.isBlank(c.getAuthorEndpoint()));
assertFalse(StringUtil.isBlank(c.getAuthorName())); assertFalse(StringUtil.isBlank(c.getAuthorName()));
assertFalse(StringUtil.isBlank(c.getAuthorThumbnail())); assertFalse(StringUtil.isBlank(c.getAuthorThumbnail()));
@ -86,8 +105,8 @@ public class YoutubeCommentsExtractorTest {
} }
private boolean findInComments(List<CommentsInfoItem> comments, String comment) { private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for(CommentsInfoItem c: comments) { for (CommentsInfoItem c : comments) {
if(c.getCommentText().contains(comment)) { if (c.getCommentText().contains(comment)) {
return true; return true;
} }
} }