Merge pull request #382 from TobiGr/fix/empty_comment

[YouTube] Fix crash on empty comment
2025-04-29 08:20:34 +05:30 · 2020-08-12 14:18:16 +02:00 · 2020-08-12 14:18:16 +02:00 · de8edbe7a5
commit de8edbe7a5
parent 5ac80624a4 0fb73301e3
3 changed files with 139 additions and 84 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
@ -47,13 +47,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
    /**
     * Builds the first page of comments.
     * <p>
     * Cuts the text that follows {@code commentSectionRenderer} (up to the next closing brace) out of
     * {@code responseBody}, reads the value that follows {@code continuation":"} from that snippet and
     * passes the page created from this token to {@code getPage}.
     * <p>
     * NOTE(review): the {@code getNextPage} overload taking the token string is not visible in this
     * excerpt; presumably it turns the continuation token into a {@code Page} - confirm.
     */
    @Override
    public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
-        String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
+        final String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
-        String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
+        final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
        return getPage(getNextPage(commentsToken));
    }
    private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
-        JsonArray arr;
+        final JsonArray arr;
        try {
            arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
        } catch (Exception e) {
@ -89,14 +89,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
            throw new IllegalArgumentException("Page doesn't contain an URL");
        }
-        String ajaxResponse = makeAjaxRequest(page.getUrl());
+        final String ajaxResponse = makeAjaxRequest(page.getUrl());
-        JsonObject ajaxJson;
+        final JsonObject ajaxJson;
        try {
            ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
        } catch (Exception e) {
            throw new ParsingException("Could not parse json data for comments", e);
        }
-        CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
+        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
        collectCommentsFrom(collector, ajaxJson);
        return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
    }
@ -160,8 +160,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
    }
    /**
     * Returns the part of {@code doc} that lies between the first occurrence of {@code start} and the
     * first occurrence of {@code end} after it; neither delimiter is included in the result.
     * <p>
     * NOTE(review): a missing delimiter is not checked. {@code String.indexOf} returns -1 when nothing
     * is found, so an absent {@code start} makes the search begin at {@code start.length() - 1}, and an
     * absent {@code end} makes {@code substring} throw {@code StringIndexOutOfBoundsException}.
     *
     * @param doc   text to search
     * @param start delimiter that precedes the wanted value
     * @param end   delimiter that follows the wanted value
     * @return the text between the two delimiters
     */
    private String findValue(String doc, String start, String end) {
        // the value begins right after the start delimiter, hence the added length
-        int beginIndex = doc.indexOf(start) + start.length();
+        final int beginIndex = doc.indexOf(start) + start.length();
        // only look for the end delimiter behind the start delimiter
-        int endIndex = doc.indexOf(end, beginIndex);
+        final int endIndex = doc.indexOf(end, beginIndex);
        return doc.substring(beginIndex, endIndex);
    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
@ -34,7 +34,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getThumbnailUrl() throws ParsingException {
        try {
-            JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
+            final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
            return JsonUtils.getString(arr.getObject(2), "url");
        } catch (Exception e) {
            throw new ParsingException("Could not get thumbnail url", e);
@ -82,7 +82,13 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getCommentText() throws ParsingException {
        try {
-            String commentText = getTextFromObject(JsonUtils.getObject(json, "contentText"));
+            final JsonObject contentText = JsonUtils.getObject(json, "contentText");
            if (contentText.isEmpty()) {
                // completely empty comments as described in
                // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
                return "";
            }
            final String commentText = getTextFromObject(contentText);
            // youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
            return Utils.removeUTF8BOM(commentText);
        } catch (Exception e) {
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
@ -23,11 +23,17 @@ import static org.junit.Assert.assertTrue;
 import static org.schabi.newpipe.extractor.ServiceList.YouTube;
 public class YoutubeCommentsExtractorTest {
    /**
     * Test a "normal" YouTube and Invidious page
     */
    public static class Thomas {
        private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
        private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
        private static YoutubeCommentsExtractor extractorYT;
        private static YoutubeCommentsExtractor extractorInvidious;
        private static final String commentContent = "sub 4 sub";
        @BeforeClass
        public static void setUp() throws Exception {
            NewPipe.init(DownloaderTestImpl.getInstance());
@ -36,6 +42,7 @@ public class YoutubeCommentsExtractorTest {
            extractorYT.fetchPage();
            extractorInvidious = (YoutubeCommentsExtractor) YouTube
                    .getCommentsExtractor(urlInvidious);
            extractorInvidious.fetchPage();
        }
        @Test
@ -46,11 +53,11 @@ public class YoutubeCommentsExtractorTest {
        private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
            InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
-        boolean result = findInComments(comments, "s1ck m3m3");
+            boolean result = findInComments(comments, commentContent);
            while (comments.hasNextPage() && !result) {
                comments = extractor.getPage(comments.getNextPage());
-            result = findInComments(comments, "s1ck m3m3");
+                result = findInComments(comments, commentContent);
            }
            return result;
@ -63,16 +70,16 @@ public class YoutubeCommentsExtractorTest {
        }
        private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
-        CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
+            final CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
            assertEquals("Comments", commentsInfo.getName());
-        boolean result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
+            boolean result = findInComments(commentsInfo.getRelatedItems(), commentContent);
            Page nextPage = commentsInfo.getNextPage();
            InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
            while (moreItems.hasNextPage() && !result) {
                moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
-            result = findInComments(moreItems.getItems(), "s1ck m3m3");
+                result = findInComments(moreItems.getItems(), commentContent);
                nextPage = moreItems.getNextPage();
            }
            return result;
@ -111,3 +118,45 @@ public class YoutubeCommentsExtractorTest {
            return false;
        }
    }
    /**
     * Checks the comment extraction for a video that has a comment without any text.
     */
    public static class EmptyComment {
        private static final String url = "https://www.youtube.com/watch?v=VM_6n762j6M";
        /** Id of the comment that is expected to have no text. */
        private static final String EMPTY_COMMENT_ID = "Ugga_h1-EXdHB3gCoAEC";

        private static YoutubeCommentsExtractor extractor;

        @BeforeClass
        public static void setUp() throws Exception {
            NewPipe.init(DownloaderTestImpl.getInstance());
            extractor = (YoutubeCommentsExtractor) YouTube.getCommentsExtractor(url);
            extractor.fetchPage();
        }

        /**
         * Every comment of the initial page must carry complete metadata; only the comment with
         * the known id may (and must) have blank text.
         * <p>
         * NOTE(review): only the initial page is inspected and nothing asserts that the comment
         * with the known id was actually seen, so the blank-text branch may never run.
         */
        @Test
        public void testGetCommentsAllData() throws IOException, ExtractionException {
            final InfoItemsPage<CommentsInfoItem> firstPage = extractor.getInitialPage();

            DefaultTests.defaultTestListOfItems(YouTube, firstPage.getItems(), firstPage.getErrors());

            for (final CommentsInfoItem comment : firstPage.getItems()) {
                // uploader related data
                assertFalse(Utils.isBlank(comment.getUploaderUrl()));
                assertFalse(Utils.isBlank(comment.getUploaderName()));
                assertFalse(Utils.isBlank(comment.getUploaderAvatarUrl()));

                // data of the comment itself
                assertFalse(Utils.isBlank(comment.getCommentId()));
                assertFalse(Utils.isBlank(comment.getName()));
                assertFalse(Utils.isBlank(comment.getTextualUploadDate()));
                assertNotNull(comment.getUploadDate());
                assertFalse(Utils.isBlank(comment.getThumbnailUrl()));
                assertFalse(Utils.isBlank(comment.getUrl()));
                assertFalse(comment.getLikeCount() < 0);

                final boolean expectedToBeEmpty = comment.getCommentId().equals(EMPTY_COMMENT_ID);
                final boolean textIsBlank = Utils.isBlank(comment.getCommentText());
                if (expectedToBeEmpty) {
                    assertTrue(textIsBlank);
                } else {
                    assertFalse(textIsBlank);
                }
            }
        }
    }
 }