mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-04-28 07:50:34 +05:30
Merge pull request #703 from FireMasterK/comment-replies
Add support for extracting comment replies continuation
This commit is contained in:
commit
a9d214478d
@ -1,6 +1,7 @@
|
|||||||
package org.schabi.newpipe.extractor.comments;
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.InfoItem;
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
@ -21,6 +22,8 @@ public class CommentsInfoItem extends InfoItem {
|
|||||||
private boolean heartedByUploader;
|
private boolean heartedByUploader;
|
||||||
private boolean pinned;
|
private boolean pinned;
|
||||||
private int streamPosition;
|
private int streamPosition;
|
||||||
|
@Nullable
|
||||||
|
private Page replies;
|
||||||
|
|
||||||
public static final int NO_LIKE_COUNT = -1;
|
public static final int NO_LIKE_COUNT = -1;
|
||||||
public static final int NO_STREAM_POSITION = -1;
|
public static final int NO_STREAM_POSITION = -1;
|
||||||
@ -142,4 +145,8 @@ public class CommentsInfoItem extends InfoItem {
|
|||||||
public int getStreamPosition() {
|
public int getStreamPosition() {
|
||||||
return streamPosition;
|
return streamPosition;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setReplies(@Nullable Page replies) { this.replies = replies; }
|
||||||
|
|
||||||
|
public Page getReplies() { return this.replies; }
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package org.schabi.newpipe.extractor.comments;
|
package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
|
||||||
@ -107,4 +108,13 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
|
|||||||
default int getStreamPosition() throws ParsingException {
|
default int getStreamPosition() throws ParsingException {
|
||||||
return CommentsInfoItem.NO_STREAM_POSITION;
|
return CommentsInfoItem.NO_STREAM_POSITION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The continuation page which is used to get comment replies from.
|
||||||
|
* @return the continuation Page for the replies, or null if replies are not supported
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
default Page getReplies() throws ParsingException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,6 +93,12 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
|
|||||||
addError(e);
|
addError(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
resultItem.setReplies(extractor.getReplies());
|
||||||
|
} catch (Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
|
||||||
return resultItem;
|
return resultItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,12 +112,6 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<CommentsInfoItem> getCommentsInfoItemList() {
|
public List<CommentsInfoItem> getCommentsInfoItemList() {
|
||||||
List<CommentsInfoItem> siiList = new ArrayList<>();
|
return new ArrayList<>(super.getItems());
|
||||||
for (InfoItem ii : super.getItems()) {
|
|
||||||
if (ii instanceof CommentsInfoItem) {
|
|
||||||
siiList.add((CommentsInfoItem) ii);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return siiList;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -101,7 +101,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
|
|
||||||
if (itemSectionRenderer.isPresent()) {
|
if (itemSectionRenderer.isPresent()) {
|
||||||
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
|
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
|
||||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
||||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
} else {
|
} else {
|
||||||
token = null;
|
token = null;
|
||||||
@ -140,10 +140,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
|
||||||
|
|
||||||
|
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
|
||||||
|
|
||||||
final String continuation;
|
final String continuation;
|
||||||
try {
|
try {
|
||||||
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
|
continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
|
||||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -167,7 +170,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
|
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||||
getExtractorContentCountry())
|
getExtractorContentCountry())
|
||||||
.value("continuation", page.getId())
|
.value("continuation", page.getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(UTF_8);
|
.getBytes(UTF_8);
|
||||||
@ -212,10 +215,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
contents.remove(index);
|
contents.remove(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
|
||||||
|
|
||||||
final List<Object> comments;
|
final List<Object> comments;
|
||||||
try {
|
try {
|
||||||
comments = JsonUtils.getValues(contents,
|
comments = JsonUtils.getValues(contents, jsonKey);
|
||||||
"commentThreadRenderer.comment.commentRenderer");
|
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Unable to get parse youtube comments", e);
|
throw new ParsingException("Unable to get parse youtube comments", e);
|
||||||
}
|
}
|
||||||
@ -234,7 +238,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||||
getExtractorContentCountry())
|
getExtractorContentCountry())
|
||||||
.value("videoId", getId())
|
.value("videoId", getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(UTF_8);
|
.getBytes(UTF_8);
|
||||||
|
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||||||
import com.grack.nanojson.JsonArray;
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonWriter;
|
||||||
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
@ -18,6 +20,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
|||||||
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
|
|
||||||
private final JsonObject json;
|
private final JsonObject json;
|
||||||
|
private JsonObject commentRenderer;
|
||||||
private final String url;
|
private final String url;
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
@ -29,6 +32,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
this.timeAgoParser = timeAgoParser;
|
this.timeAgoParser = timeAgoParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private JsonObject getCommentRenderer() throws ParsingException {
|
||||||
|
if(commentRenderer == null) {
|
||||||
|
if(!json.has("comment"))
|
||||||
|
commentRenderer = json;
|
||||||
|
else
|
||||||
|
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
|
||||||
|
}
|
||||||
|
return commentRenderer;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
return url;
|
return url;
|
||||||
@ -37,7 +50,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
final JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
|
||||||
return JsonUtils.getString(arr.getObject(2), "url");
|
return JsonUtils.getString(arr.getObject(2), "url");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
@ -47,7 +60,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
@ -56,7 +69,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "publishedTimeText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get publishedTimeText", e);
|
throw new ParsingException("Could not get publishedTimeText", e);
|
||||||
}
|
}
|
||||||
@ -94,7 +107,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
// Try first to get the exact like count by using the accessibility data
|
// Try first to get the exact like count by using the accessibility data
|
||||||
final String likeCount;
|
final String likeCount;
|
||||||
try {
|
try {
|
||||||
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json,
|
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
|
||||||
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
|
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
// Use the approximate like count returned into the voteCount object
|
// Use the approximate like count returned into the voteCount object
|
||||||
@ -145,11 +158,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
*/
|
*/
|
||||||
try {
|
try {
|
||||||
// If a comment has no likes voteCount is not set
|
// If a comment has no likes voteCount is not set
|
||||||
if (!json.has("voteCount")) {
|
if (!getCommentRenderer().has("voteCount")) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
|
|
||||||
final JsonObject voteCountObj = JsonUtils.getObject(json, "voteCount");
|
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
|
||||||
if (voteCountObj.isEmpty()) {
|
if (voteCountObj.isEmpty()) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
@ -162,7 +175,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getCommentText() throws ParsingException {
|
public String getCommentText() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final JsonObject contentText = JsonUtils.getObject(json, "contentText");
|
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
||||||
if (contentText.isEmpty()) {
|
if (contentText.isEmpty()) {
|
||||||
// completely empty comments as described in
|
// completely empty comments as described in
|
||||||
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
||||||
@ -180,7 +193,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getCommentId() throws ParsingException {
|
public String getCommentId() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return JsonUtils.getString(json, "commentId");
|
return JsonUtils.getString(getCommentRenderer(), "commentId");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get comment id", e);
|
throw new ParsingException("Could not get comment id", e);
|
||||||
}
|
}
|
||||||
@ -189,7 +202,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
|
||||||
return JsonUtils.getString(arr.getObject(2), "url");
|
return JsonUtils.getString(arr.getObject(2), "url");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get author thumbnail", e);
|
throw new ParsingException("Could not get author thumbnail", e);
|
||||||
@ -198,24 +211,24 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isHeartedByUploader() throws ParsingException {
|
public boolean isHeartedByUploader() throws ParsingException {
|
||||||
final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons")
|
final JsonObject commentActionButtonsRenderer = getCommentRenderer().getObject("actionButtons")
|
||||||
.getObject("commentActionButtonsRenderer");
|
.getObject("commentActionButtonsRenderer");
|
||||||
return commentActionButtonsRenderer.has("creatorHeart");
|
return commentActionButtonsRenderer.has("creatorHeart");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isPinned() {
|
public boolean isPinned() throws ParsingException {
|
||||||
return json.has("pinnedCommentBadge");
|
return getCommentRenderer().has("pinnedCommentBadge");
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isUploaderVerified() {
|
public boolean isUploaderVerified() throws ParsingException {
|
||||||
return json.has("authorCommentBadge");
|
return getCommentRenderer().has("authorCommentBadge");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
@ -224,10 +237,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return "https://www.youtube.com/channel/" + JsonUtils.getString(json,
|
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
|
||||||
"authorEndpoint.browseEndpoint.browseId");
|
"authorEndpoint.browseEndpoint.browseId");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Page getReplies() throws ParsingException {
|
||||||
|
try {
|
||||||
|
final String id = JsonUtils.getString(JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents").getObject(0), "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
|
return new Page(url, id);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
return null; // Would return null for Comment Replies, since YouTube does not support nested replies.
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -306,4 +306,32 @@ public class YoutubeCommentsExtractorTest {
|
|||||||
assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
|
assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class RepliesTest {
|
||||||
|
private final static String url = "https://www.youtube.com/watch?v=--yeOvJGZQk";
|
||||||
|
private static YoutubeCommentsExtractor extractor;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
YoutubeParsingHelper.resetClientVersionAndKey();
|
||||||
|
YoutubeParsingHelper.setNumberGenerator(new Random(1));
|
||||||
|
NewPipe.init(new DownloaderFactory().getDownloader(RESOURCE_PATH + "replies"));
|
||||||
|
extractor = (YoutubeCommentsExtractor) YouTube
|
||||||
|
.getCommentsExtractor(url);
|
||||||
|
extractor.fetchPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetCommentsFirstReplies() throws IOException, ExtractionException {
|
||||||
|
final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
|
||||||
|
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||||
|
|
||||||
|
CommentsInfoItem firstComment = comments.getItems().get(0);
|
||||||
|
|
||||||
|
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
|
||||||
|
|
||||||
|
assertEquals("First reply comment did not match", "Lol", replies.getItems().get(0).getCommentText());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user