From e33fa926dd846efb1394a2e5c39a35f888ce3db3 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sun, 4 Dec 2022 13:19:24 +0100 Subject: [PATCH] [SoundCloud] Add support for comment replies --- .../org/schabi/newpipe/extractor/Page.java | 36 +++++++- .../soundcloud/SoundcloudParsingHelper.java | 13 +++ .../SoundcloudCommentsExtractor.java | 73 ++++++++++++---- .../SoundcloudCommentsInfoItemExtractor.java | 85 ++++++++++++++++--- .../SoundcloudCommentsLinkHandlerFactory.java | 22 ++++- .../SoundcloudStreamLinkHandlerFactory.java | 2 +- 6 files changed, 201 insertions(+), 30 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java index e1b19e7fb..e13a92287 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java @@ -1,18 +1,26 @@ package org.schabi.newpipe.extractor; +import javax.annotation.Nullable; import java.io.Serializable; import java.util.List; import java.util.Map; -import javax.annotation.Nullable; - import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; +/** + * The {@link Page} class is used for storing information on future requests + * for retrieving content. + *
+ * A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}. + * In case the data behind the URL has already been retrieved, + * it can be accessed by using {@link #getBody()} and {@link #getContent()}. + */ public class Page implements Serializable { private final String url; private final String id; private final List ids; private final Map cookies; + private Serializable content; @Nullable private final byte[] body; @@ -78,4 +86,28 @@ public class Page implements Serializable { public byte[] getBody() { return body; } + + public boolean hasContent() { + return content != null; + } + + /** + * Get the page's content if it has been set, returns {@code null} otherwise. + * @return the page's content + */ + @Nullable + public Serializable getContent() { + return content; + } + + /** + * Set the page's content. + * The page's content can either be retrieved manually by requesting the resource + * behind the page's URL (see {@link #url} and {@link #getUrl()}) + * or stored in a {@link Page} instance in case the content has already been downloaded.
+ * @param content the page's content + */ + public void setContent(@Nullable final Serializable content) { + this.content = content; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index 57deb64a2..3d217ce80 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -323,4 +323,17 @@ public final class SoundcloudParsingHelper { public static String getUploaderName(final JsonObject object) { return object.getObject("user").getString("username", ""); } + + public static boolean isReplyTo(@Nonnull final JsonObject originalComment, + @Nonnull final JsonObject otherComment) { + final String mention = "@" + originalComment.getObject("user").getString("permalink"); + return otherComment.getString("body").startsWith(mention) + && originalComment.getInt("timestamp") == otherComment.getInt("timestamp"); + + } + + public static boolean isReply(@Nonnull final JsonObject comment) { + return comment.getString("body").startsWith("@"); + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java index b02a3ea80..d4afe9f09 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java @@ -16,6 +16,7 @@ import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import 
org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; import java.io.IOException; @@ -24,6 +25,8 @@ import javax.annotation.Nonnull; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; public class SoundcloudCommentsExtractor extends CommentsExtractor { + public static final String COLLECTION = "collection"; + public SoundcloudCommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) { super(service, uiHandler); @@ -46,7 +49,7 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( getServiceId()); - collectStreamsFrom(collector, json.getArray("collection")); + collectStreamsFrom(collector, json); return new InfoItemsPage<>(collector, new Page(json.getString("next_href"))); } @@ -57,21 +60,32 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { if (page == null || isNullOrEmpty(page.getUrl())) { throw new IllegalArgumentException("Page doesn't contain an URL"); } - - final Downloader downloader = NewPipe.getDownloader(); - final Response response = downloader.get(page.getUrl()); - final JsonObject json; - try { - json = JsonParser.object().from(response.responseBody()); - } catch (final JsonParserException e) { - throw new ParsingException("Could not parse json", e); - } - final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( getServiceId()); - collectStreamsFrom(collector, json.getArray("collection")); + if (page.hasContent()) { + // This page contains the whole previously fetched comments. + // We need to get the comments which are replies to the comment with the page's id. 
+ json = (JsonObject) page.getContent(); + try { + final int commentId = Integer.parseInt(page.getId()); + collectRepliesFrom(collector, json, commentId, page.getUrl()); + } catch (final NumberFormatException e) { + throw new ParsingException("Got invalid comment id", e); + } + } else { + + final Downloader downloader = NewPipe.getDownloader(); + final Response response = downloader.get(page.getUrl()); + + try { + json = JsonParser.object().from(response.responseBody()); + } catch (final JsonParserException e) { + throw new ParsingException("Could not parse json", e); + } + collectStreamsFrom(collector, json); + } return new InfoItemsPage<>(collector, new Page(json.getString("next_href"))); } @@ -80,10 +94,39 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { public void onFetchPage(@Nonnull final Downloader downloader) { } private void collectStreamsFrom(final CommentsInfoItemsCollector collector, - final JsonArray entries) throws ParsingException { + final JsonObject json) throws ParsingException { final String url = getUrl(); - for (final Object comment : entries) { - collector.commit(new SoundcloudCommentsInfoItemExtractor((JsonObject) comment, url)); + final JsonArray entries = json.getArray(COLLECTION); + for (int i = 0; i < entries.size(); i++) { + final JsonObject entry = entries.getObject(i); + if (i == 0 + || (!SoundcloudParsingHelper.isReply(entry) + && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry))) { + collector.commit(new SoundcloudCommentsInfoItemExtractor( + json, i, entries.getObject(i), url)); + } } } + + private void collectRepliesFrom(final CommentsInfoItemsCollector collector, + final JsonObject json, + final int id, + final String url) throws ParsingException { + JsonObject originalComment = null; + final JsonArray entries = json.getArray(COLLECTION); + for (int i = 0; i < entries.size(); i++) { + final JsonObject comment = entries.getObject(i); + if (comment.getInt("id") == id) { + originalComment = 
comment; + continue; + } + if (originalComment != null + && SoundcloudParsingHelper.isReplyTo(originalComment, comment)) { + collector.commit(new SoundcloudCommentsInfoItemExtractor( + json, i, entries.getObject(i), url)); + + } + } + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java index ec3f353e6..79b27f1b2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java @@ -1,62 +1,79 @@ package org.schabi.newpipe.extractor.services.soundcloud.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; +import org.schabi.newpipe.extractor.Page; +import org.schabi.newpipe.extractor.ServiceList; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; import org.schabi.newpipe.extractor.stream.Description; import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor { + public static final String USER = "user"; + public static final String BODY = "body"; + private final JsonObject json; + private final int index; + private final JsonObject item; private final String url; - public SoundcloudCommentsInfoItemExtractor(final 
JsonObject json, final String url) { + private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT; + private Page repliesPage = null; + + public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index, final JsonObject item, final String url) { this.json = json; + this.index = index; + this.item = item; this.url = url; } @Override public String getCommentId() { - return Objects.toString(json.getLong("id"), null); + return Objects.toString(item.getLong("id"), null); } @Override public Description getCommentText() { - return new Description(json.getString("body"), Description.PLAIN_TEXT); + return new Description(item.getString(BODY), Description.PLAIN_TEXT); } @Override public String getUploaderName() { - return json.getObject("user").getString("username"); + return item.getObject(USER).getString("username"); } @Override public String getUploaderAvatarUrl() { - return json.getObject("user").getString("avatar_url"); + return item.getObject(USER).getString("avatar_url"); } @Override public boolean isUploaderVerified() throws ParsingException { - return json.getObject("user").getBoolean("verified"); + return item.getObject(USER).getBoolean("verified"); } @Override public int getStreamPosition() throws ParsingException { - return json.getInt("timestamp") / 1000; // convert milliseconds to seconds + return item.getInt("timestamp") / 1000; // convert milliseconds to seconds } @Override public String getUploaderUrl() { - return json.getObject("user").getString("permalink_url"); + return item.getObject(USER).getString("permalink_url"); } @Override public String getTextualUploadDate() { - return json.getString("created_at"); + return item.getString("created_at"); } @Nullable @@ -67,7 +84,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr @Override public String getName() throws ParsingException { - return json.getObject("user").getString("permalink"); + return item.getObject(USER).getString("permalink"); } @Override @@ 
-77,6 +94,52 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr @Override public String getThumbnailUrl() { - return json.getObject("user").getString("avatar_url"); + return item.getObject(USER).getString("avatar_url"); + } + + @Override + public Page getReplies() { + if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) { + final List replies = new ArrayList<>(); + final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( + ServiceList.SoundCloud.getServiceId()); + final JsonArray jsonArray = new JsonArray(); + // Replies start with the mention of the user who created the original comment. + final String mention = "@" + item.getObject(USER).getString("permalink"); + // Loop through all comments which come after the original comment to find its replies. + final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION); + for (int i = index + 1; i < allItems.size(); i++) { + final JsonObject comment = allItems.getObject(i); + final String commentContent = comment.getString("body"); + if (commentContent.startsWith(mention)) { + replies.add(comment); + jsonArray.add(comment); + collector.commit(new SoundcloudCommentsInfoItemExtractor(json, i, comment, url)); + } else if (!commentContent.startsWith("@") || replies.isEmpty()) { + // Only the comments directly after the original comment + // starting with the mention of the comment's creator + // are replies to the original comment. + // The first comment not starting with these letters + // is the next top-level comment. 
+ break; + } + } + replyCount = jsonArray.size(); + if (collector.getItems().isEmpty()) { + return null; + } + repliesPage = new Page(getUrl(), getCommentId()); + repliesPage.setContent(json); + } + + return repliesPage; + } + + @Override + public int getReplyCount() throws ParsingException { + if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) { + getReplies(); + } + return replyCount; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java index 23c6a2939..39e124698 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud.linkHandler; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; import java.util.List; @@ -14,6 +15,8 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF private static final SoundcloudCommentsLinkHandlerFactory INSTANCE = new SoundcloudCommentsLinkHandlerFactory(); + private static final String OFFSET_PATTERN = "https://api-v2.soundcloud.com/tracks/([0-9a-z]+)/comments?([0-9a-z/&])?offset=([0-9])+" + private SoundcloudCommentsLinkHandlerFactory() { } @@ -27,7 +30,7 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF final String sortFilter) throws ParsingException { try { return "https://api-v2.soundcloud.com/tracks/" + id + "/comments" + "?client_id=" 
- + clientId() + "&threaded=0" + "&filter_replies=1"; + + clientId() + "&threaded=1" + "&filter_replies=1"; // Anything but 1 = sort by new // + "&limit=NUMBER_OF_ITEMS_PER_REQUEST". We let the API control (default = 10) // + "&offset=OFFSET". We let the API control (default = 0, then we use nextPageUrl) @@ -36,12 +39,29 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF } } + public String getUrl(final String id, + final List contentFilter, + final String sortFilter, + final int offset) throws ParsingException { + return getUrl(id, contentFilter, sortFilter) + "&offset=" + offset; + } + @Override public String getId(final String url) throws ParsingException { // Delegation to avoid duplicate code, as we need the same id return SoundcloudStreamLinkHandlerFactory.getInstance().getId(url); } + public int getReplyOffset(final String url) throws ParsingException { + try { + return Integer.parseInt(Parser.matchGroup(OFFSET_PATTERN, url, 3)); + } catch (Parser.RegexException | NumberFormatException e) { + throw new ParsingException("Could not get offset from URL: " + url, e); + } + } + + + @Override public boolean onAcceptUrl(final String url) { try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java index 9af4be09b..14ee29b0c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java @@ -33,7 +33,7 @@ public final class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory @Override public String getId(final String url) throws ParsingException { if (Parser.isMatch(API_URL_PATTERN, url)) { - return 
Parser.matchGroup1(API_URL_PATTERN, url); + return Parser.matchGroup(API_URL_PATTERN, url, 2); } Utils.checkUrl(URL_PATTERN, url);