Almost fixed

Implemented a cache.
TODO: Do not store in cache when viewing replies....
This commit is contained in:
TobiGr 2023-01-03 00:19:41 +01:00
parent e5be686b06
commit 8ae7fcfa1e
12 changed files with 264 additions and 39 deletions

View File

@ -78,7 +78,7 @@ public abstract class InfoItemsCollector<I extends InfoItem, E extends InfoItemE
* Add an error
* @param error the error
*/
protected void addError(final Exception error) {
public void addError(final Exception error) {
errors.add(error);
}

View File

@ -19,7 +19,8 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache.CachedCommentInfo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -37,7 +38,8 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
* of the last top level comment cannot be determined certainly.
*/
@Nullable private JsonObject lastTopLevelComment;
private static final SoundCloudCommentsCache LAST_TOP_LEVEL_COMMENTS =
new SoundCloudCommentsCache(10);
public SoundcloudCommentsExtractor(final StreamingService service,
final ListLinkHandler uiHandler) {
@ -100,7 +102,18 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e);
}
collectCommentsFrom(collector, json, lastTopLevelComment);
final CachedCommentInfo topLevelCommentElement = LAST_TOP_LEVEL_COMMENTS.get(getUrl());
if (topLevelCommentElement == null) {
if (LAST_TOP_LEVEL_COMMENTS.isEmpty()) {
collector.addError(new RuntimeException(
"Could not get last top level comment. It has been removed from cache."
+ " Increase the cache size to not loose any comments"));
}
collectCommentsFrom(collector, json, null);
} else {
collectCommentsFrom(collector, json, topLevelCommentElement);
}
}
if (hasNextPage) {
@ -111,54 +124,71 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader) { }
public void onFetchPage(@Nonnull final Downloader downloader) {
}
/**
* Collect top level comments from a SoundCloud API response.
* @param collector the collector which collects the the top level comments
* @param json the JsonObject of the API response
*
* @param collector the collector which collects the top level comments
* @param json the JsonObject of the API response
* @param lastTopLevelComment the last top level comment from the previous page or {@code null}
* if this method is run for the initial page.
* @throws ParsingException
*/
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
@Nullable final JsonObject lastTopLevelComment)
@Nullable final CachedCommentInfo lastTopLevelComment)
throws ParsingException {
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
final String url = getUrl();
final JsonArray entries = json.getArray(COLLECTION);
/**
* The current top level comment.
*/
JsonObject currentTopLevelComment = null;
int currentTopLevelCommentIndex = 0;
boolean isLastCommentReply = true;
boolean isFirstCommentReply = false;
boolean addedLastTopLevelComment = lastTopLevelComment == null;
// Check whether the first comment in the list is a reply to the last top level comment
// from the previous page if there was a previous page.
if (lastTopLevelComment != null) {
final JsonObject firstComment = entries.getObject(0);
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) {
currentTopLevelComment = lastTopLevelComment;
final JsonObject firstComment = json.getArray(COLLECTION).getObject(0);
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment.comment, firstComment)) {
currentTopLevelComment = lastTopLevelComment.comment;
isFirstCommentReply = true;
merge(json, lastTopLevelComment.json, lastTopLevelComment.index);
} else {
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX,
firstComment, url, null));
lastTopLevelComment.json,
lastTopLevelComment.index,
lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
}
}
final JsonArray entries = json.getArray(COLLECTION);
for (int i = 0; i < entries.size(); i++) {
final JsonObject entry = entries.getObject(i);
// extract all top level comments
// The first comment is either a top level comment
// Extract all top level comments
// The first comment is a top level comment
// if it is not a reply to the last top level comment
//
if (i == 0 && currentTopLevelComment == null
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
if ((i == 0 && !isFirstCommentReply)
|| (
i != 0 && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
currentTopLevelComment = entry;
currentTopLevelCommentIndex = i;
if (!addedLastTopLevelComment) {
// There is a new top level comment. This also means that we can now determine
// the reply count and get all replies for the top level comment.
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, 0, lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
}
if (i == entries.size() - 1) {
isLastCommentReply = false;
this.lastTopLevelComment = currentTopLevelComment;
LAST_TOP_LEVEL_COMMENTS.put(getUrl(), currentTopLevelComment, json, i);
// Do not collect the last comment if it is a top level comment
// because it might have replies.
// That is information we cannot get from the comment itself
@ -168,14 +198,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
break;
}
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, i, entry, url, lastTopLevelComment));
json, i, entry, url, null));
}
}
if (isLastCommentReply) {
// Do not collect the last top level comment if it has replies and the retrieved
// comment list ends with a reply. We do not know whether the next page starts
// with more replies to the last top level comment.
this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item;
LAST_TOP_LEVEL_COMMENTS.put(
getUrl(),
extractors.remove(extractors.size() - 1).item,
json, currentTopLevelCommentIndex);
}
extractors.stream().forEach(collector::commit);
@ -183,11 +216,13 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
/**
* Collect replies to a top level comment from a SoundCloud API response.
*
* @param collector the collector which collects the replies
* @param json the SoundCloud API response
* @param id the comment's id for which the replies are collected
* @param url the corresponding page's URL
* @return
* @param json the SoundCloud API response
* @param id the comment's id for which the replies are collected
* @param url the corresponding page's URL
* @return {@code true} if there might be more replies to the comment;
* {@code false} if there are definitely no more replies
*/
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
@ -206,8 +241,8 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
&& SoundcloudParsingHelper.isReplyTo(originalComment, comment)) {
collector.commit(new SoundcloudCommentsInfoItemExtractor(
json, i, entries.getObject(i), url, originalComment));
// There might be more replies to the originalComment,
// especially if the original comment is at the end of the list.
// There might be more replies to the originalComment
// if the original comment is at the end of the list.
if (i == entries.size() - 1 && json.has(NEXT_HREF)) {
moreReplies = true;
}
@ -216,4 +251,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
return moreReplies;
}
/**
 * Prepend the tail of another response's comment collection to {@code target}'s collection.
 *
 * <p>The entries of {@code subject}'s {@code COLLECTION} array from {@code index} (inclusive)
 * to its end are copied into a new array, followed by all entries of {@code target}'s
 * {@code COLLECTION} array. The new array then replaces {@code target}'s collection;
 * {@code subject} is not modified.</p>
 *
 * @param target  the JSON object whose collection is replaced by the merged array
 * @param subject the JSON object whose trailing collection entries are carried over
 * @param index   the index in {@code subject}'s collection of the first entry to carry over
 */
private void merge(@Nonnull final JsonObject target, @Nonnull final JsonObject subject,
                   final int index) {
    final JsonArray targetArray = target.getArray(COLLECTION);
    final JsonArray subjectArray = subject.getArray(COLLECTION);
    // Exactly (subjectArray.size() - index) carried-over entries plus every target entry
    // are added below, so this is the precise capacity needed (the previous "- 1" made the
    // backing storage one slot too small and forced a resize on the last insertion).
    final JsonArray newArray = new JsonArray(
            targetArray.size() + subjectArray.size() - index);
    for (int i = index; i < subjectArray.size(); i++) {
        newArray.add(subjectArray.getObject(i));
    }
    newArray.addAll(targetArray);
    target.put(COLLECTION, newArray);
}
}

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud.extractors;
import static org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudCommentsExtractor.COLLECTION;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
@ -59,6 +60,13 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
this(json, index, item, url, null);
}
/**
 * Append the first {@code itemCount} entries of {@code newItems} to this extractor's
 * {@code COLLECTION} array.
 *
 * @param newItems  the array to read the additional entries from
 * @param itemCount how many entries, counted from the start of {@code newItems}, to append
 */
public void addInfoFromNextPage(@Nonnull final JsonArray newItems, final int itemCount) {
    final JsonArray collected = json.getArray(COLLECTION);
    int position = 0;
    while (position < itemCount) {
        collected.add(newItems.getObject(position));
        position++;
    }
}
@Override
public String getCommentId() {
return Objects.toString(item.getLong("id"), null);
@ -75,7 +83,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
// We need to do this manually.
if (commentContent.startsWith("@")) {
final String authorName = commentContent.split(" ", 2)[0].replace("@", "");
final JsonArray comments = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
final JsonArray comments = json.getArray(COLLECTION);
JsonObject author = null;
for (int i = index - 1; i >= 0 && author == null; i--) {
final JsonObject commentsAuthor = comments.getObject(i).getObject("user");
@ -163,7 +171,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
if (topLevelComment == null) {
// Loop through all comments which come after the original comment
// to find its replies.
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
final JsonArray allItems = json.getArray(COLLECTION);
for (int i = index + 1; i < allItems.size(); i++) {
if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
replyCount++;

View File

@ -7,7 +7,7 @@ import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;

View File

@ -15,7 +15,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isBlank;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.Utils;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;

View File

@ -15,7 +15,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.services.youtube.dashmanifestcreators;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

View File

@ -0,0 +1,9 @@
package org.schabi.newpipe.extractor.utils.cache;
/**
 * Minimal contract of a key/value cache.
 *
 * @param <K> the type of the keys
 * @param <V> the type of the cached values
 */
public interface Cache<K, V> {

    /**
     * Look up the value stored for the given key.
     *
     * @param key the key to look up
     * @return the value associated with {@code key}; what is returned for an unknown key is
     *         left to the implementation
     */
    V get(K key);

    /**
     * Store a value under the given key.
     *
     * @param key   the key to store the value under
     * @param value the value to store
     */
    void put(K key, V value);

    /**
     * Remove all entries from the cache.
     */
    void clear();

    /**
     * @return whether the cache currently holds no entries
     */
    boolean isEmpty();

    /**
     * @return the number of entries currently held by the cache
     */
    int size();
}

View File

@ -1,4 +1,6 @@
package org.schabi.newpipe.extractor.utils;
package org.schabi.newpipe.extractor.utils.cache;
import org.schabi.newpipe.extractor.utils.Pair;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

View File

@ -0,0 +1,74 @@
package org.schabi.newpipe.extractor.utils.cache;
import com.grack.nanojson.JsonObject;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
 * Small least-recently-used (LRU) cache holding at most a fixed number of
 * {@link CachedCommentInfo} entries, addressed by a string key.
 *
 * <p>When a <em>new</em> key is inserted while the cache is full, the entry which was
 * inserted or read the longest time ago is evicted. Replacing the value of a key that is
 * already cached never evicts another entry.</p>
 *
 * <p>NOTE: this class performs no synchronization (it is backed by a plain {@link HashMap});
 * callers sharing an instance between threads have to synchronize externally.</p>
 */
public class SoundCloudCommentsCache {

    /** Maximum number of entries the cache may hold. */
    private final int maxSize;
    private final Map<String, CachedCommentInfo> store;
    /**
     * Logical clock used to order accesses. A counter is used instead of
     * {@link System#nanoTime()} because two consecutive timer readings may be equal on
     * coarse timers, which would make the eviction order arbitrary.
     */
    private long accessCounter;

    /**
     * Create an empty cache.
     *
     * @param size the maximum number of entries; must be at least 1
     * @throws IllegalArgumentException if {@code size} is smaller than 1
     */
    public SoundCloudCommentsCache(final int size) {
        if (size < 1) {
            throw new IllegalArgumentException("Size must be at least 1");
        }
        store = new HashMap<>(size);
        maxSize = size;
    }

    /**
     * Store a comment together with the JSON object and index it was stored with.
     * An existing entry for {@code key} is replaced. If {@code key} is new and the cache is
     * full, the least recently used entry is evicted first.
     *
     * @param key     the key to store the entry under
     * @param comment the comment to cache
     * @param json    the JSON object to cache alongside the comment
     * @param index   the index to cache alongside the comment
     */
    public void put(@Nonnull final String key, @Nonnull final JsonObject comment,
                    @Nonnull final JsonObject json, final int index) {
        // Only make room when the entry count would actually grow; overwriting an
        // existing key must not throw away an unrelated entry.
        if (!store.containsKey(key) && store.size() >= maxSize) {
            evictLeastRecentlyUsed();
        }
        store.put(key, new CachedCommentInfo(comment, json, index, nextTick()));
    }

    /**
     * Get the entry stored for a key and mark it as most recently used.
     *
     * @param key the key to look up
     * @return the cached entry or {@code null} if there is no entry for {@code key}
     */
    @Nullable
    public CachedCommentInfo get(final String key) {
        final CachedCommentInfo result = store.get(key);
        if (result != null) {
            result.lastHit = nextTick();
        }
        return result;
    }

    /**
     * @return the number of entries currently cached
     */
    public int size() {
        return store.size();
    }

    /**
     * @return whether the cache holds no entries
     */
    public boolean isEmpty() {
        return store.isEmpty();
    }

    /**
     * Remove all entries from the cache.
     */
    public void clear() {
        store.clear();
    }

    /**
     * @return the next value of the logical access clock
     */
    private long nextTick() {
        return ++accessCounter;
    }

    /**
     * Remove the entry with the oldest access tick; does nothing if the cache is empty.
     */
    private void evictLeastRecentlyUsed() {
        Map.Entry<String, CachedCommentInfo> eldest = null;
        for (final Map.Entry<String, CachedCommentInfo> candidate : store.entrySet()) {
            if (eldest == null || candidate.getValue().lastHit < eldest.getValue().lastHit) {
                eldest = candidate;
            }
        }
        if (eldest != null) {
            store.remove(eldest.getKey());
        }
    }

    /**
     * A cache entry: the cached comment plus the JSON object and index passed to
     * {@link SoundCloudCommentsCache#put(String, JsonObject, JsonObject, int)}.
     */
    public static final class CachedCommentInfo {
        /** The cached comment. */
        @Nonnull public final JsonObject comment;
        /** The JSON object which was stored together with the comment. */
        @Nonnull public final JsonObject json;
        /** The index which was stored together with the comment. */
        public final int index;
        /** Logical time of the last insertion or read of this entry. */
        private long lastHit;

        private CachedCommentInfo(@Nonnull final JsonObject comment,
                                  @Nonnull final JsonObject json,
                                  final int index,
                                  final long lastHit) {
            this.comment = comment;
            this.json = json;
            this.index = index;
            this.lastHit = lastHit;
        }
    }
}

View File

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.utils;
package org.schabi.newpipe.extractor.utils.cache;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import static org.junit.jupiter.api.Assertions.assertEquals;

View File

@ -0,0 +1,83 @@
package org.schabi.newpipe.extractor.utils.cache;
import com.grack.nanojson.JsonObject;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Unit tests for {@link SoundCloudCommentsCache}: construction, size bookkeeping,
 * eviction order, value replacement and clearing.
 */
class SoundCloudCommentsCacheTest {

    /**
     * Sizes smaller than 1 are rejected with an {@link IllegalArgumentException};
     * positive sizes are accepted.
     */
    @Test
    void testInstantiation() {
        // Assert the specific exception type thrown by the constructor rather than
        // any RuntimeException, so unrelated failures are not mistaken for validation.
        assertThrows(IllegalArgumentException.class, () -> new SoundCloudCommentsCache(-15));
        assertThrows(IllegalArgumentException.class, () -> new SoundCloudCommentsCache(0));
        assertDoesNotThrow(() -> new SoundCloudCommentsCache(1));
        assertDoesNotThrow(() -> new SoundCloudCommentsCache(10));
    }

    /**
     * The size grows with every new key, stays the same when an existing key is
     * overwritten and drops to zero after clearing.
     */
    @Test
    void testSize() {
        final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
        assertEquals(0, cache.size());
        assertTrue(cache.isEmpty());
        cache.put("a", new JsonObject(), new JsonObject(), 1);
        assertEquals(1, cache.size());
        cache.put("b", new JsonObject(), new JsonObject(), 1);
        assertEquals(2, cache.size());
        cache.put("c", new JsonObject(), new JsonObject(), 1);
        assertEquals(3, cache.size());
        // Overwriting existing keys must not change the number of entries.
        cache.put("a", new JsonObject(), new JsonObject(), 1);
        assertEquals(3, cache.size());
        cache.put("b", new JsonObject(), new JsonObject(), 1);
        assertEquals(3, cache.size());
        cache.clear();
        assertEquals(0, cache.size());
    }

    /**
     * Inserting into a full cache evicts the entry that was written or read the longest
     * time ago; reading an entry protects it from the next evictions.
     */
    @Test
    void testLRUStrategy() {
        final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(4);
        cache.put("1", new JsonObject(), new JsonObject(), 1);
        cache.put("2", new JsonObject(), new JsonObject(), 2);
        cache.put("3", new JsonObject(), new JsonObject(), 3);
        cache.put("4", new JsonObject(), new JsonObject(), 4);
        // The cache is full: adding "5" evicts the oldest entry "1".
        cache.put("5", new JsonObject(), new JsonObject(), 5);
        assertNull(cache.get("1"));
        // Reading "2" makes it the most recently used entry ...
        final SoundCloudCommentsCache.CachedCommentInfo cci = cache.get("2");
        assertNotNull(cci);
        // ... so adding "6" evicts "3" instead of "2".
        cache.put("6", new JsonObject(), new JsonObject(), 6);
        assertNotNull(cache.get("2"));
        assertNull(cache.get("3"));
        cache.put("7", new JsonObject(), new JsonObject(), 7);
        cache.put("8", new JsonObject(), new JsonObject(), 8);
        cache.put("9", new JsonObject(), new JsonObject(), 9);
        assertNull(cache.get("1"));
        assertNull(cache.get("3"));
        assertNull(cache.get("4"));
        assertNull(cache.get("5"));
        assertNotNull(cache.get("2"));
    }

    /**
     * Putting a value under an existing key replaces the previously stored value.
     */
    @Test
    void testStorage() {
        final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
        cache.put("1", new JsonObject(), new JsonObject(), 1);
        cache.put("1", new JsonObject(), new JsonObject(), 2);
        assertEquals(2, cache.get("1").index);
        cache.put("1", new JsonObject(), new JsonObject(), 3);
        assertEquals(3, cache.get("1").index);
    }

    /**
     * Clearing removes every entry.
     */
    @Test
    void testClear() {
        final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
        cache.put("1", new JsonObject(), new JsonObject(), 1);
        cache.put("2", new JsonObject(), new JsonObject(), 2);
        cache.put("3", new JsonObject(), new JsonObject(), 3);
        cache.put("4", new JsonObject(), new JsonObject(), 4);
        cache.put("5", new JsonObject(), new JsonObject(), 5);
        cache.clear();
        assertTrue(cache.isEmpty());
        assertEquals(0, cache.size());
    }
}