From ba700bfb3e05752ea2e55ed57ce6973d0258a0e7 Mon Sep 17 00:00:00 2001 From: Fynn Godau Date: Sun, 22 Dec 2019 02:55:54 +0100 Subject: [PATCH] Add bandcamp playlists (albums) --- .../services/bandcamp/BandcampService.java | 6 +- .../BandcampChannelInfoItemExtractor.java | 2 + .../extractors/BandcampPlaylistExtractor.java | 133 ++++++++++++++++++ .../BandcampPlaylistInfoItemExtractor.java | 40 ++++++ .../extractors/BandcampSearchExtractor.java | 3 +- .../extractors/BandcampStreamExtractor.java | 10 +- .../BandcampStreamInfoItemExtractor.java | 28 ++-- .../BandcampPlaylistLinkHandlerFactory.java | 31 ++++ .../BandcampChannelExtractorTest.java | 2 + .../BandcampPlaylistExtractorTest.java | 29 ++++ ...andcampPlaylistLinkHandlerFactoryTest.java | 45 ++++++ .../bandcamp/BandcampSearchExtractorTest.java | 21 ++- 12 files changed, 330 insertions(+), 20 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistInfoItemExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistExtractorTest.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java index 4752e1f09..211f7310f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java @@ -11,9 +11,11 @@ import org.schabi.newpipe.extractor.linkhandler.*; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor; +import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampPlaylistExtractor; import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampSearchExtractor; import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor; import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampChannelLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampPlaylistLinkHandlerFactory; import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampSearchQueryHandlerFactory; import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamExtractor; @@ -47,7 +49,7 @@ public class BandcampService extends StreamingService { @Override public ListLinkHandlerFactory getPlaylistLHFactory() { - return null; + return new BandcampPlaylistLinkHandlerFactory(); } @Override @@ -87,7 +89,7 @@ public class BandcampService extends StreamingService { @Override public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException { - return null; + return new BandcampPlaylistExtractor(this, linkHandler); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelInfoItemExtractor.java index b5d851eab..33d999117 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelInfoItemExtractor.java @@ -1,3 +1,5 @@ +// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later + package org.schabi.newpipe.extractor.services.bandcamp.extractors; import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java new file mode 100644 index 000000000..ac5e8998d --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java @@ -0,0 +1,133 @@ +package org.schabi.newpipe.extractor.services.bandcamp.extractors; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; + +import javax.annotation.Nonnull; +import java.io.IOException; + +import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJSONFromJavaScriptVariables; +import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor.getAlbumInfoJson; + +public class BandcampPlaylistExtractor extends PlaylistExtractor { + + private Document document; + private JSONObject albumJson; + private JSONArray trackInfo; + private String name; + + public BandcampPlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { + super(service, linkHandler); + } + + @Override + public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { + String html = downloader.get(getLinkHandler().getUrl()).responseBody(); + document = Jsoup.parse(html); + albumJson = getAlbumInfoJson(html); + trackInfo = albumJson.getJSONArray("trackinfo"); + + try { + name = getJSONFromJavaScriptVariables(html, "EmbedData").getString("album_title"); + } catch (JSONException e) { + throw new ParsingException("Faulty JSON; page likely does not contain album data", e); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ParsingException("JSON does not exist", e); + } + + + + if (trackInfo.length() <= 1) { + // In this case, we are actually viewing a track page! + throw new ExtractionException("Page is actually a track, not an album"); + } + } + + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return document.getElementsByAttributeValue("property", "og:image").get(0).attr("content"); + } catch (NullPointerException e) { + return ""; + } + } + + @Override + public String getBannerUrl() { + return ""; + } + + @Override + public String getUploaderUrl() throws ParsingException { + String[] parts = getUrl().split("/"); + // https: (/) (/) * .bandcamp.com (/) and leave out the rest + return "https://" + parts[2] + "/"; + } + + @Override + public String getUploaderName() { + return albumJson.getString("artist"); + } + + @Override + public String getUploaderAvatarUrl() { + try { + return document.getElementsByClass("band-photo").first().attr("src"); + } catch (NullPointerException e) { + return ""; + } + } + + @Override + public long getStreamCount() { + return trackInfo.length(); + } + + @Nonnull + @Override + public InfoItemsPage getInitialPage() throws ExtractionException { + + StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + + for (int i = 0; i < trackInfo.length(); i++) { + JSONObject track = trackInfo.getJSONObject(i); + + collector.commit(new BandcampStreamInfoItemExtractor( + track.getString("title"), + getUploaderUrl() + track.getString("title_link"), + "", + "", + track.getLong("duration") + )); + } + + return new InfoItemsPage<>(collector, null); + } + + @Nonnull + @Override + public String getName() throws ParsingException { + return name; + } + + @Override + public String getNextPageUrl() { + return null; + } + + @Override + public InfoItemsPage getPage(String pageUrl) { + return null; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistInfoItemExtractor.java new file mode 100644 index 000000000..11aea8ea4 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistInfoItemExtractor.java @@ -0,0 +1,40 @@ +package org.schabi.newpipe.extractor.services.bandcamp.extractors; + +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; + +public class BandcampPlaylistInfoItemExtractor implements PlaylistInfoItemExtractor { + + private String title, artist, url, cover; + + public BandcampPlaylistInfoItemExtractor(String title, String artist, String url, String cover) { + this.title = title; + this.artist = artist; + this.url = url; + this.cover = cover; + } + + @Override + public String getUploaderName() { + return artist; + } + + @Override + public long getStreamCount() { + return -1; + } + + @Override + public String getName() { + return title; + } + + @Override + public String getUrl() { + return url; + } + + @Override + public String getThumbnailUrl() { + return cover; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java index fc89002fc..38891c004 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java @@ -74,8 +74,7 @@ public class BandcampSearchExtractor extends SearchExtractor { case "ALBUM": String artist = subhead.split(" by")[0]; - //searchResults.add(new Album(heading, artist, url, image)); - //collector.commit Playlist with heading, artist, url, image + collector.commit(new BandcampPlaylistInfoItemExtractor(heading, artist, url, image)); break; case "TRACK": diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java index be4c03729..72c3a551c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java @@ -50,7 +50,7 @@ public class BandcampStreamExtractor extends StreamExtractor { * * @param html Website * @return Album metadata JSON - * @throws ParsingException In case of a faulty website + * @throws ParsingException In case of a faulty website */ public static JSONObject getAlbumInfoJson(String html) throws ParsingException { try { @@ -103,7 +103,11 @@ public class BandcampStreamExtractor extends StreamExtractor { @Nonnull @Override public String getThumbnailUrl() throws ParsingException { - return document.getElementsByAttributeValue("property", "og:image").get(0).attr("content"); + try { + return document.getElementsByAttributeValue("property", "og:image").get(0).attr("content"); + } catch (NullPointerException e) { + return ""; + } } @Nonnull @@ -188,7 +192,7 @@ public class BandcampStreamExtractor extends StreamExtractor { audioStreams.add(new AudioStream( albumJson.getJSONArray("trackinfo").getJSONObject(0) - .getJSONObject("file").getString("mp3-128"), + .getJSONObject("file").getString("mp3-128"), MediaFormat.MP3, 128 )); return audioStreams; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java index 6cb96411e..4f2a5724f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java @@ -15,48 +15,54 @@ public class BandcampStreamInfoItemExtractor implements StreamInfoItemExtractor private String url; private String cover; private String artist; + private long duration; public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist) { + this(title, url, cover, artist, -1); + } + + public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist, long duration) { this.title = title; this.url = url; this.cover = cover; this.artist = artist; + this.duration = duration; } @Override - public StreamType getStreamType() throws ParsingException { + public StreamType getStreamType() { return StreamType.AUDIO_STREAM; } @Override - public long getDuration() throws ParsingException { + public long getDuration() { + return duration; + } + + @Override + public long getViewCount() { return -1; } @Override - public long getViewCount() throws ParsingException { - return -1; - } - - @Override - public String getUploaderName() throws ParsingException { + public String getUploaderName() { return artist; } @Override - public String getUploaderUrl() throws ParsingException { + public String getUploaderUrl() { return null; } @Nullable @Override - public String getTextualUploadDate() throws ParsingException { + public String getTextualUploadDate() { return null; // TODO } @Nullable @Override - public DateWrapper getUploadDate() throws ParsingException { + public DateWrapper getUploadDate() { return null; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java new file mode 100644 index 000000000..e7aac43e2 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java @@ -0,0 +1,31 @@ +// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later + +package org.schabi.newpipe.extractor.services.bandcamp.linkHandler; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; + +import java.util.List; + +/** + * Just as with streams, the album ids are essentially useless for us. + */ +public class BandcampPlaylistLinkHandlerFactory extends ListLinkHandlerFactory { + @Override + public String getId(String url) throws ParsingException { + return getUrl(url); + } + + @Override + public String getUrl(String url, List contentFilter, String sortFilter) throws ParsingException { + if (url.endsWith("/")) + url = url.substring(0, url.length() - 1); + url = url.replace("http://", "https://").toLowerCase(); + return url; + } + + @Override + public boolean onAcceptUrl(String url) throws ParsingException { + return getUrl(url).matches("https?://.+\\..+/album/.+"); + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java index d924b4fb2..850529774 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java @@ -1,3 +1,5 @@ +// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later + package org.schabi.newpipe.extractor.services.bandcamp; import org.junit.BeforeClass; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistExtractorTest.java new file mode 100644 index 000000000..e7482dfb5 --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistExtractorTest.java @@ -0,0 +1,29 @@ +package org.schabi.newpipe.extractor.services.bandcamp; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.schabi.newpipe.extractor.ServiceList.bandcamp; + +public class BandcampPlaylistExtractorTest { + + @BeforeClass + public static void setUp() { + NewPipe.init(DownloaderTestImpl.getInstance()); + } + + @Test + public void testCount() throws ExtractionException, IOException { + PlaylistExtractor extractor = bandcamp.getPlaylistExtractor("https://macbenson.bandcamp.com/album/coming-of-age"); + extractor.fetchPage(); + + assertEquals(5, extractor.getStreamCount()); + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java new file mode 100644 index 000000000..1da3069c9 --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java @@ -0,0 +1,45 @@ +// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later + +package org.schabi.newpipe.extractor.services.bandcamp; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampPlaylistLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampStreamLinkHandlerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test for {@link BandcampPlaylistLinkHandlerFactory} + */ +public class BandcampPlaylistLinkHandlerFactoryTest { + + private static BandcampPlaylistLinkHandlerFactory linkHandler; + + @BeforeClass + public static void setUp() { + linkHandler = new BandcampPlaylistLinkHandlerFactory(); + NewPipe.init(DownloaderTestImpl.getInstance()); + } + + @Test + public void testAcceptUrl() throws ParsingException { + // Tests expecting false + assertFalse(linkHandler.acceptUrl("http://interovgm.com/releases/")); + assertFalse(linkHandler.acceptUrl("https://interovgm.com/releases")); + assertFalse(linkHandler.acceptUrl("http://zachbenson.bandcamp.com")); + assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); + assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); + assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); + assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title")); + + // Tests expecting true + assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids")); + assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom")); + } + +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java index 5780e1612..98bdc843f 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java @@ -38,7 +38,7 @@ public class BandcampSearchExtractorTest { * the accordingly named song by Zach Benson */ @Test - public void testBestFriendsBasement() throws ExtractionException, IOException { + public void testStreamSearch() throws ExtractionException, IOException { SearchExtractor extractor = bandcamp.getSearchExtractor("best friend's basement"); ListExtractor.InfoItemsPage page = extractor.getInitialPage(); @@ -55,7 +55,7 @@ public class BandcampSearchExtractorTest { * Tests whether searching bandcamp for "C418" returns the artist's profile */ @Test - public void testC418() throws ExtractionException, IOException { + public void testChannelSearch() throws ExtractionException, IOException { SearchExtractor extractor = bandcamp.getSearchExtractor("C418"); InfoItem c418 = extractor.getInitialPage() .getItems().get(0); @@ -67,4 +67,21 @@ public class BandcampSearchExtractorTest { assertEquals("https://c418.bandcamp.com", c418.getUrl()); } + + /** + * Tests whether searching bandcamp for "minecraft volume alpha" returns the corresponding album + */ + @Test + public void testAlbumSearch() throws ExtractionException, IOException { + SearchExtractor extractor = bandcamp.getSearchExtractor("minecraft volume alpha"); + InfoItem minecraft = extractor.getInitialPage() + .getItems().get(0); + + // C418's artist profile should be the first result, no? + assertEquals("Minecraft - Volume Alpha", minecraft.getName()); + assertTrue(minecraft.getThumbnailUrl().endsWith(".jpg")); + assertTrue(minecraft.getThumbnailUrl().contains("f4.bcbits.com/img/")); + assertEquals("https://c418.bandcamp.com/album/minecraft-volume-alpha", minecraft.getUrl()); + + } }