From a42c77425dfc5165a75546a1b9822789d3623de2 Mon Sep 17 00:00:00 2001
From: Fynn Godau
Date: Sun, 22 Dec 2019 00:42:26 +0100
Subject: [PATCH] Bandcamp channel extractor (ignoring everything but tracks)

---
NOTE(review): BandcampChannelExtractor.java was revised during review (see the
comments in that file). The blob hash on its "index" line was not regenerated.

 .../services/bandcamp/BandcampService.java    |   3 +-
 .../extractors/BandcampChannelExtractor.java  | 190 ++++++++++++++++++
 .../extractors/BandcampExtractorHelper.java   |  25 ++-
 .../extractors/BandcampSearchExtractor.java   |   4 +-
 .../extractors/BandcampStreamExtractor.java   |   2 +
 .../BandcampStreamInfoItemExtractor.java      |   4 +-
 .../BandcampChannelLinkHandlerFactory.java    |  28 +--
 .../BandcampStreamLinkHandlerFactory.java     |   4 +-
 .../BandcampChannelExtractorTest.java         |  42 ++++
 ...BandcampChannelLinkHandlerFactoryTest.java |  10 +
 .../bandcamp/BandcampStreamExtractorTest.java |   5 +
 11 files changed, 287 insertions(+), 30 deletions(-)
 create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java
 create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java
index 86e1f7dad..4752e1f09 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java
@@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.kiosk.KioskList;
 import org.schabi.newpipe.extractor.linkhandler.*;
 import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
 import org.schabi.newpipe.extractor.search.SearchExtractor;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampSearchExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampChannelLinkHandlerFactory;
@@ -81,7 +82,7 @@ public class BandcampService extends StreamingService {
 
     @Override
     public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
-        return null;
+        return new BandcampChannelExtractor(this, linkHandler);
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java
new file mode 100644
index 000000000..bb39ca1d0
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java
@@ -0,0 +1,190 @@
+// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later
+
+package org.schabi.newpipe.extractor.services.bandcamp.extractors;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.channel.ChannelExtractor;
+import org.schabi.newpipe.extractor.downloader.Downloader;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.stream.StreamInfoItem;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
+
+import javax.annotation.Nonnull;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Channel extractor backed by the band details of Bandcamp's mobile API.
+ * Only the tracks of the discography are listed; every other item type is skipped.
+ */
+public class BandcampChannelExtractor extends ChannelExtractor {
+
+    private final JSONObject channelInfo;
+
+    /**
+     * Downloads the band details for the id of {@code linkHandler} right away.
+     *
+     * @throws ParsingException if the band details cannot be downloaded or parsed
+     */
+    public BandcampChannelExtractor(StreamingService service, ListLinkHandler linkHandler) throws ParsingException {
+        super(service, linkHandler);
+
+        channelInfo = getArtistDetails(getId());
+    }
+
+    /**
+     * Fetch artist details from mobile endpoint.
+     *
+     * I once took a moment to note down how it works.
+     *
+     * @param id The band id, inserted verbatim into the JSON request body
+     * @return The parsed response of the band_details endpoint
+     * @throws ParsingException if the download fails or the response is not valid JSON
+     */
+    public static JSONObject getArtistDetails(String id) throws ParsingException {
+        try {
+            return
+                    new JSONObject(
+                            NewPipe.getDownloader().post(
+                                    "https://bandcamp.com/api/mobile/22/band_details",
+                                    null,
+                                    // Explicit charset: don't depend on the platform default
+                                    ("{\"band_id\":\"" + id + "\"}").getBytes(StandardCharsets.UTF_8)
+                            ).responseBody()
+                    );
+        } catch (IOException | ReCaptchaException e) {
+            throw new ParsingException("Could not download band details", e);
+        } catch (JSONException e) {
+            // org.json's JSONException is unchecked and would otherwise escape this method
+            throw new ParsingException("Band details are not valid JSON", e);
+        }
+    }
+
+    /**
+     * @param id    The image ID
+     * @param album Whether this is the cover of an album
+     * @return Url of image with this ID in size 10 which is 1200x1200 (we could also choose size 0
+     * but we don't want something as large as 3460x3460 here, do we?)
+     */
+    public static String getImageUrl(long id, boolean album) {
+        return "https://f4.bcbits.com/img/" + (album ? "a" : "") + id + "_10.jpg";
+    }
+
+    @Override
+    public String getAvatarUrl() {
+        return getImageUrl(channelInfo.getLong("bio_image_id"), false);
+    }
+
+    /**
+     * Why does the mobile endpoint not contain the header?? Or at least not the same one?
+     * Anyway we're back to querying websites
+     *
+     * @return The source of the image in the "customHeader" element of the artist's web
+     * page, or an empty string if the page has no such element
+     */
+    @Override
+    public String getBannerUrl() throws ParsingException {
+        try {
+            String html = getDownloader().get(channelInfo.getString("bandcamp_url"))
+                    .responseBody();
+
+            // Jsoup.parse builds the DOM; new Document(String) treats its argument as the
+            // base URI and creates an empty document
+            Document page = Jsoup.parse(html);
+
+            Element header = page.getElementById("customHeader");
+            if (header == null) {
+                return "";
+            }
+
+            return header.getElementsByTag("img").attr("src");
+
+        } catch (IOException | ReCaptchaException e) {
+            throw new ParsingException("Could not download artist web site", e);
+        }
+    }
+
+    /**
+     * I had to learn bandcamp stopped providing RSS feeds when appending /feed to any URL
+     * because too few people used it. Bummer!
+     */
+    @Override
+    public String getFeedUrl() {
+        return null;
+    }
+
+    @Override
+    public long getSubscriberCount() {
+        return -1;
+    }
+
+    @Override
+    public String getDescription() {
+        return channelInfo.getString("bio");
+    }
+
+    /**
+     * Lists every track of the discography. Resolving a track's URL costs one request to
+     * the mobile API per track, see {@link BandcampExtractorHelper#getStreamUrlFromIds}.
+     */
+    @Nonnull
+    @Override
+    public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
+
+        StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
+
+        JSONArray discography = channelInfo.getJSONArray("discography");
+
+        for (int i = 0; i < discography.length(); i++) {
+            // I define discograph as an item that can appear in a discography
+            JSONObject discograph = discography.getJSONObject(i);
+
+            if (!discograph.getString("item_type").equals("track")) continue;
+
+            collector.commit(new BandcampStreamInfoItemExtractor(
+                    discograph.getString("title"),
+                    BandcampExtractorHelper.getStreamUrlFromIds(
+                            discograph.getLong("band_id"),
+                            discograph.getLong("item_id"),
+                            discograph.getString("item_type")
+                    ),
+                    getImageUrl(
+                            discograph.getLong("art_id"), true
+                    ),
+                    discograph.getString("band_name")
+            ));
+        }
+
+        return new InfoItemsPage<>(collector, null);
+    }
+
+    @Override
+    public String getNextPageUrl() {
+        return null;
+    }
+
+    @Override
+    public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
+        return null;
+    }
+
+    @Override
+    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+    }
+
+    @Nonnull
+    @Override
+    public String getName() {
+        return channelInfo.getString("name");
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java
index 98ad88923..b1668641c 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java
@@ -4,8 +4,11 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
 
 import org.json.JSONException;
 import org.json.JSONObject;
+import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 
@@ -21,7 +24,7 @@ public class BandcampExtractorHelper {
      * @param variable Name of the variable
      * @return The JsonObject stored in the variable with this name
      */
-    public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ParsingException {
+    public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ArrayIndexOutOfBoundsException, ParsingException {
 
         String[] part = html.split("var " + variable + " = ");
 
@@ -52,6 +55,26 @@ public class BandcampExtractorHelper {
         throw new ParsingException("Unexpected HTML: JSON never ends");
     }
 
+    /**
+     * Translate all these parameters together to the URL of the corresponding album or track
+     * using the mobile api
+     */
+    public static String getStreamUrlFromIds(long bandId, long itemId, String itemType) throws ParsingException {
+
+        try {
+            String html = NewPipe.getDownloader().get(
+                    "https://bandcamp.com/api/mobile/22/tralbum_details?band_id=" + bandId
+                            + "&tralbum_id=" + itemId + "&tralbum_type=" + itemType.substring(0, 1))
+                    .responseBody();
+
+            return new JSONObject(html).getString("bandcamp_url").replace("http://", "https://");
+
+        } catch (JSONException | ReCaptchaException | IOException e) {
+            throw new ParsingException("Ids could not be translated to URL", e);
+        }
+
+    }
+
     /**
      * Concatenate all non-null and non-empty strings together while separating them using
      * the comma parameter
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java
index 1b49ff70a..34f52af1e 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java
@@ -88,14 +88,12 @@ public class BandcampSearchExtractor extends SearchExtractor {
                     break;
 
                 case "TRACK":
-                    String album = subhead.split("from ")[0].split(" by")[0];
-
                     String[] splitBy = subhead.split(" by");
                     String artist1 = null;
                     if (splitBy.length > 1) {
                         artist1 = subhead.split(" by")[1];
                     }
-                    collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1, album));
+                    collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1));
                     break;
             }
 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
index d9253b600..a8fa30004 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
@@ -57,6 +57,8 @@ public class BandcampStreamExtractor extends StreamExtractor {
             return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
         } catch (JSONException e) {
             throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
+        } catch (ArrayIndexOutOfBoundsException e) {
+            throw new ParsingException("JSON does not exist", e);
         }
     }
 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java
index d1d965797..6cb96411e 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java
@@ -15,14 +15,12 @@ public class BandcampStreamInfoItemExtractor implements StreamInfoItemExtractor
     private String url;
     private String cover;
     private String artist;
-    private String albumName;
 
-    public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist, String albumName) {
+    public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist) {
         this.title = title;
         this.url = url;
         this.cover = cover;
         this.artist = artist;
-        this.albumName = albumName;
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java
index e4521ffb0..0d57eac01 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java
@@ -2,15 +2,14 @@
 
 package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
 
+import org.json.JSONException;
 import org.json.JSONObject;
 import org.schabi.newpipe.extractor.NewPipe;
-import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
-import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
-import org.schabi.newpipe.extractor.utils.ExtractorHelper;
 
 import java.io.IOException;
 import java.util.List;
@@ -31,35 +30,24 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
 
             return String.valueOf(bandData.getLong("id"));
 
-        } catch (IOException | ReCaptchaException e) {
+        } catch (IOException | ReCaptchaException | ArrayIndexOutOfBoundsException e) {
             throw new ParsingException("Download failed", e);
         }
     }
 
     /**
-     * Fetch artist details from mobile endpoint, thereby receiving their URL.
-     *
-     * I once took a moment to note down how it works.
-     *
-     * @throws ParsingException
+     * Uses the mobile endpoint as a "translator" from id to url
      */
     @Override
     public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException {
         try {
-            String data = NewPipe.getDownloader().post(
-                    "https://bandcamp.com/api/mobile/22/band_details",
-                    null,
-                    ("{\"band_id\":\"" + id + "\"}").getBytes()
-            ).responseBody();
-
-            return new JSONObject(data)
+            return BandcampChannelExtractor.getArtistDetails(id)
                     .getString("bandcamp_url")
                     .replace("http://", "https://");
-
-
-        } catch (IOException | ReCaptchaException e) {
-            throw new ParsingException("Download failed", e);
+        } catch (JSONException e) {
+            throw new ParsingException("JSON does not contain URL (invalid id?) or is otherwise invalid", e);
         }
+
     }
 
     /**
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java
index e534f925f..0536594e1 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java
@@ -6,8 +6,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
 
 /**
- * Tracks do have IDs, but they are not really useful. That's why id = url.
- * Instead, URLs are cleaned up so that they always look the same.
+ * Tracks don't have standalone ids, they are always in combination with the band id.
+ * That's why id = url. Instead, URLs are cleaned up so that they always look the same.
  */
 public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {
 
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java
new file mode 100644
index 000000000..b39ae4f2f
--- /dev/null
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java
@@ -0,0 +1,42 @@
+package org.schabi.newpipe.extractor.services.bandcamp;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.schabi.newpipe.DownloaderTestImpl;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.schabi.newpipe.extractor.ServiceList.bandcamp;
+
+public class BandcampChannelExtractorTest {
+
+    private static BandcampChannelExtractor extractor;
+
+    @BeforeClass
+    public static void setUp() throws Exception {
+        NewPipe.init(DownloaderTestImpl.getInstance());
+        extractor = (BandcampChannelExtractor) bandcamp
+                .getChannelExtractor("https://zachbenson.bandcamp.com/");
+    }
+
+    @Test
+    public void testImageUrl() {
+        assertEquals("https://f4.bcbits.com/img/a2405652335_10.jpg", BandcampChannelExtractor.getImageUrl(2405652335L, true));
+        assertEquals("https://f4.bcbits.com/img/17433693_10.jpg", BandcampChannelExtractor.getImageUrl(17433693L, false));
+    }
+
+    @Test
+    public void testTranslateIdsToUrl() throws ParsingException {
+        assertEquals("https://zachbenson.bandcamp.com/album/covers", BandcampExtractorHelper.getStreamUrlFromIds(2862267535L, 2063639444L, "album"));
+        // TODO write more test cases
+    }
+
+    @Test
+    public void testLength() throws ParsingException {
+        assertTrue(extractor.getInitialPage().getItems().size() > 2);
+    }
+}
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java
index 7e46a096b..191f096c0 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java
@@ -51,4 +51,14 @@ public class BandcampChannelLinkHandlerFactoryTest {
         assertEquals("https://infiniteammo.bandcamp.com", linkHandler.getUrl("3321800855"));
     }
 
+    @Test(expected = ParsingException.class)
+    public void testGetUrlWithInvalidId() throws ParsingException {
+        linkHandler.getUrl("0");
+    }
+
+    @Test(expected = ParsingException.class)
+    public void testGetIdWithInvalidUrl() throws ParsingException {
+        linkHandler.getId("https://bandcamp.com");
+    }
+
 }
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java
index bcd925fcf..49b7c6f15 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java
@@ -67,4 +67,9 @@ public class BandcampStreamExtractorTest {
         assertEquals(1, extractor.getAudioStreams().size());
     }
 
+    @Test(expected = ParsingException.class)
+    public void testInvalidUrl() throws ExtractionException {
+        bandcamp.getStreamExtractor("https://bandcamp.com");
+    }
+
 }