mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-14 22:30:33 +05:30
Rework link handlers to correctly accept external websites
This commit is contained in:
parent
be562b8436
commit
04dd3d4d32
@ -123,6 +123,28 @@ public class BandcampExtractorHelper {
|
|||||||
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
|
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return <code>true</code> if the given url looks like it comes from a bandcamp custom domain
|
||||||
|
* or if it comes from bandcamp.com itself
|
||||||
|
*/
|
||||||
|
public static boolean isSupportedDomain(final String url) throws ParsingException {
|
||||||
|
|
||||||
|
// Accept all bandcamp.com URLs
|
||||||
|
if (url.toLowerCase().matches("https?://.+\\.bandcamp\\.com(/.*)?")) return true;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Accept all other URLs if they contain a <meta> tag that says they are generated by bandcamp
|
||||||
|
return Jsoup.parse(
|
||||||
|
NewPipe.getDownloader().get(url).responseBody()
|
||||||
|
)
|
||||||
|
.getElementsByAttributeValue("name", "generator")
|
||||||
|
.attr("content").equals("Bandcamp");
|
||||||
|
} catch (IOException | ReCaptchaException e) {
|
||||||
|
throw new ParsingException("Could not determine whether URL is custom domain " +
|
||||||
|
"(not available? network error?)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static DateWrapper parseDate(final String textDate) throws ParsingException {
|
static DateWrapper parseDate(final String textDate) throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate);
|
final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate);
|
||||||
|
@ -24,7 +24,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||||||
try {
|
try {
|
||||||
final String response = NewPipe.getDownloader().get(url).responseBody();
|
final String response = NewPipe.getDownloader().get(url).responseBody();
|
||||||
|
|
||||||
// This variable contains band data!
|
// Use band data embedded in website to extract ID
|
||||||
final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
|
final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
|
||||||
|
|
||||||
return String.valueOf(bandData.getLong("id"));
|
return String.valueOf(bandData.getLong("id"));
|
||||||
@ -51,17 +51,15 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Matches <code>* .bandcamp.com</code> as well as custom domains
|
* Accepts only pages that do not lead to an album or track. Supports external pages.
|
||||||
* where the profile is at <code>* . * /releases</code>
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean onAcceptUrl(final String url) {
|
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||||
|
|
||||||
// Is a subdomain of bandcamp.com?
|
// Exclude URLs that lead to a track or album
|
||||||
boolean isBandcampComArtistPage = url.matches("https?://.+\\.bandcamp\\.com/?");
|
if (url.matches(".*/(album|track)/.*")) return false;
|
||||||
|
|
||||||
boolean isCustomDomainReleases = url.matches("https?://.+\\..+/releases/?(?!.)");
|
// Test whether domain is supported
|
||||||
|
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||||
return isBandcampComArtistPage || isCustomDomainReleases;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
|
|||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@ -22,8 +23,16 @@ public class BandcampPlaylistLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Accepts all bandcamp URLs that contain /album/ behind their domain name.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean onAcceptUrl(final String url) {
|
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||||
return url.toLowerCase().matches("https?://.+\\..+/album/.+");
|
|
||||||
|
// Exclude URLs which do not lead to an album
|
||||||
|
if (!url.toLowerCase().matches("https?://.+\\..+/album/.+")) return false;
|
||||||
|
|
||||||
|
// Test whether domain is supported
|
||||||
|
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
|
|||||||
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||||
|
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Tracks don't have standalone ids, they are always in combination with the band id.
|
* <p>Tracks don't have standalone ids, they are always in combination with the band id.
|
||||||
@ -40,16 +41,19 @@ public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sometimes, the root page of an artist is also an album or track
|
* Accepts URLs that point to a bandcamp radio show or that are a bandcamp
|
||||||
* page. In that case, it is assumed that one actually wants to open
|
* domain and point to a track.
|
||||||
* the profile and not the track it has set as the default one.
|
|
||||||
* <p>Urls are expected to be in this format to account for
|
|
||||||
* custom domains:</p>
|
|
||||||
* <code>https:// * . * /track/ *</code>
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean onAcceptUrl(final String url) {
|
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||||
return url.toLowerCase().matches("https?://.+\\..+/track/.+")
|
|
||||||
|| url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+");
|
// Accept Bandcamp radio
|
||||||
|
if (url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+")) return true;
|
||||||
|
|
||||||
|
// Don't accept URLs that don't point to a track
|
||||||
|
if (!url.toLowerCase().matches("https?://.+\\..+/track/.+")) return false;
|
||||||
|
|
||||||
|
// Test whether domain is supported
|
||||||
|
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,13 +26,19 @@ public class BandcampChannelLinkHandlerFactoryTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAcceptUrl() throws ParsingException {
|
public void testAcceptUrl() throws ParsingException {
|
||||||
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/"));
|
// Bandcamp URLs
|
||||||
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
|
|
||||||
assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com"));
|
assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com"));
|
||||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||||
|
assertTrue(linkHandler.acceptUrl("https://billwurtz.bandcamp.com/releases"));
|
||||||
|
|
||||||
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
||||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||||
|
|
||||||
|
// External URLs
|
||||||
|
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/"));
|
||||||
|
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
|
||||||
|
|
||||||
|
assertFalse(linkHandler.acceptUrl("https://example.com/releases"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -35,6 +35,7 @@ public class BandcampPlaylistLinkHandlerFactoryTest {
|
|||||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||||
assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title"));
|
assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title"));
|
||||||
|
assertFalse(linkHandler.acceptUrl("https://example.com/album/samplealbum"));
|
||||||
|
|
||||||
assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
||||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom"));
|
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom"));
|
||||||
|
@ -43,6 +43,7 @@ public class BandcampStreamLinkHandlerFactoryTest {
|
|||||||
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
||||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||||
assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
||||||
|
assertFalse(linkHandler.acceptUrl("https://example.com/track/sampletrack"));
|
||||||
|
|
||||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||||
assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/"));
|
assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/"));
|
||||||
|
Loading…
Reference in New Issue
Block a user