mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-14 22:30:33 +05:30
Introduce FeedExtractor making fetching from dedicated feeds possible
YouTube, for example, has a dedicated feed which was built to be used like this.
This commit is contained in:
parent
be81f2945c
commit
26234a1c0f
@ -7,6 +7,7 @@ import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.feed.FeedExtractor;
|
||||
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||
@ -24,6 +25,8 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/*
|
||||
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
|
||||
* StreamingService.java is part of NewPipe.
|
||||
@ -65,7 +68,7 @@ public abstract class StreamingService {
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
public List<MediaCapability> getMediaCapabilities() {
|
||||
return mediaCapabilities;
|
||||
}
|
||||
@ -116,7 +119,7 @@ public abstract class StreamingService {
|
||||
public String toString() {
|
||||
return serviceId + ":" + serviceInfo.getName();
|
||||
}
|
||||
|
||||
|
||||
public abstract String getBaseUrl();
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
@ -173,6 +176,19 @@ public abstract class StreamingService {
|
||||
*/
|
||||
public abstract SubscriptionExtractor getSubscriptionExtractor();
|
||||
|
||||
/**
|
||||
* This method decides which strategy will be chosen to fetch the feed. In YouTube, for example, a separate feed
|
||||
* exists which is lightweight and made specifically to be used like this.
|
||||
* <p>
|
||||
* In services which there's no other way to retrieve them, null should be returned.
|
||||
*
|
||||
* @return a {@link FeedExtractor} instance or null.
|
||||
*/
|
||||
@Nullable
|
||||
public FeedExtractor getFeedExtractor(String url) throws ExtractionException {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must create a new instance of a KioskList implementation.
|
||||
* @return a new KioskList instance
|
||||
@ -258,7 +274,7 @@ public abstract class StreamingService {
|
||||
}
|
||||
return getCommentsExtractor(llhf.fromUrl(url));
|
||||
}
|
||||
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// Utils
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
@ -0,0 +1,17 @@
|
||||
package org.schabi.newpipe.extractor.feed;
|
||||
|
||||
import org.schabi.newpipe.extractor.ListExtractor;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
|
||||
/**
|
||||
* This class helps to extract items from lightweight feeds that the services may provide.
|
||||
* <p>
|
||||
* YouTube is an example of a service that has this alternative available.
|
||||
*/
|
||||
public abstract class FeedExtractor extends ListExtractor<StreamInfoItem> {
|
||||
public FeedExtractor(StreamingService service, ListLinkHandler listLinkHandler) {
|
||||
super(service, listLinkHandler);
|
||||
}
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
package org.schabi.newpipe.extractor.feed;
|
||||
|
||||
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
|
||||
import org.schabi.newpipe.extractor.ListInfo;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class FeedInfo extends ListInfo<StreamInfoItem> {
|
||||
|
||||
public FeedInfo(int serviceId, String id, String url, String originalUrl, String name, List<String> contentFilter, String sortFilter) {
|
||||
super(serviceId, id, url, originalUrl, name, contentFilter, sortFilter);
|
||||
}
|
||||
|
||||
public static FeedInfo getInfo(String url) throws IOException, ExtractionException {
|
||||
return getInfo(NewPipe.getServiceByUrl(url), url);
|
||||
}
|
||||
|
||||
public static FeedInfo getInfo(StreamingService service, String url) throws IOException, ExtractionException {
|
||||
final FeedExtractor extractor = service.getFeedExtractor(url);
|
||||
|
||||
if (extractor == null) {
|
||||
throw new IllegalArgumentException("Service \"" + service.getServiceInfo().getName() + "\" doesn't support FeedExtractor.");
|
||||
}
|
||||
|
||||
extractor.fetchPage();
|
||||
return getInfo(extractor);
|
||||
}
|
||||
|
||||
public static FeedInfo getInfo(FeedExtractor extractor) throws IOException, ExtractionException {
|
||||
extractor.fetchPage();
|
||||
|
||||
final int serviceId = extractor.getServiceId();
|
||||
final String id = extractor.getId();
|
||||
final String url = extractor.getUrl();
|
||||
final String originalUrl = extractor.getOriginalUrl();
|
||||
final String name = extractor.getName();
|
||||
|
||||
final FeedInfo info = new FeedInfo(serviceId, id, url, originalUrl, name, null, null);
|
||||
|
||||
final InfoItemsPage<StreamInfoItem> itemsPage = ExtractorHelper.getItemsPageOrLogError(info, extractor);
|
||||
info.setRelatedItems(itemsPage.getItems());
|
||||
info.setNextPageUrl(itemsPage.getNextPageUrl());
|
||||
|
||||
return info;
|
||||
}
|
||||
}
|
@ -12,6 +12,7 @@ import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.feed.FeedExtractor;
|
||||
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
||||
import org.schabi.newpipe.extractor.kiosk.KioskList;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||
@ -24,14 +25,7 @@ import org.schabi.newpipe.extractor.localization.ContentCountry;
|
||||
import org.schabi.newpipe.extractor.localization.Localization;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.*;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
|
||||
@ -42,6 +36,8 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 23.08.15.
|
||||
*
|
||||
@ -72,7 +68,7 @@ public class YoutubeService extends StreamingService {
|
||||
public String getBaseUrl() {
|
||||
return "https://youtube.com";
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public LinkHandlerFactory getStreamLHFactory() {
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance();
|
||||
@ -147,6 +143,12 @@ public class YoutubeService extends StreamingService {
|
||||
return new YoutubeSubscriptionExtractor(this);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public FeedExtractor getFeedExtractor(final String channelUrl) throws ExtractionException {
|
||||
return new YoutubeFeedExtractor(this, getChannelLHFactory().fromUrl(channelUrl));
|
||||
}
|
||||
|
||||
@Override
|
||||
public ListLinkHandlerFactory getCommentsLHFactory() {
|
||||
return YoutubeCommentsLinkHandlerFactory.getInstance();
|
||||
|
@ -46,7 +46,6 @@ import java.io.IOException;
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
|
||||
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
|
||||
|
||||
private Document doc;
|
||||
@ -130,7 +129,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
@Override
|
||||
public String getFeedUrl() throws ParsingException {
|
||||
try {
|
||||
return CHANNEL_FEED_BASE + getId();
|
||||
return YoutubeParsingHelper.getFeedUrlFrom(getId());
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get feed url", e);
|
||||
}
|
||||
|
@ -0,0 +1,82 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.schabi.newpipe.extractor.ListExtractor;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.downloader.Response;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.feed.FeedExtractor;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
|
||||
public class YoutubeFeedExtractor extends FeedExtractor {
|
||||
public YoutubeFeedExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
}
|
||||
|
||||
private Document document;
|
||||
|
||||
@Override
|
||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||
final String channelIdOrUser = getLinkHandler().getId();
|
||||
final String feedUrl = YoutubeParsingHelper.getFeedUrlFrom(channelIdOrUser);
|
||||
|
||||
final Response response = downloader.get(feedUrl);
|
||||
document = Jsoup.parse(response.responseBody());
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public ListExtractor.InfoItemsPage<StreamInfoItem> getInitialPage() {
|
||||
final Elements entries = document.select("feed > entry");
|
||||
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
|
||||
for (Element entryElement : entries) {
|
||||
collector.commit(new YoutubeFeedInfoItemExtractor(entryElement));
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getId() {
|
||||
return document.getElementsByTag("yt:channelId").first().text();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getUrl() {
|
||||
return document.select("feed > author > uri").first().text();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getName() {
|
||||
return document.select("feed > author > name").first().text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNextPage() {
|
||||
return false;
|
||||
}
|
||||
}
|
@ -0,0 +1,94 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.TimeZone;
|
||||
|
||||
public class YoutubeFeedInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
private final Element entryElement;
|
||||
|
||||
public YoutubeFeedInfoItemExtractor(Element entryElement) {
|
||||
this.entryElement = entryElement;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamType getStreamType() {
|
||||
// It is not possible to determine the stream type using the feed endpoint.
|
||||
// All entries are considered a video stream.
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAd() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() {
|
||||
// Not available when fetching through the feed endpoint.
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() {
|
||||
return Long.parseLong(entryElement.getElementsByTag("media:statistics").first().attr("views"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() {
|
||||
return entryElement.select("author > name").first().text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() {
|
||||
return entryElement.select("author > uri").first().text();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTextualUploadDate() {
|
||||
return entryElement.getElementsByTag("published").first().text();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() throws ParsingException {
|
||||
final Date date;
|
||||
try {
|
||||
final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss+00:00");
|
||||
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
date = dateFormat.parse(getTextualUploadDate());
|
||||
} catch (ParseException e) {
|
||||
throw new ParsingException("Could not parse date (\"" + getTextualUploadDate() + "\")", e);
|
||||
}
|
||||
|
||||
final Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTime(date);
|
||||
|
||||
return new DateWrapper(calendar);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return entryElement.getElementsByTag("title").first().text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() {
|
||||
return entryElement.getElementsByTag("link").first().attr("href");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() {
|
||||
return entryElement.getElementsByTag("media:thumbnail").first().attr("url");
|
||||
}
|
||||
}
|
@ -38,6 +38,9 @@ public class YoutubeParsingHelper {
|
||||
private YoutubeParsingHelper() {
|
||||
}
|
||||
|
||||
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
||||
|
||||
private static final String[] RECAPTCHA_DETECTION_SELECTORS = {
|
||||
"form[action*=\"/das_captcha\"]",
|
||||
"input[name*=\"action_recaptcha_verify\"]"
|
||||
@ -118,6 +121,16 @@ public class YoutubeParsingHelper {
|
||||
+ Long.parseLong(seconds);
|
||||
}
|
||||
|
||||
public static String getFeedUrlFrom(final String channelIdOrUser) {
|
||||
if (channelIdOrUser.startsWith("user/")) {
|
||||
return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
|
||||
} else if (channelIdOrUser.startsWith("channel/")) {
|
||||
return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
|
||||
} else {
|
||||
return FEED_BASE_CHANNEL_ID + channelIdOrUser;
|
||||
}
|
||||
}
|
||||
|
||||
public static Calendar parseDateFrom(String textualUploadDate) throws ParsingException {
|
||||
Date date;
|
||||
try {
|
||||
|
@ -0,0 +1,72 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.DownloaderTestImpl;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.services.BaseListExtractorTest;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeFeedExtractor;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||
import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestRelatedItems;
|
||||
|
||||
public class YoutubeFeedExtractorTest {
|
||||
public static class Kurzgesagt implements BaseListExtractorTest {
|
||||
private static YoutubeFeedExtractor extractor;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||
extractor = (YoutubeFeedExtractor) YouTube
|
||||
.getFeedExtractor("https://www.youtube.com/user/Kurzgesagt");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// Extractor
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
@Test
|
||||
public void testServiceId() {
|
||||
assertEquals(YouTube.getServiceId(), extractor.getServiceId());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testName() {
|
||||
String name = extractor.getName();
|
||||
assertTrue(name, name.startsWith("Kurzgesagt"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testId() {
|
||||
assertEquals("UCsXVk37bltHxD1rDPwtNM8Q", extractor.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUrl() {
|
||||
assertEquals("https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q", extractor.getUrl());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOriginalUrl() throws ParsingException {
|
||||
assertEquals("https://www.youtube.com/user/Kurzgesagt", extractor.getOriginalUrl());
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// ListExtractor
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
@Test
|
||||
public void testRelatedItems() throws Exception {
|
||||
defaultTestRelatedItems(extractor, YouTube.getServiceId());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMoreRelatedItems() {
|
||||
assertFalse(extractor.hasNextPage());
|
||||
assertNull(extractor.getNextPageUrl());
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user