From 0efb854d27deeee0f9f85d6f5537aa137aeb07c0 Mon Sep 17 00:00:00 2001
From: Xiang Rong Lin <41164160+XiangRongLin@users.noreply.github.com>
Date: Sun, 2 Feb 2020 14:19:48 +0100
Subject: [PATCH] [Youtube] Implement mix extractor for auto-generated playlists.

-New YoutubeMixPlaylistExtractor, that extracts from a mix (auto-generated playlist).
-The url has the format of "youtube.com/watch?v=videoID&list=playlistID", where playlistID always starts with "RD" and is usually followed by the videoID.
-Change YoutubePlaylistLinkHandlerFactory to create a linkhandler with the given url if it is a mix.
-Change YoutubeService to return YoutubeMixPlaylistExtractor if the url is a mix.
---
 .../youtube/YoutubeParsingHelper.java         |  11 +
 .../services/youtube/YoutubeService.java      |   7 +-
 .../YoutubeMixPlaylistExtractor.java          | 210 ++++++++++++++++++
 .../YoutubePlaylistLinkHandlerFactory.java    |  33 ++
 4 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor.java

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
index 6ea588341..42420a1a7 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
@@ -192,6 +192,17 @@ public class YoutubeParsingHelper {
         }
     }
 
+    /**
+     * Checks whether a playlist id belongs to a YouTube mix (auto-generated playlist).
+     *
+     * @param playlistId the playlist id to test; may be {@code null}
+     * @return {@code true} if the id starts with {@code "RD"}, {@code false} otherwise,
+     *         including for {@code null}
+     */
+    public static boolean isYoutubeMixId(String playlistId) {
+        return playlistId != null && playlistId.startsWith("RD");
+    }
+
     public static JsonObject getInitialData(String html) throws ParsingException {
         try {
             try {
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java
index 519672141..7d7a83eba 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java
@@ -110,7 +110,12 @@ public class YoutubeService extends StreamingService {
 
     @Override
     public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) {
-        return new YoutubePlaylistExtractor(this, linkHandler);
+        // Mix ids (prefix "RD") are handled by their own extractor
+        if (YoutubeParsingHelper.isYoutubeMixId(linkHandler.getId())) {
+            return new YoutubeMixPlaylistExtractor(this, linkHandler);
+        } else {
+            return new YoutubePlaylistExtractor(this, linkHandler);
+        }
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor.java
new file mode 100644
index 000000000..69b3fb987
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor.java
@@ -0,0 +1,210 @@
+package org.schabi.newpipe.extractor.services.youtube.extractors;
+
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonParser;
+import com.grack.nanojson.JsonParserException;
+import java.io.IOException;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.downloader.Downloader;
+import org.schabi.newpipe.extractor.downloader.Response;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.localization.TimeAgoParser;
+import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
+import org.schabi.newpipe.extractor.stream.StreamInfoItem;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
+
+/**
+ * Extracts a YouTube mix (auto-generated playlist) from the HTML of the page behind the mix url.
+ *
+ * <p>Only the videos listed in the fetched page are returned; continuations are not
+ * implemented, so there is never a next page.</p>
+ */
+public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
+
+    private Document doc;
+
+    public YoutubeMixPlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
+        super(service, linkHandler);
+    }
+
+    @Override
+    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+        final String url = getUrl();
+        final Response response = downloader.get(url, getExtractorLocalization());
+        doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
+    }
+
+    @Nonnull
+    @Override
+    public String getName() throws ParsingException {
+        try {
+            return doc.select("div[class=\"playlist-info\"] h3[class=\"playlist-title\"]").first().text();
+        } catch (Exception e) {
+            throw new ParsingException("Could not get playlist name", e);
+        }
+    }
+
+    @Override
+    public String getThumbnailUrl() throws ParsingException {
+        try {
+            return doc.select("ol[class*=\"playlist-videos-list\"] li").first().attr("data-thumbnail-url");
+        } catch (Exception e) {
+            throw new ParsingException("Could not get playlist thumbnail", e);
+        }
+    }
+
+    @Override
+    public String getBannerUrl() {
+        return "";
+    }
+
+    @Override
+    public String getUploaderUrl() {
+        //Youtube mix are auto-generated
+        return "";
+    }
+
+    @Override
+    public String getUploaderName() {
+        //Youtube mix are auto-generated
+        return "";
+    }
+
+    @Override
+    public String getUploaderAvatarUrl() {
+        //Youtube mix are auto-generated
+        return "";
+    }
+
+    @Override
+    public long getStreamCount() {
+        // Auto-generated playlist always start with 25 videos and are endless
+        // But the html doesn't have a continuation url
+        return doc.select("ol[class*=\"playlist-videos-list\"] li").size();
+    }
+
+    @Nonnull
+    @Override
+    public InfoItemsPage<StreamInfoItem> getInitialPage() {
+        StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
+        Element ol = doc.select("ol[class*=\"playlist-videos-list\"]").first();
+        collectStreamsFrom(collector, ol);
+        return new InfoItemsPage<>(collector, getNextPageUrl());
+    }
+
+    @Override
+    public String getNextPageUrl() {
+        return "";
+    }
+
+    @Override
+    public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl) {
+        // Continuations are not implemented: hand back an empty page instead of null
+        return new InfoItemsPage<>(new StreamInfoItemsCollector(getServiceId()), getNextPageUrl());
+    }
+
+    /**
+     * Commits one stream item per child of the given list element.
+     *
+     * @param collector the collector to fill; it is reset before items are added
+     * @param element   the {@code <ol>} holding the mix videos; nothing is added if {@code null}
+     */
+    private void collectStreamsFrom(
+            @Nonnull StreamInfoItemsCollector collector,
+            @Nullable Element element) {
+        collector.reset();
+
+        if (element == null) {
+            return;
+        }
+
+        final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
+        final TimeAgoParser timeAgoParser = getTimeAgoParser();
+
+        for (final Element li : element.children()) {
+
+            collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
+
+                @Override
+                public boolean isAd() {
+                    return false;
+                }
+
+                @Override
+                public String getUrl() throws ParsingException {
+                    try {
+                        return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get web page url for the video", e);
+                    }
+                }
+
+                @Override
+                public String getName() throws ParsingException {
+                    try {
+                        return li.attr("data-video-title");
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get name", e);
+                    }
+                }
+
+                @Override
+                public long getDuration() throws ParsingException {
+                    //Not present in doc
+                    return 0;
+                }
+
+                @Override
+                public String getUploaderName() throws ParsingException {
+                    try {
+                        // The trailing space makes the span a descendant of the div; without
+                        // it the selector asks for one element that is both div and span
+                        return li.select(
+                                "div[class=\"playlist-video-description\"] "
+                                        + "span[class=\"video-uploader-byline\"]")
+                                .first()
+                                .text();
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get uploader", e);
+                    }
+                }
+
+                @Override
+                public String getUploaderUrl() {
+                    //Not present in doc
+                    return "";
+                }
+
+                @Override
+                public String getTextualUploadDate() {
+                    //Not present in doc
+                    return "";
+                }
+
+                @Override
+                public long getViewCount() {
+                    return -1;
+                }
+
+                @Override
+                public String getThumbnailUrl() throws ParsingException {
+                    try {
+                        return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId()
+                                + "/hqdefault.jpg";
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get thumbnail url", e);
+                    }
+                }
+            });
+        }
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java
index 56abc194b..3d0ba78a5 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java
@@ -2,10 +2,13 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler;
 
 import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
 import org.schabi.newpipe.extractor.utils.Utils;
 
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.List;
 
@@ -67,4 +70,34 @@ public class YoutubePlaylistLinkHandlerFactory extends ListLinkHandlerFactory {
         }
         return true;
     }
+
+    /**
+     * Builds the link handler for a playlist url.
+     *
+     * <p>For a mix (auto-generated playlist) the handler url is the watch page of the video
+     * named by the {@code v} parameter, with the mix id as {@code list} parameter. All other
+     * urls, including mix urls without a {@code v} parameter, get the default handling.</p>
+     *
+     * @param url the playlist url to parse
+     * @return the link handler for the url
+     * @throws ParsingException if the url is malformed or the default handling rejects it
+     */
+    @Override
+    public ListLinkHandler fromUrl(String url) throws ParsingException {
+        try {
+            URL urlObj = Utils.stringToURL(url);
+            String listID = Utils.getQueryValue(urlObj, "list");
+            String videoID = Utils.getQueryValue(urlObj, "v");
+            // Without a video id the watch url would contain the literal text "null"
+            if (listID != null && videoID != null && YoutubeParsingHelper.isYoutubeMixId(listID)) {
+                String newUrl = "https://www.youtube.com/watch?v=" + videoID + "&list=" + listID;
+                return new ListLinkHandler(new LinkHandler(url, newUrl, listID),
+                        getContentFilter(url), getSortFilter(url));
+            }
+        } catch (MalformedURLException exception) {
+            throw new ParsingException("Error could not parse url :" + exception.getMessage(),
+                    exception);
+        }
+        return super.fromUrl(url);
+    }
 }