From e8794d92b5d54b6a4b9188dd3743cef5fac6dbd5 Mon Sep 17 00:00:00 2001 From: BlenderViking Date: Sun, 12 Mar 2017 16:15:51 +0100 Subject: [PATCH 1/2] ADD basic playlist support + youtube playlist support --- StreamingService.java | 6 + playlist/PlayListExtractor.java | 41 ++++ playlist/PlayListInfo.java | 51 ++++ playlist/PlayListInfoItem.java | 21 ++ playlist/PlayListInfoItemCollector.java | 33 +++ playlist/PlayListInfoItemExtractor.java | 9 + .../youtube/YoutubePlayListExtractor.java | 223 ++++++++++++++++++ .../youtube/YoutubePlayListUrlIdHandler.java | 38 +++ services/youtube/YoutubeService.java | 11 + 9 files changed, 433 insertions(+) create mode 100644 playlist/PlayListExtractor.java create mode 100644 playlist/PlayListInfo.java create mode 100644 playlist/PlayListInfoItem.java create mode 100644 playlist/PlayListInfoItemCollector.java create mode 100644 playlist/PlayListInfoItemExtractor.java create mode 100644 services/youtube/YoutubePlayListExtractor.java create mode 100644 services/youtube/YoutubePlayListUrlIdHandler.java diff --git a/StreamingService.java b/StreamingService.java index ed3c17bfc..889dad7b1 100644 --- a/StreamingService.java +++ b/StreamingService.java @@ -52,8 +52,11 @@ public abstract class StreamingService { public abstract SearchEngine getSearchEngineInstance(); public abstract UrlIdHandler getStreamUrlIdHandlerInstance(); public abstract UrlIdHandler getChannelUrlIdHandlerInstance(); + public abstract UrlIdHandler getPlayListUrlIdHandlerInstance(); public abstract ChannelExtractor getChannelExtractorInstance(String url, int page) throws ExtractionException, IOException; + public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page) + throws ExtractionException, IOException; public abstract SuggestionExtractor getSuggestionExtractorInstance(); public final int getServiceId() { @@ -66,11 +69,14 @@ public abstract class StreamingService { public final LinkType getLinkTypeByUrl(String url) { UrlIdHandler sH = 
getStreamUrlIdHandlerInstance(); UrlIdHandler cH = getChannelUrlIdHandlerInstance(); + UrlIdHandler pH = getPlayListUrlIdHandlerInstance(); if(sH.acceptUrl(url)) { return LinkType.STREAM; } else if(cH.acceptUrl(url)) { return LinkType.CHANNEL; + } else if (pH.acceptUrl(url)) { + return LinkType.PLAYLIST; } else { return LinkType.NONE; } diff --git a/playlist/PlayListExtractor.java b/playlist/PlayListExtractor.java new file mode 100644 index 000000000..7f13312f2 --- /dev/null +++ b/playlist/PlayListExtractor.java @@ -0,0 +1,41 @@ +package org.schabi.newpipe.extractor.playlist; + +import org.schabi.newpipe.extractor.UrlIdHandler; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector; + +import java.io.IOException; + +public abstract class PlayListExtractor { + + private int serviceId; + private String url; + private UrlIdHandler urlIdHandler; + private StreamInfoItemCollector previewInfoCollector; + private int page = -1; + + public PlayListExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId) + throws ExtractionException, IOException { + this.url = url; + this.page = page; + this.serviceId = serviceId; + this.urlIdHandler = urlIdHandler; + previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId); + } + + public String getUrl() { return url; } + public UrlIdHandler getUrlIdHandler() { return urlIdHandler; } + public StreamInfoItemCollector getStreamPreviewInfoCollector() { + return previewInfoCollector; + } + + public abstract String getName() throws ParsingException; + public abstract String getAvatarUrl() throws ParsingException; + public abstract String getBannerUrl() throws ParsingException; + public abstract StreamInfoItemCollector getStreams() throws ParsingException; + public abstract boolean hasNextPage() throws ParsingException; + public int getServiceId() { + return 
serviceId; + } +} diff --git a/playlist/PlayListInfo.java b/playlist/PlayListInfo.java new file mode 100644 index 000000000..692fa2911 --- /dev/null +++ b/playlist/PlayListInfo.java @@ -0,0 +1,51 @@ +package org.schabi.newpipe.extractor.playlist; + +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector; + +import java.util.List; +import java.util.Vector; + +public class PlayListInfo { + + public void addException(Exception e) { + errors.add(e); + } + + public static PlayListInfo getInfo(PlayListExtractor extractor) throws ParsingException { + PlayListInfo info = new PlayListInfo(); + + info.playList_name = extractor.getName(); + info.hasNextPage = extractor.hasNextPage(); + + try { + info.avatar_url = extractor.getAvatarUrl(); + } catch (Exception e) { + info.errors.add(e); + } + try { + info.banner_url = extractor.getBannerUrl(); + } catch (Exception e) { + info.errors.add(e); + } + try { + StreamInfoItemCollector c = extractor.getStreams(); + info.related_streams = c.getItemList(); + info.errors.addAll(c.getErrors()); + } catch(Exception e) { + info.errors.add(e); + } + + return info; + } + + public int service_id = -1; + public String playList_name = ""; + public String avatar_url = ""; + public String banner_url = ""; + public List related_streams = null; + public boolean hasNextPage = false; + + public List errors = new Vector<>(); +} diff --git a/playlist/PlayListInfoItem.java b/playlist/PlayListInfoItem.java new file mode 100644 index 000000000..e9ee57579 --- /dev/null +++ b/playlist/PlayListInfoItem.java @@ -0,0 +1,21 @@ +package org.schabi.newpipe.extractor.playlist; + +import org.schabi.newpipe.extractor.InfoItem; + +public class PlayListInfoItem implements InfoItem { + + public int serviceId = -1; + public String name = ""; + public String thumbnailUrl = ""; + public String webPageUrl = ""; + + public InfoType infoType() { + 
return InfoType.PLAYLIST; + } + public String getTitle() { + return name; + } + public String getLink() { + return webPageUrl; + } +} diff --git a/playlist/PlayListInfoItemCollector.java b/playlist/PlayListInfoItemCollector.java new file mode 100644 index 000000000..be559155d --- /dev/null +++ b/playlist/PlayListInfoItemCollector.java @@ -0,0 +1,33 @@ +package org.schabi.newpipe.extractor.playlist; + +import org.schabi.newpipe.extractor.InfoItemCollector; +import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public class PlayListInfoItemCollector extends InfoItemCollector { + public PlayListInfoItemCollector(int serviceId) { + super(serviceId); + } + + public PlayListInfoItem extract(PlayListInfoItemExtractor extractor) throws ParsingException { + final PlayListInfoItem resultItem = new PlayListInfoItem(); + + resultItem.name = extractor.getPlayListName(); + resultItem.serviceId = getServiceId(); + resultItem.webPageUrl = extractor.getWebPageUrl(); + try { + resultItem.thumbnailUrl = extractor.getThumbnailUrl(); + } catch (Exception e) { + addError(e); + } + return resultItem; + } + + public void commit(PlayListInfoItemExtractor extractor) throws ParsingException { + try { + addItem(extract(extractor)); + } catch (Exception e) { + addError(e); + } + } +} diff --git a/playlist/PlayListInfoItemExtractor.java b/playlist/PlayListInfoItemExtractor.java new file mode 100644 index 000000000..61d84ab07 --- /dev/null +++ b/playlist/PlayListInfoItemExtractor.java @@ -0,0 +1,9 @@ +package org.schabi.newpipe.extractor.playlist; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public interface PlayListInfoItemExtractor { + String getThumbnailUrl() throws ParsingException; + String getPlayListName() throws ParsingException; + String getWebPageUrl() throws ParsingException; +} diff --git a/services/youtube/YoutubePlayListExtractor.java 
b/services/youtube/YoutubePlayListExtractor.java new file mode 100644 index 000000000..2ada1c7ff --- /dev/null +++ b/services/youtube/YoutubePlayListExtractor.java @@ -0,0 +1,223 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.json.JSONException; +import org.json.JSONObject; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.AbstractStreamInfo; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.Parser; +import org.schabi.newpipe.extractor.UrlIdHandler; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.playlist.PlayListExtractor; +import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector; +import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor; + +import java.io.IOException; + +public class YoutubePlayListExtractor extends PlayListExtractor { + + private String TAG = YoutubePlayListExtractor.class.toString(); + + private Document doc = null; + + private boolean isAjaxPage = false; + private static String name = ""; + private static String feedUrl = ""; + private static String avatarUrl = ""; + private static String bannerUrl = ""; + private static String nextPageUrl = ""; + + public YoutubePlayListExtractor(UrlIdHandler urlIdHandler, + String url, int page, int serviceId) throws IOException, ExtractionException { + super(urlIdHandler, url, page, serviceId); + Downloader downloader = NewPipe.getDownloader(); + url = urlIdHandler.cleanUrl(url); + if(page == 0) { + String channelPageContent = downloader.download(url); + doc = Jsoup.parse(channelPageContent, url); + nextPageUrl = getNextPageUrl(doc); + isAjaxPage = false; + } else { + String ajaxDataRaw = downloader.download(nextPageUrl); + JSONObject ajaxData; + try { + ajaxData = new 
JSONObject(ajaxDataRaw); + final String htmlDataRaw = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>"; /* re-wrap the AJAX fragment in the tbody that getStreams() selects by id */
+ doc = Jsoup.parse(htmlDataRaw, nextPageUrl); + final String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html"); + if(!nextPageHtmlDataRaw.isEmpty()) { + final Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl); + nextPageUrl = getNextPageUrl(nextPageData); + } else { + nextPageUrl = ""; + } + } catch (JSONException e) { + throw new ParsingException("Could not parse json data for next page", e); + } + isAjaxPage = true; + } + } + + @Override + public String getName() throws ParsingException { + try { + if (!isAjaxPage) { + name = doc.select("span[class=\"qualified-channel-title-text\"]").first() + .select("a").first().text() + " - " + + doc.select("meta[name=title]").first().attr("content"); + } + return name; + } catch (Exception e) { + throw new ParsingException("Could not get playlist name", e); /* keep the cause for diagnosis */ + } + } + + @Override + public String getAvatarUrl() throws ParsingException { + try { + if(!isAjaxPage) { + avatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("src"); + if(avatarUrl.startsWith("//")) { + avatarUrl = "https:" + avatarUrl; + } + } + return avatarUrl; + } catch(Exception e) { + throw new ParsingException("Could not get playlist Avatar", e); + } + } + + @Override + public String getBannerUrl() throws ParsingException { + try { + if(!isAjaxPage) { + Element el = doc.select("div[id=\"gh-banner\"] style").first(); + String cssContent = el.html(); + String url = "https:" + Parser.matchGroup1("url\\((.*)\\)", cssContent); + if (url.contains("s.ytimg.com")) { + bannerUrl = null; + } else { + bannerUrl = url.substring(0, url.indexOf(");")); + } + } + return bannerUrl; + } catch(Exception e) { + throw new ParsingException("Could not get playlist Banner", e); + } + } + + @Override + public StreamInfoItemCollector getStreams() throws ParsingException { + StreamInfoItemCollector collector = getStreamPreviewInfoCollector(); + Element tbody = 
doc.select("tbody[id=\"pl-load-more-destination\"]").first(); + final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance(); + for(final Element li : tbody.children()) { + collector.commit(new StreamInfoItemExtractor() { + @Override + public AbstractStreamInfo.StreamType getStreamType() throws ParsingException { + return AbstractStreamInfo.StreamType.VIDEO_STREAM; + } + + @Override + public String getWebPageUrl() throws ParsingException { + try { + return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id")); + } catch (Exception e) { + throw new ParsingException("Could not get web page url for the video", e); + } + } + + @Override + public String getTitle() throws ParsingException { + try { + return li.attr("data-title"); + } catch (Exception e) { + throw new ParsingException("Could not get title", e); + } + } + + @Override + public int getDuration() throws ParsingException { + try { + return YoutubeParsingHelper.parseDurationString( + li.select("div[class=\"timestamp\"] span").first().text().trim()); + } catch(Exception e) { + if(isLiveStream(li)) { + // -1 for no duration + return -1; + } else { + throw new ParsingException("Could not get Duration: " + getTitle(), e); + } + } + } + + @Override + public String getUploader() throws ParsingException { + return li.select("div[class=pl-video-owner] a").text(); + } + + @Override + public String getUploadDate() throws ParsingException { + return ""; + } + + @Override + public long getViewCount() throws ParsingException { + return -1; + } + + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg"; + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public boolean isAd() throws ParsingException { + return false; + } + + private boolean isLiveStream(Element item) { + Element bla = 
item.select("span[class*=\"yt-badge-live\"]").first(); + + if(bla == null) { + // sometimes livestreams don't have badges but still are live streams + // if video time is not available we most likely have an offline livestream + if(item.select("span[class*=\"video-time\"]").first() == null) { + return true; + } + } + return bla != null; + } + }); + } + + return collector; + } + + @Override + public boolean hasNextPage() throws ParsingException { + return nextPageUrl != null && !nextPageUrl.isEmpty(); + } + + private String getNextPageUrl(Document d) throws ParsingException { + try { + Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); + if(button != null) { + return "https://www.youtube.com" + button.attr("data-uix-load-more-href"); + } else { + // sometimes playlists are so small that they don't have a second/next page + return ""; + } + } catch(Exception e) { + throw new ParsingException("could not load next page url", e); + } + } +} diff --git a/services/youtube/YoutubePlayListUrlIdHandler.java b/services/youtube/YoutubePlayListUrlIdHandler.java new file mode 100644 index 000000000..dbe1be652 --- /dev/null +++ b/services/youtube/YoutubePlayListUrlIdHandler.java @@ -0,0 +1,38 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import android.net.UrlQuerySanitizer; + +import org.schabi.newpipe.extractor.Parser; +import org.schabi.newpipe.extractor.UrlIdHandler; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public class YoutubePlayListUrlIdHandler implements UrlIdHandler { + + private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{10,})"; /* playlist ids are not all 34 chars long; accept any id of 10+ chars */ + + @Override + public String getUrl(String listId) { + return "https://www.youtube.com/playlist?list=" + listId; + } + + @Override + public String getId(String url) throws ParsingException { + try { + return Parser.matchGroup1("list=" + ID_PATTERN, url); + } catch (final Exception exception) { + throw new ParsingException("Error could not parse url :" + 
exception.getMessage(), exception); + } + } + + @Override + public String cleanUrl(String complexUrl) throws ParsingException { + return getUrl(getId(complexUrl)); + } + + @Override + public boolean acceptUrl(String videoUrl) { + final boolean hasNotEmptyUrl = videoUrl != null && !videoUrl.isEmpty(); + final boolean isYoutubeDomain = hasNotEmptyUrl && (videoUrl.contains("youtube") || videoUrl.contains("youtu.be")); + return isYoutubeDomain && videoUrl.contains("list="); + } +} diff --git a/services/youtube/YoutubeService.java b/services/youtube/YoutubeService.java index c8cc68fd5..56ef40cb9 100644 --- a/services/youtube/YoutubeService.java +++ b/services/youtube/YoutubeService.java @@ -69,12 +69,23 @@ public class YoutubeService extends StreamingService { return new YoutubeChannelUrlIdHandler(); } + + @Override + public UrlIdHandler getPlayListUrlIdHandlerInstance() { + return new YoutubePlayListUrlIdHandler(); + } + @Override public ChannelExtractor getChannelExtractorInstance(String url, int page) throws ExtractionException, IOException { return new YoutubeChannelExtractor(getChannelUrlIdHandlerInstance(), url, page, getServiceId()); } + public PlayListExtractor getPlayListExtractorInstance(String url, int page) + throws ExtractionException, IOException { + return new YoutubePlayListExtractor(getPlayListUrlIdHandlerInstance(), url, page, getServiceId()); + } + @Override public SuggestionExtractor getSuggestionExtractorInstance() { return new YoutubeSuggestionExtractor(getServiceId()); From 281d23d427b3585776e947b4f3ed8ffd9a8f484a Mon Sep 17 00:00:00 2001 From: BlenderViking Date: Tue, 21 Mar 2017 20:14:29 +0100 Subject: [PATCH 2/2] Typographic correction: Use of 4 spaces instead of a tabulation --- StreamingService.java | 5 +++-- services/youtube/YoutubeService.java | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/StreamingService.java b/StreamingService.java index 889dad7b1..dffa52059 100644 --- a/StreamingService.java +++ 
b/StreamingService.java @@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.playlist.PlayListExtractor; import org.schabi.newpipe.extractor.search.SearchEngine; import org.schabi.newpipe.extractor.stream_info.StreamExtractor; @@ -52,10 +53,10 @@ public abstract class StreamingService { public abstract SearchEngine getSearchEngineInstance(); public abstract UrlIdHandler getStreamUrlIdHandlerInstance(); public abstract UrlIdHandler getChannelUrlIdHandlerInstance(); - public abstract UrlIdHandler getPlayListUrlIdHandlerInstance(); + public abstract UrlIdHandler getPlayListUrlIdHandlerInstance(); public abstract ChannelExtractor getChannelExtractorInstance(String url, int page) throws ExtractionException, IOException; - public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page) + public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page) throws ExtractionException, IOException; public abstract SuggestionExtractor getSuggestionExtractorInstance(); diff --git a/services/youtube/YoutubeService.java b/services/youtube/YoutubeService.java index 56ef40cb9..d27094aa3 100644 --- a/services/youtube/YoutubeService.java +++ b/services/youtube/YoutubeService.java @@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.playlist.PlayListExtractor; import org.schabi.newpipe.extractor.search.SearchEngine; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.stream_info.StreamExtractor;