Merge pull request #11 from mauriciocolli/refactor-extractor

Refactor extractor
This commit is contained in:
Mauricio Colli 2017-07-09 19:11:01 -03:00 committed by GitHub
commit 729930802e
53 changed files with 1043 additions and 902 deletions

View File

@ -5,7 +5,7 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import java.io.IOException;
import java.util.Map;
/**
/*
* Created by Christian Schabesberger on 28.01.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -27,26 +27,35 @@ import java.util.Map;
public interface Downloader {
/**Download the text file at the supplied URL as in download(String),
/**
* Download the text file at the supplied URL as in download(String),
* but set the HTTP header field "Accept-Language" to the supplied string.
*
* @param siteUrl the URL of the text file to return the contents of
* @param language the language (usually a 2-character code) to set as the preferred language
* @return the contents of the specified text file
* @throws IOException*/
* @throws IOException
*/
String download(String siteUrl, String language) throws IOException, ReCaptchaException;
/**Download the text file at the supplied URL as in download(String),
/**
* Download the text file at the supplied URL as in download(String),
* but set the supplied custom HTTP request header properties on the request.
*
* @param siteUrl the URL of the text file to return the contents of
* @param customProperties set request header properties
* @return the contents of the specified text file
* @throws IOException*/
* @throws IOException
*/
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
/**Download (via HTTP) the text file located at the supplied URL, and return its contents.
/**
* Download (via HTTP) the text file located at the supplied URL, and return its contents.
* Primarily intended for downloading web pages.
*
* @param siteUrl the URL of the text file to download
* @return the contents of the specified text file
* @throws IOException*/
* @throws IOException
*/
String download(String siteUrl) throws IOException, ReCaptchaException;
}

35
Extractor.java Normal file
View File

@ -0,0 +1,35 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import java.io.Serializable;
/**
 * Common base class for extractors.
 * <p>
 * Stores the service id, the URL and the {@link UrlIdHandler} handed to the
 * constructor, and creates one {@link StreamInfoItemCollector} from that
 * handler and service id, which subclasses can reach through
 * {@link #getStreamPreviewInfoCollector()}.
 */
public abstract class Extractor implements Serializable {
    private final int serviceId;
    private final String url;
    private final UrlIdHandler urlIdHandler;
    private final StreamInfoItemCollector previewInfoCollector;

    /**
     * @param urlIdHandler handler stored on this extractor and also passed to the collector
     * @param serviceId    id of the service this extractor belongs to
     * @param url          the URL this extractor was created for
     */
    public Extractor(UrlIdHandler urlIdHandler, int serviceId, String url) {
        // The collector is built from the constructor arguments, not from the
        // fields, so the assignment order below is irrelevant.
        this.previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
        this.url = url;
        this.serviceId = serviceId;
        this.urlIdHandler = urlIdHandler;
    }

    /**
     * @return the service id supplied at construction time
     */
    public int getServiceId() {
        return this.serviceId;
    }

    /**
     * @return the URL supplied at construction time
     */
    public String getUrl() {
        return this.url;
    }

    /**
     * @return the handler supplied at construction time
     */
    public UrlIdHandler getUrlIdHandler() {
        return this.urlIdHandler;
    }

    /**
     * @return the collector created in the constructor; visible to subclasses only
     */
    protected StreamInfoItemCollector getStreamPreviewInfoCollector() {
        return this.previewInfoCollector;
    }
}

19
Info.java Normal file
View File

@ -0,0 +1,19 @@
package org.schabi.newpipe.extractor;
import java.io.Serializable;
import java.util.List;
import java.util.Vector;
/**
 * Base class for the plain, serializable "info" data holders.
 * All members are public fields that start out with neutral defaults.
 */
public abstract class Info implements Serializable {
    /**
     * Id of the service this info belongs to; {@code -1} by default.
     */
    public int service_id = -1;

    /**
     * Id of this Info object.<br>
     * e.g. Youtube: https://www.youtube.com/watch?v=RER5qCTzZ7 &gt; RER5qCTzZ7
     */
    public String id = "";

    /**
     * URL associated with this Info object; empty by default.
     */
    public String url = "";

    /**
     * Name associated with this Info object; empty by default.
     */
    public String name = "";

    /**
     * Errors attached to this Info object; starts out as an empty {@link Vector}.
     */
    public List<Throwable> errors = new Vector<Throwable>();
}

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor;
import java.io.Serializable;
/**
/*
* Created by the-scrabi on 11.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>

View File

@ -5,7 +5,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import java.util.List;
import java.util.Vector;
/**
/*
* Created by Christian Schabesberger on 12.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
@ -37,9 +37,11 @@ public class InfoItemCollector {
public List<InfoItem> getItemList() {
return itemList;
}
public List<Throwable> getErrors() {
return errors;
}
protected void addFromCollector(InfoItemCollector otherC) throws ExtractionException {
if (serviceId != otherC.serviceId) {
throw new ExtractionException("Service Id does not equal: "
@ -49,12 +51,15 @@ public class InfoItemCollector {
errors.addAll(otherC.errors);
itemList.addAll(otherC.itemList);
}
protected void addError(Exception e) {
errors.add(e);
}
protected void addItem(InfoItem item) {
itemList.add(item);
}
protected int getServiceId() {
return serviceId;
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor;
/**
/*
* Created by Adam Howard on 08/11/15.
*
* Copyright (c) Christian Schabesberger <chris.schabesberger@mailbox.org>
@ -22,7 +22,9 @@ package org.schabi.newpipe.extractor;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Static data about various media formats support by Newpipe, eg mime type, extension*/
/**
* Static data about the various media formats supported by NewPipe, e.g. MIME type and file extension.
*/
public enum MediaFormat {
//video and audio combined formats
@ -48,10 +50,13 @@ public enum MediaFormat {
this.mimeType = mimeType;
}
/**Return the friendly name of the media format with the supplied id
/**
* Return the friendly name of the media format with the supplied id
*
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the friendly name of the MediaFormat associated with this id,
* or an empty String if none match it.*/
* or an empty String if none match it.
*/
public static String getNameById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if (vf.id == ident) return vf.name;
@ -59,10 +64,13 @@ public enum MediaFormat {
return "";
}
/**Return the file extension of the media format with the supplied id
/**
* Return the file extension of the media format with the supplied id
*
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the file extension of the MediaFormat associated with this id,
* or an empty String if none match it.*/
* or an empty String if none match it.
*/
public static String getSuffixById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if (vf.id == ident) return vf.suffix;
@ -70,10 +78,13 @@ public enum MediaFormat {
return "";
}
/**Return the MIME type of the media format with the supplied id
/**
* Return the MIME type of the media format with the supplied id
*
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the MIME type of the MediaFormat associated with this id,
* or an empty String if none match it.*/
* or an empty String if none match it.
*/
public static String getMimeById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if (vf.id == ident) return vf.mimeType;

View File

@ -1,9 +1,8 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
/**
/*
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
@ -23,23 +22,24 @@ import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Provides access to the video streaming services supported by NewPipe.
* Currently only Youtube until the API becomes more stable.*/
/**
* Provides access to the video streaming services supported by NewPipe.
* Currently only Youtube until the API becomes more stable.
*/
@SuppressWarnings("ALL")
public class NewPipe {
private static final String TAG = NewPipe.class.toString();
private NewPipe() {
}
private static final String TAG = NewPipe.class.toString();
private static Downloader downloader = null;
public static StreamingService[] getServices() {
return ServiceList.serviceList;
}
public static StreamingService getService(int serviceId) throws ExtractionException {
for (StreamingService s : ServiceList.serviceList) {
if (s.getServiceId() == serviceId) {
@ -48,9 +48,11 @@ public class NewPipe {
}
return null;
}
public static StreamingService getService(String serviceName) throws ExtractionException {
return ServiceList.serviceList[getIdOfService(serviceName)];
}
public static String getNameOfService(int id) {
try {
return getService(id).getServiceInfo().name;
@ -60,6 +62,7 @@ public class NewPipe {
return "";
}
}
public static int getIdOfService(String serviceName) {
for (int i = 0; i < ServiceList.serviceList.length; i++) {
if (ServiceList.serviceList[i].getServiceInfo().name.equals(serviceName)) {

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
/**
/*
* Created by the-scrabi on 18.02.17.
*/

View File

@ -2,32 +2,12 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.search.SearchEngine;
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import java.io.IOException;
/**
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamingService.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public abstract class StreamingService {
public class ServiceInfo {
public String name = "";
@ -47,19 +27,16 @@ public abstract class StreamingService {
}
public abstract ServiceInfo getServiceInfo();
public abstract StreamExtractor getExtractorInstance(String url)
throws IOException, ExtractionException;
public abstract StreamExtractor getExtractorInstance(String url) throws IOException, ExtractionException;
public abstract SearchEngine getSearchEngineInstance();
public abstract UrlIdHandler getStreamUrlIdHandlerInstance();
public abstract UrlIdHandler getChannelUrlIdHandlerInstance();
public abstract UrlIdHandler getPlayListUrlIdHandlerInstance();
public abstract ChannelExtractor getChannelExtractorInstance(String url, int page)
throws ExtractionException, IOException;
public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page)
throws ExtractionException, IOException;
public abstract UrlIdHandler getPlaylistUrlIdHandlerInstance();
public abstract ChannelExtractor getChannelExtractorInstance(String url) throws ExtractionException, IOException;
public abstract PlaylistExtractor getPlaylistExtractorInstance(String url) throws ExtractionException, IOException;
public abstract SuggestionExtractor getSuggestionExtractorInstance();
public final int getServiceId() {
return serviceId;
}
@ -70,7 +47,7 @@ public abstract class StreamingService {
public final LinkType getLinkTypeByUrl(String url) {
UrlIdHandler sH = getStreamUrlIdHandlerInstance();
UrlIdHandler cH = getChannelUrlIdHandlerInstance();
UrlIdHandler pH = getPlayListUrlIdHandlerInstance();
UrlIdHandler pH = getPlaylistUrlIdHandlerInstance();
if (sH.acceptUrl(url)) {
return LinkType.STREAM;

View File

@ -5,7 +5,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import java.io.IOException;
import java.util.List;
/**
/*
* Created by Christian Schabesberger on 28.09.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -33,8 +33,7 @@ public abstract class SuggestionExtractor {
this.serviceId = serviceId;
}
public abstract List<String> suggestionList(
String query,String contentCountry)
public abstract List<String> suggestionList(String query, String contentCountry)
throws ExtractionException, IOException;
public int getServiceId() {

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
/*
* Created by Christian Schabesberger on 26.07.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -28,8 +28,10 @@ public interface UrlIdHandler {
String getId(String siteUrl) throws ParsingException;
String cleanUrl(String siteUrl) throws ParsingException;
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
Intent was meant to be watched with this Service.
Return false if this service shall not allow to be called through ACTIONs.*/
/**
* When a VIEW_ACTION is caught this function will test if the url delivered within the calling
* Intent was meant to be watched with this Service.
* Return false if this service shall not allow to be called through ACTIONs.
*/
boolean acceptUrl(String videoUrl);
}

View File

@ -1,13 +1,14 @@
package org.schabi.newpipe.extractor.channel;
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import java.io.IOException;
/**
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -27,36 +28,20 @@ import java.io.IOException;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public abstract class ChannelExtractor {
private int serviceId;
private String url;
private UrlIdHandler urlIdHandler;
private StreamInfoItemCollector previewInfoCollector;
private int page = -1;
public abstract class ChannelExtractor extends Extractor {
public ChannelExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
throws ExtractionException, IOException {
this.url = url;
this.page = page;
this.serviceId = serviceId;
this.urlIdHandler = urlIdHandler;
previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
}
public String getUrl() { return url; }
public UrlIdHandler getUrlIdHandler() { return urlIdHandler; }
public StreamInfoItemCollector getStreamPreviewInfoCollector() {
return previewInfoCollector;
public ChannelExtractor(UrlIdHandler urlIdHandler, String url, int serviceId) throws ExtractionException, IOException {
super(urlIdHandler, serviceId, url);
}
public abstract String getChannelId() throws ParsingException;
public abstract String getChannelName() throws ParsingException;
public abstract String getAvatarUrl() throws ParsingException;
public abstract String getBannerUrl() throws ParsingException;
public abstract String getFeedUrl() throws ParsingException;
public abstract StreamInfoItemCollector getStreams() throws ParsingException;
public abstract long getSubscriberCount() throws ParsingException;
public abstract boolean hasNextPage() throws ParsingException;
public int getServiceId() {
return serviceId;
}
public abstract boolean hasMoreStreams();
public abstract StreamInfoItemCollector getNextStreams() throws ExtractionException, IOException;
}

View File

@ -1,14 +1,13 @@
package org.schabi.newpipe.extractor.channel;
import org.schabi.newpipe.extractor.Info;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import java.io.Serializable;
import java.util.List;
import java.util.Vector;
/**
/*
* Created by Christian Schabesberger on 31.07.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -28,10 +27,7 @@ import java.util.Vector;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class ChannelInfo implements Serializable{
public void addException(Exception e) {
errors.add(e);
}
public class ChannelInfo extends Info {
public static ChannelInfo getInfo(ChannelExtractor extractor)
throws ParsingException {
@ -39,9 +35,15 @@ public class ChannelInfo implements Serializable{
// important data
info.service_id = extractor.getServiceId();
info.channel_name = extractor.getChannelName();
info.hasNextPage = extractor.hasNextPage();
info.url = extractor.getUrl();
info.name = extractor.getChannelName();
info.hasMoreStreams = extractor.hasMoreStreams();
try {
info.id = extractor.getChannelId();
} catch (Exception e) {
info.errors.add(e);
}
try {
info.avatar_url = extractor.getAvatarUrl();
} catch (Exception e) {
@ -65,7 +67,7 @@ public class ChannelInfo implements Serializable{
info.errors.add(e);
}
try {
info.subscriberCount = extractor.getSubscriberCount();
info.subscriber_count = extractor.getSubscriberCount();
} catch (Exception e) {
info.errors.add(e);
}
@ -73,14 +75,10 @@ public class ChannelInfo implements Serializable{
return info;
}
public int service_id = -1;
public String channel_name = "";
public String avatar_url = "";
public String banner_url = "";
public String feed_url = "";
public List<InfoItem> related_streams = null;
public long subscriberCount = -1;
public boolean hasNextPage = false;
public List<Throwable> errors = new Vector<>();
public long subscriber_count = -1;
public boolean hasMoreStreams = false;
}

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.channel;
import org.schabi.newpipe.extractor.InfoItem;
/**
/*
* Created by Christian Schabesberger on 11.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
@ -30,14 +30,16 @@ public class ChannelInfoItem implements InfoItem {
public String webPageUrl = "";
public String description = "";
public long subscriberCount = -1;
public int videoAmount = -1;
public long viewCount = -1;
public InfoType infoType() {
return InfoType.CHANNEL;
}
public String getTitle() {
return channelName;
}
public String getLink() {
return webPageUrl;
}

View File

@ -1,13 +1,9 @@
package org.schabi.newpipe.extractor.channel;
import org.schabi.newpipe.extractor.InfoItemCollector;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItem;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
/**
/*
* Created by Christian Schabesberger on 12.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
@ -47,7 +43,7 @@ public class ChannelInfoItemCollector extends InfoItemCollector {
addError(e);
}
try {
resultItem.videoAmount = extractor.getVideoAmount();
resultItem.viewCount = extractor.getViewCount();
} catch (Exception e) {
addError(e);
}

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.channel;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
/*
* Created by Christian Schabesberger on 12.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
@ -28,5 +28,5 @@ public interface ChannelInfoItemExtractor {
String getWebPageUrl() throws ParsingException;
String getDescription() throws ParsingException;
long getSubscriberCount() throws ParsingException;
int getVideoAmount() throws ParsingException;
long getViewCount() throws ParsingException;
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.exceptions;
/**
/*
* Created by Christian Schabesberger on 30.01.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -24,9 +24,11 @@ public class ExtractionException extends Exception {
public ExtractionException(String message) {
super(message);
}
public ExtractionException(Throwable cause) {
super(cause);
}
public ExtractionException(String message, Throwable cause) {
super(message, cause);
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.exceptions;
/**
/*
* Created by Christian Schabesberger on 12.09.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -24,6 +24,7 @@ public class FoundAdException extends ParsingException {
public FoundAdException(String message) {
super(message);
}
public FoundAdException(String message, Throwable cause) {
super(message, cause);
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.exceptions;
/**
/*
* Created by Christian Schabesberger on 31.01.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -25,6 +25,7 @@ public class ParsingException extends ExtractionException {
public ParsingException(String message) {
super(message);
}
public ParsingException(String message, Throwable cause) {
super(message, cause);
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.exceptions;
/**
/*
* Created by beneth <bmauduit@beneth.fr> on 07.12.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -1,41 +0,0 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import java.io.IOException;
/**
 * Base class for playlist extractors.
 * <p>
 * Keeps the URL, service id and {@link UrlIdHandler} passed to the constructor
 * and creates a {@link StreamInfoItemCollector} from that handler and service id.
 * Subclasses supply the playlist data through the abstract methods.
 */
public abstract class PlayListExtractor {
    private int serviceId;
    private String url;
    private UrlIdHandler urlIdHandler;
    private StreamInfoItemCollector previewInfoCollector;
    // Stored by the constructor; this class itself offers no accessor for it.
    private int page = -1;

    /**
     * @param urlIdHandler handler stored on this extractor and passed to the collector
     * @param url          the URL this extractor was created for
     * @param page         page value, stored as given
     * @param serviceId    id of the owning service
     * @throws ExtractionException declared for subclasses; not thrown by this constructor itself
     * @throws IOException         declared for subclasses; not thrown by this constructor itself
     */
    public PlayListExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
            throws ExtractionException, IOException {
        this.urlIdHandler = urlIdHandler;
        this.serviceId = serviceId;
        this.page = page;
        this.url = url;
        this.previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
    }

    /**
     * @return the service id supplied at construction time
     */
    public int getServiceId() {
        return this.serviceId;
    }

    /**
     * @return the URL supplied at construction time
     */
    public String getUrl() {
        return this.url;
    }

    /**
     * @return the handler supplied at construction time
     */
    public UrlIdHandler getUrlIdHandler() {
        return this.urlIdHandler;
    }

    /**
     * @return the collector created in the constructor
     */
    public StreamInfoItemCollector getStreamPreviewInfoCollector() {
        return this.previewInfoCollector;
    }

    public abstract String getName() throws ParsingException;

    public abstract String getAvatarUrl() throws ParsingException;

    public abstract String getBannerUrl() throws ParsingException;

    public abstract StreamInfoItemCollector getStreams() throws ParsingException;

    public abstract boolean hasNextPage() throws ParsingException;
}

View File

@ -1,51 +0,0 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import java.util.List;
import java.util.Vector;
/**
 * Plain data holder describing a playlist, filled from a {@link PlayListExtractor}
 * by {@link #getInfo(PlayListExtractor)}.
 */
public class PlayListInfo {
    /** Id of the service the playlist belongs to; {@code -1} until populated. */
    public int service_id = -1;
    public String playList_name = "";
    public String avatar_url = "";
    public String banner_url = "";
    /** Items taken from the extractor's stream collector; stays {@code null} if that lookup fails. */
    public List<InfoItem> related_streams = null;
    public boolean hasNextPage = false;
    /** Non-fatal errors gathered while filling this object. */
    public List<Throwable> errors = new Vector<>();

    /**
     * Appends an exception to {@link #errors}.
     *
     * @param e the exception to record
     */
    public void addException(Exception e) {
        errors.add(e);
    }

    /**
     * Builds a {@code PlayListInfo} from the given extractor.
     * <p>
     * The service id, name and next-page flag are read without a guard, so a
     * {@link ParsingException} from those calls propagates. Failures while reading
     * the avatar URL, banner URL or streams are recorded in {@link #errors} instead.
     *
     * @param extractor the extractor to read from
     * @return the populated info object
     * @throws ParsingException if the name or the next-page flag cannot be extracted
     */
    public static PlayListInfo getInfo(PlayListExtractor extractor) throws ParsingException {
        PlayListInfo info = new PlayListInfo();

        // Previously never populated, so service_id always stayed at its -1 default
        // even though the extractor knows its service id.
        info.service_id = extractor.getServiceId();
        info.playList_name = extractor.getName();
        info.hasNextPage = extractor.hasNextPage();

        try {
            info.avatar_url = extractor.getAvatarUrl();
        } catch (Exception e) {
            info.errors.add(e);
        }
        try {
            info.banner_url = extractor.getBannerUrl();
        } catch (Exception e) {
            info.errors.add(e);
        }
        try {
            StreamInfoItemCollector c = extractor.getStreams();
            info.related_streams = c.getItemList();
            // Per-item errors collected during extraction are surfaced too.
            info.errors.addAll(c.getErrors());
        } catch (Exception e) {
            info.errors.add(e);
        }

        return info;
    }
}

View File

@ -0,0 +1,29 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import java.io.IOException;
/**
 * Base class for playlist extractors. The URL, service id and
 * {@link UrlIdHandler} are handled by {@link Extractor}; subclasses provide
 * the playlist-specific data through the abstract methods below.
 */
public abstract class PlaylistExtractor extends Extractor {

    /**
     * @param urlIdHandler handler forwarded to {@link Extractor}
     * @param url          the URL forwarded to {@link Extractor}
     * @param serviceId    service id forwarded to {@link Extractor}
     * @throws ExtractionException declared so that subclass constructors may throw it
     * @throws IOException         declared so that subclass constructors may throw it
     */
    public PlaylistExtractor(UrlIdHandler urlIdHandler, String url, int serviceId)
            throws ExtractionException, IOException {
        super(urlIdHandler, serviceId, url);
    }

    // --- playlist identity ---

    public abstract String getPlaylistId() throws ParsingException;

    public abstract String getPlaylistName() throws ParsingException;

    // --- artwork ---

    public abstract String getAvatarUrl() throws ParsingException;

    public abstract String getBannerUrl() throws ParsingException;

    // --- uploader ---

    public abstract String getUploaderUrl() throws ParsingException;

    public abstract String getUploaderName() throws ParsingException;

    public abstract String getUploaderAvatarUrl() throws ParsingException;

    // --- streams ---

    public abstract StreamInfoItemCollector getStreams() throws ParsingException;

    public abstract long getStreamsCount() throws ParsingException;

    // NOTE(review): presumably reports whether getNextStreams() can deliver further
    // items - confirm against the concrete implementations.
    public abstract boolean hasMoreStreams();

    public abstract StreamInfoItemCollector getNextStreams() throws ExtractionException, IOException;
}

View File

@ -0,0 +1,74 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.Info;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import java.util.List;
/**
 * Data holder describing a playlist, populated from a {@link PlaylistExtractor}
 * via {@link #getInfo(PlaylistExtractor)}.
 */
public class PlaylistInfo extends Info {
    public String avatar_url = "";
    public String banner_url = "";
    public String uploader_url = "";
    public String uploader_name = "";
    public String uploader_avatar_url = "";
    public long streams_count = 0;
    /** Stays {@code null} when the stream lookup in {@code getInfo} fails. */
    public List<InfoItem> related_streams = null;
    public boolean hasMoreStreams = false;

    /**
     * Fills a new {@code PlaylistInfo} from the given extractor.
     * <p>
     * Service id, URL, name and the more-streams flag are read without a guard,
     * so a {@link ParsingException} from {@code getPlaylistName()} propagates to
     * the caller. Every other lookup is best-effort: an exception is appended to
     * {@code errors} and the field keeps its default.
     *
     * @param extractor the extractor to read from
     * @return the populated info object
     * @throws ParsingException if the playlist name cannot be extracted
     */
    public static PlaylistInfo getInfo(PlaylistExtractor extractor) throws ParsingException {
        final PlaylistInfo result = new PlaylistInfo();
        final List<Throwable> problems = result.errors;

        // Unguarded part.
        result.service_id = extractor.getServiceId();
        result.url = extractor.getUrl();
        result.name = extractor.getPlaylistName();
        result.hasMoreStreams = extractor.hasMoreStreams();

        // Best-effort part: each failure is recorded, never rethrown.
        try {
            result.id = extractor.getPlaylistId();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.streams_count = extractor.getStreamsCount();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.avatar_url = extractor.getAvatarUrl();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.uploader_url = extractor.getUploaderUrl();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.uploader_name = extractor.getUploaderName();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.uploader_avatar_url = extractor.getUploaderAvatarUrl();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            result.banner_url = extractor.getBannerUrl();
        } catch (Exception failure) {
            problems.add(failure);
        }

        try {
            final StreamInfoItemCollector streamCollector = extractor.getStreams();
            result.related_streams = streamCollector.getItemList();
            // Errors gathered per item by the collector are carried over as well.
            problems.addAll(streamCollector.getErrors());
        } catch (Exception failure) {
            problems.add(failure);
        }

        return result;
    }
}

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItem;
public class PlayListInfoItem implements InfoItem {
public class PlaylistInfoItem implements InfoItem {
public int serviceId = -1;
public String name = "";
@ -12,9 +12,11 @@ public class PlayListInfoItem implements InfoItem {
public InfoType infoType() {
return InfoType.PLAYLIST;
}
public String getTitle() {
return name;
}
public String getLink() {
return webPageUrl;
}

View File

@ -1,18 +1,17 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItemCollector;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
public class PlayListInfoItemCollector extends InfoItemCollector {
public PlayListInfoItemCollector(int serviceId) {
public class PlaylistInfoItemCollector extends InfoItemCollector {
public PlaylistInfoItemCollector(int serviceId) {
super(serviceId);
}
public PlayListInfoItem extract(PlayListInfoItemExtractor extractor) throws ParsingException {
final PlayListInfoItem resultItem = new PlayListInfoItem();
public PlaylistInfoItem extract(PlaylistInfoItemExtractor extractor) throws ParsingException {
final PlaylistInfoItem resultItem = new PlaylistInfoItem();
resultItem.name = extractor.getPlayListName();
resultItem.name = extractor.getPlaylistName();
resultItem.serviceId = getServiceId();
resultItem.webPageUrl = extractor.getWebPageUrl();
try {
@ -23,7 +22,7 @@ public class PlayListInfoItemCollector extends InfoItemCollector {
return resultItem;
}
public void commit(PlayListInfoItemExtractor extractor) throws ParsingException {
public void commit(PlaylistInfoItemExtractor extractor) throws ParsingException {
try {
addItem(extract(extractor));
} catch (Exception e) {

View File

@ -2,8 +2,8 @@ package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
public interface PlayListInfoItemExtractor {
public interface PlaylistInfoItemExtractor {
String getThumbnailUrl() throws ParsingException;
String getPlayListName() throws ParsingException;
String getPlaylistName() throws ParsingException;
String getWebPageUrl() throws ParsingException;
}

View File

@ -6,11 +6,10 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItemCollector;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
/**
/*
* Created by Christian Schabesberger on 12.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>

View File

@ -2,12 +2,11 @@ package org.schabi.newpipe.extractor.search;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import java.io.IOException;
import java.util.EnumSet;
/**
/*
* Created by Christian Schabesberger on 10.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
@ -37,6 +36,7 @@ public abstract class SearchEngine {
super(message);
}
}
private InfoItemSearchCollector collector;
public SearchEngine(UrlIdHandler urlIdHandler, int serviceId) {
@ -46,6 +46,7 @@ public abstract class SearchEngine {
protected InfoItemSearchCollector getInfoItemSearchCollector() {
return collector;
}
//Result search(String query, int page);
public abstract InfoItemSearchCollector search(
String query, int page, String contentCountry, EnumSet<Filter> filter)

View File

@ -2,14 +2,13 @@ package org.schabi.newpipe.extractor.search;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItem;
import java.io.IOException;
import java.util.EnumSet;
import java.util.List;
import java.util.Vector;
/**
/*
* Created by Christian Schabesberger on 29.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -1,27 +1,26 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.AbstractStreamInfo;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
/**
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -41,77 +40,53 @@ import java.io.IOException;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
@SuppressWarnings("WeakerAccess")
public class YoutubeChannelExtractor extends ChannelExtractor {
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String TAG = YoutubeChannelExtractor.class.toString();
private Document doc;
/**
* It's lazily initialized (when getNextStreams is called)
*/
private Document nextStreamsAjax;
private String nextStreamsUrl = "";
// private CSSOMParser cssParser = new CSSOMParser(new SACParserCSS3());
/*//////////////////////////////////////////////////////////////////////////
// Variables for cache purposes (not "select" the current document all over again)
//////////////////////////////////////////////////////////////////////////*/
private String channelId;
private String channelName;
private String avatarUrl;
private String bannerUrl;
private String feedUrl;
private long subscriberCount = -1;
private Document doc = null;
private boolean isAjaxPage = false;
private static String userUrl = "";
private static String channelName = "";
private static String avatarUrl = "";
private static String bannerUrl = "";
private static String feedUrl = "";
private static long subscriberCount = -1;
// The first page is HTML; all other pages are AJAX. Every new page can be requested by sending
// this request URL.
private static String nextPageUrl = "";
public YoutubeChannelExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
throws ExtractionException, IOException {
super(urlIdHandler, url, page, serviceId);
Downloader downloader = NewPipe.getDownloader();
url = urlIdHandler.cleanUrl(url) ; //+ "/video?veiw=0&flow=list&sort=dd";
if(page == 0) {
if (isUserUrl(url)) {
userUrl = url;
} else {
// we first need to get the user url. Otherwise we can't find videos
String channelPageContent = downloader.download(url);
Document channelDoc = Jsoup.parse(channelPageContent, url);
userUrl = getUserUrl(channelDoc);
public YoutubeChannelExtractor(UrlIdHandler urlIdHandler, String url, int serviceId) throws ExtractionException, IOException {
super(urlIdHandler, urlIdHandler.cleanUrl(url), serviceId);
fetchDocument();
}
userUrl = userUrl + "/videos?veiw=0&flow=list&sort=dd&live_view=10000";
String pageContent = downloader.download(userUrl);
doc = Jsoup.parse(pageContent, userUrl);
nextPageUrl = getNextPageUrl(doc);
isAjaxPage = false;
} else {
String ajaxDataRaw = downloader.download(nextPageUrl);
JSONObject ajaxData;
@Override
public String getChannelId() throws ParsingException {
try {
ajaxData = new JSONObject(ajaxDataRaw);
String htmlDataRaw = ajaxData.getString("content_html");
doc = Jsoup.parse(htmlDataRaw, nextPageUrl);
if (channelId == null) {
channelId = getUrlIdHandler().getId(getUrl());
}
String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html");
if(!nextPageHtmlDataRaw.isEmpty()) {
Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl);
nextPageUrl = getNextPageUrl(nextPageData);
} else {
nextPageUrl = "";
}
} catch (JSONException e) {
throw new ParsingException("Could not parse json data for next page", e);
}
isAjaxPage = true;
return channelId;
} catch (Exception e) {
throw new ParsingException("Could not get channel id");
}
}
@Override
public String getChannelName() throws ParsingException {
try {
if(!isAjaxPage) {
channelName = doc.select("span[class=\"qualified-channel-title-text\"]").first()
.select("a").first().text();
if (channelName == null) {
channelName = doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
}
return channelName;
} catch (Exception e) {
throw new ParsingException("Could not get channel name");
@ -121,10 +96,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getAvatarUrl() throws ParsingException {
try {
if(!isAjaxPage) {
avatarUrl = doc.select("img[class=\"channel-header-profile-image\"]")
.first().attr("abs:src");
if (avatarUrl == null) {
avatarUrl = doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
}
return avatarUrl;
} catch (Exception e) {
throw new ParsingException("Could not get avatar", e);
@ -134,17 +109,14 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getBannerUrl() throws ParsingException {
try {
if(!isAjaxPage) {
if (bannerUrl == null) {
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
String cssContent = el.html();
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
if (url.contains("s.ytimg.com") || url.contains("default_banner")) {
bannerUrl = null;
} else {
bannerUrl = url;
}
bannerUrl = url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
}
return bannerUrl;
} catch (Exception e) {
throw new ParsingException("Could not get Banner", e);
@ -154,14 +126,105 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public StreamInfoItemCollector getStreams() throws ParsingException {
StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
Element ul;
if(isAjaxPage) {
ul = doc.select("body").first();
} else {
ul = doc.select("ul[id=\"browse-items-primary\"]").first();
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
collectStreamsFrom(collector, ul);
return collector;
}
for(final Element li : ul.children()) {
/**
 * Returns the channel's subscriber count, parsing it from the page on first use.
 * <p>
 * The value is cached in {@code subscriberCount}; {@code -1} marks it as not parsed yet.
 *
 * @return the subscriber count read from the subscription button
 * @throws ParsingException if the counter element is missing or its text holds no parsable number
 */
@Override
public long getSubscriberCount() throws ParsingException {
    if (subscriberCount != -1) {
        return subscriberCount;
    }

    Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
    if (el == null) {
        throw new ParsingException("Could not get subscriber count");
    }

    try {
        // Strip everything that is not a digit before parsing.
        subscriberCount = Long.parseLong(el.text().replaceAll("\\D+", ""));
    } catch (NumberFormatException e) {
        // Report unparsable text through the declared checked exception instead of
        // letting an unchecked NumberFormatException escape to the caller.
        throw new ParsingException("Could not get subscriber count", e);
    }
    return subscriberCount;
}
/**
 * Builds the channel's feed url from the channel id found on the subscription button.
 * <p>
 * The result is cached in {@code feedUrl} once it has been computed.
 *
 * @return {@code CHANNEL_FEED_BASE} followed by the channel id, or an empty string when the
 *         button carries no channel id
 * @throws ParsingException if the subscription button cannot be read from the document
 */
@Override
public String getFeedUrl() throws ParsingException {
    try {
        if (feedUrl == null) {
            // Named differently from the channelId field so that the field is not shadowed.
            String externalId = doc.getElementsByClass("yt-uix-subscription-button").first().attr("data-channel-external-id");
            // jsoup's attr() yields "" rather than null for a missing attribute, so an
            // emptiness check is needed to avoid returning the bare base url.
            feedUrl = (externalId == null || externalId.isEmpty()) ? "" : CHANNEL_FEED_BASE + externalId;
        }
        return feedUrl;
    } catch (Exception e) {
        throw new ParsingException("Could not get feed url", e);
    }
}
/**
 * Tells whether a url for a further batch of streams is currently known.
 *
 * @return {@code true} when {@code nextStreamsUrl} is neither {@code null} nor empty
 */
@Override
public boolean hasMoreStreams() {
    if (nextStreamsUrl == null) {
        return false;
    }
    return !nextStreamsUrl.isEmpty();
}
/**
 * Downloads the next batch of streams and collects its items.
 *
 * @return a new collector filled from the freshly downloaded batch
 * @throws ExtractionException if {@link #hasMoreStreams()} is {@code false} or the batch cannot be parsed
 * @throws IOException         if the download fails
 */
@Override
public StreamInfoItemCollector getNextStreams() throws ExtractionException, IOException {
    if (!hasMoreStreams()) {
        throw new ExtractionException("Channel doesn't have more streams");
    }

    StreamInfoItemCollector nextCollector = new StreamInfoItemCollector(getUrlIdHandler(), getServiceId());
    // Replaces nextStreamsAjax and updates nextStreamsUrl.
    setupNextStreamsAjax(NewPipe.getDownloader());

    Element ajaxBody = nextStreamsAjax.select("body").first();
    collectStreamsFrom(nextCollector, ajaxBody);
    return nextCollector;
}
/**
 * Downloads the JSON answer behind {@code nextStreamsUrl}, stores its "content_html" part as
 * {@code nextStreamsAjax} and derives the following url from its "load_more_widget_html" part.
 *
 * @param downloader the downloader used for the request
 * @throws IOException        if the download fails
 * @throws ReCaptchaException if the downloader reports one
 * @throws ParsingException   if the answer is not the expected JSON
 */
private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
    final String rawJson = downloader.download(nextStreamsUrl);

    try {
        final JSONObject json = new JSONObject(rawJson);

        nextStreamsAjax = Jsoup.parse(json.getString("content_html"), nextStreamsUrl);

        final String loadMoreHtml = json.getString("load_more_widget_html");
        // An empty widget means there is nothing further to load.
        nextStreamsUrl = loadMoreHtml.isEmpty()
                ? ""
                : getNextStreamsUrl(Jsoup.parse(loadMoreHtml, nextStreamsUrl));
    } catch (JSONException e) {
        throw new ParsingException("Could not parse json data for next streams", e);
    }
}
/**
 * Reads the url of the next batch of streams from the "load more" button of a document.
 *
 * @param d the document to search
 * @return the absolute url stored on the button, or an empty string when there is no button
 * @throws ParsingException if reading the document fails
 */
private String getNextStreamsUrl(Document d) throws ParsingException {
    try {
        Element loadMoreButton = d.select("button[class*=\"yt-uix-load-more\"]").first();
        // Sometimes channels are simply so small that there is no button for more streams/videos.
        return loadMoreButton == null ? "" : loadMoreButton.attr("abs:data-uix-load-more-href");
    } catch (Exception e) {
        throw new ParsingException("could not get next streams' url", e);
    }
}
/**
 * Downloads and parses the channel's videos page, then resets the paging state:
 * {@code nextStreamsUrl} is taken from the page and {@code nextStreamsAjax} is cleared.
 *
 * @throws IOException        if the download fails
 * @throws ReCaptchaException if the downloader reports one
 * @throws ParsingException   if the next streams' url cannot be read
 */
private void fetchDocument() throws IOException, ReCaptchaException, ParsingException {
    final Downloader dl = NewPipe.getDownloader();
    final String videosUrl = getUrl() + "/videos?view=0&flow=list&sort=dd&live_view=10000";

    doc = Jsoup.parse(dl.download(videosUrl), videosUrl);

    nextStreamsUrl = getNextStreamsUrl(doc);
    nextStreamsAjax = null;
}
private void collectStreamsFrom(StreamInfoItemCollector collector, Element element) throws ParsingException {
collector.getItemList().clear();
for (final Element li : element.children()) {
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
collector.commit(new StreamInfoItemExtractor() {
@Override
@ -221,7 +284,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
try {
Element meta = li.select("div[class=\"yt-lockup-meta\"]").first();
Element li = meta.select("li").first();
if (li == null && meta != null) {
if (li == null) {
//this means we have a youtube red video
return "";
} else {
@ -244,10 +307,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return -1;
}
output = Parser.matchGroup1("([0-9,\\. ]*)", input)
.replace(" ", "")
.replace(".", "")
.replace(",", "");
output = input.replaceAll("\\D+", "");
try {
return Long.parseLong(output);
@ -295,63 +355,5 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
});
}
}
return collector;
}
/**
 * Returns the subscriber count, refreshing the stored value when the current document
 * contains the counter element.
 *
 * @return the stored subscriber count
 * @throws ParsingException if the element is missing and no value has been stored yet
 */
@Override
public long getSubscriberCount() throws ParsingException {
    Element counter = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();

    if (counter == null) {
        // No counter in this document: only a value that is already stored can be returned.
        if (subscriberCount == -1) {
            throw new ParsingException("Could not get subscriber count");
        }
        return subscriberCount;
    }

    subscriberCount = Long.parseLong(counter.text().replaceAll("\\D+", ""));
    return subscriberCount;
}
/**
 * Returns the feed url read from the page's RSS link.
 *
 * @return an empty string when {@code userUrl} contains "channel"; otherwise the stored feed url,
 *         which is refreshed from the document unless the current page is an ajax page
 * @throws ParsingException if reading the RSS link fails
 */
@Override
public String getFeedUrl() throws ParsingException {
    try {
        // channels don't have feeds in youtube, only user can provide such
        final boolean isChannel = userUrl.contains("channel");
        if (isChannel) {
            return "";
        }

        if (isAjaxPage) {
            return feedUrl;
        }

        feedUrl = doc.select("link[title=\"RSS\"]").first().attr("abs:href");
        return feedUrl;
    } catch (Exception e) {
        throw new ParsingException("Could not get feed url", e);
    }
}
/**
 * Tells whether a url for a following page is stored.
 *
 * @return {@code true} when {@code nextPageUrl} is not empty
 */
@Override
public boolean hasNextPage() throws ParsingException {
    final boolean noFurtherPage = nextPageUrl.isEmpty();
    return !noFurtherPage;
}
/**
 * Reads the absolute link of the channel title from the given document.
 *
 * @param d the document to search
 * @return the absolute {@code href} of the first link inside the channel title element
 */
private String getUserUrl(Document d) throws ParsingException {
    Element titleSpan = d.select("span[class=\"qualified-channel-title-text\"]").first();
    Element titleLink = titleSpan.select("a").first();
    return titleLink.attr("abs:href");
}
/**
 * Checks whether the url contains the "/user/" path segment.
 *
 * @param url the url to inspect
 * @return {@code true} when "/user/" occurs anywhere in {@code url}
 */
private boolean isUserUrl(String url) throws ParsingException {
    return url.indexOf("/user/") != -1;
}
/**
 * Reads the url of the following page from the "load more" button of a document.
 *
 * @param d the document to search
 * @return the absolute url stored on the button, or an empty string when there is no button
 * @throws ParsingException if reading the document fails
 */
private String getNextPageUrl(Document d) throws ParsingException {
    try {
        Element loadMoreButton = d.select("button[class*=\"yt-uix-load-more\"]").first();
        // Sometimes channels are simply so small that they don't have a second/next page.
        return loadMoreButton == null ? "" : loadMoreButton.attr("abs:data-uix-load-more-href");
    } catch (Exception e) {
        throw new ParsingException("could not load next page url", e);
    }
}
}

View File

@ -1,11 +1,10 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.Parser;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.jsoup.nodes.Element;
/**
/*
* Created by Christian Schabesberger on 12.02.17.
*
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
@ -32,6 +31,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor
this.el = el;
}
@Override
public String getThumbnailUrl() throws ParsingException {
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first()
.select("img").first();
@ -44,34 +44,39 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor
return url;
}
/**
 * Reads the channel name from the item's tile link.
 *
 * @return the text of the first link whose class contains "yt-uix-tile-link"
 */
@Override
public String getChannelName() throws ParsingException {
    Element tileLink = el.select("a[class*=\"yt-uix-tile-link\"]").first();
    return tileLink.text();
}
/**
 * Reads the channel's page url from the item's tile link.
 *
 * @return the absolute {@code href} of the first link whose class contains "yt-uix-tile-link"
 */
@Override
public String getWebPageUrl() throws ParsingException {
    Element tileLink = el.select("a[class*=\"yt-uix-tile-link\"]").first();
    return tileLink.attr("abs:href");
}
@Override
public long getSubscriberCount() throws ParsingException {
Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first();
if (subsEl == null) {
return 0;
} else {
return Integer.parseInt(subsEl.text().replaceAll("\\D+",""));
return Long.parseLong(subsEl.text().replaceAll("\\D+", ""));
}
}
public int getVideoAmount() throws ParsingException {
@Override
public long getViewCount() throws ParsingException {
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first();
if (metaEl == null) {
return 0;
} else {
return Integer.parseInt(metaEl.text().replaceAll("\\D+",""));
return Long.parseLong(metaEl.text().replaceAll("\\D+", ""));
}
}
@Override
public String getDescription() throws ParsingException {
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first();
if (desEl == null) {

View File

@ -1,10 +1,10 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.Parser;
/**
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -3,7 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
/*
* Created by Christian Schabesberger on 02.03.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -1,223 +0,0 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import java.io.IOException;
/**
 * Extractor for one page of a YouTube playlist.
 * <p>
 * Page 0 is downloaded as HTML from the cleaned playlist url; every other page is downloaded
 * as JSON from {@code nextPageUrl}.
 * <p>
 * NOTE(review): {@code name}, {@code feedUrl}, {@code avatarUrl}, {@code bannerUrl} and
 * {@code nextPageUrl} are static, so an instance created for a later page reads values written
 * by an earlier instance, and all instances share them. Confirm callers create instances
 * strictly in page order and not concurrently.
 */
public class YoutubePlayListExtractor extends PlayListExtractor {
private String TAG = YoutubePlayListExtractor.class.toString();
// Parsed content of the page this instance was created for.
private Document doc = null;
// true when doc was built from the JSON answer of a follow-up page instead of the HTML page.
private boolean isAjaxPage = false;
// Values remembered from the HTML page; static, hence shared by every instance.
private static String name = "";
private static String feedUrl = "";
private static String avatarUrl = "";
private static String bannerUrl = "";
// Url of the following page; empty when no "load more" button was found.
private static String nextPageUrl = "";
/**
 * Downloads and parses the requested page.
 *
 * @param urlIdHandler handler used to clean the url
 * @param url          playlist url
 * @param page         0 for the HTML page, anything else for the page behind {@code nextPageUrl}
 * @param serviceId    passed through to the super class
 * @throws IOException         if the download fails
 * @throws ExtractionException if the JSON of a follow-up page cannot be parsed
 */
public YoutubePlayListExtractor(UrlIdHandler urlIdHandler,
String url, int page, int serviceId) throws IOException, ExtractionException {
super(urlIdHandler, url, page, serviceId);
Downloader downloader = NewPipe.getDownloader();
url = urlIdHandler.cleanUrl(url);
if(page == 0) {
String channelPageContent = downloader.download(url);
doc = Jsoup.parse(channelPageContent, url);
nextPageUrl = getNextPageUrl(doc);
isAjaxPage = false;
} else {
String ajaxDataRaw = downloader.download(nextPageUrl);
JSONObject ajaxData;
try {
ajaxData = new JSONObject(ajaxDataRaw);
// Wrap the rows in the same tbody that getStreams() looks up on the HTML page.
final String htmlDataRaw = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
doc = Jsoup.parse(htmlDataRaw, nextPageUrl);
final String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html");
if(!nextPageHtmlDataRaw.isEmpty()) {
final Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl);
nextPageUrl = getNextPageUrl(nextPageData);
} else {
// An empty widget means there is no further page.
nextPageUrl = "";
}
} catch (JSONException e) {
throw new ParsingException("Could not parse json data for next page", e);
}
isAjaxPage = true;
}
}
/**
 * Returns the playlist name as "channel title - page title"; it is only re-read from the
 * document on the HTML page, otherwise the stored value is returned.
 */
@Override
public String getName() throws ParsingException {
try {
if (!isAjaxPage) {
name = doc.select("span[class=\"qualified-channel-title-text\"]").first()
.select("a").first().text() + " - " +
doc.select("meta[name=title]").first().attr("content");
}
return name;
} catch (Exception e) {
throw new ParsingException("Could not get playlist name");
}
}
/**
 * Returns the avatar url; it is only re-read from the document on the HTML page.
 */
@Override
public String getAvatarUrl() throws ParsingException {
try {
if(!isAjaxPage) {
avatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("src");
// Turn a protocol-relative url into an https url.
if(avatarUrl.startsWith("//")) {
avatarUrl = "https:" + avatarUrl;
}
}
return avatarUrl;
} catch(Exception e) {
throw new ParsingException("Could not get playlist Avatar");
}
}
/**
 * Returns the banner url taken from the css inside the banner element, or {@code null} when
 * that url points at s.ytimg.com; it is only re-read from the document on the HTML page.
 */
@Override
public String getBannerUrl() throws ParsingException {
try {
if(!isAjaxPage) {
Element el = doc.select("div[id=\"gh-banner\"] style").first();
String cssContent = el.html();
String url = "https:" + Parser.matchGroup1("url\\((.*)\\)", cssContent);
if (url.contains("s.ytimg.com")) {
bannerUrl = null;
} else {
// NOTE(review): indexOf returns -1 when ");" is absent, which makes substring throw;
// the catch below then reports it as a ParsingException.
bannerUrl = url.substring(0, url.indexOf(");"));
}
}
return bannerUrl;
} catch(Exception e) {
throw new ParsingException("Could not get playlist Banner");
}
}
/**
 * Commits one item extractor per child of the "pl-load-more-destination" tbody to the
 * collector and returns that collector.
 */
@Override
public StreamInfoItemCollector getStreams() throws ParsingException {
StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
for(final Element li : tbody.children()) {
collector.commit(new StreamInfoItemExtractor() {
@Override
public AbstractStreamInfo.StreamType getStreamType() throws ParsingException {
return AbstractStreamInfo.StreamType.VIDEO_STREAM;
}
@Override
public String getWebPageUrl() throws ParsingException {
try {
return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id"));
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getTitle() throws ParsingException {
try {
return li.attr("data-title");
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public int getDuration() throws ParsingException {
try {
return YoutubeParsingHelper.parseDurationString(
li.select("div[class=\"timestamp\"] span").first().text().trim());
} catch(Exception e) {
if(isLiveStream(li)) {
// -1 for no duration
return -1;
} else {
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
}
}
@Override
public String getUploader() throws ParsingException {
return li.select("div[class=pl-video-owner] a").text();
}
// Upload date and view count are not read from the playlist row; fixed placeholders are returned.
@Override
public String getUploadDate() throws ParsingException {
return "";
}
@Override
public long getViewCount() throws ParsingException {
return -1;
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg";
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
@Override
public boolean isAd() throws ParsingException {
return false;
}
// Returns true when the item has a live badge, or when it has neither a badge nor a video time.
private boolean isLiveStream(Element item) {
Element bla = item.select("span[class*=\"yt-badge-live\"]").first();
if(bla == null) {
// Sometimes live streams don't have badges but still are live streams;
// if the video time is not available we most likely have an offline live stream.
if(item.select("span[class*=\"video-time\"]").first() == null) {
return true;
}
}
return bla != null;
}
});
}
return collector;
}
/**
 * Tells whether a url for a following page is stored.
 */
@Override
public boolean hasNextPage() throws ParsingException {
return nextPageUrl != null && !nextPageUrl.isEmpty();
}
/**
 * Reads the url of the following page from the "load more" button of the given document;
 * returns an empty string when there is no such button.
 */
private String getNextPageUrl(Document d) throws ParsingException {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if(button != null) {
return "https://www.youtube.com" + button.attr("data-uix-load-more-href");
} else {
// Sometimes playlists are simply so small that they don't have a second/next page.
return "";
}
} catch(Exception e) {
throw new ParsingException("could not load next page url", e);
}
}
}

View File

@ -0,0 +1,332 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.stream.AbstractStreamInfo;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
/**
 * Extracts the metadata and the stream items of a YouTube playlist.
 * <p>
 * The playlist page is downloaded and parsed in the constructor. The metadata getters read
 * from that document and cache what they find; further batches of streams are downloaded on
 * demand by {@link #getNextStreams()}.
 */
@SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor {

    /** Parsed playlist page, set by {@link #fetchDocument()}. */
    private Document doc = null;

    /**
     * It's lazily initialized (when getNextStreams is called)
     */
    private Document nextStreamsAjax = null;

    /** Url of the next batch of streams; empty when no further batch is known. */
    private String nextStreamsUrl = "";

    /*//////////////////////////////////////////////////////////////////////////
    // Variables for cache purposes (not "select" the current document all over again)
    //////////////////////////////////////////////////////////////////////////*/

    private String playlistId;
    private String playlistName;
    private String avatarUrl;
    private String bannerUrl;

    private long streamsCount;

    private String uploaderUrl;
    private String uploaderName;
    private String uploaderAvatarUrl;

    /**
     * Creates the extractor and immediately downloads and parses the playlist page.
     *
     * @param urlIdHandler handler used to clean the url and to derive the playlist id
     * @param url          playlist url; it is cleaned before being handed to the super class
     * @param serviceId    passed through to the super class
     * @throws IOException         if the download fails
     * @throws ExtractionException if the page cannot be processed
     */
    public YoutubePlaylistExtractor(UrlIdHandler urlIdHandler, String url, int serviceId) throws IOException, ExtractionException {
        super(urlIdHandler, urlIdHandler.cleanUrl(url), serviceId);
        fetchDocument();
    }

    /**
     * @return the id the url handler derives from this extractor's url (cached)
     * @throws ParsingException if the id cannot be derived
     */
    @Override
    public String getPlaylistId() throws ParsingException {
        try {
            if (playlistId == null) {
                playlistId = getUrlIdHandler().getId(getUrl());
            }

            return playlistId;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist id", e);
        }
    }

    /**
     * @return the text of the playlist header title (cached)
     * @throws ParsingException if the title cannot be read from the document
     */
    @Override
    public String getPlaylistName() throws ParsingException {
        try {
            if (playlistName == null) {
                playlistName = doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
            }

            return playlistName;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist name", e);
        }
    }

    /**
     * @return the absolute url of the playlist header thumbnail (cached)
     * @throws ParsingException if the image cannot be read from the document
     */
    @Override
    public String getAvatarUrl() throws ParsingException {
        try {
            if (avatarUrl == null) {
                avatarUrl = doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
            }

            return avatarUrl;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist avatar", e);
        }
    }

    /**
     * Reads the banner url from the css inside the banner element.
     *
     * @return the banner url, or {@code null} when the url found points at s.ytimg.com
     * @throws ParsingException if the banner style cannot be read from the document
     */
    @Override
    public String getBannerUrl() throws ParsingException {
        try {
            if (bannerUrl == null) {
                Element el = doc.select("div[id=\"gh-banner\"] style").first();
                String cssContent = el.html();
                // Capture only up to the first ')'. The former greedy pattern depended on a
                // follow-up substring(0, indexOf(");")), which threw whenever the captured
                // text did not contain ");".
                String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
                // Urls on s.ytimg.com are reported as "no banner" (presumably the default image).
                bannerUrl = url.contains("s.ytimg.com") ? null : url;
            }

            return bannerUrl;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist Banner", e);
        }
    }

    /**
     * @return the absolute link of the first entry of the playlist header details (cached)
     * @throws ParsingException if the link cannot be read from the document
     */
    @Override
    public String getUploaderUrl() throws ParsingException {
        try {
            if (uploaderUrl == null) {
                uploaderUrl = doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
            }

            return uploaderUrl;
        } catch (Exception e) {
            // The message used to say "uploader name", copied from getUploaderName().
            throw new ParsingException("Could not get playlist uploader url", e);
        }
    }

    /**
     * @return the text of the channel title link (cached)
     * @throws ParsingException if the title cannot be read from the document
     */
    @Override
    public String getUploaderName() throws ParsingException {
        try {
            if (uploaderName == null) {
                uploaderName = doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
            }

            return uploaderName;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist uploader name", e);
        }
    }

    /**
     * @return the absolute url of the channel header profile image (cached)
     * @throws ParsingException if the image cannot be read from the document
     */
    @Override
    public String getUploaderAvatarUrl() throws ParsingException {
        try {
            if (uploaderAvatarUrl == null) {
                uploaderAvatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
            }

            return uploaderAvatarUrl;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist uploader avatar", e);
        }
    }

    /**
     * Reads the number of streams from the second entry of the playlist header details.
     * A positive result is cached; zero is looked up again on the next call.
     *
     * @return the number of streams, or 0 when the entry holds text without any number
     * @throws ParsingException if the entry is missing or empty
     */
    @Override
    public long getStreamsCount() throws ParsingException {
        if (streamsCount <= 0) {
            String input;

            try {
                input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
            } catch (IndexOutOfBoundsException e) {
                throw new ParsingException("Could not get video count from playlist", e);
            }

            try {
                streamsCount = Long.parseLong(input.replaceAll("\\D+", ""));
            } catch (NumberFormatException e) {
                // A playlist without videos has no number in this text, so after removing
                // every non-digit character an empty string is parsed; count that as zero.
                if (!input.isEmpty()) {
                    streamsCount = 0;
                } else {
                    throw new ParsingException("Could not handle input: " + input, e);
                }
            }
        }

        return streamsCount;
    }

    /**
     * @return the stream preview collector, refilled with the items of the playlist page
     * @throws ParsingException if collecting the items fails
     */
    @Override
    public StreamInfoItemCollector getStreams() throws ParsingException {
        StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
        Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
        collectStreamsFrom(collector, tbody);
        return collector;
    }

    /**
     * @return {@code true} when a url for a further batch of streams is known
     */
    @Override
    public boolean hasMoreStreams() {
        return nextStreamsUrl != null && !nextStreamsUrl.isEmpty();
    }

    /**
     * Downloads the next batch of streams and collects its items.
     *
     * @return a new collector filled from the freshly downloaded batch
     * @throws ExtractionException if there are no more streams or the batch cannot be parsed
     * @throws IOException         if the download fails
     */
    @Override
    public StreamInfoItemCollector getNextStreams() throws ExtractionException, IOException {
        if (!hasMoreStreams()) {
            throw new ExtractionException("Playlist doesn't have more streams");
        }

        StreamInfoItemCollector collector = new StreamInfoItemCollector(getUrlIdHandler(), getServiceId());
        setupNextStreamsAjax(NewPipe.getDownloader());
        collectStreamsFrom(collector, nextStreamsAjax.select("tbody[id=\"pl-load-more-destination\"]").first());

        return collector;
    }

    /**
     * Downloads the JSON answer behind {@code nextStreamsUrl}, stores its "content_html" part
     * as {@code nextStreamsAjax} and derives the following url from "load_more_widget_html".
     */
    private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
        String ajaxDataRaw = downloader.download(nextStreamsUrl);
        try {
            JSONObject ajaxData = new JSONObject(ajaxDataRaw);

            // Wrap the rows in the same tbody that is looked up on the playlist page.
            String htmlDataRaw = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
            nextStreamsAjax = Jsoup.parse(htmlDataRaw, nextStreamsUrl);

            String nextStreamsHtmlDataRaw = ajaxData.getString("load_more_widget_html");
            if (!nextStreamsHtmlDataRaw.isEmpty()) {
                final Document nextStreamsData = Jsoup.parse(nextStreamsHtmlDataRaw, nextStreamsUrl);
                nextStreamsUrl = getNextStreamsUrl(nextStreamsData);
            } else {
                // An empty widget means there is nothing further to load.
                nextStreamsUrl = "";
            }
        } catch (JSONException e) {
            throw new ParsingException("Could not parse json data for next streams", e);
        }
    }

    /**
     * Downloads and parses the playlist page, then resets the paging state.
     */
    private void fetchDocument() throws IOException, ReCaptchaException, ParsingException {
        Downloader downloader = NewPipe.getDownloader();

        String pageContent = downloader.download(getUrl());
        doc = Jsoup.parse(pageContent, getUrl());

        nextStreamsUrl = getNextStreamsUrl(doc);
        nextStreamsAjax = null;
    }

    /**
     * Reads the url of the next batch of streams from the "load more" button of a document.
     *
     * @return the absolute url stored on the button, or an empty string when there is no button
     */
    private String getNextStreamsUrl(Document d) throws ParsingException {
        try {
            Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
            if (button != null) {
                return button.attr("abs:data-uix-load-more-href");
            } else {
                // Sometimes playlists are simply so small that there is no button for more streams/videos
                return "";
            }
        } catch (Exception e) {
            throw new ParsingException("could not get next streams' url", e);
        }
    }

    /**
     * Empties the collector's item list and commits one item extractor per child of
     * {@code element}.
     */
    private void collectStreamsFrom(StreamInfoItemCollector collector, Element element) throws ParsingException {
        collector.getItemList().clear();

        final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
        for (final Element li : element.children()) {
            collector.commit(new StreamInfoItemExtractor() {
                @Override
                public AbstractStreamInfo.StreamType getStreamType() throws ParsingException {
                    return AbstractStreamInfo.StreamType.VIDEO_STREAM;
                }

                @Override
                public String getWebPageUrl() throws ParsingException {
                    try {
                        return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id"));
                    } catch (Exception e) {
                        throw new ParsingException("Could not get web page url for the video", e);
                    }
                }

                @Override
                public String getTitle() throws ParsingException {
                    try {
                        return li.attr("data-title");
                    } catch (Exception e) {
                        throw new ParsingException("Could not get title", e);
                    }
                }

                @Override
                public int getDuration() throws ParsingException {
                    try {
                        return YoutubeParsingHelper.parseDurationString(
                                li.select("div[class=\"timestamp\"] span").first().text().trim());
                    } catch (Exception e) {
                        if (isLiveStream(li)) {
                            // -1 for no duration
                            return -1;
                        } else {
                            throw new ParsingException("Could not get Duration: " + getTitle(), e);
                        }
                    }
                }

                @Override
                public String getUploader() throws ParsingException {
                    return li.select("div[class=pl-video-owner] a").text();
                }

                // Upload date and view count are not read from the playlist row;
                // fixed placeholders are returned instead.
                @Override
                public String getUploadDate() throws ParsingException {
                    return "";
                }

                @Override
                public long getViewCount() throws ParsingException {
                    return -1;
                }

                @Override
                public String getThumbnailUrl() throws ParsingException {
                    try {
                        return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg";
                    } catch (Exception e) {
                        throw new ParsingException("Could not get thumbnail url", e);
                    }
                }

                @Override
                public boolean isAd() throws ParsingException {
                    return false;
                }

                private boolean isLiveStream(Element item) {
                    Element liveBadge = item.select("span[class*=\"yt-badge-live\"]").first();
                    // Sometimes live streams don't have badges but still are live streams;
                    // if the video time is not available we most likely have an offline live stream.
                    return liveBadge != null
                            || item.select("span[class*=\"video-time\"]").first() == null;
                }
            });
        }
    }
}

View File

@ -1,11 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.Parser;
public class YoutubePlayListUrlIdHandler implements UrlIdHandler {
public class YoutubePlaylistUrlIdHandler implements UrlIdHandler {
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{34})";

View File

@ -10,12 +10,12 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.search.InfoItemSearchCollector;
import org.schabi.newpipe.extractor.search.SearchEngine;
import java.net.URLEncoder;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.EnumSet;
/**
/*
* Created by Christian Schabesberger on 09.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
@ -70,8 +70,7 @@ public class YoutubeSearchEngine extends SearchEngine {
if (!languageCode.isEmpty()) {
//assert Pattern.matches("[a-z]{2}(-([A-Z]{2}|[0-9]{1,3}))?", languageCode);
site = downloader.download(url, languageCode);
}
else {
} else {
site = downloader.download(url);
}

View File

@ -1,18 +1,18 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.SuggestionExtractor;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.search.SearchEngine;
import org.schabi.newpipe.extractor.SuggestionExtractor;
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import java.io.IOException;
/**
/*
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
@ -44,17 +44,18 @@ public class YoutubeService extends StreamingService {
serviceInfo.name = "Youtube";
return serviceInfo;
}
@Override
public StreamExtractor getExtractorInstance(String url)
throws ExtractionException, IOException {
UrlIdHandler urlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
if (urlIdHandler.acceptUrl(url)) {
return new YoutubeStreamExtractor(urlIdHandler, url, getServiceId());
}
else {
} else {
throw new IllegalArgumentException("supplied String is not a valid Youtube URL");
}
}
@Override
public SearchEngine getSearchEngineInstance() {
return new YoutubeSearchEngine(getStreamUrlIdHandlerInstance(), getServiceId());
@ -72,19 +73,18 @@ public class YoutubeService extends StreamingService {
@Override
public UrlIdHandler getPlayListUrlIdHandlerInstance() {
return new YoutubePlayListUrlIdHandler();
public UrlIdHandler getPlaylistUrlIdHandlerInstance() {
return new YoutubePlaylistUrlIdHandler();
}
@Override
public ChannelExtractor getChannelExtractorInstance(String url, int page)
throws ExtractionException, IOException {
return new YoutubeChannelExtractor(getChannelUrlIdHandlerInstance(), url, page, getServiceId());
public ChannelExtractor getChannelExtractorInstance(String url) throws ExtractionException, IOException {
return new YoutubeChannelExtractor(getChannelUrlIdHandlerInstance(), url, getServiceId());
}
public PlayListExtractor getPlayListExtractorInstance(String url, int page)
throws ExtractionException, IOException {
return new YoutubePlayListExtractor(getPlayListUrlIdHandlerInstance(), url, page, getServiceId());
@Override
public PlaylistExtractor getPlaylistExtractorInstance(String url) throws ExtractionException, IOException {
return new YoutubePlaylistExtractor(getPlaylistUrlIdHandlerInstance(), url, getServiceId());
}
@Override

View File

@ -8,21 +8,21 @@ import org.jsoup.nodes.Element;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream_info.AudioStream;
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
import org.schabi.newpipe.extractor.stream_info.StreamInfo;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream_info.VideoStream;
import org.schabi.newpipe.extractor.stream.AbstractStreamInfo;
import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfo;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
import java.util.List;
@ -31,7 +31,7 @@ import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
/*
* Created by Christian Schabesberger on 06.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
@ -129,6 +129,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public int bandWidth = -1;
}
/**
* List can be found here https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L360
*/
private static final ItagItem[] itagList = {
//////////////////////////////////////////////////////////////////////////
// VIDEO ID ItagType Format Resolution FPS ///
@ -186,10 +189,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
new ItagItem(315, ItagType.VIDEO_ONLY, MediaFormat.WEBM, "2160p60" , 60)
};
/**These lists only contain itag formats that are supported by the common Android Video player.
However if you are looking for a list showing all itag formats, look at
https://github.com/rg3/youtube-dl/issues/1687 */
public static boolean itagIsSupported(int itag) {
for (ItagItem item : itagList) {
if (itag == item.id) {
@ -384,7 +383,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
je.printStackTrace();
System.err.println(
"failed to load uploader name from JSON args; trying to extract it from HTML");
} try {//fall through to fallback HTML method
}
try {//fall through to fallback HTML method
return doc.select("div.yt-user-info").first().text();
} catch (Exception e) {
throw new ParsingException("failed permanently to load uploader name.", e);
@ -640,8 +640,11 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return videoOnlyStreams;
}
/**Attempts to parse (and return) the offset to start playing the video from.
* @return the offset (in seconds), or 0 if no timestamp is found.*/
/**
* Attempts to parse (and return) the offset to start playing the video from.
*
* @return the offset (in seconds), or 0 if no timestamp is found.
*/
@Override
public int getTimeStamp() throws ParsingException {
String timeStamp;
@ -806,9 +809,11 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return StreamInfo.StreamType.VIDEO_STREAM;
}
/**Provides information about links to other videos on the video page, such as related videos.
/**
* Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a StreamInfoItem object,
* which is a subset of the fields in a full StreamInfo.*/
* which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
return new StreamInfoItemExtractor() {
@Override

View File

@ -1,13 +1,12 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.AbstractStreamInfo;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
/**
/*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
*

View File

@ -2,11 +2,11 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
@ -14,7 +14,7 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
/**
/*
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -39,7 +39,8 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
private static final YoutubeStreamUrlIdHandler instance = new YoutubeStreamUrlIdHandler();
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
private YoutubeStreamUrlIdHandler() {}
private YoutubeStreamUrlIdHandler() {
}
public static YoutubeStreamUrlIdHandler getInstance() {
return instance;
@ -78,15 +79,13 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
} else {
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
}
}
else if(lowercaseUrl.contains("youtu.be")) {
} else if (lowercaseUrl.contains("youtu.be")) {
if (url.contains("v=")) {
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
} else {
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
}
}
else {
} else {
throw new ParsingException("Error no suitable url: " + url);
}
@ -100,12 +99,13 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
/**
* Get the real url from a shared uri.
*
* <p>
* Shared URIs look like this:
* <pre>
* * https://www.youtube.com/shared?ci=PJICrTByb3E
* * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
* </pre>
*
* @param url The shared url
* @return the id of the stream
* @throws ParsingException
@ -128,7 +128,7 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
// is this bad? is this fragile?:
String realId = Parser.matchGroup1("rel=\"shortlink\" href=\"https://youtu.be/" + ID_PATTERN, content);
if (sharedId.equals(realId)) {
throw new ParsingException("Got same id for as shared id: " + sharedId);
throw new ParsingException("Got same id for as shared info_id: " + sharedId);
}
return realId;
}

View File

@ -2,9 +2,9 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.SuggestionExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.SuggestionExtractor;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
@ -20,7 +20,7 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
/**
/*
* Created by Christian Schabesberger on 28.09.16.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor;
package org.schabi.newpipe.extractor.stream;
/**
/*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* AbstractStreamInfo.java is part of NewPipe.
*
@ -18,10 +18,12 @@ package org.schabi.newpipe.extractor;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.Serializable;
import org.schabi.newpipe.extractor.Info;
/**Common properties between StreamInfo and StreamInfoItem.*/
public abstract class AbstractStreamInfo implements Serializable{
/**
* Common properties between StreamInfo and StreamInfoItem.
*/
public abstract class AbstractStreamInfo extends Info {
public enum StreamType {
NONE, // placeholder to check if stream type was checked or not
VIDEO_STREAM,
@ -32,12 +34,8 @@ public abstract class AbstractStreamInfo implements Serializable{
}
public StreamType stream_type;
public int service_id = -1;
public String id = "";
public String title = "";
public String uploader = "";
public String thumbnail_url = "";
public String webpage_url = "";
public String upload_date = "";
public long view_count = -1;
}

View File

@ -1,8 +1,8 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
import java.io.Serializable;
/**
/*
* Created by Christian Schabesberger on 04.03.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
/**
/*
* Created by Christian Schabesberger on 10.08.15.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -20,60 +20,29 @@ package org.schabi.newpipe.extractor.stream_info;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.util.List;
/**Scrapes information from a video streaming service (eg, YouTube).*/
@SuppressWarnings("ALL")
public abstract class StreamExtractor {
private int serviceId;
private String url;
private UrlIdHandler urlIdHandler;
private StreamInfoItemCollector previewInfoCollector;
public class ExtractorInitException extends ExtractionException {
public ExtractorInitException(String message) {
super(message);
}
public ExtractorInitException(Throwable cause) {
super(cause);
}
public ExtractorInitException(String message, Throwable cause) {
super(message, cause);
}
}
/**
* Scrapes information from a video streaming service (eg, YouTube).
*/
public abstract class StreamExtractor extends Extractor {
public static class ContentNotAvailableException extends ParsingException {
public ContentNotAvailableException(String message) {
super(message);
}
public ContentNotAvailableException(String message, Throwable cause) {
super(message, cause);
}
}
public StreamExtractor(UrlIdHandler urlIdHandler, String url, int serviceId) {
this.serviceId = serviceId;
this.urlIdHandler = urlIdHandler;
previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
}
protected StreamInfoItemCollector getStreamPreviewInfoCollector() {
return previewInfoCollector;
}
public String getUrl() {
return url;
}
public UrlIdHandler getUrlIdHandler() {
return urlIdHandler;
super(urlIdHandler, serviceId, url);
}
public abstract int getTimeStamp() throws ParsingException;
@ -98,9 +67,6 @@ public abstract class StreamExtractor {
public abstract StreamInfoItemCollector getRelatedVideos() throws ParsingException;
public abstract String getPageUrl();
public abstract StreamInfo.StreamType getStreamType() throws ParsingException;
public int getServiceId() {
return serviceId;
}
/**
* Analyses the webpage's document and extracts any error message there might be.

View File

@ -1,16 +1,14 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.DashMpdParser;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.utils.DashMpdParser;
import java.io.IOException;
import java.util.List;
import java.util.Vector;
/**
/*
* Created by Christian Schabesberger on 26.08.15.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -30,7 +28,9 @@ import java.util.Vector;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Info object for opened videos, ie the video ready to play.*/
/**
* Info object for opened videos, ie the video ready to play.
*/
@SuppressWarnings("ALL")
public class StreamInfo extends AbstractStreamInfo {
@ -40,17 +40,20 @@ public class StreamInfo extends AbstractStreamInfo {
}
}
public StreamInfo() {}
public StreamInfo() {
}
/**Creates a new StreamInfo object from an existing AbstractVideoInfo.
* All the shared properties are copied to the new StreamInfo.*/
/**
* Creates a new StreamInfo object from an existing AbstractVideoInfo.
* All the shared properties are copied to the new StreamInfo.
*/
@SuppressWarnings("WeakerAccess")
public StreamInfo(AbstractStreamInfo avi) {
this.id = avi.id;
this.title = avi.title;
this.url = avi.url;
this.name = avi.name;
this.uploader = avi.uploader;
this.thumbnail_url = avi.thumbnail_url;
this.webpage_url = avi.webpage_url;
this.upload_date = avi.upload_date;
this.upload_date = avi.upload_date;
this.view_count = avi.view_count;
@ -71,8 +74,10 @@ public class StreamInfo extends AbstractStreamInfo {
errors.add(e);
}
/**Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses*/
/**
* Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses
*/
public static StreamInfo getVideoInfo(StreamExtractor extractor)
throws ExtractionException, StreamExtractor.ContentNotAvailableException {
StreamInfo streamInfo = new StreamInfo();
@ -108,16 +113,16 @@ public class StreamInfo extends AbstractStreamInfo {
UrlIdHandler uiconv = extractor.getUrlIdHandler();
streamInfo.service_id = extractor.getServiceId();
streamInfo.webpage_url = extractor.getPageUrl();
streamInfo.url = extractor.getPageUrl();
streamInfo.stream_type = extractor.getStreamType();
streamInfo.id = uiconv.getId(extractor.getPageUrl());
streamInfo.title = extractor.getTitle();
streamInfo.name = extractor.getTitle();
streamInfo.age_limit = extractor.getAgeLimit();
if ((streamInfo.stream_type == StreamType.NONE)
|| (streamInfo.webpage_url == null || streamInfo.webpage_url.isEmpty())
|| (streamInfo.url == null || streamInfo.url.isEmpty())
|| (streamInfo.id == null || streamInfo.id.isEmpty())
|| (streamInfo.title == null /* streamInfo.title can be empty of course */)
|| (streamInfo.name == null /* streamInfo.title can be empty of course */)
|| (streamInfo.age_limit == -1)) {
throw new ExtractionException("Some important stream information was not given.");
}
@ -262,8 +267,7 @@ public class StreamInfo extends AbstractStreamInfo {
streamInfo.next_video = (StreamInfoItem) c.getItemList().get(0);
}
streamInfo.errors.addAll(c.getErrors());
}
catch(Exception e) {
} catch (Exception e) {
streamInfo.addException(e);
}
try {
@ -300,6 +304,4 @@ public class StreamInfo extends AbstractStreamInfo {
public List<InfoItem> related_streams = null;
//in seconds. some metadata is not passed using a StreamInfo object!
public int start_position = 0;
public List<Throwable> errors = new Vector<>();
}

View File

@ -1,6 +1,6 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
/**
/*
* Created by Christian Schabesberger on 26.08.15.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -20,10 +20,11 @@ package org.schabi.newpipe.extractor.stream_info;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.InfoItem;
/**Info object for previews of unopened videos, eg search results, related videos*/
/**
* Info object for previews of unopened videos, eg search results, related videos
*/
public class StreamInfoItem extends AbstractStreamInfo implements InfoItem {
public int duration;
@ -32,10 +33,10 @@ public class StreamInfoItem extends AbstractStreamInfo implements InfoItem {
}
public String getTitle() {
return title;
return name;
}
public String getLink() {
return webpage_url;
return url;
}
}

View File

@ -1,4 +1,4 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.InfoItemCollector;
import org.schabi.newpipe.extractor.NewPipe;
@ -6,10 +6,7 @@ import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.util.List;
import java.util.Vector;
/**
/*
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -50,15 +47,15 @@ public class StreamInfoItemCollector extends InfoItemCollector {
StreamInfoItem resultItem = new StreamInfoItem();
// important information
resultItem.service_id = getServiceId();
resultItem.webpage_url = extractor.getWebPageUrl();
resultItem.url = extractor.getWebPageUrl();
if (getUrlIdHandler() == null) {
throw new ParsingException("Error: UrlIdHandler not set");
} else if (!resultItem.webpage_url.isEmpty()) {
} else if (!resultItem.url.isEmpty()) {
resultItem.id = NewPipe.getService(getServiceId())
.getStreamUrlIdHandlerInstance()
.getId(resultItem.webpage_url);
.getId(resultItem.url);
}
resultItem.title = extractor.getTitle();
resultItem.name = extractor.getTitle();
resultItem.stream_type = extractor.getStreamType();
// optional information

View File

@ -1,9 +1,8 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
/*
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>

View File

@ -1,23 +1,23 @@
package org.schabi.newpipe.extractor.stream_info;
package org.schabi.newpipe.extractor.stream;
import java.io.Serializable;
/**
/*
* Created by Christian Schabesberger on 04.03.16.
* <p>
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* VideoStream.java is part of NewPipe.
* <p>
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* <p>
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* <p>
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/

View File

@ -1,8 +1,11 @@
package org.schabi.newpipe.extractor;
package org.schabi.newpipe.extractor.utils;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream_info.AudioStream;
import org.schabi.newpipe.extractor.stream.AudioStream;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@ -16,7 +19,7 @@ import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
/**
/*
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -84,8 +87,7 @@ public class DashMpdParser {
audioStreams.add(new AudioStream(url, format, 0, bandwidth, samplingRate));
}
}
}
catch(Exception e) {
} catch (Exception e) {
throw new DashMpdParsingException("Could not parse Dash mpd", e);
}
return audioStreams;

View File

@ -1,4 +1,4 @@
package org.schabi.newpipe.extractor;
package org.schabi.newpipe.extractor.utils;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -9,7 +9,7 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
/*
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -29,7 +29,9 @@ import java.util.regex.Pattern;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/** avoid using regex !!! */
/**
* avoid using regex !!!
*/
public class Parser {
private Parser() {
@ -51,8 +53,7 @@ public class Parser {
boolean foundMatch = mat.find();
if (foundMatch) {
return mat.group(group);
}
else {
} else {
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
if (input.length() > 1024) {
throw new RegexException("failed to find pattern \"" + pattern);