mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-13 05:40:34 +05:30
Base Implementation: Parse the upload date of StreamInfoItems
In the format '2 days ago' (in English) on a YouTube channel page. (Parser extensible to other pages.)
This commit is contained in:
parent
514ed7bdc1
commit
180836c180
@ -17,6 +17,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||
import org.schabi.newpipe.extractor.utils.Localization;
|
||||
|
||||
@ -222,7 +223,7 @@ public abstract class StreamingService {
|
||||
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
||||
return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||
}
|
||||
|
||||
|
||||
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
||||
return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||
}
|
||||
@ -230,7 +231,7 @@ public abstract class StreamingService {
|
||||
public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException {
|
||||
return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||
}
|
||||
|
||||
|
||||
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
|
||||
return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization());
|
||||
}
|
||||
@ -287,7 +288,7 @@ public abstract class StreamingService {
|
||||
public StreamExtractor getStreamExtractor(String url) throws ExtractionException {
|
||||
return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization());
|
||||
}
|
||||
|
||||
|
||||
public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
|
||||
ListLinkHandlerFactory llhf = getCommentsLHFactory();
|
||||
if(null == llhf) {
|
||||
@ -296,6 +297,9 @@ public abstract class StreamingService {
|
||||
return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization());
|
||||
}
|
||||
|
||||
/**
 * Creates a parser for relative upload dates such as '2 days ago'.
 * <p>
 * Every call returns a new {@link TimeAgoParser} built from
 * {@link TimeAgoParser#DEFAULT_AGO_PHRASES}.
 * </p>
 *
 * @return a newly created parser using the default phrases
 */
public TimeAgoParser getTimeAgoParser() {
|
||||
return new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
|
||||
|
@ -79,23 +79,22 @@ public class SoundcloudParsingHelper {
|
||||
return dl.head(apiUrl).getResponseCode() == 200;
|
||||
}
|
||||
|
||||
public static String toDateString(String time) throws ParsingException {
|
||||
static Date parseDate(String time) throws ParsingException {
|
||||
try {
|
||||
Date date;
|
||||
// Have two date formats, one for the 'api.soundc...' and the other 'api-v2.soundc...'.
|
||||
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
|
||||
} catch (ParseException e1) {
|
||||
try {
|
||||
date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
|
||||
} catch (Exception e) {
|
||||
date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
|
||||
return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
|
||||
} catch (ParseException e2) {
|
||||
throw new ParsingException(e1.getMessage(), e2);
|
||||
}
|
||||
|
||||
SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd");
|
||||
return newDateFormat.format(date);
|
||||
} catch (ParseException e) {
|
||||
throw new ParsingException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
static String toTextualDate(String time) throws ParsingException {
|
||||
return new SimpleDateFormat("yyyy-MM-dd").format(parseDate(time));
|
||||
}
|
||||
|
||||
/**
|
||||
* Call the endpoint "/resolve" of the api.<p>
|
||||
*
|
||||
|
@ -51,7 +51,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
return SoundcloudParsingHelper.toDateString(track.getString("created_at"));
|
||||
return SoundcloudParsingHelper.toTextualDate(track.getString("created_at"));
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -5,6 +5,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
|
||||
import java.util.Calendar;
|
||||
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
|
||||
|
||||
public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
@ -41,8 +43,19 @@ public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtracto
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at"));
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return SoundcloudParsingHelper.toTextualDate(getCreatedAt());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Calendar getUploadDate() throws ParsingException {
|
||||
Calendar uploadTime = Calendar.getInstance();
|
||||
uploadTime.setTime(SoundcloudParsingHelper.parseDate(getCreatedAt()));
|
||||
return uploadTime;
|
||||
}
|
||||
|
||||
private String getCreatedAt() {
|
||||
return itemObject.getString("created_at");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
||||
import org.schabi.newpipe.extractor.utils.Localization;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
@ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
|
||||
|
||||
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||
|
||||
private Document doc;
|
||||
|
||||
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
||||
@ -230,7 +233,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
final String uploaderUrl = getUrl();
|
||||
for (final Element li : element.children()) {
|
||||
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
|
@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.utils.Localization;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
@ -28,6 +29,8 @@ import java.io.IOException;
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||
|
||||
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||
|
||||
private Document doc;
|
||||
|
||||
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
||||
@ -192,7 +195,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||
continue;
|
||||
}
|
||||
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
public Element uploaderLink;
|
||||
|
||||
@Override
|
||||
@ -258,7 +261,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||
@ -129,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
||||
|
||||
// video item type
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(el));
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser()));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
||||
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
||||
|
@ -75,6 +75,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||
|
||||
private Document doc;
|
||||
@Nullable
|
||||
private JsonObject playerArgs;
|
||||
@ -932,7 +934,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
||||
*/
|
||||
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
|
||||
return new YoutubeStreamInfoItemExtractor(li) {
|
||||
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
@ -959,7 +961,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
@ -1,12 +1,17 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Calendar;
|
||||
|
||||
/*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
|
||||
@ -28,9 +33,18 @@ import org.schabi.newpipe.extractor.utils.Utils;
|
||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
|
||||
private final Element item;
|
||||
private final TimeAgoParser timeAgoParser;
|
||||
|
||||
public YoutubeStreamInfoItemExtractor(Element item) {
|
||||
private String cachedUploadDate;
|
||||
|
||||
/**
|
||||
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||
* @param item The page element
|
||||
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||
*/
|
||||
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.item = item;
|
||||
this.timeAgoParser = timeAgoParser;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -126,20 +140,35 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
if (cachedUploadDate != null) {
|
||||
return cachedUploadDate;
|
||||
}
|
||||
|
||||
try {
|
||||
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
||||
if (meta == null) return "";
|
||||
|
||||
Element li = meta.select("li").first();
|
||||
if(li == null) return "";
|
||||
final Elements li = meta.select("li");
|
||||
if (li.isEmpty()) return "";
|
||||
|
||||
return meta.select("li").first().text();
|
||||
return cachedUploadDate = li.first().text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Calendar getUploadDate() throws ParsingException {
|
||||
String textualUploadDate = getTextualUploadDate();
|
||||
if (timeAgoParser != null
|
||||
&& textualUploadDate != null && !"".equals(textualUploadDate)) {
|
||||
return timeAgoParser.parse(textualUploadDate);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
String input;
|
||||
|
@ -35,12 +35,15 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.utils.Localization;
|
||||
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
|
||||
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||
|
||||
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||
|
||||
private Document doc;
|
||||
|
||||
public YoutubeTrendingExtractor(StreamingService service,
|
||||
@ -93,7 +96,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||
for(Element ul : uls) {
|
||||
for(final Element li : ul.children()) {
|
||||
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
|
@ -22,6 +22,8 @@ package org.schabi.newpipe.extractor.stream;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
|
||||
import java.util.Calendar;
|
||||
|
||||
/**
|
||||
* Info object for previews of unopened videos, eg search results, related videos
|
||||
*/
|
||||
@ -29,7 +31,8 @@ public class StreamInfoItem extends InfoItem {
|
||||
private final StreamType streamType;
|
||||
|
||||
private String uploaderName;
|
||||
private String uploadDate;
|
||||
private String textualUploadDate;
|
||||
private Calendar uploadDate;
|
||||
private long viewCount = -1;
|
||||
private long duration = -1;
|
||||
|
||||
@ -52,14 +55,6 @@ public class StreamInfoItem extends InfoItem {
|
||||
this.uploaderName = uploader_name;
|
||||
}
|
||||
|
||||
public String getUploadDate() {
|
||||
return uploadDate;
|
||||
}
|
||||
|
||||
public void setUploadDate(String upload_date) {
|
||||
this.uploadDate = upload_date;
|
||||
}
|
||||
|
||||
public long getViewCount() {
|
||||
return viewCount;
|
||||
}
|
||||
@ -84,12 +79,36 @@ public class StreamInfoItem extends InfoItem {
|
||||
this.uploaderUrl = uploaderUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The original textual upload date as returned by the streaming service.
|
||||
* @see #getUploadDate()
|
||||
*/
|
||||
public String getTextualUploadDate() {
|
||||
return textualUploadDate;
|
||||
}
|
||||
|
||||
public void setTextualUploadDate(String upload_date) {
|
||||
this.textualUploadDate = upload_date;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The (approximated) date and time this item was uploaded or {@code null}.
|
||||
* @see #getTextualUploadDate()
|
||||
*/
|
||||
public Calendar getUploadDate() {
|
||||
return uploadDate;
|
||||
}
|
||||
|
||||
public void setUploadDate(Calendar uploadDate) {
|
||||
this.uploadDate = uploadDate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "StreamInfoItem{" +
|
||||
"streamType=" + streamType +
|
||||
", uploaderName='" + uploaderName + '\'' +
|
||||
", uploadDate='" + uploadDate + '\'' +
|
||||
", textualUploadDate='" + textualUploadDate + '\'' +
|
||||
", viewCount=" + viewCount +
|
||||
", duration=" + duration +
|
||||
", uploaderUrl='" + uploaderUrl + '\'' +
|
||||
|
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.stream;
|
||||
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
import java.util.Calendar;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 28.02.16.
|
||||
*
|
||||
@ -64,10 +66,30 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
|
||||
String getUploaderUrl() throws ParsingException;
|
||||
|
||||
/**
|
||||
* Extract the uploader name
|
||||
* @return the uploader name
|
||||
* @throws ParsingException thrown if there is an error in the extraction
|
||||
* Extract the textual upload date of this item.
|
||||
* The original textual date provided by the service may be used if it is short;
|
||||
* otherwise the format "yyyy-MM-dd" or a locale-specific version is preferred.
|
||||
*
|
||||
* @return The original textual upload date.
|
||||
* @throws ParsingException if there is an error in the extraction
|
||||
* @see #getUploadDate()
|
||||
*/
|
||||
String getUploadDate() throws ParsingException;
|
||||
String getTextualUploadDate() throws ParsingException;
|
||||
|
||||
/**
|
||||
* Extracts the upload date and time of this item and parses it.
|
||||
* <p>
|
||||
* If the service doesn't provide an exact time, an approximation can be returned.
|
||||
* The approximation should be marked by setting seconds and milliseconds to zero.
|
||||
* <br>
|
||||
* If the service doesn't provide any date at all, then {@code null} should be returned.
|
||||
* </p>
|
||||
*
|
||||
* @return The (approximated) date and time this item was uploaded or {@code null}.
|
||||
* @throws ParsingException if there is an error in the extraction
|
||||
* or the extracted date couldn't be parsed.
|
||||
* @see #getTextualUploadDate()
|
||||
*/
|
||||
Calendar getUploadDate() throws ParsingException;
|
||||
|
||||
}
|
||||
|
@ -61,10 +61,15 @@ public class StreamInfoItemsCollector extends InfoItemsCollector<StreamInfoItem,
|
||||
addError(e);
|
||||
}
|
||||
try {
|
||||
resultItem.setUploadDate(extractor.getUploadDate());
|
||||
resultItem.setTextualUploadDate(extractor.getTextualUploadDate());
|
||||
} catch (Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
try {
|
||||
resultItem.setUploadDate(extractor.getUploadDate());
|
||||
} catch (ParsingException e) {
|
||||
addError(e);
|
||||
}
|
||||
try {
|
||||
resultItem.setViewCount(extractor.getViewCount());
|
||||
} catch (Exception e) {
|
||||
|
@ -0,0 +1,158 @@
|
||||
package org.schabi.newpipe.extractor.stream;
|
||||
|
||||
/*
|
||||
* Created by wojcik.online on 2018-01-25.
|
||||
*/
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
import java.util.Calendar;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A helper class that is meant to be used by services that need to parse upload dates in the
|
||||
* format '2 days ago' or similar.
|
||||
*/
|
||||
public class TimeAgoParser {
|
||||
|
||||
/**
|
||||
* A set of english phrases that are contained in the time units.
|
||||
* (e.g. '7 minutes ago' contains 'min')
|
||||
*/
|
||||
public static Map<TimeAgoUnit, Collection<String>> DEFAULT_AGO_PHRASES =
|
||||
new EnumMap<>(TimeAgoUnit.class);
|
||||
|
||||
private final Map<TimeAgoUnit, Collection<String>> agoPhrases;
|
||||
|
||||
private final Calendar consistentNow;
|
||||
|
||||
/**
|
||||
* Creates a helper to parse upload dates in the format '2 days ago'.
|
||||
* <p>
|
||||
* Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items.
|
||||
* </p>
|
||||
* @param agoPhrases A set of phrases how to recognize the time units in a given language.
|
||||
*/
|
||||
public TimeAgoParser(Map<TimeAgoUnit, Collection<String>> agoPhrases) {
|
||||
this.agoPhrases = agoPhrases;
|
||||
consistentNow = Calendar.getInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a textual date in the format '2 days ago' into a Calendar representation.
|
||||
* Beginning with days ago, marks the date as approximated by setting minutes, seconds
|
||||
* and milliseconds to 0.
|
||||
* @param textualDate The original date as provided by the streaming service
|
||||
* @return The parsed (approximated) time
|
||||
* @throws ParsingException if the time unit could not be recognized
|
||||
*/
|
||||
public Calendar parse(String textualDate) throws ParsingException {
|
||||
int timeAgoAmount;
|
||||
try {
|
||||
timeAgoAmount = parseTimeAgoAmount(textualDate);
|
||||
} catch (NumberFormatException e) {
|
||||
// If there is no valid number in the textual date,
|
||||
// assume it is 1 (as in 'a second ago').
|
||||
timeAgoAmount = 1;
|
||||
}
|
||||
|
||||
TimeAgoUnit timeAgoUnit = parseTimeAgoUnit(textualDate);
|
||||
return getCalendar(timeAgoAmount, timeAgoUnit);
|
||||
}
|
||||
|
||||
private int parseTimeAgoAmount(String textualDate) throws NumberFormatException {
|
||||
String timeValueStr = textualDate.replaceAll("\\D+", "");
|
||||
return Integer.parseInt(timeValueStr);
|
||||
}
|
||||
|
||||
private TimeAgoUnit parseTimeAgoUnit(String textualDate) throws ParsingException {
|
||||
for (TimeAgoUnit timeAgoUnit : agoPhrases.keySet()) {
|
||||
for (String agoPhrase : agoPhrases.get(timeAgoUnit)) {
|
||||
if (textualDate.toLowerCase().contains(agoPhrase.toLowerCase())){
|
||||
return timeAgoUnit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new ParsingException("Unable to parse the date: " + textualDate);
|
||||
}
|
||||
|
||||
private Calendar getCalendar(int timeAgoAmount, TimeAgoUnit timeAgoUnit) {
|
||||
Calendar calendarTime = getNow();
|
||||
|
||||
switch (timeAgoUnit) {
|
||||
case SECONDS:
|
||||
calendarTime.add(Calendar.SECOND, -timeAgoAmount);
|
||||
break;
|
||||
|
||||
case MINUTES:
|
||||
calendarTime.add(Calendar.MINUTE, -timeAgoAmount);
|
||||
break;
|
||||
|
||||
case HOURS:
|
||||
calendarTime.add(Calendar.HOUR_OF_DAY, -timeAgoAmount);
|
||||
break;
|
||||
|
||||
case DAYS:
|
||||
calendarTime.add(Calendar.DAY_OF_MONTH, -timeAgoAmount);
|
||||
markApproximatedTime(calendarTime);
|
||||
break;
|
||||
|
||||
case WEEKS:
|
||||
calendarTime.add(Calendar.WEEK_OF_YEAR, -timeAgoAmount);
|
||||
markApproximatedTime(calendarTime);
|
||||
break;
|
||||
|
||||
case MONTHS:
|
||||
calendarTime.add(Calendar.MONTH, -timeAgoAmount);
|
||||
markApproximatedTime(calendarTime);
|
||||
break;
|
||||
|
||||
case YEARS:
|
||||
calendarTime.add(Calendar.YEAR, -timeAgoAmount);
|
||||
// Prevent `PrettyTime` from showing '12 months ago'.
|
||||
calendarTime.add(Calendar.DAY_OF_MONTH, -1);
|
||||
markApproximatedTime(calendarTime);
|
||||
break;
|
||||
}
|
||||
|
||||
return calendarTime;
|
||||
}
|
||||
|
||||
private Calendar getNow() {
|
||||
return (Calendar) consistentNow.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks the time as approximated by setting minutes, seconds and milliseconds to 0.
|
||||
* @param calendarTime Time to be marked as approximated
|
||||
*/
|
||||
private void markApproximatedTime(Calendar calendarTime) {
|
||||
calendarTime.set(Calendar.MINUTE, 0);
|
||||
calendarTime.set(Calendar.SECOND, 0);
|
||||
calendarTime.set(Calendar.MILLISECOND, 0);
|
||||
}
|
||||
|
||||
static {
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.SECONDS, Collections.singleton("sec"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MINUTES, Collections.singleton("min"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.HOURS, Collections.singleton("hour"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.DAYS, Collections.singleton("day"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.WEEKS, Collections.singleton("week"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MONTHS, Collections.singleton("month"));
|
||||
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.YEARS, Collections.singleton("year"));
|
||||
}
|
||||
|
||||
public enum TimeAgoUnit {
|
||||
SECONDS,
|
||||
MINUTES,
|
||||
HOURS,
|
||||
DAYS,
|
||||
WEEKS,
|
||||
MONTHS,
|
||||
YEARS,
|
||||
}
|
||||
}
|
@ -41,6 +41,7 @@ import static java.util.Collections.singletonList;
|
||||
public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
||||
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
|
||||
private static String mCookies = "";
|
||||
|
||||
private static Downloader instance = null;
|
||||
@ -171,6 +172,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
||||
con.setRequestProperty("Accept-Language", DEFAULT_HTTP_ACCEPT_LANGUAGE);
|
||||
return dl(con);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.ListExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
|
||||
import java.util.Calendar;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
@ -27,6 +28,14 @@ public final class DefaultTests {
|
||||
StreamInfoItem streamInfoItem = (StreamInfoItem) item;
|
||||
assertNotEmpty("Uploader name not set: " + item, streamInfoItem.getUploaderName());
|
||||
assertNotEmpty("Uploader url not set: " + item, streamInfoItem.getUploaderUrl());
|
||||
|
||||
final String textualUploadDate = streamInfoItem.getTextualUploadDate();
|
||||
if (textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
||||
final Calendar uploadDate = streamInfoItem.getUploadDate();
|
||||
assertNotNull("No parsed upload date", uploadDate);
|
||||
assertTrue("Upload date not in the past", uploadDate.before(Calendar.getInstance()));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user