mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-04-28 16:00:33 +05:30
Merge pull request #1029 from AudricV/yt_fix-no-views-extraction-playlist-items
[YouTube] Fix partial non-extraction of "No views" string in stream items
This commit is contained in:
commit
b1298490c0
@ -13,11 +13,13 @@ import org.schabi.newpipe.extractor.utils.JsonUtils;
|
|||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.ZoneOffset;
|
import java.time.ZoneOffset;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
|
||||||
@ -43,6 +45,11 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
|
private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
|
||||||
|
Pattern.compile("([\\d,]+) views$");
|
||||||
|
private static final String NO_VIEWS_LOWERCASE = "no views";
|
||||||
|
|
||||||
private final JsonObject videoInfo;
|
private final JsonObject videoInfo;
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
private StreamType cachedStreamType;
|
private StreamType cachedStreamType;
|
||||||
@ -284,20 +291,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
|
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
|
||||||
|
// found in this case
|
||||||
|
|
||||||
if (!isNullOrEmpty(viewCount)) {
|
final String viewCountText = getTextFromObject(videoInfo.getObject("viewCountText"));
|
||||||
|
if (!isNullOrEmpty(viewCountText)) {
|
||||||
try {
|
try {
|
||||||
// These approaches are language dependent
|
return getViewCountFromViewCountText(viewCountText, false);
|
||||||
if (viewCount.toLowerCase().contains("no views")) {
|
|
||||||
return 0;
|
|
||||||
} else if (viewCount.toLowerCase().contains("recommended")) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
|
||||||
} catch (final Exception ignored) {
|
} catch (final Exception ignored) {
|
||||||
// Ignore all exceptions, as we can fallback to accessibility data
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -306,45 +307,70 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||||||
// the livestream)
|
// the livestream)
|
||||||
if (getStreamType() != StreamType.LIVE_STREAM) {
|
if (getStreamType() != StreamType.LIVE_STREAM) {
|
||||||
try {
|
try {
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(
|
return getViewCountFromAccessibilityData();
|
||||||
// This approach is language dependent
|
|
||||||
Parser.matchGroup1("([\\d,]+) views$",
|
|
||||||
videoInfo.getObject("title")
|
|
||||||
.getObject("accessibility")
|
|
||||||
.getObject("accessibilityData")
|
|
||||||
.getString("label", ""))));
|
|
||||||
} catch (final Exception ignored) {
|
} catch (final Exception ignored) {
|
||||||
// Ignore all exceptions, as the view count can be hidden by creators, and so
|
|
||||||
// cannot be found in this case
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to a short view count, always used for livestreams (see why above)
|
// Fallback to a short view count, always used for livestreams (see why above)
|
||||||
try {
|
if (videoInfo.has("videoInfo")) {
|
||||||
// Returned in playlists, in the form: view count separator upload date
|
// Returned in playlists, in the form: view count separator upload date
|
||||||
if (videoInfo.has("videoInfo")) {
|
try {
|
||||||
return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo")
|
return getViewCountFromViewCountText(videoInfo.getObject("videoInfo")
|
||||||
.getArray("runs")
|
.getArray("runs")
|
||||||
.getObject(0)
|
.getObject(0)
|
||||||
.getString("text"));
|
.getString("text", ""), true);
|
||||||
|
} catch (final Exception ignored) {
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (videoInfo.has("shortViewCountText")) {
|
||||||
// Returned everywhere but in playlists, used by the website to show view counts
|
// Returned everywhere but in playlists, used by the website to show view counts
|
||||||
if (videoInfo.has("shortViewCountText")) {
|
try {
|
||||||
return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText")
|
final String shortViewCountText =
|
||||||
.getArray("runs")
|
getTextFromObject(videoInfo.getObject("shortViewCountText"));
|
||||||
.getObject(0)
|
if (!isNullOrEmpty(shortViewCountText)) {
|
||||||
.getString("text"));
|
return getViewCountFromViewCountText(shortViewCountText, true);
|
||||||
|
}
|
||||||
|
} catch (final Exception ignored) {
|
||||||
}
|
}
|
||||||
} catch (final Exception ignored) {
|
|
||||||
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
|
|
||||||
// found in this case
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// No view count extracted: return -1, as the view count can be hidden by creators on videos
|
// No view count extracted: return -1, as the view count can be hidden by creators on videos
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private long getViewCountFromViewCountText(@Nonnull final String viewCountText,
|
||||||
|
final boolean isMixedNumber)
|
||||||
|
throws NumberFormatException, ParsingException {
|
||||||
|
// These approaches are language dependent
|
||||||
|
if (viewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) {
|
||||||
|
return 0;
|
||||||
|
} else if (viewCountText.toLowerCase().contains("recommended")) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return isMixedNumber ? Utils.mixedNumberWordToLong(viewCountText)
|
||||||
|
: Long.parseLong(Utils.removeNonDigitCharacters(viewCountText));
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getViewCountFromAccessibilityData()
|
||||||
|
throws NumberFormatException, Parser.RegexException {
|
||||||
|
// These approaches are language dependent
|
||||||
|
final String videoInfoTitleAccessibilityData = videoInfo.getObject("title")
|
||||||
|
.getObject("accessibility")
|
||||||
|
.getObject("accessibilityData")
|
||||||
|
.getString("label", "");
|
||||||
|
|
||||||
|
if (videoInfoTitleAccessibilityData.toLowerCase().endsWith(NO_VIEWS_LOWERCASE)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Long.parseLong(Utils.removeNonDigitCharacters(
|
||||||
|
Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX,
|
||||||
|
videoInfoTitleAccessibilityData)));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
return getThumbnailUrlFromInfoItem(videoInfo);
|
return getThumbnailUrlFromInfoItem(videoInfo);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user