mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2024-12-13 22:00:32 +05:30
Merge pull request #1163 from AudricV/yt-fix_comments_extraction
[YouTube] Support new comments data
This commit is contained in:
commit
6c3c2e25d7
@ -0,0 +1,316 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Entities;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Stack;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
|
public final class YoutubeDescriptionHelper {
|
||||||
|
|
||||||
|
// Closing tag shared by every link run; the opening tag is built per link as it embeds the URL.
private static final String LINK_CLOSE = "</a>";

// Opening/closing HTML tag pairs emitted for the supported style runs.
private static final String BOLD_OPEN = "<b>";
private static final String BOLD_CLOSE = "</b>";
private static final String ITALIC_OPEN = "<i>";
private static final String ITALIC_CLOSE = "</i>";
private static final String STRIKETHROUGH_OPEN = "<s>";
private static final String STRIKETHROUGH_CLOSE = "</s>";

// Special link chips (e.g. for YT videos, YT channels or social media accounts) wrap the
// actual link text in padding:
// (u00a0) u00a0 u00a0 [/•] u00a0 <link content> u00a0 u00a0
// This pattern matches that padding written with plain spaces and captures only the link
// content in group 1.
private static final Pattern LINK_CONTENT_CLEANER_REGEX =
        Pattern.compile("(?s)^ +[/•] +(.*?) +$");

// Only static members are exposed, so instantiation is disabled.
private YoutubeDescriptionHelper() {
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Can be a command run, or a style run.
|
||||||
|
*/
|
||||||
|
static final class Run {
|
||||||
|
@Nonnull final String open;
|
||||||
|
@Nonnull final String close;
|
||||||
|
final int pos;
|
||||||
|
@Nullable final Function<String, String> transformContent;
|
||||||
|
int openPosInOutput = -1;
|
||||||
|
|
||||||
|
Run(
|
||||||
|
@Nonnull final String open,
|
||||||
|
@Nonnull final String close,
|
||||||
|
final int pos
|
||||||
|
) {
|
||||||
|
this(open, close, pos, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
Run(
|
||||||
|
@Nonnull final String open,
|
||||||
|
@Nonnull final String close,
|
||||||
|
final int pos,
|
||||||
|
@Nullable final Function<String, String> transformContent
|
||||||
|
) {
|
||||||
|
this.open = open;
|
||||||
|
this.close = close;
|
||||||
|
this.pos = pos;
|
||||||
|
this.transformContent = transformContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean sameOpen(@Nonnull final Run other) {
|
||||||
|
return open.equals(other.open);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a video description in the new "attributed" format, which contains the entire visible
|
||||||
|
* plaintext ({@code content}) and an array of {@code commandRuns} and {@code styleRuns}.
|
||||||
|
* Returns the formatted content in HTML format, and escapes the text to make sure there are no
|
||||||
|
* XSS attacks.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* {@code commandRuns} include the links and their range in the text, while {@code styleRuns}
|
||||||
|
* include the styling to apply to various ranges in the text.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param attributedDescription the JSON object of the attributed description
|
||||||
|
* @return the parsed description, in HTML format, as a string
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public static String attributedDescriptionToHtml(
|
||||||
|
@Nullable final JsonObject attributedDescription
|
||||||
|
) {
|
||||||
|
if (isNullOrEmpty(attributedDescription)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final String content = attributedDescription.getString("content");
|
||||||
|
if (content == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// all run pairs must always of length at least 1, or they should be discarded,
|
||||||
|
// otherwise various assumptions made in runsToHtml may fail
|
||||||
|
final List<Run> openers = new ArrayList<>();
|
||||||
|
final List<Run> closers = new ArrayList<>();
|
||||||
|
addAllCommandRuns(attributedDescription, openers, closers);
|
||||||
|
addAllStyleRuns(attributedDescription, openers, closers);
|
||||||
|
|
||||||
|
// Note that sorting this way might put closers with the same close position in the wrong
|
||||||
|
// order with respect to their openers, causing unnecessary closes and reopens. E.g.
|
||||||
|
// <b>b<i>b&i</i></b> is instead generated as <b>b<i>b&i</b></i><b></b> if the </b> is
|
||||||
|
// encountered before the </i>. Solving this wouldn't be difficult, thanks to stable sort,
|
||||||
|
// but would require additional sorting steps which would just make this slower for the
|
||||||
|
// general case where it's unlikely there are coincident closes.
|
||||||
|
Collections.sort(openers, Comparator.comparingInt(run -> run.pos));
|
||||||
|
Collections.sort(closers, Comparator.comparingInt(run -> run.pos));
|
||||||
|
|
||||||
|
return runsToHtml(openers, closers, content);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Applies the formatting specified by the intervals stored in {@code openers} and {@code
|
||||||
|
* closers} to {@code content} in order to obtain valid HTML even when intervals overlap. For
|
||||||
|
* example <b>b<i>b&i</b>i</i> would not be valid HTML, so this function
|
||||||
|
* instead generates <b>b<i>b&i</i></b><i>i</i>. Any HTML
|
||||||
|
* special characters in {@code rawContent} are escaped to make sure there are no XSS attacks.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Every opener in {@code openers} must have a corresponding closer in {@code closers}. Every
|
||||||
|
* corresponding (opener, closer) pair must have a length of at least one (i.e. empty intervals
|
||||||
|
* are not allowed).
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param openers contains all of the places where a run begins, must have the same size of
|
||||||
|
* closers, must be ordered by {@link Run#pos}
|
||||||
|
* @param closers contains all of the places where a run ends, must have the same size of
|
||||||
|
* openers, must be ordered by {@link Run#pos}
|
||||||
|
* @param rawContent the content to apply formatting to, and to escape to avoid XSS
|
||||||
|
* @return the formatted content in HTML
|
||||||
|
*/
|
||||||
|
static String runsToHtml(
|
||||||
|
@Nonnull final List<Run> openers,
|
||||||
|
@Nonnull final List<Run> closers,
|
||||||
|
@Nonnull final String rawContent
|
||||||
|
) {
|
||||||
|
final String content = rawContent.replace('\u00a0', ' ');
|
||||||
|
final Stack<Run> openRuns = new Stack<>();
|
||||||
|
final Stack<Run> tempStack = new Stack<>();
|
||||||
|
final StringBuilder textBuilder = new StringBuilder();
|
||||||
|
int currentTextPos = 0;
|
||||||
|
int openersIndex = 0;
|
||||||
|
int closersIndex = 0;
|
||||||
|
|
||||||
|
// openers and closers have the same length, but we will surely finish openers earlier than
|
||||||
|
// closers, since every opened interval needs to be closed at some point and there can't be
|
||||||
|
// empty intervals, hence check only closersIndex < closers.size()
|
||||||
|
while (closersIndex < closers.size()) {
|
||||||
|
final int minPos = openersIndex < openers.size()
|
||||||
|
? Math.min(closers.get(closersIndex).pos, openers.get(openersIndex).pos)
|
||||||
|
: closers.get(closersIndex).pos;
|
||||||
|
|
||||||
|
// append piece of text until current index
|
||||||
|
textBuilder.append(Entities.escape(content.substring(currentTextPos, minPos)));
|
||||||
|
currentTextPos = minPos;
|
||||||
|
|
||||||
|
if (closers.get(closersIndex).pos == minPos) {
|
||||||
|
// even in case of position tie, first process closers
|
||||||
|
final Run closer = closers.get(closersIndex);
|
||||||
|
++closersIndex;
|
||||||
|
|
||||||
|
// because of the assumptions, this while wouldn't need the !openRuns.empty()
|
||||||
|
// condition, because no run will close before being opened, but let's be sure
|
||||||
|
while (!openRuns.empty()) {
|
||||||
|
final Run popped = openRuns.pop();
|
||||||
|
if (popped.sameOpen(closer)) {
|
||||||
|
// before closing the current run, if the run has a transformContent
|
||||||
|
// function, use it to transform the content of the current run, based on
|
||||||
|
// the openPosInOutput set when the current run was opened
|
||||||
|
if (popped.transformContent != null && popped.openPosInOutput >= 0) {
|
||||||
|
textBuilder.replace(popped.openPosInOutput, textBuilder.length(),
|
||||||
|
popped.transformContent.apply(
|
||||||
|
textBuilder.substring(popped.openPosInOutput)));
|
||||||
|
}
|
||||||
|
// close the run that we really need to close
|
||||||
|
textBuilder.append(popped.close);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// we keep popping from openRuns, closing all of the runs we find,
|
||||||
|
// until we find the run that we really need to close ...
|
||||||
|
textBuilder.append(popped.close);
|
||||||
|
tempStack.push(popped);
|
||||||
|
}
|
||||||
|
while (!tempStack.empty()) {
|
||||||
|
// ... and then we reopen all of the runs that we didn't need to close
|
||||||
|
// e.g. in <b>b<i>b&i</b>i</i>, when </b> is encountered, </i></b><i> is printed
|
||||||
|
// instead, to make sure the HTML is valid, obtaining <b>b<i>b&i</i></b><i>i</i>
|
||||||
|
final Run popped = tempStack.pop();
|
||||||
|
textBuilder.append(popped.open);
|
||||||
|
openRuns.push(popped);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// this will never be reached if openersIndex >= openers.size() because of the
|
||||||
|
// way minPos is calculated
|
||||||
|
final Run opener = openers.get(openersIndex);
|
||||||
|
textBuilder.append(opener.open);
|
||||||
|
opener.openPosInOutput = textBuilder.length(); // save for transforming later
|
||||||
|
openRuns.push(opener);
|
||||||
|
++openersIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// append last piece of text
|
||||||
|
textBuilder.append(Entities.escape(content.substring(currentTextPos)));
|
||||||
|
|
||||||
|
return textBuilder.toString()
|
||||||
|
.replace("\n", "<br>")
|
||||||
|
.replace(" ", " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addAllCommandRuns(
|
||||||
|
@Nonnull final JsonObject attributedDescription,
|
||||||
|
@Nonnull final List<Run> openers,
|
||||||
|
@Nonnull final List<Run> closers
|
||||||
|
) {
|
||||||
|
attributedDescription.getArray("commandRuns")
|
||||||
|
.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.forEach(run -> {
|
||||||
|
final JsonObject navigationEndpoint = run.getObject("onTap")
|
||||||
|
.getObject("innertubeCommand");
|
||||||
|
|
||||||
|
final int startIndex = run.getInt("startIndex", -1);
|
||||||
|
final int length = run.getInt("length", 0);
|
||||||
|
if (startIndex < 0 || length < 1 || navigationEndpoint == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final String url = getUrlFromNavigationEndpoint(navigationEndpoint);
|
||||||
|
if (url == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final String open = "<a href=\"" + Entities.escape(url) + "\">";
|
||||||
|
final Function<String, String> transformContent = getTransformContentFun(run);
|
||||||
|
|
||||||
|
openers.add(new Run(open, LINK_CLOSE, startIndex, transformContent));
|
||||||
|
closers.add(new Run(open, LINK_CLOSE, startIndex + length, transformContent));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Function<String, String> getTransformContentFun(final JsonObject run) {
|
||||||
|
final String accessibilityLabel = run.getObject("onTapOptions")
|
||||||
|
.getObject("accessibilityInfo")
|
||||||
|
.getString("accessibilityLabel", "")
|
||||||
|
// accessibility labels are e.g. "Instagram Channel Link: instagram_profile_name"
|
||||||
|
.replaceFirst(" Channel Link", "");
|
||||||
|
|
||||||
|
final Function<String, String> transformContent;
|
||||||
|
if (accessibilityLabel.isEmpty() || accessibilityLabel.startsWith("YouTube: ")) {
|
||||||
|
// if there is no accessibility label, or the link points to YouTube, cleanup the link
|
||||||
|
// text, see LINK_CONTENT_CLEANER_REGEX's documentation for more details
|
||||||
|
transformContent = (content) -> {
|
||||||
|
final Matcher m = LINK_CONTENT_CLEANER_REGEX.matcher(content);
|
||||||
|
if (m.find()) {
|
||||||
|
return m.group(1);
|
||||||
|
}
|
||||||
|
return content;
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// if there is an accessibility label, replace the link text with it, because on the
|
||||||
|
// YouTube website an ambiguous link text is next to an icon explaining which service it
|
||||||
|
// belongs to, but since we can't add icons, we instead use the accessibility label
|
||||||
|
// which contains information about the service
|
||||||
|
transformContent = (content) -> accessibilityLabel;
|
||||||
|
}
|
||||||
|
|
||||||
|
return transformContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addAllStyleRuns(
|
||||||
|
@Nonnull final JsonObject attributedDescription,
|
||||||
|
@Nonnull final List<Run> openers,
|
||||||
|
@Nonnull final List<Run> closers
|
||||||
|
) {
|
||||||
|
attributedDescription.getArray("styleRuns")
|
||||||
|
.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.forEach(run -> {
|
||||||
|
final int start = run.getInt("startIndex", -1);
|
||||||
|
final int length = run.getInt("length", 0);
|
||||||
|
if (start < 0 || length < 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final int end = start + length;
|
||||||
|
|
||||||
|
if (run.has("strikethrough")) {
|
||||||
|
openers.add(new Run(STRIKETHROUGH_OPEN, STRIKETHROUGH_CLOSE, start));
|
||||||
|
closers.add(new Run(STRIKETHROUGH_OPEN, STRIKETHROUGH_CLOSE, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (run.getBoolean("italic", false)) {
|
||||||
|
openers.add(new Run(ITALIC_OPEN, ITALIC_CLOSE, start));
|
||||||
|
closers.add(new Run(ITALIC_OPEN, ITALIC_CLOSE, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (run.has("weightLabel")
|
||||||
|
&& !"FONT_WEIGHT_NORMAL".equals(run.getString("weightLabel"))) {
|
||||||
|
openers.add(new Run(BOLD_OPEN, BOLD_CLOSE, start));
|
||||||
|
closers.add(new Run(BOLD_OPEN, BOLD_CLOSE, end));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
@ -996,86 +996,6 @@ public final class YoutubeParsingHelper {
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Parse a video description in the new "attributed" format, which contains the entire visible
|
|
||||||
* plaintext ({@code content}) and an array of {@code commandRuns}.
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* The {@code commandRuns} include the links and their position in the text.
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @param attributedDescription the JSON object of the attributed description
|
|
||||||
* @return the parsed description, in HTML format, as a string
|
|
||||||
*/
|
|
||||||
@Nullable
|
|
||||||
public static String getAttributedDescription(
|
|
||||||
@Nullable final JsonObject attributedDescription) {
|
|
||||||
if (isNullOrEmpty(attributedDescription)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String content = attributedDescription.getString("content");
|
|
||||||
if (content == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final JsonArray commandRuns = attributedDescription.getArray("commandRuns");
|
|
||||||
|
|
||||||
final StringBuilder textBuilder = new StringBuilder();
|
|
||||||
int textStart = 0;
|
|
||||||
|
|
||||||
for (final Object commandRun : commandRuns) {
|
|
||||||
if (!(commandRun instanceof JsonObject)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
final JsonObject run = ((JsonObject) commandRun);
|
|
||||||
final int startIndex = run.getInt("startIndex", -1);
|
|
||||||
final int length = run.getInt("length");
|
|
||||||
final JsonObject navigationEndpoint = run.getObject("onTap")
|
|
||||||
.getObject("innertubeCommand");
|
|
||||||
|
|
||||||
if (startIndex < 0 || length < 1 || navigationEndpoint == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String url = getUrlFromNavigationEndpoint(navigationEndpoint);
|
|
||||||
|
|
||||||
if (url == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Append text before the link
|
|
||||||
if (startIndex > textStart) {
|
|
||||||
textBuilder.append(content, textStart, startIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Trim and append link text
|
|
||||||
// Channel/Video format: 3xu00a0, (/ •), u00a0, <Name>, 2xu00a0
|
|
||||||
final String linkText = content.substring(startIndex, startIndex + length)
|
|
||||||
.replace('\u00a0', ' ')
|
|
||||||
.trim()
|
|
||||||
.replaceFirst("^[/•] *", "");
|
|
||||||
|
|
||||||
textBuilder.append("<a href=\"")
|
|
||||||
.append(Entities.escape(url))
|
|
||||||
.append("\">")
|
|
||||||
.append(Entities.escape(linkText))
|
|
||||||
.append("</a>");
|
|
||||||
|
|
||||||
textStart = startIndex + length;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Append the remaining text
|
|
||||||
if (textStart < content.length()) {
|
|
||||||
textBuilder.append(content.substring(textStart));
|
|
||||||
}
|
|
||||||
|
|
||||||
return textBuilder.toString()
|
|
||||||
.replaceAll("\\n", "<br>")
|
|
||||||
.replaceAll(" {2}", " ");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
|
public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
|
||||||
throws ParsingException {
|
throws ParsingException {
|
||||||
|
@ -0,0 +1,235 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
import org.schabi.newpipe.extractor.Image;
|
||||||
|
import org.schabi.newpipe.extractor.Page;
|
||||||
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeDescriptionHelper.attributedDescriptionToHtml;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link CommentsInfoItemExtractor} for YouTube comment data returned in a view model and entity
|
||||||
|
* updates.
|
||||||
|
*/
|
||||||
|
class YoutubeCommentsEUVMInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
|
|
||||||
|
private static final String AUTHOR = "author";
|
||||||
|
private static final String PROPERTIES = "properties";
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
private final JsonObject commentViewModel;
|
||||||
|
@Nullable
|
||||||
|
private final JsonObject commentRepliesRenderer;
|
||||||
|
@Nonnull
|
||||||
|
private final JsonObject commentEntityPayload;
|
||||||
|
@Nonnull
|
||||||
|
private final JsonObject engagementToolbarStateEntityPayload;
|
||||||
|
@Nonnull
|
||||||
|
private final String videoUrl;
|
||||||
|
@Nonnull
|
||||||
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
|
YoutubeCommentsEUVMInfoItemExtractor(
|
||||||
|
@Nonnull final JsonObject commentViewModel,
|
||||||
|
@Nullable final JsonObject commentRepliesRenderer,
|
||||||
|
@Nonnull final JsonObject commentEntityPayload,
|
||||||
|
@Nonnull final JsonObject engagementToolbarStateEntityPayload,
|
||||||
|
@Nonnull final String videoUrl,
|
||||||
|
@Nonnull final TimeAgoParser timeAgoParser) {
|
||||||
|
this.commentViewModel = commentViewModel;
|
||||||
|
this.commentRepliesRenderer = commentRepliesRenderer;
|
||||||
|
this.commentEntityPayload = commentEntityPayload;
|
||||||
|
this.engagementToolbarStateEntityPayload = engagementToolbarStateEntityPayload;
|
||||||
|
this.videoUrl = videoUrl;
|
||||||
|
this.timeAgoParser = timeAgoParser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() throws ParsingException {
|
||||||
|
return getUploaderName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUrl() throws ParsingException {
|
||||||
|
return videoUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
@Override
|
||||||
|
public List<Image> getThumbnails() throws ParsingException {
|
||||||
|
return getUploaderAvatars();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLikeCount() throws ParsingException {
|
||||||
|
final String textualLikeCount = getTextualLikeCount();
|
||||||
|
try {
|
||||||
|
if (Utils.isBlank(textualLikeCount)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new ParsingException(
|
||||||
|
"Unexpected error while converting textual like count to like count", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTextualLikeCount() {
|
||||||
|
return commentEntityPayload.getObject("toolbar")
|
||||||
|
.getString("likeCountNotliked");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Description getCommentText() throws ParsingException {
|
||||||
|
// Comments' text work in the same way as an attributed video description
|
||||||
|
return new Description(
|
||||||
|
attributedDescriptionToHtml(commentEntityPayload.getObject(PROPERTIES)
|
||||||
|
.getObject("content")), Description.HTML);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
|
return commentEntityPayload.getObject(PROPERTIES)
|
||||||
|
.getString("publishedTime");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
@Override
|
||||||
|
public DateWrapper getUploadDate() throws ParsingException {
|
||||||
|
final String textualPublishedTime = getTextualUploadDate();
|
||||||
|
if (isNullOrEmpty(textualPublishedTime)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return timeAgoParser.parse(textualPublishedTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCommentId() throws ParsingException {
|
||||||
|
String commentId = commentEntityPayload.getObject(PROPERTIES)
|
||||||
|
.getString("commentId");
|
||||||
|
if (isNullOrEmpty(commentId)) {
|
||||||
|
commentId = commentViewModel.getString("commentId");
|
||||||
|
if (isNullOrEmpty(commentId)) {
|
||||||
|
throw new ParsingException("Could not get comment ID");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return commentId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUploaderUrl() throws ParsingException {
|
||||||
|
final JsonObject author = commentEntityPayload.getObject(AUTHOR);
|
||||||
|
String channelId = author.getString("channelId");
|
||||||
|
if (isNullOrEmpty(channelId)) {
|
||||||
|
channelId = author.getObject("channelCommand")
|
||||||
|
.getObject("innertubeCommand")
|
||||||
|
.getObject("browseEndpoint")
|
||||||
|
.getString("browseId");
|
||||||
|
if (isNullOrEmpty(channelId)) {
|
||||||
|
channelId = author.getObject("avatar")
|
||||||
|
.getObject("endpoint")
|
||||||
|
.getObject("innertubeCommand")
|
||||||
|
.getObject("browseEndpoint")
|
||||||
|
.getString("browseId");
|
||||||
|
if (isNullOrEmpty(channelId)) {
|
||||||
|
throw new ParsingException("Could not get channel ID");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "https://www.youtube.com/channel/" + channelId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUploaderName() throws ParsingException {
|
||||||
|
return commentEntityPayload.getObject(AUTHOR)
|
||||||
|
.getString("displayName");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
@Override
|
||||||
|
public List<Image> getUploaderAvatars() throws ParsingException {
|
||||||
|
return getImagesFromThumbnailsArray(commentEntityPayload.getObject("avatar")
|
||||||
|
.getObject("image")
|
||||||
|
.getArray("sources"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isHeartedByUploader() {
|
||||||
|
return "TOOLBAR_HEART_STATE_HEARTED".equals(
|
||||||
|
engagementToolbarStateEntityPayload.getString("heartState"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isPinned() {
|
||||||
|
return commentViewModel.has("pinnedText");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isUploaderVerified() throws ParsingException {
|
||||||
|
final JsonObject author = commentEntityPayload.getObject(AUTHOR);
|
||||||
|
return author.getBoolean("isVerified") || author.getBoolean("isArtist");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getReplyCount() throws ParsingException {
|
||||||
|
// As YouTube allows replies up to 750 comments, we cannot check if the count returned is a
|
||||||
|
// mixed number or a real number
|
||||||
|
// Assume it is a mixed one, as it matches how numbers of most properties are returned
|
||||||
|
final String replyCountString = commentEntityPayload.getObject("toolbar")
|
||||||
|
.getString("replyCount");
|
||||||
|
if (isNullOrEmpty(replyCountString)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return (int) Utils.mixedNumberWordToLong(replyCountString);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
@Override
|
||||||
|
public Page getReplies() throws ParsingException {
|
||||||
|
if (isNullOrEmpty(commentRepliesRenderer)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final String continuation = commentRepliesRenderer.getArray("contents")
|
||||||
|
.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.map(content -> content.getObject("continuationItemRenderer", null))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.findFirst()
|
||||||
|
.map(continuationItemRenderer ->
|
||||||
|
continuationItemRenderer.getObject("continuationEndpoint")
|
||||||
|
.getObject("continuationCommand")
|
||||||
|
.getString("token"))
|
||||||
|
.orElseThrow(() ->
|
||||||
|
new ParsingException("Could not get comment replies continuation"));
|
||||||
|
return new Page(videoUrl, continuation);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isChannelOwner() {
|
||||||
|
return commentEntityPayload.getObject(AUTHOR)
|
||||||
|
.getBoolean("isCreator");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasCreatorReply() {
|
||||||
|
return commentRepliesRenderer != null
|
||||||
|
&& commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
|
||||||
|
}
|
||||||
|
}
|
@ -13,6 +13,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.localization.Localization;
|
import org.schabi.newpipe.extractor.localization.Localization;
|
||||||
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
@ -21,7 +22,6 @@ import javax.annotation.Nullable;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||||
@ -30,6 +30,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|||||||
|
|
||||||
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
|
private static final String COMMENT_VIEW_MODEL_KEY = "commentViewModel";
|
||||||
|
private static final String COMMENT_RENDERER_KEY = "commentRenderer";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether comments are disabled on video.
|
* Whether comments are disabled on video.
|
||||||
*/
|
*/
|
||||||
@ -74,8 +77,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final String token = contents
|
final String token = contents.stream()
|
||||||
.stream()
|
|
||||||
// Only use JsonObjects
|
// Only use JsonObjects
|
||||||
.filter(JsonObject.class::isInstance)
|
.filter(JsonObject.class::isInstance)
|
||||||
.map(JsonObject.class::cast)
|
.map(JsonObject.class::cast)
|
||||||
@ -120,6 +122,21 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
private JsonObject getMutationPayloadFromEntityKey(@Nonnull final JsonArray mutations,
|
||||||
|
@Nonnull final String commentKey)
|
||||||
|
throws ParsingException {
|
||||||
|
return mutations.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.filter(mutation -> commentKey.equals(
|
||||||
|
mutation.getString("entityKey")))
|
||||||
|
.findFirst()
|
||||||
|
.orElseThrow(() -> new ParsingException(
|
||||||
|
"Could not get comment entity payload mutation"))
|
||||||
|
.getObject("payload");
|
||||||
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
||||||
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
||||||
@ -207,8 +224,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
|
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
|
||||||
final JsonObject jsonObject)
|
@Nonnull final JsonObject jsonObject)
|
||||||
throws ParsingException {
|
throws ParsingException {
|
||||||
|
|
||||||
final JsonArray onResponseReceivedEndpoints =
|
final JsonArray onResponseReceivedEndpoints =
|
||||||
@ -233,6 +250,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
|
|
||||||
final JsonArray contents;
|
final JsonArray contents;
|
||||||
try {
|
try {
|
||||||
|
// A copy of the array is needed, otherwise the continuation item is removed from the
|
||||||
|
// original object which is used to get the continuation
|
||||||
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
|
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
// No comments
|
// No comments
|
||||||
@ -244,23 +263,80 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
contents.remove(index);
|
contents.remove(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
|
// The mutations object, which is returned in the comments' continuation
|
||||||
? "commentThreadRenderer"
|
// It contains parts of comment data when comments are returned with a view model
|
||||||
: "commentRenderer";
|
final JsonArray mutations = jsonObject.getObject("frameworkUpdates")
|
||||||
|
.getObject("entityBatchUpdate")
|
||||||
|
.getArray("mutations");
|
||||||
|
final String videoUrl = getUrl();
|
||||||
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||||
|
|
||||||
final List<Object> comments;
|
for (final Object o : contents) {
|
||||||
try {
|
if (!(o instanceof JsonObject)) {
|
||||||
comments = JsonUtils.getValues(contents, jsonKey);
|
continue;
|
||||||
} catch (final Exception e) {
|
}
|
||||||
throw new ParsingException("Unable to get parse youtube comments", e);
|
|
||||||
|
collectCommentItem(mutations, (JsonObject) o, collector, videoUrl, timeAgoParser);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final String url = getUrl();
|
private void collectCommentItem(@Nonnull final JsonArray mutations,
|
||||||
comments.stream()
|
@Nonnull final JsonObject content,
|
||||||
.filter(JsonObject.class::isInstance)
|
@Nonnull final CommentsInfoItemsCollector collector,
|
||||||
.map(JsonObject.class::cast)
|
@Nonnull final String videoUrl,
|
||||||
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
|
@Nonnull final TimeAgoParser timeAgoParser)
|
||||||
.forEach(collector::commit);
|
throws ParsingException {
|
||||||
|
if (content.has("commentThreadRenderer")) {
|
||||||
|
final JsonObject commentThreadRenderer =
|
||||||
|
content.getObject("commentThreadRenderer");
|
||||||
|
if (commentThreadRenderer.has(COMMENT_VIEW_MODEL_KEY)) {
|
||||||
|
final JsonObject commentViewModel =
|
||||||
|
commentThreadRenderer.getObject(COMMENT_VIEW_MODEL_KEY)
|
||||||
|
.getObject(COMMENT_VIEW_MODEL_KEY);
|
||||||
|
collector.commit(new YoutubeCommentsEUVMInfoItemExtractor(
|
||||||
|
commentViewModel,
|
||||||
|
commentThreadRenderer.getObject("replies")
|
||||||
|
.getObject("commentRepliesRenderer"),
|
||||||
|
getMutationPayloadFromEntityKey(mutations,
|
||||||
|
commentViewModel.getString("commentKey", ""))
|
||||||
|
.getObject("commentEntityPayload"),
|
||||||
|
getMutationPayloadFromEntityKey(mutations,
|
||||||
|
commentViewModel.getString("toolbarStateKey", ""))
|
||||||
|
.getObject("engagementToolbarStateEntityPayload"),
|
||||||
|
videoUrl,
|
||||||
|
timeAgoParser));
|
||||||
|
} else if (commentThreadRenderer.has("comment")) {
|
||||||
|
collector.commit(new YoutubeCommentsInfoItemExtractor(
|
||||||
|
commentThreadRenderer.getObject("comment")
|
||||||
|
.getObject(COMMENT_RENDERER_KEY),
|
||||||
|
commentThreadRenderer.getObject("replies")
|
||||||
|
.getObject("commentRepliesRenderer"),
|
||||||
|
videoUrl,
|
||||||
|
timeAgoParser));
|
||||||
|
}
|
||||||
|
} else if (content.has(COMMENT_VIEW_MODEL_KEY)) {
|
||||||
|
final JsonObject commentViewModel = content.getObject(COMMENT_VIEW_MODEL_KEY);
|
||||||
|
collector.commit(new YoutubeCommentsEUVMInfoItemExtractor(
|
||||||
|
commentViewModel,
|
||||||
|
null,
|
||||||
|
getMutationPayloadFromEntityKey(mutations,
|
||||||
|
commentViewModel.getString("commentKey", ""))
|
||||||
|
.getObject("commentEntityPayload"),
|
||||||
|
getMutationPayloadFromEntityKey(mutations,
|
||||||
|
commentViewModel.getString("toolbarStateKey", ""))
|
||||||
|
.getObject("engagementToolbarStateEntityPayload"),
|
||||||
|
videoUrl,
|
||||||
|
timeAgoParser));
|
||||||
|
} else if (content.has(COMMENT_RENDERER_KEY)) {
|
||||||
|
// commentRenderers are directly returned for comment replies, so there is no
|
||||||
|
// commentRepliesRenderer to provide
|
||||||
|
// Also, YouTube has only one comment reply level
|
||||||
|
collector.commit(new YoutubeCommentsInfoItemExtractor(
|
||||||
|
content.getObject(COMMENT_RENDERER_KEY),
|
||||||
|
null,
|
||||||
|
videoUrl,
|
||||||
|
timeAgoParser));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -307,10 +383,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
final JsonObject countText = ajaxJson
|
final JsonObject countText = ajaxJson.getArray("onResponseReceivedEndpoints")
|
||||||
.getArray("onResponseReceivedEndpoints").getObject(0)
|
.getObject(0)
|
||||||
.getObject("reloadContinuationItemsCommand")
|
.getObject("reloadContinuationItemsCommand")
|
||||||
.getArray("continuationItems").getObject(0)
|
.getArray("continuationItems")
|
||||||
|
.getObject(0)
|
||||||
.getObject("commentsHeaderRenderer")
|
.getObject("commentsHeaderRenderer")
|
||||||
.getObject("countText");
|
.getObject("countText");
|
||||||
|
|
||||||
|
@ -22,40 +22,36 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper
|
|||||||
|
|
||||||
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
|
|
||||||
private final JsonObject json;
|
@Nonnull
|
||||||
private JsonObject commentRenderer;
|
private final JsonObject commentRenderer;
|
||||||
|
@Nullable
|
||||||
|
private final JsonObject commentRepliesRenderer;
|
||||||
|
@Nonnull
|
||||||
private final String url;
|
private final String url;
|
||||||
|
@Nonnull
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
public YoutubeCommentsInfoItemExtractor(final JsonObject json,
|
public YoutubeCommentsInfoItemExtractor(@Nonnull final JsonObject commentRenderer,
|
||||||
final String url,
|
@Nullable final JsonObject commentRepliesRenderer,
|
||||||
final TimeAgoParser timeAgoParser) {
|
@Nonnull final String url,
|
||||||
this.json = json;
|
@Nonnull final TimeAgoParser timeAgoParser) {
|
||||||
|
this.commentRenderer = commentRenderer;
|
||||||
|
this.commentRepliesRenderer = commentRepliesRenderer;
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.timeAgoParser = timeAgoParser;
|
this.timeAgoParser = timeAgoParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
private JsonObject getCommentRenderer() throws ParsingException {
|
|
||||||
if (commentRenderer == null) {
|
|
||||||
if (json.has("comment")) {
|
|
||||||
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
|
|
||||||
} else {
|
|
||||||
commentRenderer = json;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return commentRenderer;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private List<Image> getAuthorThumbnails() throws ParsingException {
|
private List<Image> getAuthorThumbnails() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getImagesFromThumbnailsArray(JsonUtils.getArray(getCommentRenderer(),
|
return getImagesFromThumbnailsArray(JsonUtils.getArray(commentRenderer,
|
||||||
"authorThumbnail.thumbnails"));
|
"authorThumbnail.thumbnails"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get author thumbnails", e);
|
throw new ParsingException("Could not get author thumbnails", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
return url;
|
return url;
|
||||||
@ -70,7 +66,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
@ -79,7 +75,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(),
|
return getTextFromObject(JsonUtils.getObject(commentRenderer,
|
||||||
"publishedTimeText"));
|
"publishedTimeText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get publishedTimeText", e);
|
throw new ParsingException("Could not get publishedTimeText", e);
|
||||||
@ -90,8 +86,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public DateWrapper getUploadDate() throws ParsingException {
|
public DateWrapper getUploadDate() throws ParsingException {
|
||||||
final String textualPublishedTime = getTextualUploadDate();
|
final String textualPublishedTime = getTextualUploadDate();
|
||||||
if (timeAgoParser != null && textualPublishedTime != null
|
if (textualPublishedTime != null && !textualPublishedTime.isEmpty()) {
|
||||||
&& !textualPublishedTime.isEmpty()) {
|
|
||||||
return timeAgoParser.parse(textualPublishedTime);
|
return timeAgoParser.parse(textualPublishedTime);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
@ -118,7 +113,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
// Try first to get the exact like count by using the accessibility data
|
// Try first to get the exact like count by using the accessibility data
|
||||||
final String likeCount;
|
final String likeCount;
|
||||||
try {
|
try {
|
||||||
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
|
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(commentRenderer,
|
||||||
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer"
|
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer"
|
||||||
+ ".accessibilityData.accessibilityData.label"));
|
+ ".accessibilityData.accessibilityData.label"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
@ -170,11 +165,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
*/
|
*/
|
||||||
try {
|
try {
|
||||||
// If a comment has no likes voteCount is not set
|
// If a comment has no likes voteCount is not set
|
||||||
if (!getCommentRenderer().has("voteCount")) {
|
if (!commentRenderer.has("voteCount")) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
|
final JsonObject voteCountObj = JsonUtils.getObject(commentRenderer, "voteCount");
|
||||||
if (voteCountObj.isEmpty()) {
|
if (voteCountObj.isEmpty()) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
@ -188,7 +183,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public Description getCommentText() throws ParsingException {
|
public Description getCommentText() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
final JsonObject contentText = JsonUtils.getObject(commentRenderer, "contentText");
|
||||||
if (contentText.isEmpty()) {
|
if (contentText.isEmpty()) {
|
||||||
// completely empty comments as described in
|
// completely empty comments as described in
|
||||||
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
||||||
@ -208,7 +203,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getCommentId() throws ParsingException {
|
public String getCommentId() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return JsonUtils.getString(getCommentRenderer(), "commentId");
|
return JsonUtils.getString(commentRenderer, "commentId");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get comment id", e);
|
throw new ParsingException("Could not get comment id", e);
|
||||||
}
|
}
|
||||||
@ -221,27 +216,26 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isHeartedByUploader() throws ParsingException {
|
public boolean isHeartedByUploader() {
|
||||||
final JsonObject commentActionButtonsRenderer = getCommentRenderer()
|
final JsonObject commentActionButtonsRenderer = commentRenderer.getObject("actionButtons")
|
||||||
.getObject("actionButtons")
|
|
||||||
.getObject("commentActionButtonsRenderer");
|
.getObject("commentActionButtonsRenderer");
|
||||||
return commentActionButtonsRenderer.has("creatorHeart");
|
return commentActionButtonsRenderer.has("creatorHeart");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isPinned() throws ParsingException {
|
public boolean isPinned() {
|
||||||
return getCommentRenderer().has("pinnedCommentBadge");
|
return commentRenderer.has("pinnedCommentBadge");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isUploaderVerified() throws ParsingException {
|
public boolean isUploaderVerified() throws ParsingException {
|
||||||
return getCommentRenderer().has("authorCommentBadge");
|
return commentRenderer.has("authorCommentBadge");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText"));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
@ -250,7 +244,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
|
return "https://www.youtube.com/channel/" + JsonUtils.getString(commentRenderer,
|
||||||
"authorEndpoint.browseEndpoint.browseId");
|
"authorEndpoint.browseEndpoint.browseId");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return "";
|
return "";
|
||||||
@ -258,19 +252,22 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getReplyCount() throws ParsingException {
|
public int getReplyCount() {
|
||||||
final JsonObject commentRendererJsonObject = getCommentRenderer();
|
if (commentRenderer.has("replyCount")) {
|
||||||
if (commentRendererJsonObject.has("replyCount")) {
|
return commentRenderer.getInt("replyCount");
|
||||||
return commentRendererJsonObject.getInt("replyCount");
|
|
||||||
}
|
}
|
||||||
return UNKNOWN_REPLY_COUNT;
|
return UNKNOWN_REPLY_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Page getReplies() {
|
public Page getReplies() {
|
||||||
|
if (commentRepliesRenderer == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final String id = JsonUtils.getString(
|
final String id = JsonUtils.getString(
|
||||||
JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents")
|
JsonUtils.getArray(commentRepliesRenderer, "contents")
|
||||||
.getObject(0),
|
.getObject(0),
|
||||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
return new Page(url, id);
|
return new Page(url, id);
|
||||||
@ -280,20 +277,17 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isChannelOwner() throws ParsingException {
|
public boolean isChannelOwner() {
|
||||||
return getCommentRenderer().getBoolean("authorIsChannelOwner");
|
return commentRenderer.getBoolean("authorIsChannelOwner");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasCreatorReply() throws ParsingException {
|
public boolean hasCreatorReply() {
|
||||||
try {
|
if (commentRepliesRenderer == null) {
|
||||||
final JsonObject commentRepliesRenderer = JsonUtils.getObject(json,
|
|
||||||
"replies.commentRepliesRenderer");
|
|
||||||
return commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
|
|
||||||
} catch (final Exception e) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.ItagItem.APPROX_DURATION_MS_UNKNOWN;
|
import static org.schabi.newpipe.extractor.services.youtube.ItagItem.APPROX_DURATION_MS_UNKNOWN;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.ItagItem.CONTENT_LENGTH_UNKNOWN;
|
import static org.schabi.newpipe.extractor.services.youtube.ItagItem.CONTENT_LENGTH_UNKNOWN;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeDescriptionHelper.attributedDescriptionToHtml;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CONTENT_CHECK_OK;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CONTENT_CHECK_OK;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CPN;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CPN;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.RACY_CHECK_OK;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.RACY_CHECK_OK;
|
||||||
@ -30,7 +31,6 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper
|
|||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateContentPlaybackNonce;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateContentPlaybackNonce;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateTParameter;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateTParameter;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getAttributedDescription;
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonAndroidPostResponse;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonAndroidPostResponse;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonIosPostResponse;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonIosPostResponse;
|
||||||
@ -261,7 +261,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||||||
return new Description(videoSecondaryInfoRendererDescription, Description.HTML);
|
return new Description(videoSecondaryInfoRendererDescription, Description.HTML);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String attributedDescription = getAttributedDescription(
|
final String attributedDescription = attributedDescriptionToHtml(
|
||||||
getVideoSecondaryInfoRenderer().getObject("attributedDescription"));
|
getVideoSecondaryInfoRenderer().getObject("attributedDescription"));
|
||||||
if (!isNullOrEmpty(attributedDescription)) {
|
if (!isNullOrEmpty(attributedDescription)) {
|
||||||
return new Description(attributedDescription, Description.HTML);
|
return new Description(attributedDescription, Description.HTML);
|
||||||
|
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.stream;
|
|||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
public class Description implements Serializable {
|
public class Description implements Serializable {
|
||||||
|
|
||||||
public static final int HTML = 1;
|
public static final int HTML = 1;
|
||||||
@ -13,7 +15,7 @@ public class Description implements Serializable {
|
|||||||
private final String content;
|
private final String content;
|
||||||
private final int type;
|
private final int type;
|
||||||
|
|
||||||
public Description(final String content, final int type) {
|
public Description(@Nullable final String content, final int type) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
if (content == null) {
|
if (content == null) {
|
||||||
this.content = "";
|
this.content = "";
|
||||||
|
@ -0,0 +1,81 @@
|
|||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeDescriptionHelper.runsToHtml;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeDescriptionHelper.Run;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class YoutubeDescriptionHelperTest {
|
||||||
|
|
||||||
|
private static void assertRunsToHtml(final String expectedHtml,
|
||||||
|
final List<Run> openers,
|
||||||
|
final List<Run> closers,
|
||||||
|
final String content) {
|
||||||
|
assertEquals(
|
||||||
|
expectedHtml,
|
||||||
|
runsToHtml(
|
||||||
|
openers.stream()
|
||||||
|
.sorted(Comparator.comparingInt(run -> run.pos))
|
||||||
|
.collect(Collectors.toList()),
|
||||||
|
closers.stream()
|
||||||
|
.sorted(Comparator.comparingInt(run -> run.pos))
|
||||||
|
.collect(Collectors.toList()),
|
||||||
|
content
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoRuns() {
|
||||||
|
assertRunsToHtml(
|
||||||
|
"abc *a* _c_ <br> <br> <a href=\"#\">test</a> &amp;",
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
"abc *a* _c_ <br>\u00a0\n\u00a0<a href=\"#\">test</a> &"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNormalRuns() {
|
||||||
|
assertRunsToHtml(
|
||||||
|
"<A>hel<B>lo </B>nic</A>e <C>test</C>",
|
||||||
|
List.of(new Run("<A>", "</A>", 0), new Run("<B>", "</B>", 3),
|
||||||
|
new Run("<C>", "</C>", 11)),
|
||||||
|
List.of(new Run("<A>", "</A>", 9), new Run("<B>", "</B>", 6),
|
||||||
|
new Run("<C>", "</C>", 15)),
|
||||||
|
"hello nice test"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOverlappingRuns() {
|
||||||
|
assertRunsToHtml(
|
||||||
|
"01<A>23<B>45</B></A><B>67</B>89",
|
||||||
|
List.of(new Run("<A>", "</A>", 2), new Run("<B>", "</B>", 4)),
|
||||||
|
List.of(new Run("<A>", "</A>", 6), new Run("<B>", "</B>", 8)),
|
||||||
|
"0123456789"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTransformingRuns() {
|
||||||
|
final Function<String, String> tA = content -> "whatever";
|
||||||
|
final Function<String, String> tD
|
||||||
|
= content -> Integer.parseInt(content) % 2 == 0 ? "even" : "odd";
|
||||||
|
|
||||||
|
assertRunsToHtml(
|
||||||
|
"0<A>whatever</A><C>4</C>5<D>odd</D>89",
|
||||||
|
List.of(new Run("<A>", "</A>", 1, tA), new Run("<B>", "</B>", 2),
|
||||||
|
new Run("<C>", "</C>", 3), new Run("<D>", "</D>", 6, tD)),
|
||||||
|
List.of(new Run("<A>", "</A>", 4, tA), new Run("<B>", "</B>", 3),
|
||||||
|
new Run("<C>", "</C>", 5), new Run("<D>", "</D>", 8, tD)),
|
||||||
|
"0123456789"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user