Reimplement YoutubeChannelInfoItemExtractor

This commit is contained in:
wb9688 2020-02-23 18:27:28 +01:00 committed by TobiGr
parent 3187116a63
commit 8aea4d445b
2 changed files with 31 additions and 57 deletions

View File

@ -1,13 +1,12 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element; import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* /*
* Created by Christian Schabesberger on 12.02.17. * Created by Christian Schabesberger on 12.02.17.
* *
@ -29,87 +28,62 @@ import java.util.regex.Pattern;
*/ */
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor { public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
private final Element el; private JsonObject channelInfoItem;
public YoutubeChannelInfoItemExtractor(Element el) { public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) {
this.el = el; this.channelInfoItem = channelInfoItem;
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first() try {
.select("img").first(); return channelInfoItem.getObject("thumbnails").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) {
String url = img.attr("abs:src"); throw new ParsingException("Could not get thumbnail url", e);
if (url.contains("gif")) {
url = img.attr("abs:data-thumb");
} }
return url;
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
return el.select("a[class*=\"yt-uix-tile-link\"]").first() try {
.text(); return channelInfoItem.getObject("title").getString("simpleText");
} catch (Exception e) {
throw new ParsingException("Could not get name", e);
}
} }
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first() String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work?
.attr("abs:data-href"); return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
if (match.matches()) {
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
}
} catch(Exception ignored) {}
// fallback method for channels without "Subscribe" button (or just in case yt changes things)
// provides a URL with "/user/NAME", inconsistent with stream and channel extractor: tests will fail
try {
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel url", e); throw new ParsingException("Could not get url", e);
} }
} }
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() {
final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first(); // TODO: get subscriber count, it's in subscriberCountText.simpleText as a string like "103M subscribers"
if (subsEl != null) { return -1;
try {
return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text()));
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
} else {
// If the element is null, the channel has the subscriber count disabled
return -1;
}
} }
@Override @Override
public long getStreamCount() throws ParsingException { public long getStreamCount() throws ParsingException {
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first(); try {
if (metaEl == null) { return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText")
return 0; .getArray("runs").getObject(0).getString("text")));
} else { } catch (Exception e) {
return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text())); throw new ParsingException("Could not get name", e);
} }
} }
@Override @Override
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first(); try {
if (desEl == null) { return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text");
return ""; } catch (Exception e) {
} else { throw new ParsingException("Could not get description url", e);
return desEl.text();
} }
} }
} }

View File

@ -130,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
} else if (((JsonObject) item).getObject("videoRenderer") != null) { } else if (((JsonObject) item).getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser)); collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
} else if (((JsonObject) item).getObject("channelRenderer") != null) { } else if (((JsonObject) item).getObject("channelRenderer") != null) {
// collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
} else if (((JsonObject) item).getObject("playlistRenderer") != null) { } else if (((JsonObject) item).getObject("playlistRenderer") != null) {
// collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); // collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
} }