Complete fix inconsistency in youtube channel urls

It is not always possible to get the url in the form "https://www.youtube.com/channel/...", so a note has been added whenever that happens to be the case (i.e. only in InfoStreamItems).
This commit is contained in:
Stypox 2019-08-16 21:17:03 +02:00
parent 315c5c262f
commit 216a4eb1f5
No known key found for this signature in database
GPG Key ID: 4BDF1B40A49FDD23
4 changed files with 18 additions and 10 deletions

View File

@ -50,7 +50,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist name"); throw new ParsingException("Could not get playlist name", e);
} }
} }
@ -59,7 +59,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail"); throw new ParsingException("Could not get playlist thumbnail", e);
} }
} }
@ -72,9 +72,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
try { try {
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href"); return YoutubeChannelExtractor.CHANNEL_URL_BASE +
doc.select("button[class*=\"yt-uix-subscription-button\"]")
.first().attr("data-channel-external-id");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name"); throw new ParsingException("Could not get playlist uploader url", e);
} }
} }
@ -83,7 +85,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name"); throw new ParsingException("Could not get playlist uploader name", e);
} }
} }
@ -92,7 +94,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar"); throw new ParsingException("Could not get playlist uploader avatar", e);
} }
} }
@ -248,6 +250,8 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
return getUploaderLink().attr("abs:href"); return getUploaderLink().attr("abs:href");
} }

View File

@ -107,6 +107,8 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
try { try {
try { try {
return item.select("div[class=\"yt-lockup-byline\"]").first() return item.select("div[class=\"yt-lockup-byline\"]").first()
@ -119,7 +121,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
.text().split(" - ")[0]; .text().split(" - ")[0];
} catch (Exception e) { } catch (Exception e) {
System.out.println(item.html()); System.out.println(item.html());
throw new ParsingException("Could not get uploader", e); throw new ParsingException("Could not get uploader url", e);
} }
} }

View File

@ -126,6 +126,8 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
} }
private Element getUploaderLink() { private Element getUploaderLink() {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
return uploaderEl.select("a").first(); return uploaderEl.select("a").first();
} }

View File

@ -100,7 +100,7 @@ public class YoutubePlaylistExtractorTest {
@Test @Test
public void testUploaderUrl() throws Exception { public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com")); assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl());
} }
@Test @Test
@ -185,8 +185,8 @@ public class YoutubePlaylistExtractorTest {
public void testMoreRelatedItems() throws Exception { public void testMoreRelatedItems() throws Exception {
ListExtractor.InfoItemsPage<StreamInfoItem> currentPage ListExtractor.InfoItemsPage<StreamInfoItem> currentPage
= defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId()); = defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId());
// Test for 2 more levels
// test for 2 more levels
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
currentPage = extractor.getPage(currentPage.getNextPageUrl()); currentPage = extractor.getPage(currentPage.getNextPageUrl());
defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors()); defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors());
@ -214,7 +214,7 @@ public class YoutubePlaylistExtractorTest {
@Test @Test
public void testUploaderUrl() throws Exception { public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com")); assertEquals("https://www.youtube.com/channel/UCHSPWoY1J5fbDVbcnyeqwdw", extractor.getUploaderUrl());
} }
@Test @Test