From 4e04991762a6c3d2aa4fce126e8ef141a6bb0fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Fri, 9 Oct 2020 00:37:34 +0200 Subject: [PATCH 1/9] Support short custom youtube channel urls --- .../YoutubeChannelLinkHandlerFactory.java | 27 +++++++++++++++---- .../YoutubeChannelLinkHandlerFactoryTest.java | 2 ++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 0eb030852..89eaaed5c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -49,6 +49,17 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { return "https://www.youtube.com/" + id; } + /** + * Returns true if path conform to + * custom short channel urls like youtube.com/yourcustomname + * + * @param splitPath path segments array + * @return true - if value conform to short channel url, false - not + */ + public boolean isCustomShortChannelUrl(String[] splitPath) { + return splitPath.length == 1 && !splitPath[0].matches("playlist|watch"); + } + @Override public String getId(String url) throws ParsingException { try { @@ -60,14 +71,20 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { throw new ParsingException("the URL given is not a Youtube-URL"); } - if (!path.startsWith("/user/") && !path.startsWith("/channel/") && !path.startsWith("/c/")) { + // remove leading "/" + path = path.substring(1); + String[] splitPath = path.split("/"); + + // Handle custom short channel urls like youtube.com/yourcustomname + if (isCustomShortChannelUrl(splitPath)) { + path = "c/" + 
path; + splitPath = path.split("/"); + } + + if (!path.startsWith("user/") && !path.startsWith("channel/") && !path.startsWith("c/")) { throw new ParsingException("the URL given is neither a channel nor an user"); } - // remove leading "/" - path = path.substring(1); - - String[] splitPath = path.split("/"); String id = splitPath[1]; if (id == null || !id.matches("[A-Za-z0-9_-]+")) { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index fc409ffae..178d4b8a1 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -30,6 +30,8 @@ public class YoutubeChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy")); + assertTrue(linkHandler.acceptUrl("https://youtube.com/DIMENSI0N")); + assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA")); assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1")); From 8c38a5509efad0ed4f2a4d84089ef3a74e113d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Fri, 9 Oct 2020 19:07:38 +0200 Subject: [PATCH 2/9] Prevent attribution_link urls to be accepted by channel links handler --- .../linkHandler/YoutubeChannelLinkHandlerFactory.java | 5 +++-- .../youtube/YoutubeChannelLinkHandlerFactoryTest.java | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 89eaaed5c..ca163dd6a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -56,8 +56,9 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { * @param splitPath path segments array * @return true - if value conform to short channel url, false - not */ - public boolean isCustomShortChannelUrl(String[] splitPath) { - return splitPath.length == 1 && !splitPath[0].matches("playlist|watch"); + private boolean isCustomShortChannelUrl(String[] splitPath) { + return splitPath.length == 1 && + !splitPath[0].matches("playlist|watch|attribution_link"); } @Override diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index 178d4b8a1..14c8b0614 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -8,6 +8,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; /** @@ -46,6 +47,8 @@ public class YoutubeChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA")); 
assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1")); + + assertFalse(linkHandler.acceptUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare")); } @Test From 9d63211a6694c6263bcc02dd4303aa3a4a4e9cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Mon, 12 Oct 2020 19:56:53 +0200 Subject: [PATCH 3/9] Fix typos Co-authored-by: Tobias Groza --- .../youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index ca163dd6a..aead6701c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -51,10 +51,10 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { /** * Returns true if path conform to - * custom short channel urls like youtube.com/yourcustomname + * custom short channel URLs like youtube.com/yourcustomname * * @param splitPath path segments array - * @return true - if value conform to short channel url, false - not + * @return true - if value conform to short channel URL, false - not */ private boolean isCustomShortChannelUrl(String[] splitPath) { return splitPath.length == 1 && From 7abb4b371381dea969cbd90f2f332e751047d3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Mon, 12 Oct 2020 19:57:45 +0200 Subject: [PATCH 4/9] Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java Fix typos 
Co-authored-by: Tobias Groza --- .../youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index aead6701c..6f1569e7b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -76,7 +76,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { path = path.substring(1); String[] splitPath = path.split("/"); - // Handle custom short channel urls like youtube.com/yourcustomname + // Handle custom short channel URLs like youtube.com/yourcustomname if (isCustomShortChannelUrl(splitPath)) { path = "c/" + path; splitPath = path.split("/"); From 5ab1b053d2c8629076c3ac894af1df73916ee36e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Mon, 12 Oct 2020 20:11:28 +0200 Subject: [PATCH 5/9] Update youtube channel link handler tests Co-authored-by: Tobias Groza --- .../youtube/YoutubeChannelLinkHandlerFactoryTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index 14c8b0614..d2de6d292 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -48,7 +48,13 @@ public class 
YoutubeChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA")); assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1")); + // do not accept URLs which are not channels + assertFalse(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100")); + assertFalse(linkHandler.acceptUrl("http://www.youtube.com/watch_popup?v=uEJuoEs1UxY")); assertFalse(linkHandler.acceptUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare")); + assertFalse(linkHandler.acceptUrl("https://www.youtube.com/playlist?list=PLW5y1tjAOzI3orQNF1yGGVL5x-pR2K1d")); + assertFalse(linkHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI")); + assertFalse(linkHandler.acceptUrl("https://www.youtube.com/feed/subscriptions?list=PLz8YL4HVC87WJQDzVoY943URKQCsHS9XV")); } @Test From e3f996e014a0b0d06f6183bf3ae5a195be2cf6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Mon, 12 Oct 2020 20:59:56 +0200 Subject: [PATCH 6/9] Exclude links which are not channels --- .../youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 6f1569e7b..55e3e6652 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -58,7 +58,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { */ private boolean isCustomShortChannelUrl(String[] splitPath) { return splitPath.length == 1 
&& - !splitPath[0].matches("playlist|watch|attribution_link"); + !splitPath[0].matches("playlist|watch|attribution_link|watch_popup|embed|feed"); } @Override From d3f80d1538286434c32fa3d2cfe7a8fe987c09a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Tue, 20 Oct 2020 20:06:06 +0200 Subject: [PATCH 7/9] Exclude links which are not channels --- .../youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java | 2 +- .../youtube/YoutubeChannelLinkHandlerFactoryTest.java | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 55e3e6652..288c28b28 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -58,7 +58,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { */ private boolean isCustomShortChannelUrl(String[] splitPath) { return splitPath.length == 1 && - !splitPath[0].matches("playlist|watch|attribution_link|watch_popup|embed|feed"); + !splitPath[0].matches("playlist|watch|attribution_link|watch_popup|embed|feed|select_site"); } @Override diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index d2de6d292..12bcb8049 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -47,6 +47,8 
@@ public class YoutubeChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA")); assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1")); + assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watchismo")); + // do not accept URLs which are not channels assertFalse(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100")); @@ -55,6 +57,8 @@ public class YoutubeChannelLinkHandlerFactoryTest { assertFalse(linkHandler.acceptUrl("https://www.youtube.com/playlist?list=PLW5y1tjAOzI3orQNF1yGGVL5x-pR2K1d")); assertFalse(linkHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI")); assertFalse(linkHandler.acceptUrl("https://www.youtube.com/feed/subscriptions?list=PLz8YL4HVC87WJQDzVoY943URKQCsHS9XV")); + assertFalse(linkHandler.acceptUrl("https://www.youtube.com/?app=desktop&persist_app=1")); + assertFalse(linkHandler.acceptUrl("https://m.youtube.com/select_site")); } @Test From 0e67d820bcbe65d1c7de0e8d4cbf607b13317fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Thu, 22 Oct 2020 20:13:29 +0200 Subject: [PATCH 8/9] Use static regex pattern for excluded path segments --- .../YoutubeChannelLinkHandlerFactory.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 288c28b28..af6bd12ca 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; +import 
java.util.regex.Pattern; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; @@ -49,6 +50,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { return "https://www.youtube.com/" + id; } + /** * Returns true if path conform to * custom short channel URLs like youtube.com/yourcustomname @@ -56,15 +58,17 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { * @param splitPath path segments array * @return true - if value conform to short channel URL, false - not */ - private boolean isCustomShortChannelUrl(String[] splitPath) { - return splitPath.length == 1 && - !splitPath[0].matches("playlist|watch|attribution_link|watch_popup|embed|feed|select_site"); + private boolean isCustomShortChannelUrl(final String[] splitPath) { + return splitPath.length == 1 && !excludedSegments.matcher(splitPath[0]).matches(); } + private static final Pattern excludedSegments = + Pattern.compile("playlist|watch|attribution_link|watch_popup|embed|feed|select_site"); + @Override public String getId(String url) throws ParsingException { try { - URL urlObj = Utils.stringToURL(url); + final URL urlObj = Utils.stringToURL(url); String path = urlObj.getPath(); if (!Utils.isHTTP(urlObj) || !(YoutubeParsingHelper.isYoutubeURL(urlObj) || @@ -86,7 +90,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { throw new ParsingException("the URL given is neither a channel nor an user"); } - String id = splitPath[1]; + final String id = splitPath[1]; if (id == null || !id.matches("[A-Za-z0-9_-]+")) { throw new ParsingException("The given id is not a Youtube-Video-ID"); From 29695aed0a295f7bf84fea435a7dfd304989fcd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Rumi=C5=84ski?= Date: Fri, 23 Oct 2020 16:42:13 +0200 Subject: [PATCH 9/9] Small field refactor --- 
.../linkHandler/YoutubeChannelLinkHandlerFactory.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index af6bd12ca..2dc8fc427 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -33,6 +33,9 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { private static final YoutubeChannelLinkHandlerFactory instance = new YoutubeChannelLinkHandlerFactory(); + private static final Pattern excludedSegments = + Pattern.compile("playlist|watch|attribution_link|watch_popup|embed|feed|select_site"); + public static YoutubeChannelLinkHandlerFactory getInstance() { return instance; } @@ -49,8 +52,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { public String getUrl(String id, List contentFilters, String searchFilter) { return "https://www.youtube.com/" + id; } - - + /** * Returns true if path conform to * custom short channel URLs like youtube.com/yourcustomname @@ -62,9 +64,6 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { return splitPath.length == 1 && !excludedSegments.matcher(splitPath[0]).matches(); } - private static final Pattern excludedSegments = - Pattern.compile("playlist|watch|attribution_link|watch_popup|embed|feed|select_site"); - @Override public String getId(String url) throws ParsingException { try {