Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.
2025-04-29 00:10:35 +05:30 · 2020-12-20 19:54:12 +01:00 · 2020-12-20 19:54:12 +01:00 · bc6de14952
commit bc6de14952
parent 853a65a1a6
20 changed files with 526 additions and 29 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/MetaInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/MetaInfo.java
@ -0,0 +1,76 @@
+package org.schabi.newpipe.extractor;
+
+import org.schabi.newpipe.extractor.stream.Description;
+
+import java.io.Serializable;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Additional information attached to a stream or a search query: a title, a text
+ * and a list of links with their link texts.
+ */
+public class MetaInfo implements Serializable {
+
+    private String title = "";
+    // Starts as an empty plain-text description so that getContent() never returns null,
+    // even for instances created with the no-arg constructor.
+    private Description content = new Description("", Description.PLAIN_TEXT);
+    private List<URL> urls = new ArrayList<>();
+    // presumably the i-th text labels the i-th entry of urls - TODO confirm with callers
+    private List<String> urlTexts = new ArrayList<>();
+
+    /**
+     * Creates a fully populated meta info.
+     * The given lists are stored as passed (not copied).
+     *
+     * @param title    title of the info, can be empty
+     * @param content  text of the info
+     * @param urls     links belonging to the info
+     * @param urlTexts texts of the links
+     */
+    public MetaInfo(@Nonnull final String title, @Nonnull final Description content,
+                    @Nonnull final List<URL> urls, @Nonnull final List<String> urlTexts) {
+        this.title = title;
+        this.content = content;
+        this.urls = urls;
+        this.urlTexts = urlTexts;
+    }
+
+    /**
+     * Creates a meta info with an empty title, an empty plain-text content
+     * and empty, mutable link lists.
+     */
+    public MetaInfo() {
+    }
+
+    /**
+     * @return Title of the info. Can be empty.
+     */
+    @Nonnull
+    public String getTitle() {
+        return title;
+    }
+
+    /**
+     * @param title the new title of the info
+     */
+    public void setTitle(@Nonnull final String title) {
+        this.title = title;
+    }
+
+    /**
+     * @return Text of the info. Empty plain text unless a content was set.
+     */
+    @Nonnull
+    public Description getContent() {
+        return content;
+    }
+
+    /**
+     * @param content the new text of the info
+     */
+    public void setContent(@Nonnull final Description content) {
+        this.content = content;
+    }
+
+    /**
+     * @return the internal list of links (not a copy)
+     */
+    @Nonnull
+    public List<URL> getUrls() {
+        return urls;
+    }
+
+    /**
+     * @param urls the list of links to use from now on; stored as passed
+     */
+    public void setUrls(@Nonnull final List<URL> urls) {
+        this.urls = urls;
+    }
+
+    /**
+     * Appends a link to the current list of links.
+     *
+     * @param url the link to add
+     */
+    public void addUrl(@Nonnull final URL url) {
+        urls.add(url);
+    }
+
+    /**
+     * @return the internal list of link texts (not a copy)
+     */
+    @Nonnull
+    public List<String> getUrlTexts() {
+        return urlTexts;
+    }
+
+    /**
+     * @param urlTexts the list of link texts to use from now on; stored as passed
+     */
+    public void setUrlTexts(@Nonnull final List<String> urlTexts) {
+        this.urlTexts = urlTexts;
+    }
+
+    /**
+     * Appends a link text to the current list of link texts.
+     *
+     * @param urlText the link text to add
+     */
+    public void addUrlText(@Nonnull final String urlText) {
+        urlTexts.add(urlText);
+    }
+}
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/search/SearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/search/SearchExtractor.java
@ -2,12 +2,14 @@ package org.schabi.newpipe.extractor.search;

 import org.schabi.newpipe.extractor.InfoItem;
 import org.schabi.newpipe.extractor.ListExtractor;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;

 import javax.annotation.Nonnull;
+import java.util.List;

 public abstract class SearchExtractor extends ListExtractor<InfoItem> {

@ -57,4 +59,15 @@ public abstract class SearchExtractor extends ListExtractor<InfoItem> {
     * @return whether the results comes from a corrected query or not.
     */
    public abstract boolean isCorrectedSearch() throws ParsingException;
+
+    /**
+     * Meta information about the search query.
+     * <p>
+     * Example: on YouTube, if you search for "Covid-19",
+     * there is a box with information from the WHO about Covid-19 and a link to the WHO's website.
+     * </p>
+     *
+     * @return additional meta information about the search query
+     * @throws ParsingException if the meta information could not be parsed
+     */
+    @Nonnull 
+    public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/search/SearchInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/search/SearchInfo.java
@ -1,20 +1,20 @@
 package org.schabi.newpipe.extractor.search;

-import org.schabi.newpipe.extractor.InfoItem;
-import org.schabi.newpipe.extractor.ListExtractor;
-import org.schabi.newpipe.extractor.ListInfo;
-import org.schabi.newpipe.extractor.Page;
-import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.*;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
 import org.schabi.newpipe.extractor.utils.ExtractorHelper;

 import java.io.IOException;
+import java.util.List;
+
+import javax.annotation.Nonnull;

 public class SearchInfo extends ListInfo<InfoItem> {
    private String searchString;
    private String searchSuggestion;
    private boolean isCorrectedSearch;
+    // Never null: starts out empty so that getMetaInfo() keeps its @Nonnull promise
+    // even if extracting the meta info fails and setMetaInfo() is never called.
+    private List<MetaInfo> metaInfo = java.util.Collections.emptyList();

    public SearchInfo(int serviceId,
                      SearchQueryHandler qIHandler,
@ -51,6 +51,11 @@ public class SearchInfo extends ListInfo<InfoItem> {
        } catch (Exception e) {
            info.addError(e);
        }
+        try {
+            info.setMetaInfo(extractor.getMetaInfo());
+        } catch (Exception e) {
+            info.addError(e);
+        }

        ListExtractor.InfoItemsPage<InfoItem> page = ExtractorHelper.getItemsPageOrLogError(info, extractor);
        info.setRelatedItems(page.getItems());
@ -87,4 +92,13 @@ public class SearchInfo extends ListInfo<InfoItem> {
    public void setSearchSuggestion(String searchSuggestion) {
        this.searchSuggestion = searchSuggestion;
    }
+
+    /**
+     * @return the meta info about the search query, as stored by
+     * {@link #setMetaInfo(List)}
+     */
+    @Nonnull
+    public List<MetaInfo> getMetaInfo() {
+        return metaInfo;
+    }
+
+    /**
+     * @param metaInfo the meta info about the search query; stored as passed
+     */
+    public void setMetaInfo(@Nonnull List<MetaInfo> metaInfo) {
+        this.metaInfo = metaInfo;
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java
@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;

 import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
@ -20,6 +21,7 @@ import org.schabi.newpipe.extractor.services.media_ccc.extractors.infoItems.Medi
 import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferencesListLinkHandlerFactory;

 import java.io.IOException;
+import java.util.Collections;
 import java.util.List;

 import javax.annotation.Nonnull;
@ -55,6 +57,12 @@ public class MediaCCCSearchExtractor extends SearchExtractor {
        return false;
    }

+    /**
+     * No meta info is extracted for media.ccc.de searches.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
+
    @Nonnull
    @Override
    public InfoItemsPage<InfoItem> getInitialPage() {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCStreamExtractor.java
@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;

 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -301,4 +302,10 @@ public class MediaCCCStreamExtractor extends StreamExtractor {
    public List<StreamSegment> getStreamSegments() {
        return Collections.emptyList();
    }
+
+    /**
+     * No meta info is extracted for media.ccc.de streams.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeSearchExtractor.java
@ -4,6 +4,7 @@ import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;

 import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -17,6 +18,8 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
 import org.schabi.newpipe.extractor.utils.Utils;

 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;

 import javax.annotation.Nonnull;

@ -42,6 +45,12 @@ public class PeertubeSearchExtractor extends SearchExtractor {
        return false;
    }

+    /**
+     * No meta info is extracted for PeerTube searches.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
+
    @Override
    public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
        final String pageUrl = getUrl() + "&" + START_KEY + "=0&" + COUNT_KEY + "=" + ITEMS_PER_PAGE;
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java
@ -5,6 +5,7 @@ import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -309,6 +310,12 @@ public class PeertubeStreamExtractor extends StreamExtractor {
        return Collections.emptyList();
    }

+    /**
+     * No meta info is extracted for PeerTube streams.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
+
    private String getRelatedStreamsUrl(final List<String> tags) throws UnsupportedEncodingException {
        final String url = baseUrl + PeertubeSearchQueryHandlerFactory.SEARCH_ENDPOINT;
        final StringBuilder params = new StringBuilder();
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java
@ -8,6 +8,7 @@ import com.grack.nanojson.JsonParserException;
 import org.schabi.newpipe.extractor.InfoItem;
 import org.schabi.newpipe.extractor.InfoItemExtractor;
 import org.schabi.newpipe.extractor.InfoItemsCollector;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -22,6 +23,8 @@ import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Collections;
+import java.util.List;

 import javax.annotation.Nonnull;

@ -47,6 +50,12 @@ public class SoundcloudSearchExtractor extends SearchExtractor {
        return false;
    }

+    /**
+     * No meta info is extracted for SoundCloud searches.
+     *
+     * @return always an empty list
+     */
+    @Nonnull 
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
+
    @Nonnull
    @Override
    public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java
@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;

 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -327,4 +328,10 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
    public List<StreamSegment> getStreamSegments() {
        return Collections.emptyList();
    }
+
+    /**
+     * No meta info is extracted for SoundCloud streams.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
@ -76,19 +76,16 @@ public class YoutubeParsingHelper {
    private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
    private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";

-    private static final String[] RECAPTCHA_DETECTION_SELECTORS = {
-            "form[action*=\"/das_captcha\"]",
-            "input[name*=\"action_recaptcha_verify\"]"
-    };
-
-    public static Document parseAndCheckPage(final String url, final Response response) throws ReCaptchaException {
-        final Document document = Jsoup.parse(response.responseBody(), url);
-
-        for (String detectionSelector : RECAPTCHA_DETECTION_SELECTORS) {
-            if (!document.select(detectionSelector).isEmpty()) {
-                throw new ReCaptchaException("reCAPTCHA challenge requested (detected with selector: \"" + detectionSelector + "\")", url);
-            }
+    /**
+     * Checks whether a URL points to a Google host.
+     * <p>
+     * A URL referring to Google's webcache is unwrapped first, so the check is done
+     * on the cached site and not on the cache itself.
+     * </p>
+     *
+     * @param url the URL to check; may be a Google webcache URL
+     * @return {@code true} if the host starts with {@code google.}, {@code m.google.}
+     * or {@code www.google.}; {@code false} otherwise or if the URL cannot be parsed
+     */
+    private static boolean isGoogleURL(final String url) {
+        final String originalUrl = extractCachedUrlIfNeeded(url);
+        try {
+            final String host = new URL(originalUrl).getHost();
+            return host.startsWith("google.")
+                    || host.startsWith("m.google.")
+                    // regular Google links use the "www." host, which must be recognised as well
+                    || host.startsWith("www.google.");
+        } catch (MalformedURLException e) {
+            return false;
         }
+    }

        return document;
    }
@ -650,4 +647,124 @@ public class YoutubeParsingHelper {
            }
        }
    }
+
+    /**
+     * Collects the meta info contained in a list of section objects.
+     * <p>
+     * Every {@code itemSectionRenderer} in {@code contents} is searched for
+     * {@code infoPanelContentRenderer} and {@code clarificationRenderer} entries;
+     * each one found is converted to a {@link MetaInfo}.
+     * </p>
+     *
+     * @param contents the array of section objects to search
+     * @return the meta info found, in order of appearance; empty if there is none
+     * @throws ParsingException if one of the renderers could not be converted
+     */
+    @Nonnull
+    public static List<MetaInfo> getMetaInfo(final JsonArray contents) throws ParsingException {
+        final List<MetaInfo> result = new ArrayList<>();
+        for (final Object entry : contents) {
+            final JsonObject section = (JsonObject) entry;
+            if (!section.has("itemSectionRenderer")) {
+                continue;
+            }
+
+            final JsonArray sectionContents = section.getObject("itemSectionRenderer")
+                    .getArray("contents");
+            for (final Object item : sectionContents) {
+                final JsonObject renderer = (JsonObject) item;
+                if (renderer.has("infoPanelContentRenderer")) {
+                    result.add(getInfoPanelContent(
+                            renderer.getObject("infoPanelContentRenderer")));
+                }
+                if (renderer.has("clarificationRenderer")) {
+                    result.add(getClarificationRendererContent(
+                            renderer.getObject("clarificationRenderer")));
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds a {@link MetaInfo} from an {@code infoPanelContentRenderer} object.
+     * <p>
+     * The texts of all {@code paragraphs} are concatenated, separated by {@code <br>},
+     * into one HTML description. If a {@code sourceEndpoint} is present, its URL
+     * (unwrapped from Google's webcache if needed) and the text of {@code inlineSource}
+     * are added as link and link text.
+     * </p>
+     *
+     * @param infoPanelContentRenderer the renderer object to read from
+     * @return the meta info described by the renderer
+     * @throws ParsingException if the source URL is missing or malformed,
+     *                          or if the link text is null or empty
+     */
+    @Nonnull
+    private static MetaInfo getInfoPanelContent(final JsonObject infoPanelContentRenderer)
+            throws ParsingException {
+        final StringBuilder html = new StringBuilder();
+        for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
+            // separate paragraphs, but do not start the text with a line break
+            if (html.length() > 0) {
+                html.append("<br>");
+            }
+            html.append(getTextFromObject((JsonObject) paragraph));
+        }
+
+        final MetaInfo info = new MetaInfo();
+        info.setContent(new Description(html.toString(), Description.HTML));
+        if (!infoPanelContentRenderer.has("sourceEndpoint")) {
+            return info;
+        }
+
+        final String linkUrl = getUrlFromNavigationEndpoint(
+                infoPanelContentRenderer.getObject("sourceEndpoint"));
+        try {
+            final String originalUrl = Objects.requireNonNull(extractCachedUrlIfNeeded(linkUrl));
+            info.addUrl(new URL(originalUrl));
+        } catch (final NullPointerException | MalformedURLException e) {
+            throw new ParsingException("Could not get metadata info URL", e);
+        }
+
+        final String linkText = getTextFromObject(
+                infoPanelContentRenderer.getObject("inlineSource"));
+        if (isNullOrEmpty(linkText)) {
+            throw new ParsingException("Could not get metadata info link text.");
+        }
+        info.addUrlText(linkText);
+        return info;
+    }
+
+    /**
+     * Builds a {@link MetaInfo} from a {@code clarificationRenderer} object.
+     * <p>
+     * {@code contentTitle} becomes the title and {@code text} the plain-text content.
+     * The optional {@code actionButton} contributes a link (unwrapped from Google's
+     * webcache if needed) with the button text as link text. The optional
+     * {@code secondaryEndpoint} / {@code secondarySource} pair contributes a second link,
+     * unless it points to Google.
+     * </p>
+     *
+     * @param clarificationRenderer the renderer object to read from
+     * @return the meta info described by the renderer
+     * @throws ParsingException if title or text are missing, if a URL is missing or malformed,
+     *                          or if the action button has no text
+     */
+    @Nonnull
+    private static MetaInfo getClarificationRendererContent(final JsonObject clarificationRenderer)
+            throws ParsingException {
+        final String heading = getTextFromObject(clarificationRenderer.getObject("contentTitle"));
+        final String body = getTextFromObject(clarificationRenderer.getObject("text"));
+        if (heading == null || body == null) {
+            throw new ParsingException("Could not extract clarification renderer content");
+        }
+
+        final MetaInfo info = new MetaInfo();
+        info.setTitle(heading);
+        info.setContent(new Description(body, Description.PLAIN_TEXT));
+
+        if (clarificationRenderer.has("actionButton")) {
+            final JsonObject button = clarificationRenderer.getObject("actionButton")
+                    .getObject("buttonRenderer");
+            try {
+                final String buttonUrl = getUrlFromNavigationEndpoint(button.getObject("command"));
+                info.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(buttonUrl))));
+            } catch (final NullPointerException | MalformedURLException e) {
+                throw new ParsingException("Could not get metadata info URL", e);
+            }
+
+            final String buttonText = getTextFromObject(button.getObject("text"));
+            if (isNullOrEmpty(buttonText)) {
+                throw new ParsingException("Could not get metadata info link text.");
+            }
+            info.addUrlText(buttonText);
+        }
+
+        final boolean hasSecondaryLink = clarificationRenderer.has("secondaryEndpoint")
+                && clarificationRenderer.has("secondarySource");
+        if (!hasSecondaryLink) {
+            return info;
+        }
+
+        final String secondaryUrl = getUrlFromNavigationEndpoint(
+                clarificationRenderer.getObject("secondaryEndpoint"));
+        // ignore Google URLs, because those point to a Google search about "Covid-19"
+        if (secondaryUrl == null || isGoogleURL(secondaryUrl)) {
+            return info;
+        }
+
+        try {
+            info.addUrl(new URL(secondaryUrl));
+            final String label = getTextFromObject(
+                    clarificationRenderer.getObject("secondarySource"));
+            // fall back to the URL itself when the source has no text
+            info.addUrlText(label == null ? secondaryUrl : label);
+        } catch (MalformedURLException e) {
+            throw new ParsingException("Could not get metadata info secondary URL", e);
+        }
+        return info;
+    }
+
+    /**
+     * Sometimes, YouTube provides URLs which use Google's cache. They look like
+     * {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
+     * <p>
+     * Everything after the first {@code cache:} marker is treated as the cached URL.
+     * A webcache URL without a marker, or with nothing after it, is returned unchanged.
+     * </p>
+     *
+     * @param url the URL which might refer to the Google's webcache, may be {@code null}
+     * @return the URL which is referring to the original site,
+     * or {@code null} if {@code url} is {@code null}
+     */
+    public static String extractCachedUrlIfNeeded(final String url) {
+        if (url == null) {
+            return null;
+        }
+        if (url.contains("webcache.googleusercontent.com")) {
+            final String marker = "cache:";
+            final int markerIndex = url.indexOf(marker);
+            final int cachedUrlStart = markerIndex + marker.length();
+            // Only unwrap when there really is a cached URL behind the marker;
+            // indexing into a split result would throw for such URLs.
+            if (markerIndex >= 0 && cachedUrlStart < url.length()) {
+                return url.substring(cachedUrlStart);
+            }
+        }
+        return url;
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMusicSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMusicSearchExtractor.java
@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
 import com.grack.nanojson.JsonWriter;

 import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -163,6 +164,12 @@ public class YoutubeMusicSearchExtractor extends SearchExtractor {
        return !showingResultsForRenderer.isEmpty();
    }

+    /**
+     * No meta info is extracted for YouTube Music searches.
+     *
+     * @return always an empty list
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() {
+        return Collections.emptyList();
+    }
+
    @Nonnull
    @Override
    public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException, IOException {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
 import com.grack.nanojson.JsonWriter;

 import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -16,13 +17,11 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
 import org.schabi.newpipe.extractor.localization.TimeAgoParser;
 import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
 import org.schabi.newpipe.extractor.search.SearchExtractor;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
 import org.schabi.newpipe.extractor.utils.JsonUtils;

 import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;

 import javax.annotation.Nonnull;

@ -106,6 +105,13 @@ public class YoutubeSearchExtractor extends SearchExtractor {
        return !showingResultsForRenderer.isEmpty();
    }

+    /**
+     * Extracts the meta info about the search query from the {@code contents} array at
+     * {@code contents.twoColumnSearchResultsRenderer.primaryContents.sectionListRenderer}
+     * of the initial data.
+     *
+     * @return the meta info found; empty if there is none
+     * @throws ParsingException if a meta info entry could not be parsed
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() throws ParsingException {
+        return YoutubeParsingHelper.getMetaInfo(
+                initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
+                        .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents"));
+    }
+
    @Nonnull
    @Override
    public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@ -13,6 +13,7 @@ import org.mozilla.javascript.Context;
 import org.mozilla.javascript.Function;
 import org.mozilla.javascript.ScriptableObject;
 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@ -45,6 +46,9 @@ import org.schabi.newpipe.extractor.utils.Utils;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.time.LocalDate;
 import java.time.OffsetDateTime;
 import java.time.format.DateTimeFormatter;
@ -1118,4 +1122,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
        }
        return segments;
    }
+
+    /**
+     * Extracts the meta info of the stream from the {@code contents} array at
+     * {@code contents.twoColumnWatchNextResults.results.results} of the initial data.
+     *
+     * @return the meta info returned by {@code YoutubeParsingHelper.getMetaInfo} for that array
+     * @throws ParsingException if a meta info entry could not be parsed
+     */
+    @Nonnull
+    @Override
+    public List<MetaInfo> getMetaInfo() throws ParsingException {
+        return YoutubeParsingHelper.getMetaInfo(
+                initialData.getObject("contents").getObject("twoColumnWatchNextResults")
+                .getObject("results").getObject("results").getArray("contents"));
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java
@ -22,6 +22,7 @@ package org.schabi.newpipe.extractor.stream;

 import org.schabi.newpipe.extractor.Extractor;
 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.channel.ChannelExtractor;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -486,4 +487,18 @@ public abstract class StreamExtractor extends Extractor {
     */
    @Nonnull
    public abstract List<StreamSegment> getStreamSegments() throws ParsingException;
+
+    /**
+     * Meta information about the stream.
+     * <p>
+     * This can be information about the stream creator (e.g. if the creator is a public broadcaster)
+     * or further information on the topic (e.g. hints that the video might contain conspiracy theories
+     * or contains information about a current health situation like the Covid-19 pandemic).
+     * </p>
+     * The meta information often contains links to external sources like Wikipedia or the WHO.
+     *
+     * @return The meta info of the stream or an empty List if not provided.
+     * @throws ParsingException if the meta information could not be parsed
+     */
+    @Nonnull
+    public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
@ -1,9 +1,6 @@
 package org.schabi.newpipe.extractor.stream;

-import org.schabi.newpipe.extractor.Info;
-import org.schabi.newpipe.extractor.InfoItem;
-import org.schabi.newpipe.extractor.NewPipe;
-import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.*;
 import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
 import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -13,9 +10,12 @@ import org.schabi.newpipe.extractor.utils.ExtractorHelper;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;

+import javax.annotation.Nonnull;
+
 import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

 /*
@ -329,6 +329,11 @@ public class StreamInfo extends Info {
        } catch (Exception e) {
            streamInfo.addError(e);
        }
+        try {
+            streamInfo.setMetaInfo(extractor.getMetaInfo());
+        } catch (Exception e) {
+            streamInfo.addError(e);
+        }

        streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor));

@ -379,6 +384,7 @@ public class StreamInfo extends Info {
    private Locale language = null;
    private List<String> tags = new ArrayList<>();
    private List<StreamSegment> streamSegments = new ArrayList<>();
+    private List<MetaInfo> metaInfo = new ArrayList<>();

    /**
     * Get the stream type
@ -684,4 +690,13 @@ public class StreamInfo extends Info {
    public void setStreamSegments(List<StreamSegment> streamSegments) {
        this.streamSegments = streamSegments;
    }
+
+    /**
+     * Sets the meta info of the stream.
+     *
+     * @param metaInfo the meta info; must not be {@code null} because
+     *                 {@link #getMetaInfo()} hands it out as {@code @Nonnull}
+     */
+    public void setMetaInfo(@Nonnull final List<MetaInfo> metaInfo) {
+        this.metaInfo = metaInfo;
+    }
+
+    /**
+     * @return the meta info of the stream; an empty list unless one was set
+     */
+    @Nonnull
+    public List<MetaInfo> getMetaInfo() {
+        return this.metaInfo;
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultSearchExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultSearchExtractorTest.java
@ -1,12 +1,20 @@
 package org.schabi.newpipe.extractor.services;

 import org.junit.Test;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.search.SearchExtractor;


 import javax.annotation.Nullable;

+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.schabi.newpipe.extractor.ExtractorAsserts.assertEmpty;
 import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

@ -20,6 +28,10 @@ public abstract class DefaultSearchExtractorTest extends DefaultListExtractorTes
        return false;
    }

+    /**
+     * Meta info which {@link #testMetaInfo()} expects to find in the extractor's result.
+     * The default is an empty list, in which case nothing is checked.
+     *
+     * @return the expected meta info
+     * @throws MalformedURLException presumably so that overriding tests can create URLs here
+     */
+    public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
+        return Collections.emptyList();
+    }
+
    @Test
    @Override
    public void testSearchString() throws Exception {
@ -41,4 +53,34 @@ public abstract class DefaultSearchExtractorTest extends DefaultListExtractorTes
    public void testSearchCorrected() throws Exception {
        assertEquals(isCorrectedSearch(), extractor().isCorrectedSearch());
    }
+
+    /**
+     * Checks that content, title, URLs and URL texts of every expected meta info
+     * occur somewhere in the extracted meta info. Values are looked up across all
+     * extracted entries, not per entry.
+     *
+     * @see DefaultStreamExtractorTest#testMetaInfo()
+     */
+    @Test
+    public void testMetaInfo() throws Exception {
+        final List<MetaInfo> metaInfoList = extractor().getMetaInfo();
+        final List<MetaInfo> expectedMetaInfoList = expectedMetaInfo();
+        if (expectedMetaInfoList.isEmpty()) {
+            return; // nothing expected, nothing to compare
+        }
+
+        // The extracted values do not depend on the expected entry, so flatten them once.
+        final List<String> texts = metaInfoList.stream()
+                .map(metaInfo -> metaInfo.getContent().getContent())
+                .collect(Collectors.toList());
+        final List<String> titles = metaInfoList.stream().map(MetaInfo::getTitle).collect(Collectors.toList());
+        final List<URL> urls = metaInfoList.stream().flatMap(info -> info.getUrls().stream())
+                .collect(Collectors.toList());
+        final List<String> urlTexts = metaInfoList.stream().flatMap(info -> info.getUrlTexts().stream())
+                .collect(Collectors.toList());
+
+        for (final MetaInfo expectedMetaInfo : expectedMetaInfoList) {
+            final String expectedText = expectedMetaInfo.getContent().getContent();
+            assertTrue("Missing meta info content: " + expectedText, texts.contains(expectedText));
+            assertTrue("Missing meta info title: " + expectedMetaInfo.getTitle(),
+                    titles.contains(expectedMetaInfo.getTitle()));
+
+            for (final String expectedUrlText : expectedMetaInfo.getUrlTexts()) {
+                assertTrue("Missing meta info URL text: " + expectedUrlText,
+                        urlTexts.contains(expectedUrlText));
+            }
+            for (final URL expectedUrl : expectedMetaInfo.getUrls()) {
+                assertTrue("Missing meta info URL: " + expectedUrl, urls.contains(expectedUrl));
+            }
+        }
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultStreamExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultStreamExtractorTest.java
@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor.services;

 import org.junit.Test;
 import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.stream.AudioStream;
 import org.schabi.newpipe.extractor.stream.Description;
@ -15,9 +16,12 @@ import org.schabi.newpipe.extractor.stream.VideoStream;
 import javax.annotation.Nullable;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
+import java.util.stream.Collectors;

 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.MatcherAssert.assertThat;
@ -67,6 +71,7 @@ public abstract class DefaultStreamExtractorTest extends DefaultExtractorTest<St
    public List<String> expectedTags() { return Collections.emptyList(); } // default: no tags
    public String expectedSupportInfo() { return ""; } // default: no support info available
    public int expectedStreamSegmentsCount() { return -1; } // return 0 or greater to test (default is -1 to ignore)
+    public List<MetaInfo> expectedMetaInfo() throws MalformedURLException { return Collections.emptyList(); } // default: no meta info expected, testMetaInfo() then checks nothing

    @Test
    @Override
@ -387,4 +392,35 @@ public abstract class DefaultStreamExtractorTest extends DefaultExtractorTest<St
            assertEquals(expectedStreamSegmentsCount(), extractor().getStreamSegments().size());
        }
    }
+
+    /**
+     * Verifies that the extractor reports every meta info listed by {@link #expectedMetaInfo()}.
+     * <p>
+     * Contents, titles, URLs and URL texts are pooled over all extracted entries; each expected
+     * value only has to occur somewhere in the matching pool. Extracted entries that are not
+     * expected are ignored.
+     * </p>
+     *
+     * @throws Exception if fetching the meta info or building the expectations fails
+     * @see DefaultSearchExtractorTest#testMetaInfo()
+     */
+    @Test
+    public void testMetaInfo() throws Exception {
+        final List<MetaInfo> metaInfoList = extractor().getMetaInfo();
+        final List<MetaInfo> expectedMetaInfoList = expectedMetaInfo();
+
+        if (expectedMetaInfoList.isEmpty()) {
+            return; // nothing is expected, so the extracted entries are not inspected at all
+        }
+
+        // The pools depend only on the extracted data, so build them once rather than per expectation.
+        final List<String> texts = metaInfoList.stream()
+                .map(metaInfo -> metaInfo.getContent().getContent())
+                .collect(Collectors.toList());
+        final List<String> titles = metaInfoList.stream()
+                .map(MetaInfo::getTitle)
+                .collect(Collectors.toList());
+        final List<URL> urls = metaInfoList.stream()
+                .flatMap(info -> info.getUrls().stream())
+                .collect(Collectors.toList());
+        final List<String> urlTexts = metaInfoList.stream()
+                .flatMap(info -> info.getUrlTexts().stream())
+                .collect(Collectors.toList());
+
+        for (final MetaInfo expectedMetaInfo : expectedMetaInfoList) {
+            final String expectedText = expectedMetaInfo.getContent().getContent();
+            assertTrue("Missing meta info content: " + expectedText, texts.contains(expectedText));
+            assertTrue("Missing meta info title: " + expectedMetaInfo.getTitle(),
+                    titles.contains(expectedMetaInfo.getTitle()));
+
+            for (final String expectedUrlText : expectedMetaInfo.getUrlTexts()) {
+                assertTrue("Missing meta info URL text: " + expectedUrlText,
+                        urlTexts.contains(expectedUrlText));
+            }
+            for (final URL expectedUrl : expectedMetaInfo.getUrls()) {
+                assertTrue("Missing meta info URL: " + expectedUrl, urls.contains(expectedUrl));
+            }
+        }
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java
@ -36,4 +36,12 @@ public class YoutubeParsingHelperTest {
        assertEquals(4445767, YoutubeParsingHelper.parseDurationString("1,234:56:07"));
        assertEquals(754, YoutubeParsingHelper.parseDurationString("12:34 "));
    }
+
+    /**
+     * A Google webcache URL must be reduced to the page it wraps, while a URL that is not a
+     * webcache URL must be returned as it was passed in.
+     */
+    @Test
+    public void testConvertFromGoogleCacheUrl() throws ParsingException {
+        final String webcacheUrl = "https://webcache.googleusercontent.com/search?q=cache:https://mohfw.gov.in/";
+        final String regularUrl = "https://www.infektionsschutz.de/coronavirus-sars-cov-2.html";
+
+        final String unwrapped = YoutubeParsingHelper.extractCachedUrlIfNeeded(webcacheUrl);
+        final String untouched = YoutubeParsingHelper.extractCachedUrlIfNeeded(regularUrl);
+
+        assertEquals("https://mohfw.gov.in/", unwrapped);
+        assertEquals("https://www.infektionsschutz.de/coronavirus-sars-cov-2.html", untouched);
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorTest.java
@ -3,15 +3,21 @@ package org.schabi.newpipe.extractor.services.youtube.search;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.schabi.newpipe.DownloaderTestImpl;
-import org.schabi.newpipe.extractor.InfoItem;
-import org.schabi.newpipe.extractor.ListExtractor;
-import org.schabi.newpipe.extractor.NewPipe;
-import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.*;
 import org.schabi.newpipe.extractor.search.SearchExtractor;
 import org.schabi.newpipe.extractor.services.DefaultSearchExtractorTest;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
+import org.schabi.newpipe.extractor.stream.Description;

 import javax.annotation.Nullable;

+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
 import static java.util.Collections.singletonList;
 import static junit.framework.TestCase.assertFalse;
 import static org.junit.Assert.assertEquals;
@ -211,4 +217,39 @@ public class YoutubeSearchExtractorTest {
            assertNoDuplicatedItems(YouTube, page1, page2);
        }
    }
+
+    /**
+     * Runs a video search for {@code "Covid"} and expects the result page to expose a
+     * "COVID-19" meta info entry whose links point to WHO pages.
+     */
+    public static class MetaInfoTest extends DefaultSearchExtractorTest {
+        private static SearchExtractor extractor;
+        private static final String QUERY = "Covid";
+
+        /**
+         * Creates the extractor and fetches the page once before any test of this class runs.
+         * JUnit gives no ordering guarantee between test methods, so doing this inside a
+         * {@code @Test} method could leave the other tests with a {@code null} extractor.
+         */
+        @BeforeClass
+        public static void setUp() throws Exception {
+            NewPipe.init(DownloaderTestImpl.getInstance());
+            extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
+            extractor.fetchPage();
+        }
+
+        @Override public String expectedSearchString() { return QUERY; }
+        @Override public String expectedSearchSuggestion() { return null; }
+        @Override public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
+            // urls and urlTexts are separate lists; the shared test only checks that each value occurs.
+            final List<URL> urls = new ArrayList<>();
+            urls.add(new URL("https://www.who.int/emergencies/diseases/novel-coronavirus-2019"));
+            urls.add(new URL("https://www.who.int/emergencies/diseases/novel-coronavirus-2019/covid-19-vaccines"));
+            final List<String> urlTexts = new ArrayList<>();
+            urlTexts.add("LEARN MORE");
+            urlTexts.add("Learn about vaccine progress from the WHO");
+            return Collections.singletonList(new MetaInfo(
+                    "COVID-19",
+                    new Description("Get the latest information from the WHO about coronavirus.", Description.PLAIN_TEXT),
+                    urls,
+                    urlTexts
+            ));
+        }
+        @Override public SearchExtractor extractor() { return extractor; }
+        @Override public StreamingService expectedService() { return YouTube; }
+        @Override public String expectedName() { return QUERY; }
+        @Override public String expectedId() { return QUERY; }
+        @Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
+        @Override public String expectedOriginalUrlContains() throws Exception { return "youtube.com/results?search_query=" + QUERY; }
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java
@ -3,16 +3,22 @@ package org.schabi.newpipe.extractor.services.youtube.stream;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.schabi.newpipe.DownloaderTestImpl;
+import org.schabi.newpipe.extractor.MetaInfo;
 import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.services.DefaultStreamExtractorTest;
+import org.schabi.newpipe.extractor.stream.Description;
 import org.schabi.newpipe.extractor.stream.StreamExtractor;
 import org.schabi.newpipe.extractor.stream.StreamSegment;
 import org.schabi.newpipe.extractor.stream.StreamType;

+import javax.annotation.Nullable;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;

 import javax.annotation.Nullable;
@ -258,4 +264,46 @@ public class YoutubeStreamExtractorDefaultTest {
            assertNotNull(segment.getPreviewUrl());
        }
    }
+
+    /**
+     * Stream test for video {@code q6fgbYWsMgw}, whose expected meta info is a notice that
+     * Funk is a German public broadcast service, linking to the German Wikipedia.
+     */
+    public static class PublicBroadcasterTest extends DefaultStreamExtractorTest {
+        private static final String ID = "q6fgbYWsMgw";
+        private static final String URL = BASE_URL + ID;
+        private static final int TIMESTAMP = 0;
+        private static StreamExtractor extractor;
+
+        @BeforeClass
+        public static void setUp() throws Exception {
+            NewPipe.init(DownloaderTestImpl.getInstance());
+            final StreamExtractor streamExtractor = YouTube.getStreamExtractor(URL);
+            streamExtractor.fetchPage();
+            extractor = streamExtractor;
+        }
+
+        // Identity of the extractor under test
+        @Override public StreamExtractor extractor() { return extractor; }
+        @Override public StreamingService expectedService() { return YouTube; }
+        @Override public String expectedName() { return "Was verbirgt sich am tiefsten Punkt des Ozeans?"; }
+        @Override public String expectedId() { return ID; }
+        @Override public String expectedUrlContains() { return BASE_URL + ID; }
+        @Override public String expectedOriginalUrlContains() { return URL; }
+
+        // Expected stream details
+        @Override public StreamType expectedStreamType() { return StreamType.VIDEO_STREAM; }
+        @Override public String expectedUploaderName() { return "Dinge Erklärt – Kurzgesagt"; }
+        @Override public String expectedUploaderUrl() { return "https://www.youtube.com/channel/UCwRH985XgMYXQ6NxXDo8npw"; }
+        @Override public List<String> expectedDescriptionContains() { return Arrays.asList("Lasst uns abtauchen!", "Angebot von funk", "Dinge"); }
+        @Override public long expectedLength() { return 631; }
+        @Override public long expectedTimestamp() { return TIMESTAMP; }
+        @Override public long expectedViewCountAtLeast() { return 1_600_000; }
+        @Nullable @Override public String expectedUploadDate() { return "2019-06-12 00:00:00.000"; }
+        @Nullable @Override public String expectedTextualUploadDate() { return "2019-06-12"; }
+        @Override public long expectedLikeCountAtLeast() { return 70000; }
+        @Override public long expectedDislikeCountAtLeast() { return 500; }
+
+        // Single entry with an empty title, one link and one link text
+        @Override public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
+            final Description notice =
+                    new Description("Funk is a German public broadcast service.", Description.PLAIN_TEXT);
+            final List<URL> links =
+                    Collections.singletonList(new URL("https://de.wikipedia.org/wiki/Funk_(Medienangebot)?wprov=yicw1"));
+            final List<String> linkTexts = Collections.singletonList("Wikipedia (German)");
+            return Collections.singletonList(new MetaInfo("", notice, links, linkTexts));
+        }
+    }
+
 }