mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-01-09 19:10:33 +05:30
[YouTube] Optimize extracting auto-translated captions
Faster and ordered: captions provided by the user are at the beginning of the list, auto-translated captions are at the end
This commit is contained in:
parent
ff030ad297
commit
30a4b3617d
@ -670,18 +670,35 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
assertPageFetched();
|
||||
|
||||
// We cannot store the subtitles list because the media format may change
|
||||
final List<SubtitlesStream> subtitlesToReturn = new ArrayList<>();
|
||||
final List<SubtitlesStream> subtitles = new ArrayList<>();
|
||||
final List<SubtitlesStream> autoTranslatedSubtitles = new ArrayList<>();
|
||||
final JsonObject renderer = playerResponse.getObject("captions")
|
||||
.getObject("playerCaptionsTracklistRenderer");
|
||||
final JsonArray captionsArray = renderer.getArray("captionTracks");
|
||||
|
||||
// Generate list of languages available for auto-translations
|
||||
final List<String> translationLanguages;
|
||||
if (renderer.has("translationLanguages")) {
|
||||
translationLanguages = renderer.getArray("translationLanguages")
|
||||
.stream()
|
||||
.map(JsonObject.class::cast)
|
||||
.map(lang -> lang.getString("languageCode"))
|
||||
.collect(Collectors.toList());
|
||||
} else {
|
||||
translationLanguages = Collections.emptyList();
|
||||
}
|
||||
|
||||
// Add subtitles
|
||||
for (int i = 0; i < captionsArray.size(); i++) {
|
||||
final JsonObject caption = captionsArray.getObject(i);
|
||||
final String languageCode = caption.getString("languageCode");
|
||||
final String baseUrl = caption.getString("baseUrl");
|
||||
final String vssId = caption.getString("vssId");
|
||||
|
||||
if (languageCode != null && baseUrl != null && vssId != null) {
|
||||
if (languageCode == null || baseUrl == null || vssId == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final boolean isAutoGenerated = vssId.startsWith("a.");
|
||||
final String cleanUrl = baseUrl
|
||||
// Remove preexisting format if exists
|
||||
@ -689,33 +706,36 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
// Remove translation language
|
||||
.replaceAll("&tlang=[^&]*", "");
|
||||
|
||||
subtitlesToReturn.add(new SubtitlesStream.Builder()
|
||||
// add base subtitles
|
||||
subtitles.add(new SubtitlesStream.Builder()
|
||||
.setContent(cleanUrl + "&fmt=" + format.getSuffix(), true)
|
||||
.setMediaFormat(format)
|
||||
.setLanguageCode(languageCode)
|
||||
.setAutoGenerated(isAutoGenerated)
|
||||
.setAutoTranslated(false)
|
||||
.build());
|
||||
if (i == 0 && caption.getBoolean("isTranslatable")
|
||||
&& renderer.has("translationLanguages")) {
|
||||
final JsonArray languages = renderer.getArray("translationLanguages");
|
||||
for (int j = 0; j < languages.size(); j++) {
|
||||
final JsonObject lang = languages.getObject(j);
|
||||
final String tLanguageCode = lang.getString("languageCode");
|
||||
subtitlesToReturn.add(new SubtitlesStream.Builder()
|
||||
|
||||
// add auto-translations of this subtitle if available
|
||||
if (caption.getBoolean("isTranslatable")) {
|
||||
for (final String tLanguageCode : translationLanguages) {
|
||||
autoTranslatedSubtitles.add(new SubtitlesStream.Builder()
|
||||
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
|
||||
+ "&tlang=" + tLanguageCode, true)
|
||||
.setMediaFormat(format)
|
||||
.setLanguageCode(tLanguageCode)
|
||||
.setAutoGenerated(isAutoGenerated)
|
||||
.setAutoGenerated(true)
|
||||
.setAutoTranslated(true)
|
||||
.setBaseLanguageCode(languageCode)
|
||||
.build());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return subtitlesToReturn;
|
||||
// add auto-translations at the end for better sorting
|
||||
subtitles.addAll(autoTranslatedSubtitles);
|
||||
|
||||
return subtitles;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -12,6 +12,8 @@ import javax.annotation.Nullable;
|
||||
|
||||
public final class SubtitlesStream extends Stream {
|
||||
private final MediaFormat format;
|
||||
@Nullable
|
||||
private final Locale baseLocale;
|
||||
private final Locale locale;
|
||||
private final boolean autoGenerated;
|
||||
private final boolean autoTranslated;
|
||||
@ -31,6 +33,8 @@ public final class SubtitlesStream extends Stream {
|
||||
@Nullable
|
||||
private String manifestUrl;
|
||||
private String languageCode;
|
||||
@Nullable
|
||||
private String baseLanguageCode;
|
||||
// Use of the Boolean class instead of the primitive type needed for setter call check
|
||||
private Boolean autoGenerated;
|
||||
private Boolean autoTranslated;
|
||||
@ -142,6 +146,18 @@ public final class SubtitlesStream extends Stream {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the language code of the base language used to auto-translate
|
||||
* the {@link SubtitlesStream} to the current language code.
|
||||
*
|
||||
* @param baseLanguageCode the language code of the {@link SubtitlesStream}
|
||||
* @return this {@link Builder} instance
|
||||
*/
|
||||
public Builder setBaseLanguageCode(@Nullable final String baseLanguageCode) {
|
||||
this.baseLanguageCode = baseLanguageCode;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whether the subtitles have been auto-generated by the streaming service.
|
||||
*
|
||||
@ -222,7 +238,7 @@ public final class SubtitlesStream extends Stream {
|
||||
}
|
||||
|
||||
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
|
||||
languageCode, autoGenerated, autoTranslated, manifestUrl);
|
||||
languageCode, autoGenerated, autoTranslated, baseLanguageCode, manifestUrl);
|
||||
}
|
||||
}
|
||||
|
||||
@ -240,6 +256,9 @@ public final class SubtitlesStream extends Stream {
|
||||
* @param languageCode the language code of the stream
|
||||
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
|
||||
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
|
||||
* @param baseLanguageCode the language code of the base language used to translate
|
||||
* the subtitles to the current language
|
||||
* or null if the subtitles are not auto-translated
|
||||
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
|
||||
* otherwise null)
|
||||
*/
|
||||
@ -252,6 +271,7 @@ public final class SubtitlesStream extends Stream {
|
||||
@Nonnull final String languageCode,
|
||||
final boolean autoGenerated,
|
||||
final boolean autoTranslated,
|
||||
@Nullable final String baseLanguageCode,
|
||||
@Nullable final String manifestUrl) throws ParsingException {
|
||||
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
|
||||
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
|
||||
@ -261,6 +281,13 @@ public final class SubtitlesStream extends Stream {
|
||||
this.format = mediaFormat;
|
||||
this.autoGenerated = autoGenerated;
|
||||
this.autoTranslated = autoTranslated;
|
||||
if (baseLanguageCode == null) {
|
||||
this.baseLocale = null;
|
||||
} else {
|
||||
this.baseLocale = LocaleCompat.forLanguageTag(baseLanguageCode).orElseThrow(
|
||||
() -> new ParsingException(
|
||||
"not a valid locale language code: " + baseLanguageCode));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -337,6 +364,37 @@ public final class SubtitlesStream extends Stream {
|
||||
return locale;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the {@link Locale baseLocale} which was used to automatically translated the subtitles
|
||||
* into the current {@link #locale}.
|
||||
*
|
||||
* @return the {@link Locale baseLocale} for the subtitle translation
|
||||
* or {@code null} if the subtitle is not auto-translated
|
||||
*/
|
||||
@Nullable
|
||||
public Locale getBaseLocale() {
|
||||
return baseLocale;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the display base language name of the subtitles.
|
||||
*
|
||||
* @return the display language name of the subtitles
|
||||
*/
|
||||
public String getDisplayBaseLanguageName() {
|
||||
return locale.getDisplayName(locale);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the language tag of the subtitles.
|
||||
*
|
||||
* @return the language tag of the subtitles
|
||||
*/
|
||||
public String getBaseLanguageTag() {
|
||||
return code;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* No subtitles which are currently extracted use an {@link ItagItem}, so {@code null} is
|
||||
* returned by this method.
|
||||
@ -348,4 +406,16 @@ public final class SubtitlesStream extends Stream {
|
||||
public ItagItem getItagItem() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SubtitlesStream{"
|
||||
+ "format=" + format
|
||||
+ ", baseLocale=" + baseLocale
|
||||
+ ", locale=" + locale
|
||||
+ ", autoGenerated=" + autoGenerated
|
||||
+ ", autoTranslated=" + autoTranslated
|
||||
+ ", code='" + code + '\''
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
|
||||
@ -16,7 +17,7 @@ public final class LocaleCompat {
|
||||
|
||||
// Source: The AndroidX LocaleListCompat class's private forLanguageTagCompat() method.
|
||||
// Use Locale.forLanguageTag() on Android API level >= 21 / Java instead.
|
||||
public static Optional<Locale> forLanguageTag(final String str) {
|
||||
public static Optional<Locale> forLanguageTag(@Nonnull final String str) {
|
||||
if (str.contains("-")) {
|
||||
final String[] args = str.split("-", -1);
|
||||
if (args.length > 2) {
|
||||
|
Loading…
Reference in New Issue
Block a user