[YouTube] Add support for extracting auto-translated captions

Closes TeamNewPipe/NewPipeExtractor#977
Based on and addresses TeamNewPipe/NewPipe#8023
This commit is contained in:
TobiGr 2022-12-05 18:03:34 +01:00
parent fafd471606
commit ff030ad297
3 changed files with 64 additions and 8 deletions

View File

@ -476,6 +476,7 @@ public class PeertubeStreamExtractor extends StreamExtractor {
.setMediaFormat(fmt) .setMediaFormat(fmt)
.setLanguageCode(languageCode) .setLanguageCode(languageCode)
.setAutoGenerated(false) .setAutoGenerated(false)
.setAutoTranslated(false)
.build()); .build());
} }
} }

View File

@ -665,7 +665,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
@Nonnull @Nonnull
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws ParsingException { public List<SubtitlesStream> getSubtitles(@Nonnull final MediaFormat format)
throws ParsingException {
assertPageFetched(); assertPageFetched();
// We cannot store the subtitles list because the media format may change // We cannot store the subtitles list because the media format may change
@ -673,13 +674,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final JsonObject renderer = playerResponse.getObject("captions") final JsonObject renderer = playerResponse.getObject("captions")
.getObject("playerCaptionsTracklistRenderer"); .getObject("playerCaptionsTracklistRenderer");
final JsonArray captionsArray = renderer.getArray("captionTracks"); final JsonArray captionsArray = renderer.getArray("captionTracks");
// TODO: use this to apply auto translation to different language from a source language
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
for (int i = 0; i < captionsArray.size(); i++) { for (int i = 0; i < captionsArray.size(); i++) {
final String languageCode = captionsArray.getObject(i).getString("languageCode"); final JsonObject caption = captionsArray.getObject(i);
final String baseUrl = captionsArray.getObject(i).getString("baseUrl"); final String languageCode = caption.getString("languageCode");
final String vssId = captionsArray.getObject(i).getString("vssId"); final String baseUrl = caption.getString("baseUrl");
final String vssId = caption.getString("vssId");
if (languageCode != null && baseUrl != null && vssId != null) { if (languageCode != null && baseUrl != null && vssId != null) {
final boolean isAutoGenerated = vssId.startsWith("a."); final boolean isAutoGenerated = vssId.startsWith("a.");
@ -694,7 +694,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
.setMediaFormat(format) .setMediaFormat(format)
.setLanguageCode(languageCode) .setLanguageCode(languageCode)
.setAutoGenerated(isAutoGenerated) .setAutoGenerated(isAutoGenerated)
.setAutoTranslated(false)
.build()); .build());
if (i == 0 && caption.getBoolean("isTranslatable")
&& renderer.has("translationLanguages")) {
final JsonArray languages = renderer.getArray("translationLanguages");
for (int j = 0; j < languages.size(); j++) {
final JsonObject lang = languages.getObject(j);
final String tLanguageCode = lang.getString("languageCode");
subtitlesToReturn.add(new SubtitlesStream.Builder()
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
+ "&tlang=" + tLanguageCode, true)
.setMediaFormat(format)
.setLanguageCode(tLanguageCode)
.setAutoGenerated(isAutoGenerated)
.setAutoTranslated(true)
.build());
}
}
} }
} }

View File

@ -14,6 +14,7 @@ public final class SubtitlesStream extends Stream {
private final MediaFormat format; private final MediaFormat format;
private final Locale locale; private final Locale locale;
private final boolean autoGenerated; private final boolean autoGenerated;
private final boolean autoTranslated;
private final String code; private final String code;
/** /**
@ -32,6 +33,7 @@ public final class SubtitlesStream extends Stream {
private String languageCode; private String languageCode;
// Use of the Boolean class instead of the primitive type needed for setter call check // Use of the Boolean class instead of the primitive type needed for setter call check
private Boolean autoGenerated; private Boolean autoGenerated;
private Boolean autoTranslated;
/** /**
* Create a new {@link Builder} instance with default values. * Create a new {@link Builder} instance with default values.
@ -152,6 +154,18 @@ public final class SubtitlesStream extends Stream {
return this; return this;
} }
/**
* Set whether the subtitles have been automatically translated
* (i.e. by a machine like Google Translator) by the streaming service.
*
* <p>
* This value must be set before the stream is built: the builder throws an
* {@link IllegalStateException} when it has never been specified.
* </p>
*
* @param autoTranslated whether the subtitles have been automatically translated by the
* streaming service
* @return this {@link Builder} instance
*/
public Builder setAutoTranslated(final boolean autoTranslated) {
this.autoTranslated = autoTranslated;
return this;
}
/** /**
* Build a {@link SubtitlesStream} using the builder's current values. * Build a {@link SubtitlesStream} using the builder's current values.
* *
@ -196,13 +210,19 @@ public final class SubtitlesStream extends Stream {
+ "with setIsAutoGenerated."); + "with setIsAutoGenerated.");
} }
if (autoTranslated == null) {
throw new IllegalStateException("The subtitles stream has been not set as an "
+ "automatically translated subtitles stream or not. "
+ "Please specify this information with setIsAutoTranslated.");
}
if (id == null) { if (id == null) {
id = languageCode + (mediaFormat != null ? "." + mediaFormat.suffix id = languageCode + (mediaFormat != null ? "." + mediaFormat.suffix
: ""); : "");
} }
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod, return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
languageCode, autoGenerated, manifestUrl); languageCode, autoGenerated, autoTranslated, manifestUrl);
} }
} }
@ -219,6 +239,7 @@ public final class SubtitlesStream extends Stream {
* @param deliveryMethod the {@link DeliveryMethod} of the stream * @param deliveryMethod the {@link DeliveryMethod} of the stream
* @param languageCode the language code of the stream * @param languageCode the language code of the stream
* @param autoGenerated whether the subtitles are auto-generated by the streaming service * @param autoGenerated whether the subtitles are auto-generated by the streaming service
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
* @param manifestUrl the URL of the manifest this stream comes from (if applicable, * @param manifestUrl the URL of the manifest this stream comes from (if applicable,
* otherwise null) * otherwise null)
*/ */
@ -230,6 +251,7 @@ public final class SubtitlesStream extends Stream {
@Nonnull final DeliveryMethod deliveryMethod, @Nonnull final DeliveryMethod deliveryMethod,
@Nonnull final String languageCode, @Nonnull final String languageCode,
final boolean autoGenerated, final boolean autoGenerated,
final boolean autoTranslated,
@Nullable final String manifestUrl) throws ParsingException { @Nullable final String manifestUrl) throws ParsingException {
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl); super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow( this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
@ -238,6 +260,7 @@ public final class SubtitlesStream extends Stream {
this.code = languageCode; this.code = languageCode;
this.format = mediaFormat; this.format = mediaFormat;
this.autoGenerated = autoGenerated; this.autoGenerated = autoGenerated;
this.autoTranslated = autoTranslated;
} }
/** /**
@ -250,7 +273,7 @@ public final class SubtitlesStream extends Stream {
} }
/** /**
* Return whether if the subtitles are auto-generated. * Return whether the subtitles are auto-generated.
* <p> * <p>
* Some streaming services can generate subtitles for their contents, like YouTube. * Some streaming services can generate subtitles for their contents, like YouTube.
* </p> * </p>
@ -261,6 +284,21 @@ public final class SubtitlesStream extends Stream {
return autoGenerated; return autoGenerated;
} }
/**
* Return whether the subtitles are translated automatically by a machine.
*
* <p>
* Some streaming services provide automatically translated subtitles.
* YouTube, for example, uses Google translator to generate translated subtitles.
* Automatically translated subtitles might not coincide completely with the original text.
* </p>
*
* @return {@code true} if the subtitles are auto-translated, {@code false} otherwise
*/
public boolean isAutoTranslated() {
return autoTranslated;
}
/** /**
* {@inheritDoc} * {@inheritDoc}
*/ */