Fixed UTF-8 BOM (U+FEFF) appearing in YouTube comment text

This commit is contained in:
Ritvik Saraf 2019-03-01 00:10:29 +05:30
parent e7e411dc29
commit 72262707bf
2 changed files with 16 additions and 3 deletions

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
@ -62,7 +63,9 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getCommentText() throws ParsingException {
try {
return YoutubeCommentsExtractor.getYoutubeText(JsonUtils.getObject(json, "contentText"));
String commentText = YoutubeCommentsExtractor.getYoutubeText(JsonUtils.getObject(json, "contentText"));
// YouTube adds U+FEFF (byte order mark) to some comments, e.g. on https://www.youtube.com/watch?v=Nj4F63E59io
return Utils.removeUTF8BOM(commentText);
} catch (Exception e) {
throw new ParsingException("Could not get comment text", e);
}

View File

@ -1,13 +1,13 @@
package org.schabi.newpipe.extractor.utils;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.List;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
public class Utils {
private Utils() {
@ -120,4 +120,14 @@ public class Utils {
throw e;
}
}
/**
 * Removes every leading and trailing U+FEFF (byte order mark / zero-width
 * no-break space) character from the given string.
 *
 * <p>Occurrences of U+FEFF in the middle of the string are left untouched.
 * Unlike a single {@code startsWith}/{@code endsWith} check, this strips
 * repeated marks, so input such as two BOMs followed by text comes back clean.
 *
 * @param s the string to clean; must not be {@code null}
 * @return {@code s} without leading or trailing U+FEFF characters; the empty
 *         string if {@code s} consists solely of U+FEFF characters
 * @throws NullPointerException if {@code s} is {@code null}
 */
public static String removeUTF8BOM(String s) {
    final char bom = '\uFEFF';
    int start = 0;
    int end = s.length();
    // Advance past all leading marks.
    while (start < end && s.charAt(start) == bom) {
        start++;
    }
    // Retreat past all trailing marks; never cross the start index.
    while (end > start && s.charAt(end - 1) == bom) {
        end--;
    }
    return s.substring(start, end);
}
}