fix html comments

This commit is contained in:
yausername 2019-11-16 03:20:35 +05:30
parent 193442d01c
commit bb5ad49fac

View File

@ -5,6 +5,8 @@ import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Locale; import java.util.Locale;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.ServiceList; import org.schabi.newpipe.extractor.ServiceList;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -59,8 +61,13 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
@Override @Override
public String getCommentText() throws ParsingException { public String getCommentText() throws ParsingException {
String htmlText = JsonUtils.getString(item, "text"); String htmlText = JsonUtils.getString(item, "text");
try {
Document doc = Jsoup.parse(htmlText);
return doc.body().text();
}catch(Exception e) {
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
} }
}
@Override @Override
public String getCommentId() throws ParsingException { public String getCommentId() throws ParsingException {