diff --git a/extractor/build.gradle b/extractor/build.gradle
index df5220dde..df47df64a 100644
--- a/extractor/build.gradle
+++ b/extractor/build.gradle
@@ -22,6 +22,10 @@ checkstyleTest {
enabled false // do not checkstyle test files
}
+ext {
+ rhinoVersion = '1.8.0'
+}
+
dependencies {
implementation project(':timeago-parser')
@@ -29,7 +33,8 @@ dependencies {
implementation 'org.jsoup:jsoup:1.18.3'
implementation "com.google.code.findbugs:jsr305:$jsr305Version"
- implementation 'org.mozilla:rhino:1.7.15'
+ implementation "org.mozilla:rhino:$rhinoVersion"
+ implementation "org.mozilla:rhino-engine:$rhinoVersion"
checkstyle "com.puppycrawl.tools:checkstyle:$checkstyleVersion"
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java
index ab30ed806..4522b51a4 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java
@@ -10,31 +10,25 @@ public final class JavaScript {
}
public static void compileOrThrow(final String function) {
- try {
- final Context context = Context.enter();
- context.setOptimizationLevel(-1);
+ try (Context context = Context.enter()) {
+ context.setInterpretedMode(true);
// If it doesn't compile it throws an exception here
context.compileString(function, null, 1, null);
- } finally {
- Context.exit();
}
}
public static String run(final String function,
final String functionName,
final String... parameters) {
- try {
- final Context context = Context.enter();
- context.setOptimizationLevel(-1);
+ try (Context context = Context.enter()) {
+ context.setInterpretedMode(true);
final ScriptableObject scope = context.initSafeStandardObjects();
context.evaluateString(scope, function, functionName, 1, null);
final Function jsFunction = (Function) scope.get(functionName, scope);
final Object result = jsFunction.call(context, scope, scope, parameters);
return result.toString();
- } finally {
- Context.exit();
}
}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java
similarity index 81%
rename from extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java
rename to extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java
index 81651d227..0cd5169dc 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java
@@ -1,18 +1,28 @@
-package org.schabi.newpipe.extractor.utils.jsextractor;
-
-import org.mozilla.javascript.Context;
-import org.mozilla.javascript.Kit;
-import org.mozilla.javascript.ObjToIntMap;
-import org.mozilla.javascript.ScriptRuntime;
-import org.schabi.newpipe.extractor.exceptions.ParsingException;
-
-/* Source: Mozilla Rhino, org.mozilla.javascript.Token
+/*
+ * Source: Mozilla Rhino, org.mozilla.javascript.TokenStream
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
- * */
-class TokenStream {
+ *
+ */
+package org.schabi.newpipe.extractor.utils.jsextractor;
+
+import org.mozilla.javascript.Kit;
+import org.mozilla.javascript.ScriptRuntime;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+/**
+ * Based on Mozilla Rhino's (v1.7.14) org.mozilla.javascript.TokenStream.
+ *
+ * Changes compared to the original:
+ *
+ * - Tailored for use by {@link Lexer}
+ * - Removed all code that is not needed here, to improve performance
+ * - Always applies ECMAScript 6 (2015) rules; the language version is no longer configurable
+ *
+ */
+class EcmaScriptTokenStream {
/*
* For chars - because we need something out-of-range
* to check. (And checking EOF by exception is annoying.)
@@ -29,127 +39,17 @@ class TokenStream {
private static final char BYTE_ORDER_MARK = '\uFEFF';
private static final char NUMERIC_SEPARATOR = '_';
- TokenStream(final String sourceString, final int lineno, final int languageVersion) {
+ EcmaScriptTokenStream(final String sourceString, final int lineno, final boolean strictMode) {
this.sourceString = sourceString;
this.sourceCursor = 0;
this.cursor = 0;
this.lineno = lineno;
- this.languageVersion = languageVersion;
+ this.strictMode = strictMode;
}
- static boolean isKeyword(final String s, final int version, final boolean isStrict) {
- return Token.EOF != stringToKeyword(s, version, isStrict);
- }
-
- private static Token stringToKeyword(final String name, final int version,
- final boolean isStrict) {
- if (version < Context.VERSION_ES6) {
- return stringToKeywordForJS(name);
- }
- return stringToKeywordForES(name, isStrict);
- }
-
- /** JavaScript 1.8 and earlier */
- private static Token stringToKeywordForJS(final String name) {
- switch (name) {
- case "break":
- return Token.BREAK;
- case "case":
- return Token.CASE;
- case "continue":
- return Token.CONTINUE;
- case "default":
- return Token.DEFAULT;
- case "delete":
- return Token.DELPROP;
- case "do":
- return Token.DO;
- case "else":
- return Token.ELSE;
- case "export":
- return Token.EXPORT;
- case "false":
- return Token.FALSE;
- case "for":
- return Token.FOR;
- case "function":
- return Token.FUNCTION;
- case "if":
- return Token.IF;
- case "in":
- return Token.IN;
- case "let":
- return Token.LET;
- case "new":
- return Token.NEW;
- case "null":
- return Token.NULL;
- case "return":
- return Token.RETURN;
- case "switch":
- return Token.SWITCH;
- case "this":
- return Token.THIS;
- case "true":
- return Token.TRUE;
- case "typeof":
- return Token.TYPEOF;
- case "var":
- return Token.VAR;
- case "void":
- return Token.VOID;
- case "while":
- return Token.WHILE;
- case "with":
- return Token.WITH;
- case "yield":
- return Token.YIELD;
- case "throw":
- return Token.THROW;
- case "catch":
- return Token.CATCH;
- case "const":
- return Token.CONST;
- case "debugger":
- return Token.DEBUGGER;
- case "finally":
- return Token.FINALLY;
- case "instanceof":
- return Token.INSTANCEOF;
- case "try":
- return Token.TRY;
- case "abstract":
- case "boolean":
- case "byte":
- case "char":
- case "class":
- case "double":
- case "enum":
- case "extends":
- case "final":
- case "float":
- case "goto":
- case "implements":
- case "import":
- case "int":
- case "interface":
- case "long":
- case "native":
- case "package":
- case "private":
- case "protected":
- case "public":
- case "short":
- case "static":
- case "super":
- case "synchronized":
- case "throws":
- case "transient":
- case "volatile":
- return Token.RESERVED;
- }
- return Token.EOF;
+ private Token stringToKeyword(final String name) {
+ return stringToKeywordForES(name, strictMode);
}
/** ECMAScript 6. */
@@ -343,40 +243,17 @@ class TokenStream {
}
ungetChar(c);
- String str = getStringFromBuffer();
+ final String str = getStringFromBuffer();
if (!containsEscape) {
// OPT we shouldn't have to make a string (object!) to
// check if it's a keyword.
// Return the corresponding token if it's a keyword
- Token result = stringToKeyword(str, languageVersion, STRICT_MODE);
+ final Token result = stringToKeyword(str);
if (result != Token.EOF) {
- if ((result == Token.LET || result == Token.YIELD)
- && languageVersion < Context.VERSION_1_7) {
- // LET and YIELD are tokens only in 1.7 and later
- string = result == Token.LET ? "let" : "yield";
- result = Token.NAME;
- }
- // Save the string in case we need to use in
- // object literal definitions.
- this.string = (String) allStrings.intern(str);
- if (result != Token.RESERVED) {
- return result;
- } else if (languageVersion >= Context.VERSION_ES6) {
- return result;
- } else if (!IS_RESERVED_KEYWORD_AS_IDENTIFIER) {
- return result;
- }
+ return result; // ES6: keywords and reserved words are always returned as tokens
}
- } else if (isKeyword(
- str,
- languageVersion,
- STRICT_MODE)) {
- // If a string contains unicodes, and converted to a keyword,
- // we convert the last character back to unicode
- str = convertLastCharToHex(str);
}
- this.string = (String) allStrings.intern(str);
return Token.NAME;
}
@@ -384,7 +261,6 @@ class TokenStream {
if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
stringBufferTop = 0;
int base = 10;
- final boolean es6 = languageVersion >= Context.VERSION_ES6;
boolean isOldOctal = false;
if (c == '0') {
@@ -392,10 +268,10 @@ class TokenStream {
if (c == 'x' || c == 'X') {
base = 16;
c = getChar();
- } else if (es6 && (c == 'o' || c == 'O')) {
+ } else if (c == 'o' || c == 'O') {
base = 8;
c = getChar();
- } else if (es6 && (c == 'b' || c == 'B')) {
+ } else if (c == 'b' || c == 'B') {
base = 2;
c = getChar();
} else if (isDigit(c)) {
@@ -438,7 +314,7 @@ class TokenStream {
throw new ParsingException("number format error");
}
- if (es6 && c == 'n') {
+ if (c == 'n') {
c = getChar();
} else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
if (c == '.') {
@@ -466,7 +342,7 @@ class TokenStream {
}
}
ungetChar(c);
- this.string = getStringFromBuffer();
+ tokenEnd = cursor;
return Token.NUMBER;
}
@@ -562,7 +438,7 @@ class TokenStream {
escapeVal = Kit.xDigitToInt(c, 0);
if (escapeVal < 0) {
addToString('x');
- continue strLoop;
+ continue;
}
final int c1 = c;
c = getChar();
@@ -570,7 +446,7 @@ class TokenStream {
if (escapeVal < 0) {
addToString('x');
addToString(c1);
- continue strLoop;
+ continue;
}
// got 2 hex digits
c = escapeVal;
@@ -580,7 +456,7 @@ class TokenStream {
// Remove line terminator after escape to follow
// SpiderMonkey and C/C++
c = getChar();
- continue strLoop;
+ continue;
default:
if ('0' <= c && c < '8') {
@@ -605,8 +481,7 @@ class TokenStream {
c = getChar(false);
}
- final String str = getStringFromBuffer();
- this.string = (String) allStrings.intern(str);
+ tokenEnd = cursor;
return quoteChar == '`' ? Token.TEMPLATE_LITERAL : Token.STRING;
}
@@ -722,14 +597,13 @@ class TokenStream {
return Token.GT;
case '*':
- if (languageVersion >= Context.VERSION_ES6) {
- if (matchChar('*')) {
- if (matchChar('=')) {
- return Token.ASSIGN_EXP;
- }
- return Token.EXP;
+ if (matchChar('*')) {
+ if (matchChar('=')) {
+ return Token.ASSIGN_EXP;
}
+ return Token.EXP;
}
+
if (matchChar('=')) {
return Token.ASSIGN_MUL;
}
@@ -920,7 +794,6 @@ class TokenStream {
}
if (peekChar() == '*') {
tokenEnd = cursor - 1;
- this.string = new String(stringBuffer, 0, stringBufferTop);
throw new ParsingException("msg.unterminated.re.lit");
}
}
@@ -944,7 +817,6 @@ class TokenStream {
}
addToString(c);
}
- final int reEnd = stringBufferTop;
while (true) {
c = getCharIgnoreLineEnd();
@@ -959,7 +831,6 @@ class TokenStream {
}
tokenEnd = start + stringBufferTop + 2; // include slashes
- this.string = new String(stringBuffer, 0, reEnd);
}
private String getStringFromBuffer() {
@@ -1019,7 +890,6 @@ class TokenStream {
for (;;) {
if (sourceCursor == sourceString.length()) {
- hitEOF = true;
return EOF_CHAR;
}
cursor++;
@@ -1031,7 +901,6 @@ class TokenStream {
continue;
}
lineEndChar = -1;
- lineStart = sourceCursor - 1;
lineno++;
}
@@ -1078,42 +947,6 @@ class TokenStream {
tokenEnd = cursor;
}
- /** Return the current position of the scanner cursor. */
- public int getCursor() {
- return cursor;
- }
-
- /** Return the absolute source offset of the last scanned token. */
- public int getTokenBeg() {
- return tokenBeg;
- }
-
- /** Return the absolute source end-offset of the last scanned token. */
- public int getTokenEnd() {
- return tokenEnd;
- }
-
- /** Return tokenEnd - tokenBeg */
- public int getTokenLength() {
- return tokenEnd - tokenBeg;
- }
-
- public String getTokenRaw() {
- return sourceString.substring(tokenBeg, tokenEnd);
- }
-
- private static String convertLastCharToHex(final String str) {
- final int lastIndex = str.length() - 1;
- final StringBuilder buf = new StringBuilder(str.substring(0, lastIndex));
- buf.append("\\u");
- final String hexCode = Integer.toHexString(str.charAt(lastIndex));
- for (int i = 0; i < 4 - hexCode.length(); ++i) {
- buf.append('0');
- }
- buf.append(hexCode);
- return buf.toString();
- }
-
public Token nextToken() throws ParsingException {
Token tt = getToken();
while (tt == Token.EOL || tt == Token.COMMENT) {
@@ -1124,19 +957,14 @@ class TokenStream {
// stuff other than whitespace since start of line
private boolean dirtyLine;
- private String string = "";
private char[] stringBuffer = new char[128];
private int stringBufferTop;
- private final ObjToIntMap allStrings = new ObjToIntMap(50);
// Room to backtrace from to < on failed match of the last - in