diff --git a/extractor/build.gradle b/extractor/build.gradle index df5220dde..df47df64a 100644 --- a/extractor/build.gradle +++ b/extractor/build.gradle @@ -22,6 +22,10 @@ checkstyleTest { enabled false // do not checkstyle test files } +ext { + rhinoVersion = '1.8.0' +} + dependencies { implementation project(':timeago-parser') @@ -29,7 +33,8 @@ dependencies { implementation 'org.jsoup:jsoup:1.18.3' implementation "com.google.code.findbugs:jsr305:$jsr305Version" - implementation 'org.mozilla:rhino:1.7.15' + implementation "org.mozilla:rhino:$rhinoVersion" + implementation "org.mozilla:rhino-engine:$rhinoVersion" checkstyle "com.puppycrawl.tools:checkstyle:$checkstyleVersion" diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java index ab30ed806..4522b51a4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JavaScript.java @@ -10,31 +10,25 @@ public final class JavaScript { } public static void compileOrThrow(final String function) { - try { - final Context context = Context.enter(); - context.setOptimizationLevel(-1); + try (Context context = Context.enter()) { + context.setInterpretedMode(true); // If it doesn't compile it throws an exception here context.compileString(function, null, 1, null); - } finally { - Context.exit(); } } public static String run(final String function, final String functionName, final String... parameters) { - try { - final Context context = Context.enter(); - context.setOptimizationLevel(-1); + try (Context context = Context.enter()) { + context.setInterpretedMode(true); final ScriptableObject scope = context.initSafeStandardObjects(); context.evaluateString(scope, function, functionName, 1, null); final Function jsFunction = (Function) scope.get(functionName, scope); final Object result = jsFunction.call(context, scope, scope, parameters); return result.toString(); - } finally { - Context.exit(); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java similarity index 81% rename from extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java rename to extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java index 81651d227..0cd5169dc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java @@ -1,18 +1,28 @@ -package org.schabi.newpipe.extractor.utils.jsextractor; - -import org.mozilla.javascript.Context; -import org.mozilla.javascript.Kit; -import org.mozilla.javascript.ObjToIntMap; -import org.mozilla.javascript.ScriptRuntime; -import org.schabi.newpipe.extractor.exceptions.ParsingException; - -/* Source: Mozilla Rhino, org.mozilla.javascript.Token +/* + * Source: Mozilla Rhino, org.mozilla.javascript.TokenStream * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * */ -class TokenStream { + * + */ +package org.schabi.newpipe.extractor.utils.jsextractor; + +import org.mozilla.javascript.Kit; +import org.mozilla.javascript.ScriptRuntime; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +/** + * Based on Mozilla Rhino's (v1.7.14) org.mozilla.javascript.TokenStream + *

+ * Changes: + *

+ */ +class EcmaScriptTokenStream { /* * For chars - because we need something out-of-range * to check. (And checking EOF by exception is annoying.) @@ -29,127 +39,17 @@ class TokenStream { private static final char BYTE_ORDER_MARK = '\uFEFF'; private static final char NUMERIC_SEPARATOR = '_'; - TokenStream(final String sourceString, final int lineno, final int languageVersion) { + EcmaScriptTokenStream(final String sourceString, final int lineno, final boolean strictMode) { this.sourceString = sourceString; this.sourceCursor = 0; this.cursor = 0; this.lineno = lineno; - this.languageVersion = languageVersion; + this.strictMode = strictMode; } - static boolean isKeyword(final String s, final int version, final boolean isStrict) { - return Token.EOF != stringToKeyword(s, version, isStrict); - } - - private static Token stringToKeyword(final String name, final int version, - final boolean isStrict) { - if (version < Context.VERSION_ES6) { - return stringToKeywordForJS(name); - } - return stringToKeywordForES(name, isStrict); - } - - /** JavaScript 1.8 and earlier */ - private static Token stringToKeywordForJS(final String name) { - switch (name) { - case "break": - return Token.BREAK; - case "case": - return Token.CASE; - case "continue": - return Token.CONTINUE; - case "default": - return Token.DEFAULT; - case "delete": - return Token.DELPROP; - case "do": - return Token.DO; - case "else": - return Token.ELSE; - case "export": - return Token.EXPORT; - case "false": - return Token.FALSE; - case "for": - return Token.FOR; - case "function": - return Token.FUNCTION; - case "if": - return Token.IF; - case "in": - return Token.IN; - case "let": - return Token.LET; - case "new": - return Token.NEW; - case "null": - return Token.NULL; - case "return": - return Token.RETURN; - case "switch": - return Token.SWITCH; - case "this": - return Token.THIS; - case "true": - return Token.TRUE; - case "typeof": - return Token.TYPEOF; - case "var": - return Token.VAR; - case "void": - return Token.VOID; - case "while": - return Token.WHILE; - case "with": - return Token.WITH; - case "yield": - return Token.YIELD; - case "throw": - return Token.THROW; - case "catch": - return Token.CATCH; - case "const": - return Token.CONST; - case "debugger": - return Token.DEBUGGER; - case "finally": - return Token.FINALLY; - case "instanceof": - return Token.INSTANCEOF; - case "try": - return Token.TRY; - case "abstract": - case "boolean": - case "byte": - case "char": - case "class": - case "double": - case "enum": - case "extends": - case "final": - case "float": - case "goto": - case "implements": - case "import": - case "int": - case "interface": - case "long": - case "native": - case "package": - case "private": - case "protected": - case "public": - case "short": - case "static": - case "super": - case "synchronized": - case "throws": - case "transient": - case "volatile": - return Token.RESERVED; - } - return Token.EOF; + private Token stringToKeyword(final String name) { + return stringToKeywordForES(name, strictMode); } /** ECMAScript 6. */ @@ -343,40 +243,17 @@ class TokenStream { } ungetChar(c); - String str = getStringFromBuffer(); + final String str = getStringFromBuffer(); if (!containsEscape) { // OPT we shouldn't have to make a string (object!) to // check if it's a keyword. // Return the corresponding token if it's a keyword - Token result = stringToKeyword(str, languageVersion, STRICT_MODE); + final Token result = stringToKeyword(str); if (result != Token.EOF) { - if ((result == Token.LET || result == Token.YIELD) - && languageVersion < Context.VERSION_1_7) { - // LET and YIELD are tokens only in 1.7 and later - string = result == Token.LET ? "let" : "yield"; - result = Token.NAME; - } - // Save the string in case we need to use in - // object literal definitions. - this.string = (String) allStrings.intern(str); - if (result != Token.RESERVED) { - return result; - } else if (languageVersion >= Context.VERSION_ES6) { - return result; - } else if (!IS_RESERVED_KEYWORD_AS_IDENTIFIER) { - return result; - } + return result; // Always needed due to ECMAScript } - } else if (isKeyword( - str, - languageVersion, - STRICT_MODE)) { - // If a string contains unicodes, and converted to a keyword, - // we convert the last character back to unicode - str = convertLastCharToHex(str); } - this.string = (String) allStrings.intern(str); return Token.NAME; } @@ -384,7 +261,6 @@ class TokenStream { if (isDigit(c) || (c == '.' && isDigit(peekChar()))) { stringBufferTop = 0; int base = 10; - final boolean es6 = languageVersion >= Context.VERSION_ES6; boolean isOldOctal = false; if (c == '0') { @@ -392,10 +268,10 @@ class TokenStream { if (c == 'x' || c == 'X') { base = 16; c = getChar(); - } else if (es6 && (c == 'o' || c == 'O')) { + } else if (c == 'o' || c == 'O') { base = 8; c = getChar(); - } else if (es6 && (c == 'b' || c == 'B')) { + } else if (c == 'b' || c == 'B') { base = 2; c = getChar(); } else if (isDigit(c)) { @@ -438,7 +314,7 @@ class TokenStream { throw new ParsingException("number format error"); } - if (es6 && c == 'n') { + if (c == 'n') { c = getChar(); } else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) { if (c == '.') { @@ -466,7 +342,7 @@ class TokenStream { } } ungetChar(c); - this.string = getStringFromBuffer(); + tokenEnd = cursor; return Token.NUMBER; } @@ -562,7 +438,7 @@ class TokenStream { escapeVal = Kit.xDigitToInt(c, 0); if (escapeVal < 0) { addToString('x'); - continue strLoop; + continue; } final int c1 = c; c = getChar(); @@ -570,7 +446,7 @@ class TokenStream { if (escapeVal < 0) { addToString('x'); addToString(c1); - continue strLoop; + continue; } // got 2 hex digits c = escapeVal; @@ -580,7 +456,7 @@ class TokenStream { // Remove line terminator after escape to follow // SpiderMonkey and C/C++ c = getChar(); - continue strLoop; + continue; default: if ('0' <= c && c < '8') { @@ -605,8 +481,7 @@ class TokenStream { c = getChar(false); } - final String str = getStringFromBuffer(); - this.string = (String) allStrings.intern(str); + tokenEnd = cursor; return quoteChar == '`' ? Token.TEMPLATE_LITERAL : Token.STRING; } @@ -722,14 +597,13 @@ class TokenStream { return Token.GT; case '*': - if (languageVersion >= Context.VERSION_ES6) { - if (matchChar('*')) { - if (matchChar('=')) { - return Token.ASSIGN_EXP; - } - return Token.EXP; + if (matchChar('*')) { + if (matchChar('=')) { + return Token.ASSIGN_EXP; } + return Token.EXP; } + if (matchChar('=')) { return Token.ASSIGN_MUL; } @@ -920,7 +794,6 @@ class TokenStream { } if (peekChar() == '*') { tokenEnd = cursor - 1; - this.string = new String(stringBuffer, 0, stringBufferTop); throw new ParsingException("msg.unterminated.re.lit"); } } @@ -944,7 +817,6 @@ class TokenStream { } addToString(c); } - final int reEnd = stringBufferTop; while (true) { c = getCharIgnoreLineEnd(); @@ -959,7 +831,6 @@ class TokenStream { } tokenEnd = start + stringBufferTop + 2; // include slashes - this.string = new String(stringBuffer, 0, reEnd); } private String getStringFromBuffer() { @@ -1019,7 +890,6 @@ class TokenStream { for (;;) { if (sourceCursor == sourceString.length()) { - hitEOF = true; return EOF_CHAR; } cursor++; @@ -1031,7 +901,6 @@ class TokenStream { continue; } lineEndChar = -1; - lineStart = sourceCursor - 1; lineno++; } @@ -1078,42 +947,6 @@ class TokenStream { tokenEnd = cursor; } - /** Return the current position of the scanner cursor. */ - public int getCursor() { - return cursor; - } - - /** Return the absolute source offset of the last scanned token. */ - public int getTokenBeg() { - return tokenBeg; - } - - /** Return the absolute source end-offset of the last scanned token. */ - public int getTokenEnd() { - return tokenEnd; - } - - /** Return tokenEnd - tokenBeg */ - public int getTokenLength() { - return tokenEnd - tokenBeg; - } - - public String getTokenRaw() { - return sourceString.substring(tokenBeg, tokenEnd); - } - - private static String convertLastCharToHex(final String str) { - final int lastIndex = str.length() - 1; - final StringBuilder buf = new StringBuilder(str.substring(0, lastIndex)); - buf.append("\\u"); - final String hexCode = Integer.toHexString(str.charAt(lastIndex)); - for (int i = 0; i < 4 - hexCode.length(); ++i) { - buf.append('0'); - } - buf.append(hexCode); - return buf.toString(); - } - public Token nextToken() throws ParsingException { Token tt = getToken(); while (tt == Token.EOL || tt == Token.COMMENT) { @@ -1124,19 +957,14 @@ class TokenStream { // stuff other than whitespace since start of line private boolean dirtyLine; - private String string = ""; private char[] stringBuffer = new char[128]; private int stringBufferTop; - private final ObjToIntMap allStrings = new ObjToIntMap(50); // Room to backtrace from to < on failed match of the last - in