mirror of
https://github.com/TeamNewPipe/NewPipeExtractor.git
synced 2025-04-27 23:40:36 +05:30
Rework TokenStream
This commit is contained in:
parent
77ee25e3b6
commit
0c3ac0a308
@ -1,17 +1,28 @@
|
|||||||
package org.schabi.newpipe.extractor.utils.jsextractor;
|
/*
|
||||||
|
* Source: Mozilla Rhino, org.mozilla.javascript.TokenStream
|
||||||
import org.mozilla.javascript.Context;
|
|
||||||
import org.mozilla.javascript.Kit;
|
|
||||||
import org.mozilla.javascript.ScriptRuntime;
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
||||||
|
|
||||||
/* Source: Mozilla Rhino, org.mozilla.javascript.Token
|
|
||||||
*
|
*
|
||||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
* */
|
*
|
||||||
class TokenStream {
|
*/
|
||||||
|
package org.schabi.newpipe.extractor.utils.jsextractor;
|
||||||
|
|
||||||
|
import org.mozilla.javascript.Kit;
|
||||||
|
import org.mozilla.javascript.ScriptRuntime;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Based on Mozilla Rhino's (v1.7.14) org.mozilla.javascript.TokenStream
|
||||||
|
* <p/>
|
||||||
|
* Changes:
|
||||||
|
* <ul>
|
||||||
|
* <li>Tailored for {@link Lexer}</li>
|
||||||
|
* <li>Removed all not needed code to improve performance</li>
|
||||||
|
* <li>Optimized for ECMAScript6/2015</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
class EcmaScriptTokenStream {
|
||||||
/*
|
/*
|
||||||
* For chars - because we need something out-of-range
|
* For chars - because we need something out-of-range
|
||||||
* to check. (And checking EOF by exception is annoying.)
|
* to check. (And checking EOF by exception is annoying.)
|
||||||
@ -28,125 +39,17 @@ class TokenStream {
|
|||||||
private static final char BYTE_ORDER_MARK = '\uFEFF';
|
private static final char BYTE_ORDER_MARK = '\uFEFF';
|
||||||
private static final char NUMERIC_SEPARATOR = '_';
|
private static final char NUMERIC_SEPARATOR = '_';
|
||||||
|
|
||||||
TokenStream(final String sourceString, final int lineno, final int languageVersion) {
|
EcmaScriptTokenStream(final String sourceString, final int lineno, final boolean strictMode) {
|
||||||
this.sourceString = sourceString;
|
this.sourceString = sourceString;
|
||||||
this.sourceCursor = 0;
|
this.sourceCursor = 0;
|
||||||
this.cursor = 0;
|
this.cursor = 0;
|
||||||
|
|
||||||
this.lineno = lineno;
|
this.lineno = lineno;
|
||||||
this.languageVersion = languageVersion;
|
this.strictMode = strictMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Token stringToKeyword(
|
private Token stringToKeyword(final String name) {
|
||||||
final String name,
|
return stringToKeywordForES(name, strictMode);
|
||||||
final int version,
|
|
||||||
final boolean isStrict) {
|
|
||||||
if (version < Context.VERSION_ES6) {
|
|
||||||
return stringToKeywordForJS(name);
|
|
||||||
}
|
|
||||||
return stringToKeywordForES(name, isStrict);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** JavaScript 1.8 and earlier */
|
|
||||||
private static Token stringToKeywordForJS(final String name) {
|
|
||||||
switch (name) {
|
|
||||||
case "break":
|
|
||||||
return Token.BREAK;
|
|
||||||
case "case":
|
|
||||||
return Token.CASE;
|
|
||||||
case "continue":
|
|
||||||
return Token.CONTINUE;
|
|
||||||
case "default":
|
|
||||||
return Token.DEFAULT;
|
|
||||||
case "delete":
|
|
||||||
return Token.DELPROP;
|
|
||||||
case "do":
|
|
||||||
return Token.DO;
|
|
||||||
case "else":
|
|
||||||
return Token.ELSE;
|
|
||||||
case "export":
|
|
||||||
return Token.EXPORT;
|
|
||||||
case "false":
|
|
||||||
return Token.FALSE;
|
|
||||||
case "for":
|
|
||||||
return Token.FOR;
|
|
||||||
case "function":
|
|
||||||
return Token.FUNCTION;
|
|
||||||
case "if":
|
|
||||||
return Token.IF;
|
|
||||||
case "in":
|
|
||||||
return Token.IN;
|
|
||||||
case "let":
|
|
||||||
return Token.LET;
|
|
||||||
case "new":
|
|
||||||
return Token.NEW;
|
|
||||||
case "null":
|
|
||||||
return Token.NULL;
|
|
||||||
case "return":
|
|
||||||
return Token.RETURN;
|
|
||||||
case "switch":
|
|
||||||
return Token.SWITCH;
|
|
||||||
case "this":
|
|
||||||
return Token.THIS;
|
|
||||||
case "true":
|
|
||||||
return Token.TRUE;
|
|
||||||
case "typeof":
|
|
||||||
return Token.TYPEOF;
|
|
||||||
case "var":
|
|
||||||
return Token.VAR;
|
|
||||||
case "void":
|
|
||||||
return Token.VOID;
|
|
||||||
case "while":
|
|
||||||
return Token.WHILE;
|
|
||||||
case "with":
|
|
||||||
return Token.WITH;
|
|
||||||
case "yield":
|
|
||||||
return Token.YIELD;
|
|
||||||
case "throw":
|
|
||||||
return Token.THROW;
|
|
||||||
case "catch":
|
|
||||||
return Token.CATCH;
|
|
||||||
case "const":
|
|
||||||
return Token.CONST;
|
|
||||||
case "debugger":
|
|
||||||
return Token.DEBUGGER;
|
|
||||||
case "finally":
|
|
||||||
return Token.FINALLY;
|
|
||||||
case "instanceof":
|
|
||||||
return Token.INSTANCEOF;
|
|
||||||
case "try":
|
|
||||||
return Token.TRY;
|
|
||||||
case "abstract":
|
|
||||||
case "boolean":
|
|
||||||
case "byte":
|
|
||||||
case "char":
|
|
||||||
case "class":
|
|
||||||
case "double":
|
|
||||||
case "enum":
|
|
||||||
case "extends":
|
|
||||||
case "final":
|
|
||||||
case "float":
|
|
||||||
case "goto":
|
|
||||||
case "implements":
|
|
||||||
case "import":
|
|
||||||
case "int":
|
|
||||||
case "interface":
|
|
||||||
case "long":
|
|
||||||
case "native":
|
|
||||||
case "package":
|
|
||||||
case "private":
|
|
||||||
case "protected":
|
|
||||||
case "public":
|
|
||||||
case "short":
|
|
||||||
case "static":
|
|
||||||
case "super":
|
|
||||||
case "synchronized":
|
|
||||||
case "throws":
|
|
||||||
case "transient":
|
|
||||||
case "volatile":
|
|
||||||
return Token.RESERVED;
|
|
||||||
}
|
|
||||||
return Token.EOF;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ECMAScript 6. */
|
/** ECMAScript 6. */
|
||||||
@ -346,19 +249,9 @@ class TokenStream {
|
|||||||
// check if it's a keyword.
|
// check if it's a keyword.
|
||||||
|
|
||||||
// Return the corresponding token if it's a keyword
|
// Return the corresponding token if it's a keyword
|
||||||
Token result = stringToKeyword(str, languageVersion, STRICT_MODE);
|
final Token result = stringToKeyword(str);
|
||||||
if (result != Token.EOF) {
|
if (result != Token.EOF) {
|
||||||
if ((result == Token.LET || result == Token.YIELD)
|
return result; // Always needed due to ECMAScript
|
||||||
&& languageVersion < Context.VERSION_1_7) {
|
|
||||||
result = Token.NAME;
|
|
||||||
}
|
|
||||||
// Save the string in case we need to use in
|
|
||||||
// object literal definitions.
|
|
||||||
if (result != Token.RESERVED
|
|
||||||
|| languageVersion >= Context.VERSION_ES6
|
|
||||||
|| !IS_RESERVED_KEYWORD_AS_IDENTIFIER) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Token.NAME;
|
return Token.NAME;
|
||||||
@ -368,7 +261,6 @@ class TokenStream {
|
|||||||
if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
|
if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
|
||||||
stringBufferTop = 0;
|
stringBufferTop = 0;
|
||||||
int base = 10;
|
int base = 10;
|
||||||
final boolean es6 = languageVersion >= Context.VERSION_ES6;
|
|
||||||
boolean isOldOctal = false;
|
boolean isOldOctal = false;
|
||||||
|
|
||||||
if (c == '0') {
|
if (c == '0') {
|
||||||
@ -376,10 +268,10 @@ class TokenStream {
|
|||||||
if (c == 'x' || c == 'X') {
|
if (c == 'x' || c == 'X') {
|
||||||
base = 16;
|
base = 16;
|
||||||
c = getChar();
|
c = getChar();
|
||||||
} else if (es6 && (c == 'o' || c == 'O')) {
|
} else if (c == 'o' || c == 'O') {
|
||||||
base = 8;
|
base = 8;
|
||||||
c = getChar();
|
c = getChar();
|
||||||
} else if (es6 && (c == 'b' || c == 'B')) {
|
} else if (c == 'b' || c == 'B') {
|
||||||
base = 2;
|
base = 2;
|
||||||
c = getChar();
|
c = getChar();
|
||||||
} else if (isDigit(c)) {
|
} else if (isDigit(c)) {
|
||||||
@ -422,7 +314,7 @@ class TokenStream {
|
|||||||
throw new ParsingException("number format error");
|
throw new ParsingException("number format error");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (es6 && c == 'n') {
|
if (c == 'n') {
|
||||||
c = getChar();
|
c = getChar();
|
||||||
} else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
|
} else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
|
||||||
if (c == '.') {
|
if (c == '.') {
|
||||||
@ -705,7 +597,7 @@ class TokenStream {
|
|||||||
return Token.GT;
|
return Token.GT;
|
||||||
|
|
||||||
case '*':
|
case '*':
|
||||||
if (languageVersion >= Context.VERSION_ES6 && matchChar('*')) {
|
if (matchChar('*')) {
|
||||||
if (matchChar('=')) {
|
if (matchChar('=')) {
|
||||||
return Token.ASSIGN_EXP;
|
return Token.ASSIGN_EXP;
|
||||||
}
|
}
|
||||||
@ -1080,18 +972,16 @@ class TokenStream {
|
|||||||
|
|
||||||
// sourceCursor is an index into a small buffer that keeps a
|
// sourceCursor is an index into a small buffer that keeps a
|
||||||
// sliding window of the source stream.
|
// sliding window of the source stream.
|
||||||
int sourceCursor;
|
private int sourceCursor;
|
||||||
|
|
||||||
// cursor is a monotonically increasing index into the original
|
// cursor is a monotonically increasing index into the original
|
||||||
// source stream, tracking exactly how far scanning has progressed.
|
// source stream, tracking exactly how far scanning has progressed.
|
||||||
// Its value is the index of the next character to be scanned.
|
// Its value is the index of the next character to be scanned.
|
||||||
int cursor;
|
private int cursor;
|
||||||
|
|
||||||
// Record start and end positions of last scanned token.
|
// Record start and end positions of last scanned token.
|
||||||
int tokenBeg;
|
int tokenBeg;
|
||||||
int tokenEnd;
|
int tokenEnd;
|
||||||
|
|
||||||
private final int languageVersion;
|
private final boolean strictMode;
|
||||||
private static final boolean IS_RESERVED_KEYWORD_AS_IDENTIFIER = true;
|
|
||||||
private static final boolean STRICT_MODE = false;
|
|
||||||
}
|
}
|
@ -1,6 +1,5 @@
|
|||||||
package org.schabi.newpipe.extractor.utils.jsextractor;
|
package org.schabi.newpipe.extractor.utils.jsextractor;
|
||||||
|
|
||||||
import org.mozilla.javascript.Context;
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
import java.util.Stack;
|
import java.util.Stack;
|
||||||
@ -119,7 +118,7 @@ public class Lexer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final TokenStream stream;
|
private final EcmaScriptTokenStream stream;
|
||||||
private final LookBehind lastThree;
|
private final LookBehind lastThree;
|
||||||
private final Stack<Brace> braceStack;
|
private final Stack<Brace> braceStack;
|
||||||
private final Stack<Paren> parenStack;
|
private final Stack<Paren> parenStack;
|
||||||
@ -128,24 +127,14 @@ public class Lexer {
|
|||||||
* Create a new JavaScript lexer with the given source code
|
* Create a new JavaScript lexer with the given source code
|
||||||
*
|
*
|
||||||
* @param js JavaScript code
|
* @param js JavaScript code
|
||||||
* @param languageVersion JavaScript version (from Rhino)
|
|
||||||
*/
|
*/
|
||||||
public Lexer(final String js, final int languageVersion) {
|
public Lexer(final String js) {
|
||||||
stream = new TokenStream(js, 0, languageVersion);
|
stream = new EcmaScriptTokenStream(js, 0, false);
|
||||||
lastThree = new LookBehind();
|
lastThree = new LookBehind();
|
||||||
braceStack = new Stack<>();
|
braceStack = new Stack<>();
|
||||||
parenStack = new Stack<>();
|
parenStack = new Stack<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new JavaScript lexer with the given source code
|
|
||||||
*
|
|
||||||
* @param js JavaScript code
|
|
||||||
*/
|
|
||||||
public Lexer(final String js) {
|
|
||||||
this(js, Context.VERSION_DEFAULT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Continue parsing and return the next token
|
* Continue parsing and return the next token
|
||||||
* @return next token
|
* @return next token
|
||||||
|
Loading…
x
Reference in New Issue
Block a user