// Copyright 2017 the V8 project authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#ifndef V8_ASMJS_ASM_SCANNER_H_
|
#define V8_ASMJS_ASM_SCANNER_H_
|
|
#include <memory>
|
#include <string>
|
#include <unordered_map>
|
|
#include "src/asmjs/asm-names.h"
|
#include "src/base/logging.h"
|
#include "src/globals.h"
|
|
namespace v8 {
|
namespace internal {
|
|
class Utf16CharacterStream;
|
|
// A custom scanner to extract the token stream needed to parse valid
|
// asm.js: http://asmjs.org/spec/latest/
|
// This scanner intentionally avoids the portion of JavaScript lexing
|
// that are not required to determine if code is valid asm.js code.
|
// * Strings are disallowed except for 'use asm'.
|
// * Only the subset of keywords needed to check asm.js invariants are
|
// included.
|
// * Identifiers are accumulated into local + global string tables
|
// (for performance).
|
class V8_EXPORT_PRIVATE AsmJsScanner {
|
public:
|
typedef int32_t token_t;
|
|
explicit AsmJsScanner(Utf16CharacterStream* stream);
|
|
// Get current token.
|
token_t Token() const { return token_; }
|
// Get position of current token.
|
size_t Position() const { return position_; }
|
// Advance to the next token.
|
void Next();
|
// Back up by one token.
|
void Rewind();
|
|
// Get raw string for current identifier. Note that the returned string will
|
// become invalid when the scanner advances, create a copy to preserve it.
|
const std::string& GetIdentifierString() const {
|
// Identifier strings don't work after a rewind.
|
DCHECK(!rewind_);
|
return identifier_string_;
|
}
|
|
// Check if we just passed a newline.
|
bool IsPrecededByNewline() const {
|
// Newline tracking doesn't work if you back up.
|
DCHECK(!rewind_);
|
return preceded_by_newline_;
|
}
|
|
#if DEBUG
|
// Debug only method to go from a token back to its name.
|
// Slow, only use for debugging.
|
std::string Name(token_t token) const;
|
#endif
|
|
// Restores old position (token after that position). Note that it is not
|
// allowed to rewind right after a seek, because previous tokens are unknown.
|
void Seek(size_t pos);
|
|
// Select whether identifiers are resolved in global or local scope,
|
// and which scope new identifiers are added to.
|
void EnterLocalScope() { in_local_scope_ = true; }
|
void EnterGlobalScope() { in_local_scope_ = false; }
|
// Drop all current local identifiers.
|
void ResetLocals();
|
|
// Methods to check if a token is an identifier and which scope.
|
bool IsLocal() const { return IsLocal(Token()); }
|
bool IsGlobal() const { return IsGlobal(Token()); }
|
static bool IsLocal(token_t token) { return token <= kLocalsStart; }
|
static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
|
// Methods to find the index position of an identifier (count starting from
|
// 0 for each scope separately).
|
static size_t LocalIndex(token_t token) {
|
DCHECK(IsLocal(token));
|
return -(token - kLocalsStart);
|
}
|
static size_t GlobalIndex(token_t token) {
|
DCHECK(IsGlobal(token));
|
return token - kGlobalsStart;
|
}
|
|
// Methods to check if the current token is a numeric literal considered an
|
// asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
|
// that numbers without a dot outside the [0 .. 2^32) range are errors.
|
bool IsUnsigned() const { return Token() == kUnsigned; }
|
uint32_t AsUnsigned() const {
|
DCHECK(IsUnsigned());
|
return unsigned_value_;
|
}
|
bool IsDouble() const { return Token() == kDouble; }
|
double AsDouble() const {
|
DCHECK(IsDouble());
|
return double_value_;
|
}
|
|
// clang-format off
|
enum {
|
// [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting
|
// backwards)
|
// [-10000 .. -1) :: Builtin tokens like keywords
|
// (also includes some special
|
// ones like end of input)
|
// 0 .. 255 :: Single char tokens
|
// 256 .. 256+kMaxIdentifierCount :: Global identifiers
|
kLocalsStart = -10000,
|
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
|
STDLIB_MATH_FUNCTION_LIST(V)
|
STDLIB_ARRAY_TYPE_LIST(V)
|
#undef V
|
#define V(name, _junk1) kToken_##name,
|
STDLIB_MATH_VALUE_LIST(V)
|
#undef V
|
#define V(name) kToken_##name,
|
STDLIB_OTHER_LIST(V)
|
KEYWORD_NAME_LIST(V)
|
#undef V
|
#define V(rawname, name) kToken_##name,
|
LONG_SYMBOL_NAME_LIST(V)
|
#undef V
|
#define V(name, value, string_name) name = value,
|
SPECIAL_TOKEN_LIST(V)
|
#undef V
|
kGlobalsStart = 256,
|
};
|
// clang-format on
|
|
private:
|
Utf16CharacterStream* stream_;
|
token_t token_;
|
token_t preceding_token_;
|
token_t next_token_; // Only set when in {rewind} state.
|
size_t position_; // Corresponds to {token} position.
|
size_t preceding_position_; // Corresponds to {preceding_token} position.
|
size_t next_position_; // Only set when in {rewind} state.
|
bool rewind_;
|
std::string identifier_string_;
|
bool in_local_scope_;
|
std::unordered_map<std::string, token_t> local_names_;
|
std::unordered_map<std::string, token_t> global_names_;
|
std::unordered_map<std::string, token_t> property_names_;
|
int global_count_;
|
double double_value_;
|
uint32_t unsigned_value_;
|
bool preceded_by_newline_;
|
|
// Consume multiple characters.
|
void ConsumeIdentifier(uc32 ch);
|
void ConsumeNumber(uc32 ch);
|
bool ConsumeCComment();
|
void ConsumeCPPComment();
|
void ConsumeString(uc32 quote);
|
void ConsumeCompareOrShift(uc32 ch);
|
|
// Classify character categories.
|
bool IsIdentifierStart(uc32 ch);
|
bool IsIdentifierPart(uc32 ch);
|
bool IsNumberStart(uc32 ch);
|
};
|
|
} // namespace internal
|
} // namespace v8
|
|
#endif // V8_ASMJS_ASM_SCANNER_H_
|