// Copyright 2011 the V8 project authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#include "src/parsing/scanner-character-streams.h"
|
|
#include "include/v8.h"
|
#include "src/counters.h"
|
#include "src/globals.h"
|
#include "src/handles.h"
|
#include "src/objects-inl.h"
|
#include "src/parsing/scanner.h"
|
#include "src/unicode-inl.h"
|
|
namespace v8 {
|
namespace internal {
|
|
namespace {
|
const unibrow::uchar kUtf8Bom = 0xFEFF;
|
} // namespace
|
|
template <typename Char>
|
struct HeapStringType;
|
|
template <>
|
struct HeapStringType<uint8_t> {
|
typedef SeqOneByteString String;
|
};
|
|
template <>
|
struct HeapStringType<uint16_t> {
|
typedef SeqTwoByteString String;
|
};
|
|
template <typename Char>
|
struct Range {
|
const Char* start;
|
const Char* end;
|
|
size_t length() { return static_cast<size_t>(end - start); }
|
bool unaligned_start() const {
|
return reinterpret_cast<intptr_t>(start) % sizeof(Char) == 1;
|
}
|
};
|
|
// A Char stream backed by an on-heap SeqOneByteString or SeqTwoByteString.
|
template <typename Char>
|
class OnHeapStream {
|
public:
|
typedef typename HeapStringType<Char>::String String;
|
|
OnHeapStream(Handle<String> string, size_t start_offset, size_t end)
|
: string_(string), start_offset_(start_offset), length_(end) {}
|
|
Range<Char> GetDataAt(size_t pos) {
|
return {&string_->GetChars()[start_offset_ + Min(length_, pos)],
|
&string_->GetChars()[start_offset_ + length_]};
|
}
|
|
static const bool kCanAccessHeap = true;
|
|
private:
|
Handle<String> string_;
|
const size_t start_offset_;
|
const size_t length_;
|
};
|
|
// A Char stream backed by an off-heap ExternalOneByteString or
|
// ExternalTwoByteString.
|
template <typename Char>
|
class ExternalStringStream {
|
public:
|
ExternalStringStream(const Char* data, size_t end)
|
: data_(data), length_(end) {}
|
|
Range<Char> GetDataAt(size_t pos) {
|
return {&data_[Min(length_, pos)], &data_[length_]};
|
}
|
|
static const bool kCanAccessHeap = false;
|
|
private:
|
const Char* const data_;
|
const size_t length_;
|
};
|
|
// A Char stream backed by multiple source-stream provided off-heap chunks.
|
template <typename Char>
|
class ChunkedStream {
|
public:
|
ChunkedStream(ScriptCompiler::ExternalSourceStream* source,
|
RuntimeCallStats* stats)
|
: source_(source), stats_(stats) {}
|
|
Range<Char> GetDataAt(size_t pos) {
|
Chunk chunk = FindChunk(pos);
|
size_t buffer_end = chunk.length;
|
size_t buffer_pos = Min(buffer_end, pos - chunk.position);
|
return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
|
}
|
|
~ChunkedStream() {
|
for (Chunk& chunk : chunks_) delete[] chunk.data;
|
}
|
|
static const bool kCanAccessHeap = false;
|
|
private:
|
struct Chunk {
|
Chunk(const Char* const data, size_t position, size_t length)
|
: data(data), position(position), length(length) {}
|
const Char* const data;
|
// The logical position of data.
|
const size_t position;
|
const size_t length;
|
size_t end_position() const { return position + length; }
|
};
|
|
Chunk FindChunk(size_t position) {
|
while (V8_UNLIKELY(chunks_.empty())) FetchChunk(size_t{0});
|
|
// Walk forwards while the position is in front of the current chunk.
|
while (position >= chunks_.back().end_position() &&
|
chunks_.back().length > 0) {
|
FetchChunk(chunks_.back().end_position());
|
}
|
|
// Walk backwards.
|
for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
|
++reverse_it) {
|
if (reverse_it->position <= position) return *reverse_it;
|
}
|
|
UNREACHABLE();
|
}
|
|
virtual void ProcessChunk(const uint8_t* data, size_t position,
|
size_t length) {
|
// Incoming data has to be aligned to Char size.
|
DCHECK_EQ(0, length % sizeof(Char));
|
chunks_.emplace_back(reinterpret_cast<const Char*>(data), position,
|
length / sizeof(Char));
|
}
|
|
void FetchChunk(size_t position) {
|
const uint8_t* data = nullptr;
|
size_t length;
|
{
|
RuntimeCallTimerScope scope(stats_,
|
RuntimeCallCounterId::kGetMoreDataCallback);
|
length = source_->GetMoreData(&data);
|
}
|
ProcessChunk(data, position, length);
|
}
|
|
ScriptCompiler::ExternalSourceStream* source_;
|
RuntimeCallStats* stats_;
|
|
protected:
|
std::vector<struct Chunk> chunks_;
|
};
|
|
template <typename Char>
|
class Utf8ChunkedStream : public ChunkedStream<uint16_t> {
|
public:
|
Utf8ChunkedStream(ScriptCompiler::ExternalSourceStream* source,
|
RuntimeCallStats* stats)
|
: ChunkedStream<uint16_t>(source, stats) {}
|
|
STATIC_ASSERT(sizeof(Char) == sizeof(uint16_t));
|
void ProcessChunk(const uint8_t* data, size_t position, size_t length) final {
|
if (length == 0) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state_);
|
if (t != unibrow::Utf8::kBufferEmpty) {
|
DCHECK_EQ(t, unibrow::Utf8::kBadChar);
|
incomplete_char_ = 0;
|
uint16_t* result = new uint16_t[1];
|
result[0] = unibrow::Utf8::kBadChar;
|
chunks_.emplace_back(result, position, 1);
|
position++;
|
}
|
chunks_.emplace_back(nullptr, position, 0);
|
delete[] data;
|
return;
|
}
|
|
// First count the number of complete characters that can be produced.
|
|
unibrow::Utf8::State state = state_;
|
uint32_t incomplete_char = incomplete_char_;
|
bool seen_bom = seen_bom_;
|
|
size_t i = 0;
|
size_t chars = 0;
|
while (i < length) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(data[i], &i, &state,
|
&incomplete_char);
|
if (!seen_bom && t == kUtf8Bom && position + chars == 0) {
|
seen_bom = true;
|
// BOM detected at beginning of the stream. Don't copy it.
|
} else if (t != unibrow::Utf8::kIncomplete) {
|
chars++;
|
if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
|
}
|
}
|
|
// Process the data.
|
|
// If there aren't any complete characters, update the state without
|
// producing a chunk.
|
if (chars == 0) {
|
state_ = state;
|
incomplete_char_ = incomplete_char;
|
seen_bom_ = seen_bom;
|
delete[] data;
|
return;
|
}
|
|
// Update the state and produce a chunk with complete characters.
|
uint16_t* result = new uint16_t[chars];
|
uint16_t* cursor = result;
|
i = 0;
|
|
while (i < length) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(data[i], &i, &state_,
|
&incomplete_char_);
|
if (V8_LIKELY(t < kUtf8Bom)) {
|
*(cursor++) = static_cast<uc16>(t); // The by most frequent case.
|
} else if (t == unibrow::Utf8::kIncomplete) {
|
continue;
|
} else if (!seen_bom_ && t == kUtf8Bom && position == 0 &&
|
cursor == result) {
|
// BOM detected at beginning of the stream. Don't copy it.
|
seen_bom_ = true;
|
} else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
*(cursor++) = static_cast<uc16>(t);
|
} else {
|
*(cursor++) = unibrow::Utf16::LeadSurrogate(t);
|
*(cursor++) = unibrow::Utf16::TrailSurrogate(t);
|
}
|
}
|
|
chunks_.emplace_back(result, position, chars);
|
delete[] data;
|
}
|
|
private:
|
uint32_t incomplete_char_ = 0;
|
unibrow::Utf8::State state_ = unibrow::Utf8::State::kAccept;
|
bool seen_bom_ = false;
|
};
|
|
// Provides a buffered utf-16 view on the bytes from the underlying ByteStream.
|
// Chars are buffered if either the underlying stream isn't utf-16 or the
|
// underlying utf-16 stream might move (is on-heap).
|
template <template <typename T> class ByteStream>
|
class BufferedCharacterStream : public Utf16CharacterStream {
|
public:
|
template <class... TArgs>
|
BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
|
buffer_pos_ = pos;
|
}
|
|
protected:
|
bool ReadBlock() final {
|
size_t position = pos();
|
buffer_pos_ = position;
|
buffer_start_ = &buffer_[0];
|
buffer_cursor_ = buffer_start_;
|
|
Range<uint8_t> range = byte_stream_.GetDataAt(position);
|
if (range.length() == 0) {
|
buffer_end_ = buffer_start_;
|
return false;
|
}
|
|
size_t length = Min(kBufferSize, range.length());
|
i::CopyCharsUnsigned(buffer_, range.start, length);
|
buffer_end_ = &buffer_[length];
|
return true;
|
}
|
|
bool can_access_heap() final { return ByteStream<uint8_t>::kCanAccessHeap; }
|
|
private:
|
static const size_t kBufferSize = 512;
|
uc16 buffer_[kBufferSize];
|
ByteStream<uint8_t> byte_stream_;
|
};
|
|
// Provides a unbuffered utf-16 view on the bytes from the underlying
|
// ByteStream.
|
template <template <typename T> class ByteStream>
|
class UnbufferedCharacterStream : public Utf16CharacterStream {
|
public:
|
template <class... TArgs>
|
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
|
buffer_pos_ = pos;
|
}
|
|
protected:
|
bool ReadBlock() final {
|
size_t position = pos();
|
buffer_pos_ = position;
|
Range<uint16_t> range = byte_stream_.GetDataAt(position);
|
buffer_start_ = range.start;
|
buffer_end_ = range.end;
|
buffer_cursor_ = buffer_start_;
|
if (range.length() == 0) return false;
|
|
DCHECK(!range.unaligned_start());
|
DCHECK_LE(buffer_start_, buffer_end_);
|
return true;
|
}
|
|
bool can_access_heap() final { return ByteStream<uint16_t>::kCanAccessHeap; }
|
|
ByteStream<uint16_t> byte_stream_;
|
};
|
|
// Provides a unbuffered utf-16 view on the bytes from the underlying
|
// ByteStream.
|
class RelocatingCharacterStream
|
: public UnbufferedCharacterStream<OnHeapStream> {
|
public:
|
template <class... TArgs>
|
RelocatingCharacterStream(Isolate* isolate, size_t pos, TArgs... args)
|
: UnbufferedCharacterStream<OnHeapStream>(pos, args...),
|
isolate_(isolate) {
|
isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
|
v8::kGCTypeAll, this);
|
}
|
|
private:
|
~RelocatingCharacterStream() final {
|
isolate_->heap()->RemoveGCEpilogueCallback(UpdateBufferPointersCallback,
|
this);
|
}
|
|
static void UpdateBufferPointersCallback(v8::Isolate* v8_isolate,
|
v8::GCType type,
|
v8::GCCallbackFlags flags,
|
void* stream) {
|
reinterpret_cast<RelocatingCharacterStream*>(stream)
|
->UpdateBufferPointers();
|
}
|
|
void UpdateBufferPointers() {
|
Range<uint16_t> range = byte_stream_.GetDataAt(0);
|
if (range.start != buffer_start_) {
|
buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
|
buffer_start_ = range.start;
|
buffer_end_ = range.end;
|
}
|
}
|
|
Isolate* isolate_;
|
};
|
|
// ----------------------------------------------------------------------------
|
// BufferedUtf16CharacterStreams
|
//
|
// A buffered character stream based on a random access character
|
// source (ReadBlock can be called with pos() pointing to any position,
|
// even positions before the current).
|
//
|
// TODO(verwaest): Remove together with Utf8 external streaming streams.
|
class BufferedUtf16CharacterStream : public Utf16CharacterStream {
|
public:
|
BufferedUtf16CharacterStream();
|
|
protected:
|
static const size_t kBufferSize = 512;
|
|
bool ReadBlock() final;
|
|
// FillBuffer should read up to kBufferSize characters at position and store
|
// them into buffer_[0..]. It returns the number of characters stored.
|
virtual size_t FillBuffer(size_t position) = 0;
|
|
// Fixed sized buffer that this class reads from.
|
// The base class' buffer_start_ should always point to buffer_.
|
uc16 buffer_[kBufferSize];
|
};
|
|
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
|
: Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
|
|
bool BufferedUtf16CharacterStream::ReadBlock() {
|
DCHECK_EQ(buffer_start_, buffer_);
|
|
size_t position = pos();
|
buffer_pos_ = position;
|
buffer_cursor_ = buffer_;
|
buffer_end_ = buffer_ + FillBuffer(position);
|
DCHECK_EQ(pos(), position);
|
DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
|
return buffer_cursor_ < buffer_end_;
|
}
|
|
// ----------------------------------------------------------------------------
|
// Utf8ExternalStreamingStream - chunked streaming of Utf-8 data.
|
//
|
// This implementation is fairly complex, since data arrives in chunks which
|
// may 'cut' arbitrarily into utf-8 characters. Also, seeking to a given
|
// character position is tricky because the byte position cannot be dericed
|
// from the character position.
|
//
|
// TODO(verwaest): Decode utf8 chunks into utf16 chunks on the blink side
|
// instead so we don't need to buffer.
|
|
class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
|
public:
|
Utf8ExternalStreamingStream(
|
ScriptCompiler::ExternalSourceStream* source_stream,
|
RuntimeCallStats* stats)
|
: current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
|
source_stream_(source_stream),
|
stats_(stats) {}
|
~Utf8ExternalStreamingStream() final {
|
for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data;
|
}
|
|
bool can_access_heap() final { return false; }
|
|
protected:
|
size_t FillBuffer(size_t position) final;
|
|
private:
|
// A position within the data stream. It stores:
|
// - The 'physical' position (# of bytes in the stream),
|
// - the 'logical' position (# of ucs-2 characters, also within the stream),
|
// - a possibly incomplete utf-8 char at the current 'physical' position.
|
struct StreamPosition {
|
size_t bytes;
|
size_t chars;
|
uint32_t incomplete_char;
|
unibrow::Utf8::State state;
|
};
|
|
// Position contains a StreamPosition and the index of the chunk the position
|
// points into. (The chunk_no could be derived from pos, but that'd be
|
// an expensive search through all chunks.)
|
struct Position {
|
size_t chunk_no;
|
StreamPosition pos;
|
};
|
|
// A chunk in the list of chunks, containing:
|
// - The chunk data (data pointer and length), and
|
// - the position at the first byte of the chunk.
|
struct Chunk {
|
const uint8_t* data;
|
size_t length;
|
StreamPosition start;
|
};
|
|
// Within the current chunk, skip forward from current_ towards position.
|
bool SkipToPosition(size_t position);
|
// Within the current chunk, fill the buffer_ (while it has capacity).
|
void FillBufferFromCurrentChunk();
|
// Fetch a new chunk (assuming current_ is at the end of the current data).
|
bool FetchChunk();
|
// Search through the chunks and set current_ to point to the given position.
|
// (This call is potentially expensive.)
|
void SearchPosition(size_t position);
|
|
std::vector<Chunk> chunks_;
|
Position current_;
|
ScriptCompiler::ExternalSourceStream* source_stream_;
|
RuntimeCallStats* stats_;
|
};
|
|
bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
|
DCHECK_LE(current_.pos.chars, position); // We can only skip forward.
|
|
// Already there? Then return immediately.
|
if (current_.pos.chars == position) return true;
|
|
const Chunk& chunk = chunks_[current_.chunk_no];
|
DCHECK(current_.pos.bytes >= chunk.start.bytes);
|
|
unibrow::Utf8::State state = chunk.start.state;
|
uint32_t incomplete_char = chunk.start.incomplete_char;
|
size_t it = current_.pos.bytes - chunk.start.bytes;
|
size_t chars = chunk.start.chars;
|
while (it < chunk.length && chars < position) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
|
chunk.data[it], &it, &state, &incomplete_char);
|
if (t == kUtf8Bom && current_.pos.chars == 0) {
|
// BOM detected at beginning of the stream. Don't copy it.
|
} else if (t != unibrow::Utf8::kIncomplete) {
|
chars++;
|
if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
|
}
|
}
|
|
current_.pos.bytes += it;
|
current_.pos.chars = chars;
|
current_.pos.incomplete_char = incomplete_char;
|
current_.pos.state = state;
|
current_.chunk_no += (it == chunk.length);
|
|
return current_.pos.chars == position;
|
}
|
|
void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
|
DCHECK_LT(current_.chunk_no, chunks_.size());
|
DCHECK_EQ(buffer_start_, buffer_cursor_);
|
DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
|
|
const Chunk& chunk = chunks_[current_.chunk_no];
|
|
// The buffer_ is writable, but buffer_*_ members are const. So we get a
|
// non-const pointer into buffer that points to the same char as buffer_end_.
|
uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
|
DCHECK_EQ(cursor, buffer_end_);
|
|
unibrow::Utf8::State state = current_.pos.state;
|
uint32_t incomplete_char = current_.pos.incomplete_char;
|
|
// If the current chunk is the last (empty) chunk we'll have to process
|
// any left-over, partial characters.
|
if (chunk.length == 0) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
|
if (t != unibrow::Utf8::kBufferEmpty) {
|
DCHECK_EQ(t, unibrow::Utf8::kBadChar);
|
*cursor = static_cast<uc16>(t);
|
buffer_end_++;
|
current_.pos.chars++;
|
current_.pos.incomplete_char = 0;
|
current_.pos.state = state;
|
}
|
return;
|
}
|
|
size_t it = current_.pos.bytes - chunk.start.bytes;
|
while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
|
unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
|
chunk.data[it], &it, &state, &incomplete_char);
|
if (V8_LIKELY(t < kUtf8Bom)) {
|
*(cursor++) = static_cast<uc16>(t); // The by most frequent case.
|
} else if (t == unibrow::Utf8::kIncomplete) {
|
continue;
|
} else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
|
// BOM detected at beginning of the stream. Don't copy it.
|
} else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
*(cursor++) = static_cast<uc16>(t);
|
} else {
|
*(cursor++) = unibrow::Utf16::LeadSurrogate(t);
|
*(cursor++) = unibrow::Utf16::TrailSurrogate(t);
|
}
|
}
|
|
current_.pos.bytes = chunk.start.bytes + it;
|
current_.pos.chars += (cursor - buffer_end_);
|
current_.pos.incomplete_char = incomplete_char;
|
current_.pos.state = state;
|
current_.chunk_no += (it == chunk.length);
|
|
buffer_end_ = cursor;
|
}
|
|
bool Utf8ExternalStreamingStream::FetchChunk() {
|
RuntimeCallTimerScope scope(stats_,
|
RuntimeCallCounterId::kGetMoreDataCallback);
|
DCHECK_EQ(current_.chunk_no, chunks_.size());
|
DCHECK(chunks_.empty() || chunks_.back().length != 0);
|
|
const uint8_t* chunk = nullptr;
|
size_t length = source_stream_->GetMoreData(&chunk);
|
chunks_.push_back({chunk, length, current_.pos});
|
return length > 0;
|
}
|
|
void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
|
// If current_ already points to the right position, we're done.
|
//
|
// This is expected to be the common case, since we typically call
|
// FillBuffer right after the current buffer.
|
if (current_.pos.chars == position) return;
|
|
// No chunks. Fetch at least one, so we can assume !chunks_.empty() below.
|
if (chunks_.empty()) {
|
DCHECK_EQ(current_.chunk_no, 0u);
|
DCHECK_EQ(current_.pos.bytes, 0u);
|
DCHECK_EQ(current_.pos.chars, 0u);
|
FetchChunk();
|
}
|
|
// Search for the last chunk whose start position is less or equal to
|
// position.
|
size_t chunk_no = chunks_.size() - 1;
|
while (chunk_no > 0 && chunks_[chunk_no].start.chars > position) {
|
chunk_no--;
|
}
|
|
// Did we find the terminating (zero-length) chunk? Then we're seeking
|
// behind the end of the data, and position does not exist.
|
// Set current_ to point to the terminating chunk.
|
if (chunks_[chunk_no].length == 0) {
|
current_ = {chunk_no, chunks_[chunk_no].start};
|
return;
|
}
|
|
// Did we find the non-last chunk? Then our position must be within chunk_no.
|
if (chunk_no + 1 < chunks_.size()) {
|
// Fancy-pants optimization for ASCII chunks within a utf-8 stream.
|
// (Many web sites declare utf-8 encoding, but use only (or almost only) the
|
// ASCII subset for their JavaScript sources. We can exploit this, by
|
// checking whether the # bytes in a chunk are equal to the # chars, and if
|
// so avoid the expensive SkipToPosition.)
|
bool ascii_only_chunk =
|
chunks_[chunk_no].start.incomplete_char == 0 &&
|
(chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
|
(chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
|
if (ascii_only_chunk) {
|
size_t skip = position - chunks_[chunk_no].start.chars;
|
current_ = {chunk_no,
|
{chunks_[chunk_no].start.bytes + skip,
|
chunks_[chunk_no].start.chars + skip, 0,
|
unibrow::Utf8::State::kAccept}};
|
} else {
|
current_ = {chunk_no, chunks_[chunk_no].start};
|
SkipToPosition(position);
|
}
|
|
// Since position was within the chunk, SkipToPosition should have found
|
// something.
|
DCHECK_EQ(position, current_.pos.chars);
|
return;
|
}
|
|
// What's left: We're in the last, non-terminating chunk. Our position
|
// may be in the chunk, but it may also be in 'future' chunks, which we'll
|
// have to obtain.
|
DCHECK_EQ(chunk_no, chunks_.size() - 1);
|
current_ = {chunk_no, chunks_[chunk_no].start};
|
bool have_more_data = true;
|
bool found = SkipToPosition(position);
|
while (have_more_data && !found) {
|
DCHECK_EQ(current_.chunk_no, chunks_.size());
|
have_more_data = FetchChunk();
|
found = have_more_data && SkipToPosition(position);
|
}
|
|
// We'll return with a postion != the desired position only if we're out
|
// of data. In that case, we'll point to the terminating chunk.
|
DCHECK_EQ(found, current_.pos.chars == position);
|
DCHECK_EQ(have_more_data, chunks_.back().length != 0);
|
DCHECK_IMPLIES(!found, !have_more_data);
|
DCHECK_IMPLIES(!found, current_.chunk_no == chunks_.size() - 1);
|
}
|
|
size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
|
buffer_cursor_ = buffer_;
|
buffer_end_ = buffer_;
|
|
SearchPosition(position);
|
bool out_of_data = current_.chunk_no != chunks_.size() &&
|
chunks_[current_.chunk_no].length == 0 &&
|
current_.pos.incomplete_char == 0;
|
|
if (out_of_data) return 0;
|
|
// Fill the buffer, until we have at least one char (or are out of data).
|
// (The embedder might give us 1-byte blocks within a utf-8 char, so we
|
// can't guarantee progress with one chunk. Thus we iterate.)
|
while (!out_of_data && buffer_cursor_ == buffer_end_) {
|
// At end of current data, but there might be more? Then fetch it.
|
if (current_.chunk_no == chunks_.size()) {
|
out_of_data = !FetchChunk();
|
}
|
FillBufferFromCurrentChunk();
|
}
|
|
DCHECK_EQ(current_.pos.chars - position,
|
static_cast<size_t>(buffer_end_ - buffer_cursor_));
|
return buffer_end_ - buffer_cursor_;
|
}
|
|
// ----------------------------------------------------------------------------
|
// ScannerStream: Create stream instances.
|
|
Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
|
Handle<String> data) {
|
return ScannerStream::For(isolate, data, 0, data->length());
|
}
|
|
Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
|
int start_pos, int end_pos) {
|
DCHECK_GE(start_pos, 0);
|
DCHECK_LE(start_pos, end_pos);
|
DCHECK_LE(end_pos, data->length());
|
size_t start_offset = 0;
|
if (data->IsSlicedString()) {
|
SlicedString* string = SlicedString::cast(*data);
|
start_offset = string->offset();
|
String* parent = string->parent();
|
if (parent->IsThinString()) parent = ThinString::cast(parent)->actual();
|
data = handle(parent, isolate);
|
} else {
|
data = String::Flatten(isolate, data);
|
}
|
if (data->IsExternalOneByteString()) {
|
return new BufferedCharacterStream<ExternalStringStream>(
|
static_cast<size_t>(start_pos),
|
ExternalOneByteString::cast(*data)->GetChars() + start_offset,
|
static_cast<size_t>(end_pos));
|
} else if (data->IsExternalTwoByteString()) {
|
return new UnbufferedCharacterStream<ExternalStringStream>(
|
static_cast<size_t>(start_pos),
|
ExternalTwoByteString::cast(*data)->GetChars() + start_offset,
|
static_cast<size_t>(end_pos));
|
} else if (data->IsSeqOneByteString()) {
|
return new BufferedCharacterStream<OnHeapStream>(
|
static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
|
start_offset, static_cast<size_t>(end_pos));
|
} else if (data->IsSeqTwoByteString()) {
|
return new RelocatingCharacterStream(
|
isolate, static_cast<size_t>(start_pos),
|
Handle<SeqTwoByteString>::cast(data), start_offset,
|
static_cast<size_t>(end_pos));
|
} else {
|
UNREACHABLE();
|
}
|
}
|
|
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
|
const char* data) {
|
return ScannerStream::ForTesting(data, strlen(data));
|
}
|
|
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
|
const char* data, size_t length) {
|
return std::unique_ptr<Utf16CharacterStream>(
|
new BufferedCharacterStream<ExternalStringStream>(
|
static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
|
static_cast<size_t>(length)));
|
}
|
|
Utf16CharacterStream* ScannerStream::For(
|
ScriptCompiler::ExternalSourceStream* source_stream,
|
v8::ScriptCompiler::StreamedSource::Encoding encoding,
|
RuntimeCallStats* stats) {
|
switch (encoding) {
|
case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
|
return new UnbufferedCharacterStream<ChunkedStream>(
|
static_cast<size_t>(0), source_stream, stats);
|
case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
|
return new BufferedCharacterStream<ChunkedStream>(static_cast<size_t>(0),
|
source_stream, stats);
|
case v8::ScriptCompiler::StreamedSource::UTF8:
|
return new Utf8ExternalStreamingStream(source_stream, stats);
|
}
|
UNREACHABLE();
|
}
|
|
} // namespace internal
|
} // namespace v8
|