// Copyright 2017 the V8 project authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#ifndef V8_COMPILER_PERSISTENT_MAP_H_
|
#define V8_COMPILER_PERSISTENT_MAP_H_
|
|
#include <array>
|
#include <tuple>
|
|
#include "src/base/functional.h"
|
#include "src/zone/zone-containers.h"
|
|
namespace v8 {
|
namespace internal {
|
namespace compiler {
|
|
// PersistentMap is a persistent map datastructure based on hash trees (a binary
|
// tree using the bits of a hash value as addresses). The map is a conceptually
|
// infinite: All keys are initially mapped to a default value, values are
|
// deleted by overwriting them with the default value. The iterators produce
|
// exactly the keys that are not the default value. The hash values should have
|
// high variance in their high bits, so dense integers are a bad choice.
|
// Complexity:
|
// - Copy and assignment: O(1)
|
// - access: O(log n)
|
// - update: O(log n) time and space
|
// - iteration: amortized O(1) per step
|
// - Zip: O(n)
|
// - equality check: O(n)
|
// TODO(tebbi): Cache map transitions to avoid re-allocation of the same map.
|
// TODO(tebbi): Implement an O(1) equality check based on hash consing or
|
// something similar.
|
template <class Key, class Value, class Hasher = base::hash<Key>>
|
class PersistentMap {
|
public:
|
using key_type = Key;
|
using mapped_type = Value;
|
using value_type = std::pair<Key, Value>;
|
|
private:
|
static constexpr size_t kHashBits = 32;
|
enum Bit : int { kLeft = 0, kRight = 1 };
|
|
// Access hash bits starting from the high bits and compare them according to
|
// their unsigned value. This way, the order in the hash tree is compatible
|
// with numeric hash comparisons.
|
class HashValue;
|
|
struct KeyValue : std::pair<Key, Value> {
|
const Key& key() const { return this->first; }
|
const Value& value() const { return this->second; }
|
using std::pair<Key, Value>::pair;
|
};
|
|
struct FocusedTree;
|
|
public:
|
// Depth of the last added element. This is a cheap estimate for the size of
|
// the hash tree.
|
size_t last_depth() const {
|
if (tree_) {
|
return tree_->length;
|
} else {
|
return 0;
|
}
|
}
|
|
const Value& Get(const Key& key) const {
|
HashValue key_hash = HashValue(Hasher()(key));
|
const FocusedTree* tree = FindHash(key_hash);
|
return GetFocusedValue(tree, key);
|
}
|
|
// Add or overwrite an existing key-value pair.
|
void Set(Key key, Value value);
|
|
bool operator==(const PersistentMap& other) const {
|
if (tree_ == other.tree_) return true;
|
if (def_value_ != other.def_value_) return false;
|
for (const std::tuple<Key, Value, Value>& triple : Zip(other)) {
|
if (std::get<1>(triple) != std::get<2>(triple)) return false;
|
}
|
return true;
|
}
|
|
bool operator!=(const PersistentMap& other) const {
|
return !(*this == other);
|
}
|
|
// The iterator produces key-value pairs in the lexicographical order of
|
// hash value and key. It produces exactly the key-value pairs where the value
|
// is not the default value.
|
class iterator;
|
|
iterator begin() const {
|
if (!tree_) return end();
|
return iterator::begin(tree_, def_value_);
|
}
|
iterator end() const { return iterator::end(def_value_); }
|
|
// Iterator to traverse two maps in lockstep, producing matching value pairs
|
// for each key where at least one value is different from the respective
|
// default.
|
class double_iterator;
|
|
// An iterable to iterate over the two maps in lockstep.
|
struct ZipIterable {
|
PersistentMap a;
|
PersistentMap b;
|
double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
|
double_iterator end() { return double_iterator(a.end(), b.end()); }
|
};
|
|
ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
|
|
explicit PersistentMap(Zone* zone, Value def_value = Value())
|
: PersistentMap(nullptr, zone, def_value) {}
|
|
private:
|
// Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
|
const FocusedTree* FindHash(HashValue hash) const;
|
|
// Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
|
// Output the path to this {FocusedTree} and its length. If no such
|
// {FocusedTree} exists, return {nullptr} and output the path to the last node
|
// with a matching hash prefix. Note that {length} is the length of the found
|
// path and may be less than the length of the found {FocusedTree}.
|
const FocusedTree* FindHash(HashValue hash,
|
std::array<const FocusedTree*, kHashBits>* path,
|
int* length) const;
|
|
// Load value from the leaf node on the focused path of {tree}.
|
const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
|
|
// Return the {FocusedTree} representing the left (bit==kLeft) or right
|
// (bit==kRight) child of the node on the path of {tree} at tree level
|
// {level}.
|
static const FocusedTree* GetChild(const FocusedTree* tree, int level,
|
Bit bit);
|
|
// Find the leftmost path in the tree, starting at the node at tree level
|
// {level} on the path of {start}. Output the level of the leaf to {level} and
|
// the path to {path}.
|
static const FocusedTree* FindLeftmost(
|
const FocusedTree* start, int* level,
|
std::array<const FocusedTree*, kHashBits>* path);
|
|
PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
|
: tree_(tree), def_value_(def_value), zone_(zone) {}
|
|
const FocusedTree* tree_;
|
Value def_value_;
|
Zone* zone_;
|
};
|
|
// This structure represents a hash tree with one focused path to a specific
|
// leaf. For the focused leaf, it stores key, value and key hash. The path is
|
// defined by the hash bits of the focused leaf. In a traditional tree
|
// datastructure, the nodes of a path form a linked list with the values being
|
// the pointers outside of this path. Instead of storing all of these nodes,
|
// we store an array of the pointers pointing outside of the path. This is
|
// similar to the stack used when doing DFS traversal of a tree. The hash of
|
// the leaf is used to know if the pointers point to the left or the
|
// right of the path. As there is no explicit representation of a tree node,
|
// this structure also represents all the nodes on its path. The intended node
|
// depends on the tree depth, which is always clear from the referencing
|
// context. So the pointer to a {FocusedTree} stored in the
|
// {PersistentMap.tree_} always references the root, while a pointer from a
|
// focused node of another {FocusedTree} always references to one tree level
|
// lower than before.
|
template <class Key, class Value, class Hasher>
|
struct PersistentMap<Key, Value, Hasher>::FocusedTree {
|
KeyValue key_value;
|
// The depth of the focused path, that is, the number of pointers stored in
|
// this structure.
|
int8_t length;
|
HashValue key_hash;
|
// Out-of-line storage for hash collisions.
|
const ZoneMap<Key, Value>* more;
|
using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
|
// {path_array} has to be the last member: To store an array inline, we
|
// over-allocate memory for this structure and access memory beyond
|
// {path_array}. This corresponds to a flexible array member as defined in
|
// C99.
|
const FocusedTree* path_array[1];
|
const FocusedTree*& path(int i) {
|
DCHECK(i < length);
|
return reinterpret_cast<const FocusedTree**>(
|
reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
|
}
|
const FocusedTree* path(int i) const {
|
DCHECK(i < length);
|
return reinterpret_cast<const FocusedTree* const*>(
|
reinterpret_cast<const byte*>(this) +
|
offsetof(FocusedTree, path_array))[i];
|
}
|
};
|
|
template <class Key, class Value, class Hasher>
|
class PersistentMap<Key, Value, Hasher>::HashValue {
|
public:
|
explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {}
|
|
Bit operator[](int pos) const {
|
DCHECK_LT(pos, kHashBits);
|
return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1))
|
? kRight
|
: kLeft;
|
}
|
|
bool operator<(HashValue other) const { return bits_ < other.bits_; }
|
bool operator==(HashValue other) const { return bits_ == other.bits_; }
|
bool operator!=(HashValue other) const { return bits_ != other.bits_; }
|
HashValue operator^(HashValue other) const {
|
return HashValue(bits_ ^ other.bits_);
|
}
|
|
private:
|
static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_");
|
uint32_t bits_;
|
};
|
|
template <class Key, class Value, class Hasher>
|
class PersistentMap<Key, Value, Hasher>::iterator {
|
public:
|
const value_type operator*() const {
|
if (current_->more) {
|
return *more_iter_;
|
} else {
|
return current_->key_value;
|
}
|
}
|
|
iterator& operator++() {
|
do {
|
if (!current_) {
|
// Iterator is past the end.
|
return *this;
|
}
|
if (current_->more) {
|
DCHECK(more_iter_ != current_->more->end());
|
++more_iter_;
|
if (more_iter_ != current_->more->end()) return *this;
|
}
|
if (level_ == 0) {
|
*this = end(def_value_);
|
return *this;
|
}
|
--level_;
|
while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
|
if (level_ == 0) {
|
*this = end(def_value_);
|
return *this;
|
}
|
--level_;
|
}
|
const FocusedTree* first_right_alternative = path_[level_];
|
level_++;
|
current_ = FindLeftmost(first_right_alternative, &level_, &path_);
|
if (current_->more) {
|
more_iter_ = current_->more->begin();
|
}
|
} while (!((**this).second != def_value()));
|
return *this;
|
}
|
|
bool operator==(const iterator& other) const {
|
if (is_end()) return other.is_end();
|
if (other.is_end()) return false;
|
if (current_->key_hash != other.current_->key_hash) {
|
return false;
|
} else {
|
return (**this).first == (*other).first;
|
}
|
}
|
bool operator!=(const iterator& other) const { return !(*this == other); }
|
|
bool operator<(const iterator& other) const {
|
if (is_end()) return false;
|
if (other.is_end()) return true;
|
if (current_->key_hash == other.current_->key_hash) {
|
return (**this).first < (*other).first;
|
} else {
|
return current_->key_hash < other.current_->key_hash;
|
}
|
}
|
|
bool is_end() const { return current_ == nullptr; }
|
|
const Value& def_value() { return def_value_; }
|
|
static iterator begin(const FocusedTree* tree, Value def_value) {
|
iterator i(def_value);
|
i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
|
if (i.current_->more) {
|
i.more_iter_ = i.current_->more->begin();
|
}
|
// Skip entries with default value. PersistentMap iterators must never point
|
// to a default value.
|
while (!i.is_end() && !((*i).second != def_value)) ++i;
|
return i;
|
}
|
|
static iterator end(Value def_value) { return iterator(def_value); }
|
|
private:
|
int level_;
|
typename FocusedTree::more_iterator more_iter_;
|
const FocusedTree* current_;
|
std::array<const FocusedTree*, kHashBits> path_;
|
Value def_value_;
|
|
explicit iterator(Value def_value)
|
: level_(0), current_(nullptr), def_value_(def_value) {}
|
};
|
|
template <class Key, class Value, class Hasher>
|
class PersistentMap<Key, Value, Hasher>::double_iterator {
|
public:
|
std::tuple<Key, Value, Value> operator*() {
|
if (first_current_) {
|
auto pair = *first_;
|
return std::make_tuple(
|
pair.first, pair.second,
|
second_current_ ? (*second_).second : second_.def_value());
|
} else {
|
DCHECK(second_current_);
|
auto pair = *second_;
|
return std::make_tuple(pair.first, first_.def_value(), pair.second);
|
}
|
}
|
|
double_iterator& operator++() {
|
#ifdef DEBUG
|
iterator old_first = first_;
|
iterator old_second = second_;
|
#endif
|
if (first_current_) {
|
++first_;
|
DCHECK(old_first < first_);
|
}
|
if (second_current_) {
|
++second_;
|
DCHECK(old_second < second_);
|
}
|
return *this = double_iterator(first_, second_);
|
}
|
|
double_iterator(iterator first, iterator second)
|
: first_(first), second_(second) {
|
if (first_ == second_) {
|
first_current_ = second_current_ = true;
|
} else if (first_ < second_) {
|
first_current_ = true;
|
second_current_ = false;
|
} else {
|
DCHECK(second_ < first_);
|
first_current_ = false;
|
second_current_ = true;
|
}
|
}
|
|
bool operator!=(const double_iterator& other) {
|
return first_ != other.first_ || second_ != other.second_;
|
}
|
|
bool is_end() const { return first_.is_end() && second_.is_end(); }
|
|
private:
|
iterator first_;
|
iterator second_;
|
bool first_current_;
|
bool second_current_;
|
};
|
|
template <class Key, class Value, class Hasher>
|
void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) {
|
HashValue key_hash = HashValue(Hasher()(key));
|
std::array<const FocusedTree*, kHashBits> path;
|
int length = 0;
|
const FocusedTree* old = FindHash(key_hash, &path, &length);
|
ZoneMap<Key, Value>* more = nullptr;
|
if (!(GetFocusedValue(old, key) != value)) return;
|
if (old && !(old->more == nullptr && old->key_value.key() == key)) {
|
more = new (zone_->New(sizeof(*more))) ZoneMap<Key, Value>(zone_);
|
if (old->more) {
|
*more = *old->more;
|
} else {
|
(*more)[old->key_value.key()] = old->key_value.value();
|
}
|
(*more)[key] = value;
|
}
|
FocusedTree* tree =
|
new (zone_->New(sizeof(FocusedTree) +
|
std::max(0, length - 1) * sizeof(const FocusedTree*)))
|
FocusedTree{KeyValue(std::move(key), std::move(value)),
|
static_cast<int8_t>(length),
|
key_hash,
|
more,
|
{}};
|
for (int i = 0; i < length; ++i) {
|
tree->path(i) = path[i];
|
}
|
*this = PersistentMap(tree, zone_, def_value_);
|
}
|
|
template <class Key, class Value, class Hasher>
|
const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
|
PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
|
const FocusedTree* tree = tree_;
|
int level = 0;
|
while (tree && hash != tree->key_hash) {
|
while ((hash ^ tree->key_hash)[level] == 0) {
|
++level;
|
}
|
tree = level < tree->length ? tree->path(level) : nullptr;
|
++level;
|
}
|
return tree;
|
}
|
|
template <class Key, class Value, class Hasher>
|
const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
|
PersistentMap<Key, Value, Hasher>::FindHash(
|
HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
|
int* length) const {
|
const FocusedTree* tree = tree_;
|
int level = 0;
|
while (tree && hash != tree->key_hash) {
|
int map_length = tree->length;
|
while ((hash ^ tree->key_hash)[level] == 0) {
|
(*path)[level] = level < map_length ? tree->path(level) : nullptr;
|
++level;
|
}
|
(*path)[level] = tree;
|
tree = level < tree->length ? tree->path(level) : nullptr;
|
++level;
|
}
|
if (tree) {
|
while (level < tree->length) {
|
(*path)[level] = tree->path(level);
|
++level;
|
}
|
}
|
*length = level;
|
return tree;
|
}
|
|
template <class Key, class Value, class Hasher>
|
const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
|
const FocusedTree* tree, const Key& key) const {
|
if (!tree) {
|
return def_value_;
|
}
|
if (tree->more) {
|
auto it = tree->more->find(key);
|
if (it == tree->more->end())
|
return def_value_;
|
else
|
return it->second;
|
} else {
|
if (key == tree->key_value.key()) {
|
return tree->key_value.value();
|
} else {
|
return def_value_;
|
}
|
}
|
}
|
|
template <class Key, class Value, class Hasher>
|
const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
|
PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
|
Bit bit) {
|
if (tree->key_hash[level] == bit) {
|
return tree;
|
} else if (level < tree->length) {
|
return tree->path(level);
|
} else {
|
return nullptr;
|
}
|
}
|
|
template <class Key, class Value, class Hasher>
|
const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
|
PersistentMap<Key, Value, Hasher>::FindLeftmost(
|
const FocusedTree* start, int* level,
|
std::array<const FocusedTree*, kHashBits>* path) {
|
const FocusedTree* current = start;
|
while (*level < current->length) {
|
if (const FocusedTree* child = GetChild(current, *level, kLeft)) {
|
(*path)[*level] = GetChild(current, *level, kRight);
|
current = child;
|
++*level;
|
} else if (const FocusedTree* child = GetChild(current, *level, kRight)) {
|
(*path)[*level] = GetChild(current, *level, kLeft);
|
current = child;
|
++*level;
|
} else {
|
UNREACHABLE();
|
}
|
}
|
return current;
|
}
|
|
template <class Key, class Value, class Hasher>
|
std::ostream& operator<<(std::ostream& os,
|
const PersistentMap<Key, Value, Hasher>& map) {
|
os << "{";
|
bool first = true;
|
for (auto pair : map) {
|
if (!first) os << ", ";
|
first = false;
|
os << pair.first << ": " << pair.second;
|
}
|
return os << "}";
|
}
|
|
} // namespace compiler
|
} // namespace internal
|
} // namespace v8
|
|
#endif // V8_COMPILER_PERSISTENT_MAP_H_
|