// Copyright 2017 The Chromium OS Authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#include "puffin/src/puffin_stream.h"
|
|
#include <algorithm>
|
#include <memory>
|
#include <string>
|
#include <utility>
|
#include <vector>
|
|
#include "puffin/src/bit_reader.h"
|
#include "puffin/src/bit_writer.h"
|
#include "puffin/src/include/puffin/common.h"
|
#include "puffin/src/include/puffin/huffer.h"
|
#include "puffin/src/include/puffin/puffer.h"
|
#include "puffin/src/include/puffin/stream.h"
|
#include "puffin/src/logging.h"
|
#include "puffin/src/puff_reader.h"
|
#include "puffin/src/puff_writer.h"
|
|
using std::shared_ptr;
|
using std::unique_ptr;
|
using std::vector;
|
|
namespace puffin {
|
|
namespace {
|
|
bool CheckArgsIntegrity(uint64_t puff_size,
|
const vector<BitExtent>& deflates,
|
const vector<ByteExtent>& puffs) {
|
TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
|
// Check if the |puff_size| is actually greater than the last byte of the last
|
// puff in |puffs|.
|
if (!puffs.empty()) {
|
TEST_AND_RETURN_FALSE(puff_size >=
|
puffs.back().offset + puffs.back().length);
|
}
|
|
// Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
|
auto is_overlap = [](const auto& a, const auto& b) {
|
return (a.offset + a.length) > b.offset;
|
};
|
TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
|
deflates.end(),
|
is_overlap));
|
TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
|
puffs.end(),
|
is_overlap));
|
return true;
|
}
|
|
} // namespace
|
|
UniqueStreamPtr PuffinStream::CreateForPuff(UniqueStreamPtr stream,
|
shared_ptr<Puffer> puffer,
|
uint64_t puff_size,
|
const vector<BitExtent>& deflates,
|
const vector<ByteExtent>& puffs,
|
size_t max_cache_size) {
|
TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
|
nullptr);
|
TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
|
|
UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
|
nullptr, puff_size, deflates,
|
puffs, max_cache_size));
|
TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
|
return puffin_stream;
|
}
|
|
UniqueStreamPtr PuffinStream::CreateForHuff(UniqueStreamPtr stream,
|
shared_ptr<Huffer> huffer,
|
uint64_t puff_size,
|
const vector<BitExtent>& deflates,
|
const vector<ByteExtent>& puffs) {
|
TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
|
nullptr);
|
TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
|
|
UniqueStreamPtr puffin_stream(new PuffinStream(
|
std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
|
TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
|
return puffin_stream;
|
}
|
|
PuffinStream::PuffinStream(UniqueStreamPtr stream,
|
shared_ptr<Puffer> puffer,
|
shared_ptr<Huffer> huffer,
|
uint64_t puff_size,
|
const vector<BitExtent>& deflates,
|
const vector<ByteExtent>& puffs,
|
size_t max_cache_size)
|
: stream_(std::move(stream)),
|
puffer_(puffer),
|
huffer_(huffer),
|
puff_stream_size_(puff_size),
|
deflates_(deflates),
|
puffs_(puffs),
|
puff_pos_(0),
|
skip_bytes_(0),
|
deflate_bit_pos_(0),
|
last_byte_(0),
|
extra_byte_(0),
|
is_for_puff_(puffer_ ? true : false),
|
closed_(false),
|
max_cache_size_(max_cache_size),
|
cur_cache_size_(0) {
|
// Building upper bounds for faster seek.
|
upper_bounds_.reserve(puffs.size());
|
for (const auto& puff : puffs) {
|
upper_bounds_.emplace_back(puff.offset + puff.length);
|
}
|
upper_bounds_.emplace_back(puff_stream_size_ + 1);
|
|
// We can pass the size of the deflate stream too, but it is not necessary
|
// yet. We cannot get the size of stream from itself, because we might be
|
// writing into it and its size is not defined yet.
|
uint64_t deflate_stream_size = puff_stream_size_;
|
if (!puffs.empty()) {
|
deflate_stream_size =
|
((deflates.back().offset + deflates.back().length) / 8) +
|
puff_stream_size_ - (puffs.back().offset + puffs.back().length);
|
}
|
|
deflates_.emplace_back(deflate_stream_size * 8, 0);
|
puffs_.emplace_back(puff_stream_size_, 0);
|
|
// Look for the largest puff and deflate extents and get proper size buffers.
|
uint64_t max_puff_length = 0;
|
for (const auto& puff : puffs) {
|
max_puff_length = std::max(max_puff_length, puff.length);
|
}
|
puff_buffer_.reset(new Buffer(max_puff_length + 1));
|
if (max_cache_size_ < max_puff_length) {
|
max_cache_size_ = 0; // It means we are not caching puffs.
|
}
|
|
uint64_t max_deflate_length = 0;
|
for (const auto& deflate : deflates) {
|
max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
|
}
|
deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
|
}
|
|
bool PuffinStream::GetSize(uint64_t* size) const {
|
*size = puff_stream_size_;
|
return true;
|
}
|
|
bool PuffinStream::GetOffset(uint64_t* offset) const {
|
*offset = puff_pos_ + skip_bytes_;
|
return true;
|
}
|
|
bool PuffinStream::Seek(uint64_t offset) {
|
TEST_AND_RETURN_FALSE(!closed_);
|
if (!is_for_puff_) {
|
// For huffing we should not seek, only seek to zero is accepted.
|
TEST_AND_RETURN_FALSE(offset == 0);
|
}
|
|
TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
|
|
// We are searching first available puff which either includes the |offset| or
|
// it is the next available puff after |offset|.
|
auto next_puff_iter =
|
std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
|
TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
|
auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
|
cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
|
cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
|
|
if (offset < cur_puff_->offset) {
|
// between two puffs.
|
puff_pos_ = offset;
|
auto back_track_bytes = cur_puff_->offset - puff_pos_;
|
deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
|
if (cur_puff_ != puffs_.begin()) {
|
auto prev_deflate = std::prev(cur_deflate_);
|
if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
|
deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
|
}
|
}
|
} else {
|
// Inside a puff.
|
puff_pos_ = cur_puff_->offset;
|
deflate_bit_pos_ = cur_deflate_->offset;
|
}
|
skip_bytes_ = offset - puff_pos_;
|
if (!is_for_puff_ && offset == 0) {
|
TEST_AND_RETURN_FALSE(stream_->Seek(0));
|
TEST_AND_RETURN_FALSE(SetExtraByte());
|
}
|
return true;
|
}
|
|
bool PuffinStream::Close() {
|
closed_ = true;
|
return stream_->Close();
|
}
|
|
bool PuffinStream::Read(void* buffer, size_t count) {
|
TEST_AND_RETURN_FALSE(!closed_);
|
TEST_AND_RETURN_FALSE(is_for_puff_);
|
if (cur_puff_ == puffs_.end()) {
|
TEST_AND_RETURN_FALSE(count == 0);
|
}
|
auto bytes = static_cast<uint8_t*>(buffer);
|
uint64_t length = count;
|
uint64_t bytes_read = 0;
|
while (bytes_read < length) {
|
if (puff_pos_ < cur_puff_->offset) {
|
// Reading between two deflates. We also read bytes that have at least one
|
// bit of a deflate bit stream. The byte which has both deflate and raw
|
// data will be shifted or masked off the deflate bits and the remaining
|
// value will be saved in the puff stream as an byte integer.
|
uint64_t start_byte = (deflate_bit_pos_ / 8);
|
uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
|
auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
|
TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
|
|
TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
|
TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
|
|
// If true, we read the first byte of the curret deflate. So we have to
|
// mask out the deflate bits (which are most significant bits.)
|
if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
|
bytes[bytes_read + bytes_to_read - 1] &=
|
(1 << (cur_deflate_->offset & 7)) - 1;
|
}
|
|
// If true, we read the last byte of the previous deflate and we have to
|
// shift it out. The least significat bits belongs to the deflate
|
// stream. The order of these last two conditions are important because a
|
// byte can contain a finishing deflate and a starting deflate with some
|
// bits between them so we have to modify correctly. Keep in mind that in
|
// this situation both are modifying the same byte.
|
if (start_byte * 8 < deflate_bit_pos_) {
|
bytes[bytes_read] >>= deflate_bit_pos_ & 7;
|
}
|
|
// Pass |deflate_bit_pos_| for all the read bytes.
|
deflate_bit_pos_ -= deflate_bit_pos_ & 7;
|
deflate_bit_pos_ += bytes_to_read * 8;
|
if (deflate_bit_pos_ > cur_deflate_->offset) {
|
// In case it reads into the first byte of the current deflate.
|
deflate_bit_pos_ = cur_deflate_->offset;
|
}
|
|
bytes_read += bytes_to_read;
|
puff_pos_ += bytes_to_read;
|
TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
|
} else {
|
// Reading the deflate itself. We read all bytes including the first and
|
// last byte (which may partially include a deflate bit). Here we keep the
|
// |puff_pos_| point to the first byte of the puffed stream and
|
// |skip_bytes_| shows how many bytes in the puff we have copied till now.
|
auto start_byte = (cur_deflate_->offset / 8);
|
auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
|
auto bytes_to_read = end_byte - start_byte;
|
// Puff directly to buffer if it has space.
|
bool puff_directly_into_buffer =
|
max_cache_size_ == 0 && (skip_bytes_ == 0) &&
|
(length - bytes_read >= cur_puff_->length);
|
|
auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
|
if (max_cache_size_ == 0 ||
|
!GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
|
// Did not find the puff buffer in cache. We have to build it.
|
deflate_buffer_->resize(bytes_to_read);
|
TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
|
TEST_AND_RETURN_FALSE(
|
stream_->Read(deflate_buffer_->data(), bytes_to_read));
|
BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
|
|
BufferPuffWriter puff_writer(puff_directly_into_buffer
|
? bytes + bytes_read
|
: puff_buffer_->data(),
|
cur_puff_->length);
|
|
// Drop the first unused bits.
|
size_t extra_bits_len = cur_deflate_->offset & 7;
|
TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
|
bit_reader.DropBits(extra_bits_len);
|
|
TEST_AND_RETURN_FALSE(
|
puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr));
|
TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
|
TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
|
} else {
|
// Just seek to proper location.
|
TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
|
}
|
// Copy from puff buffer to output if needed.
|
auto bytes_to_copy =
|
std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
|
if (!puff_directly_into_buffer) {
|
memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
|
bytes_to_copy);
|
}
|
|
skip_bytes_ += bytes_to_copy;
|
bytes_read += bytes_to_copy;
|
|
// Move to next puff.
|
if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
|
puff_pos_ += skip_bytes_;
|
skip_bytes_ = 0;
|
deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
|
cur_puff_++;
|
cur_deflate_++;
|
if (cur_puff_ == puffs_.end()) {
|
break;
|
}
|
}
|
}
|
}
|
|
TEST_AND_RETURN_FALSE(bytes_read == length);
|
return true;
|
}
|
|
bool PuffinStream::Write(const void* buffer, size_t count) {
|
TEST_AND_RETURN_FALSE(!closed_);
|
TEST_AND_RETURN_FALSE(!is_for_puff_);
|
auto bytes = static_cast<const uint8_t*>(buffer);
|
uint64_t length = count;
|
uint64_t bytes_wrote = 0;
|
while (bytes_wrote < length) {
|
if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
|
// Between two puffs or before the first puff. We know that we are
|
// starting from the byte boundary because we have already processed the
|
// non-deflate bits of the last byte of the last deflate. Here we don't
|
// process any byte that has deflate bit.
|
TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
|
auto copy_len =
|
std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
|
length - bytes_wrote);
|
TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
|
bytes_wrote += copy_len;
|
puff_pos_ += copy_len;
|
deflate_bit_pos_ += copy_len * 8;
|
} else {
|
// We are in a puff. We have to buffer incoming bytes until we reach the
|
// size of the current puff so we can huff :). If the last bit of the
|
// current deflate does not end in a byte boundary, then we have to read
|
// one more byte to fill up the last byte of the deflate stream before
|
// doing anything else.
|
|
// |deflate_bit_pos_| now should be in the same byte as
|
// |cur_deflate->offset|.
|
if (deflate_bit_pos_ < cur_deflate_->offset) {
|
last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
|
skip_bytes_ = 0;
|
deflate_bit_pos_ = cur_deflate_->offset;
|
puff_pos_++;
|
TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
|
}
|
|
auto copy_len = std::min(length - bytes_wrote,
|
cur_puff_->length + extra_byte_ - skip_bytes_);
|
TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
|
memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
|
skip_bytes_ += copy_len;
|
bytes_wrote += copy_len;
|
|
if (skip_bytes_ == cur_puff_->length + extra_byte_) {
|
// |puff_buffer_| is full, now huff into the |deflate_buffer_|.
|
auto start_byte = cur_deflate_->offset / 8;
|
auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
|
auto bytes_to_write = end_byte - start_byte;
|
|
deflate_buffer_->resize(bytes_to_write);
|
BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
|
BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
|
|
// Write last byte if it has any.
|
TEST_AND_RETURN_FALSE(
|
bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
|
last_byte_ = 0;
|
|
TEST_AND_RETURN_FALSE(huffer_->HuffDeflate(&puff_reader, &bit_writer));
|
TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
|
TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
|
|
deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
|
if (extra_byte_ == 1) {
|
deflate_buffer_->data()[bytes_to_write - 1] |=
|
puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
|
deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
|
} else if ((deflate_bit_pos_ & 7) != 0) {
|
// This happens if current and next deflate finish and end on the same
|
// byte, then we cannot write into output until we have huffed the
|
// next puff buffer, so untill then we cache it into |last_byte_| and
|
// we won't write it out.
|
last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
|
bytes_to_write--;
|
}
|
|
// Write |deflate_buffer_| into output.
|
TEST_AND_RETURN_FALSE(
|
stream_->Write(deflate_buffer_->data(), bytes_to_write));
|
|
// Move to the next deflate/puff.
|
puff_pos_ += skip_bytes_;
|
skip_bytes_ = 0;
|
cur_puff_++;
|
cur_deflate_++;
|
if (cur_puff_ == puffs_.end()) {
|
break;
|
}
|
// Find if need an extra byte to cache at the end.
|
TEST_AND_RETURN_FALSE(SetExtraByte());
|
}
|
}
|
}
|
|
TEST_AND_RETURN_FALSE(bytes_wrote == length);
|
return true;
|
}
|
|
bool PuffinStream::SetExtraByte() {
|
TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
|
if ((cur_deflate_ + 1) == deflates_.end()) {
|
extra_byte_ = 0;
|
return true;
|
}
|
uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
|
if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
|
extra_byte_ = 1;
|
} else {
|
extra_byte_ = 0;
|
}
|
return true;
|
}
|
|
bool PuffinStream::GetPuffCache(int puff_id,
|
uint64_t puff_size,
|
shared_ptr<Buffer>* buffer) {
|
bool found = false;
|
// Search for it.
|
std::pair<int, shared_ptr<Buffer>> cache;
|
// TODO(*): Find a faster way of doing this? Maybe change the data structure
|
// that supports faster search.
|
for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
|
if (iter->first == puff_id) {
|
cache = std::move(*iter);
|
found = true;
|
// Remove it so later we can add it to the begining of the list.
|
caches_.erase(iter);
|
break;
|
}
|
}
|
|
// If not found, either create one or get one from the list.
|
if (!found) {
|
// If |caches_| were full, remove last ones in the list (least used), until
|
// we have enough space for the new cache.
|
while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
|
cache = std::move(caches_.back());
|
caches_.pop_back(); // Remove it from the list.
|
cur_cache_size_ -= cache.second->capacity();
|
}
|
// If we have not populated the cache yet, create one.
|
if (!cache.second) {
|
cache.second.reset(new Buffer(puff_size));
|
}
|
cache.second->resize(puff_size);
|
|
constexpr uint64_t kMaxSizeDifference = 20 * 1024;
|
if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
|
cache.second->shrink_to_fit();
|
}
|
|
cur_cache_size_ += cache.second->capacity();
|
cache.first = puff_id;
|
}
|
|
*buffer = cache.second;
|
// By now we have either removed a cache or created new one. Now we have to
|
// insert it in the front of the list so it becomes the most recently used
|
// one.
|
caches_.push_front(std::move(cache));
|
return found;
|
}
|
|
} // namespace puffin
|