// Copyright 2017 The Chromium OS Authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#include "puffin/src/huffman_table.h"
|
|
#include <algorithm>
|
#include <vector>
|
|
#include "puffin/src/logging.h"
|
|
using std::string;
|
using std::vector;
|
|
namespace puffin {
|
|
// Permutations of input Huffman code lengths (used only to read code lengths
|
// necessary for reading Huffman table.)
|
const uint8_t kPermutations[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
|
11, 4, 12, 3, 13, 2, 14, 1, 15};
|
|
// The bases of each alphabet which is added to the integer value of extra
|
// bits that comes after the Huffman code in the input to create the given
|
// length value. The last element is a guard.
|
const uint16_t kLengthBases[30] = {
|
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
|
31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
|
|
// Number of extra bits that comes after the associating Huffman code.
|
const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
4, 4, 4, 4, 5, 5, 5, 5, 0};
|
|
// Same as |kLengthBases| but for the distances instead of lengths. The last
|
// element is a guard.
|
const uint16_t kDistanceBases[31] = {
|
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
|
49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
|
2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
|
|
// Same as |kLengthExtraBits| but for distances instead of lengths.
|
const uint8_t kDistanceExtraBits[30] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
|
|
// 288 is the maximum number of needed huffman codes for an alphabet. Fixed
|
// huffman table needs 288 and dynamic huffman table needs maximum 286.
|
// 286 = 256 (coding a byte) +
|
// 1 (coding the end of block symbole) +
|
// 29 (coding the lengths)
|
HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
|
|
bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
|
// Temporary buffers used in |InitHuffmanCodes|.
|
uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
|
uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
|
|
// 1. Count the number of codes for each length;
|
for (auto len : lens) {
|
len_count_[len]++;
|
}
|
|
for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
|
if (len_count_[*max_bits] != 0) {
|
break;
|
}
|
}
|
|
// Check for oversubscribed code lengths. (A code with length 'L' cannot have
|
// more than 2^L items.
|
for (size_t idx = 1; idx <= *max_bits; idx++) {
|
if (len_count_[idx] > (1 << idx)) {
|
LOG(ERROR) << "Oversubscribed code lengths error!";
|
return false;
|
}
|
}
|
|
// 2. Compute the coding of the first element for each length.
|
uint16_t code = 0;
|
len_count_[0] = 0;
|
for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
|
code = (code + len_count_[bits - 1]) << 1;
|
next_code_[bits] = code;
|
}
|
|
codeindexpairs_.clear();
|
// 3. Calculate all the code values.
|
for (size_t idx = 0; idx < lens.size(); idx++) {
|
auto len = lens[idx];
|
if (len == 0) {
|
continue;
|
}
|
|
// Reverse the code
|
CodeIndexPair cip;
|
cip.code = 0;
|
auto tmp_code = next_code_[len];
|
for (size_t r = 0; r < len; r++) {
|
cip.code <<= 1;
|
cip.code |= tmp_code & 1U;
|
tmp_code >>= 1;
|
}
|
cip.index = idx;
|
codeindexpairs_.push_back(cip);
|
next_code_[len]++;
|
}
|
return true;
|
}
|
|
bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
|
vector<uint16_t>* hcodes,
|
size_t* max_bits) {
|
TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
|
// Sort descending based on the bit-length of the code.
|
std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
|
[&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
|
return lens[a.index] > lens[b.index];
|
});
|
|
// Only zero out the part of hcodes which is valuable.
|
memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
|
for (const auto& cip : codeindexpairs_) {
|
// The MSB bit of the code in hcodes is set if it is a valid code and its
|
// code exists in the input Huffman table.
|
(*hcodes)[cip.code] = cip.index | 0x8000;
|
auto fill_bits = *max_bits - lens[cip.index];
|
for (auto idx = 1; idx < (1 << fill_bits); idx++) {
|
auto location = (idx << lens[cip.index]) | cip.code;
|
if (!((*hcodes)[location] & 0x8000)) {
|
(*hcodes)[location] = cip.index | 0x8000;
|
}
|
}
|
}
|
return true;
|
}
|
|
bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
|
vector<uint16_t>* rcodes,
|
size_t* max_bits) {
|
TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
|
// Sort ascending based on the index.
|
std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
|
[](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
|
return a.index < b.index;
|
});
|
|
size_t index = 0;
|
for (size_t idx = 0; idx < rcodes->size(); idx++) {
|
if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
|
(*rcodes)[idx] = codeindexpairs_[index].code;
|
index++;
|
} else {
|
(*rcodes)[idx] = 0;
|
}
|
}
|
return true;
|
}
|
|
bool HuffmanTable::BuildFixedHuffmanTable() {
|
if (!initialized_) {
|
// For all the vectors used in this class, we set the size in the
|
// constructor and we do not change the size later. This is for optimization
|
// purposes. The total size of data in this class is approximately
|
// 2KB. Because it is a constructor return values cannot be checked.
|
lit_len_lens_.resize(288);
|
lit_len_rcodes_.resize(288);
|
lit_len_hcodes_.resize(1 << 9);
|
|
distance_lens_.resize(30);
|
distance_rcodes_.resize(30);
|
distance_hcodes_.resize(1 << 5);
|
|
size_t i = 0;
|
while (i < 144) {
|
lit_len_lens_[i++] = 8;
|
}
|
while (i < 256) {
|
lit_len_lens_[i++] = 9;
|
}
|
while (i < 280) {
|
lit_len_lens_[i++] = 7;
|
}
|
while (i < 288) {
|
lit_len_lens_[i++] = 8;
|
}
|
|
i = 0;
|
while (i < 30) {
|
distance_lens_[i++] = 5;
|
}
|
|
TEST_AND_RETURN_FALSE(
|
BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
|
|
TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
|
&distance_max_bits_));
|
|
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
|
lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
|
|
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
|
distance_lens_, &distance_rcodes_, &distance_max_bits_));
|
|
initialized_ = true;
|
}
|
return true;
|
}
|
|
bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
|
uint8_t* buffer,
|
size_t* length) {
|
// Initilize only once and reuse.
|
if (!initialized_) {
|
// Only resizing the arrays needed.
|
code_lens_.resize(19);
|
code_hcodes_.resize(1 << 7);
|
|
lit_len_lens_.resize(286);
|
lit_len_hcodes_.resize(1 << 15);
|
|
distance_lens_.resize(30);
|
distance_hcodes_.resize(1 << 15);
|
|
// 286: Maximum number of literal/lengths symbols.
|
// 30: Maximum number of distance symbols.
|
// The reason we reserve this to the sum of both maximum sizes is that we
|
// need to calculate both huffman codes contiguously. See b/72815313.
|
tmp_lens_.resize(286 + 30);
|
initialized_ = true;
|
}
|
|
// Read the header. Reads the first portion of the Huffman data from input and
|
// writes it into the puff |buffer|. The first portion includes the size
|
// (|num_lit_len|) of the literals/lengths Huffman code length array
|
// (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
|
// code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
|
// Huffman code length array (dynamic_code_lens_) for reading
|
// |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
|
// reading |dynamic_code_lens_|.
|
|
TEST_AND_RETURN_FALSE(*length >= 3);
|
size_t index = 0;
|
TEST_AND_RETURN_FALSE(br->CacheBits(14));
|
buffer[index++] = br->ReadBits(5); // HLIST
|
auto num_lit_len = br->ReadBits(5) + 257;
|
br->DropBits(5);
|
|
buffer[index++] = br->ReadBits(5); // HDIST
|
auto num_distance = br->ReadBits(5) + 1;
|
br->DropBits(5);
|
|
buffer[index++] = br->ReadBits(4); // HCLEN
|
auto num_codes = br->ReadBits(4) + 4;
|
br->DropBits(4);
|
|
TEST_AND_RETURN_FALSE(
|
CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
|
|
bool checked = false;
|
size_t idx = 0;
|
TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2);
|
// Two codes per byte
|
for (; idx < num_codes; idx++) {
|
TEST_AND_RETURN_FALSE(br->CacheBits(3));
|
code_lens_[kPermutations[idx]] = br->ReadBits(3);
|
if (checked) {
|
buffer[index++] |= br->ReadBits(3);
|
} else {
|
buffer[index] = br->ReadBits(3) << 4;
|
}
|
checked = !checked;
|
br->DropBits(3);
|
}
|
// Pad the last byte if odd number of codes.
|
if (checked) {
|
index++;
|
}
|
for (; idx < 19; idx++) {
|
code_lens_[kPermutations[idx]] = 0;
|
}
|
|
TEST_AND_RETURN_FALSE(
|
BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_));
|
|
// Build literals/lengths and distance Huffman code length arrays.
|
auto bytes_available = (*length - index);
|
tmp_lens_.clear();
|
TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
|
br, buffer + index, &bytes_available, code_max_bits_,
|
num_lit_len + num_distance, &tmp_lens_));
|
index += bytes_available;
|
|
// TODO(ahassani): Optimize this so the memcpy is not needed anymore.
|
lit_len_lens_.clear();
|
lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
|
tmp_lens_.begin() + num_lit_len);
|
|
distance_lens_.clear();
|
distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
|
tmp_lens_.end());
|
|
TEST_AND_RETURN_FALSE(
|
BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
|
|
// Build distance Huffman codes.
|
TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
|
&distance_max_bits_));
|
|
*length = index;
|
return true;
|
}
|
|
bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
|
uint8_t* buffer,
|
size_t* length,
|
size_t max_bits,
|
size_t num_codes,
|
Buffer* lens) {
|
size_t index = 0;
|
lens->clear();
|
for (size_t idx = 0; idx < num_codes;) {
|
TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
|
auto bits = br->ReadBits(max_bits);
|
uint16_t code;
|
size_t nbits;
|
TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits));
|
TEST_AND_RETURN_FALSE(index < *length);
|
br->DropBits(nbits);
|
if (code < 16) {
|
buffer[index++] = code;
|
lens->push_back(code);
|
idx++;
|
} else {
|
TEST_AND_RETURN_FALSE(code < 19);
|
size_t copy_num = 0;
|
uint8_t copy_val;
|
switch (code) {
|
case 16:
|
TEST_AND_RETURN_FALSE(idx != 0);
|
TEST_AND_RETURN_FALSE(br->CacheBits(2));
|
copy_num = 3 + br->ReadBits(2);
|
buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times
|
copy_val = (*lens)[idx - 1];
|
br->DropBits(2);
|
break;
|
|
case 17:
|
TEST_AND_RETURN_FALSE(br->CacheBits(3));
|
copy_num = 3 + br->ReadBits(3);
|
buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times
|
copy_val = 0;
|
br->DropBits(3);
|
break;
|
|
case 18:
|
TEST_AND_RETURN_FALSE(br->CacheBits(7));
|
copy_num = 11 + br->ReadBits(7);
|
buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times
|
copy_val = 0;
|
br->DropBits(7);
|
break;
|
|
default:
|
LOG(ERROR) << "Invalid code!";
|
return false;
|
}
|
idx += copy_num;
|
while (copy_num--) {
|
lens->push_back(copy_val);
|
}
|
}
|
}
|
TEST_AND_RETURN_FALSE(lens->size() == num_codes);
|
*length = index;
|
return true;
|
}
|
|
bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
|
size_t length,
|
BitWriterInterface* bw) {
|
if (!initialized_) {
|
// Only resizing the arrays needed.
|
code_lens_.resize(19);
|
code_rcodes_.resize(19);
|
|
lit_len_lens_.resize(286);
|
lit_len_rcodes_.resize(286);
|
|
distance_lens_.resize(30);
|
distance_rcodes_.resize(30);
|
|
tmp_lens_.resize(286 + 30);
|
|
initialized_ = true;
|
}
|
|
TEST_AND_RETURN_FALSE(length >= 3);
|
size_t index = 0;
|
// Write the header.
|
size_t num_lit_len = buffer[index] + 257;
|
TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
|
|
size_t num_distance = buffer[index] + 1;
|
TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
|
|
size_t num_codes = buffer[index] + 4;
|
TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++]));
|
|
TEST_AND_RETURN_FALSE(
|
CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
|
|
TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2);
|
bool checked = false;
|
size_t idx = 0;
|
for (; idx < num_codes; idx++) {
|
uint8_t len;
|
if (checked) {
|
len = buffer[index++] & 0x0F;
|
} else {
|
len = buffer[index] >> 4;
|
}
|
checked = !checked;
|
code_lens_[kPermutations[idx]] = len;
|
TEST_AND_RETURN_FALSE(bw->WriteBits(3, len));
|
}
|
if (checked) {
|
index++;
|
}
|
for (; idx < 19; idx++) {
|
code_lens_[kPermutations[idx]] = 0;
|
}
|
|
TEST_AND_RETURN_FALSE(
|
BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_));
|
|
// Build literal/lengths and distance Huffman code length arrays.
|
auto bytes_available = length - index;
|
TEST_AND_RETURN_FALSE(
|
BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
|
num_lit_len + num_distance, &tmp_lens_));
|
index += bytes_available;
|
|
lit_len_lens_.clear();
|
lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
|
tmp_lens_.begin() + num_lit_len);
|
|
distance_lens_.clear();
|
distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
|
tmp_lens_.end());
|
|
// Build literal/lengths Huffman reverse codes.
|
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
|
lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
|
|
// Build distance Huffman reverse codes.
|
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
|
distance_lens_, &distance_rcodes_, &distance_max_bits_));
|
|
TEST_AND_RETURN_FALSE(length == index);
|
|
return true;
|
}
|
|
bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
|
size_t* length,
|
BitWriterInterface* bw,
|
size_t num_codes,
|
Buffer* lens) {
|
lens->clear();
|
uint16_t hcode;
|
size_t nbits;
|
size_t index = 0;
|
for (size_t idx = 0; idx < num_codes;) {
|
TEST_AND_RETURN_FALSE(index < *length);
|
auto pcode = buffer[index++];
|
TEST_AND_RETURN_FALSE(pcode <= 155);
|
|
auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
|
TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits));
|
TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode));
|
if (code < 16) {
|
lens->push_back(code);
|
idx++;
|
} else {
|
size_t copy_num = 0;
|
uint8_t copy_val;
|
switch (code) {
|
case 16:
|
// Cannot repeat a non-existent last code if idx == 0.
|
TEST_AND_RETURN_FALSE(idx != 0);
|
TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16));
|
copy_num = 3 + pcode - 16;
|
copy_val = (*lens)[idx - 1];
|
break;
|
|
case 17:
|
TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20));
|
copy_num = 3 + pcode - 20;
|
copy_val = 0;
|
break;
|
|
case 18:
|
TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28));
|
copy_num = 11 + pcode - 28;
|
copy_val = 0;
|
break;
|
|
default:
|
break;
|
}
|
idx += copy_num;
|
while (copy_num--) {
|
lens->push_back(copy_val);
|
}
|
}
|
}
|
TEST_AND_RETURN_FALSE(lens->size() == num_codes);
|
*length = index;
|
return true;
|
}
|
|
string BlockTypeToString(BlockType type) {
|
switch (type) {
|
case BlockType::kUncompressed:
|
return "Uncompressed";
|
|
case BlockType::kFixed:
|
return "Fixed";
|
|
case BlockType::kDynamic:
|
return "Dynamic";
|
|
default:
|
return "Unknown";
|
}
|
}
|
|
} // namespace puffin
|