/*
|
Copyright 2011 Google Inc. All Rights Reserved.
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
you may not use this file except in compliance with the License.
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
Unless required by applicable law or agreed to in writing, software
|
distributed under the License is distributed on an "AS IS" BASIS,
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
See the License for the specific language governing permissions and
|
limitations under the License.
|
|
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
|
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
|
*/
|
|
#include "lz77.h"
#include "util.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
/*
Resets a store to its empty state: zero entries and null litlens/dists
pointers. Nothing is freed here.
*/
void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
  store->litlens = NULL;
  store->dists = NULL;
  store->size = 0;
}
|
|
/*
Frees the litlens and dists arrays of the store.

The two pointers are reset to null afterwards, so cleaning the same store twice
is harmless (free(NULL) is a no-op) instead of being a double free. The size
field is left untouched.
*/
void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
  free(store->litlens);
  free(store->dists);
  store->litlens = NULL;
  store->dists = NULL;
}
|
|
/*
Makes dest an independent copy of source.

The arrays previously held by dest are freed through ZopfliCleanLZ77Store, so
dest must have been initialized (or hold valid arrays) before the call.
Terminates the process with exit(-1) if memory cannot be allocated.

source: store to copy from; not modified.
dest: store that receives freshly allocated copies of litlens and dists.
*/
void ZopfliCopyLZ77Store(
    const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
  size_t count = source->size;

  /* Copying a store onto itself: cleaning dest would free the very arrays that
  are about to be read, so there is nothing to do. */
  if (source == dest) return;

  ZopfliCleanLZ77Store(dest);
  dest->litlens = 0;
  dest->dists = 0;
  dest->size = count;

  /* An empty store needs no arrays. malloc(0) is allowed to return a null
  pointer, which must not be mistaken for an allocation failure. */
  if (count == 0) return;

  dest->litlens = (unsigned short*)malloc(sizeof(*dest->litlens) * count);
  dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * count);

  if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */

  memcpy(dest->litlens, source->litlens, sizeof(*dest->litlens) * count);
  memcpy(dest->dists, source->dists, sizeof(*dest->dists) * count);
}
|
|
/*
|
Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
|
context must be a ZopfliLZ77Store*.
|
*/
|
void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
|
ZopfliLZ77Store* store) {
|
size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
|
ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
|
ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
|
}
|
|
/*
Scores a match length in the light of its distance. Normally the score is just
the length, but a far-away match is penalized by one because long distances
cost many extra bits.

This is deliberately a rough heuristic, used only by the greedy LZ77 pass; more
accurate cost models are applied later, and refining this one may hurt rather
than help compression.

Direct effects:
 -a length of 3 combined with a long distance is avoided (this only matters
  when length == 3).
 -the choice between the two candidates of lazy matching gets slightly better.

Indirect effects:
 -the block split points, when block splitting runs first (the default), in a
  rather unpredictable way.
 -the first zopfli run, and so the chance that it lands near the optimal
  output.
*/
static int GetLengthScore(int length, int distance) {
  int score = length;

  /* Beyond 1024 a distance needs 9+ extra bits; this threshold seemed to be
  the sweet spot on tested files. */
  if (distance > 1024) {
    score--;
  }
  return score;
}
|
|
/*
Debug check that a (length, dist) pair describes a real match: the length bytes
starting at pos must equal the length bytes starting at pos - dist.

The check is made of asserts only, so it is compiled out entirely when NDEBUG
is defined.

data: the input bytes.
datasize: number of bytes in data.
pos: position of the match in data.
dist: backward distance of the match; must not exceed pos.
length: length of the match; pos + length must not exceed datasize.
*/
void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
                         unsigned short dist, unsigned short length) {
#ifndef NDEBUG
  size_t i;

  /* pos - dist below would wrap around for a distance reaching before the
  start of the data. */
  assert(dist <= pos);
  assert(pos + length <= datasize);
  for (i = 0; i < length; i++) {
    assert(data[pos - dist + i] == data[pos + i]);
  }
#else
  (void)data;
  (void)datasize;
  (void)pos;
  (void)dist;
  (void)length;
#endif
}
|
|
/*
Finds how many bytes starting at scan are equal to the bytes starting at match.
Returns a pointer just past the last byte after scan that is still equal to the
corresponding byte after match: either the first differing byte, or end.

scan is the position to compare.
match is the earlier position to compare.
end is one past the last byte that may be examined through scan.
safe_end is a few (8) bytes before end, for comparing multiple bytes at once.

The multi-byte comparisons load their words with memcpy, because scan and match
can have any alignment and the data is an array of unsigned char; reading it
through a cast size_t or unsigned int pointer would be undefined behavior.
*/
static const unsigned char* GetMatch(const unsigned char* scan,
                                     const unsigned char* match,
                                     const unsigned char* end,
                                     const unsigned char* safe_end) {

  if (sizeof(size_t) == 8) {
    /* 8 checks at once per array bounds check (size_t is 64-bit). */
    size_t scan_word;
    size_t match_word;
    while (scan < safe_end) {
      memcpy(&scan_word, scan, sizeof(scan_word));
      memcpy(&match_word, match, sizeof(match_word));
      if (scan_word != match_word) break;
      scan += 8;
      match += 8;
    }
  } else if (sizeof(unsigned int) == 4) {
    /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
    unsigned int scan_word;
    unsigned int match_word;
    while (scan < safe_end) {
      memcpy(&scan_word, scan, sizeof(scan_word));
      memcpy(&match_word, match, sizeof(match_word));
      if (scan_word != match_word) break;
      scan += 4;
      match += 4;
    }
  } else {
    /* do 8 checks at once per array bounds check. */
    while (scan < safe_end && *scan == *match && *++scan == *++match
          && *++scan == *++match && *++scan == *++match
          && *++scan == *++match && *++scan == *++match
          && *++scan == *++match && *++scan == *++match) {
      scan++; match++;
    }
  }

  /* The remaining few bytes, and the exact position of the first difference
  inside a word that did not compare equal. */
  while (scan != end && *scan == *match) {
    scan++; match++;
  }

  return scan;
}
|
|
#ifdef ZOPFLI_LONGEST_MATCH_CACHE
|
/*
|
Gets distance, length and sublen values from the cache if possible.
|
Returns 1 if it got the values from the cache, 0 if not.
|
Updates the limit value to a smaller one if possible with more limited
|
information from the cache.
|
*/
|
static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
|
size_t pos, size_t* limit,
|
unsigned short* sublen, unsigned short* distance, unsigned short* length) {
|
/* The LMC cache starts at the beginning of the block rather than the
|
beginning of the whole array. */
|
size_t lmcpos = pos - s->blockstart;
|
|
/* Length > 0 and dist 0 is invalid combination, which indicates on purpose
|
that this cache value is not filled in yet. */
|
unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
|
s->lmc->dist[lmcpos] != 0);
|
unsigned char limit_ok_for_cache = cache_available &&
|
(*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
|
(sublen && ZopfliMaxCachedSublen(s->lmc,
|
lmcpos, s->lmc->length[lmcpos]) >= *limit));
|
|
if (s->lmc && limit_ok_for_cache && cache_available) {
|
if (!sublen || s->lmc->length[lmcpos]
|
<= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
|
*length = s->lmc->length[lmcpos];
|
if (*length > *limit) *length = *limit;
|
if (sublen) {
|
ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
|
*distance = sublen[*length];
|
if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
|
assert(sublen[*length] == s->lmc->dist[lmcpos]);
|
}
|
} else {
|
*distance = s->lmc->dist[lmcpos];
|
}
|
return 1;
|
}
|
/* Can't use much of the cache, since the "sublens" need to be calculated,
|
but at least we already know when to stop. */
|
*limit = s->lmc->length[lmcpos];
|
}
|
|
return 0;
|
}
|
|
/*
|
Stores the found sublen, distance and length in the longest match cache, if
|
possible.
|
*/
|
static void StoreInLongestMatchCache(ZopfliBlockState* s,
|
size_t pos, size_t limit,
|
const unsigned short* sublen,
|
unsigned short distance, unsigned short length) {
|
/* The LMC cache starts at the beginning of the block rather than the
|
beginning of the whole array. */
|
size_t lmcpos = pos - s->blockstart;
|
|
/* Length > 0 and dist 0 is invalid combination, which indicates on purpose
|
that this cache value is not filled in yet. */
|
unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
|
s->lmc->dist[lmcpos] != 0);
|
|
if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
|
assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
|
s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
|
s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
|
assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
|
ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
|
}
|
}
|
#endif
|
|
/*
Finds the longest match (length and corresponding distance) for the bytes at
pos, by walking the hash chain of earlier positions with the same hash value.

s: block state; with ZOPFLI_LONGEST_MATCH_CACHE it is passed to the cache
    helpers, which first try to answer from the cache and afterwards store the
    result in it.
h: hash giving the chain of candidate positions (head, prev, hashval, val and,
    depending on the build options, same and the second hash). The assert on
    hhead[hval] below requires its head entry for the current hash value to be
    the window position of pos.
array: the data.
pos: position in the data to find the match for.
size: size of the data.
limit: maximum length to look for, between ZOPFLI_MIN_MATCH and
    ZOPFLI_MAX_MATCH. It is clamped to the number of bytes left after pos.
sublen: if not null, sublen[j] receives the distance of the match found for
    each length j that improves on the best length so far.
distance: output, distance of the best match; 0 when no match was found.
length: output, length of the best match. It is 0 when fewer than
    ZOPFLI_MIN_MATCH bytes remain, and 1 when the search ran but found nothing.
*/
void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
    const unsigned char* array,
    size_t pos, size_t size, size_t limit,
    unsigned short* sublen, unsigned short* distance, unsigned short* length) {
  unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
  unsigned short bestdist = 0;
  unsigned short bestlength = 1;
  const unsigned char* scan;
  const unsigned char* match;
  const unsigned char* arrayend;
  const unsigned char* arrayend_safe;
#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
  int chain_counter = ZOPFLI_MAX_CHAIN_HITS;  /* For quitting early. */
#endif

  unsigned dist = 0;  /* Not unsigned short on purpose. */

  int* hhead = h->head;
  unsigned short* hprev = h->prev;
  int* hhashval = h->hashval;
  int hval = h->val;

#ifdef ZOPFLI_LONGEST_MATCH_CACHE
  /* May also lower limit when the cache only knows the length. */
  if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
    assert(pos + *length <= size);
    return;
  }
#endif

  assert(limit <= ZOPFLI_MAX_MATCH);
  assert(limit >= ZOPFLI_MIN_MATCH);
  assert(pos < size);

  if (size - pos < ZOPFLI_MIN_MATCH) {
    /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
       try. */
    *length = 0;
    *distance = 0;
    return;
  }

  if (pos + limit > size) {
    limit = size - pos;
  }
  arrayend = &array[pos] + limit;
  /* Multi-byte comparison is only possible when at least 8 bytes are searched.
     With fewer, the safe end is set to the scan start, which disables it.
     Subtracting 8 unconditionally could form a pointer before the start of the
     array (when pos + limit < 8), which is undefined behavior. */
  arrayend_safe = limit >= 8 ? arrayend - 8 : &array[pos];

  assert(hval < 65536);

  pp = hhead[hval];  /* During the whole loop, p == hprev[pp]. */
  p = hprev[pp];

  assert(pp == hpos);

  /* Distance between two window positions, taking wrap-around into account. */
  dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);

  /* Go through all distances. */
  while (dist < ZOPFLI_WINDOW_SIZE) {
    unsigned short currentlength = 0;

    assert(p < ZOPFLI_WINDOW_SIZE);
    assert(p == hprev[pp]);
    assert(hhashval[p] == hval);

    if (dist > 0) {
      assert(pos < size);
      assert(dist <= pos);
      scan = &array[pos];
      match = &array[pos - dist];

      /* Testing the byte at position bestlength first, goes slightly faster. */
      if (pos + bestlength >= size
          || *(scan + bestlength) == *(match + bestlength)) {

#ifdef ZOPFLI_HASH_SAME
        /* Skip ahead by the smaller of the two "same" values stored for the
           two positions, capped at limit, before comparing byte by byte. */
        unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
        if (same0 > 2 && *scan == *match) {
          unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
          unsigned short same = same0 < same1 ? same0 : same1;
          if (same > limit) same = limit;
          scan += same;
          match += same;
        }
#endif
        scan = GetMatch(scan, match, arrayend, arrayend_safe);
        currentlength = scan - &array[pos];  /* The found length. */
      }

      if (currentlength > bestlength) {
        if (sublen) {
          /* Every length between the previous best and this one is first
             reached at this distance. */
          unsigned short j;
          for (j = bestlength + 1; j <= currentlength; j++) {
            sublen[j] = dist;
          }
        }
        bestdist = dist;
        bestlength = currentlength;
        if (currentlength >= limit) break;
      }
    }


#ifdef ZOPFLI_HASH_SAME_HASH
    /* Switch to the other hash once this will be more efficient. */
    if (hhead != h->head2 && bestlength >= h->same[hpos] &&
        h->val2 == h->hashval2[p]) {
      /* Now use the hash that encodes the length and first byte. */
      hhead = h->head2;
      hprev = h->prev2;
      hhashval = h->hashval2;
      hval = h->val2;
    }
#endif

    /* Step to the next, older entry of the chain. */
    pp = p;
    p = hprev[p];
    if (p == pp) break;  /* Uninited prev value. */

    dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);

#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
    chain_counter--;
    if (chain_counter <= 0) break;
#endif
  }

#ifdef ZOPFLI_LONGEST_MATCH_CACHE
  StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
#endif

  assert(bestlength <= limit);

  *distance = bestdist;
  *length = bestlength;
  assert(pos + *length <= size);
}
|
|
void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
|
size_t instart, size_t inend,
|
ZopfliLZ77Store* store) {
|
size_t i = 0, j;
|
unsigned short leng;
|
unsigned short dist;
|
int lengthscore;
|
size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
|
? instart - ZOPFLI_WINDOW_SIZE : 0;
|
unsigned short dummysublen[259];
|
|
ZopfliHash hash;
|
ZopfliHash* h = &hash;
|
|
#ifdef ZOPFLI_LAZY_MATCHING
|
/* Lazy matching. */
|
unsigned prev_length = 0;
|
unsigned prev_match = 0;
|
int prevlengthscore;
|
int match_available = 0;
|
#endif
|
|
if (instart == inend) return;
|
|
ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
|
ZopfliWarmupHash(in, windowstart, inend, h);
|
for (i = windowstart; i < instart; i++) {
|
ZopfliUpdateHash(in, i, inend, h);
|
}
|
|
for (i = instart; i < inend; i++) {
|
ZopfliUpdateHash(in, i, inend, h);
|
|
ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
|
&dist, &leng);
|
lengthscore = GetLengthScore(leng, dist);
|
|
#ifdef ZOPFLI_LAZY_MATCHING
|
/* Lazy matching. */
|
prevlengthscore = GetLengthScore(prev_length, prev_match);
|
if (match_available) {
|
match_available = 0;
|
if (lengthscore > prevlengthscore + 1) {
|
ZopfliStoreLitLenDist(in[i - 1], 0, store);
|
if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
|
match_available = 1;
|
prev_length = leng;
|
prev_match = dist;
|
continue;
|
}
|
} else {
|
/* Add previous to output. */
|
leng = prev_length;
|
dist = prev_match;
|
lengthscore = prevlengthscore;
|
/* Add to output. */
|
ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
|
ZopfliStoreLitLenDist(leng, dist, store);
|
for (j = 2; j < leng; j++) {
|
assert(i < inend);
|
i++;
|
ZopfliUpdateHash(in, i, inend, h);
|
}
|
continue;
|
}
|
}
|
else if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
|
match_available = 1;
|
prev_length = leng;
|
prev_match = dist;
|
continue;
|
}
|
/* End of lazy matching. */
|
#endif
|
|
/* Add to output. */
|
if (lengthscore >= ZOPFLI_MIN_MATCH) {
|
ZopfliVerifyLenDist(in, inend, i, dist, leng);
|
ZopfliStoreLitLenDist(leng, dist, store);
|
} else {
|
leng = 1;
|
ZopfliStoreLitLenDist(in[i], 0, store);
|
}
|
for (j = 1; j < leng; j++) {
|
assert(i < inend);
|
i++;
|
ZopfliUpdateHash(in, i, inend, h);
|
}
|
}
|
|
ZopfliCleanHash(h);
|
}
|
|
/*
Builds the symbol histograms of the LZ77 entries in [start, end).

litlens, dists: the parallel LZ77 arrays; an entry with dist 0 is counted under
    its litlens value directly, any other entry under its length symbol and its
    distance symbol.
start, end: range of entries to count.
ll_count: output; its first 288 counters are zeroed and then filled in.
d_count: output; its first 32 counters are zeroed and then filled in.
*/
void ZopfliLZ77Counts(const unsigned short* litlens,
                      const unsigned short* dists,
                      size_t start, size_t end,
                      size_t* ll_count, size_t* d_count) {
  size_t k;

  k = 0;
  while (k < 288) {
    ll_count[k++] = 0;
  }
  k = 0;
  while (k < 32) {
    d_count[k++] = 0;
  }

  for (k = start; k < end; k++) {
    if (dists[k] != 0) {
      ll_count[ZopfliGetLengthSymbol(litlens[k])]++;
      d_count[ZopfliGetDistSymbol(dists[k])]++;
    } else {
      ll_count[litlens[k]]++;
    }
  }

  /* End symbol: always set to exactly one, whatever was counted. */
  ll_count[256] = 1;
}
|