.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * LZO1X Compressor from LZO |
---|
3 | 4 | * |
---|
.. | .. |
---|
20 | 21 | static noinline size_t |
---|
21 | 22 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
---|
22 | 23 | unsigned char *out, size_t *out_len, |
---|
23 | | - size_t ti, void *wrkmem) |
---|
| 24 | + size_t ti, void *wrkmem, signed char *state_offset, |
---|
| 25 | + const unsigned char bitstream_version) |
---|
24 | 26 | { |
---|
25 | 27 | const unsigned char *ip; |
---|
26 | 28 | unsigned char *op; |
---|
.. | .. |
---|
35 | 37 | ip += ti < 4 ? 4 - ti : 0; |
---|
36 | 38 | |
---|
37 | 39 | for (;;) { |
---|
38 | | - const unsigned char *m_pos; |
---|
| 40 | + const unsigned char *m_pos = NULL; |
---|
39 | 41 | size_t t, m_len, m_off; |
---|
40 | 42 | u32 dv; |
---|
| 43 | + u32 run_length = 0; |
---|
41 | 44 | literal: |
---|
42 | 45 | ip += 1 + ((ip - ii) >> 5); |
---|
43 | 46 | next: |
---|
44 | 47 | if (unlikely(ip >= ip_end)) |
---|
45 | 48 | break; |
---|
46 | 49 | dv = get_unaligned_le32(ip); |
---|
47 | | - t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; |
---|
48 | | - m_pos = in + dict[t]; |
---|
49 | | - dict[t] = (lzo_dict_t) (ip - in); |
---|
50 | | - if (unlikely(dv != get_unaligned_le32(m_pos))) |
---|
51 | | - goto literal; |
---|
| 50 | + |
---|
| 51 | + if (dv == 0 && bitstream_version) { |
---|
| 52 | + const unsigned char *ir = ip + 4; |
---|
| 53 | + const unsigned char *limit = ip_end |
---|
| 54 | + < (ip + MAX_ZERO_RUN_LENGTH + 1) |
---|
| 55 | + ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; |
---|
| 56 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ |
---|
| 57 | + defined(LZO_FAST_64BIT_MEMORY_ACCESS) |
---|
| 58 | + u64 dv64; |
---|
| 59 | + |
---|
| 60 | + for (; (ir + 32) <= limit; ir += 32) { |
---|
| 61 | + dv64 = get_unaligned((u64 *)ir); |
---|
| 62 | + dv64 |= get_unaligned((u64 *)ir + 1); |
---|
| 63 | + dv64 |= get_unaligned((u64 *)ir + 2); |
---|
| 64 | + dv64 |= get_unaligned((u64 *)ir + 3); |
---|
| 65 | + if (dv64) |
---|
| 66 | + break; |
---|
| 67 | + } |
---|
| 68 | + for (; (ir + 8) <= limit; ir += 8) { |
---|
| 69 | + dv64 = get_unaligned((u64 *)ir); |
---|
| 70 | + if (dv64) { |
---|
| 71 | +# if defined(__LITTLE_ENDIAN) |
---|
| 72 | + ir += __builtin_ctzll(dv64) >> 3; |
---|
| 73 | +# elif defined(__BIG_ENDIAN) |
---|
| 74 | + ir += __builtin_clzll(dv64) >> 3; |
---|
| 75 | +# else |
---|
| 76 | +# error "missing endian definition" |
---|
| 77 | +# endif |
---|
| 78 | + break; |
---|
| 79 | + } |
---|
| 80 | + } |
---|
| 81 | +#else |
---|
| 82 | + while ((ir < (const unsigned char *) |
---|
| 83 | + ALIGN((uintptr_t)ir, 4)) && |
---|
| 84 | + (ir < limit) && (*ir == 0)) |
---|
| 85 | + ir++; |
---|
| 86 | + if (IS_ALIGNED((uintptr_t)ir, 4)) { |
---|
| 87 | + for (; (ir + 4) <= limit; ir += 4) { |
---|
| 88 | + dv = *((u32 *)ir); |
---|
| 89 | + if (dv) { |
---|
| 90 | +# if defined(__LITTLE_ENDIAN) |
---|
| 91 | + ir += __builtin_ctz(dv) >> 3; |
---|
| 92 | +# elif defined(__BIG_ENDIAN) |
---|
| 93 | + ir += __builtin_clz(dv) >> 3; |
---|
| 94 | +# else |
---|
| 95 | +# error "missing endian definition" |
---|
| 96 | +# endif |
---|
| 97 | + break; |
---|
| 98 | + } |
---|
| 99 | + } |
---|
| 100 | + } |
---|
| 101 | +#endif |
---|
| 102 | + while (likely(ir < limit) && unlikely(*ir == 0)) |
---|
| 103 | + ir++; |
---|
| 104 | + run_length = ir - ip; |
---|
| 105 | + if (run_length > MAX_ZERO_RUN_LENGTH) |
---|
| 106 | + run_length = MAX_ZERO_RUN_LENGTH; |
---|
| 107 | + } else { |
---|
| 108 | + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; |
---|
| 109 | + m_pos = in + dict[t]; |
---|
| 110 | + dict[t] = (lzo_dict_t) (ip - in); |
---|
| 111 | + if (unlikely(dv != get_unaligned_le32(m_pos))) |
---|
| 112 | + goto literal; |
---|
| 113 | + } |
---|
52 | 114 | |
---|
53 | 115 | ii -= ti; |
---|
54 | 116 | ti = 0; |
---|
55 | 117 | t = ip - ii; |
---|
56 | 118 | if (t != 0) { |
---|
57 | 119 | if (t <= 3) { |
---|
58 | | - op[-2] |= t; |
---|
| 120 | + op[*state_offset] |= t; |
---|
59 | 121 | COPY4(op, ii); |
---|
60 | 122 | op += t; |
---|
61 | 123 | } else if (t <= 16) { |
---|
.. | .. |
---|
86 | 148 | *op++ = *ii++; |
---|
87 | 149 | } while (--t > 0); |
---|
88 | 150 | } |
---|
| 151 | + } |
---|
| 152 | + |
---|
| 153 | + if (unlikely(run_length)) { |
---|
| 154 | + ip += run_length; |
---|
| 155 | + run_length -= MIN_ZERO_RUN_LENGTH; |
---|
| 156 | + put_unaligned_le32((run_length << 21) | 0xfffc18 |
---|
| 157 | + | (run_length & 0x7), op); |
---|
| 158 | + op += 4; |
---|
| 159 | + run_length = 0; |
---|
| 160 | + *state_offset = -3; |
---|
| 161 | + goto finished_writing_instruction; |
---|
89 | 162 | } |
---|
90 | 163 | |
---|
91 | 164 | m_len = 4; |
---|
.. | .. |
---|
170 | 243 | |
---|
171 | 244 | m_off = ip - m_pos; |
---|
172 | 245 | ip += m_len; |
---|
173 | | - ii = ip; |
---|
174 | 246 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
---|
175 | 247 | m_off -= 1; |
---|
176 | 248 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); |
---|
.. | .. |
---|
196 | 268 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) |
---|
197 | 269 | | (m_len - 2)); |
---|
198 | 270 | else { |
---|
| 271 | + if (unlikely(((m_off & 0x403f) == 0x403f) |
---|
| 272 | + && (m_len >= 261) |
---|
| 273 | + && (m_len <= 264)) |
---|
| 274 | + && likely(bitstream_version)) { |
---|
| 275 | + // Under lzo-rle, block copies |
---|
| 276 | + // for 261 <= length <= 264 and |
---|
| 277 | + // (m_off & 0x403f) == 0x403f |
---|
| 278 | + // can result in ambiguous |
---|
| 279 | + // output. Adjust length |
---|
| 280 | + // to 260 to prevent ambiguity. |
---|
| 281 | + ip -= m_len - 260; |
---|
| 282 | + m_len = 260; |
---|
| 283 | + } |
---|
199 | 284 | m_len -= M4_MAX_LEN; |
---|
200 | 285 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); |
---|
201 | 286 | while (unlikely(m_len > 255)) { |
---|
.. | .. |
---|
207 | 292 | *op++ = (m_off << 2); |
---|
208 | 293 | *op++ = (m_off >> 6); |
---|
209 | 294 | } |
---|
| 295 | + *state_offset = -2; |
---|
| 296 | +finished_writing_instruction: |
---|
| 297 | + ii = ip; |
---|
210 | 298 | goto next; |
---|
211 | 299 | } |
---|
212 | 300 | *out_len = op - out; |
---|
213 | 301 | return in_end - (ii - ti); |
---|
214 | 302 | } |
---|
215 | 303 | |
---|
216 | | -int lzo1x_1_compress(const unsigned char *in, size_t in_len, |
---|
| 304 | +int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, |
---|
217 | 305 | unsigned char *out, size_t *out_len, |
---|
218 | | - void *wrkmem) |
---|
| 306 | + void *wrkmem, const unsigned char bitstream_version) |
---|
219 | 307 | { |
---|
220 | 308 | const unsigned char *ip = in; |
---|
221 | 309 | unsigned char *op = out; |
---|
| 310 | + unsigned char *data_start; |
---|
222 | 311 | size_t l = in_len; |
---|
223 | 312 | size_t t = 0; |
---|
| 313 | + signed char state_offset = -2; |
---|
| 314 | + unsigned int m4_max_offset; |
---|
| 315 | + |
---|
| 316 | + // LZO v0 will never write 17 as first byte (except for zero-length |
---|
| 317 | + // input), so this is used to version the bitstream |
---|
| 318 | + if (bitstream_version > 0) { |
---|
| 319 | + *op++ = 17; |
---|
| 320 | + *op++ = bitstream_version; |
---|
| 321 | + m4_max_offset = M4_MAX_OFFSET_V1; |
---|
| 322 | + } else { |
---|
| 323 | + m4_max_offset = M4_MAX_OFFSET_V0; |
---|
| 324 | + } |
---|
| 325 | + |
---|
| 326 | + data_start = op; |
---|
224 | 327 | |
---|
225 | 328 | while (l > 20) { |
---|
226 | | - size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); |
---|
| 329 | + size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); |
---|
227 | 330 | uintptr_t ll_end = (uintptr_t) ip + ll; |
---|
228 | 331 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) |
---|
229 | 332 | break; |
---|
230 | 333 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); |
---|
231 | 334 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); |
---|
232 | | - t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); |
---|
| 335 | + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, |
---|
| 336 | + &state_offset, bitstream_version); |
---|
233 | 337 | ip += ll; |
---|
234 | 338 | op += *out_len; |
---|
235 | 339 | l -= ll; |
---|
.. | .. |
---|
239 | 343 | if (t > 0) { |
---|
240 | 344 | const unsigned char *ii = in + in_len - t; |
---|
241 | 345 | |
---|
242 | | - if (op == out && t <= 238) { |
---|
| 346 | + if (op == data_start && t <= 238) { |
---|
243 | 347 | *op++ = (17 + t); |
---|
244 | 348 | } else if (t <= 3) { |
---|
245 | | - op[-2] |= t; |
---|
| 349 | + op[state_offset] |= t; |
---|
246 | 350 | } else if (t <= 18) { |
---|
247 | 351 | *op++ = (t - 3); |
---|
248 | 352 | } else { |
---|
.. | .. |
---|
273 | 377 | *out_len = op - out; |
---|
274 | 378 | return LZO_E_OK; |
---|
275 | 379 | } |
---|
| 380 | + |
---|
| 381 | +int lzo1x_1_compress(const unsigned char *in, size_t in_len, |
---|
| 382 | + unsigned char *out, size_t *out_len, |
---|
| 383 | + void *wrkmem) |
---|
| 384 | +{ /* Thin wrapper: forwards the caller's buffers unchanged to lzogeneric1x_1_compress() and returns its result. */ |
---|
| 385 | + return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); /* bitstream_version 0: no 17/<version> header is written and zero-run detection stays off */ |
---|
| 386 | +} |
---|
| 387 | + |
---|
| 388 | +int lzorle1x_1_compress(const unsigned char *in, size_t in_len, |
---|
| 389 | + unsigned char *out, size_t *out_len, |
---|
| 390 | + void *wrkmem) |
---|
| 391 | +{ /* Thin wrapper: same arguments as lzo1x_1_compress(), but passes LZO_VERSION as the bitstream version. */ |
---|
| 392 | + return lzogeneric1x_1_compress(in, in_len, out, out_len, |
---|
| 393 | + wrkmem, LZO_VERSION); /* LZO_VERSION is defined outside this view; when it is > 0 the generic routine emits the 17/<version> header and enables zero-run encoding */ |
---|
| 394 | +} |
---|
| 395 | + |
---|
276 | 396 | EXPORT_SYMBOL_GPL(lzo1x_1_compress); |
---|
| 397 | +EXPORT_SYMBOL_GPL(lzorle1x_1_compress); |
---|
277 | 398 | |
---|
278 | 399 | MODULE_LICENSE("GPL"); |
---|
279 | 400 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); |
---|