summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/lzo.txt164
-rw-r--r--lib/lzo/lzo1x_decompress_safe.c103
2 files changed, 221 insertions, 46 deletions
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
new file mode 100644
index 000000000000..ea45dd3901e3
--- /dev/null
+++ b/Documentation/lzo.txt
@@ -0,0 +1,164 @@
+
+LZO stream format as understood by Linux's LZO decompressor
+===========================================================
+
+Introduction
+
+ This is not a specification. No specification seems to be publicly available
+ for the LZO stream format. This document describes what input format the LZO
+ decompressor as implemented in the Linux kernel understands. The file subject
+ of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on
+ the compressor nor on any other implementations though it seems likely that
+ the format matches the standard one. The purpose of this document is to
+ better understand what the code does in order to propose more efficient fixes
+ for future bug reports.
+
+Description
+
+ The stream is composed of a series of instructions, operands, and data. The
+ instructions consist in a few bits representing an opcode, and bits forming
+ the operands for the instruction, whose size and position depend on the
+ opcode and on the number of literals copied by previous instruction. The
+ operands are used to indicate :
+
+ - a distance when copying data from the dictionary (past output buffer)
+ - a length (number of bytes to copy from dictionary)
+ - the number of literals to copy, which is retained in variable "state"
+ as a piece of information for next instructions.
+
+ Optionally depending on the opcode and operands, extra data may follow. These
+ extra data can be a complement for the operand (eg: a length or a distance
+ encoded on larger values), or a literal to be copied to the output buffer.
+
+ The first byte of the block follows a different encoding from other bytes, it
+ seems to be optimized for literal use only, since there is no dictionary yet
+ prior to that byte.
+
+ Lengths are always encoded on a variable size starting with a small number
+ of bits in the operand. If the number of bits isn't enough to represent the
+ length, up to 255 may be added in increments by consuming more bytes with a
+ rate of at most 255 per extra byte (thus the compression ratio cannot exceed
+ around 255:1). The variable length encoding using #bits is always the same :
+
+ length = byte & ((1 << #bits) - 1)
+ if (!length) {
+ length = ((1 << #bits) - 1)
+ length += 255*(number of zero bytes)
+ length += first-non-zero-byte
+ }
+ length += constant (generally 2 or 3)
+
+ For references to the dictionary, distances are relative to the output
+ pointer. Distances are encoded using very few bits belonging to certain
+ ranges, resulting in multiple copy instructions using different encodings.
+ Certain encodings involve one extra byte, others involve two extra bytes
+ forming a little-endian 16-bit quantity (marked LE16 below).
+
+ After any instruction except the large literal copy, 0, 1, 2 or 3 literals
+ are copied before starting the next instruction. The number of literals that
+ were copied may change the meaning and behaviour of the next instruction. In
+ practice, only one instruction needs to know whether 0, less than 4, or more
+ literals were copied. This is the information stored in the <state> variable
+ in this implementation. This number of immediate literals to be copied is
+ generally encoded in the last two bits of the instruction but may also be
+ taken from the last two bits of an extra operand (eg: distance).
+
+ End of stream is declared when a block copy of distance 0 is seen. Only one
+ instruction may encode this distance (0001HLLL), it takes one LE16 operand
+ for the distance, thus requiring 3 bytes.
+
+ IMPORTANT NOTE : in the code some length checks are missing because certain
+ instructions are called under the assumption that a certain number of bytes
+ follow because it has already been garanteed before parsing the instructions.
+ They just have to "refill" this credit if they consume extra bytes. This is
+ an implementation design choice independant on the algorithm or encoding.
+
+Byte sequences
+
+ First byte encoding :
+
+ 0..17 : follow regular instruction encoding, see below. It is worth
+ noting that codes 16 and 17 will represent a block copy from
+ the dictionary which is empty, and that they will always be
+ invalid at this place.
+
+ 18..21 : copy 0..3 literals
+ state = (byte - 17) = 0..3 [ copy <state> literals ]
+ skip byte
+
+ 22..255 : copy literal string
+ length = (byte - 17) = 4..238
+ state = 4 [ don't copy extra literals ]
+ skip byte
+
+ Instruction encoding :
+
+ 0 0 0 0 X X X X (0..15)
+ Depends on the number of literals copied by the last instruction.
+ If last instruction did not copy any literal (state == 0), this
+ encoding will be a copy of 4 or more literal, and must be interpreted
+ like this :
+
+ 0 0 0 0 L L L L (0..15) : copy long literal string
+ length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+ state = 4 (no extra literals are copied)
+
+ If last instruction used to copy between 1 to 3 literals (encoded in
+ the instruction's opcode or distance), the instruction is a copy of a
+ 2-byte block from the dictionary within a 1kB distance. It is worth
+ noting that this instruction provides little savings since it uses 2
+ bytes to encode a copy of 2 other bytes but it encodes the number of
+ following literals for free. It must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance
+ length = 2
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 1
+
+ If last instruction used to copy 4 or more literals (as detected by
+ state == 4), the instruction becomes a copy of a 3-byte block from the
+ dictionary from a 2..3kB distance, and must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance
+ length = 3
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 2049
+
+ 0 0 0 1 H L L L (16..31)
+ Copy of a block within 16..48kB distance (preferably less than 10B)
+ length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = 16384 + (H << 14) + D
+ state = S (copy S literals after this block)
+ End of stream is reached if distance == 16384
+
+ 0 0 1 L L L L L (32..63)
+ Copy of small block within 16kB distance (preferably less than 34B)
+ length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = D + 1
+ state = S (copy S literals after this block)
+
+ 0 1 L D D D S S (64..127)
+ Copy 3-4 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 3 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+ 1 L L D D D S S (128..255)
+ Copy 5-8 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 5 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+Authors
+
+ This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
+ analysis of the decompression code available in Linux 3.16-rc5. The code is
+ tricky, it is possible that this document contains mistakes or that a few
+ corner cases were overlooked. In any case, please report any doubt, fix, or
+ proposed updates to the author(s) so that the document can be updated.
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 8563081e8da3..a1c387f6afba 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -19,31 +19,21 @@
#include <linux/lzo.h>
#include "lzodefs.h"
-#define HAVE_IP(t, x) \
- (((size_t)(ip_end - ip) >= (size_t)(t + x)) && \
- (((t + x) >= t) && ((t + x) >= x)))
+#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x))
+#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x))
+#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun
+#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun
+#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun
-#define HAVE_OP(t, x) \
- (((size_t)(op_end - op) >= (size_t)(t + x)) && \
- (((t + x) >= t) && ((t + x) >= x)))
-
-#define NEED_IP(t, x) \
- do { \
- if (!HAVE_IP(t, x)) \
- goto input_overrun; \
- } while (0)
-
-#define NEED_OP(t, x) \
- do { \
- if (!HAVE_OP(t, x)) \
- goto output_overrun; \
- } while (0)
-
-#define TEST_LB(m_pos) \
- do { \
- if ((m_pos) < out) \
- goto lookbehind_overrun; \
- } while (0)
+/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
+ * count without overflowing an integer. The multiply will overflow when
+ * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
+ * depending on the base count. Since the base count is taken from a u8
+ * and a few bits, it is safe to assume that it will always be lower than
+ * or equal to 2*255, thus we can always prevent any overflow by accepting
+ * two less 255 steps. See Documentation/lzo.txt for more information.
+ */
+#define MAX_255_COUNT ((((size_t)~0) / 255) - 2)
int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len)
@@ -75,17 +65,24 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
if (t < 16) {
if (likely(state == 0)) {
if (unlikely(t == 0)) {
+ size_t offset;
+ const unsigned char *ip_last = ip;
+
while (unlikely(*ip == 0)) {
- t += 255;
ip++;
- NEED_IP(1, 0);
+ NEED_IP(1);
}
- t += 15 + *ip++;
+ offset = ip - ip_last;
+ if (unlikely(offset > MAX_255_COUNT))
+ return LZO_E_ERROR;
+
+ offset = (offset << 8) - offset;
+ t += offset + 15 + *ip++;
}
t += 3;
copy_literal_run:
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- if (likely(HAVE_IP(t, 15) && HAVE_OP(t, 15))) {
+ if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
const unsigned char *ie = ip + t;
unsigned char *oe = op + t;
do {
@@ -101,8 +98,8 @@ copy_literal_run:
} else
#endif
{
- NEED_OP(t, 0);
- NEED_IP(t, 3);
+ NEED_OP(t);
+ NEED_IP(t + 3);
do {
*op++ = *ip++;
} while (--t > 0);
@@ -115,7 +112,7 @@ copy_literal_run:
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
TEST_LB(m_pos);
- NEED_OP(2, 0);
+ NEED_OP(2);
op[0] = m_pos[0];
op[1] = m_pos[1];
op += 2;
@@ -136,13 +133,20 @@ copy_literal_run:
} else if (t >= 32) {
t = (t & 31) + (3 - 1);
if (unlikely(t == 2)) {
+ size_t offset;
+ const unsigned char *ip_last = ip;
+
while (unlikely(*ip == 0)) {
- t += 255;
ip++;
- NEED_IP(1, 0);
+ NEED_IP(1);
}
- t += 31 + *ip++;
- NEED_IP(2, 0);
+ offset = ip - ip_last;
+ if (unlikely(offset > MAX_255_COUNT))
+ return LZO_E_ERROR;
+
+ offset = (offset << 8) - offset;
+ t += offset + 31 + *ip++;
+ NEED_IP(2);
}
m_pos = op - 1;
next = get_unaligned_le16(ip);
@@ -154,13 +158,20 @@ copy_literal_run:
m_pos -= (t & 8) << 11;
t = (t & 7) + (3 - 1);
if (unlikely(t == 2)) {
+ size_t offset;
+ const unsigned char *ip_last = ip;
+
while (unlikely(*ip == 0)) {
- t += 255;
ip++;
- NEED_IP(1, 0);
+ NEED_IP(1);
}
- t += 7 + *ip++;
- NEED_IP(2, 0);
+ offset = ip - ip_last;
+ if (unlikely(offset > MAX_255_COUNT))
+ return LZO_E_ERROR;
+
+ offset = (offset << 8) - offset;
+ t += offset + 7 + *ip++;
+ NEED_IP(2);
}
next = get_unaligned_le16(ip);
ip += 2;
@@ -174,7 +185,7 @@ copy_literal_run:
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
if (op - m_pos >= 8) {
unsigned char *oe = op + t;
- if (likely(HAVE_OP(t, 15))) {
+ if (likely(HAVE_OP(t + 15))) {
do {
COPY8(op, m_pos);
op += 8;
@@ -184,7 +195,7 @@ copy_literal_run:
m_pos += 8;
} while (op < oe);
op = oe;
- if (HAVE_IP(6, 0)) {
+ if (HAVE_IP(6)) {
state = next;
COPY4(op, ip);
op += next;
@@ -192,7 +203,7 @@ copy_literal_run:
continue;
}
} else {
- NEED_OP(t, 0);
+ NEED_OP(t);
do {
*op++ = *m_pos++;
} while (op < oe);
@@ -201,7 +212,7 @@ copy_literal_run:
#endif
{
unsigned char *oe = op + t;
- NEED_OP(t, 0);
+ NEED_OP(t);
op[0] = m_pos[0];
op[1] = m_pos[1];
op += 2;
@@ -214,15 +225,15 @@ match_next:
state = next;
t = next;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- if (likely(HAVE_IP(6, 0) && HAVE_OP(4, 0))) {
+ if (likely(HAVE_IP(6) && HAVE_OP(4))) {
COPY4(op, ip);
op += t;
ip += t;
} else
#endif
{
- NEED_IP(t, 3);
- NEED_OP(t, 0);
+ NEED_IP(t + 3);
+ NEED_OP(t);
while (t > 0) {
*op++ = *ip++;
t--;