diff options
author | Nick Terrell <terrelln@fb.com> | 2022-10-17 13:32:37 -0700 |
---|---|---|
committer | Nick Terrell <terrelln@fb.com> | 2022-10-24 12:12:32 -0700 |
commit | 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f (patch) | |
tree | 17f83bdf97a2a93f8d0aa4d5daf6a92caa7bde79 /lib/zstd/compress/zstd_ldm.c | |
parent | 4782c725c1538aa9ef894ae4a3938db40be7f02c (diff) | |
download | linux-2aa14b1ab2c41a4fe41efae80d58bb77da91f19f.tar.bz2 |
zstd: import usptream v1.5.2
Updates the kernel's zstd library to v1.5.2, the latest zstd release.
The upstream tag it is updated to is `v1.5.2-kernel`, which contains
several cherry-picked commits on top of the v1.5.2 release which are
required for the kernel update. I will create this tag once the PR is
ready to merge, until then reference the temporary upstream branch
`v1.5.2-kernel-cherrypicks`.
I plan to submit this patch as part of the v6.2 merge window.
I've done basic build testing & testing on x86-64, i386, and aarch64.
I'm merging these patches into my `zstd-next` branch, which is pulled
into `linux-next` for further testing.
I've benchmarked BtrFS with zstd compression on a x86-64 machine, and
saw these results. Decompression speed is a small win across the board.
The lower compression levels 1-4 see both compression speed and
compression ratio wins. The higher compression levels see a small
compression speed loss and about neutral ratio. I expect the lower
compression levels to be used much more heavily than the high
compression levels, so this should be a net win.
Level CTime DTime Ratio
1 -2.95% -1.1% -0.7%
3 -3.5% -1.2% -0.5%
5 +3.7% -1.0% +0.0%
7 +3.2% -0.9% +0.0%
9 -4.3% -0.8% +0.1%
Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/compress/zstd_ldm.c')
-rw-r--r-- | lib/zstd/compress/zstd_ldm.c | 76 |
1 files changed, 57 insertions, 19 deletions
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c index 8ef7e88a5add..dd86fc83e7dd 100644 --- a/lib/zstd/compress/zstd_ldm.c +++ b/lib/zstd/compress/zstd_ldm.c @@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* } } +/* ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + /* ZSTD_ldm_gear_feed(): * * Registers in the splits array all the split points found in the first @@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params) size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; + return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; } size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) { - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; + return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0; } /* ZSTD_ldm_getBucket() : @@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable( while (ip < iend) { size_t hashed; unsigned n; - + numSplits = 0; hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); @@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal( /* Initialize the rolling hash state with the first minMatchLength bytes */ ZSTD_ldm_gear_init(&hashState, params); - { - size_t n = 0; - - while (n < minMatchLength) { - numSplits = 0; - n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, - splits, &numSplits); - } - ip += minMatchLength; - } + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; while (ip < ilimit) { size_t hashed; @@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal( for (n = 0; n < numSplits; n++) { size_t forwardMatchLength = 0, backwardMatchLength = 0, bestMatchLength = 0, mLength; + U32 offset; BYTE const* const split = candidates[n].split; U32 const checksum = candidates[n].checksum; U32 const hash = candidates[n].hash; @@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal( } /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; mLength = forwardMatchLength + backwardMatchLength; { - U32 const offset = (U32)(split - base) - bestEntry->offset; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ @@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal( ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into out table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outer loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } } ip += hashed; @@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences( assert(chunkStart < iend); /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); @@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences( return 0; } -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { +void +ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) +{ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; if (srcSize <= seq->litLength) { @@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, void const* src, size_t srcSize) { const ZSTD_compressionParameters* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; @@ -673,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, rep[0] = sequence.offset; /* Store the sequence */ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); + STORE_OFFSET(sequence.offset), + sequence.matchLength); ip += sequence.matchLength; } } |