[BTRFS] Upgrade to 1.1
[reactos.git] / drivers / filesystems / btrfs / zstd / huf_decompress.c
1 /* ******************************************************************
2 huff0 huffman decoder,
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Dependencies
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h" /* BIT_* */
41 #include "fse.h" /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45
46
47 /* **************************************************************
48 * Error Management
49 ****************************************************************/
/* HUF_isError() : tells if a size_t result is an error code (alias of ERR_isError()) */
#define HUF_isError ERR_isError
/* CHECK_F() : evaluates f once; returns its error code from the enclosing function on failure */
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* HUF_ALIGN() : rounds x up to the next multiple of a (a must be a power of 2) */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
59
60
61 /*-***************************/
62 /* generic DTableDesc */
63 /*-***************************/
/* DTableDesc : descriptor stored in the first cell of every HUF_DTable.
 * maxTableLog is the table's capacity; tableLog the log actually in use;
 * tableType selects single-symbol (0) or double-symbol (1) decoding. */
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

/* HUF_getDTableDesc() :
 * extracts the descriptor from `table` via memcpy, avoiding strict-aliasing issues */
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}
72
73
74 /*-***************************/
75 /* single-symbol decoding */
76 /*-***************************/
77 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
78
/* HUF_readDTableX1_wksp() :
 * Builds a single-symbol decoding table from the Huffman tree description at `src`.
 * @DTable    : destination table; its first cell holds a DTableDesc header.
 * @workSpace : scratch area of `wkspSize` bytes for rank counters and weights.
 * @return    : nb of bytes read from src, or an error code (test with HUF_isError()). */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* decoding cells start after the header cell */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;   /* workspace budget, counted in U32 units */

    /* carve rankVal and huffWeight out of workSpace */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* read per-symbol weights and per-weight counts from the header */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));   /* a weight-n symbol spans 2^(n-1) cells */
            rankVal[n] = current;
    }   }

    /* fill DTable : each symbol occupies 2^(weight-1) consecutive cells */
    {   U32 n;
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;
            U32 u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
                dt[u] = D;
            rankVal[w] += length;
    }   }

    return iSize;
}
135
136 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
137 {
138 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
139 return HUF_readDTableX1_wksp(DTable, src, srcSize,
140 workSpace, sizeof(workSpace));
141 }
142
/* HUF_decodeSymbolX1() :
 * decodes one symbol from the bitstream with the single-symbol table `dt` :
 * peeks dtLog bits, then consumes only the code's actual length (nbBits). */
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}
151
/* decode one symbol, unconditionally */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* decode one symbol only when the bit accumulator is guaranteed large enough
 * (64-bit mode, or tables limited to 12 bits) */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* decode one symbol only in 64-bit mode */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
162
/* HUF_decodeStreamX1() :
 * decodes symbols from bitDPtr into [p, pEnd) using table dt.
 * @return : nb of bytes written (pEnd - pStart) */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}
187
/* HUF_decompress1X1_usingDTable_internal_body() :
 * decompresses a single Huffman bitstream with a single-symbol table.
 * @return : dstSize on success, or an error code (corruption_detected if the
 *           bitstream is not fully consumed). */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* decoding cells follow the header cell */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* the stream must be consumed exactly, otherwise the input was corrupted */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
210
/* HUF_decompress4X1_usingDTable_internal_body() :
 * decompresses 4 interleaved bitstreams with a single-symbol table.
 * Input layout : a 6-byte jump table (3 little-endian U16 sizes), then the 4
 * streams back to back; the 4th size is implicit. Each stream produces one
 * quarter (rounded up) of dst. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th size is implicit */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            /* endSignal is deliberately left unchanged here; the loop is bounded by op4 */
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
304
305
/* common prototype shared by all *_usingDTable_internal_body() implementations */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);
#if DYNAMIC_BMI2

/* HUF_DGEN(fn) : generates fn() as a runtime dispatcher between a portable
 * build of fn##_body() and a TARGET_ATTRIBUTE("bmi2") build, selected by the
 * trailing `bmi2` argument. */
#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

/* without dynamic BMI2 support, fn() ignores the `bmi2` flag and always
 * calls the portable body */
#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif

/* instantiate the dispatch wrappers for the single-symbol decoders */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
353
354
355
356 size_t HUF_decompress1X1_usingDTable(
357 void* dst, size_t dstSize,
358 const void* cSrc, size_t cSrcSize,
359 const HUF_DTable* DTable)
360 {
361 DTableDesc dtd = HUF_getDTableDesc(DTable);
362 if (dtd.tableType != 0) return ERROR(GENERIC);
363 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
364 }
365
366 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
367 const void* cSrc, size_t cSrcSize,
368 void* workSpace, size_t wkspSize)
369 {
370 const BYTE* ip = (const BYTE*) cSrc;
371
372 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
373 if (HUF_isError(hSize)) return hSize;
374 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
375 ip += hSize; cSrcSize -= hSize;
376
377 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
378 }
379
380
381 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
382 const void* cSrc, size_t cSrcSize)
383 {
384 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
385 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
386 workSpace, sizeof(workSpace));
387 }
388
389 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
390 {
391 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
393 }
394
395 size_t HUF_decompress4X1_usingDTable(
396 void* dst, size_t dstSize,
397 const void* cSrc, size_t cSrcSize,
398 const HUF_DTable* DTable)
399 {
400 DTableDesc dtd = HUF_getDTableDesc(DTable);
401 if (dtd.tableType != 0) return ERROR(GENERIC);
402 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
403 }
404
405 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
406 const void* cSrc, size_t cSrcSize,
407 void* workSpace, size_t wkspSize, int bmi2)
408 {
409 const BYTE* ip = (const BYTE*) cSrc;
410
411 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
412 workSpace, wkspSize);
413 if (HUF_isError(hSize)) return hSize;
414 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
415 ip += hSize; cSrcSize -= hSize;
416
417 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
418 }
419
420 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
421 const void* cSrc, size_t cSrcSize,
422 void* workSpace, size_t wkspSize)
423 {
424 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
425 }
426
427
428 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
429 {
430 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
431 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
432 workSpace, sizeof(workSpace));
433 }
434 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
435 {
436 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
437 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
438 }
439
440
441 /* *************************/
442 /* double-symbols decoding */
443 /* *************************/
444
/* HUF_DEltX2 : one cell of the double-symbol table. `sequence` packs 1 or 2
 * symbols (little-endian); `length` tells how many of them are valid. */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* one rank-start column per possible nb of already-consumed bits */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
449
450
/* HUF_fillDTableX2Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32.
 * Fills a second-level sub-table : each filled cell stores baseSeq (the first
 * symbol, whose code has already consumed `consumed` bits) plus a second symbol. */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : cells before rankVal[minWeight] decode only the
     * first symbol (length 1), consuming `consumed` bits */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits   = (BYTE)(consumed);
        DElt.length   = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            /* two-symbol cell : baseSeq in low byte, second symbol in high byte */
            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}
492
493
/* HUF_fillDTableX2() :
 * builds the double-symbol table : where the remaining table room can hold a
 * full second code, a second-level fill stores two symbols per lookup;
 * otherwise the cell decodes a single symbol. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;   /* shortest possible code length */
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            /* single-symbol cell */
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}
535
/* HUF_readDTableX2_wksp() :
 * Builds a double-symbol decoding table from the Huffman tree description at `src`.
 * @workSpace : scratch area of `wkspSize` bytes, carved up into rank tables,
 *              the sorted-symbol list, and the weight list.
 * @return    : nb of bytes read from src, or an error code (HUF_isError()). */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;   /* workspace budget, counted in U32 units */

    /* carve all scratch arrays out of workSpace */
    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    /* NOTE(review): this arithmetic presumes sizeof(sortedSymbol_t) divides the
     * byte offset evenly — appears to rely on the 2-byte struct size; verify
     * against upstream zstd if changed */
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    /* zeroes rankStats and rankStart0 together (contiguous in workSpace) */
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    /* read per-symbol weights and per-weight counts from the header */
    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight : counting sort using the rankStart offsets */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        /* derive a rankVal column for every possible nb of consumed bits */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    /* publish the header describing the built table */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
635
636 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
637 {
638 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
639 return HUF_readDTableX2_wksp(DTable, src, srcSize,
640 workSpace, sizeof(workSpace));
641 }
642
643
/* HUF_decodeSymbolX2() :
 * copies the cell's 2-byte sequence to op (always writes 2 bytes) and
 * consumes the cell's bits.
 * @return : nb of symbols actually decoded (dt[val].length, 1 or 2) */
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}
652
/* HUF_decodeLastSymbolX2() :
 * decodes the final position of a stream : writes only 1 byte even when the
 * cell holds a 2-symbol sequence, and clamps bit consumption at the container size. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}
668
/* decode one cell (1 or 2 symbols), unconditionally */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* decode only when the bit accumulator is guaranteed large enough
 * (64-bit mode, or tables limited to 12 bits) */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* decode only in 64-bit mode */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679
/* HUF_decodeStreamX2() :
 * decodes symbols (up to 2 per lookup) from bitDPtr into [p, pEnd).
 * @return : nb of bytes written (p - pStart) */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    /* last byte : a 2-symbol cell would overflow, use the 1-byte variant */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
706
/* HUF_decompress1X2_usingDTable_internal_body() :
 * decompresses a single Huffman bitstream with a double-symbol table.
 * @return : dstSize, or corruption_detected if the stream is not fully consumed */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
733
734
/* HUF_decompress4X2_usingDTable_internal_body() :
 * decompresses 4 interleaved bitstreams with a double-symbol table.
 * Same input layout as the X1 variant : 6-byte jump table, then 4 streams;
 * each stream produces one quarter (rounded up) of dst. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th size is implicit */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);

            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
823
/* instantiate the BMI2/default dispatch wrappers for the double-symbol decoders */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
826
827 size_t HUF_decompress1X2_usingDTable(
828 void* dst, size_t dstSize,
829 const void* cSrc, size_t cSrcSize,
830 const HUF_DTable* DTable)
831 {
832 DTableDesc dtd = HUF_getDTableDesc(DTable);
833 if (dtd.tableType != 1) return ERROR(GENERIC);
834 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
835 }
836
837 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
838 const void* cSrc, size_t cSrcSize,
839 void* workSpace, size_t wkspSize)
840 {
841 const BYTE* ip = (const BYTE*) cSrc;
842
843 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
844 workSpace, wkspSize);
845 if (HUF_isError(hSize)) return hSize;
846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
847 ip += hSize; cSrcSize -= hSize;
848
849 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
850 }
851
852
853 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
854 const void* cSrc, size_t cSrcSize)
855 {
856 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
857 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
858 workSpace, sizeof(workSpace));
859 }
860
861 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
862 {
863 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
864 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
865 }
866
867 size_t HUF_decompress4X2_usingDTable(
868 void* dst, size_t dstSize,
869 const void* cSrc, size_t cSrcSize,
870 const HUF_DTable* DTable)
871 {
872 DTableDesc dtd = HUF_getDTableDesc(DTable);
873 if (dtd.tableType != 1) return ERROR(GENERIC);
874 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875 }
876
877 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
878 const void* cSrc, size_t cSrcSize,
879 void* workSpace, size_t wkspSize, int bmi2)
880 {
881 const BYTE* ip = (const BYTE*) cSrc;
882
883 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
884 workSpace, wkspSize);
885 if (HUF_isError(hSize)) return hSize;
886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887 ip += hSize; cSrcSize -= hSize;
888
889 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
890 }
891
892 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
893 const void* cSrc, size_t cSrcSize,
894 void* workSpace, size_t wkspSize)
895 {
896 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
897 }
898
899
900 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
901 const void* cSrc, size_t cSrcSize)
902 {
903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
904 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
905 workSpace, sizeof(workSpace));
906 }
907
908 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
909 {
910 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
911 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 }
913
914
915 /* ***********************************/
916 /* Universal decompression selectors */
917 /* ***********************************/
918
919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
920 const void* cSrc, size_t cSrcSize,
921 const HUF_DTable* DTable)
922 {
923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
924 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
926 }
927
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
929 const void* cSrc, size_t cSrcSize,
930 const HUF_DTable* DTable)
931 {
932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
933 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
935 }
936
937
/* Pre-measured cost model consumed by HUF_selectDecoder() :
 * tableTime      = fixed cost of building the decoding table,
 * decode256Time  = incremental cost per 256 decoded bytes.
 * Rows are indexed by the quantized compression ratio Q (cSrcSize*16/dstSize);
 * values are only ever compared against each other, so units are relative.
 * NOTE(review): only columns 0 (single) and 1 (double) are read by the visible
 * code; the "quad" column appears unused here — confirm before removing. */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
959
960 /** HUF_selectDecoder() :
961 * Tells which decoder is likely to decode faster,
962 * based on a set of pre-computed metrics.
963 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
964 * Assumption : 0 < dstSize <= 128 KB */
965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
966 {
967 assert(dstSize > 0);
968 assert(dstSize <= 128*1024);
969 /* decoder timing evaluation */
970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
971 U32 const D256 = (U32)(dstSize >> 8);
972 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
973 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
974 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
975 return DTime1 < DTime0;
976 } }
977
978
979 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
980
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 {
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
984
985 /* validation checks */
986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
987 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
988 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
989 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
990
991 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
992 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
993 }
994 }
995
996 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
997 {
998 /* validation checks */
999 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1000 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1001 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1003
1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1005 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1007 }
1008 }
1009
1010 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1011 {
1012 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1013 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1014 workSpace, sizeof(workSpace));
1015 }
1016
1017
1018 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1019 size_t dstSize, const void* cSrc,
1020 size_t cSrcSize, void* workSpace,
1021 size_t wkspSize)
1022 {
1023 /* validation checks */
1024 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1026
1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1030 }
1031 }
1032
1033 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1034 const void* cSrc, size_t cSrcSize,
1035 void* workSpace, size_t wkspSize)
1036 {
1037 /* validation checks */
1038 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1039 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1040 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1042
1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1044 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045 cSrcSize, workSpace, wkspSize):
1046 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047 cSrcSize, workSpace, wkspSize);
1048 }
1049 }
1050
1051 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1052 const void* cSrc, size_t cSrcSize)
1053 {
1054 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1055 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1056 workSpace, sizeof(workSpace));
1057 }
1058
1059
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 {
1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1063 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1065 }
1066
1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 {
1069 const BYTE* ip = (const BYTE*) cSrc;
1070
1071 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1072 if (HUF_isError(hSize)) return hSize;
1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1074 ip += hSize; cSrcSize -= hSize;
1075
1076 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 }
1078
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 {
1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1082 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1084 }
1085
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1087 {
1088 /* validation checks */
1089 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1091
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1095 }
1096 }