[BTRFS] Upgrade to 1.4
[reactos.git] / drivers / filesystems / btrfs / zstd / huf_decompress.c
1 /* ******************************************************************
2 huff0 huffman decoder,
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Dependencies
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h" /* BIT_* */
41 #include "fse.h" /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45 #include <ntifs.h>
46 #include <ntddk.h>
47
48 #define HUFD_ALLOC_TAG 0x64465548 // "HUFd"
49
50
51 /* **************************************************************
52 * Error Management
53 ****************************************************************/
54 #define HUF_isError ERR_isError
55 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
56
57
58 /* **************************************************************
59 * Byte alignment for workSpace management
60 ****************************************************************/
61 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
62 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
63
64
65 /*-***************************/
66 /* generic DTableDesc */
67 /*-***************************/
68 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
69
70 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
71 {
72 DTableDesc dtd;
73 memcpy(&dtd, table, sizeof(dtd));
74 return dtd;
75 }
76
77
78 /*-***************************/
79 /* single-symbol decoding */
80 /*-***************************/
81 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
82
/** HUF_readDTableX1_wksp() :
 *  Reads a Huffman table description from `src` and builds the single-symbol
 *  decoding table into `DTable`, using caller-provided scratch memory
 *  (avoids a large stack frame).
 *  DTable[0] holds the DTableDesc header; decoding entries start at DTable+1.
 * @return : number of bytes read from `src` (the header size),
 *           or an error code checkable with HUF_isError() */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* skip the descriptor cell */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    /* carve workSpace : rankVal[] first, then huffWeight[] (rounded up to U32) */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* decode the weight of each symbol; also fills rankVal[] with per-weight counts */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;   /* 0 == single-symbol (X1) table */
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank :
     * convert per-weight counts into cumulative start offsets within dt[] */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));   /* weight w occupies 2^(w-1) cells per symbol */
            rankVal[n] = current;
    }   }

    /* fill DTable : each symbol of weight w fills 2^(w-1) consecutive cells */
    {   U32 n;
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;
            U32 u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);   /* higher weight => shorter code */
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
                dt[u] = D;
            rankVal[w] += length;
    }   }

    return iSize;
}
139
140 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
141 {
142 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
143 return HUF_readDTableX1_wksp(DTable, src, srcSize,
144 workSpace, sizeof(workSpace));
145 }
146
147 FORCE_INLINE_TEMPLATE BYTE
148 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
149 {
150 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
151 BYTE const c = dt[val].byte;
152 BIT_skipBits(Dstream, dt[val].nbBits);
153 return c;
154 }
155
/* Unconditional single-symbol decode step. */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* Decode step taken when the bit container is guaranteed to still hold enough
 * bits : always on 64-bit targets, or on 32-bit when tables are small. */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* Decode step taken only on 64-bit targets (largest bit container). */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
166
/** HUF_decodeStreamX1() :
 *  Decodes one bitstream into [p, pEnd) using single-symbol table `dt`.
 *  Unrolled 4-at-a-time while the stream can be reloaded, then drains the
 *  remaining symbols one by one.
 * @return : number of bytes written (pEnd - pStart) */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining : on 32-bit, keep reloading between symbols */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}
191
/** HUF_decompress1X1_usingDTable_internal_body() :
 *  Single-stream decompression of `cSrc` into exactly `dstSize` bytes,
 *  using an already-built single-symbol (X1) table.
 * @return : dstSize, or corruption_detected if the bitstream did not end
 *           exactly when the output was full */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* decoding entries start after the descriptor */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* the stream must be fully and exactly consumed */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
214
/** HUF_decompress4X1_usingDTable_internal_body() :
 *  4-stream single-symbol decompression.  `cSrc` starts with a 6-byte jump
 *  table (three little-endian 16-bit stream lengths; the 4th is implied),
 *  followed by 4 independent bitstreams.  Each stream decodes one quarter of
 *  the output, allowing interleaved decoding for speed. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* implied 4th length */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;   /* each stream fills one quarter, rounded up */
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 underflows (wraps) if the jump table is inconsistent with cSrcSize */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        /* NOTE(review): endSignal is not updated inside the loop; after the first
         * check the loop runs on the op4 bound alone, and the in-loop reload
         * results are discarded.  Over-consumption is caught by the final
         * BIT_endOfDStream() checks — confirm against upstream zstd behavior. */
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check : all four streams must have been consumed exactly */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
308
309
/* Generic signature of a decompression routine working from a prepared DTable. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);
#if DYNAMIC_BMI2

/* HUF_DGEN(fn) : from fn##_body, generates two concrete implementations —
 * a portable default and a BMI2-targeted build of the same body — plus a
 * dispatcher fn(..., int bmi2) that selects between them at run time. */
#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

/* Without dynamic BMI2 dispatch, fn is a plain wrapper; bmi2 flag is ignored. */
#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif

/* instantiate the dispatch wrappers for the X1 (single-symbol) decoders */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
357
358
359
360 size_t HUF_decompress1X1_usingDTable(
361 void* dst, size_t dstSize,
362 const void* cSrc, size_t cSrcSize,
363 const HUF_DTable* DTable)
364 {
365 DTableDesc dtd = HUF_getDTableDesc(DTable);
366 if (dtd.tableType != 0) return ERROR(GENERIC);
367 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
368 }
369
370 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
371 const void* cSrc, size_t cSrcSize,
372 void* workSpace, size_t wkspSize)
373 {
374 const BYTE* ip = (const BYTE*) cSrc;
375
376 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
377 if (HUF_isError(hSize)) return hSize;
378 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
379 ip += hSize; cSrcSize -= hSize;
380
381 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
382 }
383
384
385 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
386 const void* cSrc, size_t cSrcSize)
387 {
388 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
389 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
390 workSpace, sizeof(workSpace));
391 }
392
393 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
394 {
395 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG);
396 size_t ret;
397
398 if (!DTable)
399 return 0;
400
401 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1));
402
403 DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 0x01000001;
404
405 ret = HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
406
407 ExFreePool(DTable);
408
409 return ret;
410 }
411
412 size_t HUF_decompress4X1_usingDTable(
413 void* dst, size_t dstSize,
414 const void* cSrc, size_t cSrcSize,
415 const HUF_DTable* DTable)
416 {
417 DTableDesc dtd = HUF_getDTableDesc(DTable);
418 if (dtd.tableType != 0) return ERROR(GENERIC);
419 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
420 }
421
422 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
423 const void* cSrc, size_t cSrcSize,
424 void* workSpace, size_t wkspSize, int bmi2)
425 {
426 const BYTE* ip = (const BYTE*) cSrc;
427
428 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
429 workSpace, wkspSize);
430 if (HUF_isError(hSize)) return hSize;
431 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
432 ip += hSize; cSrcSize -= hSize;
433
434 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
435 }
436
437 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
438 const void* cSrc, size_t cSrcSize,
439 void* workSpace, size_t wkspSize)
440 {
441 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
442 }
443
444
445 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
446 {
447 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
448 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
449 workSpace, sizeof(workSpace));
450 }
451 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
452 {
453 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG);
454 size_t ret;
455
456 if (!DTable)
457 return 0;
458
459 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1));
460
461 DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 0x01000001;
462
463 ret = HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
464
465 ExFreePool(DTable);
466
467 return ret;
468 }
469
470
471 /* *************************/
472 /* double-symbols decoding */
473 /* *************************/
474
/* X2 table entry : `sequence` packs up to 2 decoded bytes, `length` says how
 * many of them are valid (1 or 2), `nbBits` is the total bits consumed. */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* one rank-start value per weight; one column per number of bits already consumed */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
479
480
/* HUF_fillDTableX2Level2() :
 * Fills a sub-table for the second symbol of a 2-symbol sequence, where the
 * first symbol (baseSeq) has already consumed `consumed` bits.
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : entries whose second symbol would be too long
     * decode only the first symbol (length 1) */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits = (BYTE)(consumed);
        DElt.length = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable : each entry decodes baseSeq followed by one second symbol */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            /* sequence = baseSeq in low byte, second symbol in high byte (LE) */
            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}
522
523
/* HUF_fillDTableX2() :
 * Builds the full double-symbol decoding table : for each first symbol,
 * either fills a 2-symbol sub-table (when enough bits remain for a second
 * symbol) or a plain single-symbol range. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;   /* shortest possible code length */
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;   /* minimum weight a second symbol needs to fit */
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            /* single-symbol entry, replicated over its whole range */
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}
565
/** HUF_readDTableX2_wksp() :
 *  Reads a Huffman table description from `src` and builds the double-symbol
 *  decoding table into `DTable`, using caller-provided scratch memory.
 * @return : number of bytes read from `src` (the header size),
 *           or an error code checkable with HUF_isError() */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    /* carve workSpace into the five scratch arrays used below */
    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    /* zeroes rankStats[] and the adjacent rankStart0[] in one pass */
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight (counting sort using rankStart[] as cursors) */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal : start offsets per weight, for each possible number of
     * already-consumed bits */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    /* commit the updated descriptor (type 1 == double-symbol table) */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
665
666 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
667 {
668 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
669 return HUF_readDTableX2_wksp(DTable, src, srcSize,
670 workSpace, sizeof(workSpace));
671 }
672
673
674 FORCE_INLINE_TEMPLATE U32
675 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
676 {
677 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
678 memcpy(op, dt+val, 2);
679 BIT_skipBits(DStream, dt[val].nbBits);
680 return dt[val].length;
681 }
682
/** HUF_decodeLastSymbolX2() :
 *  Decodes the final output byte of a stream.  Only 1 byte is copied even if
 *  the matched entry holds a 2-byte sequence, because exactly 1 byte of room
 *  remains; bit accounting is clamped so the stream still registers as fully
 *  consumed. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);   /* emit only the first byte of the sequence */
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}
698
/* Unconditional double-symbol decode step; advances ptr by 1 or 2 bytes. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode step taken when the bit container is guaranteed to still hold enough
 * bits : always on 64-bit targets, or on 32-bit when tables are small. */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode step taken only on 64-bit targets (largest bit container). */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
709
/** HUF_decodeStreamX2() :
 *  Decodes one bitstream into [p, pEnd) using double-symbol table `dt`.
 *  Each step may emit 1 or 2 bytes, so the loop bounds keep headroom;
 *  the very last byte goes through HUF_decodeLastSymbolX2().
 * @return : number of bytes written (p - pStart) */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time : keep a bit-container's worth of slack */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    /* at most 1 byte left : use the safe single-byte decoder */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
736
/** HUF_decompress1X2_usingDTable_internal_body() :
 *  Single-stream decompression of `cSrc` into exactly `dstSize` bytes,
 *  using an already-built double-symbol (X2) table.
 * @return : dstSize, or corruption_detected if the bitstream did not end
 *           exactly when the output was full */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check : the stream must be fully and exactly consumed */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
763
764
/** HUF_decompress4X2_usingDTable_internal_body() :
 *  4-stream double-symbol decompression.  Same framing as the X1 variant :
 *  6-byte jump table (three LE16 lengths, 4th implied) then 4 independent
 *  bitstreams, each producing one quarter of the output. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* implied 4th length */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;   /* each stream fills one quarter, rounded up */
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 underflows (wraps) if the jump table is inconsistent with cSrcSize */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);

            /* stop as soon as any stream stops being "unfinished" */
            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all four streams must have been consumed exactly */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
853
/* instantiate the bmi2/default dispatch wrappers for the X2 decoders */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
856
857 size_t HUF_decompress1X2_usingDTable(
858 void* dst, size_t dstSize,
859 const void* cSrc, size_t cSrcSize,
860 const HUF_DTable* DTable)
861 {
862 DTableDesc dtd = HUF_getDTableDesc(DTable);
863 if (dtd.tableType != 1) return ERROR(GENERIC);
864 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
865 }
866
867 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
868 const void* cSrc, size_t cSrcSize,
869 void* workSpace, size_t wkspSize)
870 {
871 const BYTE* ip = (const BYTE*) cSrc;
872
873 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
874 workSpace, wkspSize);
875 if (HUF_isError(hSize)) return hSize;
876 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
877 ip += hSize; cSrcSize -= hSize;
878
879 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
880 }
881
882
883 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
884 const void* cSrc, size_t cSrcSize)
885 {
886 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
887 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
888 workSpace, sizeof(workSpace));
889 }
890
891 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
892 {
893 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG);
894 size_t ret;
895
896 if (!DTable)
897 return 0;
898
899 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX));
900
901 DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001;
902
903 ret = HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
904
905 ExFreePool(DTable);
906
907 return ret;
908 }
909
910 size_t HUF_decompress4X2_usingDTable(
911 void* dst, size_t dstSize,
912 const void* cSrc, size_t cSrcSize,
913 const HUF_DTable* DTable)
914 {
915 DTableDesc dtd = HUF_getDTableDesc(DTable);
916 if (dtd.tableType != 1) return ERROR(GENERIC);
917 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
918 }
919
920 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
921 const void* cSrc, size_t cSrcSize,
922 void* workSpace, size_t wkspSize, int bmi2)
923 {
924 const BYTE* ip = (const BYTE*) cSrc;
925
926 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
927 workSpace, wkspSize);
928 if (HUF_isError(hSize)) return hSize;
929 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
930 ip += hSize; cSrcSize -= hSize;
931
932 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
933 }
934
935 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
936 const void* cSrc, size_t cSrcSize,
937 void* workSpace, size_t wkspSize)
938 {
939 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
940 }
941
942
943 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
944 const void* cSrc, size_t cSrcSize)
945 {
946 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
947 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
948 workSpace, sizeof(workSpace));
949 }
950
951 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
952 {
953 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG);
954 size_t ret;
955
956 if (!DTable)
957 return 0;
958
959 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX));
960
961 DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001;
962
963 ret = HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
964
965 ExFreePool(DTable);
966
967 return ret;
968 }
969
970
971 /* ***********************************/
972 /* Universal decompression selectors */
973 /* ***********************************/
974
975 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
976 const void* cSrc, size_t cSrcSize,
977 const HUF_DTable* DTable)
978 {
979 DTableDesc const dtd = HUF_getDTableDesc(DTable);
980 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
981 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
982 }
983
984 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
985 const void* cSrc, size_t cSrcSize,
986 const HUF_DTable* DTable)
987 {
988 DTableDesc const dtd = HUF_getDTableDesc(DTable);
989 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
990 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
991 }
992
993
/* Pre-computed cost model consumed by HUF_selectDecoder() :
 *   tableTime     : fixed cost of building the decode table,
 *   decode256Time : incremental cost per 256 decoded bytes.
 * Rows are indexed by Q, the quantized compression ratio (0..15);
 * columns by decoder flavour. Values are empirical tuning constants
 * from upstream FSE/HUF — do not edit by hand. */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
1015
1016 /** HUF_selectDecoder() :
1017 * Tells which decoder is likely to decode faster,
1018 * based on a set of pre-computed metrics.
1019 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1020 * Assumption : 0 < dstSize <= 128 KB */
1021 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1022 {
1023 assert(dstSize > 0);
1024 assert(dstSize <= 128*1024);
1025 /* decoder timing evaluation */
1026 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1027 U32 const D256 = (U32)(dstSize >> 8);
1028 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1029 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1030 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1031 return DTime1 < DTime0;
1032 } }
1033
1034
1035 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1036
1037 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1038 {
1039 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1040
1041 /* validation checks */
1042 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1043 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1044 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1045 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1046
1047 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1048 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1049 }
1050 }
1051
1052 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1053 {
1054 /* validation checks */
1055 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1056 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1057 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1058 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1059
1060 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1061 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1062 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1063 }
1064 }
1065
1066 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1067 {
1068 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1069 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1070 workSpace, sizeof(workSpace));
1071 }
1072
1073
1074 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1075 size_t dstSize, const void* cSrc,
1076 size_t cSrcSize, void* workSpace,
1077 size_t wkspSize)
1078 {
1079 /* validation checks */
1080 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1081 if (cSrcSize == 0) return ERROR(corruption_detected);
1082
1083 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1084 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1085 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1086 }
1087 }
1088
1089 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1090 const void* cSrc, size_t cSrcSize,
1091 void* workSpace, size_t wkspSize)
1092 {
1093 /* validation checks */
1094 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1095 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1096 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1097 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1098
1099 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1100 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1101 cSrcSize, workSpace, wkspSize):
1102 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1103 cSrcSize, workSpace, wkspSize);
1104 }
1105 }
1106
1107 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1108 const void* cSrc, size_t cSrcSize)
1109 {
1110 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1111 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1112 workSpace, sizeof(workSpace));
1113 }
1114
1115
1116 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1117 {
1118 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1119 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1120 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1121 }
1122
1123 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1124 {
1125 const BYTE* ip = (const BYTE*) cSrc;
1126
1127 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1128 if (HUF_isError(hSize)) return hSize;
1129 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1130 ip += hSize; cSrcSize -= hSize;
1131
1132 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1133 }
1134
1135 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1136 {
1137 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1138 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1139 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1140 }
1141
1142 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1143 {
1144 /* validation checks */
1145 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1146 if (cSrcSize == 0) return ERROR(corruption_detected);
1147
1148 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1149 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1150 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1151 }
1152 }