1 /* ******************************************************************
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 You can contact the author at :
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
35 /* **************************************************************
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
40 #include "bitstream.h" /* BIT_* */
41 #include "fse.h" /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
44 #include "error_private.h"
48 #define HUFD_ALLOC_TAG 0x64465548 // "HUFd"
51 /* **************************************************************
53 ****************************************************************/
54 #define HUF_isError ERR_isError
55 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
58 /* **************************************************************
59 * Byte alignment for workSpace management
60 ****************************************************************/
61 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
62 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
65 /*-***************************/
66 /* generic DTableDesc */
67 /*-***************************/
68 typedef struct { BYTE maxTableLog
; BYTE tableType
; BYTE tableLog
; BYTE reserved
; } DTableDesc
;
70 static DTableDesc
HUF_getDTableDesc(const HUF_DTable
* table
)
73 memcpy(&dtd
, table
, sizeof(dtd
));
78 /*-***************************/
79 /* single-symbol decoding */
80 /*-***************************/
81 typedef struct { BYTE byte
; BYTE nbBits
; } HUF_DEltX1
; /* single-symbol decoding */
83 size_t HUF_readDTableX1_wksp(HUF_DTable
* DTable
, const void* src
, size_t srcSize
, void* workSpace
, size_t wkspSize
)
88 void* const dtPtr
= DTable
+ 1;
89 HUF_DEltX1
* const dt
= (HUF_DEltX1
*)dtPtr
;
93 size_t spaceUsed32
= 0;
95 rankVal
= (U32
*)workSpace
+ spaceUsed32
;
96 spaceUsed32
+= HUF_TABLELOG_ABSOLUTEMAX
+ 1;
97 huffWeight
= (BYTE
*)((U32
*)workSpace
+ spaceUsed32
);
98 spaceUsed32
+= HUF_ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
100 if ((spaceUsed32
<< 2) > wkspSize
) return ERROR(tableLog_tooLarge
);
102 DEBUG_STATIC_ASSERT(sizeof(DTableDesc
) == sizeof(HUF_DTable
));
103 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
105 iSize
= HUF_readStats(huffWeight
, HUF_SYMBOLVALUE_MAX
+ 1, rankVal
, &nbSymbols
, &tableLog
, src
, srcSize
);
106 if (HUF_isError(iSize
)) return iSize
;
109 { DTableDesc dtd
= HUF_getDTableDesc(DTable
);
110 if (tableLog
> (U32
)(dtd
.maxTableLog
+1)) return ERROR(tableLog_tooLarge
); /* DTable too small, Huffman tree cannot fit in */
112 dtd
.tableLog
= (BYTE
)tableLog
;
113 memcpy(DTable
, &dtd
, sizeof(dtd
));
116 /* Calculate starting value for each rank */
117 { U32 n
, nextRankStart
= 0;
118 for (n
=1; n
<tableLog
+1; n
++) {
119 U32
const current
= nextRankStart
;
120 nextRankStart
+= (rankVal
[n
] << (n
-1));
121 rankVal
[n
] = current
;
126 for (n
=0; n
<nbSymbols
; n
++) {
127 U32
const w
= huffWeight
[n
];
128 U32
const length
= (1 << w
) >> 1;
131 D
.byte
= (BYTE
)n
; D
.nbBits
= (BYTE
)(tableLog
+ 1 - w
);
132 for (u
= rankVal
[w
]; u
< rankVal
[w
] + length
; u
++)
134 rankVal
[w
] += length
;
140 size_t HUF_readDTableX1(HUF_DTable
* DTable
, const void* src
, size_t srcSize
)
142 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
143 return HUF_readDTableX1_wksp(DTable
, src
, srcSize
,
144 workSpace
, sizeof(workSpace
));
147 FORCE_INLINE_TEMPLATE BYTE
148 HUF_decodeSymbolX1(BIT_DStream_t
* Dstream
, const HUF_DEltX1
* dt
, const U32 dtLog
)
150 size_t const val
= BIT_lookBitsFast(Dstream
, dtLog
); /* note : dtLog >= 1 */
151 BYTE
const c
= dt
[val
].byte
;
152 BIT_skipBits(Dstream
, dt
[val
].nbBits
);
/* Unconditional decode step */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* Decode step enabled when the bit container is large enough for 2+ symbols */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* Decode step enabled only on 64-bit targets */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
168 HUF_decodeStreamX1(BYTE
* p
, BIT_DStream_t
* const bitDPtr
, BYTE
* const pEnd
, const HUF_DEltX1
* const dt
, const U32 dtLog
)
170 BYTE
* const pStart
= p
;
172 /* up to 4 symbols at a time */
173 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
-3)) {
174 HUF_DECODE_SYMBOLX1_2(p
, bitDPtr
);
175 HUF_DECODE_SYMBOLX1_1(p
, bitDPtr
);
176 HUF_DECODE_SYMBOLX1_2(p
, bitDPtr
);
177 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
180 /* [0-3] symbols remaining */
182 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
))
183 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
185 /* no more data to retrieve from bitstream, no need to reload */
187 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
192 FORCE_INLINE_TEMPLATE
size_t
193 HUF_decompress1X1_usingDTable_internal_body(
194 void* dst
, size_t dstSize
,
195 const void* cSrc
, size_t cSrcSize
,
196 const HUF_DTable
* DTable
)
198 BYTE
* op
= (BYTE
*)dst
;
199 BYTE
* const oend
= op
+ dstSize
;
200 const void* dtPtr
= DTable
+ 1;
201 const HUF_DEltX1
* const dt
= (const HUF_DEltX1
*)dtPtr
;
203 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
204 U32
const dtLog
= dtd
.tableLog
;
206 CHECK_F( BIT_initDStream(&bitD
, cSrc
, cSrcSize
) );
208 HUF_decodeStreamX1(op
, &bitD
, oend
, dt
, dtLog
);
210 if (!BIT_endOfDStream(&bitD
)) return ERROR(corruption_detected
);
215 FORCE_INLINE_TEMPLATE
size_t
216 HUF_decompress4X1_usingDTable_internal_body(
217 void* dst
, size_t dstSize
,
218 const void* cSrc
, size_t cSrcSize
,
219 const HUF_DTable
* DTable
)
222 if (cSrcSize
< 10) return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
224 { const BYTE
* const istart
= (const BYTE
*) cSrc
;
225 BYTE
* const ostart
= (BYTE
*) dst
;
226 BYTE
* const oend
= ostart
+ dstSize
;
227 const void* const dtPtr
= DTable
+ 1;
228 const HUF_DEltX1
* const dt
= (const HUF_DEltX1
*)dtPtr
;
235 size_t const length1
= MEM_readLE16(istart
);
236 size_t const length2
= MEM_readLE16(istart
+2);
237 size_t const length3
= MEM_readLE16(istart
+4);
238 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
239 const BYTE
* const istart1
= istart
+ 6; /* jumpTable */
240 const BYTE
* const istart2
= istart1
+ length1
;
241 const BYTE
* const istart3
= istart2
+ length2
;
242 const BYTE
* const istart4
= istart3
+ length3
;
243 const size_t segmentSize
= (dstSize
+3) / 4;
244 BYTE
* const opStart2
= ostart
+ segmentSize
;
245 BYTE
* const opStart3
= opStart2
+ segmentSize
;
246 BYTE
* const opStart4
= opStart3
+ segmentSize
;
248 BYTE
* op2
= opStart2
;
249 BYTE
* op3
= opStart3
;
250 BYTE
* op4
= opStart4
;
251 U32 endSignal
= BIT_DStream_unfinished
;
252 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
253 U32
const dtLog
= dtd
.tableLog
;
255 if (length4
> cSrcSize
) return ERROR(corruption_detected
); /* overflow */
256 CHECK_F( BIT_initDStream(&bitD1
, istart1
, length1
) );
257 CHECK_F( BIT_initDStream(&bitD2
, istart2
, length2
) );
258 CHECK_F( BIT_initDStream(&bitD3
, istart3
, length3
) );
259 CHECK_F( BIT_initDStream(&bitD4
, istart4
, length4
) );
261 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
262 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
263 while ( (endSignal
==BIT_DStream_unfinished
) && (op4
<(oend
-3)) ) {
264 HUF_DECODE_SYMBOLX1_2(op1
, &bitD1
);
265 HUF_DECODE_SYMBOLX1_2(op2
, &bitD2
);
266 HUF_DECODE_SYMBOLX1_2(op3
, &bitD3
);
267 HUF_DECODE_SYMBOLX1_2(op4
, &bitD4
);
268 HUF_DECODE_SYMBOLX1_1(op1
, &bitD1
);
269 HUF_DECODE_SYMBOLX1_1(op2
, &bitD2
);
270 HUF_DECODE_SYMBOLX1_1(op3
, &bitD3
);
271 HUF_DECODE_SYMBOLX1_1(op4
, &bitD4
);
272 HUF_DECODE_SYMBOLX1_2(op1
, &bitD1
);
273 HUF_DECODE_SYMBOLX1_2(op2
, &bitD2
);
274 HUF_DECODE_SYMBOLX1_2(op3
, &bitD3
);
275 HUF_DECODE_SYMBOLX1_2(op4
, &bitD4
);
276 HUF_DECODE_SYMBOLX1_0(op1
, &bitD1
);
277 HUF_DECODE_SYMBOLX1_0(op2
, &bitD2
);
278 HUF_DECODE_SYMBOLX1_0(op3
, &bitD3
);
279 HUF_DECODE_SYMBOLX1_0(op4
, &bitD4
);
280 BIT_reloadDStream(&bitD1
);
281 BIT_reloadDStream(&bitD2
);
282 BIT_reloadDStream(&bitD3
);
283 BIT_reloadDStream(&bitD4
);
286 /* check corruption */
287 /* note : should not be necessary : op# advance in lock step, and we control op4.
288 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
289 if (op1
> opStart2
) return ERROR(corruption_detected
);
290 if (op2
> opStart3
) return ERROR(corruption_detected
);
291 if (op3
> opStart4
) return ERROR(corruption_detected
);
292 /* note : op4 supposed already verified within main loop */
294 /* finish bitStreams one by one */
295 HUF_decodeStreamX1(op1
, &bitD1
, opStart2
, dt
, dtLog
);
296 HUF_decodeStreamX1(op2
, &bitD2
, opStart3
, dt
, dtLog
);
297 HUF_decodeStreamX1(op3
, &bitD3
, opStart4
, dt
, dtLog
);
298 HUF_decodeStreamX1(op4
, &bitD4
, oend
, dt
, dtLog
);
301 { U32
const endCheck
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
302 if (!endCheck
) return ERROR(corruption_detected
); }
310 typedef size_t (*HUF_decompress_usingDTable_t
)(void *dst
, size_t dstSize
,
313 const HUF_DTable
*DTable
);
316 #define HUF_DGEN(fn) \
318 static size_t fn##_default( \
319 void* dst, size_t dstSize, \
320 const void* cSrc, size_t cSrcSize, \
321 const HUF_DTable* DTable) \
323 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
326 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
327 void* dst, size_t dstSize, \
328 const void* cSrc, size_t cSrcSize, \
329 const HUF_DTable* DTable) \
331 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
334 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
335 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
338 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
340 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
345 #define HUF_DGEN(fn) \
346 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
347 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
350 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
355 HUF_DGEN(HUF_decompress1X1_usingDTable_internal
)
356 HUF_DGEN(HUF_decompress4X1_usingDTable_internal
)
360 size_t HUF_decompress1X1_usingDTable(
361 void* dst
, size_t dstSize
,
362 const void* cSrc
, size_t cSrcSize
,
363 const HUF_DTable
* DTable
)
365 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
366 if (dtd
.tableType
!= 0) return ERROR(GENERIC
);
367 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
370 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
371 const void* cSrc
, size_t cSrcSize
,
372 void* workSpace
, size_t wkspSize
)
374 const BYTE
* ip
= (const BYTE
*) cSrc
;
376 size_t const hSize
= HUF_readDTableX1_wksp(DCtx
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
377 if (HUF_isError(hSize
)) return hSize
;
378 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
379 ip
+= hSize
; cSrcSize
-= hSize
;
381 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
, /* bmi2 */ 0);
385 size_t HUF_decompress1X1_DCtx(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
386 const void* cSrc
, size_t cSrcSize
)
388 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
389 return HUF_decompress1X1_DCtx_wksp(DCtx
, dst
, dstSize
, cSrc
, cSrcSize
,
390 workSpace
, sizeof(workSpace
));
393 size_t HUF_decompress1X1 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
395 HUF_DTable
* DTable
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
-1), HUFD_ALLOC_TAG
);
401 RtlZeroMemory(DTable
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
-1));
403 DTable
[0] = (U32
)(HUF_TABLELOG_MAX
-1) * 0x01000001;
405 ret
= HUF_decompress1X1_DCtx (DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
412 size_t HUF_decompress4X1_usingDTable(
413 void* dst
, size_t dstSize
,
414 const void* cSrc
, size_t cSrcSize
,
415 const HUF_DTable
* DTable
)
417 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
418 if (dtd
.tableType
!= 0) return ERROR(GENERIC
);
419 return HUF_decompress4X1_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
422 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
423 const void* cSrc
, size_t cSrcSize
,
424 void* workSpace
, size_t wkspSize
, int bmi2
)
426 const BYTE
* ip
= (const BYTE
*) cSrc
;
428 size_t const hSize
= HUF_readDTableX1_wksp (dctx
, cSrc
, cSrcSize
,
429 workSpace
, wkspSize
);
430 if (HUF_isError(hSize
)) return hSize
;
431 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
432 ip
+= hSize
; cSrcSize
-= hSize
;
434 return HUF_decompress4X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
437 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
438 const void* cSrc
, size_t cSrcSize
,
439 void* workSpace
, size_t wkspSize
)
441 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, 0);
445 size_t HUF_decompress4X1_DCtx (HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
447 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
448 return HUF_decompress4X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
449 workSpace
, sizeof(workSpace
));
451 size_t HUF_decompress4X1 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
453 HUF_DTable
* DTable
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
-1), HUFD_ALLOC_TAG
);
459 RtlZeroMemory(DTable
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
-1));
461 DTable
[0] = (U32
)(HUF_TABLELOG_MAX
-1) * 0x01000001;
463 ret
= HUF_decompress4X1_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
471 /* *************************/
472 /* double-symbols decoding */
473 /* *************************/
475 typedef struct { U16 sequence
; BYTE nbBits
; BYTE length
; } HUF_DEltX2
; /* double-symbols decoding */
476 typedef struct { BYTE symbol
; BYTE weight
; } sortedSymbol_t
;
477 typedef U32 rankValCol_t
[HUF_TABLELOG_MAX
+ 1];
478 typedef rankValCol_t rankVal_t
[HUF_TABLELOG_MAX
];
481 /* HUF_fillDTableX2Level2() :
482 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
483 static void HUF_fillDTableX2Level2(HUF_DEltX2
* DTable
, U32 sizeLog
, const U32 consumed
,
484 const U32
* rankValOrigin
, const int minWeight
,
485 const sortedSymbol_t
* sortedSymbols
, const U32 sortedListSize
,
486 U32 nbBitsBaseline
, U16 baseSeq
)
489 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
491 /* get pre-calculated rankVal */
492 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
494 /* fill skipped values */
496 U32 i
, skipSize
= rankVal
[minWeight
];
497 MEM_writeLE16(&(DElt
.sequence
), baseSeq
);
498 DElt
.nbBits
= (BYTE
)(consumed
);
500 for (i
= 0; i
< skipSize
; i
++)
505 { U32 s
; for (s
=0; s
<sortedListSize
; s
++) { /* note : sortedSymbols already skipped */
506 const U32 symbol
= sortedSymbols
[s
].symbol
;
507 const U32 weight
= sortedSymbols
[s
].weight
;
508 const U32 nbBits
= nbBitsBaseline
- weight
;
509 const U32 length
= 1 << (sizeLog
-nbBits
);
510 const U32 start
= rankVal
[weight
];
512 const U32 end
= start
+ length
;
514 MEM_writeLE16(&(DElt
.sequence
), (U16
)(baseSeq
+ (symbol
<< 8)));
515 DElt
.nbBits
= (BYTE
)(nbBits
+ consumed
);
517 do { DTable
[i
++] = DElt
; } while (i
<end
); /* since length >= 1 */
519 rankVal
[weight
] += length
;
524 static void HUF_fillDTableX2(HUF_DEltX2
* DTable
, const U32 targetLog
,
525 const sortedSymbol_t
* sortedList
, const U32 sortedListSize
,
526 const U32
* rankStart
, rankVal_t rankValOrigin
, const U32 maxWeight
,
527 const U32 nbBitsBaseline
)
529 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
530 const int scaleLog
= nbBitsBaseline
- targetLog
; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
531 const U32 minBits
= nbBitsBaseline
- maxWeight
;
534 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
537 for (s
=0; s
<sortedListSize
; s
++) {
538 const U16 symbol
= sortedList
[s
].symbol
;
539 const U32 weight
= sortedList
[s
].weight
;
540 const U32 nbBits
= nbBitsBaseline
- weight
;
541 const U32 start
= rankVal
[weight
];
542 const U32 length
= 1 << (targetLog
-nbBits
);
544 if (targetLog
-nbBits
>= minBits
) { /* enough room for a second symbol */
546 int minWeight
= nbBits
+ scaleLog
;
547 if (minWeight
< 1) minWeight
= 1;
548 sortedRank
= rankStart
[minWeight
];
549 HUF_fillDTableX2Level2(DTable
+start
, targetLog
-nbBits
, nbBits
,
550 rankValOrigin
[nbBits
], minWeight
,
551 sortedList
+sortedRank
, sortedListSize
-sortedRank
,
552 nbBitsBaseline
, symbol
);
555 MEM_writeLE16(&(DElt
.sequence
), symbol
);
556 DElt
.nbBits
= (BYTE
)(nbBits
);
558 { U32
const end
= start
+ length
;
560 for (u
= start
; u
< end
; u
++) DTable
[u
] = DElt
;
562 rankVal
[weight
] += length
;
566 size_t HUF_readDTableX2_wksp(HUF_DTable
* DTable
,
567 const void* src
, size_t srcSize
,
568 void* workSpace
, size_t wkspSize
)
570 U32 tableLog
, maxW
, sizeOfSort
, nbSymbols
;
571 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
572 U32
const maxTableLog
= dtd
.maxTableLog
;
574 void* dtPtr
= DTable
+1; /* force compiler to avoid strict-aliasing */
575 HUF_DEltX2
* const dt
= (HUF_DEltX2
*)dtPtr
;
578 rankValCol_t
* rankVal
;
581 sortedSymbol_t
* sortedSymbol
;
583 size_t spaceUsed32
= 0;
585 rankVal
= (rankValCol_t
*)((U32
*)workSpace
+ spaceUsed32
);
586 spaceUsed32
+= (sizeof(rankValCol_t
) * HUF_TABLELOG_MAX
) >> 2;
587 rankStats
= (U32
*)workSpace
+ spaceUsed32
;
588 spaceUsed32
+= HUF_TABLELOG_MAX
+ 1;
589 rankStart0
= (U32
*)workSpace
+ spaceUsed32
;
590 spaceUsed32
+= HUF_TABLELOG_MAX
+ 2;
591 sortedSymbol
= (sortedSymbol_t
*)workSpace
+ (spaceUsed32
* sizeof(U32
)) / sizeof(sortedSymbol_t
);
592 spaceUsed32
+= HUF_ALIGN(sizeof(sortedSymbol_t
) * (HUF_SYMBOLVALUE_MAX
+ 1), sizeof(U32
)) >> 2;
593 weightList
= (BYTE
*)((U32
*)workSpace
+ spaceUsed32
);
594 spaceUsed32
+= HUF_ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
596 if ((spaceUsed32
<< 2) > wkspSize
) return ERROR(tableLog_tooLarge
);
598 rankStart
= rankStart0
+ 1;
599 memset(rankStats
, 0, sizeof(U32
) * (2 * HUF_TABLELOG_MAX
+ 2 + 1));
601 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2
) == sizeof(HUF_DTable
)); /* if compiler fails here, assertion is wrong */
602 if (maxTableLog
> HUF_TABLELOG_MAX
) return ERROR(tableLog_tooLarge
);
603 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
605 iSize
= HUF_readStats(weightList
, HUF_SYMBOLVALUE_MAX
+ 1, rankStats
, &nbSymbols
, &tableLog
, src
, srcSize
);
606 if (HUF_isError(iSize
)) return iSize
;
609 if (tableLog
> maxTableLog
) return ERROR(tableLog_tooLarge
); /* DTable can't fit code depth */
612 for (maxW
= tableLog
; rankStats
[maxW
]==0; maxW
--) {} /* necessarily finds a solution before 0 */
614 /* Get start index of each weight */
615 { U32 w
, nextRankStart
= 0;
616 for (w
=1; w
<maxW
+1; w
++) {
617 U32 current
= nextRankStart
;
618 nextRankStart
+= rankStats
[w
];
619 rankStart
[w
] = current
;
621 rankStart
[0] = nextRankStart
; /* put all 0w symbols at the end of sorted list*/
622 sizeOfSort
= nextRankStart
;
625 /* sort symbols by weight */
627 for (s
=0; s
<nbSymbols
; s
++) {
628 U32
const w
= weightList
[s
];
629 U32
const r
= rankStart
[w
]++;
630 sortedSymbol
[r
].symbol
= (BYTE
)s
;
631 sortedSymbol
[r
].weight
= (BYTE
)w
;
633 rankStart
[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
637 { U32
* const rankVal0
= rankVal
[0];
638 { int const rescale
= (maxTableLog
-tableLog
) - 1; /* tableLog <= maxTableLog */
641 for (w
=1; w
<maxW
+1; w
++) {
642 U32 current
= nextRankVal
;
643 nextRankVal
+= rankStats
[w
] << (w
+rescale
);
644 rankVal0
[w
] = current
;
646 { U32
const minBits
= tableLog
+1 - maxW
;
648 for (consumed
= minBits
; consumed
< maxTableLog
- minBits
+ 1; consumed
++) {
649 U32
* const rankValPtr
= rankVal
[consumed
];
651 for (w
= 1; w
< maxW
+1; w
++) {
652 rankValPtr
[w
] = rankVal0
[w
] >> consumed
;
655 HUF_fillDTableX2(dt
, maxTableLog
,
656 sortedSymbol
, sizeOfSort
,
657 rankStart0
, rankVal
, maxW
,
660 dtd
.tableLog
= (BYTE
)maxTableLog
;
662 memcpy(DTable
, &dtd
, sizeof(dtd
));
666 size_t HUF_readDTableX2(HUF_DTable
* DTable
, const void* src
, size_t srcSize
)
668 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
669 return HUF_readDTableX2_wksp(DTable
, src
, srcSize
,
670 workSpace
, sizeof(workSpace
));
674 FORCE_INLINE_TEMPLATE U32
675 HUF_decodeSymbolX2(void* op
, BIT_DStream_t
* DStream
, const HUF_DEltX2
* dt
, const U32 dtLog
)
677 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
678 memcpy(op
, dt
+val
, 2);
679 BIT_skipBits(DStream
, dt
[val
].nbBits
);
680 return dt
[val
].length
;
683 FORCE_INLINE_TEMPLATE U32
684 HUF_decodeLastSymbolX2(void* op
, BIT_DStream_t
* DStream
, const HUF_DEltX2
* dt
, const U32 dtLog
)
686 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
687 memcpy(op
, dt
+val
, 1);
688 if (dt
[val
].length
==1) BIT_skipBits(DStream
, dt
[val
].nbBits
);
690 if (DStream
->bitsConsumed
< (sizeof(DStream
->bitContainer
)*8)) {
691 BIT_skipBits(DStream
, dt
[val
].nbBits
);
692 if (DStream
->bitsConsumed
> (sizeof(DStream
->bitContainer
)*8))
693 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
694 DStream
->bitsConsumed
= (sizeof(DStream
->bitContainer
)*8);
/* Unconditional decode step */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode step enabled when the bit container is large enough for 2+ cells */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Decode step enabled only on 64-bit targets */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
711 HUF_decodeStreamX2(BYTE
* p
, BIT_DStream_t
* bitDPtr
, BYTE
* const pEnd
,
712 const HUF_DEltX2
* const dt
, const U32 dtLog
)
714 BYTE
* const pStart
= p
;
716 /* up to 8 symbols at a time */
717 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
-(sizeof(bitDPtr
->bitContainer
)-1))) {
718 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
719 HUF_DECODE_SYMBOLX2_1(p
, bitDPtr
);
720 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
721 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
724 /* closer to end : up to 2 symbols at a time */
725 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
<= pEnd
-2))
726 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
729 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
); /* no need to reload : reached the end of DStream */
732 p
+= HUF_decodeLastSymbolX2(p
, bitDPtr
, dt
, dtLog
);
737 FORCE_INLINE_TEMPLATE
size_t
738 HUF_decompress1X2_usingDTable_internal_body(
739 void* dst
, size_t dstSize
,
740 const void* cSrc
, size_t cSrcSize
,
741 const HUF_DTable
* DTable
)
746 CHECK_F( BIT_initDStream(&bitD
, cSrc
, cSrcSize
) );
749 { BYTE
* const ostart
= (BYTE
*) dst
;
750 BYTE
* const oend
= ostart
+ dstSize
;
751 const void* const dtPtr
= DTable
+1; /* force compiler to not use strict-aliasing */
752 const HUF_DEltX2
* const dt
= (const HUF_DEltX2
*)dtPtr
;
753 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
754 HUF_decodeStreamX2(ostart
, &bitD
, oend
, dt
, dtd
.tableLog
);
758 if (!BIT_endOfDStream(&bitD
)) return ERROR(corruption_detected
);
765 FORCE_INLINE_TEMPLATE
size_t
766 HUF_decompress4X2_usingDTable_internal_body(
767 void* dst
, size_t dstSize
,
768 const void* cSrc
, size_t cSrcSize
,
769 const HUF_DTable
* DTable
)
771 if (cSrcSize
< 10) return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
773 { const BYTE
* const istart
= (const BYTE
*) cSrc
;
774 BYTE
* const ostart
= (BYTE
*) dst
;
775 BYTE
* const oend
= ostart
+ dstSize
;
776 const void* const dtPtr
= DTable
+1;
777 const HUF_DEltX2
* const dt
= (const HUF_DEltX2
*)dtPtr
;
784 size_t const length1
= MEM_readLE16(istart
);
785 size_t const length2
= MEM_readLE16(istart
+2);
786 size_t const length3
= MEM_readLE16(istart
+4);
787 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
788 const BYTE
* const istart1
= istart
+ 6; /* jumpTable */
789 const BYTE
* const istart2
= istart1
+ length1
;
790 const BYTE
* const istart3
= istart2
+ length2
;
791 const BYTE
* const istart4
= istart3
+ length3
;
792 size_t const segmentSize
= (dstSize
+3) / 4;
793 BYTE
* const opStart2
= ostart
+ segmentSize
;
794 BYTE
* const opStart3
= opStart2
+ segmentSize
;
795 BYTE
* const opStart4
= opStart3
+ segmentSize
;
797 BYTE
* op2
= opStart2
;
798 BYTE
* op3
= opStart3
;
799 BYTE
* op4
= opStart4
;
801 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
802 U32
const dtLog
= dtd
.tableLog
;
804 if (length4
> cSrcSize
) return ERROR(corruption_detected
); /* overflow */
805 CHECK_F( BIT_initDStream(&bitD1
, istart1
, length1
) );
806 CHECK_F( BIT_initDStream(&bitD2
, istart2
, length2
) );
807 CHECK_F( BIT_initDStream(&bitD3
, istart3
, length3
) );
808 CHECK_F( BIT_initDStream(&bitD4
, istart4
, length4
) );
810 /* 16-32 symbols per loop (4-8 symbols per stream) */
811 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
812 for ( ; (endSignal
==BIT_DStream_unfinished
) & (op4
<(oend
-(sizeof(bitD4
.bitContainer
)-1))) ; ) {
813 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
814 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
815 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
816 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
817 HUF_DECODE_SYMBOLX2_1(op1
, &bitD1
);
818 HUF_DECODE_SYMBOLX2_1(op2
, &bitD2
);
819 HUF_DECODE_SYMBOLX2_1(op3
, &bitD3
);
820 HUF_DECODE_SYMBOLX2_1(op4
, &bitD4
);
821 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
822 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
823 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
824 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
825 HUF_DECODE_SYMBOLX2_0(op1
, &bitD1
);
826 HUF_DECODE_SYMBOLX2_0(op2
, &bitD2
);
827 HUF_DECODE_SYMBOLX2_0(op3
, &bitD3
);
828 HUF_DECODE_SYMBOLX2_0(op4
, &bitD4
);
830 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
833 /* check corruption */
834 if (op1
> opStart2
) return ERROR(corruption_detected
);
835 if (op2
> opStart3
) return ERROR(corruption_detected
);
836 if (op3
> opStart4
) return ERROR(corruption_detected
);
837 /* note : op4 already verified within main loop */
839 /* finish bitStreams one by one */
840 HUF_decodeStreamX2(op1
, &bitD1
, opStart2
, dt
, dtLog
);
841 HUF_decodeStreamX2(op2
, &bitD2
, opStart3
, dt
, dtLog
);
842 HUF_decodeStreamX2(op3
, &bitD3
, opStart4
, dt
, dtLog
);
843 HUF_decodeStreamX2(op4
, &bitD4
, oend
, dt
, dtLog
);
846 { U32
const endCheck
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
847 if (!endCheck
) return ERROR(corruption_detected
); }
854 HUF_DGEN(HUF_decompress1X2_usingDTable_internal
)
855 HUF_DGEN(HUF_decompress4X2_usingDTable_internal
)
857 size_t HUF_decompress1X2_usingDTable(
858 void* dst
, size_t dstSize
,
859 const void* cSrc
, size_t cSrcSize
,
860 const HUF_DTable
* DTable
)
862 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
863 if (dtd
.tableType
!= 1) return ERROR(GENERIC
);
864 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
867 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
868 const void* cSrc
, size_t cSrcSize
,
869 void* workSpace
, size_t wkspSize
)
871 const BYTE
* ip
= (const BYTE
*) cSrc
;
873 size_t const hSize
= HUF_readDTableX2_wksp(DCtx
, cSrc
, cSrcSize
,
874 workSpace
, wkspSize
);
875 if (HUF_isError(hSize
)) return hSize
;
876 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
877 ip
+= hSize
; cSrcSize
-= hSize
;
879 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
, /* bmi2 */ 0);
883 size_t HUF_decompress1X2_DCtx(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
884 const void* cSrc
, size_t cSrcSize
)
886 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
887 return HUF_decompress1X2_DCtx_wksp(DCtx
, dst
, dstSize
, cSrc
, cSrcSize
,
888 workSpace
, sizeof(workSpace
));
891 size_t HUF_decompress1X2 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
893 HUF_DTable
* DTable
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
), HUFD_ALLOC_TAG
);
899 RtlZeroMemory(DTable
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
));
901 DTable
[0] = (U32
)(HUF_TABLELOG_MAX
) * 0x01000001;
903 ret
= HUF_decompress1X2_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
910 size_t HUF_decompress4X2_usingDTable(
911 void* dst
, size_t dstSize
,
912 const void* cSrc
, size_t cSrcSize
,
913 const HUF_DTable
* DTable
)
915 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
916 if (dtd
.tableType
!= 1) return ERROR(GENERIC
);
917 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
920 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
921 const void* cSrc
, size_t cSrcSize
,
922 void* workSpace
, size_t wkspSize
, int bmi2
)
924 const BYTE
* ip
= (const BYTE
*) cSrc
;
926 size_t hSize
= HUF_readDTableX2_wksp(dctx
, cSrc
, cSrcSize
,
927 workSpace
, wkspSize
);
928 if (HUF_isError(hSize
)) return hSize
;
929 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
930 ip
+= hSize
; cSrcSize
-= hSize
;
932 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
935 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
936 const void* cSrc
, size_t cSrcSize
,
937 void* workSpace
, size_t wkspSize
)
939 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, /* bmi2 */ 0);
943 size_t HUF_decompress4X2_DCtx(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
944 const void* cSrc
, size_t cSrcSize
)
946 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
947 return HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
948 workSpace
, sizeof(workSpace
));
951 size_t HUF_decompress4X2 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
953 HUF_DTable
* DTable
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
), HUFD_ALLOC_TAG
);
959 RtlZeroMemory(DTable
, sizeof(HUF_DTable
) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX
));
961 DTable
[0] = (U32
)(HUF_TABLELOG_MAX
) * 0x01000001;
963 ret
= HUF_decompress4X2_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
971 /* ***********************************/
972 /* Universal decompression selectors */
973 /* ***********************************/
975 size_t HUF_decompress1X_usingDTable(void* dst
, size_t maxDstSize
,
976 const void* cSrc
, size_t cSrcSize
,
977 const HUF_DTable
* DTable
)
979 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
980 return dtd
.tableType
? HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0) :
981 HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
984 size_t HUF_decompress4X_usingDTable(void* dst
, size_t maxDstSize
,
985 const void* cSrc
, size_t cSrcSize
,
986 const HUF_DTable
* DTable
)
988 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
989 return dtd
.tableType
? HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0) :
990 HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
/* Pre-measured decoding-cost model used by HUF_selectDecoder() :
 * tableTime      = fixed cost of building the DTable,
 * decode256Time  = cost per 256 decoded bytes.
 * First index = quantized compression ratio (Q = cSrcSize*16/dstSize, clamped to 15);
 * second index = decoder flavor. Only columns 0 (single) and 1 (double) are read. */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
1016 /** HUF_selectDecoder() :
1017 * Tells which decoder is likely to decode faster,
1018 * based on a set of pre-computed metrics.
1019 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1020 * Assumption : 0 < dstSize <= 128 KB */
1021 U32
HUF_selectDecoder (size_t dstSize
, size_t cSrcSize
)
1023 assert(dstSize
> 0);
1024 assert(dstSize
<= 128*1024);
1025 /* decoder timing evaluation */
1026 { U32
const Q
= (cSrcSize
>= dstSize
) ? 15 : (U32
)(cSrcSize
* 16 / dstSize
); /* Q < 16 */
1027 U32
const D256
= (U32
)(dstSize
>> 8);
1028 U32
const DTime0
= algoTime
[Q
][0].tableTime
+ (algoTime
[Q
][0].decode256Time
* D256
);
1029 U32 DTime1
= algoTime
[Q
][1].tableTime
+ (algoTime
[Q
][1].decode256Time
* D256
);
1030 DTime1
+= DTime1
>> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1031 return DTime1
< DTime0
;
/* Function-pointer type of a complete 4-streams decompressor
 * (signature shared by HUF_decompress4X1 and HUF_decompress4X2). */
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1037 size_t HUF_decompress (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1039 static const decompressionAlgo decompress
[2] = { HUF_decompress4X1
, HUF_decompress4X2
};
1041 /* validation checks */
1042 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1043 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1044 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1045 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1047 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1048 return decompress
[algoNb
](dst
, dstSize
, cSrc
, cSrcSize
);
1052 size_t HUF_decompress4X_DCtx (HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1054 /* validation checks */
1055 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1056 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1057 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1058 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1060 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1061 return algoNb
? HUF_decompress4X2_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
) :
1062 HUF_decompress4X1_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
) ;
1066 size_t HUF_decompress4X_hufOnly(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1068 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
1069 return HUF_decompress4X_hufOnly_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
1070 workSpace
, sizeof(workSpace
));
1074 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable
* dctx
, void* dst
,
1075 size_t dstSize
, const void* cSrc
,
1076 size_t cSrcSize
, void* workSpace
,
1079 /* validation checks */
1080 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1081 if (cSrcSize
== 0) return ERROR(corruption_detected
);
1083 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1084 return algoNb
? HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
):
1085 HUF_decompress4X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1089 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
1090 const void* cSrc
, size_t cSrcSize
,
1091 void* workSpace
, size_t wkspSize
)
1093 /* validation checks */
1094 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1095 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1096 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1097 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1099 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1100 return algoNb
? HUF_decompress1X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1101 cSrcSize
, workSpace
, wkspSize
):
1102 HUF_decompress1X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1103 cSrcSize
, workSpace
, wkspSize
);
1107 size_t HUF_decompress1X_DCtx(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
1108 const void* cSrc
, size_t cSrcSize
)
1110 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
1111 return HUF_decompress1X_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
1112 workSpace
, sizeof(workSpace
));
1116 size_t HUF_decompress1X_usingDTable_bmi2(void* dst
, size_t maxDstSize
, const void* cSrc
, size_t cSrcSize
, const HUF_DTable
* DTable
, int bmi2
)
1118 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
1119 return dtd
.tableType
? HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
) :
1120 HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1123 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
, void* workSpace
, size_t wkspSize
, int bmi2
)
1125 const BYTE
* ip
= (const BYTE
*) cSrc
;
1127 size_t const hSize
= HUF_readDTableX1_wksp(dctx
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1128 if (HUF_isError(hSize
)) return hSize
;
1129 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
1130 ip
+= hSize
; cSrcSize
-= hSize
;
1132 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
1135 size_t HUF_decompress4X_usingDTable_bmi2(void* dst
, size_t maxDstSize
, const void* cSrc
, size_t cSrcSize
, const HUF_DTable
* DTable
, int bmi2
)
1137 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
1138 return dtd
.tableType
? HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
) :
1139 HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1142 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
, void* workSpace
, size_t wkspSize
, int bmi2
)
1144 /* validation checks */
1145 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1146 if (cSrcSize
== 0) return ERROR(corruption_detected
);
1148 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1149 return algoNb
? HUF_decompress4X2_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
) :
1150 HUF_decompress4X1_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
);