5 * Copyright (C) 2004 Daniel Borca All Rights Reserved.
7 * this is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * this is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Make; see the file COPYING. If not, write to
19 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
/***************************************************************************\
 * The encoder was built by reversing the decoder,
 * and is vaguely based on the FXT1 codec. Note that this code
 * is merely a proof of concept, since it is highly unoptimized!
\***************************************************************************/
/* Sizing constants shared by the DXTn quantizers. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 16 /* number of texels in a block (always 16) */

/*
 * Pack an 8-bit-per-channel texel into a 16-bit 5:6:5 value:
 * red lands in bits 15..11, green in bits 10..5, blue in bits 4..0.
 * The low bits of every channel are truncated, not rounded.
 * `v` is evaluated three times, so it must be free of side effects.
 */
#define COLOR565(v) \
    ((word)((((v)[RCOMP] & 0xf8) << 8) | \
            (((v)[GCOMP] & 0xfc) << 3) | \
            ((v)[BCOMP] >> 3)))
/*
 * Colour selector translation table, two rows of four entries.
 * The quantizers index it as [mode][step]: the first index is `black` in
 * dxt1_rgb_quantize, `transparent` in dxt1_rgba_quantize and the constant 0
 * in the DXT3/DXT5 quantizers; the second index is the value produced by
 * CALCCDOT, and the table entry replaces that value.
 * NOTE(review): the initializer rows and the closing brace are not present
 * in this listing.
 */
46 static const int dxtn_color_tlat
[2][4] = {
/*
 * Alpha selector translation table, indexed [mode][step].
 *
 * dxt5_rgba_quantize uses (anyZero | anyOne) as the mode and an interpolation
 * step as the second index; the entry is the selector that gets stored.
 *   row 0: steps 0..7 between the two alpha endpoints; step 0 maps to
 *          selector 0, step 7 to selector 1, the steps in between to 2..7.
 *   row 1: steps 0..5 map the same way (0, then 2..5, then 1); entries 6
 *          and 7 map to themselves.
 */
static const int dxtn_alpha_tlat[2][8] = {
    { 0, 2, 3, 4, 5, 6, 7, 1 },
    { 0, 2, 3, 4, 5, 1, 6, 7 }
};
/*
 * dxt1_rgb_quantize: encode one 4x4 tile of texels as a DXT1 block without
 * alpha.
 *
 * cc     output words of the block; cc[0] receives color0 | (color1 << 16)
 * lines  four row pointers, one per tile row; every pointer is advanced past
 *        the 4 * comps bytes that are copied out of it
 * comps  bytes per source texel; texels are copied into a local
 *        [N_TEXELS][MAX_COMP] array, and no check against MAX_COMP is
 *        visible, so comps must not exceed it
 *
 * Visible steps: copy the tile into `input`; pick the texels minCol and
 * maxCol as endpoints and pack them with COLOR565; if both pack to the same
 * value store them directly, otherwise build an interpolation vector with
 * MAKEIVEC (n_vect is 2 when color0 <= color1, else 3) and, for texels 15
 * down to 0, take the CALCCDOT result and translate it through
 * dxtn_color_tlat[black].
 *
 * NOTE(review): this listing omits several lines of the body (the remaining
 * local declarations, the min/max sum computation, how `black` is set, the
 * endpoint swap and the packing/storing of `hi`); the description above
 * covers only what is shown.
 */
58 dxt1_rgb_quantize (dword
*cc
, const byte
*lines
[], int comps
)
60 float b
, iv
[MAX_COMP
]; /* interpolation vector */
62 dword hi
; /* high doubleword */
68 int minSum
= 2000; /* big enough */
69 int maxSum
= -1; /* small enough */
70 int minCol
= 0; /* phoudoin: silent compiler! */
71 int maxCol
= 0; /* phoudoin: silent compiler! */
73 byte input
[N_TEXELS
][MAX_COMP
];
76 /* make the whole block opaque */
77 /* we will NEVER reference ACOMP of any pixel */
79 /* 4 texels each line */
80 for (l
= 0; l
< 4; l
++) {
81 for (k
= 0; k
< 4; k
++) {
82 for (i
= 0; i
< comps
; i
++) {
83 input
[k
+ l
* 4][i
] = *lines
[l
]++;
88 /* Our solution here is to find the darkest and brightest colors in
89 * the 4x4 tile and use those as the two representative colors.
90 * There are probably better algorithms to use (histogram-based).
92 for (k
= 0; k
< N_TEXELS
; k
++) {
94 for (i
= 0; i
< n_comp
; i
++) {
110 color0
= COLOR565(input
[minCol
]);
111 color1
= COLOR565(input
[maxCol
]);
113 if (color0
== color1
) {
114 /* we'll use 3-vector */
115 cc
[0] = color0
| (color1
<< 16);
118 if (black
&& ((color0
== 0) || (color1
== 0))) {
119 /* we still can use 4-vector */
123 if (black
^ (color0
<= color1
)) {
132 n_vect
= (color0
<= color1
) ? 2 : 3;
134 MAKEIVEC(n_vect
, n_comp
, iv
, b
, input
[minCol
], input
[maxCol
]);
137 cc
[0] = color0
| (color1
<< 16);
139 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
143 for (i
= 0; i
< n_comp
; i
++) {
148 /* interpolate color */
149 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
150 texel
= dxtn_color_tlat
[black
][texel
];
/*
 * dxt1_rgba_quantize: encode one 4x4 tile of texels as a DXT1 block with
 * one-bit transparency.
 *
 * cc     output words of the block; cc[0] receives color0 | (color1 << 16)
 * lines  four row pointers, one per tile row; every pointer is advanced past
 *        the 4 * comps bytes that are copied out of it
 * comps  bytes per source texel; must not exceed MAX_COMP (no check visible)
 *
 * The local `input` array is first filled with 0xff bytes, so any component
 * the source does not supply (including ACOMP when comps < 4) reads as 255.
 * Endpoints are the texels minCol and maxCol packed with COLOR565, using the
 * first n_comp (3) components only.  When both endpoints pack to the same
 * value, `hi` becomes all ones if `transparent` is set and 0 otherwise.
 * In the final pass (texels 15 down to 0) only texels whose ACOMP is at
 * least 128 go through CALCCDOT and dxtn_color_tlat[transparent].
 *
 * NOTE(review): this listing omits several lines of the body (remaining
 * declarations, the min/max sum computation, the body of the "ACOMP < 128"
 * test that presumably sets `transparent`, the endpoint swap, the handling
 * of texels below the alpha threshold and the storing of `hi`); the
 * description above covers only what is shown.
 */
162 dxt1_rgba_quantize (dword
*cc
, const byte
*lines
[], int comps
)
164 float b
, iv
[MAX_COMP
]; /* interpolation vector */
166 dword hi
; /* high doubleword */
169 const int n_comp
= 3;
172 int minSum
= 2000; /* big enough */
173 int maxSum
= -1; /* small enough */
174 int minCol
= 0; /* phoudoin: silent compiler! */
175 int maxCol
= 0; /* phoudoin: silent compiler! */
177 byte input
[N_TEXELS
][MAX_COMP
];
181 /* make the whole block opaque */
182 memset(input
, -1, sizeof(input
));
185 /* 4 texels each line */
186 for (l
= 0; l
< 4; l
++) {
187 for (k
= 0; k
< 4; k
++) {
188 for (i
= 0; i
< comps
; i
++) {
189 input
[k
+ l
* 4][i
] = *lines
[l
]++;
194 /* Our solution here is to find the darkest and brightest colors in
195 * the 4x4 tile and use those as the two representative colors.
196 * There are probably better algorithms to use (histogram-based).
198 for (k
= 0; k
< N_TEXELS
; k
++) {
200 for (i
= 0; i
< n_comp
; i
++) {
211 if (input
[k
][ACOMP
] < 128) {
216 color0
= COLOR565(input
[minCol
]);
217 color1
= COLOR565(input
[maxCol
]);
219 if (color0
== color1
) {
220 /* we'll use 3-vector */
221 cc
[0] = color0
| (color1
<< 16);
222 hi
= transparent
? -1 : 0;
224 if (transparent
^ (color0
<= color1
)) {
233 n_vect
= (color0
<= color1
) ? 2 : 3;
235 MAKEIVEC(n_vect
, n_comp
, iv
, b
, input
[minCol
], input
[maxCol
]);
238 cc
[0] = color0
| (color1
<< 16);
240 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
242 if (input
[k
][ACOMP
] >= 128) {
243 /* interpolate color */
244 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
245 texel
= dxtn_color_tlat
[transparent
][texel
];
/*
 * dxt3_rgba_quantize: encode one 4x4 tile of texels as a DXT3 block
 * (explicit 4-bit alpha followed by a colour block).
 *
 * cc     output words of the block; cc[2] receives color0 | (color1 << 16)
 * lines  four row pointers, one per tile row; every pointer is advanced past
 *        the 4 * comps bytes that are copied out of it
 * comps  bytes per source texel; must not exceed MAX_COMP (no check visible)
 *
 * The local `input` array is first filled with 0xff bytes, so a missing
 * alpha component reads as 255.  Alpha is reduced to its top four bits
 * (ACOMP >> 4): texels 15..8 are OR-ed into `lohi` and texels 7..0 into
 * `lolo`.  Colour endpoints are the texels minCol and maxCol packed with
 * COLOR565; only when the two packed values differ is an interpolation
 * vector built (MAKEIVEC with n_vect = 3, n_comp = 3) and every texel run
 * through CALCCDOT and dxtn_color_tlat[0].
 *
 * NOTE(review): this listing omits several lines of the body (remaining
 * declarations, the min/max sum computation, the shifts that accompany the
 * OR operations, and the stores of lolo/lohi/hihi into cc); the description
 * above covers only what is shown.
 */
257 dxt3_rgba_quantize (dword
*cc
, const byte
*lines
[], int comps
)
259 float b
, iv
[MAX_COMP
]; /* interpolation vector */
261 dword lolo
, lohi
; /* low quadword: lo dword, hi dword */
262 dword hihi
; /* high quadword: high dword */
264 const int n_vect
= 3;
265 const int n_comp
= 3;
267 int minSum
= 2000; /* big enough */
268 int maxSum
= -1; /* small enough */
269 int minCol
= 0; /* phoudoin: silent compiler! */
270 int maxCol
= 0; /* phoudoin: silent compiler! */
272 byte input
[N_TEXELS
][MAX_COMP
];
276 /* make the whole block opaque */
277 memset(input
, -1, sizeof(input
));
280 /* 4 texels each line */
281 for (l
= 0; l
< 4; l
++) {
282 for (k
= 0; k
< 4; k
++) {
283 for (i
= 0; i
< comps
; i
++) {
284 input
[k
+ l
* 4][i
] = *lines
[l
]++;
289 /* Our solution here is to find the darkest and brightest colors in
290 * the 4x4 tile and use those as the two representative colors.
291 * There are probably better algorithms to use (histogram-based).
293 for (k
= 0; k
< N_TEXELS
; k
++) {
295 for (i
= 0; i
< n_comp
; i
++) {
310 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
313 lohi
|= input
[k
][ACOMP
] >> 4;
316 for (; k
>= 0; k
--) {
319 lolo
|= input
[k
][ACOMP
] >> 4;
323 color0
= COLOR565(input
[minCol
]);
324 color1
= COLOR565(input
[maxCol
]);
325 cc
[2] = color0
| (color1
<< 16);
328 if (color0
!= color1
) {
329 MAKEIVEC(n_vect
, n_comp
, iv
, b
, input
[minCol
], input
[maxCol
]);
332 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
334 /* interpolate color */
335 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
336 texel
= dxtn_color_tlat
[0][texel
];
/*
 * dxt5_rgba_quantize: encode one 4x4 tile of texels as a DXT5 block
 * (interpolated alpha followed by a colour block).
 *
 * cc     output words of the block; the alpha half is written through
 *        ((qword *)cc)[0] (or cc[0] when both alpha endpoints are equal)
 *        and cc[2] receives color0 | (color1 << 16)
 * lines  four row pointers, one per tile row; every pointer is advanced past
 *        the 4 * comps bytes that are copied out of it
 * comps  bytes per source texel; must not exceed MAX_COMP (no check visible)
 *
 * The local `input` array is first filled with 0xff bytes, so a missing
 * alpha component reads as 255.
 *
 * Alpha: alpha0/alpha1 track the smallest and largest ACOMP in the tile, and
 * texels with ACOMP equal to 0 or 255 are tested separately (anyZero/anyOne
 * start at 0).  If the two endpoints are equal they are stored as
 * alpha0 | (alpha1 << 8).  Otherwise a_vect is 5 when alpha0 <= alpha1 and 7
 * when not, the scale is iv[ACOMP] = a_vect / (alpha1 - alpha0) with offset
 * b = -iv[ACOMP] * alpha0 + 0.5, each texel's step is (int)(alpha * scale + b),
 * and the step is translated through dxtn_alpha_tlat[anyZero | anyOne].
 * The endpoint bytes are merged into `lo` with Q_OR32 before `lo` is stored.
 *
 * Colour: endpoints are the texels minCol and maxCol packed with COLOR565;
 * only when the packed values differ is MAKEIVEC called (n_vect = 3,
 * n_comp = 3) and every texel run through CALCCDOT and dxtn_color_tlat[0].
 *
 * NOTE(review): this listing omits several lines of the body (remaining
 * declarations, the min/max sum computation, the bodies of the 0/255 tests,
 * the endpoint swap, step clamping, and the shifting/packing of `lo` and
 * `hihi`); the description above covers only what is shown.
 */
347 dxt5_rgba_quantize (dword
*cc
, const byte
*lines
[], int comps
)
349 float b
, iv
[MAX_COMP
]; /* interpolation vector */
351 qword lo
; /* low quadword */
352 dword hihi
; /* high quadword: high dword */
354 const int n_vect
= 3;
355 const int n_comp
= 3;
357 int minSum
= 2000; /* big enough */
358 int maxSum
= -1; /* small enough */
359 int minCol
= 0; /* phoudoin: silent compiler! */
360 int maxCol
= 0; /* phoudoin: silent compiler! */
361 int alpha0
= 2000; /* big enough */
362 int alpha1
= -1; /* small enough */
363 int anyZero
= 0, anyOne
= 0;
366 byte input
[N_TEXELS
][MAX_COMP
];
370 /* make the whole block opaque */
371 memset(input
, -1, sizeof(input
));
374 /* 4 texels each line */
375 for (l
= 0; l
< 4; l
++) {
376 for (k
= 0; k
< 4; k
++) {
377 for (i
= 0; i
< comps
; i
++) {
378 input
[k
+ l
* 4][i
] = *lines
[l
]++;
383 /* Our solution here is to find the darkest and brightest colors in
384 * the 4x4 tile and use those as the two representative colors.
385 * There are probably better algorithms to use (histogram-based).
387 for (k
= 0; k
< N_TEXELS
; k
++) {
389 for (i
= 0; i
< n_comp
; i
++) {
400 if (alpha0
> input
[k
][ACOMP
]) {
401 alpha0
= input
[k
][ACOMP
];
403 if (alpha1
< input
[k
][ACOMP
]) {
404 alpha1
= input
[k
][ACOMP
];
406 if (input
[k
][ACOMP
] == 0) {
409 if (input
[k
][ACOMP
] == 255) {
415 if (alpha0
== alpha1
) {
416 /* we'll use 6-vector */
417 cc
[0] = alpha0
| (alpha1
<< 8);
420 if (anyZero
&& ((alpha0
== 0) || (alpha1
== 0))) {
421 /* we still might use 8-vector */
424 if (anyOne
&& ((alpha0
== 255) || (alpha1
== 255))) {
425 /* we still might use 8-vector */
428 if ((anyZero
| anyOne
) ^ (alpha0
<= alpha1
)) {
434 a_vect
= (alpha0
<= alpha1
) ? 5 : 7;
436 /* compute interpolation vector */
437 iv
[ACOMP
] = (float)a_vect
/ (alpha1
- alpha0
);
438 b
= -iv
[ACOMP
] * alpha0
+ 0.5F
;
442 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
444 if (anyZero
| anyOne
) {
445 if (input
[k
][ACOMP
] == 0) {
447 } else if (input
[k
][ACOMP
] == 255) {
451 /* interpolate alpha */
453 float dot
= input
[k
][ACOMP
] * iv
[ACOMP
];
454 texel
= (int)(dot
+ b
);
458 } else if (texel
> a_vect
) {
462 texel
= dxtn_alpha_tlat
[anyZero
| anyOne
][texel
];
469 Q_OR32(lo
, alpha0
| (alpha1
<< 8));
470 ((qword
*)cc
)[0] = lo
;
473 color0
= COLOR565(input
[minCol
]);
474 color1
= COLOR565(input
[maxCol
]);
475 cc
[2] = color0
| (color1
<< 16);
478 if (color0
!= color1
) {
479 MAKEIVEC(n_vect
, n_comp
, iv
, b
, input
[minCol
], input
[maxCol
]);
482 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
484 /* interpolate color */
485 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
486 texel
= dxtn_color_tlat
[0][texel
];
/*
 * ENCODER(dxtn, n): expands to the definition of dxtn##_encode(), which
 * compresses an image by calling dxtn##_quantize() once per 4x4 tile.
 *
 * dxtn  name prefix pasted onto _encode and _quantize
 * n     enters the row padding computation as "width * n"; it appears to be
 *       the number of dwords per encoded block (2 and 4 in the
 *       instantiations that follow) -- TODO confirm
 *
 * Parameters of the generated function:
 *   width, height   image size in texels
 *   comps           bytes per source texel
 *   source          source pixels; rows are srcRowStride bytes apart
 *   dest            output buffer, written through a dword pointer
 *   destRowStride   converted to a per-block-row skip, in dwords, with
 *                   (destRowStride - width * n) / 4
 *
 * If width or height is not a multiple of 4, the image is first copied by
 * _mesa_upscale_teximage2d() into a malloc'ed buffer whose dimensions are
 * rounded up to the next multiple of 4, and encoding reads from that copy
 * with srcRowStride = comps * newWidth.
 *
 * For every tile the four row pointers lines[0..3] are rebuilt from `offs`
 * before the quantizer is called.
 *
 * NOTE(review): the allocation is sized with sizeof(byte *) although the
 * buffer is addressed as rows of comps * newWidth bytes; sizeof(byte) was
 * presumably intended, so the buffer is larger than needed.
 * NOTE(review): no check of the malloc result is visible before the buffer
 * is handed to _mesa_upscale_teximage2d().
 * NOTE(review): several lines of the macro are missing from this listing
 * (return type, declarations of x, y and data, the per-tile advance of
 * `offs` and `encoded`, the release of newSource and the return).
 */
496 #define ENCODER(dxtn, n) \
498 dxtn##_encode (int width, int height, int comps, \
499 const void *source, int srcRowStride, \
500 void *dest, int destRowStride) \
504 dword *encoded = (dword *)dest; \
505 void *newSource = NULL; \
507 /* Replicate image if width is not M4 or height is not M4 */ \
508 if ((width & 3) | (height & 3)) { \
509 int newWidth = (width + 3) & ~3; \
510 int newHeight = (height + 3) & ~3; \
511 newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\
512 _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \
513 comps, (const byte *)source, \
514 srcRowStride, (byte *)newSource); \
515 source = newSource; \
517 height = newHeight; \
518 srcRowStride = comps * newWidth; \
521 data = (const byte *)source; \
522 destRowStride = (destRowStride - width * n) / 4; \
523 for (y = 0; y < height; y += 4) { \
524 unsigned int offs = 0 + (y + 0) * srcRowStride; \
525 for (x = 0; x < width; x += 4) { \
526 const byte *lines[4]; \
527 lines[0] = &data[offs]; \
528 lines[1] = lines[0] + srcRowStride; \
529 lines[2] = lines[1] + srcRowStride; \
530 lines[3] = lines[2] + srcRowStride; \
532 dxtn##_quantize(encoded, lines, comps); \
536 encoded += destRowStride; \
539 if (newSource != NULL) { \
547 ENCODER(dxt1_rgba
, 2)
548 ENCODER(dxt3_rgba
, 4)
549 ENCODER(dxt5_rgba
, 4)
/***************************************************************************\
 * The decoder is based on the GL_EXT_texture_compression_s3tc
 * specification and serves as a concept for the encoder.
\***************************************************************************/
560 /* lookup table for scaling 4 bit colors up to 8 bits */
561 static const byte _rgb_scale_4
[] = {
562 0, 17, 34, 51, 68, 85, 102, 119,
563 136, 153, 170, 187, 204, 221, 238, 255
566 /* lookup table for scaling 5 bit colors up to 8 bits */
567 static const byte _rgb_scale_5
[] = {
568 0, 8, 16, 25, 33, 41, 49, 58,
569 66, 74, 82, 90, 99, 107, 115, 123,
570 132, 140, 148, 156, 165, 173, 181, 189,
571 197, 206, 214, 222, 230, 239, 247, 255
574 /* lookup table for scaling 6 bit colors up to 8 bits */
575 static const byte _rgb_scale_6
[] = {
576 0, 4, 8, 12, 16, 20, 24, 28,
577 32, 36, 40, 45, 49, 53, 57, 61,
578 65, 69, 73, 77, 81, 85, 89, 93,
579 97, 101, 105, 109, 113, 117, 121, 125,
580 130, 134, 138, 142, 146, 150, 154, 158,
581 162, 166, 170, 174, 178, 182, 186, 190,
582 194, 198, 202, 206, 210, 215, 219, 223,
583 227, 231, 235, 239, 243, 247, 251, 255
/*
 * CC_SEL(cc, which): view `cc` as an array of dwords and return the dword
 * that contains bit number `which`, shifted right so that bit becomes bit 0.
 * The caller masks off the field width it needs.  A field that straddles a
 * 32-bit boundary cannot be fetched with a single CC_SEL.  The dwords are
 * read in host byte order and `cc` must be suitably aligned for dword access.
 */
#define CC_SEL(cc, which) \
    (((const dword *)(cc))[(which) / 32] >> ((which) & 31))

/* Expand the low 4, 5 or 6 bits of `c` to the 0..255 range via the tables. */
#define UP4(c) (_rgb_scale_4[(c) & 15])
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c) (_rgb_scale_6[(c) & 63])

/*
 * Clear four consecutive bytes at `v` with one dword store.  `v` must be
 * suitably aligned for dword access.
 */
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
595 dxt1_rgb_decode_1 (const void *texture
, int stride
,
596 int i
, int j
, byte
*rgba
)
598 const byte
*src
= (const byte
*)texture
599 + ((j
/ 4) * ((stride
+ 3) / 4) + i
/ 4) * 8;
600 const int code
= (src
[4 + (j
& 3)] >> ((i
& 3) * 2)) & 0x3;
602 rgba
[RCOMP
] = UP5(CC_SEL(src
, 11));
603 rgba
[GCOMP
] = UP6(CC_SEL(src
, 5));
604 rgba
[BCOMP
] = UP5(CC_SEL(src
, 0));
605 } else if (code
== 1) {
606 rgba
[RCOMP
] = UP5(CC_SEL(src
, 27));
607 rgba
[GCOMP
] = UP6(CC_SEL(src
, 21));
608 rgba
[BCOMP
] = UP5(CC_SEL(src
, 16));
610 const word col0
= src
[0] | (src
[1] << 8);
611 const word col1
= src
[2] | (src
[3] << 8);
614 rgba
[RCOMP
] = (UP5(col0
>> 11) * 2 + UP5(col1
>> 11)) / 3;
615 rgba
[GCOMP
] = (UP6(col0
>> 5) * 2 + UP6(col1
>> 5)) / 3;
616 rgba
[BCOMP
] = (UP5(col0
) * 2 + UP5(col1
)) / 3;
618 rgba
[RCOMP
] = (UP5(col0
>> 11) + 2 * UP5(col1
>> 11)) / 3;
619 rgba
[GCOMP
] = (UP6(col0
>> 5) + 2 * UP6(col1
>> 5)) / 3;
620 rgba
[BCOMP
] = (UP5(col0
) + 2 * UP5(col1
)) / 3;
624 rgba
[RCOMP
] = (UP5(col0
>> 11) + UP5(col1
>> 11)) / 2;
625 rgba
[GCOMP
] = (UP6(col0
>> 5) + UP6(col1
>> 5)) / 2;
626 rgba
[BCOMP
] = (UP5(col0
) + UP5(col1
)) / 2;
637 dxt1_rgba_decode_1 (const void *texture
, int stride
,
638 int i
, int j
, byte
*rgba
)
640 /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */
641 const byte
*src
= (const byte
*)texture
642 + ((j
/ 4) * ((stride
+ 3) / 4) + i
/ 4) * 8;
643 const int code
= (src
[4 + (j
& 3)] >> ((i
& 3) * 2)) & 0x3;
645 rgba
[RCOMP
] = UP5(CC_SEL(src
, 11));
646 rgba
[GCOMP
] = UP6(CC_SEL(src
, 5));
647 rgba
[BCOMP
] = UP5(CC_SEL(src
, 0));
649 } else if (code
== 1) {
650 rgba
[RCOMP
] = UP5(CC_SEL(src
, 27));
651 rgba
[GCOMP
] = UP6(CC_SEL(src
, 21));
652 rgba
[BCOMP
] = UP5(CC_SEL(src
, 16));
655 const word col0
= src
[0] | (src
[1] << 8);
656 const word col1
= src
[2] | (src
[3] << 8);
659 rgba
[RCOMP
] = (UP5(col0
>> 11) * 2 + UP5(col1
>> 11)) / 3;
660 rgba
[GCOMP
] = (UP6(col0
>> 5) * 2 + UP6(col1
>> 5)) / 3;
661 rgba
[BCOMP
] = (UP5(col0
) * 2 + UP5(col1
)) / 3;
663 rgba
[RCOMP
] = (UP5(col0
>> 11) + 2 * UP5(col1
>> 11)) / 3;
664 rgba
[GCOMP
] = (UP6(col0
>> 5) + 2 * UP6(col1
>> 5)) / 3;
665 rgba
[BCOMP
] = (UP5(col0
) + 2 * UP5(col1
)) / 3;
670 rgba
[RCOMP
] = (UP5(col0
>> 11) + UP5(col1
>> 11)) / 2;
671 rgba
[GCOMP
] = (UP6(col0
>> 5) + UP6(col1
>> 5)) / 2;
672 rgba
[BCOMP
] = (UP5(col0
) + UP5(col1
)) / 2;
683 dxt3_rgba_decode_1 (const void *texture
, int stride
,
684 int i
, int j
, byte
*rgba
)
686 const byte
*src
= (const byte
*)texture
687 + ((j
/ 4) * ((stride
+ 3) / 4) + i
/ 4) * 16;
688 const int code
= (src
[12 + (j
& 3)] >> ((i
& 3) * 2)) & 0x3;
689 const dword
*cc
= (const dword
*)(src
+ 8);
691 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 11));
692 rgba
[GCOMP
] = UP6(CC_SEL(cc
, 5));
693 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 0));
694 } else if (code
== 1) {
695 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 27));
696 rgba
[GCOMP
] = UP6(CC_SEL(cc
, 21));
697 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 16));
698 } else if (code
== 2) {
699 /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
700 rgba
[RCOMP
] = (UP5(CC_SEL(cc
, 11)) * 2 + UP5(CC_SEL(cc
, 27))) / 3;
701 rgba
[GCOMP
] = (UP6(CC_SEL(cc
, 5)) * 2 + UP6(CC_SEL(cc
, 21))) / 3;
702 rgba
[BCOMP
] = (UP5(CC_SEL(cc
, 0)) * 2 + UP5(CC_SEL(cc
, 16))) / 3;
704 rgba
[RCOMP
] = (UP5(CC_SEL(cc
, 11)) + 2 * UP5(CC_SEL(cc
, 27))) / 3;
705 rgba
[GCOMP
] = (UP6(CC_SEL(cc
, 5)) + 2 * UP6(CC_SEL(cc
, 21))) / 3;
706 rgba
[BCOMP
] = (UP5(CC_SEL(cc
, 0)) + 2 * UP5(CC_SEL(cc
, 16))) / 3;
708 rgba
[ACOMP
] = UP4(src
[((j
& 3) * 4 + (i
& 3)) / 2] >> ((i
& 1) * 4));
713 dxt5_rgba_decode_1 (const void *texture
, int stride
,
714 int i
, int j
, byte
*rgba
)
716 const byte
*src
= (const byte
*)texture
717 + ((j
/ 4) * ((stride
+ 3) / 4) + i
/ 4) * 16;
718 const int code
= (src
[12 + (j
& 3)] >> ((i
& 3) * 2)) & 0x3;
719 const dword
*cc
= (const dword
*)(src
+ 8);
720 const byte alpha0
= src
[0];
721 const byte alpha1
= src
[1];
722 const int alphaShift
= (((j
& 3) * 4) + (i
& 3)) * 3 + 16;
723 const int acode
= ((alphaShift
== 31)
724 ? CC_SEL(src
+ 2, alphaShift
- 16)
725 : CC_SEL(src
, alphaShift
)) & 0x7;
727 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 11));
728 rgba
[GCOMP
] = UP6(CC_SEL(cc
, 5));
729 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 0));
730 } else if (code
== 1) {
731 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 27));
732 rgba
[GCOMP
] = UP6(CC_SEL(cc
, 21));
733 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 16));
734 } else if (code
== 2) {
735 /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
736 rgba
[RCOMP
] = (UP5(CC_SEL(cc
, 11)) * 2 + UP5(CC_SEL(cc
, 27))) / 3;
737 rgba
[GCOMP
] = (UP6(CC_SEL(cc
, 5)) * 2 + UP6(CC_SEL(cc
, 21))) / 3;
738 rgba
[BCOMP
] = (UP5(CC_SEL(cc
, 0)) * 2 + UP5(CC_SEL(cc
, 16))) / 3;
740 rgba
[RCOMP
] = (UP5(CC_SEL(cc
, 11)) + 2 * UP5(CC_SEL(cc
, 27))) / 3;
741 rgba
[GCOMP
] = (UP6(CC_SEL(cc
, 5)) + 2 * UP6(CC_SEL(cc
, 21))) / 3;
742 rgba
[BCOMP
] = (UP5(CC_SEL(cc
, 0)) + 2 * UP5(CC_SEL(cc
, 16))) / 3;
745 rgba
[ACOMP
] = alpha0
;
746 } else if (acode
== 1) {
747 rgba
[ACOMP
] = alpha1
;
748 } else if (alpha0
> alpha1
) {
749 rgba
[ACOMP
] = ((8 - acode
) * alpha0
+ (acode
- 1) * alpha1
) / 7;
750 } else if (acode
== 6) {
752 } else if (acode
== 7) {
755 rgba
[ACOMP
] = ((6 - acode
) * alpha0
+ (acode
- 1) * alpha1
) / 5;