5 * Copyright (C) 2004 Daniel Borca All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 /***************************************************************************\
37 * The encoder was built by reversing the decoder,
38 * and is vaguely based on Texus2 by 3dfx. Note that this code
39 * is merely a proof of concept, since it is highly UNoptimized;
40 * moreover, it is sub-optimal due to initial conditions passed
41 * to Lloyd's algorithm (the interpolation modes are even worse).
42 \***************************************************************************/
/* Encoder limits and Lloyd vector-quantisation tuning constants. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* ISTBLACK(v): non-zero when the dword read at v is 0, i.e. every byte it
 * covers is zero ("transparent black" in the encoder's comments).
 * COPY_4UBV(DST, SRC): copies one dword from SRC to DST and yields the copied
 * value.  The expansion is fully parenthesised so the macro can be used
 * inside a larger expression without the assignment swallowing the operators
 * that follow it.
 * Both macros access the texel through a dword pointer; this assumes dword
 * spans one 4-byte texel and that v / DST / SRC are suitably aligned --
 * TODO confirm against the typedefs.
 */
#define ISTBLACK(v) (*((dword *)(v)) == 0)
#define COPY_4UBV(DST, SRC) (*((dword *)(DST)) = *((dword *)(SRC)))
/* fxt1_bestcol: for each of the `nv` candidate vectors in `vec`, accumulates
 * into `e` the squared difference between that vector and the texel `input`
 * over the first `nc` components.  `err` starts at 1e9 ("big enough").
 * NOTE(review): the comparison against `err` and the return statement are not
 * visible in this excerpt.  Visible callers OR the result into selector words
 * and store it in `int best`, so it presumably returns the index of the
 * closest vector -- confirm against the full source.
 */
57 fxt1_bestcol (float vec
[][MAX_COMP
], int nv
,
58 byte input
[MAX_COMP
], int nc
)
61 float err
= 1e9
; /* big enough */
63 for (j
= 0; j
< nv
; j
++) {
65 for (i
= 0; i
< nc
; i
++) {
66 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
/* fxt1_worst: for each of the `n` texels in `input`, accumulates into `e` the
 * squared difference between that texel and `vec` over the first `nc`
 * components.  `err` starts at -1.0F ("small enough").
 * NOTE(review): the comparison and return are not visible in this excerpt.
 * The visible caller stores the result in `int worst` and uses it as a row
 * index into `input`, so it presumably returns the index of the texel that is
 * farthest from `vec` -- confirm.
 */
79 fxt1_worst (float vec
[MAX_COMP
],
80 byte input
[N_TEXELS
][MAX_COMP
], int nc
, int n
)
83 float err
= -1.0F
; /* small enough */
85 for (k
= 0; k
< n
; k
++) {
87 for (i
= 0; i
< nc
; i
++) {
88 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
/* fxt1_variance: loops over the first `nc` components and, per component,
 * over `n` texels, then computes
 *    var = sx2 * teenth - sx * sx * teenth * teenth,   teenth = 1.0 / n.
 * `n` must be non-zero because `teenth` divides by it.
 * NOTE(review): the accumulation of sx / sx2, the use of `maxvar`, any write
 * to `variance` and the return are not visible in this excerpt.  Presumably
 * sx and sx2 are the sum and the sum of squares of one component (which makes
 * `var` its population variance) and the result is the index of the component
 * with the largest variance: the visible callers pass NULL for `variance` and
 * use the result as a component index -- confirm.
 */
101 fxt1_variance (double variance
[MAX_COMP
],
102 byte input
[N_TEXELS
][MAX_COMP
], int nc
, int n
)
106 double var
, maxvar
= -1; /* small enough */
107 double teenth
= 1.0 / n
;
109 for (i
= 0; i
< nc
; i
++) {
111 for (k
= 0; k
< n
; k
++) {
116 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
/* fxt1_choose: fills `vec` with `nv` starting vectors taken from the `n`
 * texels in `input` (`nc` components each).  Three seeding approaches are
 * visible in this excerpt:
 *   - grid: vector j copies texel m = j * (n - 1) / (nv - 1);
 *   - histogram: a `hist` table keyed by `key` records texel indices
 *     (`hist[j].idx`); the first `lenh` vectors copy those texels and any
 *     remaining vectors are filled with a copy of vec[0];
 *   - extremes: vector j is a rounded linear blend between input[minCol] and
 *     input[maxCol], with weights (nv - 1 - j) and j.
 * The grid and extremes forms divide by (nv - 1), so `nv` must be at least 2;
 * the visible callers pass 3 or 4.
 * NOTE(review): how one approach is selected, how `key` and `lenh` are
 * computed, and the return value are not visible here; callers only test the
 * result against 0.
 */
131 fxt1_choose (float vec
[][MAX_COMP
], int nv
,
132 byte input
[N_TEXELS
][MAX_COMP
], int nc
, int n
)
135 /* Choose colors from a grid.
139 for (j
= 0; j
< nv
; j
++) {
140 int m
= j
* (n
- 1) / (nv
- 1);
141 for (i
= 0; i
< nc
; i
++) {
142 vec
[j
][i
] = input
[m
][i
];
146 /* Our solution here is to find the darkest and brightest colors in
147 * the 8x4 tile and use those as the two representative colors.
148 * There are probably better algorithms to use (histogram-based).
151 int minSum
= 2000; /* big enough */
152 int maxSum
= -1; /* small enough */
153 int minCol
= 0; /* phoudoin: silent compiler! */
154 int maxCol
= 0; /* phoudoin: silent compiler! */
164 memset(hist
, 0, sizeof(hist
));
166 for (k
= 0; k
< n
; k
++) {
170 for (i
= 0; i
< nc
; i
++) {
175 for (l
= 0; l
< n
; l
++) {
184 } else if (hist
[l
].key
== key
) {
200 for (j
= 0; j
< lenh
; j
++) {
201 for (i
= 0; i
< nc
; i
++) {
202 vec
[j
][i
] = (float)input
[hist
[j
].idx
][i
];
205 for (; j
< nv
; j
++) {
206 for (i
= 0; i
< nc
; i
++) {
207 vec
[j
][i
] = vec
[0][i
];
213 for (j
= 0; j
< nv
; j
++) {
214 for (i
= 0; i
< nc
; i
++) {
215 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (float)(nv
- 1);
/* fxt1_lloyd: refines the `nv` vectors in `vec` against the `n` texels in
 * `input`, running at most LL_N_REP passes.  Each visible pass:
 *   - scans the block; for every texel a squared distance to each vector is
 *     computed over components 0..2, with a component-3 term added
 *     (condition not visible), and the texel is added into sum[best][];
 *   - returns !0 ("good match") when `error` is below LL_RMS_E, or when it
 *     improved on `lasterror` by less than LL_RMS_D;
 *   - moves each vector to sum[j][] * (1.0F / cnt[j]), or replaces it with
 *     the texel chosen by fxt1_worst() ("no samples or is identical with a
 *     previous vec").
 * Returns 0 when the passes run out ("could not converge fast enough").
 * NOTE(review): the guard around `1.0F / cnt[j]` is not visible here;
 * presumably the division only happens for non-zero counts -- confirm.
 * NOTE(review): one line calls fxt1_bestcol() with five arguments while the
 * visible definition takes four; presumably that call sits in a disabled
 * preprocessor branch -- confirm before enabling it.
 */
225 fxt1_lloyd (float vec
[][MAX_COMP
], int nv
,
226 byte input
[N_TEXELS
][MAX_COMP
], int nc
, int n
)
228 /* Use the generalized lloyd's algorithm for VQ:
229 * find 4 color vectors.
231 * for each sample color
232 * sort to nearest vector.
234 * replace each vector with the centroid of it's matching colors.
236 * repeat until RMS doesn't improve.
238 * if a color vector has no samples, or becomes the same as another
239 * vector, replace it with the color which is farthest from a sample.
241 * vec[][MAX_COMP] initial vectors and resulting colors
242 * nv number of resulting colors required
243 * input[N_TEXELS][MAX_COMP] input texels
244 * nc number of components in input / vec
245 * n number of input samples
248 int sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
249 int cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
250 float error
, lasterror
= 1e9
;
255 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
256 /* reset sums & counters */
257 for (j
= 0; j
< nv
; j
++) {
258 for (i
= 0; i
< nc
; i
++) {
265 /* scan whole block */
266 for (k
= 0; k
< n
; k
++) {
269 float err
= 1e9
; /* big enough */
270 /* determine best vector */
271 for (j
= 0; j
< nv
; j
++) {
272 float e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
273 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
274 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
276 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
284 int best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
286 /* add in closest color */
287 for (i
= 0; i
< nc
; i
++) {
288 sum
[best
][i
] += input
[k
][i
];
290 /* mark this vector as used */
292 /* accumulate error */
297 if ((error
< LL_RMS_E
) ||
298 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
299 return !0; /* good match */
303 /* move each vector to the barycenter of its closest colors */
304 for (j
= 0; j
< nv
; j
++) {
306 float div
= 1.0F
/ cnt
[j
];
307 for (i
= 0; i
< nc
; i
++) {
308 vec
[j
][i
] = div
* sum
[j
][i
];
311 /* this vec has no samples or is identical with a previous vec */
312 int worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
313 for (i
= 0; i
< nc
; i
++) {
314 vec
[j
][i
] = input
[worst
][i
];
320 return 0; /* could not converge fast enough */
/* fxt1_quantize_CHROMA: encodes the 32-texel block `input` into `cc` using
 * four base vectors of three components (R, G, B).
 *   - vectors are seeded with fxt1_choose(); fxt1_lloyd() runs only when that
 *     call returns non-zero;
 *   - `hi` starts at 4 ("010" + unused bit); each component is then OR-ed in
 *     as (dword)(vec[j][i] / 8.0F), from the last vector down to the first,
 *     and `hi` is stored as the second qword of `cc`;
 *   - selectors come from fxt1_bestcol(): texels 31..16 go into `lohi`
 *     ("right microtile") and texels 15..0 into `lolo`.
 * NOTE(review): the shifts between the OR steps and the final store of
 * lohi / lolo are not visible in this excerpt.
 */
325 fxt1_quantize_CHROMA (dword
*cc
,
326 byte input
[N_TEXELS
][MAX_COMP
])
328 const int n_vect
= 4; /* 4 base vectors to find */
329 const int n_comp
= 3; /* 3 components: R, G, B */
330 float vec
[MAX_VECT
][MAX_COMP
];
332 qword hi
; /* high quadword */
333 dword lohi
, lolo
; /* low quadword: hi dword, lo dword */
335 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
336 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
339 Q_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
340 for (j
= n_vect
- 1; j
>= 0; j
--) {
341 for (i
= 0; i
< n_comp
; i
++) {
344 Q_OR32(hi
, (dword
)(vec
[j
][i
] / 8.0F
));
347 ((qword
*)cc
)[1] = hi
;
350 /* right microtile */
351 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
353 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
356 for (; k
>= 0; k
--) {
358 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
/* fxt1_quantize_ALPHA0: encodes the block into `cc` using three base vectors
 * of four components (R, G, B, A) plus a final vector that "indicates zero".
 *   - vectors are seeded and refined from the first `n` texels of `reord`,
 *     which the in-line comment says are guaranteed non-zero;
 *   - `hi` starts at 6 ("011" + lerp = 0); the alpha components are OR-ed in
 *     first and the remaining components afterwards, each as
 *     (dword)(value / 8.0F), then `hi` is stored as the second qword of `cc`;
 *   - selectors come from fxt1_bestcol() over n_vect + 1 vectors, using the
 *     unreordered `input`: texels 31..16 into `lohi`, 15..0 into `lolo`.
 * NOTE(review): the initialisation of the last vector and the shifts between
 * the OR steps are not visible in this excerpt.
 */
366 fxt1_quantize_ALPHA0 (dword
*cc
,
367 byte input
[N_TEXELS
][MAX_COMP
],
368 byte reord
[N_TEXELS
][MAX_COMP
], int n
)
370 const int n_vect
= 3; /* 3 base vectors to find */
371 const int n_comp
= 4; /* 4 components: R, G, B, A */
372 float vec
[MAX_VECT
][MAX_COMP
];
374 qword hi
; /* high quadword */
375 dword lohi
, lolo
; /* low quadword: hi dword, lo dword */
377 /* the last vector indicates zero */
378 for (i
= 0; i
< n_comp
; i
++) {
382 /* the first n texels in reord are guaranteed to be non-zero */
383 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
384 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
387 Q_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
388 for (j
= n_vect
- 1; j
>= 0; j
--) {
391 Q_OR32(hi
, (dword
)(vec
[j
][ACOMP
] / 8.0F
));
393 for (j
= n_vect
- 1; j
>= 0; j
--) {
394 for (i
= 0; i
< n_comp
- 1; i
++) {
397 Q_OR32(hi
, (dword
)(vec
[j
][i
] / 8.0F
));
400 ((qword
*)cc
)[1] = hi
;
403 /* right microtile */
404 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
406 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
409 for (; k
>= 0; k
--) {
411 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
/* fxt1_quantize_ALPHA1: encodes the block into `cc` with interpolated RGBA
 * colours: three vectors in total, vec[0] for the left half, vec[2] for the
 * right half and vec[1] shared by both.
 *   - each half (texels 0..15 and 16..31) is scanned for its extreme texels
 *     (minColL / maxColL, minColR / maxColR);
 *   - the four extremes are copied to tv[0..3] and every left/right pair
 *     (j1 in 0..1, j2 in 2..3) is scored by squared distance to pick v1, v2;
 *   - vec[1] is the sumL/sumR-weighted blend of tv[v1] and tv[v2]; vec[0] and
 *     vec[2] take the other extreme of each half (tv[1 - v1], tv[5 - v2]);
 *   - per half, when its two extremes differ, MAKEIVEC builds an
 *     interpolation vector and CALCCDOT maps each texel to a selector;
 *   - `hi` starts at 7 (lerp = 1), receives the alpha components and then the
 *     colour components as (dword)(value / 8.0F), and is stored as the second
 *     qword of `cc`.
 * NOTE(review): vec[1] divides by (sumL + sumR); nothing visible here rules
 * out a zero total -- confirm against the accumulation code.
 * NOTE(review): the pair-selection comparison and the selector packing are
 * not visible in this excerpt.
 */
419 fxt1_quantize_ALPHA1 (dword
*cc
,
420 byte input
[N_TEXELS
][MAX_COMP
])
422 const int n_vect
= 3; /* highest vector number in each microtile */
423 const int n_comp
= 4; /* 4 components: R, G, B, A */
424 float vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
425 float b
, iv
[MAX_COMP
]; /* interpolation vector */
427 qword hi
; /* high quadword */
428 dword lohi
, lolo
; /* low quadword: hi dword, lo dword */
432 int minColL
= 0, maxColL
= 0;
433 int minColR
= 0, maxColR
= 0;
434 int sumL
= 0, sumR
= 0;
436 /* Our solution here is to find the darkest and brightest colors in
437 * the 4x4 tile and use those as the two representative colors.
438 * There are probably better algorithms to use (histogram-based).
440 minSum
= 2000; /* big enough */
441 maxSum
= -1; /* small enough */
442 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
444 for (i
= 0; i
< n_comp
; i
++) {
457 minSum
= 2000; /* big enough */
458 maxSum
= -1; /* small enough */
459 for (; k
< N_TEXELS
; k
++) {
461 for (i
= 0; i
< n_comp
; i
++) {
475 /* choose the common vector (yuck!) */
479 float err
= 1e9
; /* big enough */
480 float tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
481 for (i
= 0; i
< n_comp
; i
++) {
482 tv
[0][i
] = input
[minColL
][i
];
483 tv
[1][i
] = input
[maxColL
][i
];
484 tv
[2][i
] = input
[minColR
][i
];
485 tv
[3][i
] = input
[maxColR
][i
];
487 for (j1
= 0; j1
< 2; j1
++) {
488 for (j2
= 2; j2
< 4; j2
++) {
490 for (i
= 0; i
< n_comp
; i
++) {
491 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
500 for (i
= 0; i
< n_comp
; i
++) {
501 vec
[0][i
] = tv
[1 - v1
][i
];
502 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
503 vec
[2][i
] = tv
[5 - v2
][i
];
509 if (minColL
!= maxColL
) {
510 /* compute interpolation vector */
511 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
515 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
517 /* interpolate color */
518 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
527 /* right microtile */
529 if (minColR
!= maxColR
) {
530 /* compute interpolation vector */
531 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
535 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
537 /* interpolate color */
538 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
547 Q_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
548 for (j
= n_vect
- 1; j
>= 0; j
--) {
551 Q_OR32(hi
, (dword
)(vec
[j
][ACOMP
] / 8.0F
));
553 for (j
= n_vect
- 1; j
>= 0; j
--) {
554 for (i
= 0; i
< n_comp
- 1; i
++) {
557 Q_OR32(hi
, (dword
)(vec
[j
][i
] / 8.0F
));
560 ((qword
*)cc
)[1] = hi
;
/* fxt1_quantize_HI: encodes the block into `cc` with two RGB endpoints taken
 * from `reord` and a selector per texel.
 *   - the first `n` texels of `reord` are scanned for minCol / maxCol (the
 *     in-line comment calls these the darkest and brightest colours);
 *   - `hihi` starts at 0 ("00") and receives the components of reord[maxCol]
 *     and then reord[minCol], each shifted right by 3;
 *   - cc[0..2] are cleared; for texels 31..0 the selector `texel` is OR-ed
 *     into the dword at byte offset t / 8 from `cc`, shifted by t & 7;
 *   - `texel` defaults to n_vect + 1 ("transparent black"); for other texels
 *     it comes from CALCCDOT when the two endpoints differ.
 * NOTE(review): `kk` is a dword pointer formed at an arbitrary byte offset
 * into `cc`, which can be a misaligned access on strict-alignment targets.
 * NOTE(review): the update of `t`, the shifts applied to `hihi` and the final
 * store of `hihi` are not visible in this excerpt.
 */
565 fxt1_quantize_HI (dword
*cc
,
566 byte input
[N_TEXELS
][MAX_COMP
],
567 byte reord
[N_TEXELS
][MAX_COMP
], int n
)
569 const int n_vect
= 6; /* highest vector number */
570 const int n_comp
= 3; /* 3 components: R, G, B */
571 float b
= 0.0F
; /* phoudoin: silent compiler! */
572 float iv
[MAX_COMP
]; /* interpolation vector */
574 dword hihi
; /* high quadword: hi dword */
576 int minSum
= 2000; /* big enough */
577 int maxSum
= -1; /* small enough */
578 int minCol
= 0; /* phoudoin: silent compiler! */
579 int maxCol
= 0; /* phoudoin: silent compiler! */
581 /* Our solution here is to find the darkest and brightest colors in
582 * the 8x4 tile and use those as the two representative colors.
583 * There are probably better algorithms to use (histogram-based).
585 for (k
= 0; k
< n
; k
++) {
587 for (i
= 0; i
< n_comp
; i
++) {
600 hihi
= 0; /* cc-hi = "00" */
601 for (i
= 0; i
< n_comp
; i
++) {
604 hihi
|= reord
[maxCol
][i
] >> 3;
606 for (i
= 0; i
< n_comp
; i
++) {
609 hihi
|= reord
[minCol
][i
] >> 3;
612 cc
[0] = cc
[1] = cc
[2] = 0;
614 /* compute interpolation vector */
615 if (minCol
!= maxCol
) {
616 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
620 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
622 dword
*kk
= (dword
*)((byte
*)cc
+ t
/ 8);
623 int texel
= n_vect
+ 1; /* transparent black */
625 if (!ISTBLACK(input
[k
])) {
626 if (minCol
!= maxCol
) {
627 /* interpolate color */
628 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
630 kk
[0] |= texel
<< (t
& 7);
634 kk
[0] |= texel
<< (t
& 7);
/* fxt1_quantize_MIXED1: encodes the block into `cc` with two RGB endpoints
 * per half, reserving selector n_vect + 1 for "transparent black" texels.
 *   - each half (texels 0..15 and 16..31) is scanned for its extreme texels,
 *     looking only at texels for which ISTBLACK() is false; maxColL / maxColR
 *     start at -1;
 *   - vec[0], vec[1] hold the left endpoints and vec[2], vec[3] the right
 *     ones; when a half's endpoints differ, MAKEIVEC builds an interpolation
 *     vector and CALCCDOT maps each non-black texel to a selector;
 *   - `hi` starts at 9 combined with bit 2 of vec[3]'s green and bit 2 of
 *     vec[1]'s green (moved to bit 1), then receives every component of
 *     vec[3]..vec[0] shifted right by 3, and is stored as the second qword
 *     of `cc`.
 * NOTE(review): the "all transparent black" tests and the selector packing
 * are not visible in this excerpt; presumably maxColL / maxColR == -1 marks a
 * half without opaque texels -- confirm.
 */
641 fxt1_quantize_MIXED1 (dword
*cc
,
642 byte input
[N_TEXELS
][MAX_COMP
])
644 const int n_vect
= 2; /* highest vector number in each microtile */
645 const int n_comp
= 3; /* 3 components: R, G, B */
646 byte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
647 float b
, iv
[MAX_COMP
]; /* interpolation vector */
649 qword hi
; /* high quadword */
650 dword lohi
, lolo
; /* low quadword: hi dword, lo dword */
654 int minColL
= 0, maxColL
= -1;
655 int minColR
= 0, maxColR
= -1;
657 /* Our solution here is to find the darkest and brightest colors in
658 * the 4x4 tile and use those as the two representative colors.
659 * There are probably better algorithms to use (histogram-based).
661 minSum
= 2000; /* big enough */
662 maxSum
= -1; /* small enough */
663 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
664 if (!ISTBLACK(input
[k
])) {
666 for (i
= 0; i
< n_comp
; i
++) {
679 minSum
= 2000; /* big enough */
680 maxSum
= -1; /* small enough */
681 for (; k
< N_TEXELS
; k
++) {
682 if (!ISTBLACK(input
[k
])) {
684 for (i
= 0; i
< n_comp
; i
++) {
700 /* all transparent black */
702 for (i
= 0; i
< n_comp
; i
++) {
708 for (i
= 0; i
< n_comp
; i
++) {
709 vec
[0][i
] = input
[minColL
][i
];
710 vec
[1][i
] = input
[maxColL
][i
];
712 if (minColL
!= maxColL
) {
713 /* compute interpolation vector */
714 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
718 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
719 int texel
= n_vect
+ 1; /* transparent black */
720 if (!ISTBLACK(input
[k
])) {
721 /* interpolate color */
722 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
732 /* right microtile */
734 /* all transparent black */
736 for (i
= 0; i
< n_comp
; i
++) {
742 for (i
= 0; i
< n_comp
; i
++) {
743 vec
[2][i
] = input
[minColR
][i
];
744 vec
[3][i
] = input
[maxColR
][i
];
746 if (minColR
!= maxColR
) {
747 /* compute interpolation vector */
748 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
752 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
753 int texel
= n_vect
+ 1; /* transparent black */
754 if (!ISTBLACK(input
[k
])) {
755 /* interpolate color */
756 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
766 Q_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
767 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
768 for (i
= 0; i
< n_comp
; i
++) {
771 Q_OR32(hi
, vec
[j
][i
] >> 3);
774 ((qword
*)cc
)[1] = hi
;
/* fxt1_quantize_MIXED0: encodes the block into `cc` with two RGB endpoints
 * per half and selectors 0..n_vect (n_vect = 3).
 *   - two ways of picking each half's extreme texels are visible: a
 *     darkest / brightest scan, and a scan of the single channel returned by
 *     fxt1_variance() (called with a NULL output array) for its lowest and
 *     highest value;
 *   - vec[0], vec[1] hold the left endpoints and vec[2], vec[3] the right
 *     ones; when a half's endpoints differ, MAKEIVEC builds an interpolation
 *     vector and CALCCDOT maps each texel to a selector;
 *   - "funky encoding for LSB of green": when bit 1 of the selector word
 *     (lolo for the left half, lohi for the right) differs from bit 2 of the
 *     XOR of the two endpoint greens, the half's two endpoints are swapped;
 *   - `hi` starts at 8 combined with bit 2 of vec[3]'s green and bit 2 of
 *     vec[1]'s green (moved to bit 1), then receives every component of
 *     vec[3]..vec[0] shifted right by 3, and is stored as the second qword
 *     of `cc`.
 * NOTE(review): how one extreme-picking approach is selected, and whatever
 * selector adjustment follows an endpoint swap, are not visible in this
 * excerpt.
 */
779 fxt1_quantize_MIXED0 (dword
*cc
,
780 byte input
[N_TEXELS
][MAX_COMP
])
782 const int n_vect
= 3; /* highest vector number in each microtile */
783 const int n_comp
= 3; /* 3 components: R, G, B */
784 byte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
785 float b
, iv
[MAX_COMP
]; /* interpolation vector */
787 qword hi
; /* high quadword */
788 dword lohi
, lolo
; /* low quadword: hi dword, lo dword */
790 int minColL
= 0, maxColL
= 0;
791 int minColR
= 0, maxColR
= 0;
796 /* Our solution here is to find the darkest and brightest colors in
797 * the 4x4 tile and use those as the two representative colors.
798 * There are probably better algorithms to use (histogram-based).
800 minSum
= 2000; /* big enough */
801 maxSum
= -1; /* small enough */
802 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
804 for (i
= 0; i
< n_comp
; i
++) {
816 minSum
= 2000; /* big enough */
817 maxSum
= -1; /* small enough */
818 for (; k
< N_TEXELS
; k
++) {
820 for (i
= 0; i
< n_comp
; i
++) {
835 int maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
836 int maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
838 /* Scan the channel with max variance for lo & hi
839 * and use those as the two representative colors.
841 minVal
= 2000; /* big enough */
842 maxVal
= -1; /* small enough */
843 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
844 int t
= input
[k
][maxVarL
];
854 minVal
= 2000; /* big enough */
855 maxVal
= -1; /* small enough */
856 for (; k
< N_TEXELS
; k
++) {
857 int t
= input
[k
][maxVarR
];
871 for (i
= 0; i
< n_comp
; i
++) {
872 vec
[0][i
] = input
[minColL
][i
];
873 vec
[1][i
] = input
[maxColL
][i
];
875 if (minColL
!= maxColL
) {
876 /* compute interpolation vector */
877 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
881 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
883 /* interpolate color */
884 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
890 /* funky encoding for LSB of green */
891 if ((int)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
892 for (i
= 0; i
< n_comp
; i
++) {
893 vec
[1][i
] = input
[minColL
][i
];
894 vec
[0][i
] = input
[maxColL
][i
];
902 /* right microtile */
904 for (i
= 0; i
< n_comp
; i
++) {
905 vec
[2][i
] = input
[minColR
][i
];
906 vec
[3][i
] = input
[maxColR
][i
];
908 if (minColR
!= maxColR
) {
909 /* compute interpolation vector */
910 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
914 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
916 /* interpolate color */
917 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
923 /* funky encoding for LSB of green */
924 if ((int)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
925 for (i
= 0; i
< n_comp
; i
++) {
926 vec
[3][i
] = input
[minColR
][i
];
927 vec
[2][i
] = input
[maxColR
][i
];
935 Q_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
936 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
937 for (i
= 0; i
< n_comp
; i
++) {
940 Q_OR32(hi
, vec
[j
][i
] >> 3);
943 ((qword
*)cc
)[1] = hi
;
/* fxt1_quantize: gathers one block of texels from the four row pointers in
 * `lines` and dispatches to one of the fxt1_quantize_* encoders.
 *   - `input` is first filled with 0xFF bytes ("make the whole block
 *     opaque"), so components beyond `comps` stay at 255;
 *   - `comps` bytes per texel are copied from each row; the first four texels
 *     of row l land at index k + l * 4 and the following ones at
 *     k + l * 4 + 12.  The row pointers in `lines` are advanced as a side
 *     effect (`*lines[l]++`);
 *   - texels for which ISTBLACK() is false are copied, in order, into
 *     `reord`, with `l` used as the running index; each copied texel's alpha
 *     is compared against 255 - ALPHA_TS to spot non-opaque texels;
 *   - two dispatch chains are visible: ALPHA0 / HI / CHROMA and
 *     ALPHA1 / MIXED1 / MIXED0.  In both, l == 0 fills cc[0..2] with all-ones
 *     and l < N_TEXELS picks the encoder that handles transparent black.
 *     The `(void)name;` statements reference the encoders of the other chain.
 * NOTE(review): the all-ones fill is spelled `-1` in one chain and `~0UL` in
 * the other; the two agree only if dword is no wider than unsigned long.
 * NOTE(review): what selects between the two chains, and the conditions of
 * the first branch of each, are not visible in this excerpt.
 */
948 fxt1_quantize (dword
*cc
, const byte
*lines
[], int comps
)
951 byte reord
[N_TEXELS
][MAX_COMP
];
953 byte input
[N_TEXELS
][MAX_COMP
];
957 /* make the whole block opaque */
958 memset(input
, -1, sizeof(input
));
961 /* 8 texels each line */
962 for (l
= 0; l
< 4; l
++) {
963 for (k
= 0; k
< 4; k
++) {
964 for (i
= 0; i
< comps
; i
++) {
965 input
[k
+ l
* 4][i
] = *lines
[l
]++;
969 for (i
= 0; i
< comps
; i
++) {
970 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
976 * 00, 01, 02, 03, 08, 09, 0a, 0b
977 * 10, 11, 12, 13, 18, 19, 1a, 1b
978 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
979 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
983 * stupidity flows forth from this
988 /* skip all transparent black texels */
990 for (k
= 0; k
< N_TEXELS
; k
++) {
991 /* test all components against 0 */
992 if (!ISTBLACK(input
[k
])) {
993 /* texel is not transparent black */
994 COPY_4UBV(reord
[l
], input
[k
]);
995 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
996 /* non-opaque texel */
1006 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1007 } else if (l
== 0) {
1008 cc
[0] = cc
[1] = cc
[2] = -1;
1010 } else if (l
< N_TEXELS
) {
1011 fxt1_quantize_HI(cc
, input
, reord
, l
);
1013 fxt1_quantize_CHROMA(cc
, input
);
1015 (void)fxt1_quantize_ALPHA1
;
1016 (void)fxt1_quantize_MIXED1
;
1017 (void)fxt1_quantize_MIXED0
;
1020 fxt1_quantize_ALPHA1(cc
, input
);
1021 } else if (l
== 0) {
1022 cc
[0] = cc
[1] = cc
[2] = ~0UL;
1024 } else if (l
< N_TEXELS
) {
1025 fxt1_quantize_MIXED1(cc
, input
);
1027 fxt1_quantize_MIXED0(cc
, input
);
1029 (void)fxt1_quantize_ALPHA0
;
1030 (void)fxt1_quantize_HI
;
1031 (void)fxt1_quantize_CHROMA
;
/* fxt1_encode: compresses a width x height image with `comps` bytes per
 * texel from `source` (row pitch `srcRowStride`) into `dest`, one
 * fxt1_quantize() call per 8x4 tile ("128 bits per 8x4 block").
 *   - when width is not a multiple of 8 or height not a multiple of 4, the
 *     image is first replicated into a temporary buffer of the rounded-up
 *     size via _mesa_upscale_teximage2d(), and srcRowStride becomes
 *     comps * newWidth;
 *   - destRowStride is converted to (destRowStride - width * 2) / 4 and added
 *     to `encoded` (a dword pointer) after each row of tiles;
 *   - for every tile, lines[0..3] point at four consecutive source rows
 *     starting at byte offset `offs`.
 * NOTE(review): the temporary buffer is sized with sizeof(byte *) although it
 * is used as an array of byte; sizeof(byte) looks intended, so the buffer is
 * over-allocated by a factor of the pointer size.
 * NOTE(review): no NULL check is visible between malloc() and the use of
 * newSource; an allocation failure would hand a null destination to
 * _mesa_upscale_teximage2d().
 * NOTE(review): the return type, the updates of source / width / height after
 * upscaling, the per-tile advance of `offs` and `encoded`, and the release of
 * newSource are not visible in this excerpt.
 */
1037 fxt1_encode (int width
, int height
, int comps
,
1038 const void *source
, int srcRowStride
,
1039 void *dest
, int destRowStride
)
1043 dword
*encoded
= (dword
*)dest
;
1044 void *newSource
= NULL
;
1046 /* Replicate image if width is not M8 or height is not M4 */
1047 if ((width
& 7) | (height
& 3)) {
1048 int newWidth
= (width
+ 7) & ~7;
1049 int newHeight
= (height
+ 3) & ~3;
1050 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(byte
*));
1051 _mesa_upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1052 comps
, (const byte
*)source
,
1053 srcRowStride
, (byte
*)newSource
);
1057 srcRowStride
= comps
* newWidth
;
1060 data
= (const byte
*)source
;
1061 destRowStride
= (destRowStride
- width
* 2) / 4;
1062 for (y
= 0; y
< height
; y
+= 4) {
1063 unsigned int offs
= 0 + (y
+ 0) * srcRowStride
;
1064 for (x
= 0; x
< width
; x
+= 8) {
1065 const byte
*lines
[4];
1066 lines
[0] = &data
[offs
];
1067 lines
[1] = lines
[0] + srcRowStride
;
1068 lines
[2] = lines
[1] + srcRowStride
;
1069 lines
[3] = lines
[2] + srcRowStride
;
1071 fxt1_quantize(encoded
, lines
, comps
);
1072 /* 128 bits per 8x4 block */
1075 encoded
+= destRowStride
;
1078 if (newSource
!= NULL
) {
1086 /***************************************************************************\
1089 * The decoder is based on GL_3DFX_texture_compression_FXT1
1090 * specification and serves as a concept for the encoder.
1091 \***************************************************************************/
/* 32 entries, indexed by a 5-bit value i; each entry equals i * 255 / 31
 * rounded to the nearest integer, so 0 maps to 0 and 31 maps to 255.
 */
1094 /* lookup table for scaling 5 bit colors up to 8 bits */
1095 static const byte _rgb_scale_5
[] = {
1096 0, 8, 16, 25, 33, 41, 49, 58,
1097 66, 74, 82, 90, 99, 107, 115, 123,
1098 132, 140, 148, 156, 165, 173, 181, 189,
1099 197, 206, 214, 222, 230, 239, 247, 255
/* 64 entries, indexed by a 6-bit value i; each entry equals i * 255 / 63
 * rounded to the nearest integer, so 0 maps to 0 and 63 maps to 255.
 */
1102 /* lookup table for scaling 6 bit colors up to 8 bits */
1103 static const byte _rgb_scale_6
[] = {
1104 0, 4, 8, 12, 16, 20, 24, 28,
1105 32, 36, 40, 45, 49, 53, 57, 61,
1106 65, 69, 73, 77, 81, 85, 89, 93,
1107 97, 101, 105, 109, 113, 117, 121, 125,
1108 130, 134, 138, 142, 146, 150, 154, 158,
1109 162, 166, 170, 174, 178, 182, 186, 190,
1110 194, 198, 202, 206, 210, 215, 219, 223,
1111 227, 231, 235, 239, 243, 247, 251, 255
/* Decoder bit-field and scaling helpers.
 *
 * CC_SEL(cc, which): views `cc` as an array of dwords and returns the dword
 *    holding bit `which`, shifted right so that bit becomes bit 0.  Higher
 *    bits are not masked off and the field cannot cross a dword boundary.
 *    Assumes dword is 32 bits wide -- TODO confirm against the typedef.
 * UP5(c): expands the low 5 bits of c to 8 bits through _rgb_scale_5.
 * UP6(c, b): forms a 6-bit index from the low 5 bits of c (upper part) and
 *    the low bit of b (lowest bit), expanded to 8 bits through _rgb_scale_6.
 * LERP(n, t, c0, c1): rounded integer interpolation, t / n of the way from
 *    c0 to c1.  The whole expansion is parenthesised so it can sit inside a
 *    larger expression (the trailing division used to bind to whatever
 *    surrounded the macro).
 * ZERO_4UBV(v): stores 0 through a dword pointer at v; parenthesised for the
 *    same reason.
 */
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
/* fxt1_decode_1HI: decodes one texel of a block at `code` into `rgba`.
 *   - the selector is the 3-bit field at bit offset `t`, read through a
 *     dword pointer placed at byte offset t / 8 and shifted by t & 7;
 *   - the two colours are read from the dword at byte offset 12: one at bits
 *     0 / 5 / 10 (B / G / R) and one at bits 15 / 20 / 25, each component
 *     expanded with UP5;
 *   - selector 6 yields the second colour unchanged; the final branch blends
 *     the two with LERP(6, t, ...).
 * NOTE(review): how `t` is scaled on entry, the conditions of the first
 * branches, and the alpha output are not visible in this excerpt.
 * NOTE(review): the selector read forms a dword pointer at an arbitrary byte
 * offset, which can be a misaligned access on strict-alignment targets.
 */
1123 fxt1_decode_1HI (const byte
*code
, int t
, byte
*rgba
)
1128 cc
= (const dword
*)(code
+ t
/ 8);
1129 t
= (cc
[0] >> (t
& 7)) & 7;
1134 cc
= (const dword
*)(code
+ 12);
1136 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 0));
1137 rgba
[GCOMP
] = UP5(CC_SEL(cc
, 5));
1138 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 10));
1139 } else if (t
== 6) {
1140 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 15));
1141 rgba
[GCOMP
] = UP5(CC_SEL(cc
, 20));
1142 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 25));
1144 rgba
[BCOMP
] = LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1145 rgba
[GCOMP
] = LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1146 rgba
[RCOMP
] = LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
/* fxt1_decode_1CHROMA: decodes one texel of a block at `code` into `rgba`.
 *   - a 2-bit selector is taken from cc[0] at bit position t * 2;
 *   - the colour is then read through a dword pointer at byte offset
 *     8 + t / 8, shifted right by t & 7, and split into three 5-bit fields
 *     (B at bit 0, G at bit 5, R at bit 10), each expanded with UP5.
 * NOTE(review): the handling of the second half of the block, the scaling of
 * `t` between the selector read and the colour read, and the alpha output
 * are not visible in this excerpt.
 * NOTE(review): the colour read forms a dword pointer at an arbitrary byte
 * offset, which can be a misaligned access on strict-alignment targets.
 */
1154 fxt1_decode_1CHROMA (const byte
*code
, int t
, byte
*rgba
)
1159 cc
= (const dword
*)code
;
1164 t
= (cc
[0] >> (t
* 2)) & 3;
1167 cc
= (const dword
*)(code
+ 8 + t
/ 8);
1168 kk
= cc
[0] >> (t
& 7);
1169 rgba
[BCOMP
] = UP5(kk
);
1170 rgba
[GCOMP
] = UP5(kk
>> 5);
1171 rgba
[RCOMP
] = UP5(kk
>> 10);
/* fxt1_decode_1MIXED: decodes one texel of a block at `code` into `rgba`.
 *   - two parameter sets are visible.  One takes the 2-bit selector from
 *     cc[1], colour 0 from a dword read at byte offset 11 shifted by 6 (B)
 *     and bits 99 / 104 (G / R), colour 1 from bits 109 / 114 / 119, `glsb`
 *     from bit 126 and `selb` from bit 33.  The other takes the selector
 *     from cc[0], colour 0 from bits 64 / 69 / 74, colour 1 from bits
 *     79 / 84 / 89, `glsb` from bit 125 and `selb` from bit 1;
 *   - bit 124 selects between two output forms.  One yields colour 0,
 *     colour 1 (for t == 2) or the average of the two, expanding colour 1's
 *     green with UP6(..., glsb).  The other yields colour 0, colour 1 (for
 *     t == 3) or LERP(3, t, ...), expanding green with UP6 and using
 *     glsb ^ selb for colour 0.
 * NOTE(review): which texels use which parameter set, the conditions of the
 * first branches, and the alpha output are not visible in this excerpt.
 * NOTE(review): the read at byte offset 11 goes through a dword pointer that
 * is not dword-aligned relative to `code`.
 */
1177 fxt1_decode_1MIXED (const byte
*code
, int t
, byte
*rgba
)
1183 cc
= (const dword
*)code
;
1186 t
= (cc
[1] >> (t
* 2)) & 3;
1188 col
[0][BCOMP
] = (*(const dword
*)(code
+ 11)) >> 6;
1189 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1190 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1192 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1193 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1194 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1195 glsb
= CC_SEL(cc
, 126);
1196 selb
= CC_SEL(cc
, 33);
1198 t
= (cc
[0] >> (t
* 2)) & 3;
1200 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1201 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1202 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1204 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1205 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1206 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1207 glsb
= CC_SEL(cc
, 125);
1208 selb
= CC_SEL(cc
, 1);
1211 if (CC_SEL(cc
, 124) & 1) {
1218 rgba
[BCOMP
] = UP5(col
[0][BCOMP
]);
1219 rgba
[GCOMP
] = UP5(col
[0][GCOMP
]);
1220 rgba
[RCOMP
] = UP5(col
[0][RCOMP
]);
1221 } else if (t
== 2) {
1222 rgba
[BCOMP
] = UP5(col
[1][BCOMP
]);
1223 rgba
[GCOMP
] = UP6(col
[1][GCOMP
], glsb
);
1224 rgba
[RCOMP
] = UP5(col
[1][RCOMP
]);
1226 rgba
[BCOMP
] = (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1227 rgba
[GCOMP
] = (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1228 rgba
[RCOMP
] = (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1236 rgba
[BCOMP
] = UP5(col
[0][BCOMP
]);
1237 rgba
[GCOMP
] = UP6(col
[0][GCOMP
], glsb
^ selb
);
1238 rgba
[RCOMP
] = UP5(col
[0][RCOMP
]);
1239 } else if (t
== 3) {
1240 rgba
[BCOMP
] = UP5(col
[1][BCOMP
]);
1241 rgba
[GCOMP
] = UP6(col
[1][GCOMP
], glsb
);
1242 rgba
[RCOMP
] = UP5(col
[1][RCOMP
]);
1244 rgba
[BCOMP
] = LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1245 rgba
[GCOMP
] = LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1246 UP6(col
[1][GCOMP
], glsb
));
1247 rgba
[RCOMP
] = LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
/* fxt1_decode_1ALPHA: decodes one RGBA texel of a block at `code` into
 * `rgba`.  Bit 124 selects between two forms.
 *   - interpolated form: the 2-bit selector comes from cc[1] or cc[0].
 *     `col0` is read either from a dword at byte offset 11 shifted by 6 (B),
 *     bits 99 / 104 (G / R) and bit 119 (A), or from bits 64 / 69 / 74 and
 *     bit 109 (A).  The other endpoint is at bits 79 / 84 / 89 with alpha at
 *     bit 114.  The output is col0, that endpoint (for t == 3) or
 *     LERP(3, t, ...), every component expanded with UP5;
 *   - direct form: the 2-bit selector comes from cc[0]; alpha is
 *     UP5(cc[3] >> (t * 5 + 13)); the colour is read through a dword pointer
 *     at byte offset 8 + t / 8, shifted by t & 7, and split into 5-bit
 *     B / G / R fields.
 * NOTE(review): which texels use which selector word and col0 source, the
 * conditions of the first branches, and the scaling of `t` before the colour
 * read are not visible in this excerpt.
 * NOTE(review): the reads at byte offsets 11 and 8 + t / 8 go through dword
 * pointers that need not be dword-aligned.
 */
1255 fxt1_decode_1ALPHA (const byte
*code
, int t
, byte
*rgba
)
1259 cc
= (const dword
*)code
;
1260 if (CC_SEL(cc
, 124) & 1) {
1266 t
= (cc
[1] >> (t
* 2)) & 3;
1268 col0
[BCOMP
] = (*(const dword
*)(code
+ 11)) >> 6;
1269 col0
[GCOMP
] = CC_SEL(cc
, 99);
1270 col0
[RCOMP
] = CC_SEL(cc
, 104);
1271 col0
[ACOMP
] = CC_SEL(cc
, 119);
1273 t
= (cc
[0] >> (t
* 2)) & 3;
1275 col0
[BCOMP
] = CC_SEL(cc
, 64);
1276 col0
[GCOMP
] = CC_SEL(cc
, 69);
1277 col0
[RCOMP
] = CC_SEL(cc
, 74);
1278 col0
[ACOMP
] = CC_SEL(cc
, 109);
1282 rgba
[BCOMP
] = UP5(col0
[BCOMP
]);
1283 rgba
[GCOMP
] = UP5(col0
[GCOMP
]);
1284 rgba
[RCOMP
] = UP5(col0
[RCOMP
]);
1285 rgba
[ACOMP
] = UP5(col0
[ACOMP
]);
1286 } else if (t
== 3) {
1287 rgba
[BCOMP
] = UP5(CC_SEL(cc
, 79));
1288 rgba
[GCOMP
] = UP5(CC_SEL(cc
, 84));
1289 rgba
[RCOMP
] = UP5(CC_SEL(cc
, 89));
1290 rgba
[ACOMP
] = UP5(CC_SEL(cc
, 114));
1292 rgba
[BCOMP
] = LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1293 rgba
[GCOMP
] = LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1294 rgba
[RCOMP
] = LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1295 rgba
[ACOMP
] = LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1304 t
= (cc
[0] >> (t
* 2)) & 3;
1310 cc
= (const dword
*)code
;
1311 rgba
[ACOMP
] = UP5(cc
[3] >> (t
* 5 + 13));
1313 cc
= (const dword
*)(code
+ 8 + t
/ 8);
1314 kk
= cc
[0] >> (t
& 7);
1315 rgba
[BCOMP
] = UP5(kk
);
1316 rgba
[GCOMP
] = UP5(kk
>> 5);
1317 rgba
[RCOMP
] = UP5(kk
>> 10);
/* fxt1_decode_1: decodes the texel at column `i`, row `j` of the compressed
 * image `texture` into `rgba`.
 *   - the 16-byte block holding the texel is located at
 *     ((j / 4) * (stride / 8) + (i / 8)) * 16 bytes from `texture`, i.e.
 *     blocks cover 8x4 texels and `stride` is counted in texels;
 *   - `mode` is read with CC_SEL(code, 125) and indexes the eight-entry
 *     decode_1[] table: entries 0-1 are HI, 2 is CHROMA, 3 is ALPHA and
 *     4-7 are MIXED; the chosen decoder is called with (code, t, rgba).
 * NOTE(review): CC_SEL does not mask its result, so `mode` is presumably
 * reduced to 3 bits on a line not visible here -- confirm, since it indexes
 * an eight-entry table.
 * NOTE(review): the computation of `t` from i and j is not visible in this
 * excerpt.
 */
1324 fxt1_decode_1 (const void *texture
, int stride
,
1325 int i
, int j
, byte
*rgba
)
1327 static void (*decode_1
[]) (const byte
*, int, byte
*) = {
1328 fxt1_decode_1HI
, /* cc-high = "00?" */
1329 fxt1_decode_1HI
, /* cc-high = "00?" */
1330 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1331 fxt1_decode_1ALPHA
, /* alpha = "011" */
1332 fxt1_decode_1MIXED
, /* mixed = "1??" */
1333 fxt1_decode_1MIXED
, /* mixed = "1??" */
1334 fxt1_decode_1MIXED
, /* mixed = "1??" */
1335 fxt1_decode_1MIXED
/* mixed = "1??" */
1338 const byte
*code
= (const byte
*)texture
+
1339 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1340 int mode
= CC_SEL(code
, 125);
1348 decode_1
[mode
](code
, t
, rgba
);
1352 extern int cc_chroma
;
1353 extern int cc_alpha
;
1355 extern int cc_mixed
;
1356 static int *cctype
[] = {