reactos/dll/3rdparty/dxtn/fxt1.c

   1 /*
   2  * FXT1 codec
   3  * Version:  1.1
   4  *
   5  * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #include "types.h"
  30 #include "internal.h"
  31 #include "fxt1.h"
  32
  33
  34 /***************************************************************************\
  35  * FXT1 encoder
  36  *
  37  * The encoder was built by reversing the decoder,
  38  * and is vaguely based on Texus2 by 3dfx. Note that this code
  39  * is merely a proof of concept, since it is highly UNoptimized;
  40  * moreover, it is sub-optimal due to initial conditions passed
  41  * to Lloyd's algorithm (the interpolation modes are even worse).
  42 \***************************************************************************/
  43
  44
  45 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
  46 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
  47 #define N_TEXELS 32 /* number of texels in a block (always 32) */
  48 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
  49 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
  50 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
  51 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
  52 #define ISTBLACK(v) (*((dword *)(v)) == 0)
  53 #define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC))
  54
  55
  56 static int
  57 fxt1_bestcol (float vec[][MAX_COMP], int nv,
  58               byte input[MAX_COMP], int nc)
  59 {
  60     int i, j, best = -1;
  61     float err = 1e9; /* big enough */
  62
  63     for (j = 0; j < nv; j++) {
  64         float e = 0.0F;
  65         for (i = 0; i < nc; i++) {
  66             e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
  67         }
  68         if (e < err) {
  69             err = e;
  70             best = j;
  71         }
  72     }
  73
  74     return best;
  75 }
  76
  77
  78 static int
  79 fxt1_worst (float vec[MAX_COMP],
  80             byte input[N_TEXELS][MAX_COMP], int nc, int n)
  81 {
  82     int i, k, worst = -1;
  83     float err = -1.0F; /* small enough */
  84
  85     for (k = 0; k < n; k++) {
  86         float e = 0.0F;
  87         for (i = 0; i < nc; i++) {
  88             e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
  89         }
  90         if (e > err) {
  91             err = e;
  92             worst = k;
  93         }
  94     }
  95
  96     return worst;
  97 }
  98
  99
 100 static int
 101 fxt1_variance (double variance[MAX_COMP],
 102                byte input[N_TEXELS][MAX_COMP], int nc, int n)
 103 {
 104     int i, k, best = 0;
 105     dword sx, sx2;
 106     double var, maxvar = -1; /* small enough */
 107     double teenth = 1.0 / n;
 108
 109     for (i = 0; i < nc; i++) {
 110         sx = sx2 = 0;
 111         for (k = 0; k < n; k++) {
 112             int t = input[k][i];
 113             sx += t;
 114             sx2 += t * t;
 115         }
 116         var = sx2 * teenth - sx * sx * teenth * teenth;
 117         if (maxvar < var) {
 118             maxvar = var;
 119             best = i;
 120         }
 121         if (variance) {
 122             variance[i] = var;
 123         }
 124     }
 125
 126     return best;
 127 }
 128
 129
 130 static int
 131 fxt1_choose (float vec[][MAX_COMP], int nv,
 132              byte input[N_TEXELS][MAX_COMP], int nc, int n)
 133 {
 134 #if 0
 135     /* Choose colors from a grid.
 136      */
 137     int i, j;
 138
 139     for (j = 0; j < nv; j++) {
 140         int m = j * (n - 1) / (nv - 1);
 141         for (i = 0; i < nc; i++) {
 142             vec[j][i] = input[m][i];
 143         }
 144     }
 145 #else
 146     /* Our solution here is to find the darkest and brightest colors in
 147      * the 8x4 tile and use those as the two representative colors.
 148      * There are probably better algorithms to use (histogram-based).
 149      */
 150     int i, j, k;
 151     int minSum = 2000; /* big enough */
 152     int maxSum = -1; /* small enough */
 153     int minCol = 0; /* phoudoin: silent compiler! */
 154     int maxCol = 0; /* phoudoin: silent compiler! */
 155
 156     struct {
 157         int flag;
 158         dword key;
 159         int freq;
 160         int idx;
 161     } hist[N_TEXELS];
 162     int lenh = 0;
 163
 164     memset(hist, 0, sizeof(hist));
 165
 166     for (k = 0; k < n; k++) {
 167         int l;
 168         dword key = 0;
 169         int sum = 0;
 170         for (i = 0; i < nc; i++) {
 171             key <<= 8;
 172             key |= input[k][i];
 173             sum += input[k][i];
 174         }
 175         for (l = 0; l < n; l++) {
 176             if (!hist[l].flag) {
 177                 /* alloc new slot */
 178                 hist[l].flag = !0;
 179                 hist[l].key = key;
 180                 hist[l].freq = 1;
 181                 hist[l].idx = k;
 182                 lenh = l + 1;
 183                 break;
 184             } else if (hist[l].key == key) {
 185                 hist[l].freq++;
 186                 break;
 187             }
 188         }
 189         if (minSum > sum) {
 190             minSum = sum;
 191             minCol = k;
 192         }
 193         if (maxSum < sum) {
 194             maxSum = sum;
 195             maxCol = k;
 196         }
 197     }
 198
 199     if (lenh <= nv) {
 200         for (j = 0; j < lenh; j++) {
 201             for (i = 0; i < nc; i++) {
 202                 vec[j][i] = (float)input[hist[j].idx][i];
 203             }
 204         }
 205         for (; j < nv; j++) {
 206             for (i = 0; i < nc; i++) {
 207                 vec[j][i] = vec[0][i];
 208             }
 209         }
 210         return 0;
 211     }
 212
 213     for (j = 0; j < nv; j++) {
 214         for (i = 0; i < nc; i++) {
 215             vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
 216         }
 217     }
 218 #endif
 219
 220     return !0;
 221 }
 222
 223
 224 static int
 225 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 226             byte input[N_TEXELS][MAX_COMP], int nc, int n)
 227 {
 228     /* Use the generalized lloyd's algorithm for VQ:
 229      *     find 4 color vectors.
 230      *
 231      *     for each sample color
 232      *         sort to nearest vector.
 233      *
 234      *     replace each vector with the centroid of it's matching colors.
 235      *
 236      *     repeat until RMS doesn't improve.
 237      *
 238      *     if a color vector has no samples, or becomes the same as another
 239      *     vector, replace it with the color which is farthest from a sample.
 240      *
 241      * vec[][MAX_COMP]           initial vectors and resulting colors
 242      * nv                        number of resulting colors required
 243      * input[N_TEXELS][MAX_COMP] input texels
 244      * nc                        number of components in input / vec
 245      * n                         number of input samples
 246      */
 247
 248     int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 249     int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 250     float error, lasterror = 1e9;
 251
 252     int i, j, k, rep;
 253
 254     /* the quantizer */
 255     for (rep = 0; rep < LL_N_REP; rep++) {
 256         /* reset sums & counters */
 257         for (j = 0; j < nv; j++) {
 258             for (i = 0; i < nc; i++) {
 259                 sum[j][i] = 0;
 260             }
 261             cnt[j] = 0;
 262         }
 263         error = 0;
 264
 265         /* scan whole block */
 266         for (k = 0; k < n; k++) {
 267 #if 1
 268             int best = -1;
 269             float err = 1e9; /* big enough */
 270             /* determine best vector */
 271             for (j = 0; j < nv; j++) {
 272                 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 273                           (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 274                           (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 275                 if (nc == 4) {
 276                     e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 277                 }
 278                 if (e < err) {
 279                     err = e;
 280                     best = j;
 281                 }
 282             }
 283 #else
 284             int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 285 #endif
 286             /* add in closest color */
 287             for (i = 0; i < nc; i++) {
 288                 sum[best][i] += input[k][i];
 289             }
 290             /* mark this vector as used */
 291             cnt[best]++;
 292             /* accumulate error */
 293             error += err;
 294         }
 295
 296         /* check RMS */
 297         if ((error < LL_RMS_E) ||
 298             ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 299             return !0; /* good match */
 300         }
 301         lasterror = error;
 302
 303         /* move each vector to the barycenter of its closest colors */
 304         for (j = 0; j < nv; j++) {
 305             if (cnt[j]) {
 306                 float div = 1.0F / cnt[j];
 307                 for (i = 0; i < nc; i++) {
 308                     vec[j][i] = div * sum[j][i];
 309                 }
 310             } else {
 311                 /* this vec has no samples or is identical with a previous vec */
 312                 int worst = fxt1_worst(vec[j], input, nc, n);
 313                 for (i = 0; i < nc; i++) {
 314                     vec[j][i] = input[worst][i];
 315                 }
 316             }
 317         }
 318     }
 319
 320     return 0; /* could not converge fast enough */
 321 }
 322
 323
 324 static void
 325 fxt1_quantize_CHROMA (dword *cc,
 326                       byte input[N_TEXELS][MAX_COMP])
 327 {
 328     const int n_vect = 4; /* 4 base vectors to find */
 329     const int n_comp = 3; /* 3 components: R, G, B */
 330     float vec[MAX_VECT][MAX_COMP];
 331     int i, j, k;
 332     qword hi; /* high quadword */
 333     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 334
 335     if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 336         fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 337     }
 338
 339     Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 340     for (j = n_vect - 1; j >= 0; j--) {
 341         for (i = 0; i < n_comp; i++) {
 342             /* add in colors */
 343             Q_SHL(hi, 5);
 344             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 345         }
 346     }
 347     ((qword *)cc)[1] = hi;
 348
 349     lohi = lolo = 0;
 350     /* right microtile */
 351     for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 352         lohi <<= 2;
 353         lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 354     }
 355     /* left microtile */
 356     for (; k >= 0; k--) {
 357         lolo <<= 2;
 358         lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 359     }
 360     cc[1] = lohi;
 361     cc[0] = lolo;
 362 }
 363
 364
 365 static void
 366 fxt1_quantize_ALPHA0 (dword *cc,
 367                       byte input[N_TEXELS][MAX_COMP],
 368                       byte reord[N_TEXELS][MAX_COMP], int n)
 369 {
 370     const int n_vect = 3; /* 3 base vectors to find */
 371     const int n_comp = 4; /* 4 components: R, G, B, A */
 372     float vec[MAX_VECT][MAX_COMP];
 373     int i, j, k;
 374     qword hi; /* high quadword */
 375     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 376
 377     /* the last vector indicates zero */
 378     for (i = 0; i < n_comp; i++) {
 379         vec[n_vect][i] = 0;
 380     }
 381
 382     /* the first n texels in reord are guaranteed to be non-zero */
 383     if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 384         fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 385     }
 386
 387     Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 388     for (j = n_vect - 1; j >= 0; j--) {
 389         /* add in alphas */
 390         Q_SHL(hi, 5);
 391         Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 392     }
 393     for (j = n_vect - 1; j >= 0; j--) {
 394         for (i = 0; i < n_comp - 1; i++) {
 395             /* add in colors */
 396             Q_SHL(hi, 5);
 397             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 398         }
 399     }
 400     ((qword *)cc)[1] = hi;
 401
 402     lohi = lolo = 0;
 403     /* right microtile */
 404     for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 405         lohi <<= 2;
 406         lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 407     }
 408     /* left microtile */
 409     for (; k >= 0; k--) {
 410         lolo <<= 2;
 411         lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 412     }
 413     cc[1] = lohi;
 414     cc[0] = lolo;
 415 }
 416
 417
 418 static void
 419 fxt1_quantize_ALPHA1 (dword *cc,
 420                       byte input[N_TEXELS][MAX_COMP])
 421 {
 422     const int n_vect = 3; /* highest vector number in each microtile */
 423     const int n_comp = 4; /* 4 components: R, G, B, A */
 424     float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 425     float b, iv[MAX_COMP]; /* interpolation vector */
 426     int i, j, k;
 427     qword hi; /* high quadword */
 428     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 429
 430     int minSum;
 431     int maxSum;
 432     int minColL = 0, maxColL = 0;
 433     int minColR = 0, maxColR = 0;
 434     int sumL = 0, sumR = 0;
 435
 436     /* Our solution here is to find the darkest and brightest colors in
 437      * the 4x4 tile and use those as the two representative colors.
 438      * There are probably better algorithms to use (histogram-based).
 439      */
 440     minSum = 2000; /* big enough */
 441     maxSum = -1; /* small enough */
 442     for (k = 0; k < N_TEXELS / 2; k++) {
 443         int sum = 0;
 444         for (i = 0; i < n_comp; i++) {
 445             sum += input[k][i];
 446         }
 447         if (minSum > sum) {
 448             minSum = sum;
 449             minColL = k;
 450         }
 451         if (maxSum < sum) {
 452             maxSum = sum;
 453             maxColL = k;
 454         }
 455         sumL += sum;
 456     }
 457     minSum = 2000; /* big enough */
 458     maxSum = -1; /* small enough */
 459     for (; k < N_TEXELS; k++) {
 460         int sum = 0;
 461         for (i = 0; i < n_comp; i++) {
 462             sum += input[k][i];
 463         }
 464         if (minSum > sum) {
 465             minSum = sum;
 466             minColR = k;
 467         }
 468         if (maxSum < sum) {
 469             maxSum = sum;
 470             maxColR = k;
 471         }
 472         sumR += sum;
 473     }
 474
 475     /* choose the common vector (yuck!) */
 476     {
 477         int j1, j2;
 478         int v1 = 0, v2 = 0;
 479         float err = 1e9; /* big enough */
 480         float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 481         for (i = 0; i < n_comp; i++) {
 482             tv[0][i] = input[minColL][i];
 483             tv[1][i] = input[maxColL][i];
 484             tv[2][i] = input[minColR][i];
 485             tv[3][i] = input[maxColR][i];
 486         }
 487         for (j1 = 0; j1 < 2; j1++) {
 488             for (j2 = 2; j2 < 4; j2++) {
 489                 float e = 0.0F;
 490                 for (i = 0; i < n_comp; i++) {
 491                     e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 492                 }
 493                 if (e < err) {
 494                     err = e;
 495                     v1 = j1;
 496                     v2 = j2;
 497                 }
 498             }
 499         }
 500         for (i = 0; i < n_comp; i++) {
 501             vec[0][i] = tv[1 - v1][i];
 502             vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 503             vec[2][i] = tv[5 - v2][i];
 504         }
 505     }
 506
 507     /* left microtile */
 508     cc[0] = 0;
 509     if (minColL != maxColL) {
 510         /* compute interpolation vector */
 511         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 512
 513         /* add in texels */
 514         lolo = 0;
 515         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 516             int texel;
 517             /* interpolate color */
 518             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 519             /* add in texel */
 520             lolo <<= 2;
 521             lolo |= texel;
 522         }
 523
 524         cc[0] = lolo;
 525     }
 526
 527     /* right microtile */
 528     cc[1] = 0;
 529     if (minColR != maxColR) {
 530         /* compute interpolation vector */
 531         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 532
 533         /* add in texels */
 534         lohi = 0;
 535         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 536             int texel;
 537             /* interpolate color */
 538             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 539             /* add in texel */
 540             lohi <<= 2;
 541             lohi |= texel;
 542         }
 543
 544         cc[1] = lohi;
 545     }
 546
 547     Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 548     for (j = n_vect - 1; j >= 0; j--) {
 549         /* add in alphas */
 550         Q_SHL(hi, 5);
 551         Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 552     }
 553     for (j = n_vect - 1; j >= 0; j--) {
 554         for (i = 0; i < n_comp - 1; i++) {
 555             /* add in colors */
 556             Q_SHL(hi, 5);
 557             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 558         }
 559     }
 560     ((qword *)cc)[1] = hi;
 561 }
 562
 563
 564 static void
 565 fxt1_quantize_HI (dword *cc,
 566                   byte input[N_TEXELS][MAX_COMP],
 567                   byte reord[N_TEXELS][MAX_COMP], int n)
 568 {
 569     const int n_vect = 6; /* highest vector number */
 570     const int n_comp = 3; /* 3 components: R, G, B */
 571     float b = 0.0F;       /* phoudoin: silent compiler! */
 572     float iv[MAX_COMP];   /* interpolation vector */
 573     int i, k;
 574     dword hihi; /* high quadword: hi dword */
 575
 576     int minSum = 2000; /* big enough */
 577     int maxSum = -1; /* small enough */
 578     int minCol = 0; /* phoudoin: silent compiler! */
 579     int maxCol = 0; /* phoudoin: silent compiler! */
 580
 581     /* Our solution here is to find the darkest and brightest colors in
 582      * the 8x4 tile and use those as the two representative colors.
 583      * There are probably better algorithms to use (histogram-based).
 584      */
 585     for (k = 0; k < n; k++) {
 586         int sum = 0;
 587         for (i = 0; i < n_comp; i++) {
 588             sum += reord[k][i];
 589         }
 590         if (minSum > sum) {
 591             minSum = sum;
 592             minCol = k;
 593         }
 594         if (maxSum < sum) {
 595             maxSum = sum;
 596             maxCol = k;
 597         }
 598     }
 599
 600     hihi = 0; /* cc-hi = "00" */
 601     for (i = 0; i < n_comp; i++) {
 602         /* add in colors */
 603         hihi <<= 5;
 604         hihi |= reord[maxCol][i] >> 3;
 605     }
 606     for (i = 0; i < n_comp; i++) {
 607         /* add in colors */
 608         hihi <<= 5;
 609         hihi |= reord[minCol][i] >> 3;
 610     }
 611     cc[3] = hihi;
 612     cc[0] = cc[1] = cc[2] = 0;
 613
 614     /* compute interpolation vector */
 615     if (minCol != maxCol) {
 616         MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 617     }
 618
 619     /* add in texels */
 620     for (k = N_TEXELS - 1; k >= 0; k--) {
 621         int t = k * 3;
 622         dword *kk = (dword *)((byte *)cc + t / 8);
 623         int texel = n_vect + 1; /* transparent black */
 624
 625         if (!ISTBLACK(input[k])) {
 626             if (minCol != maxCol) {
 627                 /* interpolate color */
 628                 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 629                 /* add in texel */
 630                 kk[0] |= texel << (t & 7);
 631             }
 632         } else {
 633             /* add in texel */
 634             kk[0] |= texel << (t & 7);
 635         }
 636     }
 637 }
 638
 639
 640 static void
 641 fxt1_quantize_MIXED1 (dword *cc,
 642                       byte input[N_TEXELS][MAX_COMP])
 643 {
 644     const int n_vect = 2; /* highest vector number in each microtile */
 645     const int n_comp = 3; /* 3 components: R, G, B */
 646     byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 647     float b, iv[MAX_COMP]; /* interpolation vector */
 648     int i, j, k;
 649     qword hi; /* high quadword */
 650     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 651
 652     int minSum;
 653     int maxSum;
 654     int minColL = 0, maxColL = -1;
 655     int minColR = 0, maxColR = -1;
 656
 657     /* Our solution here is to find the darkest and brightest colors in
 658      * the 4x4 tile and use those as the two representative colors.
 659      * There are probably better algorithms to use (histogram-based).
 660      */
 661     minSum = 2000; /* big enough */
 662     maxSum = -1; /* small enough */
 663     for (k = 0; k < N_TEXELS / 2; k++) {
 664         if (!ISTBLACK(input[k])) {
 665             int sum = 0;
 666             for (i = 0; i < n_comp; i++) {
 667                 sum += input[k][i];
 668             }
 669             if (minSum > sum) {
 670                 minSum = sum;
 671                 minColL = k;
 672             }
 673             if (maxSum < sum) {
 674                 maxSum = sum;
 675                 maxColL = k;
 676             }
 677         }
 678     }
 679     minSum = 2000; /* big enough */
 680     maxSum = -1; /* small enough */
 681     for (; k < N_TEXELS; k++) {
 682         if (!ISTBLACK(input[k])) {
 683             int sum = 0;
 684             for (i = 0; i < n_comp; i++) {
 685                 sum += input[k][i];
 686             }
 687             if (minSum > sum) {
 688                 minSum = sum;
 689                 minColR = k;
 690             }
 691             if (maxSum < sum) {
 692                 maxSum = sum;
 693                 maxColR = k;
 694             }
 695         }
 696     }
 697
 698     /* left microtile */
 699     if (maxColL == -1) {
 700         /* all transparent black */
 701         cc[0] = ~0UL;
 702         for (i = 0; i < n_comp; i++) {
 703             vec[0][i] = 0;
 704             vec[1][i] = 0;
 705         }
 706     } else {
 707         cc[0] = 0;
 708         for (i = 0; i < n_comp; i++) {
 709             vec[0][i] = input[minColL][i];
 710             vec[1][i] = input[maxColL][i];
 711         }
 712         if (minColL != maxColL) {
 713             /* compute interpolation vector */
 714             MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 715
 716             /* add in texels */
 717             lolo = 0;
 718             for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 719                 int texel = n_vect + 1; /* transparent black */
 720                 if (!ISTBLACK(input[k])) {
 721                     /* interpolate color */
 722                     CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 723                 }
 724                 /* add in texel */
 725                 lolo <<= 2;
 726                 lolo |= texel;
 727             }
 728             cc[0] = lolo;
 729         }
 730     }
 731
 732     /* right microtile */
 733     if (maxColR == -1) {
 734         /* all transparent black */
 735         cc[1] = ~0UL;
 736         for (i = 0; i < n_comp; i++) {
 737             vec[2][i] = 0;
 738             vec[3][i] = 0;
 739         }
 740     } else {
 741         cc[1] = 0;
 742         for (i = 0; i < n_comp; i++) {
 743             vec[2][i] = input[minColR][i];
 744             vec[3][i] = input[maxColR][i];
 745         }
 746         if (minColR != maxColR) {
 747             /* compute interpolation vector */
 748             MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 749
 750             /* add in texels */
 751             lohi = 0;
 752             for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 753                 int texel = n_vect + 1; /* transparent black */
 754                 if (!ISTBLACK(input[k])) {
 755                     /* interpolate color */
 756                     CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 757                 }
 758                 /* add in texel */
 759                 lohi <<= 2;
 760                 lohi |= texel;
 761             }
 762             cc[1] = lohi;
 763         }
 764     }
 765
 766     Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 767     for (j = 2 * 2 - 1; j >= 0; j--) {
 768         for (i = 0; i < n_comp; i++) {
 769             /* add in colors */
 770             Q_SHL(hi, 5);
 771             Q_OR32(hi, vec[j][i] >> 3);
 772         }
 773     }
 774     ((qword *)cc)[1] = hi;
 775 }
 776
 777
 778 static void
 779 fxt1_quantize_MIXED0 (dword *cc,
 780                       byte input[N_TEXELS][MAX_COMP])
 781 {
 782     const int n_vect = 3; /* highest vector number in each microtile */
 783     const int n_comp = 3; /* 3 components: R, G, B */
 784     byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 785     float b, iv[MAX_COMP]; /* interpolation vector */
 786     int i, j, k;
 787     qword hi; /* high quadword */
 788     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 789
 790     int minColL = 0, maxColL = 0;
 791     int minColR = 0, maxColR = 0;
 792 #if 0
 793     int minSum;
 794     int maxSum;
 795
 796     /* Our solution here is to find the darkest and brightest colors in
 797      * the 4x4 tile and use those as the two representative colors.
 798      * There are probably better algorithms to use (histogram-based).
 799      */
 800     minSum = 2000; /* big enough */
 801     maxSum = -1; /* small enough */
 802     for (k = 0; k < N_TEXELS / 2; k++) {
 803         int sum = 0;
 804         for (i = 0; i < n_comp; i++) {
 805             sum += input[k][i];
 806         }
 807         if (minSum > sum) {
 808             minSum = sum;
 809             minColL = k;
 810         }
 811         if (maxSum < sum) {
 812             maxSum = sum;
 813             maxColL = k;
 814         }
 815     }
 816     minSum = 2000; /* big enough */
 817     maxSum = -1; /* small enough */
 818     for (; k < N_TEXELS; k++) {
 819         int sum = 0;
 820         for (i = 0; i < n_comp; i++) {
 821             sum += input[k][i];
 822         }
 823         if (minSum > sum) {
 824             minSum = sum;
 825             minColR = k;
 826         }
 827         if (maxSum < sum) {
 828             maxSum = sum;
 829             maxColR = k;
 830         }
 831     }
 832 #else
 833     int minVal;
 834     int maxVal;
 835     int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
 836     int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
 837
 838     /* Scan the channel with max variance for lo & hi
 839      * and use those as the two representative colors.
 840      */
 841     minVal = 2000; /* big enough */
 842     maxVal = -1; /* small enough */
 843     for (k = 0; k < N_TEXELS / 2; k++) {
 844         int t = input[k][maxVarL];
 845         if (minVal > t) {
 846             minVal = t;
 847             minColL = k;
 848         }
 849         if (maxVal < t) {
 850             maxVal = t;
 851             maxColL = k;
 852         }
 853     }
 854     minVal = 2000; /* big enough */
 855     maxVal = -1; /* small enough */
 856     for (; k < N_TEXELS; k++) {
 857         int t = input[k][maxVarR];
 858         if (minVal > t) {
 859             minVal = t;
 860             minColR = k;
 861         }
 862         if (maxVal < t) {
 863             maxVal = t;
 864             maxColR = k;
 865         }
 866     }
 867 #endif
 868
 869     /* left microtile */
 870     cc[0] = 0;
 871     for (i = 0; i < n_comp; i++) {
 872         vec[0][i] = input[minColL][i];
 873         vec[1][i] = input[maxColL][i];
 874     }
 875     if (minColL != maxColL) {
 876         /* compute interpolation vector */
 877         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 878
 879         /* add in texels */
 880         lolo = 0;
 881         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 882             int texel;
 883             /* interpolate color */
 884             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 885             /* add in texel */
 886             lolo <<= 2;
 887             lolo |= texel;
 888         }
 889
 890         /* funky encoding for LSB of green */
 891         if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
 892             for (i = 0; i < n_comp; i++) {
 893                 vec[1][i] = input[minColL][i];
 894                 vec[0][i] = input[maxColL][i];
 895             }
 896             lolo = ~lolo;
 897         }
 898
 899         cc[0] = lolo;
 900     }
 901
 902     /* right microtile */
 903     cc[1] = 0;
 904     for (i = 0; i < n_comp; i++) {
 905         vec[2][i] = input[minColR][i];
 906         vec[3][i] = input[maxColR][i];
 907     }
 908     if (minColR != maxColR) {
 909         /* compute interpolation vector */
 910         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 911
 912         /* add in texels */
 913         lohi = 0;
 914         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 915             int texel;
 916             /* interpolate color */
 917             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 918             /* add in texel */
 919             lohi <<= 2;
 920             lohi |= texel;
 921         }
 922
 923         /* funky encoding for LSB of green */
 924         if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
 925             for (i = 0; i < n_comp; i++) {
 926                 vec[3][i] = input[minColR][i];
 927                 vec[2][i] = input[maxColR][i];
 928             }
 929             lohi = ~lohi;
 930         }
 931
 932         cc[1] = lohi;
 933     }
 934
 935     Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 936     for (j = 2 * 2 - 1; j >= 0; j--) {
 937         for (i = 0; i < n_comp; i++) {
 938             /* add in colors */
 939             Q_SHL(hi, 5);
 940             Q_OR32(hi, vec[j][i] >> 3);
 941         }
 942     }
 943     ((qword *)cc)[1] = hi;
 944 }
 945
 946
 947 static void
 948 fxt1_quantize (dword *cc, const byte *lines[], int comps)
 949 {
 950     int trualpha;
 951     byte reord[N_TEXELS][MAX_COMP];
 952
 953     byte input[N_TEXELS][MAX_COMP];
 954     int i, k, l;
 955
 956     if (comps == 3) {
 957         /* make the whole block opaque */
 958         memset(input, -1, sizeof(input));
 959     }
 960
 961     /* 8 texels each line */
 962     for (l = 0; l < 4; l++) {
 963         for (k = 0; k < 4; k++) {
 964             for (i = 0; i < comps; i++) {
 965                 input[k + l * 4][i] = *lines[l]++;
 966             }
 967         }
 968         for (; k < 8; k++) {
 969             for (i = 0; i < comps; i++) {
 970                 input[k + l * 4 + 12][i] = *lines[l]++;
 971             }
 972         }
 973     }
 974
 975     /* block layout:
 976      * 00, 01, 02, 03, 08, 09, 0a, 0b
 977      * 10, 11, 12, 13, 18, 19, 1a, 1b
 978      * 04, 05, 06, 07, 0c, 0d, 0e, 0f
 979      * 14, 15, 16, 17, 1c, 1d, 1e, 1f
 980      */
 981
 982     /* [dBorca]
 983      * stupidity flows forth from this
 984      */
 985     l = N_TEXELS;
 986     trualpha = 0;
 987     if (comps == 4) {
 988         /* skip all transparent black texels */
 989         l = 0;
 990         for (k = 0; k < N_TEXELS; k++) {
 991             /* test all components against 0 */
 992             if (!ISTBLACK(input[k])) {
 993                 /* texel is not transparent black */
 994                 COPY_4UBV(reord[l], input[k]);
 995                 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
 996                     /* non-opaque texel */
 997                     trualpha = !0;
 998                 }
 999                 l++;
1000             }
1001         }
1002     }
1003
1004 #if 0
1005     if (trualpha) {
1006         fxt1_quantize_ALPHA0(cc, input, reord, l);
1007     } else if (l == 0) {
1008         cc[0] = cc[1] = cc[2] = -1;
1009         cc[3] = 0;
1010     } else if (l < N_TEXELS) {
1011         fxt1_quantize_HI(cc, input, reord, l);
1012     } else {
1013         fxt1_quantize_CHROMA(cc, input);
1014     }
1015     (void)fxt1_quantize_ALPHA1;
1016     (void)fxt1_quantize_MIXED1;
1017     (void)fxt1_quantize_MIXED0;
1018 #else
1019     if (trualpha) {
1020         fxt1_quantize_ALPHA1(cc, input);
1021     } else if (l == 0) {
1022         cc[0] = cc[1] = cc[2] = ~0UL;
1023         cc[3] = 0;
1024     } else if (l < N_TEXELS) {
1025         fxt1_quantize_MIXED1(cc, input);
1026     } else {
1027         fxt1_quantize_MIXED0(cc, input);
1028     }
1029     (void)fxt1_quantize_ALPHA0;
1030     (void)fxt1_quantize_HI;
1031     (void)fxt1_quantize_CHROMA;
1032 #endif
1033 }
1034
1035
1036 TAPI int TAPIENTRY
1037 fxt1_encode (int width, int height, int comps,
1038              const void *source, int srcRowStride,
1039              void *dest, int destRowStride)
1040 {
1041     int x, y;
1042     const byte *data;
1043     dword *encoded = (dword *)dest;
1044     void *newSource = NULL;
1045
1046     /* Replicate image if width is not M8 or height is not M4 */
1047     if ((width & 7) | (height & 3)) {
1048         int newWidth = (width + 7) & ~7;
1049         int newHeight = (height + 3) & ~3;
1050         newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));
1051         _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1052                                  comps, (const byte *)source,
1053                                  srcRowStride, (byte *)newSource);
1054         source = newSource;
1055         width = newWidth;
1056         height = newHeight;
1057         srcRowStride = comps * newWidth;
1058     }
1059
1060     data = (const byte *)source;
1061     destRowStride = (destRowStride - width * 2) / 4;
1062     for (y = 0; y < height; y += 4) {
1063         unsigned int offs = 0 + (y + 0) * srcRowStride;
1064         for (x = 0; x < width; x += 8) {
1065             const byte *lines[4];
1066             lines[0] = &data[offs];
1067             lines[1] = lines[0] + srcRowStride;
1068             lines[2] = lines[1] + srcRowStride;
1069             lines[3] = lines[2] + srcRowStride;
1070             offs += 8 * comps;
1071             fxt1_quantize(encoded, lines, comps);
1072             /* 128 bits per 8x4 block */
1073             encoded += 4;
1074         }
1075         encoded += destRowStride;
1076     }
1077
1078     if (newSource != NULL) {
1079         free(newSource);
1080     }
1081
1082     return 0;
1083 }
1084
1085
1086 /***************************************************************************\
1087  * FXT1 decoder
1088  *
1089  * The decoder is based on GL_3DFX_texture_compression_FXT1
1090  * specification and serves as a concept for the encoder.
1091 \***************************************************************************/
1092
1093
1094 /* lookup table for scaling 5 bit colors up to 8 bits */
1095 static const byte _rgb_scale_5[] = {
1096     0,   8,   16,  25,  33,  41,  49,  58,
1097     66,  74,  82,  90,  99,  107, 115, 123,
1098     132, 140, 148, 156, 165, 173, 181, 189,
1099     197, 206, 214, 222, 230, 239, 247, 255
1100 };
1101
1102 /* lookup table for scaling 6 bit colors up to 8 bits */
1103 static const byte _rgb_scale_6[] = {
1104     0,   4,   8,   12,  16,  20,  24,  28,
1105     32,  36,  40,  45,  49,  53,  57,  61,
1106     65,  69,  73,  77,  81,  85,  89,  93,
1107     97,  101, 105, 109, 113, 117, 121, 125,
1108     130, 134, 138, 142, 146, 150, 154, 158,
1109     162, 166, 170, 174, 178, 182, 186, 190,
1110     194, 198, 202, 206, 210, 215, 219, 223,
1111     227, 231, 235, 239, 243, 247, 251, 255
1112 };
1113
1114
/* CC_SEL: the dword of `cc` that holds bit `which`, shifted so that bit
 * becomes bit 0. Higher bits are NOT masked off, and a field that crosses a
 * dword boundary cannot be fetched this way.
 */
#define CC_SEL(cc, which) (((const dword *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: low 5 bits of `c` scaled to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: low 5 bits of `c` with bit 0 of `b` appended as LSB, scaled to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer blend, t/n of the way from c0 to c1. The whole
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* ZERO_4UBV: clear four bytes at `v` with a single dword store */
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
1120
1121
1122 static void
1123 fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1124 {
1125     const dword *cc;
1126
1127     t *= 3;
1128     cc = (const dword *)(code + t / 8);
1129     t = (cc[0] >> (t & 7)) & 7;
1130
1131     if (t == 7) {
1132         ZERO_4UBV(rgba);
1133     } else {
1134         cc = (const dword *)(code + 12);
1135         if (t == 0) {
1136             rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1137             rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1138             rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1139         } else if (t == 6) {
1140             rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1141             rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1142             rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1143         } else {
1144             rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1145             rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1146             rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1147         }
1148         rgba[ACOMP] = 255;
1149     }
1150 }
1151
1152
1153 static void
1154 fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1155 {
1156     const dword *cc;
1157     dword kk;
1158
1159     cc = (const dword *)code;
1160     if (t & 16) {
1161         cc++;
1162         t &= 15;
1163     }
1164     t = (cc[0] >> (t * 2)) & 3;
1165
1166     t *= 15;
1167     cc = (const dword *)(code + 8 + t / 8);
1168     kk = cc[0] >> (t & 7);
1169     rgba[BCOMP] = UP5(kk);
1170     rgba[GCOMP] = UP5(kk >> 5);
1171     rgba[RCOMP] = UP5(kk >> 10);
1172     rgba[ACOMP] = 255;
1173 }
1174
1175
1176 static void
1177 fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1178 {
1179     const dword *cc;
1180     int col[2][3];
1181     int glsb, selb;
1182
1183     cc = (const dword *)code;
1184     if (t & 16) {
1185         t &= 15;
1186         t = (cc[1] >> (t * 2)) & 3;
1187         /* col 2 */
1188         col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1189         col[0][GCOMP] = CC_SEL(cc, 99);
1190         col[0][RCOMP] = CC_SEL(cc, 104);
1191         /* col 3 */
1192         col[1][BCOMP] = CC_SEL(cc, 109);
1193         col[1][GCOMP] = CC_SEL(cc, 114);
1194         col[1][RCOMP] = CC_SEL(cc, 119);
1195         glsb = CC_SEL(cc, 126);
1196         selb = CC_SEL(cc, 33);
1197     } else {
1198         t = (cc[0] >> (t * 2)) & 3;
1199         /* col 0 */
1200         col[0][BCOMP] = CC_SEL(cc, 64);
1201         col[0][GCOMP] = CC_SEL(cc, 69);
1202         col[0][RCOMP] = CC_SEL(cc, 74);
1203         /* col 1 */
1204         col[1][BCOMP] = CC_SEL(cc, 79);
1205         col[1][GCOMP] = CC_SEL(cc, 84);
1206         col[1][RCOMP] = CC_SEL(cc, 89);
1207         glsb = CC_SEL(cc, 125);
1208         selb = CC_SEL(cc, 1);
1209     }
1210
1211     if (CC_SEL(cc, 124) & 1) {
1212         /* alpha[0] == 1 */
1213
1214         if (t == 3) {
1215             ZERO_4UBV(rgba);
1216         } else {
1217             if (t == 0) {
1218                 rgba[BCOMP] = UP5(col[0][BCOMP]);
1219                 rgba[GCOMP] = UP5(col[0][GCOMP]);
1220                 rgba[RCOMP] = UP5(col[0][RCOMP]);
1221             } else if (t == 2) {
1222                 rgba[BCOMP] = UP5(col[1][BCOMP]);
1223                 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1224                 rgba[RCOMP] = UP5(col[1][RCOMP]);
1225             } else {
1226                 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1227                 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1228                 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1229             }
1230             rgba[ACOMP] = 255;
1231         }
1232     } else {
1233         /* alpha[0] == 0 */
1234
1235         if (t == 0) {
1236             rgba[BCOMP] = UP5(col[0][BCOMP]);
1237             rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1238             rgba[RCOMP] = UP5(col[0][RCOMP]);
1239         } else if (t == 3) {
1240             rgba[BCOMP] = UP5(col[1][BCOMP]);
1241             rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1242             rgba[RCOMP] = UP5(col[1][RCOMP]);
1243         } else {
1244             rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1245             rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1246                                      UP6(col[1][GCOMP], glsb));
1247             rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1248         }
1249         rgba[ACOMP] = 255;
1250     }
1251 }
1252
1253
1254 static void
1255 fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1256 {
1257     const dword *cc;
1258
1259     cc = (const dword *)code;
1260     if (CC_SEL(cc, 124) & 1) {
1261         /* lerp == 1 */
1262         int col0[4];
1263
1264         if (t & 16) {
1265             t &= 15;
1266             t = (cc[1] >> (t * 2)) & 3;
1267             /* col 2 */
1268             col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1269             col0[GCOMP] = CC_SEL(cc, 99);
1270             col0[RCOMP] = CC_SEL(cc, 104);
1271             col0[ACOMP] = CC_SEL(cc, 119);
1272         } else {
1273             t = (cc[0] >> (t * 2)) & 3;
1274             /* col 0 */
1275             col0[BCOMP] = CC_SEL(cc, 64);
1276             col0[GCOMP] = CC_SEL(cc, 69);
1277             col0[RCOMP] = CC_SEL(cc, 74);
1278             col0[ACOMP] = CC_SEL(cc, 109);
1279         }
1280
1281         if (t == 0) {
1282             rgba[BCOMP] = UP5(col0[BCOMP]);
1283             rgba[GCOMP] = UP5(col0[GCOMP]);
1284             rgba[RCOMP] = UP5(col0[RCOMP]);
1285             rgba[ACOMP] = UP5(col0[ACOMP]);
1286         } else if (t == 3) {
1287             rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1288             rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1289             rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1290             rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1291         } else {
1292             rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1293             rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1294             rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1295             rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1296         }
1297     } else {
1298         /* lerp == 0 */
1299
1300         if (t & 16) {
1301             cc++;
1302             t &= 15;
1303         }
1304         t = (cc[0] >> (t * 2)) & 3;
1305
1306         if (t == 3) {
1307             ZERO_4UBV(rgba);
1308         } else {
1309             dword kk;
1310             cc = (const dword *)code;
1311             rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1312             t *= 15;
1313             cc = (const dword *)(code + 8 + t / 8);
1314             kk = cc[0] >> (t & 7);
1315             rgba[BCOMP] = UP5(kk);
1316             rgba[GCOMP] = UP5(kk >> 5);
1317             rgba[RCOMP] = UP5(kk >> 10);
1318         }
1319     }
1320 }
1321
1322
1323 TAPI void TAPIENTRY
1324 fxt1_decode_1 (const void *texture, int stride,
1325                int i, int j, byte *rgba)
1326 {
1327     static void (*decode_1[]) (const byte *, int, byte *) = {
1328         fxt1_decode_1HI,        /* cc-high   = "00?" */
1329         fxt1_decode_1HI,        /* cc-high   = "00?" */
1330         fxt1_decode_1CHROMA,    /* cc-chroma = "010" */
1331         fxt1_decode_1ALPHA,     /* alpha     = "011" */
1332         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1333         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1334         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1335         fxt1_decode_1MIXED      /* mixed     = "1??" */
1336     };
1337
1338     const byte *code = (const byte *)texture +
1339                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1340     int mode = CC_SEL(code, 125);
1341     int t = i & 7;
1342
1343     if (t & 4) {
1344         t += 12;
1345     }
1346     t += (j & 3) * 4;
1347
1348     decode_1[mode](code, t, rgba);
1349
1350 #if VERBOSE
1351     {
1352         extern int cc_chroma;
1353         extern int cc_alpha;
1354         extern int cc_high;
1355         extern int cc_mixed;
1356         static int *cctype[] = {
1357             &cc_high,
1358             &cc_high,
1359             &cc_chroma,
1360             &cc_alpha,
1361             &cc_mixed,
1362             &cc_mixed,
1363             &cc_mixed,
1364             &cc_mixed
1365         };
1366         (*cctype[mode])++;
1367     }
1368 #endif
1369 }