reactos/dll/opengl/mesa/src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42 #include "swrast/s_context.h"
  43
  44
  45 #if FEATURE_texture_fxt1
  46
  47
  48 static void
  49 fxt1_encode (GLuint width, GLuint height, GLint comps,
  50              const void *source, GLint srcRowStride,
  51              void *dest, GLint destRowStride);
  52
  53 void
  54 fxt1_decode_1 (const void *texture, GLint stride,
  55                GLint i, GLint j, GLubyte *rgba);
  56
  57
  58 /**
  59  * Store user's image in rgb_fxt1 format.
  60  */
  61 GLboolean
  62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  63 {
  64    const GLubyte *pixels;
  65    GLint srcRowStride;
  66    GLubyte *dst;
  67    const GLubyte *tempImage = NULL;
  68
  69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  70
  71    if (srcFormat != GL_RGB ||
  72        srcType != GL_UNSIGNED_BYTE ||
  73        ctx->_ImageTransferState ||
  74        srcPacking->RowLength != srcWidth ||
  75        srcPacking->SwapBytes) {
  76       /* convert image to RGB/GLubyte */
  77       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  78                                              baseInternalFormat,
  79                                              _mesa_get_format_base_format(dstFormat),
  80                                              srcWidth, srcHeight, srcDepth,
  81                                              srcFormat, srcType, srcAddr,
  82                                              srcPacking);
  83       if (!tempImage)
  84          return GL_FALSE; /* out of memory */
  85       pixels = tempImage;
  86       srcRowStride = 3 * srcWidth;
  87       srcFormat = GL_RGB;
  88    }
  89    else {
  90       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  91                                      srcFormat, srcType, 0, 0);
  92
  93       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  94                                             srcType) / sizeof(GLubyte);
  95    }
  96
  97    dst = dstSlices[0];
  98
  99    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 100                dst, dstRowStride);
 101
 102    if (tempImage)
 103       free((void*) tempImage);
 104
 105    return GL_TRUE;
 106 }
 107
 108
 109 /**
 110  * Store user's image in rgba_fxt1 format.
 111  */
 112 GLboolean
 113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 114 {
 115    const GLubyte *pixels;
 116    GLint srcRowStride;
 117    GLubyte *dst;
 118    const GLubyte *tempImage = NULL;
 119
 120    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 121
 122    if (srcFormat != GL_RGBA ||
 123        srcType != GL_UNSIGNED_BYTE ||
 124        ctx->_ImageTransferState ||
 125        srcPacking->SwapBytes) {
 126       /* convert image to RGBA/GLubyte */
 127       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 128                                              baseInternalFormat,
 129                                              _mesa_get_format_base_format(dstFormat),
 130                                              srcWidth, srcHeight, srcDepth,
 131                                              srcFormat, srcType, srcAddr,
 132                                              srcPacking);
 133       if (!tempImage)
 134          return GL_FALSE; /* out of memory */
 135       pixels = tempImage;
 136       srcRowStride = 4 * srcWidth;
 137       srcFormat = GL_RGBA;
 138    }
 139    else {
 140       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 141                                      srcFormat, srcType, 0, 0);
 142
 143       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 144                                             srcType) / sizeof(GLubyte);
 145    }
 146
 147    dst = dstSlices[0];
 148
 149    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 150                dst, dstRowStride);
 151
 152    if (tempImage)
 153       free((void*) tempImage);
 154
 155    return GL_TRUE;
 156 }
 157
 158
 159 void
 160 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
 161                                   GLint i, GLint j, GLint k, GLfloat *texel )
 162 {
 163    /* just sample as GLubyte and convert to float here */
 164    GLubyte rgba[4];
 165    (void) k;
 166    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 167    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 168    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 169    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 170    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 171 }
 172
 173
 174 void
 175 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
 176                                  GLint i, GLint j, GLint k, GLfloat *texel )
 177 {
 178    /* just sample as GLubyte and convert to float here */
 179    GLubyte rgba[4];
 180    (void) k;
 181    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 182    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 183    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 184    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 185    texel[ACOMP] = 1.0F;
 186 }
 187
 188
 189
 190 /***************************************************************************\
 191  * FXT1 encoder
 192  *
 193  * The encoder was built by reversing the decoder,
 194  * and is vaguely based on Texus2 by 3dfx. Note that this code
 195  * is merely a proof of concept, since it is highly UNoptimized;
 196  * moreover, it is sub-optimal due to initial conditions passed
 197  * to Lloyd's algorithm (the interpolation modes are even worse).
 198 \***************************************************************************/
 199
 200
 201 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 202 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 203 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 204 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 205 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 206 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 207 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 208 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 209
 210
 211 /*
 212  * Define a 64-bit unsigned integer type and macros
 213  */
 214 #if 1
 215
 216 #define FX64_NATIVE 1
 217
 218 typedef uint64_t Fx64;
 219
 220 #define FX64_MOV32(a, b) a = b
 221 #define FX64_OR32(a, b)  a |= b
 222 #define FX64_SHL(a, c)   a <<= c
 223
 224 #else
 225
 226 #define FX64_NATIVE 0
 227
 228 typedef struct {
 229    GLuint lo, hi;
 230 } Fx64;
 231
 232 #define FX64_MOV32(a, b) a.lo = b
 233 #define FX64_OR32(a, b)  a.lo |= b
 234
 235 #define FX64_SHL(a, c)                                 \
 236    do {                                                \
 237        if ((c) >= 32) {                                \
 238           a.hi = a.lo << ((c) - 32);                   \
 239           a.lo = 0;                                    \
 240        } else {                                        \
 241           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 242           a.lo <<= (c);                                \
 243        }                                               \
 244    } while (0)
 245
 246 #endif
 247
 248
 249 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 250 #define SAFECDOT 1 /* for paranoids */
 251
 252 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 253    do {                                  \
 254       /* compute interpolation vector */ \
 255       GLfloat d2 = 0.0F;                 \
 256       GLfloat rd2;                       \
 257                                          \
 258       for (i = 0; i < NC; i++) {         \
 259          IV[i] = (V1[i] - V0[i]) * F(i); \
 260          d2 += IV[i] * IV[i];            \
 261       }                                  \
 262       rd2 = (GLfloat)NV / d2;            \
 263       B = 0;                             \
 264       for (i = 0; i < NC; i++) {         \
 265          IV[i] *= F(i);                  \
 266          B -= IV[i] * V0[i];             \
 267          IV[i] *= rd2;                   \
 268       }                                  \
 269       B = B * rd2 + 0.5f;                \
 270    } while (0)
 271
 272 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 273    do {                                  \
 274       GLfloat dot = 0.0F;                \
 275       for (i = 0; i < NC; i++) {         \
 276          dot += V[i] * IV[i];            \
 277       }                                  \
 278       TEXEL = (GLint)(dot + B);          \
 279       if (SAFECDOT) {                    \
 280          if (TEXEL < 0) {                \
 281             TEXEL = 0;                   \
 282          } else if (TEXEL > NV) {        \
 283             TEXEL = NV;                  \
 284          }                               \
 285       }                                  \
 286    } while (0)
 287
 288
 289 static GLint
 290 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 291               GLubyte input[MAX_COMP], GLint nc)
 292 {
 293    GLint i, j, best = -1;
 294    GLfloat err = 1e9; /* big enough */
 295
 296    for (j = 0; j < nv; j++) {
 297       GLfloat e = 0.0F;
 298       for (i = 0; i < nc; i++) {
 299          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 300       }
 301       if (e < err) {
 302          err = e;
 303          best = j;
 304       }
 305    }
 306
 307    return best;
 308 }
 309
 310
 311 static GLint
 312 fxt1_worst (GLfloat vec[MAX_COMP],
 313             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 314 {
 315    GLint i, k, worst = -1;
 316    GLfloat err = -1.0F; /* small enough */
 317
 318    for (k = 0; k < n; k++) {
 319       GLfloat e = 0.0F;
 320       for (i = 0; i < nc; i++) {
 321          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 322       }
 323       if (e > err) {
 324          err = e;
 325          worst = k;
 326       }
 327    }
 328
 329    return worst;
 330 }
 331
 332
 333 static GLint
 334 fxt1_variance (GLdouble variance[MAX_COMP],
 335                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 336 {
 337    GLint i, k, best = 0;
 338    GLint sx, sx2;
 339    GLdouble var, maxvar = -1; /* small enough */
 340    GLdouble teenth = 1.0 / n;
 341
 342    for (i = 0; i < nc; i++) {
 343       sx = sx2 = 0;
 344       for (k = 0; k < n; k++) {
 345          GLint t = input[k][i];
 346          sx += t;
 347          sx2 += t * t;
 348       }
 349       var = sx2 * teenth - sx * sx * teenth * teenth;
 350       if (maxvar < var) {
 351          maxvar = var;
 352          best = i;
 353       }
 354       if (variance) {
 355          variance[i] = var;
 356       }
 357    }
 358
 359    return best;
 360 }
 361
 362
 363 static GLint
 364 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 365              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 366 {
 367 #if 0
 368    /* Choose colors from a grid.
 369     */
 370    GLint i, j;
 371
 372    for (j = 0; j < nv; j++) {
 373       GLint m = j * (n - 1) / (nv - 1);
 374       for (i = 0; i < nc; i++) {
 375          vec[j][i] = input[m][i];
 376       }
 377    }
 378 #else
 379    /* Our solution here is to find the darkest and brightest colors in
 380     * the 8x4 tile and use those as the two representative colors.
 381     * There are probably better algorithms to use (histogram-based).
 382     */
 383    GLint i, j, k;
 384    GLint minSum = 2000; /* big enough */
 385    GLint maxSum = -1; /* small enough */
 386    GLint minCol = 0; /* phoudoin: silent compiler! */
 387    GLint maxCol = 0; /* phoudoin: silent compiler! */
 388
 389    struct {
 390       GLint flag;
 391       GLint key;
 392       GLint freq;
 393       GLint idx;
 394    } hist[N_TEXELS];
 395    GLint lenh = 0;
 396
 397    memset(hist, 0, sizeof(hist));
 398
 399    for (k = 0; k < n; k++) {
 400       GLint l;
 401       GLint key = 0;
 402       GLint sum = 0;
 403       for (i = 0; i < nc; i++) {
 404          key <<= 8;
 405          key |= input[k][i];
 406          sum += input[k][i];
 407       }
 408       for (l = 0; l < n; l++) {
 409          if (!hist[l].flag) {
 410             /* alloc new slot */
 411             hist[l].flag = !0;
 412             hist[l].key = key;
 413             hist[l].freq = 1;
 414             hist[l].idx = k;
 415             lenh = l + 1;
 416             break;
 417          } else if (hist[l].key == key) {
 418             hist[l].freq++;
 419             break;
 420          }
 421       }
 422       if (minSum > sum) {
 423          minSum = sum;
 424          minCol = k;
 425       }
 426       if (maxSum < sum) {
 427          maxSum = sum;
 428          maxCol = k;
 429       }
 430    }
 431
 432    if (lenh <= nv) {
 433       for (j = 0; j < lenh; j++) {
 434          for (i = 0; i < nc; i++) {
 435             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 436          }
 437       }
 438       for (; j < nv; j++) {
 439          for (i = 0; i < nc; i++) {
 440             vec[j][i] = vec[0][i];
 441          }
 442       }
 443       return 0;
 444    }
 445
 446    for (j = 0; j < nv; j++) {
 447       for (i = 0; i < nc; i++) {
 448          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 449       }
 450    }
 451 #endif
 452
 453    return !0;
 454 }
 455
 456
 457 static GLint
 458 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 459             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 460 {
 461    /* Use the generalized lloyd's algorithm for VQ:
 462     *     find 4 color vectors.
 463     *
 464     *     for each sample color
 465     *         sort to nearest vector.
 466     *
 467     *     replace each vector with the centroid of its matching colors.
 468     *
 469     *     repeat until RMS doesn't improve.
 470     *
 471     *     if a color vector has no samples, or becomes the same as another
 472     *     vector, replace it with the color which is farthest from a sample.
 473     *
 474     * vec[][MAX_COMP]           initial vectors and resulting colors
 475     * nv                        number of resulting colors required
 476     * input[N_TEXELS][MAX_COMP] input texels
 477     * nc                        number of components in input / vec
 478     * n                         number of input samples
 479     */
 480
 481    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 482    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 483    GLfloat error, lasterror = 1e9;
 484
 485    GLint i, j, k, rep;
 486
 487    /* the quantizer */
 488    for (rep = 0; rep < LL_N_REP; rep++) {
 489       /* reset sums & counters */
 490       for (j = 0; j < nv; j++) {
 491          for (i = 0; i < nc; i++) {
 492             sum[j][i] = 0;
 493          }
 494          cnt[j] = 0;
 495       }
 496       error = 0;
 497
 498       /* scan whole block */
 499       for (k = 0; k < n; k++) {
 500 #if 1
 501          GLint best = -1;
 502          GLfloat err = 1e9; /* big enough */
 503          /* determine best vector */
 504          for (j = 0; j < nv; j++) {
 505             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 506                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 507                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 508             if (nc == 4) {
 509                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 510             }
 511             if (e < err) {
 512                err = e;
 513                best = j;
 514             }
 515          }
 516 #else
 517          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 518 #endif
 519          assert(best >= 0);
 520          /* add in closest color */
 521          for (i = 0; i < nc; i++) {
 522             sum[best][i] += input[k][i];
 523          }
 524          /* mark this vector as used */
 525          cnt[best]++;
 526          /* accumulate error */
 527          error += err;
 528       }
 529
 530       /* check RMS */
 531       if ((error < LL_RMS_E) ||
 532           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 533          return !0; /* good match */
 534       }
 535       lasterror = error;
 536
 537       /* move each vector to the barycenter of its closest colors */
 538       for (j = 0; j < nv; j++) {
 539          if (cnt[j]) {
 540             GLfloat div = 1.0F / cnt[j];
 541             for (i = 0; i < nc; i++) {
 542                vec[j][i] = div * sum[j][i];
 543             }
 544          } else {
 545             /* this vec has no samples or is identical with a previous vec */
 546             GLint worst = fxt1_worst(vec[j], input, nc, n);
 547             for (i = 0; i < nc; i++) {
 548                vec[j][i] = input[worst][i];
 549             }
 550          }
 551       }
 552    }
 553
 554    return 0; /* could not converge fast enough */
 555 }
 556
 557
 558 static void
 559 fxt1_quantize_CHROMA (GLuint *cc,
 560                       GLubyte input[N_TEXELS][MAX_COMP])
 561 {
 562    const GLint n_vect = 4; /* 4 base vectors to find */
 563    const GLint n_comp = 3; /* 3 components: R, G, B */
 564    GLfloat vec[MAX_VECT][MAX_COMP];
 565    GLint i, j, k;
 566    Fx64 hi; /* high quadword */
 567    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 568
 569    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 570       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 571    }
 572
 573    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 574    for (j = n_vect - 1; j >= 0; j--) {
 575       for (i = 0; i < n_comp; i++) {
 576          /* add in colors */
 577          FX64_SHL(hi, 5);
 578          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 579       }
 580    }
 581    ((Fx64 *)cc)[1] = hi;
 582
 583    lohi = lolo = 0;
 584    /* right microtile */
 585    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 586       lohi <<= 2;
 587       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 588    }
 589    /* left microtile */
 590    for (; k >= 0; k--) {
 591       lolo <<= 2;
 592       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 593    }
 594    cc[1] = lohi;
 595    cc[0] = lolo;
 596 }
 597
 598
 599 static void
 600 fxt1_quantize_ALPHA0 (GLuint *cc,
 601                       GLubyte input[N_TEXELS][MAX_COMP],
 602                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 603 {
 604    const GLint n_vect = 3; /* 3 base vectors to find */
 605    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 606    GLfloat vec[MAX_VECT][MAX_COMP];
 607    GLint i, j, k;
 608    Fx64 hi; /* high quadword */
 609    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 610
 611    /* the last vector indicates zero */
 612    for (i = 0; i < n_comp; i++) {
 613       vec[n_vect][i] = 0;
 614    }
 615
 616    /* the first n texels in reord are guaranteed to be non-zero */
 617    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 618       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 619    }
 620
 621    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 622    for (j = n_vect - 1; j >= 0; j--) {
 623       /* add in alphas */
 624       FX64_SHL(hi, 5);
 625       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 626    }
 627    for (j = n_vect - 1; j >= 0; j--) {
 628       for (i = 0; i < n_comp - 1; i++) {
 629          /* add in colors */
 630          FX64_SHL(hi, 5);
 631          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 632       }
 633    }
 634    ((Fx64 *)cc)[1] = hi;
 635
 636    lohi = lolo = 0;
 637    /* right microtile */
 638    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 639       lohi <<= 2;
 640       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 641    }
 642    /* left microtile */
 643    for (; k >= 0; k--) {
 644       lolo <<= 2;
 645       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 646    }
 647    cc[1] = lohi;
 648    cc[0] = lolo;
 649 }
 650
 651
 652 static void
 653 fxt1_quantize_ALPHA1 (GLuint *cc,
 654                       GLubyte input[N_TEXELS][MAX_COMP])
 655 {
 656    const GLint n_vect = 3; /* highest vector number in each microtile */
 657    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 658    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 659    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 660    GLint i, j, k;
 661    Fx64 hi; /* high quadword */
 662    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 663
 664    GLint minSum;
 665    GLint maxSum;
 666    GLint minColL = 0, maxColL = 0;
 667    GLint minColR = 0, maxColR = 0;
 668    GLint sumL = 0, sumR = 0;
 669    GLint nn_comp;
 670    /* Our solution here is to find the darkest and brightest colors in
 671     * the 4x4 tile and use those as the two representative colors.
 672     * There are probably better algorithms to use (histogram-based).
 673     */
 674    nn_comp = n_comp;
 675    while ((minColL == maxColL) && nn_comp) {
 676        minSum = 2000; /* big enough */
 677        maxSum = -1; /* small enough */
 678        for (k = 0; k < N_TEXELS / 2; k++) {
 679            GLint sum = 0;
 680            for (i = 0; i < nn_comp; i++) {
 681                sum += input[k][i];
 682            }
 683            if (minSum > sum) {
 684                minSum = sum;
 685                minColL = k;
 686            }
 687            if (maxSum < sum) {
 688                maxSum = sum;
 689                maxColL = k;
 690            }
 691            sumL += sum;
 692        }
 693
 694        nn_comp--;
 695    }
 696
 697    nn_comp = n_comp;
 698    while ((minColR == maxColR) && nn_comp) {
 699        minSum = 2000; /* big enough */
 700        maxSum = -1; /* small enough */
 701        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 702            GLint sum = 0;
 703            for (i = 0; i < nn_comp; i++) {
 704                sum += input[k][i];
 705            }
 706            if (minSum > sum) {
 707                minSum = sum;
 708                minColR = k;
 709            }
 710            if (maxSum < sum) {
 711                maxSum = sum;
 712                maxColR = k;
 713            }
 714            sumR += sum;
 715        }
 716
 717        nn_comp--;
 718    }
 719
 720    /* choose the common vector (yuck!) */
 721    {
 722       GLint j1, j2;
 723       GLint v1 = 0, v2 = 0;
 724       GLfloat err = 1e9; /* big enough */
 725       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 726       for (i = 0; i < n_comp; i++) {
 727          tv[0][i] = input[minColL][i];
 728          tv[1][i] = input[maxColL][i];
 729          tv[2][i] = input[minColR][i];
 730          tv[3][i] = input[maxColR][i];
 731       }
 732       for (j1 = 0; j1 < 2; j1++) {
 733          for (j2 = 2; j2 < 4; j2++) {
 734             GLfloat e = 0.0F;
 735             for (i = 0; i < n_comp; i++) {
 736                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 737             }
 738             if (e < err) {
 739                err = e;
 740                v1 = j1;
 741                v2 = j2;
 742             }
 743          }
 744       }
 745       for (i = 0; i < n_comp; i++) {
 746          vec[0][i] = tv[1 - v1][i];
 747          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 748          vec[2][i] = tv[5 - v2][i];
 749       }
 750    }
 751
 752    /* left microtile */
 753    cc[0] = 0;
 754    if (minColL != maxColL) {
 755       /* compute interpolation vector */
 756       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 757
 758       /* add in texels */
 759       lolo = 0;
 760       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 761          GLint texel;
 762          /* interpolate color */
 763          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 764          /* add in texel */
 765          lolo <<= 2;
 766          lolo |= texel;
 767       }
 768
 769       cc[0] = lolo;
 770    }
 771
 772    /* right microtile */
 773    cc[1] = 0;
 774    if (minColR != maxColR) {
 775       /* compute interpolation vector */
 776       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 777
 778       /* add in texels */
 779       lohi = 0;
 780       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 781          GLint texel;
 782          /* interpolate color */
 783          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 784          /* add in texel */
 785          lohi <<= 2;
 786          lohi |= texel;
 787       }
 788
 789       cc[1] = lohi;
 790    }
 791
 792    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 793    for (j = n_vect - 1; j >= 0; j--) {
 794       /* add in alphas */
 795       FX64_SHL(hi, 5);
 796       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 797    }
 798    for (j = n_vect - 1; j >= 0; j--) {
 799       for (i = 0; i < n_comp - 1; i++) {
 800          /* add in colors */
 801          FX64_SHL(hi, 5);
 802          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 803       }
 804    }
 805    ((Fx64 *)cc)[1] = hi;
 806 }
 807
 808
 809 static void
 810 fxt1_quantize_HI (GLuint *cc,
 811                   GLubyte input[N_TEXELS][MAX_COMP],
 812                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 813 {
 814    const GLint n_vect = 6; /* highest vector number */
 815    const GLint n_comp = 3; /* 3 components: R, G, B */
 816    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 817    GLfloat iv[MAX_COMP];   /* interpolation vector */
 818    GLint i, k;
 819    GLuint hihi; /* high quadword: hi dword */
 820
 821    GLint minSum = 2000; /* big enough */
 822    GLint maxSum = -1; /* small enough */
 823    GLint minCol = 0; /* phoudoin: silent compiler! */
 824    GLint maxCol = 0; /* phoudoin: silent compiler! */
 825
 826    /* Our solution here is to find the darkest and brightest colors in
 827     * the 8x4 tile and use those as the two representative colors.
 828     * There are probably better algorithms to use (histogram-based).
 829     */
 830    for (k = 0; k < n; k++) {
 831       GLint sum = 0;
 832       for (i = 0; i < n_comp; i++) {
 833          sum += reord[k][i];
 834       }
 835       if (minSum > sum) {
 836          minSum = sum;
 837          minCol = k;
 838       }
 839       if (maxSum < sum) {
 840          maxSum = sum;
 841          maxCol = k;
 842       }
 843    }
 844
 845    hihi = 0; /* cc-hi = "00" */
 846    for (i = 0; i < n_comp; i++) {
 847       /* add in colors */
 848       hihi <<= 5;
 849       hihi |= reord[maxCol][i] >> 3;
 850    }
 851    for (i = 0; i < n_comp; i++) {
 852       /* add in colors */
 853       hihi <<= 5;
 854       hihi |= reord[minCol][i] >> 3;
 855    }
 856    cc[3] = hihi;
 857    cc[0] = cc[1] = cc[2] = 0;
 858
 859    /* compute interpolation vector */
 860    if (minCol != maxCol) {
 861       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 862    }
 863
 864    /* add in texels */
 865    for (k = N_TEXELS - 1; k >= 0; k--) {
 866       GLint t = k * 3;
 867       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 868       GLint texel = n_vect + 1; /* transparent black */
 869
 870       if (!ISTBLACK(input[k])) {
 871          if (minCol != maxCol) {
 872             /* interpolate color */
 873             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 874             /* add in texel */
 875             kk[0] |= texel << (t & 7);
 876          }
 877       } else {
 878          /* add in texel */
 879          kk[0] |= texel << (t & 7);
 880       }
 881    }
 882 }
 883
 884
 885 static void
 886 fxt1_quantize_MIXED1 (GLuint *cc,
 887                       GLubyte input[N_TEXELS][MAX_COMP])
 888 {
 889    const GLint n_vect = 2; /* highest vector number in each microtile */
 890    const GLint n_comp = 3; /* 3 components: R, G, B */
 891    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 892    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 893    GLint i, j, k;
 894    Fx64 hi; /* high quadword */
 895    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 896
 897    GLint minSum;
 898    GLint maxSum;
 899    GLint minColL = 0, maxColL = -1;
 900    GLint minColR = 0, maxColR = -1;
 901
 902    /* Our solution here is to find the darkest and brightest colors in
 903     * the 4x4 tile and use those as the two representative colors.
 904     * There are probably better algorithms to use (histogram-based).
 905     */
 906    minSum = 2000; /* big enough */
 907    maxSum = -1; /* small enough */
 908    for (k = 0; k < N_TEXELS / 2; k++) {
 909       if (!ISTBLACK(input[k])) {
 910          GLint sum = 0;
 911          for (i = 0; i < n_comp; i++) {
 912             sum += input[k][i];
 913          }
 914          if (minSum > sum) {
 915             minSum = sum;
 916             minColL = k;
 917          }
 918          if (maxSum < sum) {
 919             maxSum = sum;
 920             maxColL = k;
 921          }
 922       }
 923    }
 924    minSum = 2000; /* big enough */
 925    maxSum = -1; /* small enough */
 926    for (; k < N_TEXELS; k++) {
 927       if (!ISTBLACK(input[k])) {
 928          GLint sum = 0;
 929          for (i = 0; i < n_comp; i++) {
 930             sum += input[k][i];
 931          }
 932          if (minSum > sum) {
 933             minSum = sum;
 934             minColR = k;
 935          }
 936          if (maxSum < sum) {
 937             maxSum = sum;
 938             maxColR = k;
 939          }
 940       }
 941    }
 942
 943    /* left microtile */
 944    if (maxColL == -1) {
 945       /* all transparent black */
 946       cc[0] = ~0u;
 947       for (i = 0; i < n_comp; i++) {
 948          vec[0][i] = 0;
 949          vec[1][i] = 0;
 950       }
 951    } else {
 952       cc[0] = 0;
 953       for (i = 0; i < n_comp; i++) {
 954          vec[0][i] = input[minColL][i];
 955          vec[1][i] = input[maxColL][i];
 956       }
 957       if (minColL != maxColL) {
 958          /* compute interpolation vector */
 959          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 960
 961          /* add in texels */
 962          lolo = 0;
 963          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 964             GLint texel = n_vect + 1; /* transparent black */
 965             if (!ISTBLACK(input[k])) {
 966                /* interpolate color */
 967                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 968             }
 969             /* add in texel */
 970             lolo <<= 2;
 971             lolo |= texel;
 972          }
 973          cc[0] = lolo;
 974       }
 975    }
 976
 977    /* right microtile */
 978    if (maxColR == -1) {
 979       /* all transparent black */
 980       cc[1] = ~0u;
 981       for (i = 0; i < n_comp; i++) {
 982          vec[2][i] = 0;
 983          vec[3][i] = 0;
 984       }
 985    } else {
 986       cc[1] = 0;
 987       for (i = 0; i < n_comp; i++) {
 988          vec[2][i] = input[minColR][i];
 989          vec[3][i] = input[maxColR][i];
 990       }
 991       if (minColR != maxColR) {
 992          /* compute interpolation vector */
 993          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 994
 995          /* add in texels */
 996          lohi = 0;
 997          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 998             GLint texel = n_vect + 1; /* transparent black */
 999             if (!ISTBLACK(input[k])) {
1000                /* interpolate color */
1001                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1002             }
1003             /* add in texel */
1004             lohi <<= 2;
1005             lohi |= texel;
1006          }
1007          cc[1] = lohi;
1008       }
1009    }
1010
1011    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1012    for (j = 2 * 2 - 1; j >= 0; j--) {
1013       for (i = 0; i < n_comp; i++) {
1014          /* add in colors */
1015          FX64_SHL(hi, 5);
1016          FX64_OR32(hi, vec[j][i] >> 3);
1017       }
1018    }
1019    ((Fx64 *)cc)[1] = hi;
1020 }
1021
1022
1023 static void
1024 fxt1_quantize_MIXED0 (GLuint *cc,
1025                       GLubyte input[N_TEXELS][MAX_COMP])
1026 {
1027    const GLint n_vect = 3; /* highest vector number in each microtile */
1028    const GLint n_comp = 3; /* 3 components: R, G, B */
1029    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1030    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1031    GLint i, j, k;
1032    Fx64 hi; /* high quadword */
1033    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1034
1035    GLint minColL = 0, maxColL = 0;
1036    GLint minColR = 0, maxColR = 0;
1037 #if 0
1038    GLint minSum;
1039    GLint maxSum;
1040
1041    /* Our solution here is to find the darkest and brightest colors in
1042     * the 4x4 tile and use those as the two representative colors.
1043     * There are probably better algorithms to use (histogram-based).
1044     */
1045    minSum = 2000; /* big enough */
1046    maxSum = -1; /* small enough */
1047    for (k = 0; k < N_TEXELS / 2; k++) {
1048       GLint sum = 0;
1049       for (i = 0; i < n_comp; i++) {
1050          sum += input[k][i];
1051       }
1052       if (minSum > sum) {
1053          minSum = sum;
1054          minColL = k;
1055       }
1056       if (maxSum < sum) {
1057          maxSum = sum;
1058          maxColL = k;
1059       }
1060    }
1061    minSum = 2000; /* big enough */
1062    maxSum = -1; /* small enough */
1063    for (; k < N_TEXELS; k++) {
1064       GLint sum = 0;
1065       for (i = 0; i < n_comp; i++) {
1066          sum += input[k][i];
1067       }
1068       if (minSum > sum) {
1069          minSum = sum;
1070          minColR = k;
1071       }
1072       if (maxSum < sum) {
1073          maxSum = sum;
1074          maxColR = k;
1075       }
1076    }
1077 #else
1078    GLint minVal;
1079    GLint maxVal;
1080    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1081    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1082
1083    /* Scan the channel with max variance for lo & hi
1084     * and use those as the two representative colors.
1085     */
1086    minVal = 2000; /* big enough */
1087    maxVal = -1; /* small enough */
1088    for (k = 0; k < N_TEXELS / 2; k++) {
1089       GLint t = input[k][maxVarL];
1090       if (minVal > t) {
1091          minVal = t;
1092          minColL = k;
1093       }
1094       if (maxVal < t) {
1095          maxVal = t;
1096          maxColL = k;
1097       }
1098    }
1099    minVal = 2000; /* big enough */
1100    maxVal = -1; /* small enough */
1101    for (; k < N_TEXELS; k++) {
1102       GLint t = input[k][maxVarR];
1103       if (minVal > t) {
1104          minVal = t;
1105          minColR = k;
1106       }
1107       if (maxVal < t) {
1108          maxVal = t;
1109          maxColR = k;
1110       }
1111    }
1112 #endif
1113
1114    /* left microtile */
1115    cc[0] = 0;
1116    for (i = 0; i < n_comp; i++) {
1117       vec[0][i] = input[minColL][i];
1118       vec[1][i] = input[maxColL][i];
1119    }
1120    if (minColL != maxColL) {
1121       /* compute interpolation vector */
1122       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1123
1124       /* add in texels */
1125       lolo = 0;
1126       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1127          GLint texel;
1128          /* interpolate color */
1129          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1130          /* add in texel */
1131          lolo <<= 2;
1132          lolo |= texel;
1133       }
1134
1135       /* funky encoding for LSB of green */
1136       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1137          for (i = 0; i < n_comp; i++) {
1138             vec[1][i] = input[minColL][i];
1139             vec[0][i] = input[maxColL][i];
1140          }
1141          lolo = ~lolo;
1142       }
1143
1144       cc[0] = lolo;
1145    }
1146
1147    /* right microtile */
1148    cc[1] = 0;
1149    for (i = 0; i < n_comp; i++) {
1150       vec[2][i] = input[minColR][i];
1151       vec[3][i] = input[maxColR][i];
1152    }
1153    if (minColR != maxColR) {
1154       /* compute interpolation vector */
1155       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1156
1157       /* add in texels */
1158       lohi = 0;
1159       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1160          GLint texel;
1161          /* interpolate color */
1162          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1163          /* add in texel */
1164          lohi <<= 2;
1165          lohi |= texel;
1166       }
1167
1168       /* funky encoding for LSB of green */
1169       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1170          for (i = 0; i < n_comp; i++) {
1171             vec[3][i] = input[minColR][i];
1172             vec[2][i] = input[maxColR][i];
1173          }
1174          lohi = ~lohi;
1175       }
1176
1177       cc[1] = lohi;
1178    }
1179
1180    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1181    for (j = 2 * 2 - 1; j >= 0; j--) {
1182       for (i = 0; i < n_comp; i++) {
1183          /* add in colors */
1184          FX64_SHL(hi, 5);
1185          FX64_OR32(hi, vec[j][i] >> 3);
1186       }
1187    }
1188    ((Fx64 *)cc)[1] = hi;
1189 }
1190
1191
1192 static void
1193 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1194 {
1195    GLint trualpha;
1196    GLubyte reord[N_TEXELS][MAX_COMP];
1197
1198    GLubyte input[N_TEXELS][MAX_COMP];
1199    GLint i, k, l;
1200
1201    if (comps == 3) {
1202       /* make the whole block opaque */
1203       memset(input, -1, sizeof(input));
1204    }
1205
1206    /* 8 texels each line */
1207    for (l = 0; l < 4; l++) {
1208       for (k = 0; k < 4; k++) {
1209          for (i = 0; i < comps; i++) {
1210             input[k + l * 4][i] = *lines[l]++;
1211          }
1212       }
1213       for (; k < 8; k++) {
1214          for (i = 0; i < comps; i++) {
1215             input[k + l * 4 + 12][i] = *lines[l]++;
1216          }
1217       }
1218    }
1219
1220    /* block layout:
1221     * 00, 01, 02, 03, 08, 09, 0a, 0b
1222     * 10, 11, 12, 13, 18, 19, 1a, 1b
1223     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1224     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1225     */
1226
1227    /* [dBorca]
1228     * stupidity flows forth from this
1229     */
1230    l = N_TEXELS;
1231    trualpha = 0;
1232    if (comps == 4) {
1233       /* skip all transparent black texels */
1234       l = 0;
1235       for (k = 0; k < N_TEXELS; k++) {
1236          /* test all components against 0 */
1237          if (!ISTBLACK(input[k])) {
1238             /* texel is not transparent black */
1239             COPY_4UBV(reord[l], input[k]);
1240             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1241                /* non-opaque texel */
1242                trualpha = !0;
1243             }
1244             l++;
1245          }
1246       }
1247    }
1248
1249 #if 0
1250    if (trualpha) {
1251       fxt1_quantize_ALPHA0(cc, input, reord, l);
1252    } else if (l == 0) {
1253       cc[0] = cc[1] = cc[2] = -1;
1254       cc[3] = 0;
1255    } else if (l < N_TEXELS) {
1256       fxt1_quantize_HI(cc, input, reord, l);
1257    } else {
1258       fxt1_quantize_CHROMA(cc, input);
1259    }
1260    (void)fxt1_quantize_ALPHA1;
1261    (void)fxt1_quantize_MIXED1;
1262    (void)fxt1_quantize_MIXED0;
1263 #else
1264    if (trualpha) {
1265       fxt1_quantize_ALPHA1(cc, input);
1266    } else if (l == 0) {
1267       cc[0] = cc[1] = cc[2] = ~0u;
1268       cc[3] = 0;
1269    } else if (l < N_TEXELS) {
1270       fxt1_quantize_MIXED1(cc, input);
1271    } else {
1272       fxt1_quantize_MIXED0(cc, input);
1273    }
1274    (void)fxt1_quantize_ALPHA0;
1275    (void)fxt1_quantize_HI;
1276    (void)fxt1_quantize_CHROMA;
1277 #endif
1278 }
1279
1280
1281
1282 /**
1283  * Upscale an image by replication, not (typical) stretching.
1284  * We use this when the image width or height is less than a
1285  * certain size (4, 8) and we need to upscale an image.
1286  */
1287 static void
1288 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1289                    GLsizei outWidth, GLsizei outHeight,
1290                    GLint comps, const GLubyte *src, GLint srcRowStride,
1291                    GLubyte *dest )
1292 {
1293    GLint i, j, k;
1294
1295    ASSERT(outWidth >= inWidth);
1296    ASSERT(outHeight >= inHeight);
1297 #if 0
1298    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1299    ASSERT((outWidth & 3) == 0);
1300    ASSERT((outHeight & 3) == 0);
1301 #endif
1302
1303    for (i = 0; i < outHeight; i++) {
1304       const GLint ii = i % inHeight;
1305       for (j = 0; j < outWidth; j++) {
1306          const GLint jj = j % inWidth;
1307          for (k = 0; k < comps; k++) {
1308             dest[(i * outWidth + j) * comps + k]
1309                = src[ii * srcRowStride + jj * comps + k];
1310          }
1311       }
1312    }
1313 }
1314
1315
1316 static void
1317 fxt1_encode (GLuint width, GLuint height, GLint comps,
1318              const void *source, GLint srcRowStride,
1319              void *dest, GLint destRowStride)
1320 {
1321    GLuint x, y;
1322    const GLubyte *data;
1323    GLuint *encoded = (GLuint *)dest;
1324    void *newSource = NULL;
1325
1326    assert(comps == 3 || comps == 4);
1327
1328    /* Replicate image if width is not M8 or height is not M4 */
1329    if ((width & 7) | (height & 3)) {
1330       GLint newWidth = (width + 7) & ~7;
1331       GLint newHeight = (height + 3) & ~3;
1332       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1333       if (!newSource) {
1334          GET_CURRENT_CONTEXT(ctx);
1335          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1336          goto cleanUp;
1337       }
1338       upscale_teximage2d(width, height, newWidth, newHeight,
1339                          comps, (const GLubyte *) source,
1340                          srcRowStride, (GLubyte *) newSource);
1341       source = newSource;
1342       width = newWidth;
1343       height = newHeight;
1344       srcRowStride = comps * newWidth;
1345    }
1346
1347    data = (const GLubyte *) source;
1348    destRowStride = (destRowStride - width * 2) / 4;
1349    for (y = 0; y < height; y += 4) {
1350       GLuint offs = 0 + (y + 0) * srcRowStride;
1351       for (x = 0; x < width; x += 8) {
1352          const GLubyte *lines[4];
1353          lines[0] = &data[offs];
1354          lines[1] = lines[0] + srcRowStride;
1355          lines[2] = lines[1] + srcRowStride;
1356          lines[3] = lines[2] + srcRowStride;
1357          offs += 8 * comps;
1358          fxt1_quantize(encoded, lines, comps);
1359          /* 128 bits per 8x4 block */
1360          encoded += 4;
1361       }
1362       encoded += destRowStride;
1363    }
1364
1365  cleanUp:
1366    if (newSource != NULL) {
1367       free(newSource);
1368    }
1369 }
1370
1371
1372 /***************************************************************************\
1373  * FXT1 decoder
1374  *
1375  * The decoder is based on GL_3DFX_texture_compression_FXT1
1376  * specification and serves as a concept for the encoder.
1377 \***************************************************************************/
1378
1379
1380 /* lookup table for scaling 5 bit colors up to 8 bits */
1381 static const GLubyte _rgb_scale_5[] = {
1382    0,   8,   16,  25,  33,  41,  49,  58,
1383    66,  74,  82,  90,  99,  107, 115, 123,
1384    132, 140, 148, 156, 165, 173, 181, 189,
1385    197, 206, 214, 222, 230, 239, 247, 255
1386 };
1387
1388 /* lookup table for scaling 6 bit colors up to 8 bits */
1389 static const GLubyte _rgb_scale_6[] = {
1390    0,   4,   8,   12,  16,  20,  24,  28,
1391    32,  36,  40,  45,  49,  53,  57,  61,
1392    65,  69,  73,  77,  81,  85,  89,  93,
1393    97,  101, 105, 109, 113, 117, 121, 125,
1394    130, 134, 138, 142, 146, 150, 154, 158,
1395    162, 166, 170, 174, 178, 182, 186, 190,
1396    194, 198, 202, 206, 210, 215, 219, 223,
1397    227, 231, 235, 239, 243, 247, 251, 255
1398 };
1399
1400
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)  the block viewed as an array of GLuints, shifted so
 *                    that bit number 'which' lands in bit 0 (bits above
 *                    the field are NOT masked off)
 * UP5(c)             low 5 bits of c expanded to 8 bits
 * UP6(c, b)          low 5 bits of c plus b as an extra least significant
 *                    bit, expanded from 6 to 8 bits
 * LERP(n, t, c0, c1) step t of n between c0 and c1, rounded to nearest;
 *                    fully parenthesised so it can be used as an operand
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1405
1406
1407 static void
1408 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1409 {
1410    const GLuint *cc;
1411
1412    t *= 3;
1413    cc = (const GLuint *)(code + t / 8);
1414    t = (cc[0] >> (t & 7)) & 7;
1415
1416    if (t == 7) {
1417       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1418    } else {
1419       GLubyte r, g, b;
1420       cc = (const GLuint *)(code + 12);
1421       if (t == 0) {
1422          b = UP5(CC_SEL(cc, 0));
1423          g = UP5(CC_SEL(cc, 5));
1424          r = UP5(CC_SEL(cc, 10));
1425       } else if (t == 6) {
1426          b = UP5(CC_SEL(cc, 15));
1427          g = UP5(CC_SEL(cc, 20));
1428          r = UP5(CC_SEL(cc, 25));
1429       } else {
1430          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1431          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1432          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1433       }
1434       rgba[RCOMP] = r;
1435       rgba[GCOMP] = g;
1436       rgba[BCOMP] = b;
1437       rgba[ACOMP] = 255;
1438    }
1439 }
1440
1441
1442 static void
1443 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1444 {
1445    const GLuint *cc;
1446    GLuint kk;
1447
1448    cc = (const GLuint *)code;
1449    if (t & 16) {
1450       cc++;
1451       t &= 15;
1452    }
1453    t = (cc[0] >> (t * 2)) & 3;
1454
1455    t *= 15;
1456    cc = (const GLuint *)(code + 8 + t / 8);
1457    kk = cc[0] >> (t & 7);
1458    rgba[BCOMP] = UP5(kk);
1459    rgba[GCOMP] = UP5(kk >> 5);
1460    rgba[RCOMP] = UP5(kk >> 10);
1461    rgba[ACOMP] = 255;
1462 }
1463
1464
1465 static void
1466 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1467 {
1468    const GLuint *cc;
1469    GLuint col[2][3];
1470    GLint glsb, selb;
1471
1472    cc = (const GLuint *)code;
1473    if (t & 16) {
1474       t &= 15;
1475       t = (cc[1] >> (t * 2)) & 3;
1476       /* col 2 */
1477       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1478       col[0][GCOMP] = CC_SEL(cc, 99);
1479       col[0][RCOMP] = CC_SEL(cc, 104);
1480       /* col 3 */
1481       col[1][BCOMP] = CC_SEL(cc, 109);
1482       col[1][GCOMP] = CC_SEL(cc, 114);
1483       col[1][RCOMP] = CC_SEL(cc, 119);
1484       glsb = CC_SEL(cc, 126);
1485       selb = CC_SEL(cc, 33);
1486    } else {
1487       t = (cc[0] >> (t * 2)) & 3;
1488       /* col 0 */
1489       col[0][BCOMP] = CC_SEL(cc, 64);
1490       col[0][GCOMP] = CC_SEL(cc, 69);
1491       col[0][RCOMP] = CC_SEL(cc, 74);
1492       /* col 1 */
1493       col[1][BCOMP] = CC_SEL(cc, 79);
1494       col[1][GCOMP] = CC_SEL(cc, 84);
1495       col[1][RCOMP] = CC_SEL(cc, 89);
1496       glsb = CC_SEL(cc, 125);
1497       selb = CC_SEL(cc, 1);
1498    }
1499
1500    if (CC_SEL(cc, 124) & 1) {
1501       /* alpha[0] == 1 */
1502
1503       if (t == 3) {
1504          /* zero */
1505          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1506       } else {
1507          GLubyte r, g, b;
1508          if (t == 0) {
1509             b = UP5(col[0][BCOMP]);
1510             g = UP5(col[0][GCOMP]);
1511             r = UP5(col[0][RCOMP]);
1512          } else if (t == 2) {
1513             b = UP5(col[1][BCOMP]);
1514             g = UP6(col[1][GCOMP], glsb);
1515             r = UP5(col[1][RCOMP]);
1516          } else {
1517             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1518             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1519             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1520          }
1521          rgba[RCOMP] = r;
1522          rgba[GCOMP] = g;
1523          rgba[BCOMP] = b;
1524          rgba[ACOMP] = 255;
1525       }
1526    } else {
1527       /* alpha[0] == 0 */
1528       GLubyte r, g, b;
1529       if (t == 0) {
1530          b = UP5(col[0][BCOMP]);
1531          g = UP6(col[0][GCOMP], glsb ^ selb);
1532          r = UP5(col[0][RCOMP]);
1533       } else if (t == 3) {
1534          b = UP5(col[1][BCOMP]);
1535          g = UP6(col[1][GCOMP], glsb);
1536          r = UP5(col[1][RCOMP]);
1537       } else {
1538          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1539          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1540                         UP6(col[1][GCOMP], glsb));
1541          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1542       }
1543       rgba[RCOMP] = r;
1544       rgba[GCOMP] = g;
1545       rgba[BCOMP] = b;
1546       rgba[ACOMP] = 255;
1547    }
1548 }
1549
1550
1551 static void
1552 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1553 {
1554    const GLuint *cc;
1555    GLubyte r, g, b, a;
1556
1557    cc = (const GLuint *)code;
1558    if (CC_SEL(cc, 124) & 1) {
1559       /* lerp == 1 */
1560       GLuint col0[4];
1561
1562       if (t & 16) {
1563          t &= 15;
1564          t = (cc[1] >> (t * 2)) & 3;
1565          /* col 2 */
1566          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1567          col0[GCOMP] = CC_SEL(cc, 99);
1568          col0[RCOMP] = CC_SEL(cc, 104);
1569          col0[ACOMP] = CC_SEL(cc, 119);
1570       } else {
1571          t = (cc[0] >> (t * 2)) & 3;
1572          /* col 0 */
1573          col0[BCOMP] = CC_SEL(cc, 64);
1574          col0[GCOMP] = CC_SEL(cc, 69);
1575          col0[RCOMP] = CC_SEL(cc, 74);
1576          col0[ACOMP] = CC_SEL(cc, 109);
1577       }
1578
1579       if (t == 0) {
1580          b = UP5(col0[BCOMP]);
1581          g = UP5(col0[GCOMP]);
1582          r = UP5(col0[RCOMP]);
1583          a = UP5(col0[ACOMP]);
1584       } else if (t == 3) {
1585          b = UP5(CC_SEL(cc, 79));
1586          g = UP5(CC_SEL(cc, 84));
1587          r = UP5(CC_SEL(cc, 89));
1588          a = UP5(CC_SEL(cc, 114));
1589       } else {
1590          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1591          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1592          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1593          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1594       }
1595    } else {
1596       /* lerp == 0 */
1597
1598       if (t & 16) {
1599          cc++;
1600          t &= 15;
1601       }
1602       t = (cc[0] >> (t * 2)) & 3;
1603
1604       if (t == 3) {
1605          /* zero */
1606          r = g = b = a = 0;
1607       } else {
1608          GLuint kk;
1609          cc = (const GLuint *)code;
1610          a = UP5(cc[3] >> (t * 5 + 13));
1611          t *= 15;
1612          cc = (const GLuint *)(code + 8 + t / 8);
1613          kk = cc[0] >> (t & 7);
1614          b = UP5(kk);
1615          g = UP5(kk >> 5);
1616          r = UP5(kk >> 10);
1617       }
1618    }
1619    rgba[RCOMP] = r;
1620    rgba[GCOMP] = g;
1621    rgba[BCOMP] = b;
1622    rgba[ACOMP] = a;
1623 }
1624
1625
1626 void
1627 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1628                GLint i, GLint j, GLubyte *rgba)
1629 {
1630    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1631       fxt1_decode_1HI,     /* cc-high   = "00?" */
1632       fxt1_decode_1HI,     /* cc-high   = "00?" */
1633       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1634       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1635       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1636       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1637       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1638       fxt1_decode_1MIXED   /* mixed     = "1??" */
1639    };
1640
1641    const GLubyte *code = (const GLubyte *)texture +
1642                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1643    GLint mode = CC_SEL(code, 125);
1644    GLint t = i & 7;
1645
1646    if (t & 4) {
1647       t += 12;
1648    }
1649    t += (j & 3) * 4;
1650
1651    decode_1[mode](code, t, rgba);
1652 }
1653
1654
1655 #endif /* FEATURE_texture_fxt1 */