* jfdctint.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2009 by Guido Vollbeding.
+ * Modification developed 2003-2013 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * cK represents sqrt(2) * cos(K*pi/16).
+ */
dataptr = data;
for (ctr = 0; ctr < DCTSIZE; ctr++) {
elemptr = sample_data[ctr] + start_col;
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
- dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
- CONST_BITS-PASS1_BITS);
- dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
- CONST_BITS-PASS1_BITS);
+
+ dataptr[2] = (DCTELEM)
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
-
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[1] = (DCTELEM)
- RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
- dataptr[3] = (DCTELEM)
- RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
- dataptr[5] = (DCTELEM)
- RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
- dataptr[7] = (DCTELEM)
- RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
+ * cK represents sqrt(2) * cos(K*pi/16).
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
dataptr[DCTSIZE*2] = (DCTELEM)
- RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM)
- RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS+PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
-
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[DCTSIZE*1] = (DCTELEM)
- RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*3] = (DCTELEM)
- RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*5] = (DCTELEM)
- RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*7] = (DCTELEM)
- RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
dataptr++; /* advance pointer to next column */
}
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* cK represents sqrt(2) * cos(K*pi/14). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
dataptr = data;
for (ctr = 0; ctr < 7; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* cK represents sqrt(2) * cos(K*pi/12). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
dataptr = data;
for (ctr = 0; ctr < 6; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* cK represents sqrt(2) * cos(K*pi/10). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We scale the results further by 2 as part of output adaption
+ * scaling for different DCT size.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
dataptr = data;
for (ctr = 0; ctr < 5; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
- /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We must also scale the output by (8/4)**2 = 2**2, which we add here.
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+ */
dataptr = data;
for (ctr = 0; ctr < 4; ctr++) {
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
*/
dataptr = data;
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We scale the results further by 2**2 as part of output adaption */
- /* scaling for different DCT size. */
- /* cK represents sqrt(2) * cos(K*pi/6). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We scale the results further by 2**2 as part of output adaption
+ * scaling for different DCT size.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
dataptr = data;
for (ctr = 0; ctr < 3; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ */
/* Row 0 */
elemptr = sample_data[0] + start_col;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* we scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* cK represents sqrt(2) * cos(K*pi/18). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * we scale the results further by 2 as part of output adaption
+ * scaling for different DCT size.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* we scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* cK represents sqrt(2) * cos(K*pi/20). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * we scale the results further by 2 as part of output adaption
+ * scaling for different DCT size.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* we scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* cK represents sqrt(2) * cos(K*pi/22). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * we scale the results further by 2 as part of output adaption
+ * scaling for different DCT size.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
- /* cK represents sqrt(2) * cos(K*pi/24). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
- /* cK represents sqrt(2) * cos(K*pi/26). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
- /* cK represents sqrt(2) * cos(K*pi/28). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
- /* cK represents sqrt(2) * cos(K*pi/30). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
dataptr = data;
ctr = 0;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* cK represents sqrt(2) * cos(K*pi/32). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
dataptr = data;
ctr = 0;
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
* We must also scale the output by (8/16)**2 = 1/2**2.
+ * cK represents sqrt(2) * cos(K*pi/32).
*/
dataptr = data;
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+ */
dataptr = data;
ctr = 0;
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
* We must also scale the output by 8/16 = 1/2.
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
- CONST_BITS+PASS1_BITS+1);
- dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
- CONST_BITS+PASS1_BITS+1);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS+PASS1_BITS+1);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
-
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
- CONST_BITS+PASS1_BITS+1);
- dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
- CONST_BITS+PASS1_BITS+1);
- dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
- CONST_BITS+PASS1_BITS+1);
- dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
- CONST_BITS+PASS1_BITS+1);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
dataptr++; /* advance pointer to next column */
}
/* Zero bottom row of output coefficient block. */
MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+ */
dataptr = data;
for (ctr = 0; ctr < 7; ctr++) {
/* Zero 2 bottom rows of output coefficient block. */
MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+ */
dataptr = data;
for (ctr = 0; ctr < 6; ctr++) {
/* Zero 3 bottom rows of output coefficient block. */
MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+ */
dataptr = data;
for (ctr = 0; ctr < 5; ctr++) {
/* Zero 4 bottom rows of output coefficient block. */
MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We must also scale the output by 8/4 = 2, which we add here. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We must also scale the output by 8/4 = 2, which we add here.
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ */
dataptr = data;
for (ctr = 0; ctr < 4; ctr++) {
elemptr = sample_data[ctr] + start_col;
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
- dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
- CONST_BITS-PASS1_BITS-1);
- dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
- CONST_BITS-PASS1_BITS-1);
+
+ dataptr[2] = (DCTELEM)
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS-1);
+ dataptr[6] = (DCTELEM)
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS-1);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
-
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[1] = (DCTELEM)
- RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
- dataptr[3] = (DCTELEM)
- RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
- dataptr[5] = (DCTELEM)
- RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
- dataptr[7] = (DCTELEM)
- RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
+ dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
+ dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
+ dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
dataptr += DCTSIZE; /* advance pointer to next row */
}
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
- * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ * 4-point FDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
*/
dataptr = data;
/* Odd part */
- tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
/* Add fudge factor here for final descale. */
tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We scale the results further by 2 as part of output adaption
+ * scaling for different DCT size.
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ */
dataptr = data;
for (ctr = 0; ctr < 3; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
- /* 4-point FDCT kernel, */
- /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here.
+ * 4-point FDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+ */
dataptr = data;
for (ctr = 0; ctr < 2; ctr++) {
*/
/* Even part */
+
/* Apply unsigned->signed conversion */
data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
/* Odd part */
+
data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
}
int ctr;
SHIFT_TEMPS
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ */
dataptr = data;
ctr = 0;
elemptr = sample_data[ctr] + start_col;
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
- CONST_BITS-PASS1_BITS);
- dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
- CONST_BITS-PASS1_BITS);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
+ dataptr[2] = (DCTELEM)
+ DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
-
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
- dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
- dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
- dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
ctr++;
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+ */
dataptr = data;
ctr = 0;
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ */
dataptr = data;
ctr = 0;
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+ */
dataptr = data;
ctr = 0;
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We must also scale the output by 8/4 = 2, which we add here. */
- /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We must also scale the output by 8/4 = 2, which we add here.
+ * 4-point FDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+ */
dataptr = data;
for (ctr = 0; ctr < DCTSIZE; ctr++) {
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
*/
dataptr = data;
for (ctr = 0; ctr < 4; ctr++) {
/* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * rotator "c1" should be "c6".
*/
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
dataptr[DCTSIZE*2] = (DCTELEM)
- RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM)
- RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS+PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
* i0..i3 in the paper are tmp0..tmp3 here.
*/
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
- z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
/* Add fudge factor here for final descale. */
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
- tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
- tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
- tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
- tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
- tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
- tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
- tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
- tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
-
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
tmp12 += z1;
tmp13 += z1;
- dataptr[DCTSIZE*1] = (DCTELEM)
- RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*3] = (DCTELEM)
- RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*5] = (DCTELEM)
- RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*7] = (DCTELEM)
- RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+ z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp0 += z1 + tmp12;
+ tmp3 += z1 + tmp13;
+
+ z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp1 += z1 + tmp13;
+ tmp2 += z1 + tmp12;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
dataptr++; /* advance pointer to next column */
}
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
- /* We scale the results further by 2 as part of output adaption */
- /* scaling for different DCT size. */
- /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT;
+ * furthermore, we scale the results by 2**PASS1_BITS.
+ * We scale the results further by 2 as part of output adaptation
+ * scaling for different DCT size.
+ * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+ */
dataptr = data;
for (ctr = 0; ctr < 6; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT. */
- /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
+ /* Pass 1: process rows.
+ * Note results are scaled up by sqrt(8) compared to a true DCT.
+ * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here.
+ */
dataptr = data;
for (ctr = 0; ctr < 4; ctr++) {
/* Pre-zero output coefficient block. */
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
- tmp0 = GETJSAMPLE(sample_data[0][start_col]);
- tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+ /* Pass 1: empty. */
- /* We leave the results scaled up by an overall factor of 8.
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
* We must also scale the output by (8/1)*(8/2) = 2**5.
*/
/* Even part */
+
+ tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+ tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
/* Apply unsigned->signed conversion */
data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
/* Odd part */
+
data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
}