Information in issue #6200 revealed that 0 as default value for the blue channel is a lot more common than 255 so let's use that instead. The discussion and testing revealed no negative effects for the other formats that use the same code to initialize the memory to read blocks of image data.
1526 lines
38 KiB
C
1526 lines
38 KiB
C
/*
 * DDS GIMP plugin
 *
 * Copyright (C) 2004-2012 Shawn Kirst <skirst@gmail.com>,
 * with parts (C) 2003 Arne Reuter <homepage@arnereuter.de> where specified.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301, USA.
 */
|
|
|
|
/*
 * Parts of this code have been generously released in the public domain
 * by Fabian 'ryg' Giesen. The original code can be found (at the time
 * of writing) here: http://mollyrocket.com/forums/viewtopic.php?t=392
 *
 * For more information about this code, see the README.dxt file that
 * came with the source.
 */
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <glib.h>
|
|
|
|
#include "dds.h"
|
|
#include "dxt.h"
|
|
#include "endian_rw.h"
|
|
#include "mipmap.h"
|
|
#include "imath.h"
|
|
#include "vec.h"
|
|
|
|
#include "dxt_tables.h"
|
|
|
|
/*
 * Exchange the values of two lvalues of the same type.
 *
 * Uses __typeof__ (accepted by GCC/Clang even in strict ISO modes),
 * parenthesises both arguments, and gives the temporary a name that is
 * unlikely to collide with a caller's variable, so SWAP(t, u) works.
 * Each argument is evaluated more than once: pass side-effect-free lvalues.
 */
#define SWAP(a, b) \
  do \
    { \
      __typeof__(a) swap_tmp_ = (a); \
      (a) = (b); \
      (b) = swap_tmp_; \
    } \
  while (0)
|
|
|
|
/* SIMD constants */
|
|
static const vec4_t V4ZERO = VEC4_CONST1(0.0f);
|
|
static const vec4_t V4ONE = VEC4_CONST1(1.0f);
|
|
static const vec4_t V4HALF = VEC4_CONST1(0.5f);
|
|
static const vec4_t V4ONETHIRD = VEC4_CONST3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
|
|
static const vec4_t V4TWOTHIRDS = VEC4_CONST3(2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f);
|
|
static const vec4_t V4GRID = VEC4_CONST3(31.0f, 63.0f, 31.0f);
|
|
static const vec4_t V4GRIDRCP = VEC4_CONST3(1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f);
|
|
static const vec4_t V4EPSILON = VEC4_CONST1(1e-04f);
|
|
|
|
typedef struct
|
|
{
|
|
unsigned int single;
|
|
unsigned int alphamask;
|
|
vec4_t points[16];
|
|
vec4_t palette[4];
|
|
vec4_t max;
|
|
vec4_t min;
|
|
vec4_t metric;
|
|
} dxtblock_t;
|
|
|
|
/*
 * Extract one 4x4 BGRA block.
 *
 * Copies the block whose top-left pixel is (x, y) from a w x h image with
 * 4 bytes per pixel into block (64 bytes, row-major).  When fewer than four
 * rows or columns remain inside the image, the missing positions are filled
 * by repeating in-range pixels according to the wrap table below.
 */
static void
extract_block (const unsigned char *src,
               int                  x,
               int                  y,
               int                  w,
               int                  h,
               unsigned char       *block)
{
  /* wrap[n - 1][k]: source offset for block position k when only n
   * rows (or columns) of the block lie inside the image */
  static const int wrap[4][4] =
  {
    { 0, 0, 0, 0 },
    { 0, 1, 0, 1 },
    { 0, 1, 2, 0 },
    { 0, 1, 2, 3 }
  };
  const int cols   = ((w - x) < 4) ? (w - x) : 4;
  const int rows   = ((h - y) < 4) ? (h - y) : 4;
  const int stride = w * 4;
  int row, col;

  for (row = 0; row < 4; ++row)
    {
      const int sy = wrap[rows - 1][row] + y;

      for (col = 0; col < 4; ++col)
        {
          const int            sx  = wrap[cols - 1][col] + x;
          const unsigned char *in  = &src[(sy * stride) + (sx * 4)];
          unsigned char       *out = &block[(row * 16) + (col * 4)];

          out[0] = in[0];
          out[1] = in[1];
          out[2] = in[2];
          out[3] = in[3];
        }
    }
}
|
|
|
|
/*
 * Pack a BGR8 colour (c[0] = blue, c[1] = green, c[2] = red) into RGB565:
 * red in bits 11-15, green in bits 5-10, blue in bits 0-4.
 * Each channel is reduced with mul8bit() (defined elsewhere; presumably a
 * rounded scale of an 8-bit value to the given maximum).
 */
static inline unsigned short
pack_rgb565 (const unsigned char *c)
{
  const int red5   = mul8bit(c[2], 31);
  const int green6 = mul8bit(c[1], 63);
  const int blue5  = mul8bit(c[0], 31);

  return (red5 << 11) | (green6 << 5) | blue5;
}
|
|
|
|
/*
 * Unpack an RGB565 value into three bytes in BGR order
 * (dst[0] = blue, dst[1] = green, dst[2] = red).
 * Each field is widened to 8 bits by replicating its top bits into the
 * low bits, so the maximum field value maps to 255.
 */
static void
unpack_rgb565 (unsigned char  *dst,
               unsigned short  v)
{
  const int blue5  = v & 0x1f;
  const int green6 = (v >> 5) & 0x3f;
  const int red5   = (v >> 11) & 0x1f;

  dst[0] = (blue5  << 3) | (blue5  >> 2);
  dst[1] = (green6 << 2) | (green6 >> 4);
  dst[2] = (red5   << 3) | (red5   >> 2);
}
|
|
|
|
/*
 * Linear interpolation at the 1/3 point between a and b, per channel:
 *
 *   dst = (2 * a + b) / 3
 *
 * According to the S3TC/DX10 specs this is the correct way to do the
 * interpolation (with no rounding bias).  Three channels are processed.
 */
static void
lerp_rgb13 (unsigned char *dst,
            unsigned char *a,
            unsigned char *b)
{
  int ch;

  for (ch = 0; ch < 3; ++ch)
    dst[ch] = (2 * a[ch] + b[ch]) / 3;
}
|
|
|
|
static void
|
|
vec4_endpoints_to_565 (int *start,
|
|
int *end,
|
|
const vec4_t a,
|
|
const vec4_t b)
|
|
{
|
|
int c[8] __attribute__((aligned(16)));
|
|
vec4_t ta = a * V4GRID + V4HALF;
|
|
vec4_t tb = b * V4GRID + V4HALF;
|
|
|
|
#ifdef USE_SSE
|
|
# ifdef __SSE2__
|
|
const __m128i C565 = _mm_setr_epi16(31, 63, 31, 0, 31, 63, 31, 0);
|
|
__m128i ia = _mm_cvttps_epi32(ta);
|
|
__m128i ib = _mm_cvttps_epi32(tb);
|
|
__m128i zero = _mm_setzero_si128();
|
|
__m128i words = _mm_packs_epi32(ia, ib);
|
|
words = _mm_min_epi16(C565, _mm_max_epi16(zero, words));
|
|
*((__m128i *)&c[0]) = _mm_unpacklo_epi16(words, zero);
|
|
*((__m128i *)&c[4]) = _mm_unpackhi_epi16(words, zero);
|
|
# else
|
|
const __m64 C565 = _mm_setr_pi16(31, 63, 31, 0);
|
|
__m64 lo, hi, c0, c1;
|
|
__m64 zero = _mm_setzero_si64();
|
|
lo = _mm_cvttps_pi32(ta);
|
|
hi = _mm_cvttps_pi32(_mm_movehl_ps(ta, ta));
|
|
c0 = _mm_packs_pi32(lo, hi);
|
|
lo = _mm_cvttps_pi32(tb);
|
|
hi = _mm_cvttps_pi32(_mm_movehl_ps(tb, tb));
|
|
c1 = _mm_packs_pi32(lo, hi);
|
|
c0 = _mm_min_pi16(C565, _mm_max_pi16(zero, c0));
|
|
c1 = _mm_min_pi16(C565, _mm_max_pi16(zero, c1));
|
|
*((__m64 *)&c[0]) = _mm_unpacklo_pi16(c0, zero);
|
|
*((__m64 *)&c[2]) = _mm_unpackhi_pi16(c0, zero);
|
|
*((__m64 *)&c[4]) = _mm_unpacklo_pi16(c1, zero);
|
|
*((__m64 *)&c[6]) = _mm_unpackhi_pi16(c1, zero);
|
|
_mm_empty();
|
|
# endif
|
|
#else
|
|
c[0] = (int)ta[0]; c[4] = (int)tb[0];
|
|
c[1] = (int)ta[1]; c[5] = (int)tb[1];
|
|
c[2] = (int)ta[2]; c[6] = (int)tb[2];
|
|
c[0] = MIN(31, MAX(0, c[0]));
|
|
c[1] = MIN(63, MAX(0, c[1]));
|
|
c[2] = MIN(31, MAX(0, c[2]));
|
|
c[4] = MIN(31, MAX(0, c[4]));
|
|
c[5] = MIN(63, MAX(0, c[5]));
|
|
c[6] = MIN(31, MAX(0, c[6]));
|
|
#endif
|
|
|
|
*start = ((c[2] << 11) | (c[1] << 5) | c[0]);
|
|
*end = ((c[6] << 11) | (c[5] << 5) | c[4]);
|
|
}
|
|
|
|
static void
|
|
dxtblock_init (dxtblock_t *dxtb,
|
|
const unsigned char *block,
|
|
int flags)
|
|
{
|
|
int i, c0, c;
|
|
int bc1 = (flags & DXT_BC1);
|
|
float x, y, z;
|
|
vec4_t min, max, center, t, cov, inset;
|
|
|
|
dxtb->single = 1;
|
|
dxtb->alphamask = 0;
|
|
|
|
if(flags & DXT_PERCEPTUAL)
|
|
/* ITU-R BT.709 luma coefficients */
|
|
dxtb->metric = vec4_set(0.2126f, 0.7152f, 0.0722f, 0.0f);
|
|
else
|
|
dxtb->metric = vec4_set(1.0f, 1.0f, 1.0f, 0.0f);
|
|
|
|
c0 = GETL24(block);
|
|
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
if (bc1 && (block[4 * i + 3] < 128))
|
|
dxtb->alphamask |= (3 << (2 * i));
|
|
|
|
x = (float)block[4 * i + 0] / 255.0f;
|
|
y = (float)block[4 * i + 1] / 255.0f;
|
|
z = (float)block[4 * i + 2] / 255.0f;
|
|
|
|
dxtb->points[i] = vec4_set(x, y, z, 0);
|
|
|
|
c = GETL24(&block[4 * i]);
|
|
dxtb->single = dxtb->single && (c == c0);
|
|
}
|
|
|
|
// no need to continue if this is a single color block
|
|
if (dxtb->single)
|
|
return;
|
|
|
|
min = vec4_set1(1.0f);
|
|
max = vec4_zero();
|
|
|
|
// get bounding box extents
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
min = vec4_min(min, dxtb->points[i]);
|
|
max = vec4_max(max, dxtb->points[i]);
|
|
}
|
|
|
|
// select diagonal
|
|
center = (max + min) * V4HALF;
|
|
cov = vec4_zero();
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
t = dxtb->points[i] - center;
|
|
cov += t * vec4_splatz(t);
|
|
}
|
|
|
|
#ifdef USE_SSE
|
|
{
|
|
__m128 mask, tmp;
|
|
// get mask
|
|
mask = _mm_cmplt_ps(cov, _mm_setzero_ps());
|
|
// clear high bits (z, w)
|
|
mask = _mm_movelh_ps(mask, _mm_setzero_ps());
|
|
// mask and combine
|
|
tmp = _mm_or_ps(_mm_and_ps(mask, min), _mm_andnot_ps(mask, max));
|
|
min = _mm_or_ps(_mm_and_ps(mask, max), _mm_andnot_ps(mask, min));
|
|
max = tmp;
|
|
}
|
|
#else
|
|
{
|
|
float x0, x1, y0, y1;
|
|
x0 = max[0];
|
|
y0 = max[1];
|
|
x1 = min[0];
|
|
y1 = min[1];
|
|
|
|
if (cov[0] < 0) SWAP(x0, x1);
|
|
if (cov[1] < 0) SWAP(y0, y1);
|
|
|
|
max[0] = x0;
|
|
max[1] = y0;
|
|
min[0] = x1;
|
|
min[1] = y1;
|
|
}
|
|
#endif
|
|
|
|
// inset bounding box and clamp to [0,1]
|
|
inset = (max - min) * vec4_set1(1.0f / 16.0f) - vec4_set1((8.0f / 255.0f) / 16.0f);
|
|
max = vec4_min(V4ONE, vec4_max(V4ZERO, max - inset));
|
|
min = vec4_min(V4ONE, vec4_max(V4ZERO, min + inset));
|
|
|
|
// clamp to color space and save
|
|
dxtb->max = vec4_trunc(V4GRID * max + V4HALF) * V4GRIDRCP;
|
|
dxtb->min = vec4_trunc(V4GRID * min + V4HALF) * V4GRIDRCP;
|
|
}
|
|
|
|
static void
|
|
construct_palette3 (dxtblock_t *dxtb)
|
|
{
|
|
dxtb->palette[0] = dxtb->max;
|
|
dxtb->palette[1] = dxtb->min;
|
|
dxtb->palette[2] = (dxtb->max * V4HALF) + (dxtb->min * V4HALF);
|
|
dxtb->palette[3] = vec4_zero();
|
|
}
|
|
|
|
static void
|
|
construct_palette4 (dxtblock_t *dxtb)
|
|
{
|
|
dxtb->palette[0] = dxtb->max;
|
|
dxtb->palette[1] = dxtb->min;
|
|
dxtb->palette[2] = (dxtb->max * V4TWOTHIRDS) + (dxtb->min * V4ONETHIRD );
|
|
dxtb->palette[3] = (dxtb->max * V4ONETHIRD ) + (dxtb->min * V4TWOTHIRDS);
|
|
}
|
|
|
|
/*
|
|
* from nvidia-texture-tools; see LICENSE.nvtt for copyright information
|
|
*/
|
|
static void
|
|
optimize_endpoints3 (dxtblock_t *dxtb,
|
|
unsigned int indices,
|
|
vec4_t *max,
|
|
vec4_t *min)
|
|
{
|
|
float alpha, beta;
|
|
vec4_t alpha2_sum, alphax_sum;
|
|
vec4_t beta2_sum, betax_sum;
|
|
vec4_t alphabeta_sum, a, b, factor;
|
|
int i, bits;
|
|
|
|
alpha2_sum = beta2_sum = alphabeta_sum = vec4_zero();
|
|
alphax_sum = vec4_zero();
|
|
betax_sum = vec4_zero();
|
|
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
bits = indices >> (2 * i);
|
|
|
|
// skip alpha pixels
|
|
if ((bits & 3) == 3)
|
|
continue;
|
|
|
|
beta = (float)(bits & 1);
|
|
if (bits & 2)
|
|
beta = 0.5f;
|
|
alpha = 1.0f - beta;
|
|
|
|
a = vec4_set1(alpha);
|
|
b = vec4_set1(beta);
|
|
alpha2_sum += a * a;
|
|
beta2_sum += b * b;
|
|
alphabeta_sum += a * b;
|
|
alphax_sum += dxtb->points[i] * a;
|
|
betax_sum += dxtb->points[i] * b;
|
|
}
|
|
|
|
factor = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
|
|
if (vec4_cmplt(factor, V4EPSILON))
|
|
return;
|
|
factor = vec4_rcp(factor);
|
|
|
|
a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
|
b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
|
|
|
// clamp to the color space
|
|
a = vec4_min(V4ONE, vec4_max(V4ZERO, a));
|
|
b = vec4_min(V4ONE, vec4_max(V4ZERO, b));
|
|
a = vec4_trunc(V4GRID * a + V4HALF) * V4GRIDRCP;
|
|
b = vec4_trunc(V4GRID * b + V4HALF) * V4GRIDRCP;
|
|
|
|
*max = a;
|
|
*min = b;
|
|
}
|
|
|
|
/*
|
|
* from nvidia-texture-tools; see LICENSE.nvtt for copyright information
|
|
*/
|
|
static void
|
|
optimize_endpoints4 (dxtblock_t *dxtb,
|
|
unsigned int indices,
|
|
vec4_t *max,
|
|
vec4_t *min)
|
|
{
|
|
float alpha, beta;
|
|
vec4_t alpha2_sum, alphax_sum;
|
|
vec4_t beta2_sum, betax_sum;
|
|
vec4_t alphabeta_sum, a, b, factor;
|
|
int i, bits;
|
|
|
|
alpha2_sum = beta2_sum = alphabeta_sum = vec4_zero();
|
|
alphax_sum = vec4_zero();
|
|
betax_sum = vec4_zero();
|
|
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
bits = indices >> (2 * i);
|
|
|
|
beta = (float)(bits & 1);
|
|
if (bits & 2)
|
|
beta = (1.0f + beta) / 3.0f;
|
|
alpha = 1.0f - beta;
|
|
|
|
a = vec4_set1(alpha);
|
|
b = vec4_set1(beta);
|
|
alpha2_sum += a * a;
|
|
beta2_sum += b * b;
|
|
alphabeta_sum += a * b;
|
|
alphax_sum += dxtb->points[i] * a;
|
|
betax_sum += dxtb->points[i] * b;
|
|
}
|
|
|
|
factor = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
|
|
if (vec4_cmplt(factor, V4EPSILON))
|
|
return;
|
|
factor = vec4_rcp(factor);
|
|
|
|
a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
|
|
b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
|
|
|
|
// clamp to the color space
|
|
a = vec4_min(V4ONE, vec4_max(V4ZERO, a));
|
|
b = vec4_min(V4ONE, vec4_max(V4ZERO, b));
|
|
a = vec4_trunc(V4GRID * a + V4HALF) * V4GRIDRCP;
|
|
b = vec4_trunc(V4GRID * b + V4HALF) * V4GRIDRCP;
|
|
|
|
*max = a;
|
|
*min = b;
|
|
}
|
|
|
|
static unsigned int
|
|
match_colors3 (dxtblock_t *dxtb)
|
|
{
|
|
int i, idx;
|
|
unsigned int indices = 0;
|
|
vec4_t t0, t1, t2;
|
|
#ifdef USE_SSE
|
|
vec4_t d, bits, zero = _mm_setzero_ps();
|
|
int mask;
|
|
#else
|
|
float d0, d1, d2;
|
|
#endif
|
|
|
|
// match each point to the closest color
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
// skip alpha pixels
|
|
if (((dxtb->alphamask >> (2 * i)) & 3) == 3)
|
|
{
|
|
indices |= (3 << (2 * i));
|
|
continue;
|
|
}
|
|
|
|
t0 = (dxtb->points[i] - dxtb->palette[0]) * dxtb->metric;
|
|
t1 = (dxtb->points[i] - dxtb->palette[1]) * dxtb->metric;
|
|
t2 = (dxtb->points[i] - dxtb->palette[2]) * dxtb->metric;
|
|
|
|
#ifdef USE_SSE
|
|
_MM_TRANSPOSE4_PS(t0, t1, t2, zero);
|
|
d = t0 * t0 + t1 * t1 + t2 * t2;
|
|
bits = _mm_cmplt_ps(_mm_shuffle_ps(d, d, _MM_SHUFFLE(3, 1, 0, 0)),
|
|
_mm_shuffle_ps(d, d, _MM_SHUFFLE(3, 2, 2, 1)));
|
|
mask = _mm_movemask_ps(bits);
|
|
if((mask & 3) == 3) idx = 0;
|
|
else if(mask & 4) idx = 1;
|
|
else idx = 2;
|
|
#else
|
|
d0 = vec4_dot(t0, t0);
|
|
d1 = vec4_dot(t1, t1);
|
|
d2 = vec4_dot(t2, t2);
|
|
|
|
if ((d0 < d1) && (d0 < d2))
|
|
idx = 0;
|
|
else if (d1 < d2)
|
|
idx = 1;
|
|
else
|
|
idx = 2;
|
|
#endif
|
|
|
|
indices |= (idx << (2 * i));
|
|
}
|
|
|
|
return indices;
|
|
}
|
|
|
|
static unsigned int
|
|
match_colors4 (dxtblock_t *dxtb)
|
|
{
|
|
int i;
|
|
unsigned int idx, indices = 0;
|
|
unsigned int b0, b1, b2, b3, b4;
|
|
unsigned int x0, x1, x2;
|
|
vec4_t t0, t1, t2, t3;
|
|
#ifdef USE_SSE
|
|
vec4_t d;
|
|
#else
|
|
float d[4];
|
|
#endif
|
|
|
|
// match each point to the closest color
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
t0 = (dxtb->points[i] - dxtb->palette[0]) * dxtb->metric;
|
|
t1 = (dxtb->points[i] - dxtb->palette[1]) * dxtb->metric;
|
|
t2 = (dxtb->points[i] - dxtb->palette[2]) * dxtb->metric;
|
|
t3 = (dxtb->points[i] - dxtb->palette[3]) * dxtb->metric;
|
|
|
|
#ifdef USE_SSE
|
|
_MM_TRANSPOSE4_PS(t0, t1, t2, t3);
|
|
d = t0 * t0 + t1 * t1 + t2 * t2;
|
|
#else
|
|
d[0] = vec4_dot(t0, t0);
|
|
d[1] = vec4_dot(t1, t1);
|
|
d[2] = vec4_dot(t2, t2);
|
|
d[3] = vec4_dot(t3, t3);
|
|
#endif
|
|
|
|
b0 = d[0] > d[3];
|
|
b1 = d[1] > d[2];
|
|
b2 = d[0] > d[2];
|
|
b3 = d[1] > d[3];
|
|
b4 = d[2] > d[3];
|
|
|
|
x0 = b1 & b2;
|
|
x1 = b0 & b3;
|
|
x2 = b0 & b4;
|
|
|
|
idx = x2 | ((x0 | x1) << 1);
|
|
|
|
indices |= (idx << (2 * i));
|
|
}
|
|
|
|
return indices;
|
|
}
|
|
|
|
static float
|
|
compute_error3 (dxtblock_t *dxtb,
|
|
unsigned int indices)
|
|
{
|
|
int i, idx;
|
|
float error = 0;
|
|
vec4_t t;
|
|
|
|
// compute error
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
idx = (indices >> (2 * i)) & 3;
|
|
// skip alpha pixels
|
|
if(idx == 3)
|
|
continue;
|
|
t = (dxtb->points[i] - dxtb->palette[idx]) * dxtb->metric;
|
|
error += vec4_dot(t, t);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
static float
|
|
compute_error4 (dxtblock_t *dxtb,
|
|
unsigned int indices)
|
|
{
|
|
int i, idx;
|
|
float error = 0;
|
|
|
|
#ifdef USE_SSE
|
|
vec4_t a0, a1, a2, a3;
|
|
vec4_t b0, b1, b2, b3;
|
|
vec4_t d;
|
|
|
|
for (i = 0; i < 4; ++i)
|
|
{
|
|
idx = indices >> (8 * i);
|
|
a0 = dxtb->points[4 * i + 0];
|
|
a1 = dxtb->points[4 * i + 1];
|
|
a2 = dxtb->points[4 * i + 2];
|
|
a3 = dxtb->points[4 * i + 3];
|
|
b0 = dxtb->palette[(idx ) & 3];
|
|
b1 = dxtb->palette[(idx >> 2) & 3];
|
|
b2 = dxtb->palette[(idx >> 4) & 3];
|
|
b3 = dxtb->palette[(idx >> 6) & 3];
|
|
a0 = (a0 - b0) * dxtb->metric;
|
|
a1 = (a1 - b1) * dxtb->metric;
|
|
a2 = (a2 - b2) * dxtb->metric;
|
|
a3 = (a3 - b3) * dxtb->metric;
|
|
_MM_TRANSPOSE4_PS(a0, a1, a2, a3);
|
|
d = a0 * a0 + a1 * a1 + a2 * a2;
|
|
error += vec4_accum(d);
|
|
}
|
|
#else
|
|
vec4_t t;
|
|
|
|
// compute error
|
|
for (i = 0; i < 16; ++i)
|
|
{
|
|
idx = (indices >> (2 * i)) & 3;
|
|
t = (dxtb->points[i] - dxtb->palette[idx]) * dxtb->metric;
|
|
error += vec4_dot(t, t);
|
|
}
|
|
#endif
|
|
|
|
return error;
|
|
}
|
|
|
|
static unsigned int
|
|
compress3 (dxtblock_t *dxtb)
|
|
{
|
|
const int MAX_ITERATIONS = 8;
|
|
int i;
|
|
unsigned int indices, bestindices;
|
|
float error, besterror = FLT_MAX;
|
|
vec4_t oldmax, oldmin;
|
|
|
|
construct_palette3(dxtb);
|
|
|
|
indices = match_colors3(dxtb);
|
|
bestindices = indices;
|
|
|
|
for (i = 0; i < MAX_ITERATIONS; ++i)
|
|
{
|
|
oldmax = dxtb->max;
|
|
oldmin = dxtb->min;
|
|
|
|
optimize_endpoints3(dxtb, indices, &dxtb->max, &dxtb->min);
|
|
construct_palette3(dxtb);
|
|
indices = match_colors3(dxtb);
|
|
error = compute_error3(dxtb, indices);
|
|
|
|
if (error < besterror)
|
|
{
|
|
besterror = error;
|
|
bestindices = indices;
|
|
}
|
|
else
|
|
{
|
|
dxtb->max = oldmax;
|
|
dxtb->min = oldmin;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return bestindices;
|
|
}
|
|
|
|
static unsigned int
|
|
compress4 (dxtblock_t *dxtb)
|
|
{
|
|
const int MAX_ITERATIONS = 8;
|
|
int i;
|
|
unsigned int indices, bestindices;
|
|
float error, besterror = FLT_MAX;
|
|
vec4_t oldmax, oldmin;
|
|
|
|
construct_palette4(dxtb);
|
|
|
|
indices = match_colors4(dxtb);
|
|
bestindices = indices;
|
|
|
|
for (i = 0; i < MAX_ITERATIONS; ++i)
|
|
{
|
|
oldmax = dxtb->max;
|
|
oldmin = dxtb->min;
|
|
|
|
optimize_endpoints4(dxtb, indices, &dxtb->max, &dxtb->min);
|
|
construct_palette4(dxtb);
|
|
indices = match_colors4(dxtb);
|
|
error = compute_error4(dxtb, indices);
|
|
|
|
if (error < besterror)
|
|
{
|
|
besterror = error;
|
|
bestindices = indices;
|
|
}
|
|
else
|
|
{
|
|
dxtb->max = oldmax;
|
|
dxtb->min = oldmin;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return bestindices;
|
|
}
|
|
|
|
static void
|
|
encode_color_block (unsigned char *dst,
|
|
unsigned char *block,
|
|
int flags)
|
|
{
|
|
dxtblock_t dxtb;
|
|
int max16, min16;
|
|
unsigned int indices, mask;
|
|
|
|
dxtblock_init(&dxtb, block, flags);
|
|
|
|
if (dxtb.single) // single color block
|
|
{
|
|
max16 = (omatch5[block[2]][0] << 11) |
|
|
(omatch6[block[1]][0] << 5) |
|
|
(omatch5[block[0]][0] );
|
|
min16 = (omatch5[block[2]][1] << 11) |
|
|
(omatch6[block[1]][1] << 5) |
|
|
(omatch5[block[0]][1] );
|
|
|
|
indices = 0xaaaaaaaa; // 101010...
|
|
|
|
if ((flags & DXT_BC1) && dxtb.alphamask)
|
|
{
|
|
// DXT1 compression, non-opaque block. Add alpha indices.
|
|
indices |= dxtb.alphamask;
|
|
if (max16 > min16)
|
|
SWAP(max16, min16);
|
|
}
|
|
else if (max16 < min16)
|
|
{
|
|
SWAP(max16, min16);
|
|
indices ^= 0x55555555; // 010101...
|
|
}
|
|
}
|
|
else if ((flags & DXT_BC1) && dxtb.alphamask) // DXT1 compression, non-opaque block
|
|
{
|
|
indices = compress3(&dxtb);
|
|
|
|
vec4_endpoints_to_565(&max16, &min16, dxtb.max, dxtb.min);
|
|
|
|
if (max16 > min16)
|
|
{
|
|
SWAP(max16, min16);
|
|
// remap indices 0 -> 1, 1 -> 0
|
|
mask = indices & 0xaaaaaaaa;
|
|
mask = mask | (mask >> 1);
|
|
indices = (indices & mask) | ((indices ^ 0x55555555) & ~mask);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
indices = compress4(&dxtb);
|
|
|
|
vec4_endpoints_to_565(&max16, &min16, dxtb.max, dxtb.min);
|
|
|
|
if (max16 < min16)
|
|
{
|
|
SWAP(max16, min16);
|
|
indices ^= 0x55555555; // 010101...
|
|
}
|
|
}
|
|
|
|
PUTL16(dst + 0, max16);
|
|
PUTL16(dst + 2, min16);
|
|
PUTL32(dst + 4, indices);
|
|
}
|
|
|
|
/*
 * Find the per-channel minimum and maximum of channels 1 and 2 over the
 * 16 pixels of a 4-bytes-per-pixel block.  Only elements [1] and [2] of
 * mincolor / maxcolor are written.
 */
static void
get_min_max_YCoCg (const unsigned char *block,
                   unsigned char       *mincolor,
                   unsigned char       *maxcolor)
{
  unsigned char lo1 = 255, lo2 = 255;
  unsigned char hi1 = 0, hi2 = 0;
  int i;

  for (i = 0; i < 16; ++i)
    {
      const unsigned char c1 = block[4 * i + 1];
      const unsigned char c2 = block[4 * i + 2];

      if (c2 < lo2) lo2 = c2;
      if (c1 < lo1) lo1 = c1;
      if (c2 > hi2) hi2 = c2;
      if (c1 > hi1) hi1 = c1;
    }

  mincolor[2] = lo2;
  mincolor[1] = lo1;
  maxcolor[2] = hi2;
  maxcolor[1] = hi1;
}
|
|
|
|
/*
 * Scale channels 1 and 2 about the value 128 by the largest of 1, 2 or 4
 * that the bounding box allows: 4 when every bound lies within 31 of 128,
 * 2 when within 63, otherwise 1.
 *
 * The scale is applied to channels 1 and 2 of mincolor, maxcolor and all
 * 16 pixels of block; (scale - 1) << 3 is stored in channel 0 of both
 * mincolor and maxcolor.
 */
static void
scale_YCoCg (unsigned char *block,
             unsigned char *mincolor,
             unsigned char *maxcolor)
{
  const int limit_x2 = 128 / 2 - 1;
  const int limit_x4 = 128 / 4 - 1;
  int extent, dev, scale, i;

  /* largest distance of any bound from the centre value */
  extent = abs(mincolor[2] - 128);
  dev = abs(mincolor[1] - 128);
  if (dev > extent) extent = dev;
  dev = abs(maxcolor[2] - 128);
  if (dev > extent) extent = dev;
  dev = abs(maxcolor[1] - 128);
  if (dev > extent) extent = dev;

  scale = 1;
  if (extent <= limit_x2) scale += 1;
  if (extent <= limit_x4) scale += 2;

  mincolor[2] = (mincolor[2] - 128) * scale + 128;
  mincolor[1] = (mincolor[1] - 128) * scale + 128;
  mincolor[0] = (scale - 1) << 3;

  maxcolor[2] = (maxcolor[2] - 128) * scale + 128;
  maxcolor[1] = (maxcolor[1] - 128) * scale + 128;
  maxcolor[0] = (scale - 1) << 3;

  for (i = 0; i < 16; ++i)
    {
      unsigned char *px = block + i * 4;

      px[2] = (px[2] - 128) * scale + 128;
      px[1] = (px[1] - 128) * scale + 128;
    }
}
|
|
|
|
/* fixed-point precision (in bits) used while insetting the bounding box */
#define INSET_SHIFT 4

/*
 * Inset the bounding box of channels 1 and 2.
 *
 * Per channel, both bounds are moved towards each other by
 * (range - 7) / 16 (computed in fixed point), clamped to [0,255], and then
 * quantised with the top bits replicated into the low bits: channel 2 keeps
 * 5 bits, channel 1 keeps 6 bits.  Channel 0 is not touched.
 */
static void
inset_bbox_YCoCg (unsigned char *mincolor,
                  unsigned char *maxcolor)
{
  const int bias   = (1 << (INSET_SHIFT - 1)) - 1;
  const int inset2 = (maxcolor[2] - mincolor[2]) - bias;
  const int inset1 = (maxcolor[1] - mincolor[1]) - bias;
  int lo2, lo1, hi2, hi1;

  lo2 = ((mincolor[2] << INSET_SHIFT) + inset2) >> INSET_SHIFT;
  lo1 = ((mincolor[1] << INSET_SHIFT) + inset1) >> INSET_SHIFT;
  hi2 = ((maxcolor[2] << INSET_SHIFT) - inset2) >> INSET_SHIFT;
  hi1 = ((maxcolor[1] << INSET_SHIFT) - inset1) >> INSET_SHIFT;

  if (lo2 < 0) lo2 = 0;
  if (lo1 < 0) lo1 = 0;
  if (hi2 > 255) hi2 = 255;
  if (hi1 > 255) hi1 = 255;

  mincolor[2] = (lo2 & 0xf8) | (lo2 >> 5);
  mincolor[1] = (lo1 & 0xfc) | (lo1 >> 6);
  maxcolor[2] = (hi2 & 0xf8) | (hi2 >> 5);
  maxcolor[1] = (hi1 & 0xfc) | (hi1 >> 6);
}
|
|
|
|
/*
 * Choose which diagonal of the channel-1 / channel-2 bounding box the
 * endpoints should span.
 *
 * Counts the pixels that lie on opposite sides of the two channel
 * midpoints.  When more than 8 of the 16 do, and the channel-2 bounds
 * differ, the channel-1 values of mincolor and maxcolor are exchanged.
 */
static void
select_diagonal_YCoCg (const unsigned char *block,
                       unsigned char       *mincolor,
                       unsigned char       *maxcolor)
{
  const unsigned char mid2 = ((int)mincolor[2] + maxcolor[2] + 1) >> 1;
  const unsigned char mid1 = ((int)mincolor[1] + maxcolor[1] + 1) >> 1;
  unsigned char opposite = 0;
  unsigned char held;
  int i;

  for (i = 0; i < 16; ++i)
    {
      const int above2 = block[i * 4 + 2] >= mid2;
      const int above1 = block[i * 4 + 1] >= mid1;

      if (above2 != above1)
        ++opposite;
    }

  if ((opposite > 8) && (mincolor[2] != maxcolor[2]))
    {
      held = mincolor[1];
      mincolor[1] = maxcolor[1];
      maxcolor[1] = held;
    }
}
|
|
|
|
/*
 * Encode the colour part of one scaled-YCoCg block into 8 bytes at dst:
 * two RGB565 endpoints followed by the 32-bit index word.
 *
 * The block is modified in place by scale_YCoCg().  Endpoints come from
 * the bounding box of channels 1 and 2 (scaled, inset, diagonal selected);
 * each pixel is then matched to the closest of the four palette colours
 * using the sum of absolute differences of channels 2 and 1.
 *
 * The 2-bit index is shifted as an unsigned value: shifting a signed 2 or
 * 3 left by 30 is undefined behaviour.
 */
static void
encode_YCoCg_block (unsigned char *dst,
                    unsigned char *block)
{
  unsigned char colors[4][3], *maxcolor, *mincolor;
  unsigned int mask;
  int c0, c1, d0, d1, d2, d3;
  int b0, b1, b2, b3, b4;
  int x0, x1, x2;
  int i, idx;

  maxcolor = &colors[0][0];
  mincolor = &colors[1][0];

  get_min_max_YCoCg(block, mincolor, maxcolor);
  scale_YCoCg(block, mincolor, maxcolor);
  inset_bbox_YCoCg(mincolor, maxcolor);
  select_diagonal_YCoCg(block, mincolor, maxcolor);

  /* the two interpolated palette entries */
  lerp_rgb13(&colors[2][0], maxcolor, mincolor);
  lerp_rgb13(&colors[3][0], mincolor, maxcolor);

  mask = 0;

  for (i = 0; i < 16; ++i)
    {
      c0 = block[4 * i + 2];
      c1 = block[4 * i + 1];

      d0 = abs(colors[0][2] - c0) + abs(colors[0][1] - c1);
      d1 = abs(colors[1][2] - c0) + abs(colors[1][1] - c1);
      d2 = abs(colors[2][2] - c0) + abs(colors[2][1] - c1);
      d3 = abs(colors[3][2] - c0) + abs(colors[3][1] - c1);

      /* branch-free selection of the index from pairwise comparisons */
      b0 = d0 > d3;
      b1 = d1 > d2;
      b2 = d0 > d2;
      b3 = d1 > d3;
      b4 = d2 > d3;

      x0 = b1 & b2;
      x1 = b0 & b3;
      x2 = b0 & b4;

      idx = (x2 | ((x0 | x1) << 1));

      mask |= (unsigned int)idx << (2 * i);
    }

  PUTL16(dst + 0, pack_rgb565(maxcolor));
  PUTL16(dst + 2, pack_rgb565(mincolor));
  PUTL32(dst + 4, mask);
}
|
|
|
|
/*
 * Write a DXT3 alpha block: 16 alpha values reduced to 4 bits each with
 * mul8bit(value, 0x0f), two per output byte (8 bytes in total).  The
 * earlier pixel of each pair goes into the low nibble.  Alpha is read from
 * byte 3 of every 4-byte pixel.
 */
static void
encode_alpha_block_BC2 (unsigned char       *dst,
                        const unsigned char *block)
{
  const unsigned char *alpha = block + 3;
  int pair, lo, hi;

  for (pair = 0; pair < 8; ++pair)
    {
      lo = mul8bit(alpha[8 * pair + 0], 0x0f);
      hi = mul8bit(alpha[8 * pair + 4], 0x0f);
      dst[pair] = (hi << 4) | lo;
    }
}
|
|
|
|
/*
 * Write a DXT5 alpha block (8 bytes): the maximum and minimum value of the
 * selected channel followed by sixteen 3-bit indices, pixel 0 in the
 * lowest bits.
 *
 * dst:    receives the 8 output bytes
 * block:  16 pixels, 4 bytes each
 * offset: channel selector relative to byte 3 of each pixel; 0 encodes
 *         byte 3, -1 byte 2, -2 byte 1
 *
 * The channel pointer is formed from the net offset (offset + 3) so that
 * no intermediate pointer ever points before the start of block.
 */
static void
encode_alpha_block_BC3 (unsigned char       *dst,
                        const unsigned char *block,
                        const int            offset)
{
  const unsigned char *chan = block + (offset + 3);
  int i, v, mn, mx;
  int dist, bias, dist2, dist4;
  int a, idx;
  int acc = 0;     /* pending index bits not yet written */
  int nbits = 0;   /* number of valid bits in acc */

  /* find min/max alpha pair */
  mn = mx = chan[0];
  for (i = 1; i < 16; ++i)
    {
      v = chan[4 * i];
      if (v > mx) mx = v;
      if (v < mn) mn = v;
    }

  /* encode them */
  *dst++ = mx;
  *dst++ = mn;

  /*
   * determine bias and emit indices
   * given the choice of mx/mn, these indices are optimal:
   * http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   */
  dist = mx - mn;
  dist4 = dist * 4;
  dist2 = dist * 2;
  bias = (dist < 8) ? (dist - 1) : (dist / 2 + 2);
  bias -= mn * 7;

  for (i = 0; i < 16; ++i)
    {
      a = chan[4 * i] * 7 + bias;

      /* select index. this is a "linear scale" lerp factor between 0
         (val=min) and 7 (val=max). */
      idx = 0;
      if (a >= dist4)
        {
          idx = 4;
          a -= dist4;
        }
      if (a >= dist2)
        {
          idx += 2;
          a -= dist2;
        }
      if (a >= dist)
        idx += 1;

      /* turn linear scale into DXT index (0/1 are extremal pts) */
      idx = -idx & 7;
      idx ^= (2 > idx);

      /* write index */
      acc |= idx << nbits;
      nbits += 3;
      if (nbits >= 8)
        {
          *dst++ = (unsigned char)(acc & 0xff);
          acc >>= 8;
          nbits -= 8;
        }
    }
}
|
|
|
|
/* number of 4x4 blocks needed to cover a w x h image (sizes rounded up) */
#define BLOCK_COUNT(w, h) \
  ((((h) + 3) >> 2) * (((w) + 3) >> 2))

/* byte offset of the block containing pixel (x, y) in an image of width w
 * whose blocks are bs bytes each, stored row by row */
#define BLOCK_OFFSET(x, y, w, bs) \
  (((y) >> 2) * ((bs) * (((w) + 3) >> 2)) + ((bs) * ((x) >> 2)))
|
|
|
|
static void
|
|
compress_BC1 (unsigned char *dst,
|
|
const unsigned char *src,
|
|
int w,
|
|
int h,
|
|
int flags)
|
|
{
|
|
const unsigned int block_count = BLOCK_COUNT(w, h);
|
|
unsigned int i;
|
|
unsigned char block[64], *p;
|
|
int x, y;
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for schedule(dynamic, 256) private(block, p, x, y)
|
|
#endif
|
|
for (i = 0; i < block_count; ++i)
|
|
{
|
|
x = (i % ((w + 3) >> 2)) << 2;
|
|
y = (i / ((w + 3) >> 2)) << 2;
|
|
p = dst + BLOCK_OFFSET(x, y, w, 8);
|
|
extract_block(src, x, y, w, h, block);
|
|
encode_color_block(p, block, DXT_BC1 | flags);
|
|
}
|
|
}
|
|
|
|
static void
|
|
compress_BC2 (unsigned char *dst,
|
|
const unsigned char *src,
|
|
int w,
|
|
int h,
|
|
int flags)
|
|
{
|
|
const unsigned int block_count = BLOCK_COUNT(w, h);
|
|
unsigned int i;
|
|
unsigned char block[64], *p;
|
|
int x, y;
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for schedule(dynamic, 256) private(block, p, x, y)
|
|
#endif
|
|
for (i = 0; i < block_count; ++i)
|
|
{
|
|
x = (i % ((w + 3) >> 2)) << 2;
|
|
y = (i / ((w + 3) >> 2)) << 2;
|
|
p = dst + BLOCK_OFFSET(x, y, w, 16);
|
|
extract_block(src, x, y, w, h, block);
|
|
encode_alpha_block_BC2(p, block);
|
|
encode_color_block(p + 8, block, DXT_BC2 | flags);
|
|
}
|
|
}
|
|
|
|
static void
|
|
compress_BC3 (unsigned char *dst,
|
|
const unsigned char *src,
|
|
int w,
|
|
int h,
|
|
int flags)
|
|
{
|
|
const unsigned int block_count = BLOCK_COUNT(w, h);
|
|
unsigned int i;
|
|
unsigned char block[64], *p;
|
|
int x, y;
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for schedule(dynamic, 256) private(block, p, x, y)
|
|
#endif
|
|
for (i = 0; i < block_count; ++i)
|
|
{
|
|
x = (i % ((w + 3) >> 2)) << 2;
|
|
y = (i / ((w + 3) >> 2)) << 2;
|
|
p = dst + BLOCK_OFFSET(x, y, w, 16);
|
|
extract_block(src, x, y, w, h, block);
|
|
encode_alpha_block_BC3(p, block, 0);
|
|
encode_color_block(p + 8, block, DXT_BC3 | flags);
|
|
}
|
|
}
|
|
|
|
/*
 * Compress a w x h, 4-bytes-per-pixel image to BC4: one 8-byte DXT5-style
 * alpha block per 4x4 pixel block, encoding byte 2 of each pixel
 * (offset -1 relative to byte 3), written to dst in row-major block order.
 */
static void
compress_BC4 (unsigned char       *dst,
              const unsigned char *src,
              int                  w,
              int                  h)
{
  const unsigned int total = BLOCK_COUNT(w, h);
  unsigned int n;
  unsigned char texels[64], *out;
  int bx, by;

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 256) private(texels, out, bx, by)
#endif
  for (n = 0; n < total; ++n)
    {
      /* pixel coordinates of the block's top-left corner */
      bx = (n % ((w + 3) >> 2)) << 2;
      by = (n / ((w + 3) >> 2)) << 2;
      out = dst + BLOCK_OFFSET(bx, by, w, 8);
      extract_block(src, bx, by, w, h, texels);
      encode_alpha_block_BC3(out, texels, -1);
    }
}
|
|
|
|
/*
 * Compress a w x h, 4-bytes-per-pixel image to BC5: two 8-byte DXT5-style
 * alpha blocks per 4x4 pixel block, written to dst in row-major block
 * order.
 */
static void
compress_BC5 (unsigned char       *dst,
              const unsigned char *src,
              int                  w,
              int                  h)
{
  const unsigned int total = BLOCK_COUNT(w, h);
  unsigned int n;
  unsigned char texels[64], *out;
  int bx, by;

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 256) private(texels, out, bx, by)
#endif
  for (n = 0; n < total; ++n)
    {
      /* pixel coordinates of the block's top-left corner */
      bx = (n % ((w + 3) >> 2)) << 2;
      by = (n / ((w + 3) >> 2)) << 2;
      out = dst + BLOCK_OFFSET(bx, by, w, 16);
      extract_block(src, bx, by, w, h, texels);
      /* Pixels are ordered as BGRA (see write_layer)
       * First we encode red   -1+3: channel 2;
       * then we encode green  -2+3: channel 1.
       */
      encode_alpha_block_BC3(out, texels, -1);
      encode_alpha_block_BC3(out + 8, texels, -2);
    }
}
|
|
|
|
/*
 * Compress a w x h, 4-bytes-per-pixel image as scaled YCoCg: per 4x4 pixel
 * block, 8 bytes of DXT5 alpha (byte 3 of each pixel) followed by 8 bytes
 * produced by encode_YCoCg_block(), written to dst in row-major block
 * order.
 */
static void
compress_YCoCg (unsigned char       *dst,
                const unsigned char *src,
                int                  w,
                int                  h)
{
  const unsigned int total = BLOCK_COUNT(w, h);
  unsigned int n;
  unsigned char texels[64], *out;
  int bx, by;

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 256) private(texels, out, bx, by)
#endif
  for (n = 0; n < total; ++n)
    {
      /* pixel coordinates of the block's top-left corner */
      bx = (n % ((w + 3) >> 2)) << 2;
      by = (n / ((w + 3) >> 2)) << 2;
      out = dst + BLOCK_OFFSET(bx, by, w, 16);
      extract_block(src, bx, by, w, h, texels);
      encode_alpha_block_BC3(out, texels, 0);
      encode_YCoCg_block(out + 8, texels);
    }
}
|
|
|
|
int
|
|
dxt_compress (unsigned char *dst,
|
|
unsigned char *src,
|
|
int format,
|
|
unsigned int width,
|
|
unsigned int height,
|
|
int bpp,
|
|
int mipmaps,
|
|
int flags)
|
|
{
|
|
int i, size, w, h;
|
|
unsigned int offset;
|
|
unsigned char *tmp = NULL;
|
|
int j;
|
|
unsigned char *s;
|
|
|
|
if (bpp == 1)
|
|
{
|
|
/* grayscale promoted to BGRA */
|
|
|
|
size = get_mipmapped_size(width, height, 4, 0, mipmaps,
|
|
DDS_COMPRESS_NONE);
|
|
tmp = g_malloc(size);
|
|
|
|
for (i = j = 0; j < size; ++i, j += 4)
|
|
{
|
|
tmp[j + 0] = src[i];
|
|
tmp[j + 1] = src[i];
|
|
tmp[j + 2] = src[i];
|
|
tmp[j + 3] = 255;
|
|
}
|
|
|
|
bpp = 4;
|
|
}
|
|
else if (bpp == 2)
|
|
{
|
|
/* gray-alpha promoted to BGRA */
|
|
|
|
size = get_mipmapped_size(width, height, 4, 0, mipmaps,
|
|
DDS_COMPRESS_NONE);
|
|
tmp = g_malloc(size);
|
|
|
|
for (i = j = 0; j < size; i += 2, j += 4)
|
|
{
|
|
tmp[j + 0] = src[i];
|
|
tmp[j + 1] = src[i];
|
|
tmp[j + 2] = src[i];
|
|
tmp[j + 3] = src[i + 1];
|
|
}
|
|
|
|
bpp = 4;
|
|
}
|
|
else if (bpp == 3)
|
|
{
|
|
size = get_mipmapped_size(width, height, 4, 0, mipmaps,
|
|
DDS_COMPRESS_NONE);
|
|
tmp = g_malloc(size);
|
|
|
|
for (i = j = 0; j < size; i += 3, j += 4)
|
|
{
|
|
tmp[j + 0] = src[i + 0];
|
|
tmp[j + 1] = src[i + 1];
|
|
tmp[j + 2] = src[i + 2];
|
|
tmp[j + 3] = 255;
|
|
}
|
|
|
|
bpp = 4;
|
|
}
|
|
|
|
offset = 0;
|
|
w = width;
|
|
h = height;
|
|
s = tmp ? tmp : src;
|
|
|
|
for (i = 0; i < mipmaps; ++i)
|
|
{
|
|
switch (format)
|
|
{
|
|
case DDS_COMPRESS_BC1:
|
|
compress_BC1(dst + offset, s, w, h, flags);
|
|
break;
|
|
case DDS_COMPRESS_BC2:
|
|
compress_BC2(dst + offset, s, w, h, flags);
|
|
break;
|
|
case DDS_COMPRESS_BC3:
|
|
case DDS_COMPRESS_BC3N:
|
|
case DDS_COMPRESS_RXGB:
|
|
case DDS_COMPRESS_AEXP:
|
|
case DDS_COMPRESS_YCOCG:
|
|
compress_BC3(dst + offset, s, w, h, flags);
|
|
break;
|
|
case DDS_COMPRESS_BC4:
|
|
compress_BC4(dst + offset, s, w, h);
|
|
break;
|
|
case DDS_COMPRESS_BC5:
|
|
compress_BC5(dst + offset, s, w, h);
|
|
break;
|
|
case DDS_COMPRESS_YCOCGS:
|
|
compress_YCoCg(dst + offset, s, w, h);
|
|
break;
|
|
default:
|
|
compress_BC3(dst + offset, s, w, h, flags);
|
|
break;
|
|
}
|
|
s += (w * h * bpp);
|
|
offset += get_mipmapped_size(w, h, 0, 0, 1, format);
|
|
w = MAX(1, w >> 1);
|
|
h = MAX(1, h >> 1);
|
|
}
|
|
|
|
if (tmp)
|
|
g_free(tmp);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
decode_color_block (unsigned char *block,
|
|
unsigned char *src,
|
|
int format)
|
|
{
|
|
int i, x, y;
|
|
unsigned char *d = block;
|
|
unsigned int indices, idx;
|
|
unsigned char colors[4][3];
|
|
unsigned short c0, c1;
|
|
|
|
c0 = GETL16(&src[0]);
|
|
c1 = GETL16(&src[2]);
|
|
|
|
unpack_rgb565(colors[0], c0);
|
|
unpack_rgb565(colors[1], c1);
|
|
|
|
if ((c0 > c1) || (format == DDS_COMPRESS_BC3))
|
|
{
|
|
lerp_rgb13(colors[2], colors[0], colors[1]);
|
|
lerp_rgb13(colors[3], colors[1], colors[0]);
|
|
}
|
|
else
|
|
{
|
|
for (i = 0; i < 3; ++i)
|
|
{
|
|
colors[2][i] = (colors[0][i] + colors[1][i] + 1) >> 1;
|
|
colors[3][i] = 255;
|
|
}
|
|
}
|
|
|
|
src += 4;
|
|
for (y = 0; y < 4; ++y)
|
|
{
|
|
indices = src[y];
|
|
for (x = 0; x < 4; ++x)
|
|
{
|
|
idx = indices & 0x03;
|
|
d[0] = colors[idx][2];
|
|
d[1] = colors[idx][1];
|
|
d[2] = colors[idx][0];
|
|
if (format == DDS_COMPRESS_BC1)
|
|
d[3] = ((c0 <= c1) && idx == 3) ? 0 : 255;
|
|
indices >>= 2;
|
|
d += 4;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Expand the explicit 4-bit alpha values of a BC2 block.
 *
 * block: address of the first output byte; one value is stored every
 *        4 bytes, 16 values in total (four rows of four).
 * src:   8 bytes of input, consumed as four 16-bit words via GETL16(),
 *        one word per row, lowest nibble first.
 *
 * Each nibble is multiplied by 17 so that 0..15 maps onto 0..255.
 */
static void
decode_alpha_block_BC2 (unsigned char *block,
                        unsigned char *src)
{
  unsigned char *out = block;
  int            row, shift;

  for (row = 0; row < 4; ++row)
    {
      unsigned int packed = GETL16(&src[2 * row]);

      for (shift = 0; shift < 16; shift += 4, out += 4)
        *out = ((packed >> shift) & 0x0f) * 17;
    }
}
|
|
|
|
/*
 * Decode one interpolated 8-byte channel block (used for BC3 alpha and for
 * the BC4/BC5 channels).
 *
 * block: address of the first output byte; one value is stored every
 *        4 bytes, 16 values in total (four rows of four).
 * src:   8 bytes of input: two endpoint bytes followed by 3-bit codes,
 *        fetched as one 64-bit word with GETL64().
 * w:     image width as passed by the caller.
 *
 * Codes 0 and 1 select the endpoints.  When a0 > a1 the remaining six
 * codes are blends in sevenths; otherwise codes 2..5 are blends in fifths
 * and codes 6 and 7 mean 0 and 255.
 */
static void
decode_alpha_block_BC3 (unsigned char *block,
                        unsigned char *src,
                        int            w)
{
  unsigned char       palette[8];
  unsigned char      *out  = block;
  unsigned long long  bits = GETL64(src) >> 16;
  int                 a0   = src[0];
  int                 a1   = src[1];
  int                 code, row, col;

  /* Build all eight possible output values once, then just look them up. */
  palette[0] = a0;
  palette[1] = a1;

  if (a0 > a1)
    {
      for (code = 2; code < 8; ++code)
        palette[code] = ((8 - code) * a0 + (code - 1) * a1) / 7;
    }
  else
    {
      for (code = 2; code < 6; ++code)
        palette[code] = ((6 - code) * a0 + (code - 1) * a1) / 5;

      palette[6] = 0;
      palette[7] = 255;
    }

  for (row = 0; row < 4; ++row)
    {
      for (col = 0; col < 4; ++col)
        {
          *out = palette[((unsigned int) bits) & 0x07];
          bits >>= 3;
          out += 4;
        }

      /* NOTE(review): for images narrower than 4 pixels additional codes
       * are dropped after every row, although four codes have already
       * been consumed above.  Kept as is; confirm this is intended. */
      if (w < 4)
        bits >>= (3 * (4 - w));
    }
}
|
|
|
|
static void
|
|
make_normal (unsigned char *dst,
|
|
unsigned char x,
|
|
unsigned char y)
|
|
{
|
|
float nx = 2.0f * ((float)x / 255.0f) - 1.0f;
|
|
float ny = 2.0f * ((float)y / 255.0f) - 1.0f;
|
|
float nz = 0.0f;
|
|
float d = 1.0f - nx * nx + ny * ny;
|
|
int z;
|
|
|
|
if (d > 0)
|
|
nz = sqrtf(d);
|
|
|
|
z = (int)(255.0f * (nz + 1) / 2.0f);
|
|
z = MAX(0, MIN(255, z));
|
|
|
|
dst[0] = x;
|
|
dst[1] = y;
|
|
dst[2] = z;
|
|
}
|
|
|
|
static void
|
|
normalize_block (unsigned char *block,
|
|
int format)
|
|
{
|
|
int x, y, tmp;
|
|
|
|
for (y = 0; y < 4; ++y)
|
|
{
|
|
for (x = 0; x < 4; ++x)
|
|
{
|
|
if (format == DDS_COMPRESS_BC3)
|
|
{
|
|
tmp = block[y * 16 + (x * 4)];
|
|
make_normal(&block[y * 16 + (x * 4)],
|
|
block[y * 16 + (x * 4) + 3],
|
|
block[y * 16 + (x * 4) + 1]);
|
|
block[y * 16 + (x * 4) + 3] = tmp;
|
|
}
|
|
else if (format == DDS_COMPRESS_BC5)
|
|
{
|
|
make_normal(&block[y * 16 + (x * 4)],
|
|
block[y * 16 + (x * 4)],
|
|
block[y * 16 + (x * 4) + 1]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Copy a decoded 4x4 tile into the destination image, clipping it against
 * the right and bottom image edges.
 *
 * dst:    destination image, rows of width pixels with bpp bytes each.
 * block:  source tile, 16 pixels of 4 bytes each.
 * bx, by: pixel position of the tile's top-left corner inside the image.
 * width:  image width in pixels.
 * height: image height in pixels.
 * bpp:    number of bytes copied per pixel, starting at byte 0 of each
 *         tile pixel.
 */
static void
put_block (unsigned char *dst,
           unsigned char *block,
           unsigned int   bx,
           unsigned int   by,
           unsigned int   width,
           unsigned       height,
           int            bpp)
{
  unsigned int row, col;
  int          chan;

  for (row = 0; row < 4; ++row)
    {
      const unsigned char *texel;
      unsigned char       *out;

      /* Rows below the image are dropped. */
      if (by + row >= height)
        break;

      texel = block + row * 16;
      out   = dst + ((row + by) * width + bx) * bpp;

      /* Columns right of the image are dropped. */
      for (col = 0; col < 4 && bx + col < width; ++col, texel += 4)
        {
          for (chan = 0; chan < bpp; ++chan)
            *out++ = texel[chan];
        }
    }
}
|
|
|
|
int
|
|
dxt_decompress (unsigned char *dst,
|
|
unsigned char *src,
|
|
int format,
|
|
unsigned int size,
|
|
unsigned int width,
|
|
unsigned int height,
|
|
int bpp,
|
|
int normals)
|
|
{
|
|
unsigned char *s;
|
|
unsigned int x, y;
|
|
unsigned char block[16 * 4];
|
|
|
|
s = src;
|
|
|
|
for (y = 0; y < height; y += 4)
|
|
{
|
|
for (x = 0; x < width; x += 4)
|
|
{
|
|
memset(block, 0, 16 * 4);
|
|
|
|
if (format == DDS_COMPRESS_BC1)
|
|
{
|
|
decode_color_block(block, s, format);
|
|
s += 8;
|
|
}
|
|
else if (format == DDS_COMPRESS_BC2)
|
|
{
|
|
decode_alpha_block_BC2(block + 3, s);
|
|
decode_color_block(block, s + 8, format);
|
|
s += 16;
|
|
}
|
|
else if (format == DDS_COMPRESS_BC3)
|
|
{
|
|
decode_alpha_block_BC3(block + 3, s, width);
|
|
decode_color_block(block, s + 8, format);
|
|
s += 16;
|
|
}
|
|
else if (format == DDS_COMPRESS_BC4)
|
|
{
|
|
decode_alpha_block_BC3(block, s, width);
|
|
s += 8;
|
|
}
|
|
else if (format == DDS_COMPRESS_BC5)
|
|
{
|
|
decode_alpha_block_BC3(block, s, width);
|
|
decode_alpha_block_BC3(block + 1, s + 8, width);
|
|
s += 16;
|
|
}
|
|
|
|
if (normals)
|
|
normalize_block(block, format);
|
|
|
|
put_block(dst, block, x, y, width, height, bpp);
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|