// ************************************************************************** // // // // eses eses // // eses eses // // eses eseses esesese eses Embedded Systems Group // // ese ese ese ese ese // // ese eseseses eseseses ese Department of Computer Science // // eses eses ese eses // // eses eseses eseseses eses University of Kaiserslautern // // eses eses // // // // ************************************************************************** // // The discrete cosine transform (DCT) is used in many applications like the // // JPEG image encoding. In essence, the situation is similar to the discrete // // Fourier transform: The definition is also a matrix multiplication with the // // NxN matrix C having indices c[i][j] as defined below. Simple matrix // // multiplication requires N^2 multiplications and N(N-1) additions, hence, // // O(N^2) sequential time, which can be easily reduced by N^2 processors to // // O(log(N)) time. In analogy to the fast Fourier transform FFT, there are // // also fast DCT algorithms [Rezn10] that reduce the complexity to O(Nlog(N)) // // sequential time and with N processors to O(log(N)) parallel time. // // The version below is a simple reduction of the DCT to the DFT, so that // // any FFT algorithm can be used to compute it in O(log(N)) parallel time. // // This reduction is well-known, and can be found, e.g. in [Khay03]. // // ************************************************************************** // macro K = 4; macro N = exp(2,K); // macros for DCT matrix macro pi = 3.1415926535897932384626433832795; macro sqrt(x) = exp(x,0.5); macro alpha(i) = (i==0 ? sqrt(1.0/N) : sqrt(2.0/N)); // arithmetic on complex numbers macro cadd(x,y) = (x.0+y.0,x.1+y.1); macro csub(x,y) = (x.0-y.0,x.1-y.1); macro cmul(x,y) = (x.0*y.0-x.1*y.1,x.1*y.0+x.0*y.1); macro Re(x) = x.0; macro Im(x) = x.1; // numeric precision macro eps = 1.0e-4; macro almost_equal(x,y) = (x<y ? y-x : x-y) < eps ; macro rnd(x) = (almost_equal(x,0.0) ? 0.0 : (almost_equal(x,1.0) ? 1.0 : x)); module DiscreteCosineTransform([N]real ?x,!y,[N](real * real) z,u) { // permute the inputs so that the DCT y of x is expressed by z as follows: // y[j] = sum(k=0..N-1) (alpha(j) * cos((pi*j*(2*k+1))/(2*N)) * x[k]); // = sum(k=0..N-1) (alpha(j) * cos((pi*j*(4*k+1))/(2*N)) * Re(z[k])) for(j=0..N/2-1) { z[j] = (x[2*j],0.0); z[N-1-j] = (x[2*j+1],0.0); } // perform FFT on permuted vector z with depth O(log(N)) using O(N) procs. // for(j=0..N-1) // let(re = sum(k=0..N-1) Re(cmul((rnd(cos((2*pi*j*k)/N)),-rnd(sin((2*pi*j*k)/N))),z[k]))) // let(im = sum(k=0..N-1) Im(cmul((rnd(cos((2*pi*j*k)/N)),-rnd(sin((2*pi*j*k)/N))),z[k]))) // u[j] = (re,im); FastFourierTransform(z,u); // scale FFT u accordingly to obtain the DCT y // note that u[j] = sum(k=0..N-1) (exp(e,-(2*pi*i*j*k)/N) * z[k]) // thus, alpha(j) * exp(e,-(pi*i*j)/(2*N)) * u[j] // = sum(k=0..N-1) (alpha(j) * exp(e,-(pi*i*j*(4*k+1))/(2*N)) * z[k]) for(j=0..N-1) let(phi_j = (pi*j)/(2.0*N)) let(scale_j = (cos(phi_j),-sin(phi_j))) y[j] = alpha(j) * Re(cmul(scale_j,u[j])); } drivenby { // test vector for(j=0..N-1) x[j] = j; // perform inefficient matrix multiplication and compare // note c[i][j] = alpha(i) * cos((pi*i*(2*j+1))/(2*N)) // are by definition the coefficients of the DCT matrix for(j=0..N-1) { let(dct_j = sum(k=0..N-1) (alpha(j) * cos((pi*j*(2*k+1))/(2.0*N)) * x[k])) assert(almost_equal(dct_j,y[j])); } }