/* * baseline 2D FFT code provided for assignment 3 * * To compile: gcc -lm -O3 proj3_provided.c * To run: ./a.out method N M * method == 0: dft2d_naive() * method == 1: dft2d_precompute() * method == 2: dft2d_precompute_opt() * * Example: to run 128x128 2D FFT using dft2d_precompute(): ./a.out 1 128 128 * * Some timing results on linprog: * dft2d_naive 128x128: 17.3 seconds without O3 (gcc -lm proj3_provided.c) * dft2d_naive 128x128: 9.43 seconds with O3 (gcc -lm -O3 proj3_provided.c) * dft2d_precompute 128x128: 0.95 seconds with O3 * dft2d_precompute 256x256: 15.07 seconds with O3 */ #include #include #include #include #define PI 3.14159265358979323846 typedef struct { double real; double imag; } Complex; double my_get_time() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec + ts.tv_nsec * 1e-9; } /* Allocate 2D array */ Complex** alloc_2d(int rows, int cols) { Complex **arr = (Complex**)malloc(rows * sizeof(Complex*)); for (int i = 0; i < rows; i++) arr[i] = (Complex*)malloc(cols * sizeof(Complex)); return arr; } void free_2d(Complex **arr, int rows) { for (int i = 0; i < rows; i++) free(arr[i]); free(arr); } /* F(u,v) = \sum \sum f(x,y) * exp(-j*2*pi*(u*x/M + v*y/N)) */ void dft2d_naive(Complex **input, Complex **output, int M, int N) { for (int u = 0; u < M; u++) { for (int v = 0; v < N; v++) { output[u][v].real = 0.0; output[u][v].imag = 0.0; } } for (int u = 0; u < M; u++) { for (int v = 0; v < N; v++) { for (int x = 0; x < M; x++) { for (int y = 0; y < N; y++) { double angle = -2.0 * PI * ((double)u * x / M + (double)v * y / N); double cos_val = cos(angle); double sin_val = sin(angle); output[u][v].real += input[x][y].real * cos_val - input[x][y].imag * sin_val; output[u][v].imag += input[x][y].real * sin_val + input[x][y].imag * cos_val; } } } } } /* * Same as dft2d_naive, but pre-compute sin and cos table * to remove the sin and cos calculation in the innermost loop */ void dft2d_precompute(Complex **input, Complex **output, int M, int N) { /* Precompute cos/sin tables */ double **cos_table_u = (double**)malloc(M * sizeof(double*)); double **sin_table_u = (double**)malloc(M * sizeof(double*)); for (int u = 0; u < M; u++) { cos_table_u[u] = (double*)malloc(M * sizeof(double)); sin_table_u[u] = (double*)malloc(M * sizeof(double)); for (int x = 0; x < M; x++) { double angle = -2.0 * PI * u * x / M; cos_table_u[u][x] = cos(angle); sin_table_u[u][x] = sin(angle); } } double **cos_table_v = (double**)malloc(N * sizeof(double*)); double **sin_table_v = (double**)malloc(N * sizeof(double*)); for (int v = 0; v < N; v++) { cos_table_v[v] = (double*)malloc(N * sizeof(double)); sin_table_v[v] = (double*)malloc(N * sizeof(double)); for (int y = 0; y < N; y++) { double angle = -2.0 * PI * v * y / N; cos_table_v[v][y] = cos(angle); sin_table_v[v][y] = sin(angle); } } /* Original loop structure unchanged */ for (int u = 0; u < M; u++) { for (int v = 0; v < N; v++) { output[u][v].real = 0.0; output[u][v].imag = 0.0; } } /* this is the loop for you to optimize for performance */ for (int u = 0; u < M; u++) { for (int v = 0; v < N; v++) { for (int x = 0; x < M; x++) { for (int y = 0; y < N; y++) { /* Combine precomputed twiddles */ double cosux = cos_table_u[u][x]; double sinux = sin_table_u[u][x]; double cosvy = cos_table_v[v][y]; double sinvy = sin_table_v[v][y]; /* exp(a + b) = exp(a) * exp(b) */ double tw_real = cosux * cosvy - sinux * sinvy; double tw_imag = cosux * sinvy + sinux * cosvy; output[u][v].real += input[x][y].real * tw_real - input[x][y].imag * tw_imag; output[u][v].imag += input[x][y].real * tw_imag + input[x][y].imag * tw_real; } } } } /* Free twiddle tables */ for (int u = 0; u < M; u++) { free(cos_table_u[u]); free(sin_table_u[u]); } free(cos_table_u); free(sin_table_u); for (int v = 0; v < N; v++) { free(cos_table_v[v]); free(sin_table_v[v]); } free(cos_table_v); free(sin_table_v); } /* this is the optimized routine to be completed by you */ void dft2d_precompute_opt(Complex **input, Complex **output, int M, int N) { /* replace here */ printf("dft2d_precompute_opt has not been implemented yet. Exit!\n"); exit(0); } /* -------- Test -------- */ int main(int argc, char *argv[]) { int method = 0; int M = 4; int N = 4; if (argc > 1) { method = atoi(argv[1]); if (method < 0) method = 0; } if (argc > 2) { M = atoi(argv[2]); if (M <= 0) M = 4; } if (argc > 3) { N = atoi(argv[3]); if (N <= 0) N = 4; } else N = M; Complex **input = alloc_2d(M, N); Complex **output = alloc_2d(M, N); /* Initialize input */ for (int i = 0; i < M; i++) for (int j = 0; j < N; j++) { input[i][j].real = j + i * M; input[i][j].imag = 0.0; } #ifdef PRINTINPUTOUTPUT printf("Input:\n"); for (int i = 0; i < M; i++) { for (int j = 0; j < N; j++) printf("%6.2f ", input[i][j].real); printf("\n"); } #endif double start = my_get_time(); if (method == 0) dft2d_naive(input, output, M, N); else if (method == 1) dft2d_precompute(input, output, M, N); else dft2d_precompute_opt(input, output, M, N); printf("%dx%d 2D FFT time: %.6f seconds\n", N, M, my_get_time() - start); #ifdef PRINTINPUTOUTPUT printf("\nTest case, 2D (%dx%d) DFT Output (real, imag):\n", M, N); for (int u = 0; u < M; u++) { for (int v = 0; v < N; v++) { printf("(%8.2f,%8.2f) ", output[u][v].real, output[u][v].imag); } printf("\n"); } #endif free_2d(input, M); free_2d(output, M); return 0; }