// Mandelbrot with CUDA
//
// Author: Axel Huebl (Serial Code by Matze)
// Date: 10th Jan 2012
//

// NOTE(review): four system headers were listed here but their names were
// lost. <stdio.h> and <stdlib.h> are what the visible code requires
// (printf/fprintf/fopen and malloc); the remaining two are unknown - confirm
// against version control.
#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"

// simulation parameters
const int max_iterations = 255;
const int num_cols = 1000;
const int num_rows = 2000;

// cuda parameters
size_t blocksize = 256;  // threads per block
const int maxRam = 250;  // ION has approx. 256 MB global RAM

// Complex Numbers
//
// Minimal device-side complex type: `r` is the real part, `i` the imaginary
// part. All members are device-only. The arithmetic operators return a new
// value and never modify the operands, so they are const-qualified.
struct cuComplex
{
    float r;
    float i;

    __device__ cuComplex( float a, float b ) : r(a), i(b) {}

    // Squared magnitude r*r + i*i (no square root is taken).
    __device__ float magnitude2( void ) const
    {
        return r * r + i * i;
    }

    // Complex product.
    __device__ cuComplex operator*( const cuComplex& a ) const
    {
        return cuComplex( r*a.r - i*a.i, i*a.r + r*a.i );
    }

    // Scale both components by a real factor.
    __device__ cuComplex operator*( const float& a ) const
    {
        return cuComplex( r*a, i*a );
    }

    // Complex sum.
    __device__ cuComplex operator+( const cuComplex& a ) const
    {
        return cuComplex( r+a.r, i+a.i );
    }

    // Add a real number: only the real part changes.
    __device__ cuComplex operator+( const float& a ) const
    {
        return cuComplex( r+a, i );
    }
};

// Counts how many iterations of z -> z*z + c pass before the escape test
// first succeeds, starting from z = 0.
//
// The loop always runs the full trip count; `val` latches to false on the
// first escape and stays false, so later iterations (where z may overflow)
// can no longer change the result.
//
// Returns a value in [0, max_iterations].
//
// NOTE(review): the loop condition, the update of z and the left-hand side of
// the escape comparison were missing from the damaged source and have been
// reconstructed; only the `> 2.0f` threshold and the val/tmp bookkeeping were
// visible. If the comparison is indeed against the squared magnitude, the
// conventional bailout would be 4.0f (|z| > 2) - confirm against the original.
__device__ int iterate( cuComplex c )
{
    cuComplex z( 0.0f, 0.0f );
    int iterations = 0;
    bool val = true;

    for( int i = 0; i < max_iterations; ++i )
    {
        z = z*z + c;
        const bool tmp = ( z.magnitude2() > 2.0f );
        val = ( val && !tmp );
        if( val )
            ++iterations;
    }
    return iterations;
}

// Computes one pixel of the Mandelbrot image per thread.
//
// Indexing: the column comes from blockIdx.x * blockDim.x + threadIdx.x and
// the row from blockIdx.y. Threads outside num_rows x num_cols do nothing.
// NOTE(review): the launch configuration is not visible here; presumably the
// grid has one block row per image row - confirm at the call site.
//
// color_d   output buffer of at least num_rows * num_cols ints, written
//           row-major (row * num_cols + column) with the iteration count
// num_rows  image height in pixels
// num_cols  image width in pixels
__global__ void calcMandelbrot( int* color_d, const int num_rows, const int num_cols )
{
    const int globalX = ( blockIdx.x * blockDim.x ) + threadIdx.x;
    const int globalY = blockIdx.y;

    // The grid may overhang the image; out-of-range threads exit before
    // touching memory.
    if( globalY >= num_rows || globalX >= num_cols )
        return;

    // parameters: the region of the complex plane mapped onto the image
    const float c_rmin = -2.0f;
    const float c_rmax = +1.0f;
    const float c_imin = -1.0f;
    const float c_imax = +1.0f;

    // size of one pixel in the complex plane
    const float dx = (c_rmax - c_rmin) / float(num_cols);
    const float dy = (c_imax - c_imin) / float(num_rows);

    const cuComplex imaginary( 0.0f, 1.0f );

    // c = i * (imaginary coordinate of this row) + (real coordinate of this column)
    const cuComplex c = ( imaginary*( c_imin+(float(globalY)*dy) ) )
                      + (c_rmin+(float(globalX)*dx));

    const int offset = globalY * num_cols + globalX;
    color_d[offset] = iterate(c);
}

// NOTE(review): only the opening statements of start() are on this line; they
// are reproduced unchanged and the function continues directly below.
int start()
{
    FILE *output = fopen("mandelbrot.ppm", "w+b");
    int *color_h, *color_d;
    const int nBytes = num_rows*num_cols*sizeof(int);
    const int globalMem = nBytes / 1024 / 1024; // in MiB
    printf( "Will use %d MiB of global 
Memory...\n", globalMem ); if( globalMem > maxRam ) { printf( "Maximum RAM is %d ... exit now...\n", maxRam); return 1; } // allocate host memory color_h = (int*)malloc(nBytes); // allocate device memory cudaMalloc( (void**)&color_d, nBytes ); // init host for( int i=0; i>>( color_d, num_rows, num_cols ); printf( "%s\n", cudaGetErrorString( cudaGetLastError() ) ); // copy to host cudaMemcpy(color_h, color_d, nBytes, cudaMemcpyDeviceToHost); printf( "Copied Memory back to Host...\n" ); fprintf(output, "P3\n"); fprintf(output, "%d %d\n%d\n\n", num_cols, num_rows, max_iterations); for (int x=0; x