Differences

This shows you the differences between two versions of the page.

Link to this comparison view

cuda_programming [2011/07/23 00:15]
stiber created
cuda_programming [2011/07/31 23:41] (current)
baracs Added Minimal Makefile
Line 14: Line 14:
  
 where ''​deviceNumber'',​ an ''​int'',​ is the GPU device index number to be used (0 to n-1). You'll need to run a device querying program (one is an example in the SDK) to know what devices are on the machine you're using. where ''​deviceNumber'',​ an ''​int'',​ is the GPU device index number to be used (0 to n-1). You'll need to run a device querying program (one is an example in the SDK) to know what devices are on the machine you're using.
 +
 +===== Minimal Makefile =====
 +A simple, generic Makefile:
 +<​code>​
 +# Build tools
 +NVCC = /​usr/​local/​cuda-3.2/​cuda/​bin/​nvcc
 +CXX = g++
 +
 +# here are all the objects
 +GPUOBJS = cuexample.o ​
 +OBJS = cppexample.o
 +
 +# make and compile
 +cudaexample.out:​$(OBJS) $(GPUOBJS)
 + $(NVCC) -o cudaexample.out $(OBJS) $(GPUOBJS) ​
 +
 +cuexample.o:​ cuexample.cu
 + $(NVCC) -c cuexample.cu
 +
 +cppexample.o:​ cppexample.cpp
 + $(CXX) -c cppexample.cpp
 +</​code>​
 +If you wish to compile with CUDA 4.0 instead of CUDA 3.2, simply replace ''‘cuda-3.2’'' with ''‘cuda-4.0’'' in NVCC’s path. Note that with 4.0 the default ''‘LD_LIBRARY_PATH’'' includes ''‘/usr/local/cuda-4.0/cuda/lib’'', which holds the 32-bit libraries; the program will crash at runtime because it needs the 64-bit versions. To fix this, add the following to your ''‘~/.bash_profile’'':
 +<​code>​
 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/​usr/​local/​cuda-4.0/​cuda/​lib64
 +</​code>​
 +You can test out the above Makefile with the following cppexample.cpp and cuexample.cu,\\ cppexample.cpp:
 +<​code>​
 +#include <​cstdlib>​
 +#include <​cstdio>​
 +
 +const int DIMENSION = 10;
 +
 +extern "​C"​ void exampleHost(float * h, int blockDim, int threadDim);
 +
 +int main(void){
 + float * h = (float *)malloc(DIMENSION*DIMENSION*sizeof(float));​
 + exampleHost(h,​ DIMENSION, DIMENSION);
 + for(int i = 0; i < DIMENSION; i++){
 + for(int j = 0; j < DIMENSION; j++){
 + printf("​%2.0f ",​h[i*DIMENSION+j]);​
 +
 + printf("​\n"​);​
 + }
 + return 1;
 +}
 +</​code>​
 +cuexample.cu:​
 +<​code>​
 +__global__ void exampleDevice(float * d){
 + int idx = blockIdx.x * blockDim.x + threadIdx.x;​
 + d[idx] = idx;
 +}
 +
 +extern "​C"​ void exampleHost(float * h, int blockDim, int threadDim){
 + float * d;
 + cudaMalloc((void**)&​d,​ blockDim * threadDim*sizeof(float));​
 + exampleDevice<<<​blockDim,​ threadDim>>>​(d);​
 + cudaMemcpy(h,​ d, blockDim*threadDim*sizeof(float),​cudaMemcpyDeviceToHost); ​
 +}
 +</​code>​
Navigation

The best way to find something here is to use the search box in the upper right or the site index link below.

Print/export
Mobile QR Link