I just needed a simple app to see what CUDA version is installed on a remote machine and what the GPU is capable of.
My trivial solution consists of just one file, cudainfo.cu, and compiles with `nvcc -lcudart cudainfo.cu -o cudainfo`:
#include <iostream>

using namespace std;

/**
 * Print a human-readable summary of one CUDA device to stdout.
 *
 * @param deviceNumber      Ordinal of the device; used only in the heading line.
 * @param deviceProperties  Properties of the device, as filled in by
 *                          cudaGetDeviceProperties(). Taken by const reference
 *                          so the struct is not copied on every call.
 */
void printDeviceProperties( int deviceNumber, const cudaDeviceProp& deviceProperties )
{
    cout << "Device "<<deviceNumber<<": " << deviceProperties.name << endl;
    // An integrated GPU is by definition not a dedicated one, hence the swapped answers.
    cout << " Dedicated GPU: " << (deviceProperties.integrated ? "NO" : "Yes") << endl;
    cout << " Clock rate: " << deviceProperties.clockRate/1000 << " MHz" << endl;
    cout << " Number of multiprocessors: " << deviceProperties.multiProcessorCount << endl;
    cout << " Maximum threads per multiprocessor: " << deviceProperties.maxThreadsPerMultiProcessor << endl;
    cout << " Warp size: " << deviceProperties.warpSize << endl;
    cout << " Maximum threads per block: " << deviceProperties.maxThreadsPerBlock << endl;

    cout << " Memory:" << endl;
    cout << " Total global memory: " << deviceProperties.totalGlobalMem/(1024*1024) << " MiB" << endl;
    cout << " Total constant memory: " << deviceProperties.totalConstMem/(1024) << " KiB" << endl;
    cout << " Total shared memory per block: " << deviceProperties.sharedMemPerBlock/(1024) << " KiB" << endl;
    cout << " Total 32 bit registers per block: " << deviceProperties.regsPerBlock << endl;
    // Fixed: the answers were inverted, reporting "NO" when ECC was enabled.
    cout << " ECC enabled: " << (deviceProperties.ECCEnabled ? "Yes" : "No") << endl;

    cout << " Compute capabilities:" << endl;
    // major.minor is printed under the "CUDA version" label.
    cout << " CUDA version: " << deviceProperties.major << "." << deviceProperties.minor << endl;
    cout << " Runtime limit per kernel: " << (deviceProperties.kernelExecTimeoutEnabled ? "Yes" : "No") << endl;
    cout << " Concurrent kernels: " << (deviceProperties.concurrentKernels ? "supported" : "unsupported") << endl;

    cout << " Max dimension of threads per block: ";
    for (int i = 0; i < 3; ++i)
        cout << deviceProperties.maxThreadsDim[i] << " ";
    cout << endl;

    cout << " Maximum dimension of grid: ";
    for (int i = 0; i < 3; ++i)
        cout << deviceProperties.maxGridSize[i] << " ";
    cout << endl;

    cout << endl;
}

/**
 * Print one "<label><major>.<minor>" version line to stdout.
 *
 * @param label    Text printed in front of the version, e.g. "CUDA driver: ".
 * @param status   Result of the CUDA call that produced `version`.
 * @param version  Version number encoded as 1000 * major + 10 * minor.
 *
 * If `status` is not cudaSuccess the version is reported as "unknown" and the
 * CUDA error text is written to stderr instead of printing an unset value.
 */
static void printVersion( const char* label, cudaError_t status, int version )
{
    if (status != cudaSuccess) {
        cerr << "could not query version (" << cudaGetErrorString(status) << ")" << endl;
        cout << label << "unknown" << endl;
        return;
    }
    // % 1000 (rather than % 100) keeps two-digit minor versions intact.
    cout << label << version/1000 << "." << (version%1000)/10 << endl;
}

/**
 * Print the CUDA driver and runtime versions followed by a property summary
 * of every CUDA device the runtime reports.
 *
 * @return 0 on success; -1 if the device count could not be obtained or the
 *         properties of at least one device could not be queried.
 */
int main()
{
    // Number of CUDA devices; initialised so it is never read unset.
    int deviceCount = 0;
    cudaError_t error = cudaGetDeviceCount(&deviceCount);
    if (error == cudaErrorNoDevice) {
        cerr << "no device could be found" << endl;
        return -1;
    } else if (error == cudaErrorInsufficientDriver) {
        cerr << "no driver could be loaded" << endl;
        return -1;
    } else if (error != cudaSuccess) {
        // Any other failure used to fall through with an unset deviceCount.
        cerr << "could not query the number of CUDA devices: "
             << cudaGetErrorString(error) << endl;
        return -1;
    }

    int driverVersion = 0, runtimeVersion = 0;
    cudaError_t driverStatus = cudaDriverGetVersion( &driverVersion );
    cudaError_t runtimeStatus = cudaRuntimeGetVersion( &runtimeVersion );
    printVersion( "CUDA driver: ", driverStatus, driverVersion );
    printVersion( "CUDA runtime: ", runtimeStatus, runtimeVersion );

    switch (deviceCount) {
        case 0:
            cout << "found no CUDA devices" << endl;
            break;
        case 1:
            cout << "found 1 CUDA device" << endl;
            break;
        default:
            cout << "found " << deviceCount << " CUDA devices" << endl;
    }
    cout << endl;

    // Keep going after a failed query so the remaining devices are still
    // listed, but remember the failure for the exit status.
    bool allDevicesQueried = true;
    for (int i = 0; i < deviceCount; ++i) {
        cudaDeviceProp deviceProperties;
        cudaError_t status = cudaGetDeviceProperties(&deviceProperties, i);
        if (status == cudaSuccess) {
            printDeviceProperties( i, deviceProperties );
        } else {
            cerr << "fatal error: could not query device information for device " << i
                 << ": " << cudaGetErrorString(status) << endl;
            allDevicesQueried = false;
        }
    }

    return allDevicesQueried ? 0 : -1;
}
Maybe someone else can use this as well (I know that it’s not even a hello-world, but who knows?).