// using different memory spaces in CUDA

#include <cuda_runtime.h>

// a __device__ or __global__ function runs on the GPU
__global__ void use_local_memory_GPU(float in)
{
    // variables declared inside a kernel, like f, live in per-thread local memory
    float f;
    f = in;     // the parameter "in" is also local to each thread
}

int main(int argc, char **argv)
{
    // launch one block of 128 threads; each thread gets its own copy of f and in
    use_local_memory_GPU<<<1, 128>>>(2.0f);

    // set up global memory: h_arr lives on the host, d_arr points to device (global) memory
    float h_arr[128];
    float *d_arr;
    cudaMalloc((void **)&d_arr, sizeof(float) * 128);

    cudaFree(d_arr);    // release the device allocation before exiting
    return 0;
}
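
// The allocation above leaves d_arr unused: the snippet stops before any kernel
// touches global memory. A minimal sketch of such a kernel follows; the name
// use_global_memory_GPU and the host-side call sequence shown in the comments
// are assumptions for illustration, not part of the original snippet.
__global__ void use_global_memory_GPU(float *array)
{
    // "array" points into device global memory, visible to all threads
    array[threadIdx.x] = 2.0f * (float)threadIdx.x;
}

// Hypothetical host-side usage, placed between cudaMalloc and cudaFree in main:
//     cudaMemcpy((void *)d_arr, (void *)h_arr, sizeof(float) * 128, cudaMemcpyHostToDevice);
//     use_global_memory_GPU<<<1, 128>>>(d_arr);
//     cudaMemcpy((void *)h_arr, (void *)d_arr, sizeof(float) * 128, cudaMemcpyDeviceToHost);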