// Using different memory spaces in CUDA.
// A __device__ or __global__ function runs on the GPU.

// Local memory: variables declared inside a kernel are private to each thread.
__global__ void use_local_memory_GPU(float in)
{
    float f;    // "f" lives in local memory, private to each thread
    f = in;     // the parameter "in" is per-thread local memory as well
                // real code would presumably do more work here
}
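
// Sketch (not in the original snippet): a kernel that operates on global memory.
// The name use_global_memory_GPU and the doubling logic are illustrative assumptions;
// any pointer obtained from cudaMalloc, such as d_arr in main below, could be passed in,
// e.g. use_global_memory_GPU<<<1, 128>>>(d_arr);
__global__ void use_global_memory_GPU(float *array)
{
    // "array" points to device global memory, visible to every thread
    array[threadIdx.x] = 2.0f * (float)threadIdx.x;
}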
int main(int argc, char **argv)
{
    // Launch the local-memory kernel: 1 block of 128 threads.
    use_local_memory_GPU<<<1, 128>>>(2.0f);

    // Global memory: allocated with cudaMalloc, visible to all threads and the host via copies.
    float h_arr[128] = {0};  // convention: h_ prefix for host (CPU) memory
    float *d_arr;            // convention: d_ prefix for device (GPU) global memory
    cudaMalloc((void **)&d_arr, sizeof(float) * 128);
    cudaMemcpy(d_arr, h_arr, sizeof(float) * 128, cudaMemcpyHostToDevice);

    cudaFree(d_arr);
    return 0;
}
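
// Sketch (not in the original snippet): shared memory is another GPU memory space,
// declared with __shared__ and visible to all threads within one thread block.
// The kernel name and the averaging logic here are illustrative assumptions,
// launched e.g. as use_shared_memory_GPU<<<1, 128>>>(d_arr);
__global__ void use_shared_memory_GPU(float *array)
{
    int i, index = threadIdx.x;
    float average, sum = 0.0f;

    __shared__ float sh_arr[128];   // one copy per thread block, fast on-chip memory

    sh_arr[index] = array[index];   // each thread copies one element in from global memory
    __syncthreads();                // wait until the whole block has filled sh_arr

    // each thread averages the elements that precede its own index
    for (i = 0; i < index; i++) { sum += sh_arr[i]; }
    average = (index > 0) ? sum / (float)index : 0.0f;

    // write the result back to global memory so the host can copy it out
    if (array[index] > average) { array[index] = average; }
}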