N2 GPU: N2 work visit every edge many times but only sets depth once
CPU: N work maintains frontier to minimize visits / node
int N == << 20; cublasInit(); cublasAlloc(N, sizeof(float), (void**)&d_x); cublasAlloc(N, sizeof(float), (void*)&d_y); cublasSetVector(N, sizeof(x[0]), x, y, d_x, 1); cublasSetVector(N, sizeof(y[0]), y, 1, d_y, 1); saxpy(N, 2.0, d_x, 1, y, 1); cublasGetVector(N, sizeof(y[0]), d_y, 1, y, 1); cublasFree(d_x); cublasFree(d_y); cublasShutdown(),