// Start the timer for the measurement of the kernel execution time.
// Records the event bones_start2 on kernel_stream; a matching stop event is
// presumably recorded after the kernel so the elapsed time can be queried
// (not visible in this fragment -- TODO confirm).
//
// The explicit stream synchronization below is intentionally left disabled,
// as it was in the original snippet.
// NOTE(review): without it, the start event is simply enqueued behind whatever
// work is already pending on kernel_stream -- confirm this is the intended
// measurement window.
//cudaStreamSynchronize(kernel_stream);
cudaEventRecord(bones_start2, kernel_stream);