module numeric_kernels use cudafor contains ! CUDA Kernel attributes(global) subroutine sqrt_kernel(vector, vec_size) integer, value :: vec_size real(kind=8), managed, intent(inout) :: vector(0:vec_size-1) integer :: ind ind = threadIdx%x-1+blockDim%x*(blockIdx%x-1); if (ind < vec_size) then vector(ind) = sqrtOf(vector(ind)) end if end subroutine ! function will be callable from host and device code attributes(host, device) real(kind=8) function sqrtOf(x, prec_para) result (approximation) ! Approximate the square root of x up to precision real(kind=8), intent(in) :: x real(kind=8), intent(in), optional :: prec_para real(kind=8) :: prev_val, error=1.0, prec if(.not. present(prec_para)) then prec = 1e-8 else prec = prec_para end if approximation = x/2.0 if (x==0.0) then return end if if (x<0) then approximation = -1.0 return end if error = 1.0 do while (error > prec) prev_val = approximation approximation = 0.5*(approximation + x/approximation) ! error estimate (upper bound) error = (prev_val*approximation-x)*(prev_val*approximation-x)/(2*prev_val*approximation*approximation) end do end function end module ! CPU code program main use cudafor use numeric_kernels ! length of vector integer, parameter :: vec_size = 1000000 real(kind=8), managed :: vector(0:vec_size-1) real(kind=8) :: vector_cpu(0:vec_size-1) integer, parameter :: num_threads = 128 integer, parameter :: num_blocks = (vec_size+num_threads-1)/num_threads integer :: i, istat real(kind=8) :: error ! fill vector do i=0, vec_size-1 vector(i)=(2*i); end do vector_cpu = vector ! compute elementwise square root (approximately) (CPU) do i=0,10-1 vector_cpu(i) = sqrtOf(vector_cpu(i)) end do ! compute elementwise square root (approximately) (GPU) call sqrt_kernel<<>>(vector, vec_size) istat = cudaDeviceSynchronize() ! Print first ten roots do i=0,10-1 error = abs(vector(i)-vector_cpu(i)) print *, "Sqrt(",2*i,"): ",vector(i), "(GPU)", vector_cpu(i), "(CPU), abs. difference error", error if (error > 1e-5) then stop "Error larger than 1e-5" end if end do ! exit end program main