math/test/test_cyl_neumann_float.cu
Matt Borland 047c206c30
Add bessel GPU support
Add GPU support to bessel_i0

Add CUDA and NVRTC testing

Add GPU support to bessel_i1

Add CUDA and NVRTC testing of bessel_i1

Add tgamma1pm1 NVRTC impl

Add GPU support to iconv

Add GPU test to bessel_ik

Add SYCL testing of complete bessel_i

Add GPU support to bessel_j0

Ignore BOOST_MATH_INSTRUMENT_VARIABLE on NVRTC

Add bessel J0 CUDA and NVRTC testing

Add GPU support to bessel_j1

Add bessel j1 CUDA and NVRTC testing

Add GPU support to bessel jn and jy

Add SYCL bessel j testing

Add bessel_k0 GPU support

Add bessel_k0 CUDA and NVRTC testing

Add GPU support to bessel_k1

Add bessel_k1 CUDA and NVRTC testing

Add GPU support to bessel_kn

Add bessel_kn CUDA and NVRTC testing

Add SYCL testing of complete bessel_k

Make Newton-Raphson GPU compatible

Make the completed bessel functions GPU compatible

Add SYCL bessel y testing

Apply changes for non-empty policy on CUDA

Add NVCC cyl_bessel_i testing

Add GPU support to sinc

Add GPU support to series functions

Add GPU support to bessel_jy_zero

Add array helper type

Make hypot GPU safe

Make bessel_yX GPU capable

Add bessel_y0 and bessel_y1 CUDA testing

Add nvrtc testing of bessel_y0 and bessel_y1

Fix macros

Add missing header

Add missing header

Markup iconv

Add iround for NVRTC

Add tgamma1pm1 with policy overload for NVRTC

Disable header

Fix factorial support for CUDA platforms

Add definition of bessel traits

Add cyl_bessel_i NVRTC testing

Fix cyl_bessel_jy warnings

Fix CUDA forward declarations

Fix maybe-unused variable warning

Add CUDA cyl_bessel_j testing

Add sign overload for lgamma

Fix warnings

Add NVRTC cyl_bessel_j testing

Add NVCC sph_bessel testing

Add NVRTC testing of sph_bessel

Add NVRTC testing of cyl_bessel_k

Add NVCC testing of cyl_bessel_k

Add NVCC testing of cyl_neumann

Add NVRTC cyl_neumann testing

Add NVRTC sph_neumann testing

Add NVCC sph_neumann testing
2024-08-20 16:52:06 -04:00

105 lines
3.1 KiB
Plaintext

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda")
#include <cuda_runtime.h>
typedef float float_type;
/**
 * CUDA kernel: element-wise evaluation of boost::math::cyl_neumann.
 *
 * Each thread handles at most one element, writing
 *     out[i] = boost::math::cyl_neumann(in1[i], in2[i])
 * where i is the thread's flat index built from the x components of
 * blockDim, blockIdx and threadIdx (a one-dimensional launch is expected).
 *
 * in1         - values passed as the first argument of cyl_neumann
 * in2         - values passed as the second argument of cyl_neumann
 * out         - receives one result per element
 * numElements - number of elements to process; threads whose index is
 *               at or beyond this value do nothing
 */
__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so trailing threads may have no element
    if (i < numElements)
    {
        out[i] = boost::math::cyl_neumann(in1[i], in2[i]);
    }
}
/**
 * Host main routine.
 *
 * Fills two cuda_managed_ptr input buffers with pseudo-random values in
 * [0, 1], evaluates boost::math::cyl_neumann for every pair on the device
 * (via cuda_test) and again on the host, then compares the two result sets
 * with boost::math::epsilon_difference.
 *
 * Returns EXIT_FAILURE when the kernel launch or its execution reports a
 * CUDA error, or when any element differs by more than 10 epsilon;
 * returns EXIT_SUCCESS otherwise.
 */
int main(void)
{
    // Number of elements evaluated on both the device and the host
    const int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Buffer holding the first argument of cyl_neumann
    cuda_managed_ptr<float_type> input_vector1(numElements);
    // Buffer holding the second argument of cyl_neumann
    cuda_managed_ptr<float_type> input_vector2(numElements);
    // Buffer receiving the device results
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vectors with values in [0, 1]
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = std::rand()/(float_type)RAND_MAX;
        input_vector2[i] = std::rand()/(float_type)RAND_MAX;
    }

    // Launch the cyl_neumann test kernel; the block count is rounded up so
    // that every element is covered by a thread
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);

    // A kernel launch returns no status itself: configuration errors are
    // picked up with cudaGetLastError(), while faults raised during
    // execution are reported by the synchronizing call.
    const cudaError_t launchErr = cudaGetLastError();
    const cudaError_t syncErr = cudaDeviceSynchronize();
    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    if (launchErr != cudaSuccess)
    {
        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(launchErr) << ")!" << std::endl;
        return EXIT_FAILURE;
    }
    if (syncErr != cudaSuccess)
    {
        std::cerr << "CUDA kernel execution failed (error code " << cudaGetErrorString(syncErr) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Compute the reference results on the host, timing the loop
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for (int i = 0; i < numElements; ++i)
    {
        results.push_back(boost::math::cyl_neumann(input_vector1[i], input_vector2[i]));
    }
    const double t = w.elapsed();

    // Check the device results against the host results; a difference of
    // more than 10 epsilon counts as a failure
    for (int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return EXIT_SUCCESS;
}