math/test/test_expint_float.cu
Matt Borland bbb8eee1d6
Add GPU support to expint
Add SYCL testing of expint

Add markers to forward decls

Add CUDA testing of expint

Fix static variable usage under NVRTC

Add NVRTC testing

Add configurable definition of complex

Add function aliases

Add GPU support to gegenbauer polynomials

Add SYCL testing of gegenbauer

Add NVCC testing of gegenbauer

Add NVRTC testing of gegenbauer

Add GPU support for hankel

Add SYCL testing of hankel

Add NVCC testing of cyl_hankel_1

Add comprehensive NVCC testing

Add NVRTC testing of cyl and sph hankel

Update docs

Fix writing cuda::std::complex<T> to stdout

Add GPU support to hermite

Add SYCL testing of hermite

Add CUDA testing of hermite

Add NVRTC testing of hermite

Add markers to hermite docs
2024-09-17 15:24:02 -04:00

107 lines
3.0 KiB
Plaintext

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
typedef float float_type;
/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
using std::cos;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < numElements)
{
out[i] = boost::math::expint(in[i]);
}
}
/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;
// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);
// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);
// Initialize the input vectors
std::mt19937_64 rng(42);
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = dist(rng);
}
// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
watch w;
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
cudaDeviceSynchronize();
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}
// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
for(int i = 0; i < numElements; ++i)
results.push_back(boost::math::expint(input_vector[i]));
double t = w.elapsed();
// check the results
for(int i = 0; i < numElements; ++i)
{
if (std::isfinite(results[i]))
{
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}
}
std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";
return 0;
}