11#include <cuda_runtime.h>
12#include <device_launch_parameters.h>
13#include <vector_types.h>
14#include <vector_functions.h>
// Shorthand alias for the built-in `unsigned int` type.
28 using uint =
unsigned int;
// Pi written as an unsuffixed (double) floating-point literal.
36#define M_PI 3.14159265358979323846
// Euler's number e written as an unsuffixed (double) floating-point literal.
39#define M_E 2.71828182845904523536
// Epsilon of the Real type as reported by std::numeric_limits.
// NOTE(review): Real is declared outside this excerpt — presumably a float/double alias; confirm.
42 constexpr Real EPSILON = std::numeric_limits<Real>::epsilon();
54 return (a % b != 0) ? (a / b + 1) : (a / b);
60 int dim =
iDivUp(totalSize, blockSize);
61 return dim == 0 ? 1 : dim;
64 static uint3 cudaGridSize2D(uint2 totalSize,
uint blockSize)
67 gridDims.
x =
iDivUp(totalSize.x, blockSize);
68 gridDims.y =
iDivUp(totalSize.y, blockSize);
70 gridDims.x = gridDims.x == 0 ? 1 : gridDims.x;
71 gridDims.y = gridDims.y == 0 ? 1 : gridDims.y;
80 gridDims.
x =
iDivUp(totalSize.x, blockSize);
81 gridDims.y =
iDivUp(totalSize.y, blockSize);
82 gridDims.z =
iDivUp(totalSize.z, blockSize);
84 gridDims.x = gridDims.x == 0 ? 1 : gridDims.x;
85 gridDims.y = gridDims.y == 0 ? 1 : gridDims.y;
86 gridDims.z = gridDims.z == 0 ? 1 : gridDims.z;
94 gridDims.
x =
iDivUp(totalSize.x, blockSize.x);
95 gridDims.y =
iDivUp(totalSize.y, blockSize.y);
96 gridDims.z =
iDivUp(totalSize.z, blockSize.z);
98 gridDims.x = gridDims.x == 0 ? 1 : gridDims.x;
99 gridDims.y = gridDims.y == 0 ? 1 : gridDims.y;
100 gridDims.z = gridDims.z == 0 ? 1 : gridDims.z;
108 static inline void checkCudaError(
const char *msg) {
109 cudaError_t err = cudaGetLastError();
110 if (cudaSuccess != err) {
111 printf(
"CUDA error: %d : %s at %s:%d \n", err, cudaGetErrorString(err), __FILE__, __LINE__);
112 throw std::runtime_error(std::string(msg) +
": " + cudaGetErrorString(err));
// cuSafeCall(X): issue the CUDA call X, optionally followed by an error check.
// NOTE(review): two definitions are listed back to back; which one is active is
// presumably chosen by preprocessor conditionals that are not part of this excerpt.

// Unchecked variant: expands to the call itself and nothing else.
#define cuSafeCall(X) X

// Checked variant: runs X, then hands the stringified call text to
// dyno::checkCudaError as the message.
// The body is wrapped in do { ... } while (0) so the call and its check expand to
// ONE statement; with a bare `X; check;` expansion an unbraced `if (c) cuSafeCall(x);`
// would guard only the call and run the check unconditionally.
// Call sites must end with a semicolon, as the unchecked variant already requires.
#define cuSafeCall(X) do { X; dyno::checkCudaError(#X); } while (0)
128#define cuSynchronize() {}
130#define cuSynchronize() { \
132 cudaDeviceSynchronize(); \
133 cudaError_t err = cudaGetLastError(); \
134 if (err != cudaSuccess) \
136 sprintf(str, "CUDA error: %d : %s at %s:%d \n", err, cudaGetErrorString(err), __FILE__, __LINE__); \
137 throw std::runtime_error(std::string(str)); \
148#define cuExecute(size, Func, ...){ \
149 uint pDims = cudaGridSize((uint)size, BLOCK_SIZE); \
150 Func << <pDims, BLOCK_SIZE >> > ( \
155#define cuExecute2D(size, Func, ...){ \
156 uint3 pDims = cudaGridSize2D(size, 8); \
157 dim3 threadsPerBlock(8, 8, 1); \
158 Func << <pDims, threadsPerBlock >> > ( \
163#define cuExecute3D(size, Func, ...){ \
164 dim3 pDims = cudaGridSize3D(size, 8); \
165 dim3 threadsPerBlock(8, 8, 8); \
166 Func << <pDims, threadsPerBlock >> > ( \
171#define cuExecuteNoSync(size, Func, ...){ \
172 uint pDims = cudaGridSize((uint)size, BLOCK_SIZE); \
173 Func << <pDims, BLOCK_SIZE >> > ( \
182 DYN_FUNC
Bool(
bool v =
false) {
val = v ? 1 : 0; }
185 return 1 -
val ? true :
false;
189 uint tmpV = v ? 1 : 0;
219 ret.
val =
val & (v ? 1 : 0);
225 ret.
val =
val | (v ? 1 : 0);
260 uint tmpV = v ? 1 : 0;
265 uint tmpV = v ? 1 : 0;
269 DYN_FUNC
inline operator bool()
const {
/**
 * Allocate a T with `new`, forwarding the given arguments to its constructor,
 * and return a std::shared_ptr that owns the new object.
 *
 * @tparam T     type of the object to create
 * @tparam Args  constructor argument types (deduced)
 * @param  args  arguments perfectly forwarded to T's constructor
 * @return shared pointer owning the freshly created object
 */
template<class T, class... Args>
std::shared_ptr<T> New(Args&&... args)
{
	return std::shared_ptr<T>(new T(std::forward<Args>(args)...));
}
285 template<
class TA,
class TB>
288 TA* ptr =
dynamic_cast<TA*
>(b);
292 template<
class TA,
class TB>
293 inline std::shared_ptr<TA>
cast(std::shared_ptr<TB> b)
295 std::shared_ptr<TA> ptr = std::dynamic_pointer_cast<TA>(b);
DYN_FUNC Bool & operator=(const bool v)
DYN_FUNC bool operator||(const Bool &v) const
DYN_FUNC bool operator&&(const Bool &v) const
DYN_FUNC bool operator==(bool v) const
DYN_FUNC Bool & operator&=(const bool v)
DYN_FUNC bool operator!() const
DYN_FUNC Bool & operator|=(const bool v)
DYN_FUNC Bool(bool v=false)
DYN_FUNC Bool operator|(const bool v) const
DYN_FUNC Bool operator&(const bool v) const
std::shared_ptr< T > New(Args &&... args)
This is an implementation of AdditiveCCD based on peridyno.
constexpr Real REAL_EPSILON_SQUARED
constexpr uint BLOCK_SIZE
constexpr Real REAL_EPSILON
unsigned long long uint64
static uint iDivUp(uint a, uint b)