40 #ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ 41 #define __CLANG_CUDA_RUNTIME_WRAPPER_H__ 43 #if defined(__CUDA__) && defined(__clang__) 56 #pragma push_macro("__THROW") 57 #pragma push_macro("__CUDA_ARCH__") 63 #if !defined(CUDA_VERSION) 64 #error "cuda.h did not define CUDA_VERSION" 65 #elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020 66 #error "Unsupported CUDA version!" 72 #define __CUDA_ARCH__ 350 79 #define __DEVICE_LAUNCH_PARAMETERS_H__ 84 #define __DEVICE_FUNCTIONS_H__ 85 #define __MATH_FUNCTIONS_H__ 86 #define __COMMON_FUNCTIONS_H__ 89 #define __DEVICE_FUNCTIONS_DECLS_H__ 92 #if CUDA_VERSION < 9000 95 #define __CUDA_LIBDEVICE__ 99 #include "driver_types.h" 100 #include "host_config.h" 101 #include "host_defines.h" 106 #pragma push_macro("nv_weak") 109 #undef __CUDA_LIBDEVICE__ 111 #include "cuda_runtime.h" 113 #pragma pop_macro("nv_weak") 119 #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) 120 #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) 122 #if CUDA_VERSION < 9000 123 #include "crt/device_runtime.h" 125 #include "crt/host_runtime.h" 129 #undef __cxa_vec_ctor 130 #undef __cxa_vec_cctor 131 #undef __cxa_vec_dtor 133 #undef __cxa_vec_new2 134 #undef __cxa_vec_new3 135 #undef __cxa_vec_delete2 136 #undef __cxa_vec_delete 137 #undef __cxa_vec_delete3 138 #undef __cxa_pure_virtual 156 #if CUDA_VERSION >= 9000 171 #if defined(CU_DEVICE_INVALID) 172 #if !defined(__USE_FAST_MATH__) 173 #define __USE_FAST_MATH__ 0 176 #if !defined(__CUDA_PREC_DIV) 177 #define __CUDA_PREC_DIV 0 183 #pragma push_macro("__host__") 184 #define __host__ UNEXPECTED_HOST_ATTRIBUTE 190 #pragma push_macro("__forceinline__") 191 #define __forceinline__ __device__ __inline__ __attribute__((always_inline)) 192 #if CUDA_VERSION < 9000 193 #include "device_functions.hpp" 202 #pragma push_macro("__USE_FAST_MATH__") 203 #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) 204 #define __USE_FAST_MATH__ 1 207 #if CUDA_VERSION >= 9000 210 #if CUDA_VERSION >= 9020 213 #include 
"crt/math_functions.hpp" 215 #include "math_functions.hpp" 218 #pragma pop_macro("__USE_FAST_MATH__") 220 #if CUDA_VERSION < 9000 221 #include "math_functions_dbl_ptx3.hpp" 223 #pragma pop_macro("__forceinline__") 227 #undef __MATH_FUNCTIONS_HPP__ 229 #if CUDA_VERSION < 9000 230 #include "math_functions.hpp" 247 #if CUDA_VERSION < 9000 261 #if CUDA_VERSION >= 9000 264 #include "device_atomic_functions.h" 266 #undef __DEVICE_FUNCTIONS_HPP__ 267 #include "device_atomic_functions.hpp" 268 #if CUDA_VERSION >= 9000 269 #include "crt/device_functions.hpp" 270 #include "crt/device_double_functions.hpp" 272 #include "device_functions.hpp" 274 #include "device_double_functions.h" 277 #include "sm_20_atomic_functions.hpp" 278 #include "sm_20_intrinsics.hpp" 279 #include "sm_32_atomic_functions.hpp" 289 #if CUDA_VERSION >= 8000 290 #pragma push_macro("__CUDA_ARCH__") 292 #include "sm_60_atomic_functions.hpp" 293 #include "sm_61_intrinsics.hpp" 294 #pragma pop_macro("__CUDA_ARCH__") 297 #undef __MATH_FUNCTIONS_HPP__ 303 #pragma push_macro("signbit") 304 #pragma push_macro("__GNUC__") 306 #define signbit __ignored_cuda_signbit 311 #pragma push_macro("_GLIBCXX_MATH_H") 312 #pragma push_macro("_LIBCPP_VERSION") 313 #if CUDA_VERSION >= 9000 314 #undef _GLIBCXX_MATH_H 316 #ifdef _LIBCPP_VERSION 317 #define _LIBCPP_VERSION 3700 321 #if CUDA_VERSION >= 9000 322 #include "crt/math_functions.hpp" 324 #include "math_functions.hpp" 326 #pragma pop_macro("_GLIBCXX_MATH_H") 327 #pragma pop_macro("_LIBCPP_VERSION") 328 #pragma pop_macro("__GNUC__") 329 #pragma pop_macro("signbit") 331 #pragma pop_macro("__host__") 333 #include "texture_indirect_functions.h" 336 #pragma pop_macro("__CUDA_ARCH__") 337 #pragma pop_macro("__THROW") 349 __device__
int vprintf(
const char *,
const char *);
// Declaration only: a `__device__` function named `free` that takes a single
// `void *` and returns nothing. The `nothrow` attribute tells the compiler
// that a call to it does not throw. No definition is visible in this chunk.
350 __device__
void free(
void *) __attribute((nothrow));
// Declaration only: a `__device__` function named `malloc` that takes a
// `size_t` and returns `void *`. It carries two attributes:
//   - `nothrow`: a call to it does not throw;
//   - `malloc`:  the GCC/Clang function attribute of that name, applied to the
//                returned pointer (see the compiler's attribute reference).
// No definition is visible in this chunk.
351 __device__
void *malloc(
size_t) __attribute((nothrow))
__attribute__((malloc));
352 __device__
void __assertfail(
const char *__message,
const char *__file,
353 unsigned __line,
const char *__function,
358 __device__
static inline void __assert_fail(
const char *__message,
359 const char *__file,
unsigned __line,
360 const char *__function) {
361 __assertfail(__message, __file, __line, __function,
sizeof(
char));
// Declaration only: a variadic `__device__` function named `printf` that
// returns `int`. Its one fixed parameter is an unnamed `const char *`
// (presumably the format string -- confirm against the caller/implementation).
// No definition is visible in this chunk.
366 __device__
int printf(
const char *, ...);
// Inline `__device__` wrapper named `free`: passes `__ptr` unchanged to the
// global-scope `::free` and returns nothing.
// NOTE(review): the explicit `::` qualification suggests this wrapper lives
// inside a namespace whose opening line is not visible in this chunk -- confirm.
371 __device__
static inline void free(
void *__ptr) { ::free(__ptr); }
372 __device__
static inline void *malloc(
size_t __size) {
373 return ::malloc(__size);
380 __device__
inline __cuda_builtin_threadIdx_t::operator uint3()
const {
388 __device__
inline __cuda_builtin_blockIdx_t::operator uint3()
const {
396 __device__
inline __cuda_builtin_blockDim_t::operator dim3()
const {
397 return dim3(x, y, z);
400 __device__
inline __cuda_builtin_gridDim_t::operator dim3()
const {
401 return dim3(x, y, z);
414 #pragma push_macro("dim3") 415 #pragma push_macro("uint3") 416 #define dim3 __cuda_builtin_blockDim_t 417 #define uint3 __cuda_builtin_threadIdx_t 418 #include "curand_mtgp32_kernel.h" 419 #pragma pop_macro("dim3") 420 #pragma pop_macro("uint3") 421 #pragma pop_macro("__USE_FAST_MATH__") 424 #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ __DEVICE__ bool signbit(float __x)
Test for sign bit.
__DEVICE__ float normcdff(float __a)
__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr)
__DEVICE__ double cospi(double __a)
__DEVICE__ double sinpi(double __a)
__DEVICE__ double normcdfinv(double __a)
__DEVICE__ float rsqrtf(float __a)
__DEVICE__ double rsqrt(double __a)
static __inline__ void int __a
__DEVICE__ float cospif(float __a)
__DEVICE__ float erfcinvf(float __a)
char __v64qi __attribute__((__vector_size__(64)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__DEVICE__ float rcbrtf(float __a)
__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr)
__DEVICE__ double erfcinv(double __a)
static __inline__ vector float vector float __b
__DEVICE__ void __brkpt()
__DEVICE__ int __signbitd(double __a)
__DEVICE__ double erfcx(double __a)
int printf(__constant const char *st,...)
__DEVICE__ float sinpif(float __a)
__DEVICE__ float normcdfinvf(float __a)
__DEVICE__ double normcdf(double __a)
__DEVICE__ double rcbrt(double __a)
__DEVICE__ float erfcxf(float __a)
static __inline__ vector float vector float vector float __c