26 #ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ 27 #define __CLANG_CUDA_RUNTIME_WRAPPER_H__ 29 #if defined(__CUDA__) && defined(__clang__) 42 #pragma push_macro("__THROW") 43 #pragma push_macro("__CUDA_ARCH__") 49 #if !defined(CUDA_VERSION) 50 #error "cuda.h did not define CUDA_VERSION" 51 #elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010 52 #error "Unsupported CUDA version!" 55 #pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") 56 #if CUDA_VERSION >= 10000 57 #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ 63 #define __CUDA_ARCH__ 350 70 #define __DEVICE_LAUNCH_PARAMETERS_H__ 75 #define __DEVICE_FUNCTIONS_H__ 76 #define __MATH_FUNCTIONS_H__ 77 #define __COMMON_FUNCTIONS_H__ 80 #define __DEVICE_FUNCTIONS_DECLS_H__ 83 #if CUDA_VERSION < 9000 86 #define __CUDA_LIBDEVICE__ 90 #include "driver_types.h" 91 #include "host_config.h" 92 #include "host_defines.h" 97 #pragma push_macro("nv_weak") 100 #undef __CUDA_LIBDEVICE__ 102 #include "cuda_runtime.h" 104 #pragma pop_macro("nv_weak") 110 #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) 111 #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) 113 #if CUDA_VERSION < 9000 114 #include "crt/device_runtime.h" 116 #include "crt/host_runtime.h" 120 #undef __cxa_vec_ctor 121 #undef __cxa_vec_cctor 122 #undef __cxa_vec_dtor 124 #undef __cxa_vec_new2 125 #undef __cxa_vec_new3 126 #undef __cxa_vec_delete2 127 #undef __cxa_vec_delete 128 #undef __cxa_vec_delete3 129 #undef __cxa_pure_virtual 147 #if CUDA_VERSION >= 9000 162 #if defined(CU_DEVICE_INVALID) 163 #if !defined(__USE_FAST_MATH__) 164 #define __USE_FAST_MATH__ 0 167 #if !defined(__CUDA_PREC_DIV) 168 #define __CUDA_PREC_DIV 0 174 #pragma push_macro("__host__") 175 #define __host__ UNEXPECTED_HOST_ATTRIBUTE 181 #pragma push_macro("__forceinline__") 182 #define __forceinline__ __device__ __inline__ __attribute__((always_inline)) 183 #if CUDA_VERSION < 9000 184 #include "device_functions.hpp" 193 #pragma push_macro("__USE_FAST_MATH__") 194 #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) 195 #define __USE_FAST_MATH__ 1 198 #if CUDA_VERSION >= 9000 201 #if CUDA_VERSION >= 9020 204 #include "crt/math_functions.hpp" 206 #include "math_functions.hpp" 209 #pragma pop_macro("__USE_FAST_MATH__") 211 #if CUDA_VERSION < 9000 212 #include "math_functions_dbl_ptx3.hpp" 214 #pragma pop_macro("__forceinline__") 218 #undef __MATH_FUNCTIONS_HPP__ 220 #if CUDA_VERSION < 9000 221 #include "math_functions.hpp" 238 #if CUDA_VERSION < 9000 241 static inline __device__
void __brkpt(
int __c) { __brkpt(); }
252 #if CUDA_VERSION >= 9000 255 #include "device_atomic_functions.h" 257 #undef __DEVICE_FUNCTIONS_HPP__ 258 #include "device_atomic_functions.hpp" 259 #if CUDA_VERSION >= 9000 260 #include "crt/device_functions.hpp" 261 #include "crt/device_double_functions.hpp" 263 #include "device_functions.hpp" 265 #include "device_double_functions.h" 268 #include "sm_20_atomic_functions.hpp" 269 #include "sm_20_intrinsics.hpp" 270 #include "sm_32_atomic_functions.hpp" 280 #if CUDA_VERSION >= 8000 281 #pragma push_macro("__CUDA_ARCH__") 283 #include "sm_60_atomic_functions.hpp" 284 #include "sm_61_intrinsics.hpp" 285 #pragma pop_macro("__CUDA_ARCH__") 288 #undef __MATH_FUNCTIONS_HPP__ 294 #pragma push_macro("signbit") 295 #pragma push_macro("__GNUC__") 297 #define signbit __ignored_cuda_signbit 302 #pragma push_macro("_GLIBCXX_MATH_H") 303 #pragma push_macro("_LIBCPP_VERSION") 304 #if CUDA_VERSION >= 9000 305 #undef _GLIBCXX_MATH_H 307 #ifdef _LIBCPP_VERSION 308 #define _LIBCPP_VERSION 3700 312 #if CUDA_VERSION >= 9000 313 #include "crt/math_functions.hpp" 315 #include "math_functions.hpp" 317 #pragma pop_macro("_GLIBCXX_MATH_H") 318 #pragma pop_macro("_LIBCPP_VERSION") 319 #pragma pop_macro("__GNUC__") 320 #pragma pop_macro("signbit") 322 #pragma pop_macro("__host__") 324 #include "texture_indirect_functions.h" 327 #pragma pop_macro("__CUDA_ARCH__") 328 #pragma pop_macro("__THROW") 340 __device__
int vprintf(
const char *,
const char *);
341 __device__
void free(
void *) __attribute((nothrow));
342 __device__
void *malloc(
size_t) __attribute((nothrow))
__attribute__((malloc));
343 __device__
void __assertfail(
const char *__message,
const char *__file,
344 unsigned __line,
const char *__function,
349 __device__
static inline void __assert_fail(
const char *__message,
350 const char *__file,
unsigned __line,
351 const char *__function) {
352 __assertfail(__message, __file, __line, __function,
sizeof(
char));
357 __device__
int printf(
const char *, ...);
362 __device__
static inline void free(
void *__ptr) { ::free(__ptr); }
363 __device__
static inline void *malloc(
size_t __size) {
364 return ::malloc(__size);
371 __device__
inline __cuda_builtin_threadIdx_t::operator uint3()
const {
379 __device__
inline __cuda_builtin_blockIdx_t::operator uint3()
const {
387 __device__
inline __cuda_builtin_blockDim_t::operator dim3()
const {
388 return dim3(x, y, z);
391 __device__
inline __cuda_builtin_gridDim_t::operator dim3()
const {
392 return dim3(x, y, z);
405 #pragma push_macro("dim3") 406 #pragma push_macro("uint3") 407 #define dim3 __cuda_builtin_blockDim_t 408 #define uint3 __cuda_builtin_threadIdx_t 409 #include "curand_mtgp32_kernel.h" 410 #pragma pop_macro("dim3") 411 #pragma pop_macro("uint3") 412 #pragma pop_macro("__USE_FAST_MATH__") 413 #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") 419 #if CUDA_VERSION >= 9020 420 extern "C" unsigned __cudaPushCallConfiguration(dim3
gridDim, dim3
blockDim,
421 size_t sharedMem = 0,
426 #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ __DEVICE__ bool signbit(float __x)
Test for sign bit.
vector signed char unaligned_vec_schar __attribute__((aligned(1)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__DEVICE__ void sincospif(float __a, float *__s, float *__c)
__DEVICE__ float normcdff(float __a)
__DEVICE__ double cospi(double __a)
__DEVICE__ double sinpi(double __a)
__DEVICE__ double normcdfinv(double __a)
__DEVICE__ float rsqrtf(float __a)
__DEVICE__ double rsqrt(double __a)
static __inline__ void int __a
__DEVICE__ float cospif(float __a)
__DEVICE__ float erfcinvf(float __a)
__DEVICE__ float rcbrtf(float __a)
__DEVICE__ double erfcinv(double __a)
static __inline__ vector float vector float __b
__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim
__DEVICE__ int __signbitd(double __a)
int printf(__constant const char *st,...) __attribute__((format(printf
__DEVICE__ double erfcx(double __a)
__DEVICE__ float sinpif(float __a)
__DEVICE__ float normcdfinvf(float __a)
__DEVICE__ double normcdf(double __a)
__DEVICE__ double rcbrt(double __a)
__DEVICE__ float erfcxf(float __a)
__DEVICE__ void sincospi(double __a, double *__s, double *__c)
static __inline__ vector float vector float vector float __c
__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim