StarPU Internal Handbook
Loading...
Searching...
No Matches
driver_cuda.h
Go to the documentation of this file.
1/* StarPU --- Runtime system for heterogeneous multicore architectures.
2 *
3 * Copyright (C) 2008-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
4 * Copyright (C) 2015 Mathieu Lirzin
5 *
6 * StarPU is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or (at
9 * your option) any later version.
10 *
11 * StarPU is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 *
15 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16 */
17
18#ifndef __DRIVER_CUDA_H__
19#define __DRIVER_CUDA_H__
20
23#include <common/config.h>
24
/* Takes no arguments and returns nothing.
 * This prototype appears before the `#pragma GCC visibility push(hidden)`
 * line of this header and outside every STARPU_USE_CUDA / STARPU_SIMGRID
 * guard, so it is declared in all configurations and is not covered by the
 * hidden-visibility region.
 * NOTE(review): by its name this is an early, pre-initialization hook of the
 * CUDA driver -- confirm against the implementation. */
25void _starpu_cuda_preinit(void);
26
27#ifdef STARPU_USE_CUDA
28#include <cuda.h>
29#include <cuda_runtime_api.h>
30#ifdef STARPU_HAVE_LIBNVIDIA_ML
31#include <nvml.h>
32#endif
33#endif
34
35#include <starpu.h>
36#include <core/workers.h>
37#include <datawizard/node_ops.h>
38
39#pragma GCC visibility push(hidden)
40
/* Extern declarations only; the objects themselves are defined elsewhere.
 * One is a `struct _starpu_driver_ops`, the other a `struct _starpu_node_ops`.
 * NOTE(review): by their names these are the CUDA driver's operation table
 * and its memory-node operation table -- confirm against the definitions. */
41extern struct _starpu_driver_ops _starpu_driver_cuda_ops;
42extern struct _starpu_node_ops _starpu_driver_cuda_node_ops;
43
/* Global int, declared extern here and defined elsewhere.
 * NOTE(review): presumably the number of StarPU workers attached to each
 * CUDA device -- confirm where it is set. */
44extern int _starpu_nworker_per_cuda;
45
/* Takes no arguments and returns nothing.
 * NOTE(review): presumably the main CUDA driver initialization entry point,
 * as opposed to _starpu_cuda_preinit() -- confirm. */
46void _starpu_cuda_init(void);
/* Returns an unsigned count.
 * NOTE(review): by its name, the number of CUDA devices -- confirm whether
 * this is the detected or the configured count. */
47unsigned _starpu_get_cuda_device_count(void);
48#ifdef STARPU_HAVE_HWLOC
/* Only declared when STARPU_HAVE_HWLOC is defined. Takes an hwloc topology
 * and an integer device id and returns an hwloc_obj_t.
 * NOTE(review): presumably the hwloc object associated with CUDA device
 * `devid`; behaviour when no object exists is not visible here. */
50hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid);
51#endif
/* Square 2-D int table; each dimension has
 * STARPU_MAXCUDADEVS + STARPU_MAXNUMANODES entries.
 * NOTE(review): presumably bus identifiers indexed by (source, destination)
 * over CUDA devices and NUMA nodes -- the index order is not visible here. */
52extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES];
53
/* The prototypes in this group exist only when STARPU_USE_CUDA or
 * STARPU_SIMGRID is defined. Otherwise only _starpu_cuda_discover_devices
 * is provided, as a macro (see the #else branch); the other names in this
 * group are not declared at all. */
54#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
/* Takes the machine configuration; returns nothing. */
55void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
/* Takes the machine topology and the machine configuration; returns nothing. */
56void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *);
/* The next two share one parameter list: machine configuration, an int
 * `no_mp_config`, and the worker being set up.
 * NOTE(review): by their names they set up the worker's CPU binding and
 * memory node respectively; the meaning of `no_mp_config` is not visible
 * here -- confirm. */
57void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg);
58void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg);
59void _starpu_init_cuda(void);
/* NOTE(review): four argument-less cuBLAS v2 hooks; how the *_func pair
 * differs from the init/shutdown pair is not visible in this header. */
60void _starpu_init_cublas_v2_func(void);
61void _starpu_shutdown_cublas_v2_func(void);
62void _starpu_cublas_v2_init(void);
63void _starpu_cublas_v2_shutdown(void);
/* Has the `void *(*)(void *)` shape.
 * NOTE(review): presumably used as a thread start routine for a CUDA
 * worker; what the argument points to is not visible here. */
64void *_starpu_cuda_worker(void *);
65#ifdef STARPU_HAVE_LIBNVIDIA_ML
/* Takes CUDA device properties and returns an NVML device handle.
 * NOTE(review): <nvml.h> and the CUDA headers are included by this header
 * only under STARPU_USE_CUDA, while this prototype is also reachable with
 * STARPU_SIMGRID alone -- confirm that STARPU_HAVE_LIBNVIDIA_ML cannot be
 * defined without STARPU_USE_CUDA, or that another header supplies the
 * types. */
66nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props);
67#endif
68#else
/* Fallback without CUDA or SimGrid: the call expands to `((void) config)`,
 * which evaluates its argument and discards it, so call sites still compile. */
69# define _starpu_cuda_discover_devices(config) ((void) config)
70#endif
71
/* These three prototypes are declared only when both STARPU_USE_CUDA and
 * STARPU_USE_CUDA_MAP are defined. */
72#ifdef STARPU_USE_CUDA
73#ifdef STARPU_USE_CUDA_MAP
/* Returns a uintptr_t and additionally reports through the `ret` pointer.
 * NOTE(review): presumably maps `size` bytes at src_ptr + src_offset on
 * src_node so they are reachable from dst_node, returns the mapped address
 * and stores a status code in *ret -- confirm the convention. */
74uintptr_t _starpu_cuda_map_ram(uintptr_t src_ptr, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret);
/* NOTE(review): presumably undoes a mapping made by _starpu_cuda_map_ram();
 * the meaning of the int return value is not visible here. */
75int _starpu_cuda_unmap_ram(uintptr_t src_ptr, size_t src_offset, unsigned src_node, uintptr_t dst_ptr, unsigned dst_node, size_t size);
/* NOTE(review): takes a (pointer, offset, node) triple for each side plus a
 * size; presumably refreshes a mapped region -- confirm the direction and
 * the return convention. */
76int _starpu_cuda_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size);
77#endif
78#endif
79
/* Both functions take the asynchronous channel of a transfer. The first
 * returns an unsigned value, the second returns nothing.
 * NOTE(review): by their names, the first is a non-blocking completion test
 * (presumably non-zero once the request completed) and the second blocks
 * until completion -- confirm against the implementation. */
80unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel);
81void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel);
82
/* Interface-level copies. All three share one parameter list: the data
 * handle, an opaque source interface with its node, an opaque destination
 * interface with its node, and a data request; each returns int. The
 * function name encodes the direction (cpu->cuda, cuda->cuda, cuda->cpu).
 * NOTE(review): the return-value convention and whether `req` may be NULL
 * are not visible in this header -- confirm. */
83int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
84int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
85int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
86
/* Raw copies. All three share one parameter list: a (pointer, offset, node)
 * triple for the source, the same for the destination, a byte count `size`,
 * and an asynchronous channel; each returns int. The function name encodes
 * the direction.
 * NOTE(review): presumably copies `size` bytes from src + src_offset to
 * dst + dst_offset; the return convention and the effect of a NULL
 * async_channel are not visible here -- confirm. */
87int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
88int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
89int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
90
/* 2-D variants of the raw copies. Compared with the copy_data functions the
 * single `size` argument is replaced by `blocksize`, `numblocks`, `ld_src`
 * and `ld_dst`; each returns int and the function name encodes the
 * direction.
 * NOTE(review): presumably copies `numblocks` blocks of `blocksize` bytes,
 * with consecutive blocks `ld_src` apart in the source and `ld_dst` apart in
 * the destination; whether the leading dimensions are in bytes or elements
 * is not visible here -- confirm. */
91int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
92int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
93int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
94
/* Takes two memory-node numbers and returns int.
 * NOTE(review): presumably non-zero when `handling_node` can access memory
 * of `node` directly -- confirm the exact condition in the implementation. */
95int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node);
/* Allocation pair: the first returns an address as uintptr_t for a request
 * of `size` bytes on `dst_node`; the second takes that node, an address, a
 * size and flags and returns nothing.
 * NOTE(review): the failure value of the allocator (presumably 0) and the
 * accepted `flags` bits are not visible here; presumably `size` and `flags`
 * passed to the free must match those used at allocation -- confirm. */
96uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags);
97void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
98
99#pragma GCC visibility pop
100
101#endif // __DRIVER_CUDA_H__
102
Definition copy_driver.h:71
Definition drivers.h:26
Definition workers.h:441
Definition workers.h:352
int devid[STARPU_NARCH][STARPU_NMAXDEVS]
Definition workers.h:404
Definition node_ops.h:92
Definition workers.h:155