parallel_device.cpp
#include "parallel_device.h"
#ifdef __MPI
namespace Parallel_Common
{
// Non-blocking point-to-point sends: one overload per supported element type,
// so templated callers never need to select the MPI datatype by hand.
void isend_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request)
{
    MPI_Isend(buf, count, MPI_DOUBLE, dest, tag, comm, request);
}
void isend_data(const std::complex<double>* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request)
{
    MPI_Isend(buf, count, MPI_DOUBLE_COMPLEX, dest, tag, comm, request);
}
void isend_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request)
{
    MPI_Isend(buf, count, MPI_FLOAT, dest, tag, comm, request);
}
void isend_data(const std::complex<float>* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request)
{
    MPI_Isend(buf, count, MPI_COMPLEX, dest, tag, comm, request);
}

// Blocking point-to-point sends.
void send_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm)
{
    MPI_Send(buf, count, MPI_DOUBLE, dest, tag, comm);
}
void send_data(const std::complex<double>* buf, int count, int dest, int tag, MPI_Comm& comm)
{
    MPI_Send(buf, count, MPI_DOUBLE_COMPLEX, dest, tag, comm);
}
void send_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm)
{
    MPI_Send(buf, count, MPI_FLOAT, dest, tag, comm);
}
void send_data(const std::complex<float>* buf, int count, int dest, int tag, MPI_Comm& comm)
{
    MPI_Send(buf, count, MPI_COMPLEX, dest, tag, comm);
}

// Blocking point-to-point receives.
void recv_data(double* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status)
{
    MPI_Recv(buf, count, MPI_DOUBLE, source, tag, comm, status);
}
void recv_data(std::complex<double>* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status)
{
    MPI_Recv(buf, count, MPI_DOUBLE_COMPLEX, source, tag, comm, status);
}
void recv_data(float* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status)
{
    MPI_Recv(buf, count, MPI_FLOAT, source, tag, comm, status);
}
void recv_data(std::complex<float>* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status)
{
    MPI_Recv(buf, count, MPI_COMPLEX, source, tag, comm, status);
}

// Broadcasts from rank 0; complex buffers are transferred as 2*n reals.
void bcast_data(std::complex<double>* object, const int& n, const MPI_Comm& comm)
{
    MPI_Bcast(object, n * 2, MPI_DOUBLE, 0, comm);
}
void bcast_data(std::complex<float>* object, const int& n, const MPI_Comm& comm)
{
    MPI_Bcast(object, n * 2, MPI_FLOAT, 0, comm);
}
void bcast_data(double* object, const int& n, const MPI_Comm& comm)
{
    MPI_Bcast(object, n, MPI_DOUBLE, 0, comm);
}
void bcast_data(float* object, const int& n, const MPI_Comm& comm)
{
    MPI_Bcast(object, n, MPI_FLOAT, 0, comm);
}

// In-place element-wise sums over all ranks (MPI_Allreduce with MPI_SUM);
// complex buffers are reduced as 2*n reals.
void reduce_data(std::complex<double>* object, const int& n, const MPI_Comm& comm)
{
    MPI_Allreduce(MPI_IN_PLACE, object, n * 2, MPI_DOUBLE, MPI_SUM, comm);
}
void reduce_data(std::complex<float>* object, const int& n, const MPI_Comm& comm)
{
    MPI_Allreduce(MPI_IN_PLACE, object, n * 2, MPI_FLOAT, MPI_SUM, comm);
}
void reduce_data(double* object, const int& n, const MPI_Comm& comm)
{
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, comm);
}
void reduce_data(float* object, const int& n, const MPI_Comm& comm)
{
    MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_FLOAT, MPI_SUM, comm);
}

// Variable-count all-gathers: every rank receives the concatenation of all
// contributions, placed according to recvcounts/displs.
void gatherv_data(const double* sendbuf, int sendcount, double* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm)
{
    MPI_Allgatherv(sendbuf, sendcount, MPI_DOUBLE, recvbuf, recvcounts, displs, MPI_DOUBLE, comm);
}
void gatherv_data(const std::complex<double>* sendbuf, int sendcount, std::complex<double>* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm)
{
    MPI_Allgatherv(sendbuf, sendcount, MPI_DOUBLE_COMPLEX, recvbuf, recvcounts, displs, MPI_DOUBLE_COMPLEX, comm);
}
void gatherv_data(const float* sendbuf, int sendcount, float* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm)
{
    MPI_Allgatherv(sendbuf, sendcount, MPI_FLOAT, recvbuf, recvcounts, displs, MPI_FLOAT, comm);
}
void gatherv_data(const std::complex<float>* sendbuf, int sendcount, std::complex<float>* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm)
{
    MPI_Allgatherv(sendbuf, sendcount, MPI_COMPLEX, recvbuf, recvcounts, displs, MPI_COMPLEX, comm);
}
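
// Usage sketch (hypothetical helper, not referenced elsewhere in this file):
// because the wrappers above are overloaded on the element type, generic code can
// exchange double/float/complex<double>/complex<float> buffers without naming an
// MPI datatype. The helper name below is illustrative only.
template <typename T>
static void exchange_sketch(const T* sendbuf, T* recvbuf, int count, int partner, int tag, MPI_Comm& comm)
{
    MPI_Request request;
    MPI_Status status;
    isend_data(sendbuf, count, partner, tag, comm, &request); // start the non-blocking send to the partner rank
    recv_data(recvbuf, count, partner, tag, comm, &status);   // receive the partner's block
    MPI_Wait(&request, MPI_STATUS_IGNORE);                    // complete the send before sendbuf may be reused
}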
#ifndef __CUDA_MPI
// Without CUDA-aware MPI, device buffers cannot be handed to MPI directly.
// object_cpu_point provides the host-side staging needed in that case: the GPU
// specialization creates (or reuses) a host copy of a device buffer and
// synchronizes it in either direction, while the CPU specialization is a
// pass-through that avoids any extra copies.
template <typename T>
struct object_cpu_point<T, base_device::DEVICE_GPU>
{
    bool alloc = false;
    // Return a host pointer holding a copy of the device data. If tmp_space is
    // provided it is reused; otherwise a host buffer is allocated and `alloc`
    // records that it must be released later by del().
    T* get(const T* object, const int& n, T* tmp_space = nullptr)
    {
        T* object_cpu = nullptr;
        alloc = false;
        if (tmp_space == nullptr)
        {
            base_device::memory::resize_memory_op<T, base_device::DEVICE_CPU>()(object_cpu, n);
            alloc = true;
        }
        else
        {
            object_cpu = tmp_space;
        }
        base_device::memory::synchronize_memory_op<T, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(object_cpu,
                                                                                                          object,
                                                                                                          n);
        return object_cpu;
    }
    // Copy n elements from the host staging buffer back to the device.
    void sync_h2d(T* object, const T* object_cpu, const int& n)
    {
        base_device::memory::synchronize_memory_op<T, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(object,
                                                                                                          object_cpu,
                                                                                                          n);
    }
    // Copy n elements from the device buffer into the host staging buffer.
    void sync_d2h(T* object_cpu, const T* object, const int& n)
    {
        base_device::memory::synchronize_memory_op<T, base_device::DEVICE_CPU, base_device::DEVICE_GPU>()(object_cpu,
                                                                                                          object,
                                                                                                          n);
    }
    // Release the host staging buffer if get() allocated it.
    void del(T* object_cpu)
    {
        if (alloc)
        {
            base_device::memory::delete_memory_op<T, base_device::DEVICE_CPU>()(object_cpu);
        }
    }
};

// CPU specialization: the data already lives in host memory, so get() simply
// returns the original pointer and the synchronization/cleanup hooks are no-ops.
template <typename T>
struct object_cpu_point<T, base_device::DEVICE_CPU>
{
    bool alloc = false;
    T* get(const T* object, const int& n, T* tmp_space = nullptr)
    {
        return const_cast<T*>(object);
    }
    void sync_h2d(T* object, const T* object_cpu, const int& n)
    {
    }
    void sync_d2h(T* object_cpu, const T* object, const int& n)
    {
    }
    void del(T* object_cpu)
    {
    }
};

// Explicit instantiations for the element/device combinations used elsewhere.
template struct object_cpu_point<double, base_device::DEVICE_CPU>;
template struct object_cpu_point<double, base_device::DEVICE_GPU>;
template struct object_cpu_point<std::complex<double>, base_device::DEVICE_CPU>;
template struct object_cpu_point<std::complex<double>, base_device::DEVICE_GPU>;
template struct object_cpu_point<float, base_device::DEVICE_CPU>;
template struct object_cpu_point<float, base_device::DEVICE_GPU>;
template struct object_cpu_point<std::complex<float>, base_device::DEVICE_CPU>;
template struct object_cpu_point<std::complex<float>, base_device::DEVICE_GPU>;
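
// Usage sketch (hypothetical helper, not referenced elsewhere in this file):
// the typical staging pattern when MPI is not CUDA-aware is to obtain a host
// copy from object_cpu_point, run the CPU communication wrapper on it, and
// synchronize the result back to the device. The header may define its own
// variant of this pattern; the name below is illustrative only.
template <typename T, typename Device>
static void bcast_dev_sketch(T* object, const int& n, const MPI_Comm& comm, T* tmp_space = nullptr)
{
    object_cpu_point<T, Device> point;
    T* object_cpu = point.get(object, n, tmp_space); // host view of the data (copied from the device if needed)
    bcast_data(object_cpu, n, comm);                 // rank 0 broadcasts via the host buffer
    point.sync_h2d(object, object_cpu, n);           // push the received data back to the device (no-op on CPU)
    point.del(object_cpu);                           // free the temporary host buffer if get() allocated one
}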
#endif
} // namespace Parallel_Common
#endif