forked from abacusmodeling/abacus-develop
-
Notifications
You must be signed in to change notification settings - Fork 221
Expand file tree
/
Copy pathparallel_2d.cpp
More file actions
144 lines (125 loc) · 3.85 KB
/
parallel_2d.cpp
File metadata and controls
144 lines (125 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include "parallel_2d.h"
#include "source_base/module_external/scalapack_connector.h"
#include <cassert>
#include <numeric>
bool Parallel_2D::in_this_processor(const int iw1_all, const int iw2_all) const
{
    // A global element (iw1_all, iw2_all) lives on this process iff both its
    // row and its column map to a valid (non -1) local index.
    const bool has_row = (global2local_row(iw1_all) != -1);
    const bool has_col = (global2local_col(iw2_all) != -1);
    return has_row && has_col;
}
int Parallel_2D::get_global_row_size() const
{
#ifdef __MPI
    // True 2D distribution: the global row count is the M_ field (desc[2])
    // of the ScaLAPACK descriptor.
    if (!is_serial)
    {
        return desc[2];
    }
#endif
    // Serial setup (or non-MPI build): the local row count is global.
    return nrow;
}
int Parallel_2D::get_global_col_size() const
{
#ifdef __MPI
    // True 2D distribution: the global column count is the N_ field
    // (desc[3]) of the ScaLAPACK descriptor.
    if (!is_serial)
    {
        return desc[3];
    }
#endif
    // Serial setup (or non-MPI build): the local column count is global.
    return ncol;
}
#ifdef __MPI
MPI_Comm Parallel_2D::comm() const
{
    // A negative context means no BLACS grid is attached to this object;
    // passing it to blacs_get would be an error, so report "no communicator".
    if (blacs_ctxt < 0)
    {
        return MPI_COMM_NULL;
    }
    // blacs_get with "what" = 10 maps a BLACS context to the index of its
    // associated system context, which blacs2sys_handle then converts into
    // the underlying MPI communicator.
    int sys_ctxt = 0;
    Cblacs_get(blacs_ctxt, 10, &sys_ctxt);
    return Cblacs2sys_handle(sys_ctxt);
}
void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode)
{
    // Factorize num_proc = dim0 * dim1 with the factors as close to each
    // other as possible and dim0 <= dim1: start at floor(sqrt(num_proc))
    // and walk down to the nearest divisor.
    int num_proc = 0;
    MPI_Comm_size(comm, &num_proc);
    for (dim0 = static_cast<int>(std::sqrt(num_proc + 0.5)); num_proc % dim0 != 0; --dim0)
    {
    }
    dim1 = num_proc / dim0;
    if (mode)
    {
        std::swap(dim0, dim1);
    }

    // Create the BLACS grid (row-major process ordering) on top of the
    // given communicator and record this process's grid coordinates.
    blacs_ctxt = Csys2blacs_handle(comm);
    char order = 'R'; // row-major
    Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1);
    Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]);
}
void Parallel_2D::_set_dist_info(const int mg, const int ng, const int nb)
{
    this->nb = nb;

    // Local extents of the mg x ng matrix under the 2D block-cyclic
    // distribution, as computed by ScaLAPACK's numroc.
    const int i_zero = 0;
    nrow = numroc_(&mg, &nb, &coord[0], &i_zero, &dim0);
    ncol = numroc_(&ng, &nb, &coord[1], &i_zero, &dim1);
    nloc = static_cast<int64_t>(nrow) * ncol;

    // Build the ScaLAPACK array descriptor. The local leading dimension
    // must be at least 1 even when this process holds no rows.
    int info = 0;
    int lld = std::max(nrow, 1);
    descinit_(desc, &mg, &ng, &nb, &nb, &i_zero, &i_zero, &blacs_ctxt, &lld, &info);

    // Index maps for the block-cyclic layout:
    //   local l -> global (l / nb * ndim + mycoord) * nb + l % nb
    // Global indices not owned by this process keep -1.
    local2global_row_.resize(nrow);
    global2local_row_.assign(mg, -1);
    for (int l = 0; l < nrow; ++l)
    {
        const int g = (l / nb * dim0 + coord[0]) * nb + l % nb;
        local2global_row_[l] = g;
        global2local_row_[g] = l;
    }

    local2global_col_.resize(ncol);
    global2local_col_.assign(ng, -1);
    for (int l = 0; l < ncol; ++l)
    {
        const int g = (l / nb * dim1 + coord[1]) * nb + l % nb;
        local2global_col_[l] = g;
        global2local_col_[g] = l;
    }
}
int Parallel_2D::init(const int mg, const int ng, const int nb, const MPI_Comm comm, const bool mode)
{
    // Build the process grid first, then distribute the mg x ng matrix on it.
    _init_proc_grid(comm, mode);
    _set_dist_info(mg, ng, nb);
    // Non-zero return flags that this process ended up with an empty block.
    return (nrow == 0 || ncol == 0) ? 1 : 0;
}
int Parallel_2D::set(const int mg, const int ng, const int nb, const int blacs_ctxt)
{
    // Adopt an already-initialized BLACS context instead of creating a grid,
    // then distribute the mg x ng matrix on it.
    this->blacs_ctxt = blacs_ctxt;
    Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]);
    _set_dist_info(mg, ng, nb);
    // Non-zero return flags that this process ended up with an empty block.
    return (nrow == 0 || ncol == 0) ? 1 : 0;
}
#endif
void Parallel_2D::set_serial(const int mg, const int ng)
{
    assert(mg > 0 && ng > 0);

    // Degenerate 1x1 "grid": this process owns the entire mg x ng matrix.
    nb = 1;
    dim0 = dim1 = 1;
    coord[0] = coord[1] = 0;
    nrow = mg;
    ncol = ng;
    nloc = static_cast<int64_t>(nrow) * ncol;

    // With a single process both directions' index maps are the identity.
    local2global_row_.resize(nrow);
    std::iota(local2global_row_.begin(), local2global_row_.end(), 0);
    global2local_row_ = local2global_row_;

    local2global_col_.resize(ncol);
    std::iota(local2global_col_.begin(), local2global_col_.end(), 0);
    global2local_col_ = local2global_col_;

    is_serial = true;
#ifdef __MPI
    // Invalid BLACS context: comm() will report MPI_COMM_NULL.
    blacs_ctxt = -1;
#endif
}