diff --git a/lab12/exc4/.$architecture.drawio.bkp b/lab12/exc4/.$architecture.drawio.bkp
new file mode 100644
index 0000000..673f591
--- /dev/null
+++ b/lab12/exc4/.$architecture.drawio.bkp
@@ -0,0 +1,299 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lab12/exc4/Makefile b/lab12/exc4/Makefile
index 0b9574d..4ad4826 100644
--- a/lab12/exc4/Makefile
+++ b/lab12/exc4/Makefile
@@ -12,7 +12,7 @@ debug: $(SOURCE_FILES)
mpicxx $(SOURCE_FILES) -g -o $(PROGRAM_NAME) ${COMPILER_FLAGS}
run: release
- mpirun -np 4 ./$(PROGRAM_NAME) 3072 3e-6 100000
+ mpirun -np 1 ./$(PROGRAM_NAME) 3072 3e-6 100000
plot: plot.py
python3 plot.py
\ No newline at end of file
diff --git a/lab12/exc4/architecture.drawio b/lab12/exc4/architecture.drawio
new file mode 100644
index 0000000..1ad5982
--- /dev/null
+++ b/lab12/exc4/architecture.drawio
@@ -0,0 +1,335 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lab12/exc4/jacobi b/lab12/exc4/jacobi
new file mode 100755
index 0000000..76c3f9a
Binary files /dev/null and b/lab12/exc4/jacobi differ
diff --git a/lab12/exc4/jacobi.cpp b/lab12/exc4/jacobi.cpp
index 9170a26..8f2cf90 100644
--- a/lab12/exc4/jacobi.cpp
+++ b/lab12/exc4/jacobi.cpp
@@ -6,44 +6,73 @@
#include "jacobi.h"
#include "matrix_io.h"
-Jacobi::Jacobi() {
+Jacobi::Jacobi()
+{
MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
MPI_Comm_size(MPI_COMM_WORLD, &numProc_);
+ MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shm_comm_);
+ MPI_Comm_rank(shm_comm_, &shm_rank_);
+ is_first_on_node_ = (shm_rank_ == 0);
}
-bool Jacobi::isFirstRank() const {
+bool Jacobi::isFirstRank() const
+{
return rank_ == 0;
}
-bool Jacobi::isLastRank() const {
+bool Jacobi::isLastRank() const
+{
return rank_ == numProc_ - 1;
}
-int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const {
- if (isFirstRank() || isLastRank()) {
+int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const
+{
+ if (isFirstRank() || isLastRank())
+ {
return numRowsWithoutHalos + 1;
- } else {
+ }
+ else
+ {
return numRowsWithoutHalos + 2;
}
}
-int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const {
- if (isFirstRank() || isLastRank()) {
+int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const
+{
+ if (isFirstRank() || isLastRank())
+ {
return numRowsWithHalos - 1;
- } else {
+ }
+ else
+ {
return numRowsWithHalos - 2;
}
}
-int Jacobi::lowerOffset() const {
+int Jacobi::lowerOffset() const
+{
return isFirstRank() ? 0 : 1;
}
-int Jacobi::upperOffset() const {
+int Jacobi::upperOffset() const
+{
return isLastRank() ? 0 : 1;
}
-void Jacobi::exchangeHaloLayers(Matrix& phi) {
+void Jacobi::exchangeHaloLayers(Matrix &phi)
+{
+ if (is_first_on_node_)
+ {
+ exchangeHaloLayersNodeMPIProcFirst(phi);
+ }
+ else
+ {
+ exchangeHaloLayersNodeMPIProcSecond(phi);
+ }
+}
+
+void Jacobi::exchangeHaloLayersNodeMPIProcFirst(Matrix &phi)
+{
int n = phi.rows();
int sendSize = phi.cols();
int tag = 0;
@@ -51,8 +80,9 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) {
std::vector req;
// Communication with upper partner
- if (!isLastRank()) {
- int upper = rank_ + 1;
+ if (!isLastRank())
+ {
+ int upper = rank_ - 1;
MPI_Isend(&phi(n - 2, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
&req.emplace_back());
MPI_Irecv(&phi(n - 1, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
@@ -60,7 +90,43 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) {
}
// Communication with lower partner
- if (!isFirstRank()) {
+ if (!isFirstRank())
+ {
+ // communication with second rank on same node via shared memory
+ // We write our send row to shared memory row 0
+ for (int j = 0; j < sendSize; ++j)
+ shared_rows_[0 * sendSize + j] = phi(n - 2, j); // our last inner row
+
+ MPI_Win_sync(win_); // ensure memory visibility
+
+ // Wait for second proc to write its row back to shared memory row 1
+ MPI_Win_sync(win_);
+ for (int j = 0; j < sendSize; ++j)
+ phi(n - 1, j) = shared_rows_[1 * sendSize + j]; // halo from second proc
+ }
+
+ // Wait for communication to finish
+ std::vector stat(req.size());
+ MPI_Waitall(req.size(), req.data(), stat.data());
+}
+
+void Jacobi::exchangeHaloLayersNodeMPIProcSecond(Matrix &phi)
+{
+ int n = phi.rows();
+ int sendSize = phi.cols();
+ int tag = 0;
+
+ std::vector req;
+
+ // Communication with upper partner
+ if (!isLastRank())
+ {
+ // communication with first rank on same node via shared memory
+ }
+
+ // Communication with lower partner
+ if (!isFirstRank())
+ {
int lower = rank_ - 1;
MPI_Isend(&phi(1, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD,
&req.emplace_back());
@@ -73,35 +139,42 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) {
MPI_Waitall(req.size(), req.data(), stat.data());
}
-Matrix Jacobi::addHaloLayers(const Matrix& withoutHalos) {
+Matrix Jacobi::addHaloLayers(const Matrix &withoutHalos)
+{
const int numRows = withoutHalos.rows();
const int numCols = withoutHalos.cols();
Matrix withHalos = Matrix::zeros(numRowsWithHalos(numRows), numCols);
- for (int i = 0; i < numRows; ++i) {
- for (int j = 0; j < numCols; ++j) {
+ for (int i = 0; i < numRows; ++i)
+ {
+ for (int j = 0; j < numCols; ++j)
+ {
withHalos(i + lowerOffset(), j) = withoutHalos(i, j);
}
}
return withHalos;
}
-Matrix Jacobi::removeHaloLayers(const Matrix& withHalos) {
+Matrix Jacobi::removeHaloLayers(const Matrix &withHalos)
+{
const int numRows = numRowsWithoutHalos(withHalos.rows());
const int numCols = withHalos.cols();
Matrix withoutHalos = Matrix::zeros(numRows, numCols);
- for (int i = 0; i < numRows; ++i) {
- for (int j = 0; j < numCols; ++j) {
+ for (int i = 0; i < numRows; ++i)
+ {
+ for (int j = 0; j < numCols; ++j)
+ {
withoutHalos(i, j) = withHalos(i + lowerOffset(), j);
}
}
return withoutHalos;
}
-Jacobi::Result Jacobi::run(const Matrix& init, double eps, int maxNumIter) {
+Jacobi::Result Jacobi::run(const Matrix &init, double eps, int maxNumIter)
+{
std::vector phi(2, addHaloLayers(init));
const int numRows = phi[0].rows();
const int numCols = phi[0].cols();
@@ -109,15 +182,18 @@ Jacobi::Result Jacobi::run(const Matrix& init, double eps, int maxNumIter) {
int nIter = 0;
double dist = std::numeric_limits::max();
- int t0 = 0; // array index of current timestep
- int t1 = 1; // array index of next timestep
- while (dist > eps && nIter < maxNumIter) {
+ int t0 = 0; // array index of current timestep
+ int t1 = 1; // array index of next timestep
+ while (dist > eps && nIter < maxNumIter)
+ {
dist = 0;
exchangeHaloLayers(phi[t0]);
- for (int i = 1; i < numRows - 1; ++i) {
- for (int j = 1; j < numCols - 1; ++j) {
+ for (int i = 1; i < numRows - 1; ++i)
+ {
+ for (int j = 1; j < numCols - 1; ++j)
+ {
phi[t1](i, j) = .25 * (phi[t0](i + 1, j) + phi[t0](i - 1, j) +
phi[t0](i, j + 1) + phi[t0](i, j - 1));
diff --git a/lab12/exc4/jacobi.h b/lab12/exc4/jacobi.h
index e552131..f3d8479 100644
--- a/lab12/exc4/jacobi.h
+++ b/lab12/exc4/jacobi.h
@@ -31,6 +31,9 @@ class Jacobi {
private:
void exchangeHaloLayers(Matrix& phi);
+ void exchangeHaloLayersNodeMPIProcFirst(Matrix& phi);
+ void exchangeHaloLayersNodeMPIProcSecond(Matrix& phi);
+
Matrix addHaloLayers(const Matrix& phi);
Matrix removeHaloLayers(const Matrix& phi);
@@ -43,8 +46,26 @@ class Jacobi {
int numRowsWithHalos(int numRowsWithoutHalos) const;
int numRowsWithoutHalos(int numRowsWithHalos) const;
+ // Global rank on cluster
int rank_ = MPI_PROC_NULL;
+
+ // Local rank on node
+ int shm_rank_ = MPI_PROC_NULL;
+
+ // Communicator for local domain of node
+ MPI_Comm shm_comm_;
+
+ // True if this MPI proc is first on node
+ bool is_first_on_node_;
+
+ // count of MPI procs
int numProc_ = 1;
};
+// 4 times horizontal split | with mpi
+
+// 12 times vertical split - with openmp
+
+
+
#endif // JACOBI_H
\ No newline at end of file
diff --git a/lab12/exc4/jacobi2.cpp b/lab12/exc4/jacobi2.cpp
new file mode 100644
index 0000000..b404a08
--- /dev/null
+++ b/lab12/exc4/jacobi2.cpp
@@ -0,0 +1,169 @@
+#include
+#include
+#include
+
+#include "matrix.h"
+#include "jacobi.h"
+#include "matrix_io.h"
+
+Jacobi::Jacobi()
+{
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
+ MPI_Comm_size(MPI_COMM_WORLD, &numProc_);
+}
+
+bool Jacobi::isFirstRank() const
+{
+ return rank_ == 0;
+}
+
+bool Jacobi::isLastRank() const
+{
+ return rank_ == numProc_ - 1;
+}
+
+int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const
+{
+ if (isFirstRank() || isLastRank())
+ {
+ return numRowsWithoutHalos + 1;
+ }
+ else
+ {
+ return numRowsWithoutHalos + 2;
+ }
+}
+
+int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const
+{
+ if (isFirstRank() || isLastRank())
+ {
+ return numRowsWithHalos - 1;
+ }
+ else
+ {
+ return numRowsWithHalos - 2;
+ }
+}
+
+int Jacobi::lowerOffset() const
+{
+ return isFirstRank() ? 0 : 1;
+}
+
+int Jacobi::upperOffset() const
+{
+ return isLastRank() ? 0 : 1;
+}
+
+void Jacobi::exchangeHaloLayers(Matrix &phi)
+{
+ int n = phi.rows();
+ int sendSize = phi.cols();
+ int tag = 0;
+
+ std::vector req;
+
+ // Communication with upper partner
+ if (!isLastRank())
+ {
+ int upper = rank_ + 1;
+ MPI_Isend(&phi(n - 2, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
+ &req.emplace_back());
+ MPI_Irecv(&phi(n - 1, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
+ &req.emplace_back());
+ }
+
+ // Communication with lower partner
+ if (!isFirstRank())
+ {
+ int lower = rank_ - 1;
+ MPI_Isend(&phi(1, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD,
+ &req.emplace_back());
+ MPI_Irecv(&phi(0, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD,
+ &req.emplace_back());
+ }
+
+ // Wait for communication to finish
+ std::vector stat(req.size());
+ MPI_Waitall(req.size(), req.data(), stat.data());
+}
+
+Matrix Jacobi::addHaloLayers(const Matrix &withoutHalos)
+{
+ const int numRows = withoutHalos.rows();
+ const int numCols = withoutHalos.cols();
+
+ Matrix withHalos = Matrix::zeros(numRowsWithHalos(numRows), numCols);
+
+ for (int i = 0; i < numRows; ++i)
+ {
+ for (int j = 0; j < numCols; ++j)
+ {
+ withHalos(i + lowerOffset(), j) = withoutHalos(i, j);
+ }
+ }
+ return withHalos;
+}
+
+Matrix Jacobi::removeHaloLayers(const Matrix &withHalos)
+{
+ const int numRows = numRowsWithoutHalos(withHalos.rows());
+ const int numCols = withHalos.cols();
+
+ Matrix withoutHalos = Matrix::zeros(numRows, numCols);
+
+ for (int i = 0; i < numRows; ++i)
+ {
+ for (int j = 0; j < numCols; ++j)
+ {
+ withoutHalos(i, j) = withHalos(i + lowerOffset(), j);
+ }
+ }
+ return withoutHalos;
+}
+
+Jacobi::Result Jacobi::run(const Matrix &init, double eps, int maxNumIter)
+{
+ std::vector phi(2, addHaloLayers(init));
+ const int numRows = phi[0].rows();
+ const int numCols = phi[0].cols();
+
+ int nIter = 0;
+ double dist = std::numeric_limits::max();
+
+ int t0 = 0; // array index of current timestep
+ int t1 = 1; // array index of next timestep
+
+#pragma omp parallel num_threads(6) firstprivate(nIter, t0, t1)
+ while (dist > eps && nIter < maxNumIter)
+ {
+ dist = 0;
+
+#pragma omp single
+ exchangeHaloLayers(phi[t0]);
+
+ for (int i = 1; i < numRows - 1; ++i)
+ {
+
+#pragma omp for schedule(static, numCols / 12) reduction(max : dist)
+ for (int j = 1; j < numCols - 1; ++j)
+ {
+ phi[t1](i, j) = .25 * (phi[t0](i + 1, j) + phi[t0](i - 1, j) +
+ phi[t0](i, j + 1) + phi[t0](i, j - 1));
+
+ const double diff = phi[t1](i, j) - phi[t0](i, j);
+ dist = std::max(dist, std::abs(diff));
+ }
+ }
+
+#pragma omp barrier
+#pragma omp single
+ MPI_Allreduce(MPI_IN_PLACE, &dist, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+ nIter++;
+ std::swap(t0, t1);
+ }
+
+ return {removeHaloLayers(phi[t0]), dist, nIter};
+}
diff --git a/lab12/exc4/run.sh b/lab12/exc4/run.sh
index 0d1a3eb..04b41a1 100644
--- a/lab12/exc4/run.sh
+++ b/lab12/exc4/run.sh
@@ -2,8 +2,12 @@
#SBATCH --job-name=jacobi
#SBATCH --output=jacobi.out
#SBATCH --error=jacobi.err
-#SBATCH --ntasks=48
-#SBATCH --cpus-per-task=1
+#SBATCH --ntasks=8 # 8 MPI processes (1 per socket)
+#SBATCH --cpus-per-task=6 # 6 threads per MPI process
#SBATCH --time=01:00:00
-srun --mpi=pmix ./jacobi
\ No newline at end of file
+export OMP_NUM_THREADS=6
+export OMP_PROC_BIND=true
+export OMP_PLACES=cores
+
+srun --mpi=pmix ./jacobi