diff --git a/lab12/exc4/.$architecture.drawio.bkp b/lab12/exc4/.$architecture.drawio.bkp new file mode 100644 index 0000000..673f591 --- /dev/null +++ b/lab12/exc4/.$architecture.drawio.bkp @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lab12/exc4/Makefile b/lab12/exc4/Makefile index 0b9574d..4ad4826 100644 --- a/lab12/exc4/Makefile +++ b/lab12/exc4/Makefile @@ -12,7 +12,7 @@ debug: $(SOURCE_FILES) mpicxx $(SOURCE_FILES) -g -o $(PROGRAM_NAME) ${COMPILER_FLAGS} run: release - mpirun -np 4 ./$(PROGRAM_NAME) 3072 3e-6 100000 + mpirun -np 1 ./$(PROGRAM_NAME) 3072 3e-6 100000 plot: plot.py python3 plot.py \ No newline at end of file diff --git a/lab12/exc4/architecture.drawio b/lab12/exc4/architecture.drawio new file mode 100644 index 0000000..1ad5982 --- /dev/null +++ b/lab12/exc4/architecture.drawio @@ -0,0 +1,335 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lab12/exc4/jacobi b/lab12/exc4/jacobi new file mode 100755 index 0000000..76c3f9a Binary files /dev/null and b/lab12/exc4/jacobi differ diff --git a/lab12/exc4/jacobi.cpp b/lab12/exc4/jacobi.cpp index 9170a26..8f2cf90 100644 --- a/lab12/exc4/jacobi.cpp +++ b/lab12/exc4/jacobi.cpp @@ -6,44 +6,73 @@ #include "jacobi.h" #include "matrix_io.h" -Jacobi::Jacobi() { +Jacobi::Jacobi() +{ MPI_Comm_rank(MPI_COMM_WORLD, &rank_); MPI_Comm_size(MPI_COMM_WORLD, &numProc_); + MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shm_comm_); + MPI_Comm_rank(shm_comm_, &shm_rank_); + is_first_on_node_ = (shm_rank_ == 0); } -bool Jacobi::isFirstRank() const { +bool Jacobi::isFirstRank() const +{ return rank_ == 0; } -bool Jacobi::isLastRank() const { +bool Jacobi::isLastRank() const +{ return rank_ == numProc_ - 1; } -int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const { - if (isFirstRank() || isLastRank()) { +int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const +{ + if (isFirstRank() || isLastRank()) + { return numRowsWithoutHalos + 1; - } else { + } + else + { return numRowsWithoutHalos + 2; } } -int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const { - if (isFirstRank() || isLastRank()) { +int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const +{ + if (isFirstRank() || isLastRank()) + { return numRowsWithHalos - 1; - } else { + } + else + { return numRowsWithHalos - 2; } } -int Jacobi::lowerOffset() const { +int Jacobi::lowerOffset() const +{ return isFirstRank() ? 0 : 1; } -int Jacobi::upperOffset() const { +int Jacobi::upperOffset() const +{ return isLastRank() ? 0 : 1; } -void Jacobi::exchangeHaloLayers(Matrix& phi) { +void Jacobi::exchangeHaloLayers(Matrix &phi) +{ + if (is_first_on_node_) + { + exchangeHaloLayersNodeMPIProcFirst(phi); + } + else + { + exchangeHaloLayersNodeMPIProcSecond(phi); + } +} + +void Jacobi::exchangeHaloLayersNodeMPIProcFirst(Matrix &phi) +{ int n = phi.rows(); int sendSize = phi.cols(); int tag = 0; @@ -51,8 +80,9 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) { std::vector req; // Communication with upper partner - if (!isLastRank()) { - int upper = rank_ + 1; + if (!isLastRank()) + { + int upper = rank_ - 1; MPI_Isend(&phi(n - 2, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD, &req.emplace_back()); MPI_Irecv(&phi(n - 1, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD, @@ -60,7 +90,43 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) { } // Communication with lower partner - if (!isFirstRank()) { + if (!isFirstRank()) + { + // communication with second rank on same node via shared memory + // We write our send row to shared memory row 0 + for (int j = 0; j < sendSize; ++j) + shared_rows_[0 * sendSize + j] = phi(n - 2, j); // our last inner row + + MPI_Win_sync(win_); // ensure memory visibility + + // Wait for second proc to write its row back to shared memory row 1 + MPI_Win_sync(win_); + for (int j = 0; j < sendSize; ++j) + phi(n - 1, j) = shared_rows_[1 * sendSize + j]; // halo from second proc + } + + // Wait for communication to finish + std::vector stat(req.size()); + MPI_Waitall(req.size(), req.data(), stat.data()); +} + +void Jacobi::exchangeHaloLayersNodeMPIProcSecond(Matrix &phi) +{ + int n = phi.rows(); + int sendSize = phi.cols(); + int tag = 0; + + std::vector req; + + // Communication with upper partner + if (!isLastRank()) + { + // communication with first rank on same node via shared memory + } + + // Communication with lower partner + if (!isFirstRank()) + { int lower = rank_ - 1; MPI_Isend(&phi(1, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD, &req.emplace_back()); @@ -73,35 +139,42 @@ void Jacobi::exchangeHaloLayers(Matrix& phi) { MPI_Waitall(req.size(), req.data(), stat.data()); } -Matrix Jacobi::addHaloLayers(const Matrix& withoutHalos) { +Matrix Jacobi::addHaloLayers(const Matrix &withoutHalos) +{ const int numRows = withoutHalos.rows(); const int numCols = withoutHalos.cols(); Matrix withHalos = Matrix::zeros(numRowsWithHalos(numRows), numCols); - for (int i = 0; i < numRows; ++i) { - for (int j = 0; j < numCols; ++j) { + for (int i = 0; i < numRows; ++i) + { + for (int j = 0; j < numCols; ++j) + { withHalos(i + lowerOffset(), j) = withoutHalos(i, j); } } return withHalos; } -Matrix Jacobi::removeHaloLayers(const Matrix& withHalos) { +Matrix Jacobi::removeHaloLayers(const Matrix &withHalos) +{ const int numRows = numRowsWithoutHalos(withHalos.rows()); const int numCols = withHalos.cols(); Matrix withoutHalos = Matrix::zeros(numRows, numCols); - for (int i = 0; i < numRows; ++i) { - for (int j = 0; j < numCols; ++j) { + for (int i = 0; i < numRows; ++i) + { + for (int j = 0; j < numCols; ++j) + { withoutHalos(i, j) = withHalos(i + lowerOffset(), j); } } return withoutHalos; } -Jacobi::Result Jacobi::run(const Matrix& init, double eps, int maxNumIter) { +Jacobi::Result Jacobi::run(const Matrix &init, double eps, int maxNumIter) +{ std::vector phi(2, addHaloLayers(init)); const int numRows = phi[0].rows(); const int numCols = phi[0].cols(); @@ -109,15 +182,18 @@ Jacobi::Result Jacobi::run(const Matrix& init, double eps, int maxNumIter) { int nIter = 0; double dist = std::numeric_limits::max(); - int t0 = 0; // array index of current timestep - int t1 = 1; // array index of next timestep - while (dist > eps && nIter < maxNumIter) { + int t0 = 0; // array index of current timestep + int t1 = 1; // array index of next timestep + while (dist > eps && nIter < maxNumIter) + { dist = 0; exchangeHaloLayers(phi[t0]); - for (int i = 1; i < numRows - 1; ++i) { - for (int j = 1; j < numCols - 1; ++j) { + for (int i = 1; i < numRows - 1; ++i) + { + for (int j = 1; j < numCols - 1; ++j) + { phi[t1](i, j) = .25 * (phi[t0](i + 1, j) + phi[t0](i - 1, j) + phi[t0](i, j + 1) + phi[t0](i, j - 1)); diff --git a/lab12/exc4/jacobi.h b/lab12/exc4/jacobi.h index e552131..f3d8479 100644 --- a/lab12/exc4/jacobi.h +++ b/lab12/exc4/jacobi.h @@ -31,6 +31,9 @@ class Jacobi { private: void exchangeHaloLayers(Matrix& phi); + void exchangeHaloLayersNodeMPIProcFirst(Matrix& phi); + void exchangeHaloLayersNodeMPIProcSecond(Matrix& phi); + Matrix addHaloLayers(const Matrix& phi); Matrix removeHaloLayers(const Matrix& phi); @@ -43,8 +46,26 @@ class Jacobi { int numRowsWithHalos(int numRowsWithoutHalos) const; int numRowsWithoutHalos(int numRowsWithHalos) const; + // Global rank on cluster int rank_ = MPI_PROC_NULL; + + // Local rank on node + int shm_rank_ = MPI_PROC_NULL; + + // Communicator for local domain of node + MPI_Comm shm_comm_; + + // True if this MPI proc is first on node + bool is_first_on_node_; + + // count of MPI procs int numProc_ = 1; }; +// 4 times horizontal split | with mpi + +// 12 times vertical split - with openmp + + + #endif // JACOBI_H \ No newline at end of file diff --git a/lab12/exc4/jacobi2.cpp b/lab12/exc4/jacobi2.cpp new file mode 100644 index 0000000..b404a08 --- /dev/null +++ b/lab12/exc4/jacobi2.cpp @@ -0,0 +1,169 @@ +#include +#include +#include + +#include "matrix.h" +#include "jacobi.h" +#include "matrix_io.h" + +Jacobi::Jacobi() +{ + MPI_Comm_rank(MPI_COMM_WORLD, &rank_); + MPI_Comm_size(MPI_COMM_WORLD, &numProc_); +} + +bool Jacobi::isFirstRank() const +{ + return rank_ == 0; +} + +bool Jacobi::isLastRank() const +{ + return rank_ == numProc_ - 1; +} + +int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const +{ + if (isFirstRank() || isLastRank()) + { + return numRowsWithoutHalos + 1; + } + else + { + return numRowsWithoutHalos + 2; + } +} + +int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const +{ + if (isFirstRank() || isLastRank()) + { + return numRowsWithHalos - 1; + } + else + { + return numRowsWithHalos - 2; + } +} + +int Jacobi::lowerOffset() const +{ + return isFirstRank() ? 0 : 1; +} + +int Jacobi::upperOffset() const +{ + return isLastRank() ? 0 : 1; +} + +void Jacobi::exchangeHaloLayers(Matrix &phi) +{ + int n = phi.rows(); + int sendSize = phi.cols(); + int tag = 0; + + std::vector req; + + // Communication with upper partner + if (!isLastRank()) + { + int upper = rank_ + 1; + MPI_Isend(&phi(n - 2, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD, + &req.emplace_back()); + MPI_Irecv(&phi(n - 1, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD, + &req.emplace_back()); + } + + // Communication with lower partner + if (!isFirstRank()) + { + int lower = rank_ - 1; + MPI_Isend(&phi(1, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD, + &req.emplace_back()); + MPI_Irecv(&phi(0, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD, + &req.emplace_back()); + } + + // Wait for communication to finish + std::vector stat(req.size()); + MPI_Waitall(req.size(), req.data(), stat.data()); +} + +Matrix Jacobi::addHaloLayers(const Matrix &withoutHalos) +{ + const int numRows = withoutHalos.rows(); + const int numCols = withoutHalos.cols(); + + Matrix withHalos = Matrix::zeros(numRowsWithHalos(numRows), numCols); + + for (int i = 0; i < numRows; ++i) + { + for (int j = 0; j < numCols; ++j) + { + withHalos(i + lowerOffset(), j) = withoutHalos(i, j); + } + } + return withHalos; +} + +Matrix Jacobi::removeHaloLayers(const Matrix &withHalos) +{ + const int numRows = numRowsWithoutHalos(withHalos.rows()); + const int numCols = withHalos.cols(); + + Matrix withoutHalos = Matrix::zeros(numRows, numCols); + + for (int i = 0; i < numRows; ++i) + { + for (int j = 0; j < numCols; ++j) + { + withoutHalos(i, j) = withHalos(i + lowerOffset(), j); + } + } + return withoutHalos; +} + +Jacobi::Result Jacobi::run(const Matrix &init, double eps, int maxNumIter) +{ + std::vector phi(2, addHaloLayers(init)); + const int numRows = phi[0].rows(); + const int numCols = phi[0].cols(); + + int nIter = 0; + double dist = std::numeric_limits::max(); + + int t0 = 0; // array index of current timestep + int t1 = 1; // array index of next timestep + +#pragma omp parallel num_threads(6) firstprivate(nIter, t0, t1) + while (dist > eps && nIter < maxNumIter) + { + dist = 0; + +#pragma omp single + exchangeHaloLayers(phi[t0]); + + for (int i = 1; i < numRows - 1; ++i) + { + +#pragma omp for schedule(static, numCols / 12) reduction(max : dist) + for (int j = 1; j < numCols - 1; ++j) + { + phi[t1](i, j) = .25 * (phi[t0](i + 1, j) + phi[t0](i - 1, j) + + phi[t0](i, j + 1) + phi[t0](i, j - 1)); + + const double diff = phi[t1](i, j) - phi[t0](i, j); + dist = std::max(dist, std::abs(diff)); + } + } + +#pragma omp barrier +#pragma omp single + MPI_Allreduce(MPI_IN_PLACE, &dist, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + + nIter++; + std::swap(t0, t1); + } + + return {removeHaloLayers(phi[t0]), dist, nIter}; +} diff --git a/lab12/exc4/run.sh b/lab12/exc4/run.sh index 0d1a3eb..04b41a1 100644 --- a/lab12/exc4/run.sh +++ b/lab12/exc4/run.sh @@ -2,8 +2,12 @@ #SBATCH --job-name=jacobi #SBATCH --output=jacobi.out #SBATCH --error=jacobi.err -#SBATCH --ntasks=48 -#SBATCH --cpus-per-task=1 +#SBATCH --ntasks=8 # 8 MPI processes (1 per socket) +#SBATCH --cpus-per-task=6 # 6 threads per MPI process #SBATCH --time=01:00:00 -srun --mpi=pmix ./jacobi \ No newline at end of file +export OMP_NUM_THREADS=6 +export OMP_PROC_BIND=true +export OMP_PLACES=cores + +srun --mpi=pmix ./jacobi