diff --git a/lab12/exc4/.$architecture.drawio.dtmp b/lab12/exc4/.$architecture.drawio.dtmp
new file mode 100644
index 0000000..2857145
--- /dev/null
+++ b/lab12/exc4/.$architecture.drawio.dtmp
@@ -0,0 +1,344 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lab12/exc4/jacobi.cpp b/lab12/exc4/jacobi.cpp
index 8f2cf90..e32aa71 100644
--- a/lab12/exc4/jacobi.cpp
+++ b/lab12/exc4/jacobi.cpp
@@ -92,17 +92,28 @@ void Jacobi::exchangeHaloLayersNodeMPIProcFirst(Matrix &phi)
// Communication with lower partner
if (!isFirstRank())
{
+        SharedmemStates *states = reinterpret_cast<SharedmemStates *>(baseptr_);
+        double *shm0 = reinterpret_cast<double *>(states + 1); // row 0
+ double *shm1 = shm0 + cols; // row 1
+
// communication with second rank on same node via shared memory
// We write our send row to shared memory row 0
- for (int j = 0; j < sendSize; ++j)
- shared_rows_[0 * sendSize + j] = phi(n - 2, j); // our last inner row
+        while (states->shmStates[0] == SharedmemState::Unread)
+ {MPI_Win_sync(win_);}
+
+ for (int j = 0; j < sendSize; ++j)
+ shm0[j] = phi(n - 2, j); // our last inner row
+        states->shmStates[0] = SharedmemState::Unread;
MPI_Win_sync(win_); // ensure memory visibility
// Wait for second proc to write its row back to shared memory row 1
- MPI_Win_sync(win_);
+        while (states->shmStates[1] == SharedmemState::Read)
+ {MPI_Win_sync(win_);}
+
for (int j = 0; j < sendSize; ++j)
- phi(n - 1, j) = shared_rows_[1 * sendSize + j]; // halo from second proc
+ phi(n - 1, j) = shm1[j]; // halo from second proc
+        states->shmStates[1] = SharedmemState::Read;
}
// Wait for communication to finish
@@ -121,7 +132,28 @@ void Jacobi::exchangeHaloLayersNodeMPIProcSecond(Matrix &phi)
// Communication with upper partner
if (!isLastRank())
{
- // communication with first rank on same node via shared memory
+        SharedmemStates *states = reinterpret_cast<SharedmemStates *>(baseptr_);
+        double *shm0 = reinterpret_cast<double *>(states + 1); // row 0
+ double *shm1 = shm0 + cols; // row 1
+
+        // communication with first rank on same node via shared memory
+        // We write our send row to shared memory row 1
+
+        while (states->shmStates[1] == SharedmemState::Unread)
+ {MPI_Win_sync(win_);}
+
+ for (int j = 0; j < sendSize; ++j)
+            shm1[j] = phi(1, j); // our first inner row
+        states->shmStates[1] = SharedmemState::Unread;
+ MPI_Win_sync(win_); // ensure memory visibility
+
+ // Wait for first proc to write its row back to shared memory row 0
+        while (states->shmStates[0] == SharedmemState::Read)
+ {MPI_Win_sync(win_);}
+
+ for (int j = 0; j < sendSize; ++j)
+ phi(0, j) = shm0[j]; // halo from first proc
+        states->shmStates[0] = SharedmemState::Read;
}
// Communication with lower partner
@@ -179,6 +211,9 @@ Jacobi::Result Jacobi::run(const Matrix &init, double eps, int maxNumIter)
const int numRows = phi[0].rows();
const int numCols = phi[0].cols();
+ MPI_Aint size = numCols * 2 * sizeof(double) + sizeof(SharedmemStates);
+ MPI_Win_allocate_shared(size, sizeof(char), MPI_INFO_NULL, shm_comm_, &baseptr_, &win_);
+
int nIter = 0;
     double dist = std::numeric_limits<double>::max();
diff --git a/lab12/exc4/jacobi.h b/lab12/exc4/jacobi.h
index f3d8479..e7a95d6 100644
--- a/lab12/exc4/jacobi.h
+++ b/lab12/exc4/jacobi.h
@@ -60,8 +60,28 @@ class Jacobi {
// count of MPI procs
int numProc_ = 1;
+
+ void* baseptr_;
+
+ MPI_Win win_;
+
+ SharedmemStates shm_states_ = {
+ {Read, Read}
+ };
};
+enum SharedmemState
+{
+ Unread = 0,
+ Read = 1
+};
+
+struct SharedmemStates
+{
+ SharedmemState shmStates[2]; // Flags: one for each row
+};
+
+
// 4 times horizontal split | with mpi
// 12 times vertical split - with openmp