diff --git a/lab12/exc4/Makefile b/lab12/exc4/Makefile
new file mode 100644
index 0000000..0b9574d
--- /dev/null
+++ b/lab12/exc4/Makefile
@@ -0,0 +1,18 @@
+PROGRAM_NAME = jacobi
+SOURCE_FILES = main.cpp jacobi.cpp matrix_io.cpp
+
+COMPILER_FLAGS = -std=c++17 -Wall -Wextra -Wall -Werror -Wno-sign-compare -Wno-unused-result -Wno-cast-function-type
+
+default: debug
+
+release: $(SOURCE_FILES)
+	mpicxx $(SOURCE_FILES) -O3 -o $(PROGRAM_NAME) ${COMPILER_FLAGS}
+
+debug: $(SOURCE_FILES)
+	mpicxx $(SOURCE_FILES) -g -o $(PROGRAM_NAME) ${COMPILER_FLAGS}
+
+run: release
+	mpirun -np 4 ./$(PROGRAM_NAME) 3072 3e-6 100000
+
+plot: plot.py
+	python3 plot.py
\ No newline at end of file
diff --git a/lab12/exc4/jacobi.cpp b/lab12/exc4/jacobi.cpp
new file mode 100644
index 0000000..9170a26
--- /dev/null
+++ b/lab12/exc4/jacobi.cpp
@@ -0,0 +1,136 @@
+#include <cmath>
+#include <mpi.h>
+#include <cassert>
+
+#include "matrix.h"
+#include "jacobi.h"
+#include "matrix_io.h"
+
+Jacobi::Jacobi() {
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
+  MPI_Comm_size(MPI_COMM_WORLD, &numProc_);
+}
+
+bool Jacobi::isFirstRank() const {
+  return rank_ == 0;
+}
+
+bool Jacobi::isLastRank() const {
+  return rank_ == numProc_ - 1;
+}
+
+int Jacobi::numRowsWithHalos(int numRowsWithoutHalos) const {
+  if (isFirstRank() || isLastRank()) {
+    return numRowsWithoutHalos + 1;
+  } else {
+    return numRowsWithoutHalos + 2;
+  }
+}
+
+int Jacobi::numRowsWithoutHalos(int numRowsWithHalos) const {
+  if (isFirstRank() || isLastRank()) {
+    return numRowsWithHalos - 1;
+  } else {
+    return numRowsWithHalos - 2;
+  }
+}
+
+int Jacobi::lowerOffset() const {
+  return isFirstRank() ? 0 : 1;
+}
+
+int Jacobi::upperOffset() const {
+  return isLastRank() ? 0 : 1;
+}
+
+void Jacobi::exchangeHaloLayers(Matrix& phi) {
+  int n = phi.rows();
+  int sendSize = phi.cols();
+  int tag = 0;
+
+  std::vector<MPI_Request> req;
+
+  // Communication with upper partner
+  if (!isLastRank()) {
+    int upper = rank_ + 1;
+    MPI_Isend(&phi(n - 2, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
+              &req.emplace_back());
+    MPI_Irecv(&phi(n - 1, 0), sendSize, MPI_DOUBLE, upper, tag, MPI_COMM_WORLD,
+              &req.emplace_back());
+  }
+
+  // Communication with lower partner
+  if (!isFirstRank()) {
+    int lower = rank_ - 1;
+    MPI_Isend(&phi(1, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD,
+              &req.emplace_back());
+    MPI_Irecv(&phi(0, 0), sendSize, MPI_DOUBLE, lower, tag, MPI_COMM_WORLD,
+              &req.emplace_back());
+  }
+
+  // Wait for communication to finish
+  std::vector<MPI_Status> stat(req.size());
+  MPI_Waitall(req.size(), req.data(), stat.data());
+}
+
+Matrix Jacobi::addHaloLayers(const Matrix& withoutHalos) {
+  const int numRows = withoutHalos.rows();
+  const int numCols = withoutHalos.cols();
+
+  Matrix withHalos = Matrix::zeros(numRowsWithHalos(numRows), numCols);
+
+  for (int i = 0; i < numRows; ++i) {
+    for (int j = 0; j < numCols; ++j) {
+      withHalos(i + lowerOffset(), j) = withoutHalos(i, j);
+    }
+  }
+  return withHalos;
+}
+
+Matrix Jacobi::removeHaloLayers(const Matrix& withHalos) {
+  const int numRows = numRowsWithoutHalos(withHalos.rows());
+  const int numCols = withHalos.cols();
+
+  Matrix withoutHalos = Matrix::zeros(numRows, numCols);
+
+  for (int i = 0; i < numRows; ++i) {
+    for (int j = 0; j < numCols; ++j) {
+      withoutHalos(i, j) = withHalos(i + lowerOffset(), j);
+    }
+  }
+  return withoutHalos;
+}
+
+Jacobi::Result Jacobi::run(const Matrix& init, double eps, int maxNumIter) {
+  std::vector<Matrix> phi(2, addHaloLayers(init));
+  const int numRows = phi[0].rows();
+  const int numCols = phi[0].cols();
+
+  int nIter = 0;
+  double dist = std::numeric_limits<double>::max();
+
+  int t0 = 0;  // array index of current timestep
+  int t1 = 1;  // array index of next timestep
+  while (dist > eps && nIter < maxNumIter) {
+    dist = 0;
+
+    exchangeHaloLayers(phi[t0]);
+
+    for (int i = 1; i < numRows - 1; ++i) {
+      for (int j = 1; j < numCols - 1; ++j) {
+        phi[t1](i, j) = .25 * (phi[t0](i + 1, j) + phi[t0](i - 1, j) +
+                               phi[t0](i, j + 1) + phi[t0](i, j - 1));
+
+        const double diff = phi[t1](i, j) - phi[t0](i, j);
+        dist = std::max(dist, std::abs(diff));
+      }
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &dist, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+    nIter++;
+    std::swap(t0, t1);
+  }
+
+  return {removeHaloLayers(phi[t0]), dist, nIter};
+}
diff --git a/lab12/exc4/jacobi.h b/lab12/exc4/jacobi.h
new file mode 100644
index 0000000..e552131
--- /dev/null
+++ b/lab12/exc4/jacobi.h
@@ -0,0 +1,50 @@
+#ifndef JACOBI_H
+#define JACOBI_H
+
+#include "matrix.h"
+
+/**
+ * Base class for Jacobi algorithms.
+ * Defines a struct result which is returned by the algorithm.
+ */
+class Jacobi {
+ public:
+  /**
+   * Result of the computation
+   */
+  struct Result {
+    Matrix phi = Matrix::zeros(0, 0);  // The converged solution matrix
+    double finalDist = 0;              // The final distance
+    int numIter = 0;                   // The number of iterations made
+  };
+
+  Jacobi();
+
+  /**
+   * Run the Jacobi algorithm on the initial matrix init.
+   * The algorithm terminates if either the maximum number of iterations
+   * exceeds maxNumIter or if the difference between two steps is smaller
+   * than eps.
+   */
+  Result run(const Matrix& init, double eps, int maxNumIter);
+
+ private:
+  void exchangeHaloLayers(Matrix& phi);
+
+  Matrix addHaloLayers(const Matrix& phi);
+  Matrix removeHaloLayers(const Matrix& phi);
+
+  bool isFirstRank() const;
+  bool isLastRank() const;
+
+  int lowerOffset() const;
+  int upperOffset() const;
+
+  int numRowsWithHalos(int numRowsWithoutHalos) const;
+  int numRowsWithoutHalos(int numRowsWithHalos) const;
+
+  int rank_ = MPI_PROC_NULL;
+  int numProc_ = 1;
+};
+
+#endif  // JACOBI_H
\ No newline at end of file
diff --git a/lab12/exc4/main.cpp b/lab12/exc4/main.cpp
new file mode 100644
index 0000000..e06150a
--- /dev/null
+++ b/lab12/exc4/main.cpp
@@ -0,0 +1,94 @@
+#include <iostream>
+#include <stdexcept>
+#include <chrono>
+#include <mpi.h>
+#include "jacobi.h"
+#include "matrix.h"
+#include "matrix_io.h"
+
+
+Matrix getInitialCondition(int n, int rank, int numProc) {
+  if (n % numProc != 0) {
+    throw std::runtime_error(
+        "Matrix dimension not divisible by number of processes.");
+  }
+
+  const int numRowsLocal = n / numProc;
+  const int numRowsTotal = n;
+  const int numCols = n;
+
+  // const int numCols = n;
+  const int i0 = rank * numRowsLocal;
+  Matrix m = Matrix::zeros(numRowsLocal, numCols);
+  for (int i = 0; i < numRowsLocal; ++i) {
+    const double x = 1. * (i + i0) / numRowsTotal;
+    for (int j = 0; j < numCols; ++j) {
+      const double y = 1. * j / numCols;
+
+      // The lower left corner is hot
+      m(i, j) += exp(-0.5 * (x * x + y * y) * 10) * exp(-100 * x * y);
+
+      // The right side is cold
+      m(i, j) += -1.0 * exp(-20 * (1 - y));
+    }
+  }
+
+  return m;
+}
+
+Matrix getDistributedInitialCondition(int n) {
+  int rank, numProc;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &numProc);
+
+  return getInitialCondition(n, rank, numProc);
+}
+
+Matrix getCompleteInitialCondition(int n) {
+  return getInitialCondition(n, 0, 1);
+}
+
+double benchmark(const Matrix& init,
+                 double eps,
+                 int maxNumIter,
+                 int numBenchmarkRuns) {
+  double err = 0.0;
+  int numIter = 0;
+
+  auto start = std::chrono::system_clock::now();
+  for (int i = 0; i < numBenchmarkRuns; ++i) {
+    auto result = Jacobi().run(init, eps, maxNumIter);
+    err = result.finalDist;
+    numIter = result.numIter;
+  }
+  auto end = std::chrono::system_clock::now();
+
+  std::chrono::duration<double, std::milli> diff = end - start;
+  double avgTime = diff.count() / numBenchmarkRuns;
+
+  int rank;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  if (rank == 0) {
+    std::cout << "Finished running Jacobi, nIter=" << numIter
+              << ", error=" << err << ", time=" << avgTime << "ms" << std::endl;
+  }
+
+  return avgTime;
+}
+
+int main(int argc, char* argv[]) {
+  MPI_Init(&argc, &argv);
+
+  int rank, numProc;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &numProc);
+
+  int n = 3072;
+  double eps = 3e-6;
+  int maxNumIter = 100000;
+
+  Matrix init = getDistributedInitialCondition(n);
+  benchmark(init, eps, maxNumIter, 10);
+
+  MPI_Finalize();
+}
\ No newline at end of file
diff --git a/lab12/exc4/matrix.h b/lab12/exc4/matrix.h
new file mode 100644
index 0000000..fd3df1d
--- /dev/null
+++ b/lab12/exc4/matrix.h
@@ -0,0 +1,411 @@
+/* matrix.h, an extrmely simple matrix class.
+ * Version 2.1
+ * Copyright (C) 2022-2025 Tobias Kreilos, Offenburg University of Applied
+ * Sciences
+ *
+ * Licensed under the Apache License, Version 2.0(the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+
+/**
+ * Matrix class
+ *
+ * This class implements a matrix of size m x n. The matrix is stored in
+ * a one-dimensional array of size m*n. The matrix is stored in column-major
+ * order, i.e. the first column is stored first, then the second column, etc.
+ *
+ * Static functions are provided to create a matrix of zeros or an uninitialized
+ * matrix. The uninitialized matrix is not initialized, i.e. the entries are
+ * not set to zero. This is useful for performance reasons, e.g. to control
+ * the placement of matrix entries in locality-domain memory.
+ *
+ *
+ */
+class Matrix {
+ public:
+  /**
+   * Create a matrix of size m x n and initialize all entries to zero.
+   * @param rows number of rows
+   * @param cols number of columns
+   */
+  static Matrix zeros(int numRows, int numCols);
+
+  /**
+   * Create a square matrix of size n x n and initialize all entries to zero.
+   * @param n number of rows and columns
+   */
+  static Matrix zeros(int n);
+
+  /**
+   * Create a matrix of size m x n and initialize all entries to zero.
+   * @param dim number of rows and columns
+   */
+  static Matrix zeros(std::pair<int, int> dim);
+
+  /**
+   * Create a matrix of size m x n and do not initialize the entries.
+   * @param rows number of rows
+   * @param cols number of columns
+   */
+  static Matrix uninit(int m, int n);
+
+  /**
+   * Create a square matrix of size n x n and do not initialize the entries.
+   * @param n number of rows and columns
+   */
+  static Matrix uninit(int n);
+
+  /**
+   * Create a matrix of size m x n and do not initialize the entries.
+   * @param dim number of rows and columns
+   */
+  static Matrix uninit(std::pair<int, int> dim);
+
+  Matrix(const Matrix& other);
+  Matrix(Matrix&& other);
+  ~Matrix();
+  Matrix& operator=(const Matrix& other);
+  Matrix& operator=(Matrix&& other);
+
+  // Access element a_ij of the matrix
+  double& operator()(int i, int j);
+  const double& operator()(int i, int j) const;
+
+  // Obtain a pointer to the underlying data
+  double* data();
+  const double* data() const;
+
+  // Getter functions for the dimensions
+  std::pair<int, int> dim() const;
+  int rows() const;
+  int cols() const;
+  int numEntries() const;
+
+  // Comparison operators
+  bool operator==(const Matrix& b);
+  bool operator!=(const Matrix& b);
+
+  // addition
+  Matrix& operator+=(const Matrix& b);
+
+  // subtraction
+  Matrix& operator-=(const Matrix& b);
+
+  // scalar multiplication
+  Matrix& operator*=(double x);
+
+  // scalar division
+  Matrix& operator/=(double x);
+
+ private:
+  // Constructor is private to prevent creating an uninitialized matrix
+  // accidentally. Use Matrix::zeros() or Matrix::uninit() instead
+  Matrix(int m, int n);
+
+  int numRows_;   // number of rows
+  int numCols_;   // number of columns
+  double* data_;  // the matrix' entries
+};
+
+/**
+ * Vector class
+ * 
+ * This class implements a vector of size n. The vector is stored in a
+ * Matrix of size n x 1.
+ * 
+ * Constructors are provided to create a vector of zeros or an uninitialized
+ * vector. The uninitialized vector is not initialized, i.e. the entries are
+ * not set to zero. This is useful for performance reasons, e.g. to control
+ * the placement of vector entries in locality-domain memory.
+ */
+class Vector {
+ public:
+  static Vector zeros(int n) {
+    Vector vec;
+    vec.data_ = Matrix::zeros(n, 1);
+    return vec;
+  }
+
+  static Vector uninit(int n) {
+    Vector vec;
+    vec.data_ = Matrix::uninit(n, 1);
+    return vec;
+  }
+
+  bool operator==(const Vector& b) { return data_ == b.data_; }
+  bool operator!=(const Vector& b) { return !operator==(b); }
+  double& operator()(int i) { return data_(i, 0); }
+  const double& operator()(int i) const { return data_(i, 0); }
+  double* data() { return data_.data(); }
+  const double* data() const { return data_.data(); }
+
+  Vector operator+=(const Vector& b) {
+    data_ += b.data_;
+    return *this;
+  }
+
+  Vector operator-=(const Vector& b) {
+    data_ -= b.data_;
+    return *this;
+  }
+  Vector operator*=(double x) {
+    data_ *= x;
+    return *this;
+  }
+  Vector operator/=(double x) {
+    data_ /= x;
+    return *this;
+  }
+
+  int size() const { return data_.rows(); }
+
+ private:
+  Matrix data_ = Matrix::zeros(0, 0);
+};
+
+/********** Implementation below ********************/
+
+inline Matrix Matrix::zeros(int m, int n) {
+  Matrix mat(m, n);
+  for (int i = 0; i < m; ++i) {
+    for (int j = 0; j < n; ++j) {
+      mat(i, j) = 0;
+    }
+  }
+  return mat;
+}
+
+inline Matrix Matrix::zeros(int n) {
+  return zeros(n, n);
+}
+
+inline Matrix Matrix::zeros(std::pair<int, int> dim) {
+  return zeros(dim.first, dim.second);
+}
+
+inline Matrix Matrix::uninit(int m, int n) {
+  return Matrix(m, n);
+}
+
+inline Matrix Matrix::uninit(int n) {
+  return uninit(n, n);
+}
+
+inline Matrix Matrix::uninit(std::pair<int, int> dim) {
+  return uninit(dim.first, dim.second);
+}
+
+inline Matrix::Matrix(const Matrix& other) {
+  numRows_ = other.numRows_;
+  numCols_ = other.numCols_;
+  if (numRows_ == 0 || numCols_ == 0) {
+    data_ = nullptr;
+    return;
+  }
+  data_ = new double[numRows_ * numCols_];
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      operator()(i, j) = other(i, j);
+    }
+  }
+}
+
+inline Matrix::Matrix(Matrix&& other) {
+  numRows_ = other.numRows_;
+  numCols_ = other.numCols_;
+  data_ = other.data_;
+  other.data_ = nullptr;
+  other.numRows_ = 0;
+  other.numCols_ = 0;
+}
+
+inline Matrix::~Matrix() {
+  if (data_ != nullptr) {
+    delete[] data_;
+    data_ = nullptr;
+  }
+}
+
+inline Matrix& Matrix::operator=(const Matrix& other) {
+  if (this != &other) {
+    if (data_ != nullptr) {
+      delete[] data_;
+    }
+    numRows_ = other.numRows_;
+    numCols_ = other.numCols_;
+    data_ = new double[numRows_ * numCols_];
+    for (int i = 0; i < numRows_; ++i) {
+      for (int j = 0; j < numCols_; ++j) {
+        operator()(i, j) = other(i, j);
+      }
+    }
+  }
+  return *this;
+}
+
+inline Matrix& Matrix::operator=(Matrix&& other) {
+  if (this != &other) {
+    if (data_ != nullptr) {
+      delete[] data_;
+    }
+    numRows_ = other.numRows_;
+    numCols_ = other.numCols_;
+    data_ = other.data_;
+    other.data_ = nullptr;
+    other.numRows_ = 0;
+    other.numCols_ = 0;
+  }
+  return *this;
+}
+
+inline double& Matrix::operator()(int i, int j) {
+  return data_[i * numCols_ + j];
+}
+
+inline const double& Matrix::operator()(int i, int j) const {
+  return data_[i * numCols_ + j];
+}
+
+inline double* Matrix::data() {
+  return data_;
+}
+
+inline const double* Matrix::data() const {
+  return data_;
+}
+
+inline std::pair<int, int> Matrix::dim() const {
+  return std::pair<int, int>(numRows_, numCols_);
+}
+
+inline int Matrix::rows() const {
+  return numRows_;
+}
+
+inline int Matrix::cols() const {
+  return numCols_;
+}
+
+inline int Matrix::numEntries() const {
+  return numRows_ * numCols_;
+}
+
+inline bool Matrix::operator==(const Matrix& b) {
+  const double eps = 1e-12;
+  if (numRows_ != b.numRows_ || numCols_ != b.numCols_) {
+    return false;
+  }
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      if (fabs(operator()(i, j) - b(i, j)) > eps) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+inline bool Matrix::operator!=(const Matrix& b) {
+  return !operator==(b);
+}
+
+inline Matrix& Matrix::operator+=(const Matrix& b) {
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      operator()(i, j) += b(i, j);
+    }
+  }
+  return *this;
+}
+
+inline Matrix& Matrix::operator-=(const Matrix& b) {
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      operator()(i, j) -= b(i, j);
+    }
+  }
+  return *this;
+}
+
+inline Matrix& Matrix::operator*=(double x) {
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      operator()(i, j) *= x;
+    }
+  }
+  return *this;
+}
+
+inline Matrix& Matrix::operator/=(double x) {
+  for (int i = 0; i < numRows_; ++i) {
+    for (int j = 0; j < numCols_; ++j) {
+      operator()(i, j) /= x;
+    }
+  }
+  return *this;
+}
+
+inline Matrix::Matrix(int m, int n) : numRows_(m), numCols_(n) {
+  data_ = new double[numRows_ * numCols_];
+  if (data_ == nullptr) {
+    throw std::bad_alloc();
+  }
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Matrix& a) {
+  const int width = 10;
+  const int precision = 4;
+
+  const auto originalPrecision = os.precision();
+  os << std::setprecision(precision);
+
+  for (int i = 0; i < a.rows(); ++i) {
+    for (int j = 0; j < a.cols(); ++j) {
+      os << std::setw(width) << a(i, j) << " ";
+    }
+    if (i != a.rows() - 1)
+      os << "\n";
+  }
+
+  os << std::setprecision(originalPrecision);
+  return os;
+}
+
+inline bool equalWithinRange(const Matrix& a,
+                             const Matrix& b,
+                             double eps = 1e-12) {
+  if (a.rows() != b.rows() || a.cols() != b.cols())
+    return false;
+
+  int m = a.rows();
+  int n = a.cols();
+  for (int i = 0; i < m; ++i) {
+    for (int j = 0; j < n; ++j) {
+      if (fabs(a(i, j) - b(i, j)) > eps) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+#endif  // MATRIX_H
\ No newline at end of file
diff --git a/lab12/exc4/matrix_io.cpp b/lab12/exc4/matrix_io.cpp
new file mode 100644
index 0000000..b5d9708
--- /dev/null
+++ b/lab12/exc4/matrix_io.cpp
@@ -0,0 +1,116 @@
+#include <fstream>
+#include <mpi.h>
+#include <exception>
+#include "matrix.h"
+#include "matrix_io.h"
+
+MatrixIO::MatrixIO() {
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
+  MPI_Comm_size(MPI_COMM_WORLD, &numProc_);
+}
+
+void MatrixIO::saveDistributed(const Matrix& distributedMatrix,
+                               const std::string& filename) {
+  Matrix mat = gatherMatrixOnRoot(distributedMatrix);
+
+  if (rank_ == 0) {
+    saveSerial(mat, filename);
+  }
+}
+
+void MatrixIO::saveSerial(const Matrix& m, const std::string& filename) {
+  std::ofstream fout(filename);
+  const int numRows = m.rows();
+  const int numCols = m.cols();
+  fout << "# " << numRows << "\t" << numCols << "\n";
+  for (int i = 0; i < numRows; ++i) {
+    for (int j = 0; j < numCols; ++j) {
+      fout << m(i, j) << "\t";
+    }
+    fout << "\n";
+  }
+}
+
+Matrix MatrixIO::loadDistributed(const std::string& filename) {
+  Matrix matrixOnRoot = Matrix::zeros(0, 0);
+  if (rank_ == 0) {
+    matrixOnRoot = loadSerial(filename);
+  }
+
+  Matrix distributedMatrix = scatterMatrixFromRoot(matrixOnRoot);
+  return distributedMatrix;
+}
+
+Matrix MatrixIO::loadSerial(const std::string& filename) {
+  std::ifstream fin(filename);
+
+  // Check first character (has to be #)
+  std::string s;
+  fin >> s;
+  if (s != "#") {
+    throw std::runtime_error("Error, not reading expecte character #\n");
+  }
+
+  // Read in number of rows and cols and create matrix
+  int numRows, numCols;
+  fin >> numRows >> numCols;
+  Matrix mat = Matrix::zeros(numRows, numCols);
+
+  // Read in matrix contents
+  for (int i = 0; i < numRows; ++i) {
+    for (int j = 0; j < numCols; ++j) {
+      fin >> mat(i, j);
+    }
+  }
+
+  return mat;
+}
+
+Matrix MatrixIO::gatherMatrixOnRoot(const Matrix& distributedMatrix) {
+  const int numRowsLocal = distributedMatrix.rows();
+  const int numCols = distributedMatrix.cols();
+  const int numRowsTotal = numRowsLocal * numProc_;
+
+  Matrix matrixOnRoot = Matrix::zeros(0, 0);
+  if (rank_ == 0) {
+    matrixOnRoot = Matrix::zeros(numRowsTotal, numCols);
+  }
+
+  const int sendSize = distributedMatrix.numEntries();
+
+  MPI_Gather(&distributedMatrix(0, 0), sendSize, MPI_DOUBLE,
+             &matrixOnRoot(0, 0), sendSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  return matrixOnRoot;
+}
+
+Matrix MatrixIO::scatterMatrixFromRoot(const Matrix& matrixOnRoot) {
+  int numRowsTotal = matrixOnRoot.rows();
+  int numCols = matrixOnRoot.cols();
+
+  MPI_Bcast(&numRowsTotal, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&numCols, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if (numRowsTotal % numProc_ != 0)
+    throw std::runtime_error(
+        "Number of rows not divisible by number of processes.");
+
+  const int numRowsLocal = numRowsTotal / numProc_;
+
+  Matrix distributedMatrix = Matrix::zeros(numRowsLocal, numCols);
+
+  const int sendSize = distributedMatrix.numEntries();
+
+  MPI_Scatter(matrixOnRoot.data(), sendSize, MPI_DOUBLE,
+              distributedMatrix.data(), sendSize, MPI_DOUBLE, 0,
+              MPI_COMM_WORLD);
+
+  const int i0 = rank_ * numRowsLocal;
+
+  for (int i = 0; i < numRowsLocal; ++i) {
+    std::cout << "Scattered Line " << i0 + i << ": "
+              << distributedMatrix(i, numCols - 1) << "\n";
+  }
+
+  return distributedMatrix;
+}
diff --git a/lab12/exc4/matrix_io.h b/lab12/exc4/matrix_io.h
new file mode 100644
index 0000000..1560e93
--- /dev/null
+++ b/lab12/exc4/matrix_io.h
@@ -0,0 +1,51 @@
+#include "matrix.h"
+#include <fstream>
+#include <mpi.h>
+#include <exception>
+
+/**
+ * File i/o for distributed matrices.
+ * Matrices may be distributed along the first dimension (i.e. rows are
+ * distributed). Only works if number of rows is divisible by number of
+ * processes.
+ *
+ * File format is compatible with python numpy loadtxt
+ * First line: # numRows numCols
+ * One line of the matrix per line in the file, separated by whitespace
+ */
+class MatrixIO {
+ public:
+  MatrixIO();
+
+  /**
+   * Store a matrix on disk, that is not distribued. Should be called by a
+   * single rank only.
+   */
+  void saveSerial(const Matrix& m, const std::string& filename);
+
+  /**
+   * Load a matrix from disk, that is not distribued. Should be called by a
+   * single rank only.
+   */
+  Matrix loadSerial(const std::string& filename);
+
+  /**
+   * Store a distributed matrix on disk. Should be called collectively by
+   * all ranks.
+   */
+  void saveDistributed(const Matrix& m, const std::string& filename);
+
+  /**
+   * Load a matrix from disk and distribute it along the first axis.
+   * Should be called collectively by all ranks.
+   */
+  Matrix loadDistributed(const std::string& filename);
+
+ private:
+  Matrix gatherMatrixOnRoot(const Matrix& distributedMatrix);
+
+  Matrix scatterMatrixFromRoot(const Matrix& matrixOnRoot);
+
+  int rank_ = MPI_PROC_NULL;
+  int numProc_ = 0;
+};
diff --git a/lab12/exc4/plot.py b/lab12/exc4/plot.py
new file mode 100644
index 0000000..0a007b5
--- /dev/null
+++ b/lab12/exc4/plot.py
@@ -0,0 +1,24 @@
+import pylab as plt
+
+
+ser_init = plt.loadtxt("serial_init.asc")
+par_init = plt.loadtxt("parallel_init.asc")
+ser_data = plt.loadtxt("result_serial.asc")
+par_data = plt.loadtxt("result_parallel.asc")
+# diff = par_data - ser_data
+
+plt.figure()
+plt.subplot(221)
+plt.title("Serial initial condition")
+plt.contourf(ser_init, cmap='jet', levels=100)
+plt.subplot(222)
+plt.title("Parallel initial condition")
+plt.contourf(par_init, cmap='jet', levels=100)
+plt.subplot(223)
+plt.title("Serial result")
+plt.contourf(ser_data, cmap='jet', levels=100)
+plt.subplot(224)
+plt.title("Parallel result")
+plt.contourf(par_data, cmap="jet", levels=100)
+
+plt.show()
diff --git a/lab12/exc4/run.sh b/lab12/exc4/run.sh
new file mode 100644
index 0000000..0d1a3eb
--- /dev/null
+++ b/lab12/exc4/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+#SBATCH --job-name=jacobi
+#SBATCH --output=jacobi.out
+#SBATCH --error=jacobi.err
+#SBATCH --ntasks=48
+#SBATCH --cpus-per-task=1
+#SBATCH --time=01:00:00
+
+srun --mpi=pmix ./jacobi
\ No newline at end of file