lab05

2025-04-19 11:49:51 +02:00
parent 2c8782be9e
commit 024e162991
11 changed files with 2591 additions and 12 deletions
--- a/lab03/jacobi_bench2_changed_flops.cpp
+++ b/lab03/jacobi_bench2_changed_flops.cpp
@@ -33,9 +33,6 @@ Matrix jacobi(const Matrix& init, double eps, int maxNumIter) {

    std::swap(tmp, phi);

-
-    // std::cout << l2phi << ", " << l2tmp << ", " << dist << std::endl;
-
    nIter++;
  }

--- a/lab03/plot/main.py
+++ b/lab03/plot/main.py
@@ -24,7 +24,7 @@ def parse_benchmark_data(json_file):
                size = benchmark.get('Size')
                real_time = benchmark.get('real_time')
                if size is not None and real_time is not None:
-                    flops.append(7 * iterations / real_time)
+                    flops.append((7 * iterations * (size-2)**2) / (real_time / 1000)) #
                    sizes.append(size)
                    real_times.append(real_time)
            return sizes, real_times, flops
@@ -52,21 +52,27 @@ def create_plot(sizes, real_times, flops):

    plt.figure(figsize=(10, 6))
    plt.scatter(sizes, real_times, marker='o', color='blue', label='Real Time (ms)')
-    plt.scatter(sizes, flops, marker='x', color='red', label='FLOPs')
+    
    plt.xlabel("Size")
-    plt.ylabel("Values")
+    plt.ylim(0, 50000)
+    plt.ylabel("Real Time (ms)", color='blue')
    plt.title("Benchmark: Real Time and FLOPs vs. Size")
    plt.grid(True)
-    plt.legend()  # Show the legend to distinguish the plots
-    plt.twinx()  # Create a second y-axis to avoid overlapping labels if scales differ significantly
-    plt.ylabel("FLOPs", color='red')
-    plt.tick_params(axis='y', labelcolor='red')
-    plt.gca().yaxis.grid(False) # Turn off grid for the second y-axis
+    plt.legend(loc='upper left')
+
+    ax2 = plt.twinx()
+    ax2.scatter(sizes, flops, marker='x', color='red', label='FLOPs')
+    ax2.set_ylabel("FLOPs", color='red')
+    
+    ax2.set_yscale("log")
+    ax2.tick_params(axis='y', labelcolor='red')
+    ax2.yaxis.grid(False)
+    ax2.legend(loc='upper right') # Separate legend for the second y-axis

    plt.show()

 if __name__ == "__main__":
-    json_file = 'results.json'
+    json_file = 'E:\\Repositories\\hi_per\\lab03\\plot\\results_flops.json'
    sizes, real_times, flops = parse_benchmark_data(json_file)

    if sizes and real_times:
--- a/lab03/plot/results_flops.json
+++ b/lab03/plot/results_flops.json
--- a/lab03/results.json
+++ b/lab03/results.json
@@ -0,0 +1,350 @@
+{
+  "context": {
+    "date": "2025-04-05T12:56:49+02:00",
+    "host_name": "hpcvl1",
+    "executable": "./changed",
+    "num_cpus": 12,
+    "mhz_per_cpu": 3100,
+    "cpu_scaling_enabled": true,
+    "caches": [
+      {
+        "type": "Data",
+        "level": 1,
+        "size": 32768,
+        "num_sharing": 1
+      },
+      {
+        "type": "Instruction",
+        "level": 1,
+        "size": 32768,
+        "num_sharing": 1
+      },
+      {
+        "type": "Unified",
+        "level": 2,
+        "size": 262144,
+        "num_sharing": 1
+      },
+      {
+        "type": "Unified",
+        "level": 3,
+        "size": 15728640,
+        "num_sharing": 6
+      }
+    ],
+    "load_avg": [0.05,0.24,0.22],
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "benchmarkJacobi/100",
+      "run_name": "benchmarkJacobi/100",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 9,
+      "real_time": 6.8756017219533931e+01,
+      "cpu_time": 6.8755010333333345e+01,
+      "time_unit": "ms",
+      "Size": 1.0000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/120",
+      "run_name": "benchmarkJacobi/120",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 6,
+      "real_time": 1.1800753599770057e+02,
+      "cpu_time": 1.1800501733333333e+02,
+      "time_unit": "ms",
+      "Size": 1.2000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/140",
+      "run_name": "benchmarkJacobi/140",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 3,
+      "real_time": 2.0829310268163681e+02,
+      "cpu_time": 2.0826808400000004e+02,
+      "time_unit": "ms",
+      "Size": 1.4000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/160",
+      "run_name": "benchmarkJacobi/160",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 2,
+      "real_time": 3.4099373250501230e+02,
+      "cpu_time": 3.4098604050000006e+02,
+      "time_unit": "ms",
+      "Size": 1.6000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/180",
+      "run_name": "benchmarkJacobi/180",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 2,
+      "real_time": 4.7466103651095182e+02,
+      "cpu_time": 4.7461016350000017e+02,
+      "time_unit": "ms",
+      "Size": 1.8000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/200",
+      "run_name": "benchmarkJacobi/200",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 7.8404610703000799e+02,
+      "cpu_time": 7.8403886700000044e+02,
+      "time_unit": "ms",
+      "Size": 2.0000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/220",
+      "run_name": "benchmarkJacobi/220",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 9.5265144202858210e+02,
+      "cpu_time": 9.5258721600000035e+02,
+      "time_unit": "ms",
+      "Size": 2.2000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/240",
+      "run_name": "benchmarkJacobi/240",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 1.2943584609893151e+03,
+      "cpu_time": 1.2943392549999996e+03,
+      "time_unit": "ms",
+      "Size": 2.4000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/260",
+      "run_name": "benchmarkJacobi/260",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 1.6564792509889230e+03,
+      "cpu_time": 1.6564014849999999e+03,
+      "time_unit": "ms",
+      "Size": 2.6000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/280",
+      "run_name": "benchmarkJacobi/280",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 2.1374929199810140e+03,
+      "cpu_time": 2.1374186579999996e+03,
+      "time_unit": "ms",
+      "Size": 2.8000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/300",
+      "run_name": "benchmarkJacobi/300",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 2.5861008230131119e+03,
+      "cpu_time": 2.5859757499999992e+03,
+      "time_unit": "ms",
+      "Size": 3.0000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/320",
+      "run_name": "benchmarkJacobi/320",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 3.0040800810093060e+03,
+      "cpu_time": 3.0039900199999997e+03,
+      "time_unit": "ms",
+      "Size": 3.2000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/340",
+      "run_name": "benchmarkJacobi/340",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 3.7402528999955393e+03,
+      "cpu_time": 3.7400758830000032e+03,
+      "time_unit": "ms",
+      "Size": 3.4000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/360",
+      "run_name": "benchmarkJacobi/360",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 4.1913283950416371e+03,
+      "cpu_time": 4.1911908370000010e+03,
+      "time_unit": "ms",
+      "Size": 3.6000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/380",
+      "run_name": "benchmarkJacobi/380",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 4.9076469169813208e+03,
+      "cpu_time": 4.9074275540000017e+03,
+      "time_unit": "ms",
+      "Size": 3.8000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/400",
+      "run_name": "benchmarkJacobi/400",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 5.6230468910071068e+03,
+      "cpu_time": 5.6220422259999978e+03,
+      "time_unit": "ms",
+      "Size": 4.0000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/420",
+      "run_name": "benchmarkJacobi/420",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 6.4518437779624946e+03,
+      "cpu_time": 6.4485558550000023e+03,
+      "time_unit": "ms",
+      "Size": 4.2000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/440",
+      "run_name": "benchmarkJacobi/440",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 7.2091685429913923e+03,
+      "cpu_time": 7.2053253419999946e+03,
+      "time_unit": "ms",
+      "Size": 4.4000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/460",
+      "run_name": "benchmarkJacobi/460",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 7.9389561029965989e+03,
+      "cpu_time": 7.9348689429999977e+03,
+      "time_unit": "ms",
+      "Size": 4.6000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/480",
+      "run_name": "benchmarkJacobi/480",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 8.6234069429920055e+03,
+      "cpu_time": 8.6190785900000083e+03,
+      "time_unit": "ms",
+      "Size": 4.8000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/500",
+      "run_name": "benchmarkJacobi/500",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 9.5808592480025254e+03,
+      "cpu_time": 9.5758657460000049e+03,
+      "time_unit": "ms",
+      "Size": 5.0000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/520",
+      "run_name": "benchmarkJacobi/520",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 1.0666655368986540e+04,
+      "cpu_time": 1.0661205230000007e+04,
+      "time_unit": "ms",
+      "Size": 5.2000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/540",
+      "run_name": "benchmarkJacobi/540",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 1.1506040445994586e+04,
+      "cpu_time": 1.1500157705999996e+04,
+      "time_unit": "ms",
+      "Size": 5.4000000000000000e+02
+    },
+    {
+      "name": "benchmarkJacobi/560",
+      "run_name": "benchmarkJacobi/560",
+      "run_type": "iteration",
+      "repetitions": 0,
+      "repetition_index": 0,
+      "threads": 1,
+      "iterations": 1,
+      "real_time": 1.2268758816004265e+04,
+      "cpu_time": 1.2262452234999997e+04,
+      "time_unit": "ms",
+      "Size": 5.6000000000000000e+02
+    }
--- a/lab05/foo
+++ b/lab05/foo
--- a/lab05/fooBar.cpp
+++ b/lab05/fooBar.cpp
@@ -0,0 +1,29 @@
+#include <condition_variable>
+#include <iostream>
+#include <mutex>
+#include <thread>
+
+// Hand-over state shared by foo() and bar().
+// A std::mutex may only be unlocked by the thread that locked it, so the
+// "take turns" protocol is expressed with a flag that is guarded by a mutex
+// and announced through a condition variable instead of cross-thread unlocks.
+std::mutex turnMutex;
+std::condition_variable turnChanged;
+bool fooTurn = true;  // true: foo() prints next, false: bar() prints next
+
+const int n = 100;
+
+// Prints "foo" n times; before each print it waits until it is foo's turn.
+void foo() {
+  for (int i = 0; i < n; ++i) {
+    std::unique_lock<std::mutex> lock(turnMutex);
+    turnChanged.wait(lock, [] { return fooTurn; });
+    std::cout << "foo";
+    fooTurn = false;
+    lock.unlock();  // release first so the woken thread can take the mutex
+    turnChanged.notify_one();
+  }
+}
+
+// Prints "bar\n" n times; before each print it waits until foo() has printed.
+void bar() {
+  for (int i = 0; i < n; ++i) {
+    std::unique_lock<std::mutex> lock(turnMutex);
+    turnChanged.wait(lock, [] { return !fooTurn; });
+    std::cout << "bar\n";
+    fooTurn = true;
+    lock.unlock();  // release first so the woken thread can take the mutex
+    turnChanged.notify_one();
+  }
+}
+
+// Runs foo() and bar() on two threads; the output is n lines of "foobar".
+int main() {
+  std::thread t1(foo);
+  std::thread t2(bar);
+  t1.join();
+  t2.join();
+}
--- a/lab05/matrix.h
+++ b/lab05/matrix.h
@@ -0,0 +1,343 @@
+/**
+ * matrix.h a very simplistic class for m times n matrices.
+ */
+
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+
+// A very simplistic vector class for vectors of size n.
+// The entries are doubles held in a std::vector and start out as zero.
+class Vector {
+ public:
+  // constructors
+  // Creates a vector with n entries, all set to 0.
+  Vector(int n) : n_(n), data_(n_, 0) {}
+  Vector(const Vector& other) = default;
+  Vector(Vector&& other) = default;
+  ~Vector() = default;
+
+  // assignment operators
+  Vector& operator=(const Vector& other) = default;
+  Vector& operator=(Vector&& other) = default;
+
+  // element access (the index is not range-checked)
+  double& operator()(int i) { return data_[i]; }
+  const double& operator()(int i) const { return data_[i]; }
+
+  // getter functions for the dimensions
+  int dim() const { return n_; }
+
+  // comparison operators: exact comparison of all entries.
+  // They are const-qualified so that const vectors can be compared as well.
+  bool operator==(const Vector& b) const { return (data_ == b.data_); }
+  bool operator!=(const Vector& b) const { return (data_ != b.data_); }
+
+  // addition; reads the first dim() entries of b (sizes are not checked)
+  Vector& operator+=(const Vector& b) {
+    for (int i = 0; i < n_; ++i) {
+      operator()(i) += b(i);
+    }
+    return *this;
+  }
+
+  // subtraction; reads the first dim() entries of b (sizes are not checked)
+  Vector& operator-=(const Vector& b) {
+    for (int i = 0; i < n_; ++i) {
+      operator()(i) -= b(i);
+    }
+    return *this;
+  }
+
+  // scalar multiplication
+  Vector& operator*=(double x) {
+    for (int i = 0; i < n_; ++i) {
+      operator()(i) *= x;
+    }
+    return *this;
+  }
+
+  // dot product between two vectors, summed in index order
+  double dot(const Vector& other) const {
+    double sum = 0;
+    for (int i = 0; i < n_; ++i) {
+      sum += operator()(i) * other(i);
+    }
+    return sum;
+  }
+
+ private:
+  int n_;                     // vector dimension
+  std::vector<double> data_;  // the vectors entries
+};
+
+// Free-function form of the dot product: sums v1(i) * v2(i) over the
+// dim() entries of v1, in index order.
+inline double dot(const Vector& v1, const Vector& v2) {
+  double total = 0;
+  for (int idx = 0; idx < v1.dim(); ++idx) {
+    total += v1(idx) * v2(idx);
+  }
+  return total;
+}
+
+// Writes all entries of the vector on one line (field width 10, precision 4),
+// each followed by a blank, and ends the line. The stream's previous
+// precision is restored before returning.
+inline std::ostream& operator<<(std::ostream& os, const Vector& a) {
+  const int fieldWidth = 10;
+  const int digits = 4;
+
+  // precision(x) installs the new precision and hands back the old one
+  const auto savedPrecision = os.precision(digits);
+
+  const int count = a.dim();
+  for (int idx = 0; idx < count; ++idx) {
+    os << std::setw(fieldWidth) << a(idx) << " ";
+  }
+  os << "\n";
+
+  os.precision(savedPrecision);
+  return os;
+}
+
+// A very simple class for m times n matrices.
+// The entries are doubles stored row by row in one std::vector; entry (i, j)
+// lives at index i * n + j. New matrices are filled with zeros.
+class Matrix {
+ public:
+  // constructors
+  Matrix() : Matrix(0, 0) {}
+  Matrix(int m, int n) : m_(m), n_(n), data_(m_ * n_, 0) {}
+  Matrix(std::pair<int, int> dim) : Matrix(dim.first, dim.second) {}
+  Matrix(int n) : Matrix(n, n) {}
+  Matrix(const Matrix& other) = default;
+  Matrix(Matrix&& other) = default;
+  ~Matrix() = default;
+
+  // assignment operators
+  Matrix& operator=(const Matrix& other) = default;
+  Matrix& operator=(Matrix&& other) = default;
+
+  // element access (indices are not range-checked)
+  double& operator()(int i, int j) { return data_[i * n_ + j]; }
+  const double& operator()(int i, int j) const { return data_[i * n_ + j]; }
+
+  // getter functions for the dimensions
+  std::pair<int, int> dim() const { return std::pair<int, int>(m_, n_); }
+  int dim1() const { return m_; }
+  int dim2() const { return n_; }
+  int numEntries() const { return static_cast<int>(data_.size()); }
+
+  // comparison operators: two matrices are equal if they have the same shape
+  // and exactly the same entries. Comparing the shape as well keeps e.g. a
+  // 2 x 3 and a 3 x 2 matrix with identical storage from comparing equal.
+  bool operator==(const Matrix& b) const {
+    return m_ == b.m_ && n_ == b.n_ && data_ == b.data_;
+  }
+  bool operator!=(const Matrix& b) const { return !(*this == b); }
+
+  // addition; b must be at least as large as *this (not checked)
+  Matrix& operator+=(const Matrix& b) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) += b(i, j);
+      }
+    }
+    return *this;
+  }
+
+  // subtraction; b must be at least as large as *this (not checked)
+  Matrix& operator-=(const Matrix& b) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) -= b(i, j);
+      }
+    }
+    return *this;
+  }
+
+  // scalar multiplication
+  Matrix& operator*=(double x) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) *= x;
+      }
+    }
+    return *this;
+  }
+
+  // scalar division
+  Matrix& operator/=(double x) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) /= x;
+      }
+    }
+    return *this;
+  }
+
+  // matrix product (only for square matrices of equal dimension).
+  // On a dimension error a message is printed and *this is left unchanged.
+  Matrix& operator*=(const Matrix& b) {
+    if (dim1() != dim2()) {
+      std::cout << "Error in matrix multiplication: no square matrix\n";
+    } else if (dim1() != b.dim1() || dim2() != b.dim2()) {
+      std::cout << "Error in matrix multiplication: dimensions do not match\n";
+    } else {
+      const int m = dim1();
+      // Accumulate into a separate zero matrix: summing onto the old entries
+      // of *this would add the left operand to the product, and writing into
+      // *this while reading would corrupt the operands when b is *this.
+      Matrix product(m, m);
+      for (int i = 0; i < m; ++i) {
+        for (int j = 0; j < m; ++j) {
+          for (int k = 0; k < m; ++k) {
+            product(i, j) += operator()(i, k) * b(k, j);
+          }
+        }
+      }
+      data_.swap(product.data_);
+    }
+
+    return *this;
+  }
+
+ public:
+  int m_;                     // first dimension
+  int n_;                     // second dimension
+  std::vector<double> data_;  // the matrix' entries
+};
+
+// Writes the matrix as a table: one text line per row, every entry with
+// field width 10 and precision 4 followed by a blank. Rows are separated by
+// a newline; no newline is written after the last row. The stream's previous
+// precision is restored before returning.
+inline std::ostream& operator<<(std::ostream& os, const Matrix& a) {
+  const int fieldWidth = 10;
+  const int digits = 4;
+
+  // precision(x) installs the new precision and hands back the old one
+  const auto savedPrecision = os.precision(digits);
+
+  const int rows = a.dim1();
+  const int cols = a.dim2();
+  for (int r = 0; r < rows; ++r) {
+    if (r > 0) {
+      os << "\n";
+    }
+    for (int c = 0; c < cols; ++c) {
+      os << std::setw(fieldWidth) << a(r, c) << " ";
+    }
+  }
+
+  os.precision(savedPrecision);
+  return os;
+}
+
+// matrix product a * b.
+// If the inner dimensions do not match, an empty 0 x 0 matrix is returned.
+inline Matrix operator*(const Matrix& a, const Matrix& b) {
+  if (a.dim2() != b.dim1()) {
+    return Matrix(0, 0);
+  }
+
+  const int rows = a.dim1();
+  const int inner = a.dim2();
+  const int cols = b.dim2();
+
+  Matrix product(rows, cols);  // starts out as all zeros
+  for (int r = 0; r < rows; ++r) {
+    for (int c = 0; c < cols; ++c) {
+      for (int k = 0; k < inner; ++k) {
+        product(r, c) += a(r, k) * b(k, c);
+      }
+    }
+  }
+  return product;
+}
+
+// Returns true if a and b have the same shape and no pair of corresponding
+// entries differs by more than eps.
+inline bool equalWithinRange(const Matrix& a,
+                             const Matrix& b,
+                             double eps = 1e-12) {
+  const bool sameShape = a.dim1() == b.dim1() && a.dim2() == b.dim2();
+  if (!sameShape) {
+    return false;
+  }
+
+  const int rows = a.dim1();
+  const int cols = a.dim2();
+  for (int r = 0; r < rows; ++r) {
+    for (int c = 0; c < cols; ++c) {
+      const double deviation = fabs(a(r, c) - b(r, c));
+      if (deviation > eps) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// A very simple class for "3D-Matrices" (tensors) with dimension l x m x n.
+// The entries are doubles stored as nested std::vectors and start as zero.
+class Matrix3D {
+ public:
+  // constructors
+  // Creates an l x m x n tensor with every entry set to 0.
+  Matrix3D(int l, int m, int n)
+      : l_(l),
+        m_(m),
+        n_(n),
+        data_(l, std::vector<std::vector<double>>(m, std::vector<double>(n, 0))) {}
+  Matrix3D(int n) : Matrix3D(n, n, n) {}
+  Matrix3D(const Matrix3D& other) = default;
+  Matrix3D(Matrix3D&& other) = default;
+  ~Matrix3D() = default;
+
+  // assignment operators
+  Matrix3D& operator=(const Matrix3D& other) = default;
+  Matrix3D& operator=(Matrix3D&& other) = default;
+
+  // element access (indices are not range-checked)
+  double& operator()(int i, int j, int k) { return data_[i][j][k]; }
+  const double& operator()(int i, int j, int k) const { return data_[i][j][k]; }
+
+  // getter functions for the dimensions
+  int dim1() const { return l_; }
+  int dim2() const { return m_; }
+  int dim3() const { return n_; }
+
+  // comparison operators: exact comparison of the nested storage.
+  // They are const-qualified so that const tensors can be compared as well.
+  bool operator==(const Matrix3D& b) const { return (data_ == b.data_); }
+  bool operator!=(const Matrix3D& b) const { return (data_ != b.data_); }
+
+  // addition; b must be at least as large as *this (not checked)
+  Matrix3D& operator+=(const Matrix3D& b) {
+    for (int i = 0; i < l_; ++i) {
+      for (int j = 0; j < m_; ++j) {
+        for (int k = 0; k < n_; ++k) {
+          operator()(i, j, k) += b(i, j, k);
+        }
+      }
+    }
+    return *this;
+  }
+
+  // subtraction; b must be at least as large as *this (not checked)
+  Matrix3D& operator-=(const Matrix3D& b) {
+    for (int i = 0; i < l_; ++i) {
+      for (int j = 0; j < m_; ++j) {
+        for (int k = 0; k < n_; ++k) {
+          operator()(i, j, k) -= b(i, j, k);
+        }
+      }
+    }
+    return *this;
+  }
+
+  // scalar multiplication
+  Matrix3D& operator*=(double x) {
+    for (int i = 0; i < l_; ++i) {
+      for (int j = 0; j < m_; ++j) {
+        for (int k = 0; k < n_; ++k) {
+          operator()(i, j, k) *= x;
+        }
+      }
+    }
+    return *this;
+  }
+
+  // scalar division
+  Matrix3D& operator/=(double x) {
+    for (int i = 0; i < l_; ++i) {
+      for (int j = 0; j < m_; ++j) {
+        for (int k = 0; k < n_; ++k) {
+          operator()(i, j, k) /= x;
+        }
+      }
+    }
+    return *this;
+  }
+
+ private:
+  int l_;                                               // first dimension
+  int m_;                                               // second dimension
+  int n_;                                               // third dimension
+  std::vector<std::vector<std::vector<double>>> data_;  // the tensors' entries
+};
+
+#endif  // MATRIX_H
--- a/lab05/matrixVectorProduct
+++ b/lab05/matrixVectorProduct
@@ -0,0 +1,48 @@
+#include <cmath>
+#include <iostream>
+#include <thread>
+#include "matrix.h"
+#include "test.h"
+
+// Create the matrix and vector to be multiplied and fill them
+// with some sensible initial values:
+//   mat(i, j) = (-1)^i * (i + j)   and   vec[i] = 1 / (i + 1).
+// Both operands are built directly inside the returned pair, so the
+// n x n matrix is not copied when the function returns.
+std::pair<Matrix, std::vector<double>> createMatrixAndVector() {
+  const int n = 9000;
+  std::pair<Matrix, std::vector<double>> operands{Matrix(n, n),
+                                                  std::vector<double>(n)};
+  auto& [mat, vec] = operands;
+
+  for (int i = 0; i < n; ++i) {
+    // (-1)^i: +1 for even rows, -1 for odd rows
+    const double sign = (i % 2 == 0) ? 1.0 : -1.0;
+    for (int j = 0; j < n; ++j) {
+      mat(i, j) = sign * (i + j);
+    }
+  }
+
+  for (int i = 0; i < n; ++i) {
+    vec[i] = 1. / (i + 1);
+  }
+
+  return operands;
+}
+
+// Verify that the computed result is correct. Rather inefficient,
+// since it runs on a single core: the operands are created again and every
+// row of mat * vec is recomputed and compared with result via check().
+// result is taken by const reference so that the caller's vector is not
+// copied; it must hold at least as many entries as the created vector.
+void verifyResult(const std::vector<double>& result) {
+  const auto [mat, vec] = createMatrixAndVector();
+  const int n = vec.size();
+
+  for (int i = 0; i < n; ++i) {
+    double expected = 0;
+    for (int j = 0; j < n; ++j) {
+      expected += mat(i, j) * vec[j];
+    }
+    check(result[i], expected);
+  }
+}
+
+// Entry point of the exercise skeleton: creates the operands, leaves the
+// multi-threaded product as a TODO and passes result to verifyResult().
+int main() {
+  auto [mat, vec] = createMatrixAndVector();
+  std::vector<double> result(vec.size(), 0);  // starts out as all zeros
+
+  // TODO: compute result = mat * vec with multiple threads
+
+  verifyResult(result);
+}
--- a/lab05/matrixVectorProductDistributedIndices.cpp
+++ b/lab05/matrixVectorProductDistributedIndices.cpp
@@ -0,0 +1,57 @@
+#include <cmath>
+#include <iostream>
+#include <thread>
+#include "matrix.h"
+#include "test.h"
+
+// Create the matrix and vector to be multiplied and fill them
+// with some sensible initial values:
+//   mat(i, j) = (-1)^i * (i + j)   and   vec[i] = 1 / (i + 1).
+// Both operands are built directly inside the returned pair, so the
+// n x n matrix is not copied when the function returns.
+std::pair<Matrix, std::vector<double>> createMatrixAndVector() {
+  const int n = 9000;
+  std::pair<Matrix, std::vector<double>> operands{Matrix(n, n),
+                                                  std::vector<double>(n)};
+  auto& [mat, vec] = operands;
+
+  for (int i = 0; i < n; ++i) {
+    // (-1)^i: +1 for even rows, -1 for odd rows
+    const double sign = (i % 2 == 0) ? 1.0 : -1.0;
+    for (int j = 0; j < n; ++j) {
+      mat(i, j) = sign * (i + j);
+    }
+  }
+
+  for (int i = 0; i < n; ++i) {
+    vec[i] = 1. / (i + 1);
+  }
+
+  return operands;
+}
+
+// Verify that the computed result is correct. Rather inefficient,
+// since it runs on a single core: the operands are created again and every
+// row of mat * vec is recomputed and compared with result via check().
+// result is taken by const reference so that the caller's vector is not
+// copied; it must hold at least as many entries as the created vector.
+void verifyResult(const std::vector<double>& result) {
+  const auto [mat, vec] = createMatrixAndVector();
+  const int n = vec.size();
+
+  for (int i = 0; i < n; ++i) {
+    double expected = 0;
+    for (int j = 0; j < n; ++j) {
+      expected += mat(i, j) * vec[j];
+    }
+    check(result[i], expected);
+  }
+}
+
+// Computes the rows [start, end) of result = mat * vec:
+//   result[row] = sum over col of mat(row, col) * vec[col].
+// A call writes only result[start] .. result[end - 1], so calls with disjoint
+// row ranges may run on different threads without further synchronisation.
+// The columns are summed in index order, like in verifyResult().
+void computeResult(const Matrix& mat, const std::vector<double>& vec, int start, int end, std::vector<double>& result) {
+  const int n = vec.size();
+  for (int row = start; row < end; ++row) {
+    double sum = 0;
+    for (int col = 0; col < n; ++col) {
+      sum += mat(row, col) * vec[col];
+    }
+    result[row] = sum;
+  }
+}
+
+// Computes result = mat * vec by distributing contiguous row ranges over
+// worker threads and verifies the outcome afterwards.
+int main() {
+  auto [mat, vec] = createMatrixAndVector();
+  std::vector<double> result(vec.size(), 0);
+
+  // Plain references for the lambda below: structured bindings cannot be
+  // captured by a lambda in C++17.
+  const Matrix& matRef = mat;
+  const std::vector<double>& vecRef = vec;
+
+  // One worker per hardware thread (at least one, at most one per row).
+  const int n = vec.size();
+  const unsigned hardwareThreads = std::thread::hardware_concurrency();
+  int numThreads = (hardwareThreads == 0) ? 1 : static_cast<int>(hardwareThreads);
+  if (numThreads > n) {
+    numThreads = (n > 0) ? n : 1;
+  }
+
+  // Every worker gets n / numThreads rows; the first n % numThreads workers
+  // take one extra row so that all rows are covered exactly once.
+  const int rowsPerThread = n / numThreads;
+  const int leftoverRows = n % numThreads;
+
+  std::vector<std::thread> workers;
+  workers.reserve(numThreads);
+  int start = 0;
+  for (int t = 0; t < numThreads; ++t) {
+    const int end = start + rowsPerThread + (t < leftoverRows ? 1 : 0);
+    workers.emplace_back([&matRef, &vecRef, &result, start, end] {
+      computeResult(matRef, vecRef, start, end, result);
+    });
+    start = end;
+  }
+  for (std::thread& worker : workers) {
+    worker.join();
+  }
+
+  verifyResult(result);
+}
--- a/lab05/raceCondition.cpp
+++ b/lab05/raceCondition.cpp
@@ -0,0 +1,60 @@
+// adapted from modernescpp.com
+
+#include <iostream>
+#include <thread>
+#include "test.h"
+#include <atomic>
+#include <mutex>
+
+// Serialises all transfers: the balance check and both balance updates of a
+// transfer happen while this mutex is held.
+std::mutex mut;
+
+// A bank account; every account starts with a balance of 100.
+struct Account {
+  std::atomic<int> balance{100};
+};
+
+// Moves amount from one account to the other if from holds at least amount;
+// otherwise nothing happens. The short sleeps are kept from the original
+// example, where they widen the window between the individual steps.
+// The mutex is held through a std::lock_guard, so it is released on every
+// path out of the function, including an exception.
+void transferMoney(int amount, Account& from, Account& to) {
+  using namespace std::chrono_literals;
+  const std::lock_guard<std::mutex> guard(mut);
+  if (from.balance.load() >= amount) {
+    std::this_thread::sleep_for(1ns);
+    from.balance -= amount;
+    std::this_thread::sleep_for(1ns);
+    to.balance += amount;
+  }
+}
+
+// Runs three concurrent transfers between two fresh accounts (80 and 60 from
+// account1 to account2, 10 back from account2 to account1), reports through
+// check() whether both balances are positive and whether they still add up
+// to 200, and returns the sum of the two balances.
+int testTransferMoney() {
+  Account account1;
+  Account account2;
+
+  std::thread workers[] = {
+      std::thread(transferMoney, 80, std::ref(account1), std::ref(account2)),
+      std::thread(transferMoney, 60, std::ref(account1), std::ref(account2)),
+      std::thread(transferMoney, 10, std::ref(account2), std::ref(account1)),
+  };
+  for (std::thread& worker : workers) {
+    worker.join();
+  }
+
+  // All threads are finished, so the balances no longer change.
+  const int balance1 = account1.balance;
+  const int balance2 = account2.balance;
+  const int total = balance1 + balance2;
+
+  std::cout << "\nChecking balance of account1: ";
+  check(balance1 > 0, true);
+  std::cout << "Checking balance of account2: ";
+  check(balance2 > 0, true);
+  std::cout << "Checking sum of accounts:     ";
+  check(total, 200);
+
+  return total;
+}
+
+// Repeats the transfer test 10000 times and prints how many runs ended with
+// a total balance other than 200.
+int main() {
+  const int runs = 10000;
+  int erroneousTransfers = 0;
+  for (int run = 0; run < runs; ++run) {
+    if (testTransferMoney() != 200) {
+      ++erroneousTransfers;
+    }
+  }
+
+  std::cout << "\n\nThere were " << erroneousTransfers
+            << " transfers where money appeared or disappeared.\n";
+}
--- a/lab05/test.h
+++ b/lab05/test.h
@@ -0,0 +1,305 @@
+/** test.h, an extremely simple test framework.
+ * Version 1.7
+ * Copyright (C) 2022-2024 Tobias Kreilos, Offenburg University of Applied
+ * Sciences
+ */
+
+/**
+ * The framework defines a function check(a,b) that can be called with
+ * parameters of different types. The function asserts
+ * that the two parameters are equal (within a certain, predefined range for
+ * floating point numbers) and prints the result of the comparison on the
+ * command line. Additionally a summary of all tests is printed at the end of
+ * the program.
+ * There is a TEST macro, which you can place outside main to group
+ * tests together. Code in the macro is automatically executed at the beginning
+ * of the program.
+ * The file also defines a class InstanceCount, that can be used to
+ * count how many instances of an object are still alive at the end of a
+ * program. To use it, derive your class from InstanceCount<ClassName> and the
+ * message is automatically printed at the end of the program.
+ *
+ * The functions are thread- and reentrant-safe. Support for OpenMP is included.
+ * Execution with MPI is supported, but no collection of the results occurs. All
+ * tests are executed locally, results are printed for every node separately.
+ *
+ * Caution: the TEST macro uses static storage of objects, so be aware of the
+ * static initialization order fiasco when using multiple source files.
+ *
+ * Example usage:
+ *
+ * #include "test.h"
+ * TEST(MyTest) {
+ *   check(1, 1);
+ * }
+ *
+ * int main() {
+ *   const std::string s = "Hi";
+ *   check(s, "Hi");
+ * }
+ */
+
+#ifndef VERY_SIMPLE_TEST_H
+#define VERY_SIMPLE_TEST_H
+
+#include <atomic>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+/** Simple macro to execute the code that follows the macro (without call from
+ * main)
+ *
+ * Define a class, that is directly instantiated
+ * and contains the test code in the constructor.
+ *
+ * TEST(name) expands to the declaration of a struct _TestClass<name> with a
+ * default constructor, one object _TestClass<name>Instance of that struct,
+ * and the head of the out-of-class constructor definition. The braces that
+ * follow the macro therefore become the constructor body, which runs when
+ * the object is constructed.
+ *
+ * Usage:
+ * TEST(MyTest) {
+ *    // test code
+ * }
+ */
+#define TEST(name)              \
+  struct _TestClass##name {     \
+    _TestClass##name();         \
+  } _TestClass##name##Instance; \
+  _TestClass##name::_TestClass##name()
+
+// Use a namespace to hide implementation details
+namespace Test::Detail {
+
+/**
+ * Stream output for enums (including enum classes): prints the underlying
+ * integral value.
+ *
+ * std::enable_if_t<std::is_enum_v<T>, std::ostream> names std::ostream if T
+ * is an enum. For any other T the type does not exist, so the function is
+ * not generated.
+ */
+template <typename T>
+std::ostream& operator<<(
+    std::enable_if_t<std::is_enum_v<T>, std::ostream>& stream,
+    const T& e) {
+  using Underlying = std::underlying_type_t<T>;
+  return stream << static_cast<Underlying>(e);
+}
+
+/**
+ * Render any streamable value as text (floating point values with up to 10
+ * significant digits) and wrap the text in double quotes.
+ */
+template <typename T>
+std::string toString(const T& t) {
+  std::ostringstream out;
+  out << std::setprecision(10) << t;
+
+  std::string quoted = "\"";
+  quoted += out.str();
+  quoted += "\"";
+  return quoted;
+}
+
+/**
+ * Bools are rendered as the quoted words "true" and "false" rather than as
+ * 1 and 0.
+ */
+template <>
+inline std::string toString<bool>(const bool& b) {
+  if (b) {
+    return "\"true\"";
+  }
+  return "\"false\"";
+}
+
+/**
+ * Comparison function for different types: by default two values are equal
+ * if operator== says so.
+ */
+template <typename T>
+bool isEqual(const T& t1, const T& t2) {
+  return t1 == t2;
+}
+
+/**
+ * Double values are equal if they differ by less than 1e-4.
+ * If either value is NaN the comparison yields false.
+ */
+template <>
+inline bool isEqual<double>(const double& expectedValue,
+                            const double& actualValue) {
+  const double epsilon = 1e-4;
+  const double distance = std::fabs(actualValue - expectedValue);
+  return (distance < epsilon);
+}
+
+/**
+ * Float values are equal if they differ by less than 1e-4.
+ * If either value is NaN the comparison yields false.
+ */
+template <>
+inline bool isEqual<float>(const float& expectedValue,
+                           const float& actualValue) {
+  const double epsilon = 1e-4;
+  const double distance = std::fabs(actualValue - expectedValue);
+  return (distance < epsilon);
+}
+
+/**
+ * Core of the test framework.
+ * The class counts executed and failed tests, compares values through
+ * isEqual() (which applies a tolerance to floating point numbers), prints
+ * the outcome of every test on the command line and prints a summary in its
+ * destructor.
+ */
+class Test {
+ public:
+  /**
+   * Access to the single instance (function-local static).
+   */
+  static Test& instance() {
+    static Test theInstance;
+    return theInstance;
+  }
+
+  /**
+   * the main entry point for tests. Compares the two values, prints the
+   * outcome, updates the counters and returns true if the values are equal.
+   */
+  template <typename T>
+  bool check(const T& expectedValue, const T& actualValue) {
+    const bool passed = isEqual(expectedValue, actualValue);
+    if (!passed) {
+      registerFailingTest();
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+      std::cout << "Error in test: expected value " << toString(expectedValue)
+                << ", but actual value was " << toString(actualValue)
+                << std::endl;
+      return passed;
+    }
+
+    registerPassingTest();
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+    std::cout << "Test successful! Expected value == actual value (="
+              << toString(expectedValue) << ")" << std::endl;
+    return passed;
+  }
+
+ private:
+  /**
+   * Print a summary of all tests.
+   *
+   * The only instance is the static object inside instance(), so this runs
+   * when static objects are destroyed at the end of the program.
+   */
+  ~Test() {
+    std::cout << "\n--------------------------------------" << std::endl;
+    std::cout << "Test summary:" << std::endl;
+    std::cout << "Executed tests: " << numTests_.load() << std::endl;
+    std::cout << "Failed tests: " << numFailedTests_.load() << std::endl;
+  }
+
+  /** Count one more executed test. */
+  void registerPassingTest() { ++numTests_; }
+
+  /** Count one more executed test that also failed. */
+  void registerFailingTest() {
+    ++numTests_;
+    ++numFailedTests_;
+  }
+
+  /**
+   * Number of executed tests
+   */
+  std::atomic<int> numTests_{0};
+
+  /**
+   * Number of failed tests
+   */
+  std::atomic<int> numFailedTests_{0};
+};
+
+/**
+ * Holds two counters for objects of type T: how many are currently alive and
+ * how many were created in total. Both numbers are printed in the
+ * destructor.
+ */
+template <typename T>
+class InstanceCounterHelper {
+ public:
+  ~InstanceCounterHelper() {
+    const int remaining = count;
+    std::cout << "The remaining number of objects of type " << typeid(T).name()
+              << " at the end of the program is " << remaining;
+    if (remaining > 0) {
+      std::cout << " (NOT zero!)";
+    }
+    std::cout << "\nThe total number of objects created was " << total.load()
+              << std::endl;
+  }
+
+  /** A new object was created. */
+  void increment() {
+    ++count;
+    ++total;
+  }
+
+  /** An object was destroyed. */
+  void decrement() { --count; }
+
+ private:
+  std::atomic<int> count{0};  // objects currently alive
+  std::atomic<int> total{0};  // objects created so far
+};
+
+}  // namespace Test::Detail
+
+/**
+ * Counts the instances of a class T.
+ * Every construction (default, copy or move) increments the counter that is
+ * shared by all InstanceCounter<T> objects, every destruction decrements it.
+ * The numbers are printed when that shared counter is destroyed.
+ * To use it, inherit T from InstanceCounter<T>, e.g.
+ * class MyClass : InstanceCounter<MyClass>
+ */
+template <typename T>
+class InstanceCounter {
+ public:
+  InstanceCounter() {
+    counter().increment();
+  }
+
+  InstanceCounter(const InstanceCounter&) {
+    counter().increment();
+  }
+
+  InstanceCounter(const InstanceCounter&&) {
+    counter().increment();
+  }
+
+  virtual ~InstanceCounter() {
+    counter().decrement();
+  }
+
+  // The counter shared by all instances (function-local static).
+  Test::Detail::InstanceCounterHelper<T>& counter() {
+    static Test::Detail::InstanceCounterHelper<T> sharedCounter;
+    return sharedCounter;
+  }
+};
+
+/**
+ * Check if the actual value is equal to the expected value.
+ * The expected value is first converted to the type of the actual value.
+ * The result is printed on the command line, and a summary of all tests is
+ * printed at the end of the program.
+ */
+template <typename T1, typename T2>
+void check(const T1& actualValue, const T2& expectedValue) {
+  // Brace initialisation permits conversions in general but rejects
+  // narrowing conversions.
+  const T1& expectedAsActualType{expectedValue};
+  auto& framework = Test::Detail::Test::instance();
+  framework.check(expectedAsActualType, actualValue);
+}
+
+// allow conversion from int to double explicitly
+template <>
+inline void check(const double& actualValue, const int& expectedValue) {
+  const double expectedAsDouble = static_cast<double>(expectedValue);
+  Test::Detail::Test::instance().check(expectedAsDouble, actualValue);
+}
+
+/**
+ * Check if the given value is true.
+ * The result is printed on the command line, and a summary of all tests is
+ * printed at the end of the program.
+ */
+inline void check(bool a) {
+  const bool expected = true;
+  Test::Detail::Test::instance().check(expected, a);
+}
+
+#endif  // VERY_SIMPLE_TEST_H
+
+/**
+ * V1.0: Creation of framework
+ * V1.1: make check(bool) inline, automatically convert expected value type to
+ * actual value type
+ * V1.2: added possibility to count constructions and destructions of some type
+ * V1.3: tweaks on check for int and double types
+ * V1.4: Adding thread safety in OpenMP programs
+ * V1.5: reduce accuracy in comparing double and float to 1e-8
+ * V1.6: Increase precision for printing floating point values
+ * V1.7: Put #ifdef _OPENMP around pragmas to avoid warnings when compiling
+ *       without -fopenmp
+ */