diff --git a/lab07/gaus_seidel/gaus_seidel.cpp b/lab07/gaus_seidel/gaus_seidel.cpp
index c7fea03..0451c49 100644
--- a/lab07/gaus_seidel/gaus_seidel.cpp
+++ b/lab07/gaus_seidel/gaus_seidel.cpp
@@ -3,15 +3,19 @@
 #include <iostream>
 #include <omp.h>
 
-void gauss_seidel(Matrix &phi, int maxNumIter) {
+void gauss_seidel(Matrix &phi, int maxNumIter)
+{
   const int m = phi.dim1();
   const int n = phi.dim2();
 
   const double osth = 1. / 4;
 
-  for (int iter = 0; iter < maxNumIter; ++iter) {
-    for (int i = 1; i < m - 1; ++i) {
-      for (int j = 1; j < n - 1; ++j) {
+  for (int iter = 0; iter < maxNumIter; ++iter)
+  {
+    for (int i = 1; i < m - 1; ++i)
+    {
+      for (int j = 1; j < n - 1; ++j)
+      {
         phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) +
                             phi(i, j - 1));
       }
@@ -19,28 +23,33 @@ void gauss_seidel(Matrix &phi, int maxNumIter) {
   }
 }
 
-void gauss_seidel_par(Matrix &phi, int maxNumIter) {
+void gauss_seidel_par(Matrix &phi, int maxNumIter)
+{
   const int m = phi.dim1();
   const int n = phi.dim2();
 
 #pragma omp parallel num_threads(12)
   {
-    for (int iter = 0; iter < maxNumIter; ++iter) {
-      const double osth = 1. / 4;
-      int num_theads = omp_get_num_threads();
-      int thread_num = omp_get_thread_num();
-      int chunk = (m - 2) / num_theads;
-      int start = 1 + chunk * thread_num;
-      int end = chunk * (thread_num + 1);
+    const double osth = 1. / 4;
+    int num_theads = omp_get_num_threads();
+    int thread_num = omp_get_thread_num();
+    int chunk = (m - 2) / num_theads;
+    int start = 1 + chunk * thread_num;
+    int end = chunk * (thread_num + 1);
 
+    for (int iter = 0; iter < maxNumIter; ++iter)
+    {
       // printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start,
       //        end);
 
-      for (int j = 1; j < n + num_theads - 1; ++j) {
+      for (int j = 1; j < n + num_theads - 1; ++j)
+      {
         int k = j - thread_num;
 
-        if (k > 0 && k < n - 1) {
-          for (int i = start; i <= end; ++i) {
+        if (k > 0 && k < n - 1)
+        {
+          for (int i = start; i <= end; ++i)
+          {
             // printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(),
             // i, j,
             //        k);
@@ -55,27 +64,36 @@ void gauss_seidel_par(Matrix &phi, int maxNumIter) {
   }
 }
 
-void fill_matrix(Matrix &matrix, const int filler) {
+void fill_matrix(Matrix &matrix, const int filler)
+{
   const int m = matrix.dim1();
   const int n = matrix.dim2();
 
-  for (int i = 0; i < m; ++i) {
-    for (int j = 0; j < n; ++j) {
-      if (i == 0 || i == m - 1 || j == 0 || j == n - 1) {
+  for (int i = 0; i < m; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      if (i == 0 || i == m - 1 || j == 0 || j == n - 1)
+      {
         matrix(i, j) = filler;
-      } else
+      }
+      else
         matrix(i, j) = 0;
     }
   }
 }
 
-void check(const Matrix &a, const Matrix &b) {
+void check(const Matrix &a, const Matrix &b)
+{
   const int m = a.dim1();
   const int n = a.dim2();
 
-  for (int i = 0; i < m; ++i) {
-    for (int j = 0; j < n; ++j) {
-      if (a(i, j) != b(i, j)) {
+  for (int i = 0; i < m; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      if (a(i, j) != b(i, j))
+      {
         std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j)
                   << " != " << b(i, j) << " :b" << std::endl;
       }
@@ -83,43 +101,51 @@ void check(const Matrix &a, const Matrix &b) {
   }
 }
 
-void print_matrix(const Matrix &matrix) {
+void print_matrix(const Matrix &matrix)
+{
   const int m = matrix.dim1();
   const int n = matrix.dim2();
 
-  for (int i = 0; i < m; ++i) {
-    for (int j = 0; j < n; ++j) {
+  for (int i = 0; i < m; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
       std::cout << matrix(i, j) << " ";
     }
     std::cout << std::endl;
   }
 }
 
-void benchmarkParallel(benchmark::State &state) {
+void benchmarkParallel(benchmark::State &state)
+{
   int iterations = state.range(0);
   Matrix matrix = Matrix(30002, 20002);
 
   fill_matrix(matrix, 10);
 
-  for (auto _ : state) {
+  for (auto _ : state)
+  {
     gauss_seidel_par(matrix, iterations);
     benchmark::DoNotOptimize(matrix);
   }
 }
 
-void benchmarkSerial(benchmark::State &state) {
+void benchmarkSerial(benchmark::State &state)
+{
   int iterations = state.range(0);
   Matrix matrix = Matrix(30002, 20002);
 
   fill_matrix(matrix, 10);
 
-  for (auto _ : state) {
+  for (auto _ : state)
+  {
     gauss_seidel(matrix, iterations);
     benchmark::DoNotOptimize(matrix);
   }
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char **argv)
+{
   ::benchmark::Initialize(&argc, argv);
 
   benchmark::RegisterBenchmark("gaus_seidel_parallel", benchmarkParallel)
diff --git a/lab08/excercise_2/vectorTriad.cpp b/lab08/excercise_2/vectorTriad.cpp
new file mode 100644
index 0000000..ac804bc
--- /dev/null
+++ b/lab08/excercise_2/vectorTriad.cpp
@@ -0,0 +1,30 @@
+#include <benchmark/benchmark.h>
+#include <omp.h>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+static void vectorTriad(benchmark::State& state) {
+  const int n = 1e9;
+#pragma omp parallel num_threads(6)
+  {
+    std::cout << "CPU: " << sched_getcpu() << " Thread: " << omp_get_thread_num() << std::endl;
+    const int numThreads = omp_get_num_threads();
+    const int nPerThread = n / numThreads;
+    std::vector<double> A(nPerThread);
+    std::vector<double> B(nPerThread, 1.);
+    std::vector<double> C(nPerThread, 2.);
+    std::vector<double> D(nPerThread, 3.);
+    for (auto _ : state) {
+      for (int i = 0; i < nPerThread; ++i) {
+        A[i] = B[i] + C[i] * D[i];
+      }
+      volatile double dummy = A[0];
+    }
+  }
+}
+
+BENCHMARK(vectorTriad)->Unit(benchmark::kMillisecond);
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/lab08/excercise_2/vectorTriad.sh b/lab08/excercise_2/vectorTriad.sh
new file mode 100644
index 0000000..7851723
--- /dev/null
+++ b/lab08/excercise_2/vectorTriad.sh
@@ -0,0 +1,12 @@
+#!/bin/bash                                                                                                             
+#SBATCH --job-name=excercise2_vectorTriad                                                                                        
+#SBATCH --output=excercise2_vectorTriad.out                                                                             
+#SBATCH --ntasks=1                                                                                                      
+#SBATCH --cpus-per-task=6                                                                                               
+export OMP_PLACES=cores                                                                                                 
+export OMP_PROC_BIND=close                                                                                                                                                                                                                      
+# Load modules or activate environment if needed                                                                        
+# module load gcc cmake ...                                                                                                                                                                                                                     
+# Run the benchmark binary                                                                                              
+g++ vectorTriad.cpp -o vector_triad -fopenmp -O3 -lbenchmark                                                              
+./vector_triad --benchmark_out=excercise2_vectorTriad.json --benchmark_out_format=json
\ No newline at end of file
diff --git a/lab08/exercise_3/gauss_seidel.cpp b/lab08/exercise_3/gauss_seidel.cpp
new file mode 100644
index 0000000..72bdda0
--- /dev/null
+++ b/lab08/exercise_3/gauss_seidel.cpp
@@ -0,0 +1,230 @@
+#include "matrix.h"
+#include <benchmark/benchmark.h>
+#include <iostream>
+#include <omp.h>
+
+void gauss_seidel_serial(Matrix &phi, int maxNumIter)
+{
+    const int m = phi.dim1();
+    const int n = phi.dim2();
+
+    const double osth = 1. / 4;
+
+    for (int iter = 0; iter < maxNumIter; ++iter)
+    {
+        for (int i = 1; i < m - 1; ++i)
+        {
+            for (int j = 1; j < n - 1; ++j)
+            {
+                phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) +
+                                    phi(i, j - 1));
+            }
+        }
+    }
+}
+
+void gauss_seidel_parallel_kai(Matrix &phi, int maxNumIter)
+{
+    const int m = phi.dim1();
+    const int n = phi.dim2();
+
+#pragma omp parallel num_threads(12)
+    {
+        const double osth = 1. / 4;
+        int num_theads = omp_get_num_threads();
+        int thread_num = omp_get_thread_num();
+        int chunk = (m - 2) / num_theads;
+        int start = 1 + chunk * thread_num;
+        int end = chunk * (thread_num + 1);
+
+        for (int iter = 0; iter < maxNumIter; ++iter)
+        {
+            // printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start,
+            //        end);
+
+            for (int j = 1; j < n + num_theads - 1; ++j)
+            {
+                int k = j - thread_num;
+
+                if (k > 0 && k < n - 1)
+                {
+                    for (int i = start; i <= end; ++i)
+                    {
+                        // printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(),
+                        // i, j,
+                        //        k);
+
+                        phi(i, k) = osth * (phi(i + 1, k) + phi(i - 1, k) + phi(i, k + 1) +
+                                            phi(i, k - 1));
+                    }
+                }
+#pragma omp barrier
+            }
+        }
+    }
+}
+
+void gauss_seidel_parallel_kreilos(Matrix &phi, int maxNumIter)
+{
+    const int n = phi.dim1();
+    const int m = phi.dim2();
+
+    const double osth = 1. / 4;
+
+#pragma omp parallel
+    {
+        const int threadId = omp_get_thread_num();
+        const int numThreads = omp_get_num_threads();
+        const int chunkSize = m / numThreads;
+        const int i0 = std::max(1, chunkSize * threadId);
+        const int i1 = std::min(chunkSize * (threadId + 1), m - 1);
+        for (int iIter = 0; iIter < maxNumIter; iIter++)
+        {
+            for (int step = 0; step < n - 1 + numThreads; ++step)
+            {
+                int j = step - threadId;
+                if (j > 0 && j < n - 1)
+                {
+                    for (int i = i0; i < i1; ++i)
+                    {
+                        phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + phi(i, j - 1));
+                    }
+                }
+#pragma omp barrier
+            }
+        }
+    }
+}
+
+void fill_matrix(Matrix &matrix, const int filler)
+{
+    const int m = matrix.dim1();
+    const int n = matrix.dim2();
+
+    for (int i = 0; i < m; ++i)
+    {
+        for (int j = 0; j < n; ++j)
+        {
+            if (i == 0 || i == m - 1 || j == 0 || j == n - 1)
+            {
+                matrix(i, j) = filler;
+            }
+            else
+                matrix(i, j) = 0;
+        }
+    }
+}
+
+void check(const Matrix &a, const Matrix &b)
+{
+    const int m = a.dim1();
+    const int n = a.dim2();
+
+    for (int i = 0; i < m; ++i)
+    {
+        for (int j = 0; j < n; ++j)
+        {
+            if (a(i, j) != b(i, j))
+            {
+                std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j)
+                          << " != " << b(i, j) << " :b" << std::endl;
+            }
+        }
+    }
+}
+
+void print_matrix(const Matrix &matrix)
+{
+    const int m = matrix.dim1();
+    const int n = matrix.dim2();
+
+    for (int i = 0; i < m; ++i)
+    {
+        for (int j = 0; j < n; ++j)
+        {
+            std::cout << matrix(i, j) << " ";
+        }
+        std::cout << std::endl;
+    }
+}
+
+void benchmarkParallelKai(benchmark::State &state)
+{
+    int iterations = state.range(0);
+    Matrix matrix = Matrix(30002, 30002);
+
+    fill_matrix(matrix, 10);
+
+    for (auto _ : state)
+    {
+        gauss_seidel_parallel_kai(matrix, iterations);
+        benchmark::DoNotOptimize(matrix);
+    }
+}
+
+void benchmarkParallelKreilos(benchmark::State &state)
+{
+    int iterations = state.range(0);
+    Matrix matrix = Matrix(30002, 30002);
+
+    fill_matrix(matrix, 10);
+
+    for (auto _ : state)
+    {
+        gauss_seidel_parallel_kreilos(matrix, iterations);
+        benchmark::DoNotOptimize(matrix);
+    }
+}
+
+void benchmarkSerial(benchmark::State &state)
+{
+    int iterations = state.range(0);
+    Matrix matrix = Matrix(30002, 30002);
+
+    fill_matrix(matrix, 10);
+
+    for (auto _ : state)
+    {
+        gauss_seidel_serial(matrix, iterations);
+        benchmark::DoNotOptimize(matrix);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    ::benchmark::Initialize(&argc, argv);
+
+    benchmark::RegisterBenchmark("gaus_seidel_parallel_kai", benchmarkParallelKai)
+        ->Arg(100)
+        ->Unit(benchmark::kMillisecond);
+
+    benchmark::RegisterBenchmark("gaus_seidel_parallel_kreilos", benchmarkParallelKreilos)
+        ->Arg(100)
+        ->Unit(benchmark::kMillisecond);
+
+    benchmark::RegisterBenchmark("gaus_seidel_serial", benchmarkSerial)
+        ->Arg(100)
+        ->Unit(benchmark::kMillisecond);
+
+    ::benchmark::RunSpecifiedBenchmarks();
+
+    return 0;
+}
+
+// int main(int argc, char **argv) {
+//   int iterations = 2;
+//   Matrix matrix = Matrix(2402, 2402);
+//
+//   fill_matrix(matrix, 10);
+//
+//   Matrix matrix2 = Matrix(2402, 2402);
+//
+//   fill_matrix(matrix2, 10);
+//
+//   gauss_seidel_par(matrix, iterations);
+//   gauss_seidel(matrix2, iterations);
+//
+//   check(matrix, matrix2);
+//
+//   return 0;
+// }
\ No newline at end of file
diff --git a/lab08/exercise_3/matrix.h b/lab08/exercise_3/matrix.h
new file mode 100644
index 0000000..559f04a
--- /dev/null
+++ b/lab08/exercise_3/matrix.h
@@ -0,0 +1,257 @@
+/**
+ * matrix.h a very simplistic class for m times n matrices.
+ */
+
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+
+// A very simple class for m times n matrices
+class Matrix {
+ public:
+  // constructors
+  Matrix() : Matrix(0, 0) {}
+
+  Matrix(int m, int n, bool init = true) : m_(m), n_(n) {
+    data_ = new double[m_ * n_];
+    if (data_ == nullptr) {
+      std::cout << "Error: not enough memory for matrix\n";
+      exit(1);
+    }
+    if (init) {
+      for (int i = 0; i < m_; ++i) {
+        for (int j = 0; j < n_; ++j) {
+          operator()(i, j) = 0;
+        }
+      }
+    }
+  }
+
+  Matrix(std::pair<int, int> dim, bool init = true)
+      : Matrix(dim.first, dim.second, init) {}
+
+  Matrix(int n) : Matrix(n, n) {}
+
+  Matrix(const Matrix& other) {
+    m_ = other.m_;
+    n_ = other.n_;
+    if (m_ == 0 || n_ == 0) {
+      data_ = nullptr;
+      return;
+    }
+    data_ = new double[m_ * n_];
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) = other(i, j);
+      }
+    }
+  }
+  Matrix(Matrix&& other) {
+    m_ = other.m_;
+    n_ = other.n_;
+    data_ = other.data_;
+    other.data_ = nullptr;
+    other.m_ = 0;
+    other.n_ = 0;
+  }
+
+  ~Matrix() {
+    if (data_ != nullptr) {
+      delete[] data_;
+      data_ = nullptr;
+    }
+  }
+
+  // assignment operators
+  Matrix& operator=(const Matrix& other) {
+    if (this != &other) {
+      if (data_ != nullptr) {
+        delete[] data_;
+      }
+      m_ = other.m_;
+      n_ = other.n_;
+      data_ = new double[m_ * n_];
+      for (int i = 0; i < m_; ++i) {
+        for (int j = 0; j < n_; ++j) {
+          operator()(i, j) = other(i, j);
+        }
+      }
+    }
+    return *this;
+  }
+
+  Matrix& operator=(Matrix&& other) {
+    if (this != &other) {
+      if (data_ != nullptr) {
+        delete[] data_;
+      }
+      m_ = other.m_;
+      n_ = other.n_;
+      data_ = other.data_;
+      other.data_ = nullptr;
+      other.m_ = 0;
+      other.n_ = 0;
+    }
+    return *this;
+  }
+
+  // element access
+  double& operator()(int i, int j) { return data_[i * n_ + j]; }
+  const double& operator()(int i, int j) const { return data_[i * n_ + j]; }
+
+  // getter functions for the dimensions
+  std::pair<int, int> dim() const { return std::pair<int, int>(m_, n_); }
+  int dim1() const { return m_; }
+  int dim2() const { return n_; }
+  int numEntries() const { return m_ * n_; }
+
+  // comparison operators
+  bool operator==(const Matrix& b) {
+    const double eps = 1e-12;
+    if (m_ != b.m_ || n_ != b.n_) {
+      return false;
+    }
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        if (fabs(operator()(i, j) - b(i, j)) > eps) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+  bool operator!=(const Matrix& b) { return !operator==(b); }
+  // comparison operators
+
+  // addition
+  Matrix& operator+=(const Matrix& b) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) += b(i, j);
+      }
+    }
+    return *this;
+  }
+
+  // subtraction
+  Matrix& operator-=(const Matrix& b) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) -= b(i, j);
+      }
+    }
+    return *this;
+  }
+
+  // scalar multiplication
+  Matrix& operator*=(double x) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) *= x;
+      }
+    }
+    return *this;
+  }
+
+  // scalar division
+  Matrix& operator/=(double x) {
+    for (int i = 0; i < m_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        operator()(i, j) /= x;
+      }
+    }
+    return *this;
+  }
+
+  // matrix product (only for square matrices of equal dimension)
+  Matrix& operator*=(const Matrix& b) {
+    if (dim1() != dim2()) {
+      std::cout << "Error in matrix multiplication: no square matrix\n";
+    } else if (dim1() != b.dim1() || dim2() != b.dim2()) {
+      std::cout << "Error in matrix multiplication: dimensions do not match\n";
+    } else {
+      Matrix a = *this;
+      Matrix& c = *this;
+      const int m = dim1();
+      for (int i = 0; i < m; ++i) {
+        for (int j = 0; j < m; ++j) {
+          for (int k = 0; k < m; ++k) {
+            c(i, j) += a(i, k) * b(k, j);
+          }
+        }
+      }
+    }
+
+    return *this;
+  }
+
+ public:
+  int m_;         // first dimension
+  int n_;         // second dimension
+  double* data_;  // the matrix' entries
+};
+
+// Print the matrix as a table
+inline std::ostream& operator<<(std::ostream& os, const Matrix& a) {
+  const int width = 10;
+  const int precision = 4;
+
+  const auto originalPrecision = os.precision();
+  os << std::setprecision(precision);
+
+  for (int i = 0; i < a.dim1(); ++i) {
+    for (int j = 0; j < a.dim2(); ++j) {
+      os << std::setw(width) << a(i, j) << " ";
+    }
+    if (i != a.dim1() - 1)
+      os << "\n";
+  }
+
+  os << std::setprecision(originalPrecision);
+  return os;
+}
+
+// matrix product
+inline Matrix operator*(const Matrix& a, const Matrix& b) {
+  if (a.dim2() == b.dim1()) {
+    int m = a.dim1();
+    int n = a.dim2();
+    int p = b.dim2();
+    Matrix c(m, p);
+    for (int i = 0; i < m; ++i) {
+      for (int j = 0; j < p; ++j) {
+        for (int k = 0; k < n; ++k) {
+          c(i, j) += a(i, k) * b(k, j);
+        }
+      }
+    }
+    return c;
+  } else {
+    return Matrix(0, 0);
+  }
+}
+
+inline bool equalWithinRange(const Matrix& a,
+                             const Matrix& b,
+                             double eps = 1e-12) {
+  if (a.dim1() != b.dim1() || a.dim2() != b.dim2())
+    return false;
+
+  int m = a.dim1();
+  int n = a.dim2();
+  for (int i = 0; i < m; ++i) {
+    for (int j = 0; j < n; ++j) {
+      if (fabs(a(i, j) - b(i, j)) > eps) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+#endif  // MATRIX_H
\ No newline at end of file