diff --git a/lab07/gaus_seidel/gaus_seidel.cpp b/lab07/gaus_seidel/gaus_seidel.cpp index c7fea03..0451c49 100644 --- a/lab07/gaus_seidel/gaus_seidel.cpp +++ b/lab07/gaus_seidel/gaus_seidel.cpp @@ -3,15 +3,19 @@ #include #include -void gauss_seidel(Matrix &phi, int maxNumIter) { +void gauss_seidel(Matrix &phi, int maxNumIter) +{ const int m = phi.dim1(); const int n = phi.dim2(); const double osth = 1. / 4; - for (int iter = 0; iter < maxNumIter; ++iter) { - for (int i = 1; i < m - 1; ++i) { - for (int j = 1; j < n - 1; ++j) { + for (int iter = 0; iter < maxNumIter; ++iter) + { + for (int i = 1; i < m - 1; ++i) + { + for (int j = 1; j < n - 1; ++j) + { phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + phi(i, j - 1)); } @@ -19,28 +23,33 @@ void gauss_seidel(Matrix &phi, int maxNumIter) { } } -void gauss_seidel_par(Matrix &phi, int maxNumIter) { +void gauss_seidel_par(Matrix &phi, int maxNumIter) +{ const int m = phi.dim1(); const int n = phi.dim2(); #pragma omp parallel num_threads(12) { - for (int iter = 0; iter < maxNumIter; ++iter) { - const double osth = 1. / 4; - int num_theads = omp_get_num_threads(); - int thread_num = omp_get_thread_num(); - int chunk = (m - 2) / num_theads; - int start = 1 + chunk * thread_num; - int end = chunk * (thread_num + 1); + const double osth = 1. / 4; + int num_theads = omp_get_num_threads(); + int thread_num = omp_get_thread_num(); + int chunk = (m - 2) / num_theads; + int start = 1 + chunk * thread_num; + int end = chunk * (thread_num + 1); + for (int iter = 0; iter < maxNumIter; ++iter) + { // printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start, // end); - for (int j = 1; j < n + num_theads - 1; ++j) { + for (int j = 1; j < n + num_theads - 1; ++j) + { int k = j - thread_num; - if (k > 0 && k < n - 1) { - for (int i = start; i <= end; ++i) { + if (k > 0 && k < n - 1) + { + for (int i = start; i <= end; ++i) + { // printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(), // i, j, // k); @@ -55,27 +64,36 @@ void gauss_seidel_par(Matrix &phi, int maxNumIter) { } } -void fill_matrix(Matrix &matrix, const int filler) { +void fill_matrix(Matrix &matrix, const int filler) +{ const int m = matrix.dim1(); const int n = matrix.dim2(); - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - if (i == 0 || i == m - 1 || j == 0 || j == n - 1) { + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 || i == m - 1 || j == 0 || j == n - 1) + { matrix(i, j) = filler; - } else + } + else matrix(i, j) = 0; } } } -void check(const Matrix &a, const Matrix &b) { +void check(const Matrix &a, const Matrix &b) +{ const int m = a.dim1(); const int n = a.dim2(); - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - if (a(i, j) != b(i, j)) { + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (a(i, j) != b(i, j)) + { std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j) << " != " << b(i, j) << " :b" << std::endl; } @@ -83,43 +101,51 @@ void check(const Matrix &a, const Matrix &b) { } } -void print_matrix(const Matrix &matrix) { +void print_matrix(const Matrix &matrix) +{ const int m = matrix.dim1(); const int n = matrix.dim2(); - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { std::cout << matrix(i, j) << " "; } std::cout << std::endl; } } -void benchmarkParallel(benchmark::State &state) { +void benchmarkParallel(benchmark::State &state) +{ int iterations = state.range(0); Matrix matrix = Matrix(30002, 20002); fill_matrix(matrix, 10); - for (auto _ : state) { + for (auto _ : state) + { gauss_seidel_par(matrix, iterations); benchmark::DoNotOptimize(matrix); } } -void benchmarkSerial(benchmark::State &state) { +void benchmarkSerial(benchmark::State &state) +{ int iterations = state.range(0); Matrix matrix = Matrix(30002, 20002); fill_matrix(matrix, 10); - for (auto _ : state) { + for (auto _ : state) + { gauss_seidel(matrix, iterations); benchmark::DoNotOptimize(matrix); } } -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ ::benchmark::Initialize(&argc, argv); benchmark::RegisterBenchmark("gaus_seidel_parallel", benchmarkParallel) diff --git a/lab08/excercise_2/vectorTriad.cpp b/lab08/excercise_2/vectorTriad.cpp new file mode 100644 index 0000000..ac804bc --- /dev/null +++ b/lab08/excercise_2/vectorTriad.cpp @@ -0,0 +1,30 @@ +#include +#include +#include +#include +#include +#include +#include + +static void vectorTriad(benchmark::State& state) { + const int n = 1e9; +#pragma omp parallel num_threads(6) + { + std::cout << "CPU: " << sched_getcpu() << " Thread: " << omp_get_thread_num() << std::endl; + const int numThreads = omp_get_num_threads(); + const int nPerThread = n / numThreads; + std::vector A(nPerThread); + std::vector B(nPerThread, 1.); + std::vector C(nPerThread, 2.); + std::vector D(nPerThread, 3.); + for (auto _ : state) { + for (int i = 0; i < nPerThread; ++i) { + A[i] = B[i] + C[i] * D[i]; + } + volatile double dummy = A[0]; + } + } +} + +BENCHMARK(vectorTriad)->Unit(benchmark::kMillisecond); +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/lab08/excercise_2/vectorTriad.sh b/lab08/excercise_2/vectorTriad.sh new file mode 100644 index 0000000..7851723 --- /dev/null +++ b/lab08/excercise_2/vectorTriad.sh @@ -0,0 +1,12 @@ +#!/bin/bash +#SBATCH --job-name=excercise2_vectorTriad +#SBATCH --output=excercise2_vectorTriad.out +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=6 +export OMP_PLACES=cores +export OMP_PROC_BIND=close +# Load modules or activate environment if needed +# module load gcc cmake ... +# Run the benchmark binary +g++ vectorTriad.cpp -o vector_triad -fopenmp -O3 -lbenchmark +./vector_triad --benchmark_out=excercise2_vectorTriad.json --benchmark_out_format=json \ No newline at end of file diff --git a/lab08/exercise_3/gauss_seidel.cpp b/lab08/exercise_3/gauss_seidel.cpp new file mode 100644 index 0000000..72bdda0 --- /dev/null +++ b/lab08/exercise_3/gauss_seidel.cpp @@ -0,0 +1,230 @@ +#include "matrix.h" +#include +#include +#include + +void gauss_seidel_serial(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + + const double osth = 1. / 4; + + for (int iter = 0; iter < maxNumIter; ++iter) + { + for (int i = 1; i < m - 1; ++i) + { + for (int j = 1; j < n - 1; ++j) + { + phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + + phi(i, j - 1)); + } + } + } +} + +void gauss_seidel_parallel_kai(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + +#pragma omp parallel num_threads(12) + { + const double osth = 1. / 4; + int num_theads = omp_get_num_threads(); + int thread_num = omp_get_thread_num(); + int chunk = (m - 2) / num_theads; + int start = 1 + chunk * thread_num; + int end = chunk * (thread_num + 1); + + for (int iter = 0; iter < maxNumIter; ++iter) + { + // printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start, + // end); + + for (int j = 1; j < n + num_theads - 1; ++j) + { + int k = j - thread_num; + + if (k > 0 && k < n - 1) + { + for (int i = start; i <= end; ++i) + { + // printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(), + // i, j, + // k); + + phi(i, k) = osth * (phi(i + 1, k) + phi(i - 1, k) + phi(i, k + 1) + + phi(i, k - 1)); + } + } +#pragma omp barrier + } + } + } +} + +void gauss_seidel_parallel_kreilos(Matrix &phi, int maxNumIter) +{ + const int n = phi.dim1(); + const int m = phi.dim2(); + + const double osth = 1. / 4; + +#pragma omp parallel + { + const int threadId = omp_get_thread_num(); + const int numThreads = omp_get_num_threads(); + const int chunkSize = m / numThreads; + const int i0 = std::max(1, chunkSize * threadId); + const int i1 = std::min(chunkSize * (threadId + 1), m - 1); + for (int iIter = 0; iIter < maxNumIter; iIter++) + { + for (int step = 0; step < n - 1 + numThreads; ++step) + { + int j = step - threadId; + if (j > 0 && j < n - 1) + { + for (int i = i0; i < i1; ++i) + { + phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + phi(i, j - 1)); + } + } +#pragma omp barrier + } + } + } +} + +void fill_matrix(Matrix &matrix, const int filler) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 || i == m - 1 || j == 0 || j == n - 1) + { + matrix(i, j) = filler; + } + else + matrix(i, j) = 0; + } + } +} + +void check(const Matrix &a, const Matrix &b) +{ + const int m = a.dim1(); + const int n = a.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (a(i, j) != b(i, j)) + { + std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j) + << " != " << b(i, j) << " :b" << std::endl; + } + } + } +} + +void print_matrix(const Matrix &matrix) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + std::cout << matrix(i, j) << " "; + } + std::cout << std::endl; + } +} + +void benchmarkParallelKai(benchmark::State &state) +{ + int iterations = state.range(0); + Matrix matrix = Matrix(30002, 30002); + + fill_matrix(matrix, 10); + + for (auto _ : state) + { + gauss_seidel_parallel_kai(matrix, iterations); + benchmark::DoNotOptimize(matrix); + } +} + +void benchmarkParallelKreilos(benchmark::State &state) +{ + int iterations = state.range(0); + Matrix matrix = Matrix(30002, 30002); + + fill_matrix(matrix, 10); + + for (auto _ : state) + { + gauss_seidel_parallel_kreilos(matrix, iterations); + benchmark::DoNotOptimize(matrix); + } +} + +void benchmarkSerial(benchmark::State &state) +{ + int iterations = state.range(0); + Matrix matrix = Matrix(30002, 30002); + + fill_matrix(matrix, 10); + + for (auto _ : state) + { + gauss_seidel_serial(matrix, iterations); + benchmark::DoNotOptimize(matrix); + } +} + +int main(int argc, char **argv) +{ + ::benchmark::Initialize(&argc, argv); + + benchmark::RegisterBenchmark("gaus_seidel_parallel_kai", benchmarkParallelKai) + ->Arg(100) + ->Unit(benchmark::kMillisecond); + + benchmark::RegisterBenchmark("gaus_seidel_parallel_kreilos", benchmarkParallelKreilos) + ->Arg(100) + ->Unit(benchmark::kMillisecond); + + benchmark::RegisterBenchmark("gaus_seidel_serial", benchmarkSerial) + ->Arg(100) + ->Unit(benchmark::kMillisecond); + + ::benchmark::RunSpecifiedBenchmarks(); + + return 0; +} + +// int main(int argc, char **argv) { +// int iterations = 2; +// Matrix matrix = Matrix(2402, 2402); +// +// fill_matrix(matrix, 10); +// +// Matrix matrix2 = Matrix(2402, 2402); +// +// fill_matrix(matrix2, 10); +// +// gauss_seidel_par(matrix, iterations); +// gauss_seidel(matrix2, iterations); +// +// check(matrix, matrix2); +// +// return 0; +// } \ No newline at end of file diff --git a/lab08/exercise_3/matrix.h b/lab08/exercise_3/matrix.h new file mode 100644 index 0000000..559f04a --- /dev/null +++ b/lab08/exercise_3/matrix.h @@ -0,0 +1,257 @@ +/** + * matrix.h a very simplistic class for m times n matrices. + */ + +#ifndef MATRIX_H +#define MATRIX_H + +#include +#include +#include +#include + +// A very simple class for m times n matrices +class Matrix { + public: + // constructors + Matrix() : Matrix(0, 0) {} + + Matrix(int m, int n, bool init = true) : m_(m), n_(n) { + data_ = new double[m_ * n_]; + if (data_ == nullptr) { + std::cout << "Error: not enough memory for matrix\n"; + exit(1); + } + if (init) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) = 0; + } + } + } + } + + Matrix(std::pair dim, bool init = true) + : Matrix(dim.first, dim.second, init) {} + + Matrix(int n) : Matrix(n, n) {} + + Matrix(const Matrix& other) { + m_ = other.m_; + n_ = other.n_; + if (m_ == 0 || n_ == 0) { + data_ = nullptr; + return; + } + data_ = new double[m_ * n_]; + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) = other(i, j); + } + } + } + Matrix(Matrix&& other) { + m_ = other.m_; + n_ = other.n_; + data_ = other.data_; + other.data_ = nullptr; + other.m_ = 0; + other.n_ = 0; + } + + ~Matrix() { + if (data_ != nullptr) { + delete[] data_; + data_ = nullptr; + } + } + + // assignment operators + Matrix& operator=(const Matrix& other) { + if (this != &other) { + if (data_ != nullptr) { + delete[] data_; + } + m_ = other.m_; + n_ = other.n_; + data_ = new double[m_ * n_]; + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) = other(i, j); + } + } + } + return *this; + } + + Matrix& operator=(Matrix&& other) { + if (this != &other) { + if (data_ != nullptr) { + delete[] data_; + } + m_ = other.m_; + n_ = other.n_; + data_ = other.data_; + other.data_ = nullptr; + other.m_ = 0; + other.n_ = 0; + } + return *this; + } + + // element access + double& operator()(int i, int j) { return data_[i * n_ + j]; } + const double& operator()(int i, int j) const { return data_[i * n_ + j]; } + + // getter functions for the dimensions + std::pair dim() const { return std::pair(m_, n_); } + int dim1() const { return m_; } + int dim2() const { return n_; } + int numEntries() const { return m_ * n_; } + + // comparison operators + bool operator==(const Matrix& b) { + const double eps = 1e-12; + if (m_ != b.m_ || n_ != b.n_) { + return false; + } + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + if (fabs(operator()(i, j) - b(i, j)) > eps) { + return false; + } + } + } + return true; + } + bool operator!=(const Matrix& b) { return !operator==(b); } + // comparison operators + + // addition + Matrix& operator+=(const Matrix& b) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) += b(i, j); + } + } + return *this; + } + + // subtraction + Matrix& operator-=(const Matrix& b) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) -= b(i, j); + } + } + return *this; + } + + // scalar multiplication + Matrix& operator*=(double x) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) *= x; + } + } + return *this; + } + + // scalar division + Matrix& operator/=(double x) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) /= x; + } + } + return *this; + } + + // matrix product (only for square matrices of equal dimension) + Matrix& operator*=(const Matrix& b) { + if (dim1() != dim2()) { + std::cout << "Error in matrix multiplication: no square matrix\n"; + } else if (dim1() != b.dim1() || dim2() != b.dim2()) { + std::cout << "Error in matrix multiplication: dimensions do not match\n"; + } else { + Matrix a = *this; + Matrix& c = *this; + const int m = dim1(); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < m; ++j) { + for (int k = 0; k < m; ++k) { + c(i, j) += a(i, k) * b(k, j); + } + } + } + } + + return *this; + } + + public: + int m_; // first dimension + int n_; // second dimension + double* data_; // the matrix' entries +}; + +// Print the matrix as a table +inline std::ostream& operator<<(std::ostream& os, const Matrix& a) { + const int width = 10; + const int precision = 4; + + const auto originalPrecision = os.precision(); + os << std::setprecision(precision); + + for (int i = 0; i < a.dim1(); ++i) { + for (int j = 0; j < a.dim2(); ++j) { + os << std::setw(width) << a(i, j) << " "; + } + if (i != a.dim1() - 1) + os << "\n"; + } + + os << std::setprecision(originalPrecision); + return os; +} + +// matrix product +inline Matrix operator*(const Matrix& a, const Matrix& b) { + if (a.dim2() == b.dim1()) { + int m = a.dim1(); + int n = a.dim2(); + int p = b.dim2(); + Matrix c(m, p); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < p; ++j) { + for (int k = 0; k < n; ++k) { + c(i, j) += a(i, k) * b(k, j); + } + } + } + return c; + } else { + return Matrix(0, 0); + } +} + +inline bool equalWithinRange(const Matrix& a, + const Matrix& b, + double eps = 1e-12) { + if (a.dim1() != b.dim1() || a.dim2() != b.dim2()) + return false; + + int m = a.dim1(); + int n = a.dim2(); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (fabs(a(i, j) - b(i, j)) > eps) { + return false; + } + } + } + + return true; +} + +#endif // MATRIX_H \ No newline at end of file