This commit is contained in:
WickedJack99
2025-05-17 13:16:04 +02:00
parent b8df925c9b
commit 575631f91c
5 changed files with 587 additions and 32 deletions

View File

@@ -3,15 +3,19 @@
#include <iostream>
#include <omp.h>
void gauss_seidel(Matrix &phi, int maxNumIter) {
void gauss_seidel(Matrix &phi, int maxNumIter)
{
const int m = phi.dim1();
const int n = phi.dim2();
const double osth = 1. / 4;
for (int iter = 0; iter < maxNumIter; ++iter) {
for (int i = 1; i < m - 1; ++i) {
for (int j = 1; j < n - 1; ++j) {
for (int iter = 0; iter < maxNumIter; ++iter)
{
for (int i = 1; i < m - 1; ++i)
{
for (int j = 1; j < n - 1; ++j)
{
phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) +
phi(i, j - 1));
}
@@ -19,28 +23,33 @@ void gauss_seidel(Matrix &phi, int maxNumIter) {
}
}
void gauss_seidel_par(Matrix &phi, int maxNumIter) {
void gauss_seidel_par(Matrix &phi, int maxNumIter)
{
const int m = phi.dim1();
const int n = phi.dim2();
#pragma omp parallel num_threads(12)
{
for (int iter = 0; iter < maxNumIter; ++iter) {
const double osth = 1. / 4;
int num_theads = omp_get_num_threads();
int thread_num = omp_get_thread_num();
int chunk = (m - 2) / num_theads;
int start = 1 + chunk * thread_num;
int end = chunk * (thread_num + 1);
const double osth = 1. / 4;
int num_theads = omp_get_num_threads();
int thread_num = omp_get_thread_num();
int chunk = (m - 2) / num_theads;
int start = 1 + chunk * thread_num;
int end = chunk * (thread_num + 1);
for (int iter = 0; iter < maxNumIter; ++iter)
{
// printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start,
// end);
for (int j = 1; j < n + num_theads - 1; ++j) {
for (int j = 1; j < n + num_theads - 1; ++j)
{
int k = j - thread_num;
if (k > 0 && k < n - 1) {
for (int i = start; i <= end; ++i) {
if (k > 0 && k < n - 1)
{
for (int i = start; i <= end; ++i)
{
// printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(),
// i, j,
// k);
@@ -55,27 +64,36 @@ void gauss_seidel_par(Matrix &phi, int maxNumIter) {
}
}
void fill_matrix(Matrix &matrix, const int filler) {
void fill_matrix(Matrix &matrix, const int filler)
{
const int m = matrix.dim1();
const int n = matrix.dim2();
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
if (i == 0 || i == m - 1 || j == 0 || j == n - 1) {
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
if (i == 0 || i == m - 1 || j == 0 || j == n - 1)
{
matrix(i, j) = filler;
} else
}
else
matrix(i, j) = 0;
}
}
}
void check(const Matrix &a, const Matrix &b) {
void check(const Matrix &a, const Matrix &b)
{
const int m = a.dim1();
const int n = a.dim2();
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
if (a(i, j) != b(i, j)) {
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
if (a(i, j) != b(i, j))
{
std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j)
<< " != " << b(i, j) << " :b" << std::endl;
}
@@ -83,43 +101,51 @@ void check(const Matrix &a, const Matrix &b) {
}
}
void print_matrix(const Matrix &matrix) {
void print_matrix(const Matrix &matrix)
{
const int m = matrix.dim1();
const int n = matrix.dim2();
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
std::cout << matrix(i, j) << " ";
}
std::cout << std::endl;
}
}
void benchmarkParallel(benchmark::State &state) {
void benchmarkParallel(benchmark::State &state)
{
int iterations = state.range(0);
Matrix matrix = Matrix(30002, 20002);
fill_matrix(matrix, 10);
for (auto _ : state) {
for (auto _ : state)
{
gauss_seidel_par(matrix, iterations);
benchmark::DoNotOptimize(matrix);
}
}
void benchmarkSerial(benchmark::State &state) {
void benchmarkSerial(benchmark::State &state)
{
int iterations = state.range(0);
Matrix matrix = Matrix(30002, 20002);
fill_matrix(matrix, 10);
for (auto _ : state) {
for (auto _ : state)
{
gauss_seidel(matrix, iterations);
benchmark::DoNotOptimize(matrix);
}
}
int main(int argc, char **argv) {
int main(int argc, char **argv)
{
::benchmark::Initialize(&argc, argv);
benchmark::RegisterBenchmark("gaus_seidel_parallel", benchmarkParallel)

View File

@@ -0,0 +1,30 @@
#include <benchmark/benchmark.h>
#include <omp.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
static void vectorTriad(benchmark::State& state) {
const int n = 1e9;
#pragma omp parallel num_threads(6)
{
std::cout << "CPU: " << sched_getcpu() << " Thread: " << omp_get_thread_num() << std::endl;
const int numThreads = omp_get_num_threads();
const int nPerThread = n / numThreads;
std::vector<double> A(nPerThread);
std::vector<double> B(nPerThread, 1.);
std::vector<double> C(nPerThread, 2.);
std::vector<double> D(nPerThread, 3.);
for (auto _ : state) {
for (int i = 0; i < nPerThread; ++i) {
A[i] = B[i] + C[i] * D[i];
}
volatile double dummy = A[0];
}
}
}
BENCHMARK(vectorTriad)->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN();

View File

@@ -0,0 +1,12 @@
#!/bin/bash
#SBATCH --job-name=excercise2_vectorTriad
#SBATCH --output=excercise2_vectorTriad.out
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=6
export OMP_PLACES=cores
export OMP_PROC_BIND=close
# Load modules or activate environment if needed
# module load gcc cmake ...
# Run the benchmark binary
g++ vectorTriad.cpp -o vector_triad -fopenmp -O3 -lbenchmark
./vector_triad --benchmark_out=excercise2_vectorTriad.json --benchmark_out_format=json

View File

@@ -0,0 +1,230 @@
#include "matrix.h"
#include <benchmark/benchmark.h>
#include <iostream>
#include <omp.h>
void gauss_seidel_serial(Matrix &phi, int maxNumIter)
{
const int m = phi.dim1();
const int n = phi.dim2();
const double osth = 1. / 4;
for (int iter = 0; iter < maxNumIter; ++iter)
{
for (int i = 1; i < m - 1; ++i)
{
for (int j = 1; j < n - 1; ++j)
{
phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) +
phi(i, j - 1));
}
}
}
}
void gauss_seidel_parallel_kai(Matrix &phi, int maxNumIter)
{
const int m = phi.dim1();
const int n = phi.dim2();
#pragma omp parallel num_threads(12)
{
const double osth = 1. / 4;
int num_theads = omp_get_num_threads();
int thread_num = omp_get_thread_num();
int chunk = (m - 2) / num_theads;
int start = 1 + chunk * thread_num;
int end = chunk * (thread_num + 1);
for (int iter = 0; iter < maxNumIter; ++iter)
{
// printf("thread %d, start: %d, end: %d\n", omp_get_thread_num(), start,
// end);
for (int j = 1; j < n + num_theads - 1; ++j)
{
int k = j - thread_num;
if (k > 0 && k < n - 1)
{
for (int i = start; i <= end; ++i)
{
// printf("thread %d, i: %d, j: %d, k: %d\n", omp_get_thread_num(),
// i, j,
// k);
phi(i, k) = osth * (phi(i + 1, k) + phi(i - 1, k) + phi(i, k + 1) +
phi(i, k - 1));
}
}
#pragma omp barrier
}
}
}
}
void gauss_seidel_parallel_kreilos(Matrix &phi, int maxNumIter)
{
const int n = phi.dim1();
const int m = phi.dim2();
const double osth = 1. / 4;
#pragma omp parallel
{
const int threadId = omp_get_thread_num();
const int numThreads = omp_get_num_threads();
const int chunkSize = m / numThreads;
const int i0 = std::max(1, chunkSize * threadId);
const int i1 = std::min(chunkSize * (threadId + 1), m - 1);
for (int iIter = 0; iIter < maxNumIter; iIter++)
{
for (int step = 0; step < n - 1 + numThreads; ++step)
{
int j = step - threadId;
if (j > 0 && j < n - 1)
{
for (int i = i0; i < i1; ++i)
{
phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + phi(i, j - 1));
}
}
#pragma omp barrier
}
}
}
}
void fill_matrix(Matrix &matrix, const int filler)
{
const int m = matrix.dim1();
const int n = matrix.dim2();
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
if (i == 0 || i == m - 1 || j == 0 || j == n - 1)
{
matrix(i, j) = filler;
}
else
matrix(i, j) = 0;
}
}
}
void check(const Matrix &a, const Matrix &b)
{
const int m = a.dim1();
const int n = a.dim2();
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
if (a(i, j) != b(i, j))
{
std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j)
<< " != " << b(i, j) << " :b" << std::endl;
}
}
}
}
void print_matrix(const Matrix &matrix)
{
const int m = matrix.dim1();
const int n = matrix.dim2();
for (int i = 0; i < m; ++i)
{
for (int j = 0; j < n; ++j)
{
std::cout << matrix(i, j) << " ";
}
std::cout << std::endl;
}
}
void benchmarkParallelKai(benchmark::State &state)
{
int iterations = state.range(0);
Matrix matrix = Matrix(30002, 30002);
fill_matrix(matrix, 10);
for (auto _ : state)
{
gauss_seidel_parallel_kai(matrix, iterations);
benchmark::DoNotOptimize(matrix);
}
}
void benchmarkParallelKreilos(benchmark::State &state)
{
int iterations = state.range(0);
Matrix matrix = Matrix(30002, 30002);
fill_matrix(matrix, 10);
for (auto _ : state)
{
gauss_seidel_parallel_kreilos(matrix, iterations);
benchmark::DoNotOptimize(matrix);
}
}
void benchmarkSerial(benchmark::State &state)
{
int iterations = state.range(0);
Matrix matrix = Matrix(30002, 30002);
fill_matrix(matrix, 10);
for (auto _ : state)
{
gauss_seidel_serial(matrix, iterations);
benchmark::DoNotOptimize(matrix);
}
}
int main(int argc, char **argv)
{
::benchmark::Initialize(&argc, argv);
benchmark::RegisterBenchmark("gaus_seidel_parallel_kai", benchmarkParallelKai)
->Arg(100)
->Unit(benchmark::kMillisecond);
benchmark::RegisterBenchmark("gaus_seidel_parallel_kreilos", benchmarkParallelKreilos)
->Arg(100)
->Unit(benchmark::kMillisecond);
benchmark::RegisterBenchmark("gaus_seidel_serial", benchmarkSerial)
->Arg(100)
->Unit(benchmark::kMillisecond);
::benchmark::RunSpecifiedBenchmarks();
return 0;
}
// int main(int argc, char **argv) {
// int iterations = 2;
// Matrix matrix = Matrix(2402, 2402);
//
// fill_matrix(matrix, 10);
//
// Matrix matrix2 = Matrix(2402, 2402);
//
// fill_matrix(matrix2, 10);
//
// gauss_seidel_par(matrix, iterations);
// gauss_seidel(matrix2, iterations);
//
// check(matrix, matrix2);
//
// return 0;
// }

257
lab08/exercise_3/matrix.h Normal file
View File

@@ -0,0 +1,257 @@
/**
* matrix.h a very simplistic class for m times n matrices.
*/
#ifndef MATRIX_H
#define MATRIX_H
#include <vector>
#include <iostream>
#include <iomanip>
#include <cmath>
// A very simple class for m times n matrices
class Matrix {
public:
// constructors
Matrix() : Matrix(0, 0) {}
Matrix(int m, int n, bool init = true) : m_(m), n_(n) {
data_ = new double[m_ * n_];
if (data_ == nullptr) {
std::cout << "Error: not enough memory for matrix\n";
exit(1);
}
if (init) {
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) = 0;
}
}
}
}
Matrix(std::pair<int, int> dim, bool init = true)
: Matrix(dim.first, dim.second, init) {}
Matrix(int n) : Matrix(n, n) {}
Matrix(const Matrix& other) {
m_ = other.m_;
n_ = other.n_;
if (m_ == 0 || n_ == 0) {
data_ = nullptr;
return;
}
data_ = new double[m_ * n_];
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) = other(i, j);
}
}
}
Matrix(Matrix&& other) {
m_ = other.m_;
n_ = other.n_;
data_ = other.data_;
other.data_ = nullptr;
other.m_ = 0;
other.n_ = 0;
}
~Matrix() {
if (data_ != nullptr) {
delete[] data_;
data_ = nullptr;
}
}
// assignment operators
Matrix& operator=(const Matrix& other) {
if (this != &other) {
if (data_ != nullptr) {
delete[] data_;
}
m_ = other.m_;
n_ = other.n_;
data_ = new double[m_ * n_];
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) = other(i, j);
}
}
}
return *this;
}
Matrix& operator=(Matrix&& other) {
if (this != &other) {
if (data_ != nullptr) {
delete[] data_;
}
m_ = other.m_;
n_ = other.n_;
data_ = other.data_;
other.data_ = nullptr;
other.m_ = 0;
other.n_ = 0;
}
return *this;
}
// element access
double& operator()(int i, int j) { return data_[i * n_ + j]; }
const double& operator()(int i, int j) const { return data_[i * n_ + j]; }
// getter functions for the dimensions
std::pair<int, int> dim() const { return std::pair<int, int>(m_, n_); }
int dim1() const { return m_; }
int dim2() const { return n_; }
int numEntries() const { return m_ * n_; }
// comparison operators
bool operator==(const Matrix& b) {
const double eps = 1e-12;
if (m_ != b.m_ || n_ != b.n_) {
return false;
}
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
if (fabs(operator()(i, j) - b(i, j)) > eps) {
return false;
}
}
}
return true;
}
bool operator!=(const Matrix& b) { return !operator==(b); }
// comparison operators
// addition
Matrix& operator+=(const Matrix& b) {
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) += b(i, j);
}
}
return *this;
}
// subtraction
Matrix& operator-=(const Matrix& b) {
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) -= b(i, j);
}
}
return *this;
}
// scalar multiplication
Matrix& operator*=(double x) {
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) *= x;
}
}
return *this;
}
// scalar division
Matrix& operator/=(double x) {
for (int i = 0; i < m_; ++i) {
for (int j = 0; j < n_; ++j) {
operator()(i, j) /= x;
}
}
return *this;
}
// matrix product (only for square matrices of equal dimension)
Matrix& operator*=(const Matrix& b) {
if (dim1() != dim2()) {
std::cout << "Error in matrix multiplication: no square matrix\n";
} else if (dim1() != b.dim1() || dim2() != b.dim2()) {
std::cout << "Error in matrix multiplication: dimensions do not match\n";
} else {
Matrix a = *this;
Matrix& c = *this;
const int m = dim1();
for (int i = 0; i < m; ++i) {
for (int j = 0; j < m; ++j) {
for (int k = 0; k < m; ++k) {
c(i, j) += a(i, k) * b(k, j);
}
}
}
}
return *this;
}
public:
int m_; // first dimension
int n_; // second dimension
double* data_; // the matrix' entries
};
// Print the matrix as a table
inline std::ostream& operator<<(std::ostream& os, const Matrix& a) {
const int width = 10;
const int precision = 4;
const auto originalPrecision = os.precision();
os << std::setprecision(precision);
for (int i = 0; i < a.dim1(); ++i) {
for (int j = 0; j < a.dim2(); ++j) {
os << std::setw(width) << a(i, j) << " ";
}
if (i != a.dim1() - 1)
os << "\n";
}
os << std::setprecision(originalPrecision);
return os;
}
// matrix product
inline Matrix operator*(const Matrix& a, const Matrix& b) {
if (a.dim2() == b.dim1()) {
int m = a.dim1();
int n = a.dim2();
int p = b.dim2();
Matrix c(m, p);
for (int i = 0; i < m; ++i) {
for (int j = 0; j < p; ++j) {
for (int k = 0; k < n; ++k) {
c(i, j) += a(i, k) * b(k, j);
}
}
}
return c;
} else {
return Matrix(0, 0);
}
}
inline bool equalWithinRange(const Matrix& a,
const Matrix& b,
double eps = 1e-12) {
if (a.dim1() != b.dim1() || a.dim2() != b.dim2())
return false;
int m = a.dim1();
int n = a.dim2();
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
if (fabs(a(i, j) - b(i, j)) > eps) {
return false;
}
}
}
return true;
}
#endif // MATRIX_H