diff --git a/lab07/aaron/HPC-Lab-07.pdf b/lab07/aaron/HPC-Lab-07.pdf new file mode 100644 index 0000000..b3db1de Binary files /dev/null and b/lab07/aaron/HPC-Lab-07.pdf differ diff --git a/lab07/aaron/e1/jacobiWaveSplitWork.cpp b/lab07/aaron/e1/jacobiWaveSplitWork.cpp new file mode 100644 index 0000000..4f5db92 --- /dev/null +++ b/lab07/aaron/e1/jacobiWaveSplitWork.cpp @@ -0,0 +1,191 @@ +/** + * + * Schritt 1 + * ------------- + * |T0|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 2 + * ------------- + * |xx|T0|--|--| + * ------------- + * |T1|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 3 + * ------------- + * |xx|xx|T0|--| + * ------------- + * |xx|T1|--|--| + * ------------- + * |T2|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 4 + * ------------- + * |xx|xx|xx|T0| + * ------------- + * |xx|xx|T1|--| + * ------------- + * |xx|T2|--|--| + * ------------- + * |T3|--|--|--| + * ------------- + */ + +#include "matrix.h" +#include +#include +#include + +void gauss_seidel(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + const double osth = 1. / 4; + for (int iIter = 0; iIter < maxNumIter; ++iIter) + { + // Shared data per iteration + std::vector> column(m); + for (int i = 0; i < m; ++i) + { + column[i].store(1); + } + std::atomic threadsCount(0); + +#pragma omp parallel for schedule(static) + for (int rowToCalculate = 1; rowToCalculate < (n - 1); rowToCalculate++) + { + int row = rowToCalculate; + while (column[row] < n - 1) + { + // All threads beneath T_row1 have to check specific circumstances, where + // T_row1 has no condition to wait for + if (row != 1) + { + // T_rowx has to wait at least until T_row(x-1) has calculated values from + // its last row + // T_rowx will calculate value x at row,column if T_row(x-1) has calculated + // its value at row-1, column-1 + while (column[row] == column[row - 1]) + { + // Wait for wave (thread) above + } + } + + // Central jacobi calculation + phi(row, column[row]) = osth * (phi(row + 1, column[row]) + phi(row - 1, column[row]) + phi(row, column[row] + 1) + + phi(row, column[row] - 1)); + + // Increment column index + column[row]++; + } + } + } +} + +void gauss_seidel_lin(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + + const double osth = 1. / 4; + + for (int iter = 0; iter < maxNumIter; ++iter) + { + for (int i = 1; i < m - 1; ++i) + { + for (int j = 1; j < n - 1; ++j) + { + phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + + phi(i, j - 1)); + } + } + } +} + +void fill_matrix(Matrix &matrix, const int filler) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 || i == m - 1 || j == 0 || j == n - 1) + { + matrix(i, j) = filler; + } + else + matrix(i, j) = 0; + } + } +} + +void check(const Matrix &a, const Matrix &b) +{ + const int m = a.dim1(); + const int n = a.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (a(i, j) != b(i, j)) + { + std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j) + << " != " << b(i, j) << " :b" << std::endl; + } + } + } +} + +void print_matrix(const Matrix &matrix) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + std::cout << matrix(i, j) << " "; + } + std::cout << std::endl; + } +} + +int main() +{ + Matrix first = Matrix(8, 8); + Matrix sec = Matrix(8, 8); + + fill_matrix(first, 10); + fill_matrix(sec, 10); + + print_matrix(first); + print_matrix(sec); + + gauss_seidel_lin(first, 1000); + gauss_seidel(sec, 1000); + + check(first, sec); + + print_matrix(first); + print_matrix(sec); + + return 0; +} \ No newline at end of file diff --git a/lab07/aaron/e1/jacobiWaveThreadsNumEqualsRowsNum.cpp b/lab07/aaron/e1/jacobiWaveThreadsNumEqualsRowsNum.cpp new file mode 100644 index 0000000..3f434eb --- /dev/null +++ b/lab07/aaron/e1/jacobiWaveThreadsNumEqualsRowsNum.cpp @@ -0,0 +1,198 @@ +// WORKS! + +/** + * Spalte 0 1 2 3 4 5 6 + * Schritt + * 0 + * + * 1 + * + * 2 + * + * 3 + * + * Über Schritte iterieren statt ? + * + * Beispiel A: jeder Thread bearbeitet eine Zeile, es gibt genau m Threads. + * D.h. m / t = 1 + * + * Beispiel B: es gibt eine konstante Anzahl Threads t es gibt m Zeilen. + * Thread tx nimmt sich irgendeine Zeile und + * + * Schritt 1 + * ------------- + * |T0|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 2 + * ------------- + * |xx|T0|--|--| + * ------------- + * |T1|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 3 + * ------------- + * |xx|xx|T0|--| + * ------------- + * |xx|T1|--|--| + * ------------- + * |T2|--|--|--| + * ------------- + * |--|--|--|--| + * ------------- + * + * Schritt 4 + * ------------- + * |xx|xx|xx|T0| + * ------------- + * |xx|xx|T1|--| + * ------------- + * |xx|T2|--|--| + * ------------- + * |T3|--|--|--| + * ------------- + */ + +#include "matrix.h" +#include +#include +#include + +void gauss_seidel(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + const double osth = 1. / 4; + for (int iIter = 0; iIter < maxNumIter; ++iIter) + { + int threadNum = m - 2; + std::vector> columnsCalculated(threadNum); + for (int i = 0; i < threadNum; ++i) + { + columnsCalculated[i].store(1); + } +#pragma omp parallel num_threads(threadNum) + { + int threadId = omp_get_thread_num(); + int row = threadId + 1; + while (columnsCalculated[threadId] < n - 1) + { + int currentCol = columnsCalculated[threadId]; + if (threadId != 0) + { + while (currentCol == columnsCalculated[threadId - 1]) + { + } + } + phi(row, currentCol) = osth * (phi(row + 1, currentCol) + phi(row - 1, currentCol) + phi(row, currentCol + 1) + + phi(row, currentCol - 1)); + columnsCalculated[threadId]++; + } + } + } +} + +void gauss_seidel_lin(Matrix &phi, int maxNumIter) +{ + const int m = phi.dim1(); + const int n = phi.dim2(); + + const double osth = 1. / 4; + + for (int iter = 0; iter < maxNumIter; ++iter) + { + for (int i = 1; i < m - 1; ++i) + { + for (int j = 1; j < n - 1; ++j) + { + phi(i, j) = osth * (phi(i + 1, j) + phi(i - 1, j) + phi(i, j + 1) + + phi(i, j - 1)); + } + } + } +} + +void fill_matrix(Matrix &matrix, const int filler) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 || i == m - 1 || j == 0 || j == n - 1) + { + matrix(i, j) = filler; + } + else + matrix(i, j) = 0; + } + } +} + +void check(const Matrix &a, const Matrix &b) +{ + const int m = a.dim1(); + const int n = a.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + if (a(i, j) != b(i, j)) + { + std::cout << "Not equal at (" << i << ", " << j << "), a: " << a(i, j) + << " != " << b(i, j) << " :b" << std::endl; + } + } + } +} + +void print_matrix(const Matrix &matrix) +{ + const int m = matrix.dim1(); + const int n = matrix.dim2(); + + for (int i = 0; i < m; ++i) + { + for (int j = 0; j < n; ++j) + { + std::cout << matrix(i, j) << " "; + } + std::cout << std::endl; + } +} + +int main() +{ + Matrix first = Matrix(8, 8); + Matrix sec = Matrix(8, 8); + + fill_matrix(first, 10); + fill_matrix(sec, 10); + + print_matrix(first); + print_matrix(sec); + + gauss_seidel_lin(first, 1); + gauss_seidel(sec, 1); + + check(first, sec); + + print_matrix(first); + print_matrix(sec); + + return 0; +} \ No newline at end of file diff --git a/lab07/aaron/e1/matrix.h b/lab07/aaron/e1/matrix.h new file mode 100644 index 0000000..8b2ae24 --- /dev/null +++ b/lab07/aaron/e1/matrix.h @@ -0,0 +1,343 @@ +/** + * matrix.h a very simplistic class for m times n matrices. + */ + +#ifndef MATRIX_H +#define MATRIX_H + +#include +#include +#include +#include + +// A very simplistic vector class for vectors of size n +class Vector { + public: + // constructors + Vector(int n) : n_(n), data_(n_, 0) {} + Vector(const Vector& other) = default; + Vector(Vector&& other) = default; + ~Vector() = default; + + // assignment operators + Vector& operator=(const Vector& other) = default; + Vector& operator=(Vector&& other) = default; + + // element access + double& operator()(int i) { return data_[i]; } + const double& operator()(int i) const { return data_[i]; } + + // getter functions for the dimensions + int dim() const { return n_; } + + // comparison operators + bool operator==(const Vector& b) { return (data_ == b.data_); } + bool operator!=(const Vector& b) { return (data_ != b.data_); } + + // addition + Vector& operator+=(const Vector& b) { + for (int i = 0; i < n_; ++i) { + operator()(i) += b(i); + } + return *this; + } + + // subtraction + Vector& operator-=(const Vector& b) { + for (int i = 0; i < n_; ++i) { + operator()(i) -= b(i); + } + return *this; + } + + // scalar multiplication + Vector& operator*=(double x) { + for (int i = 0; i < n_; ++i) { + operator()(i) *= x; + } + return *this; + } + + // dot product between two vectors + double dot(const Vector& other) const { + double sum = 0; + for (int i = 0; i < n_; ++i) { + sum += operator()(i) * other(i); + } + return sum; + } + + private: + int n_; // vector dimension + std::vector data_; // the vectors entries +}; + +inline double dot(const Vector& v1, const Vector& v2) { + return v1.dot(v2); +} + +// Print the vector as a table +inline std::ostream& operator<<(std::ostream& os, const Vector& a) { + const int width = 10; + const int precision = 4; + + const auto originalPrecision = os.precision(); + os << std::setprecision(precision); + + for (int i = 0; i < a.dim(); ++i) { + os << std::setw(width) << a(i) << " "; + } + + os << "\n"; + + os << std::setprecision(originalPrecision); + return os; +} + +// A very simple class for m times n matrices +class Matrix { + public: + // constructors + Matrix() : Matrix(0, 0) {} + Matrix(int m, int n) : m_(m), n_(n), data_(m_ * n_, 0) {} + Matrix(std::pair dim) : Matrix(dim.first, dim.second) {} + Matrix(int n) : Matrix(n, n) {} + Matrix(const Matrix& other) = default; + Matrix(Matrix&& other) = default; + ~Matrix() = default; + + // assignment operators + Matrix& operator=(const Matrix& other) = default; + Matrix& operator=(Matrix&& other) = default; + + // element access + double& operator()(int i, int j) { return data_[i * n_ + j]; } + const double& operator()(int i, int j) const { return data_[i * n_ + j]; } + + // getter functions for the dimensions + std::pair dim() const { return std::pair(m_, n_); } + int dim1() const { return m_; } + int dim2() const { return n_; } + int numEntries() const { return data_.size(); } + + // comparison operators + bool operator==(const Matrix& b) { return (data_ == b.data_); } + bool operator!=(const Matrix& b) { return (data_ != b.data_); } + + // addition + Matrix& operator+=(const Matrix& b) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) += b(i, j); + } + } + return *this; + } + + // subtraction + Matrix& operator-=(const Matrix& b) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) -= b(i, j); + } + } + return *this; + } + + // scalar multiplication + Matrix& operator*=(double x) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) *= x; + } + } + return *this; + } + + // scalar division + Matrix& operator/=(double x) { + for (int i = 0; i < m_; ++i) { + for (int j = 0; j < n_; ++j) { + operator()(i, j) /= x; + } + } + return *this; + } + + // matrix product (only for square matrices of equal dimension) + Matrix& operator*=(const Matrix& b) { + if (dim1() != dim2()) { + std::cout << "Error in matrix multiplication: no square matrix\n"; + } else if (dim1() != b.dim1() || dim2() != b.dim2()) { + std::cout << "Error in matrix multiplication: dimensions do not match\n"; + } else { + Matrix a = *this; + Matrix& c = *this; + const int m = dim1(); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < m; ++j) { + for (int k = 0; k < m; ++k) { + c(i, j) += a(i, k) * b(k, j); + } + } + } + } + + return *this; + } + + public: + int m_; // first dimension + int n_; // second dimension + std::vector data_; // the matrix' entries +}; + +// Print the matrix as a table +inline std::ostream& operator<<(std::ostream& os, const Matrix& a) { + const int width = 10; + const int precision = 4; + + const auto originalPrecision = os.precision(); + os << std::setprecision(precision); + + for (int i = 0; i < a.dim1(); ++i) { + for (int j = 0; j < a.dim2(); ++j) { + os << std::setw(width) << a(i, j) << " "; + } + if (i != a.dim1() - 1) + os << "\n"; + } + + os << std::setprecision(originalPrecision); + return os; +} + +// matrix product +inline Matrix operator*(const Matrix& a, const Matrix& b) { + if (a.dim2() == b.dim1()) { + int m = a.dim1(); + int n = a.dim2(); + int p = b.dim2(); + Matrix c(m, p); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < p; ++j) { + for (int k = 0; k < n; ++k) { + c(i, j) += a(i, k) * b(k, j); + } + } + } + return c; + } else { + return Matrix(0, 0); + } +} + +inline bool equalWithinRange(const Matrix& a, + const Matrix& b, + double eps = 1e-12) { + if (a.dim1() != b.dim1() || a.dim2() != b.dim2()) + return false; + + int m = a.dim1(); + int n = a.dim2(); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (fabs(a(i, j) - b(i, j)) > eps) { + return false; + } + } + } + + return true; +} + +// A very simple class for "3D-Matrices" (tensors) with dimension l x m x n +class Matrix3D { + public: + // constructors + Matrix3D(int l, int m, int n) : l_(l), m_(m), n_(n), data_(l) { + for (int i = 0; i < l_; ++i) { + data_[i] = std::vector>(m_); + for (int j = 0; j < m_; ++j) { + data_[i][j] = std::vector(n_, 0); + } + } + } + Matrix3D(int n) : Matrix3D(n, n, n) {} + Matrix3D(const Matrix3D& other) = default; + Matrix3D(Matrix3D&& other) = default; + ~Matrix3D() = default; + + // assignment operators + Matrix3D& operator=(const Matrix3D& other) = default; + Matrix3D& operator=(Matrix3D&& other) = default; + + // element access + double& operator()(int i, int j, int k) { return data_[i][j][k]; } + const double& operator()(int i, int j, int k) const { return data_[i][j][k]; } + + // getter functions for the dimensions + int dim1() const { return l_; } + int dim2() const { return m_; } + int dim3() const { return n_; } + + // comparison operators + bool operator==(const Matrix3D& b) { return (data_ == b.data_); } + bool operator!=(const Matrix3D& b) { return (data_ != b.data_); } + + // addition + Matrix3D& operator+=(const Matrix3D& b) { + for (int i = 0; i < l_; ++i) { + for (int j = 0; j < m_; ++j) { + for (int k = 0; k < n_; ++k) { + operator()(i, j, k) += b(i, j, k); + } + } + } + return *this; + } + + // substraction + Matrix3D& operator-=(const Matrix3D& b) { + for (int i = 0; i < l_; ++i) { + for (int j = 0; j < m_; ++j) { + for (int k = 0; k < n_; ++k) { + operator()(i, j, k) -= b(i, j, k); + } + } + } + return *this; + } + + // scalar multiplication + Matrix3D& operator*=(double x) { + for (int i = 0; i < l_; ++i) { + for (int j = 0; j < m_; ++j) { + for (int k = 0; k < n_; ++k) { + operator()(i, j, k) *= x; + } + } + } + return *this; + } + + // scalar division + Matrix3D& operator/=(double x) { + for (int i = 0; i < l_; ++i) { + for (int j = 0; j < m_; ++j) { + for (int k = 0; k < n_; ++k) { + operator()(i, j, k) /= x; + } + } + } + return *this; + } + + private: + int l_; // first dimension + int m_; // second dimension + int n_; // third dimension + std::vector>> data_; // the tensors' entries +}; + +#endif // MATRIX_H \ No newline at end of file diff --git a/lab07/aaron/e2/__MACOSX/upload/._create_data.py b/lab07/aaron/e2/__MACOSX/upload/._create_data.py new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._create_data.py differ diff --git a/lab07/aaron/e2/__MACOSX/upload/._dbscan.cpp b/lab07/aaron/e2/__MACOSX/upload/._dbscan.cpp new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._dbscan.cpp differ diff --git a/lab07/aaron/e2/__MACOSX/upload/._dbscan.h b/lab07/aaron/e2/__MACOSX/upload/._dbscan.h new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._dbscan.h differ diff --git a/lab07/aaron/e2/__MACOSX/upload/._makefile b/lab07/aaron/e2/__MACOSX/upload/._makefile new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._makefile differ diff --git a/lab07/aaron/e2/__MACOSX/upload/._plot.py b/lab07/aaron/e2/__MACOSX/upload/._plot.py new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._plot.py differ diff --git a/lab07/aaron/e2/__MACOSX/upload/._run.cpp b/lab07/aaron/e2/__MACOSX/upload/._run.cpp new file mode 100644 index 0000000..5c3c52c Binary files /dev/null and b/lab07/aaron/e2/__MACOSX/upload/._run.cpp differ diff --git a/lab07/aaron/e2/upload/benchmark.cpp b/lab07/aaron/e2/upload/benchmark.cpp new file mode 100644 index 0000000..41aa205 --- /dev/null +++ b/lab07/aaron/e2/upload/benchmark.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + +static void BM_DBSCAN(benchmark::State& state) { + // Load points from file + std::vector points = readPointsFromFile("data"); + + // Create DBSCAN object with parameters from the benchmark state + DBSCAN ds(5, 0.01); + + // Measure the time taken to run DBSCAN + for (auto _ : state) { + ds.run(points); + } +} + +BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/lab07/aaron/e2/upload/create_data.py b/lab07/aaron/e2/upload/create_data.py new file mode 100644 index 0000000..145515a --- /dev/null +++ b/lab07/aaron/e2/upload/create_data.py @@ -0,0 +1,12 @@ +from sklearn.datasets import make_blobs +from sklearn.preprocessing import StandardScaler +import numpy as np + +centers = [[1, 1], [-1, -1], [1, -1], [-1.5, -1.5], [-2, 2], [1, 3]] +X, labels_true = make_blobs( + n_samples=27*1024, centers=centers, cluster_std=0.25, random_state=0 +) + +X = StandardScaler().fit_transform(X) + +np.savetxt("data", X) diff --git a/lab07/aaron/e2/upload/dbscan_parallel.cpp b/lab07/aaron/e2/upload/dbscan_parallel.cpp new file mode 100644 index 0000000..16a3ae6 --- /dev/null +++ b/lab07/aaron/e2/upload/dbscan_parallel.cpp @@ -0,0 +1,65 @@ +#include "dbscan_parallel.h" +#include +#include + +namespace HPC { + +DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {} + +void DBSCAN::run(const std::vector& points) { + initializeNeighbors(); + + dataset_ = points; + const int n = dataset_.size(); + + int clusterIndex = 0; + for (int i = 0; i < n; ++i) { + Point& point = dataset_[i]; + if (point.clusterID < 0) { + std::set neighbours = point.neighbors; + if (neighbours.size() < minPoints_) { + point.clusterID = noiseID; + } else { + clusterIndex++; + expandCluster(point, neighbours, clusterIndex); + } + } + } +} + +bool DBSCAN::expandCluster(Point& p, std::set& neighbours, int clusterID) { + p.clusterID = clusterID; + + std::set updatedNeighbours = neighbours; + + // Use of do-while instead of clearing neighbors + do { + neighbours = updatedNeighbours; + + for (int i : neighbours) { + Point& pPrime = dataset_[i]; + if (pPrime.clusterID < 0) { + pPrime.clusterID = clusterID; // serves as marking the point as visited + std::set newNeighbours = pPrime.neighbors; + if (newNeighbours.size() >= minPoints_) { + updatedNeighbours.merge(newNeighbours); + } + } + } + } while (updatedNeighbours.size() != neighbours.size()); + return true; +} + +std::set DBSCAN::initializeNeighbors() { + #pragma omp parallel for + for (int i = 0; i < dataset_.size(); ++i) { + Point& pointToCheckNeighborsFor = dataset_[i]; + for (int j = 0; j < dataset_.size(); ++j) { + if (pointToCheckNeighborsFor.distance(dataset_[j]) <= epsilon_) { + pointToCheckNeighborsFor.neighbors.insert(j); + } + } + } +} + +} // namespace HPC \ No newline at end of file diff --git a/lab07/aaron/e2/upload/dbscan_parallel.h b/lab07/aaron/e2/upload/dbscan_parallel.h new file mode 100644 index 0000000..8584c6a --- /dev/null +++ b/lab07/aaron/e2/upload/dbscan_parallel.h @@ -0,0 +1,37 @@ +#ifndef DBSCAN_H +#define DBSCAN_H + +#include +#include + +#include "point.h" + +namespace HPC { + +class DBSCAN { + public: + DBSCAN(int minPts, double eps); + + void run(const std::vector& points); + + const std::vector& getPoints() const { return dataset_; } + + private: + std::set regionQuery(const Point& point) const; + std::set DBSCAN::initializeNeighbors(); + bool expandCluster(Point& point, std::set& neighbours, int clusterID); + + // void merge(std::vector& n, const std::vector& nPrime) const; + + const int unclassifiedID = -1; + const int noiseID = -2; + + const int minPoints_; + const double epsilon_; + + std::vector dataset_; +}; + +} // namespace HPC + +#endif // DBSCAN_H diff --git a/lab07/aaron/e2/upload/dbscan_serial.cpp b/lab07/aaron/e2/upload/dbscan_serial.cpp new file mode 100644 index 0000000..b940748 --- /dev/null +++ b/lab07/aaron/e2/upload/dbscan_serial.cpp @@ -0,0 +1,60 @@ +#include "dbscan.h" +#include +#include + +namespace HPC { + +DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {} + +void DBSCAN::run(const std::vector& points) { + dataset_ = points; + const int n = dataset_.size(); + + int clusterIndex = 0; + for (int i = 0; i < n; ++i) { + Point& point = dataset_[i]; + if (point.clusterID < 0) { + std::set neighbours = regionQuery(point); + if (neighbours.size() < minPoints_) { + point.clusterID = noiseID; + } else { + clusterIndex++; + expandCluster(point, neighbours, clusterIndex); + } + } + } +} + +bool DBSCAN::expandCluster(Point& p, std::set& neighbours, int clusterID) { + p.clusterID = clusterID; + + std::set updatedNeighbours = neighbours; + neighbours.clear(); + while (updatedNeighbours.size() != neighbours.size()) { + neighbours = updatedNeighbours; + + for (int i : neighbours) { + Point& pPrime = dataset_[i]; + if (pPrime.clusterID < 0) { + pPrime.clusterID = clusterID; // serves as marking the point as visited + std::set newNeighbours = regionQuery(pPrime); + if (newNeighbours.size() >= minPoints_) { + updatedNeighbours.merge(newNeighbours); + } + } + } + } + return true; +} + +std::set DBSCAN::regionQuery(const Point& point) const { + std::set neighbours; + for (int i = 0; i < dataset_.size(); ++i) { + if (point.distance(dataset_[i]) <= epsilon_) { + neighbours.insert(i); + } + } + return neighbours; +} + +} // namespace HPC \ No newline at end of file diff --git a/lab07/aaron/e2/upload/dbscan_serial.h b/lab07/aaron/e2/upload/dbscan_serial.h new file mode 100644 index 0000000..7f90e75 --- /dev/null +++ b/lab07/aaron/e2/upload/dbscan_serial.h @@ -0,0 +1,36 @@ +#ifndef DBSCAN_H +#define DBSCAN_H + +#include +#include + +#include "point.h" + +namespace HPC { + +class DBSCAN { + public: + DBSCAN(int minPts, double eps); + + void run(const std::vector& points); + + const std::vector& getPoints() const { return dataset_; } + + private: + std::set regionQuery(const Point& point) const; + bool expandCluster(Point& point, std::set& neighbours, int clusterID); + + // void merge(std::vector& n, const std::vector& nPrime) const; + + const int unclassifiedID = -1; + const int noiseID = -2; + + const int minPoints_; + const double epsilon_; + + std::vector dataset_; +}; + +} // namespace HPC + +#endif // DBSCAN_H diff --git a/lab07/aaron/e2/upload/makefile b/lab07/aaron/e2/upload/makefile new file mode 100644 index 0000000..e1863e0 --- /dev/null +++ b/lab07/aaron/e2/upload/makefile @@ -0,0 +1,43 @@ +# Makefile for DBSCAN program + +# ---------------------------------------------------- +# Parameters +# Change these parameters according to your needs. + +# SOURCE_FILES: The source files of the algorithm, used for each build. +# You can add more source files here if needed. +SOURCE_FILES = dbscan.cpp point.cpp + +# Main rogram, used to cluster the data and save the result. +# PROGRAM_NAME: The name of the program that will be generated after compilation. +PROGRAM_NAME = dbscan +RUN_MAIN = run.cpp + +# Benchmark program: This program is used to benchmark the performance of the algorithm. +# It is not used for the actual clustering process. +BENCHMARK_PROGRAM_NAME = dbscan_bench +BENCHMARK_MAIN = benchmark.cpp + +COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread + +# ---------------------------------------------------- +# The actual makefile rules, only change these if you really need to. + +# Default target +# The default target is the one that will be executed when you run 'make' without any arguments. +default: release + +release: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3 + +debug: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g + +benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES) + g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark + +run_bench: benchmark + ./$(BENCHMARK_PROGRAM_NAME) + +run: release + ./$(PROGRAM_NAME) diff --git a/lab07/aaron/e2/upload/plot.py b/lab07/aaron/e2/upload/plot.py new file mode 100644 index 0000000..63e876f --- /dev/null +++ b/lab07/aaron/e2/upload/plot.py @@ -0,0 +1,14 @@ +import pylab as plt +import numpy as np + +plt.figure() +points = plt.loadtxt("clustered") +cluster_index_column = 2 +clusters = np.unique(points[:, cluster_index_column]) +print(clusters) +for c in clusters: + points_in_cluster = points[np.where( + points[:, cluster_index_column] == c)[0]] + plt.scatter(points_in_cluster[:, 0], points_in_cluster[:, 1], label=c) + +plt.show() diff --git a/lab07/aaron/e2/upload/point.cpp b/lab07/aaron/e2/upload/point.cpp new file mode 100644 index 0000000..f2fca1b --- /dev/null +++ b/lab07/aaron/e2/upload/point.cpp @@ -0,0 +1,55 @@ +#include +#include + +#include "point.h" + +Point::Point(const std::vector& coordinatesIn) + : coordinates(coordinatesIn) {} + +double& Point::operator()(int i) { + return coordinates[i]; +} + +const double& Point::operator()(int i) const { + return coordinates[i]; +} + +double Point::distance(const Point& other) const { + double distance = 0; + for (int i = 0; i < coordinates.size(); ++i) { + const double p = coordinates[i]; + const double q = other.coordinates[i]; + distance += (p - q) * (p - q); + } + + return distance; +} + +std::vector readPointsFromFile(const std::string& filename) { + std::vector points; + std::ifstream fin(filename); + + double x, y; + + while (fin >> x >> y) { + Point point({x, y}); + points.push_back(point); + } + return points; +} + +std::ostream& operator<<(std::ostream& os, const Point& point) { + for (auto coordinate : point.coordinates) { + os << coordinate << "\t"; + } + os << point.clusterID; + return os; +} + +void writePointsToFile(const std::vector& points, + const std::string& filename) { + std::ofstream fout(filename); + for (auto point : points) { + fout << point << "\n"; + } +} \ No newline at end of file diff --git a/lab07/aaron/e2/upload/point.h b/lab07/aaron/e2/upload/point.h new file mode 100644 index 0000000..c77bbc2 --- /dev/null +++ b/lab07/aaron/e2/upload/point.h @@ -0,0 +1,53 @@ +#ifndef POINT_H +#define POINT_H + +#include +#include +#include + +/** + * Class representing a point in the dataset. + * + * Stores the coordinates of the point, its cluster ID, and whether it is a core + * point. + */ +class Point { + public: + Point(const std::vector& coordinatesIn); + + double& operator()(int i); + const double& operator()(int i) const; + + double distance(const Point& other) const; + + std::vector coordinates; + int clusterID = -1; + bool isCorePoint = false; + std::set neighbors; +}; + +/** + * Read points from a file and return them as a vector of Point objects. + */ +std::vector readPointsFromFile(const std::string& filename); + +/** + * Print a point to an output stream. The + * coordinates are separated by tabs, and the + * cluster ID is printed at the end. + */ +std::ostream& operator<<(std::ostream& os, const Point& point); + +/** + * Write points to a file. + * + * Each point is written on a new line, with + * coordinates separated by tabs and the + * cluster ID at the end. + * + * Can be read with numpy.loadtxt, the last column give the cluster ID. + */ +void writePointsToFile(const std::vector& points, + const std::string& filename); + +#endif // POINT_H \ No newline at end of file diff --git a/lab07/aaron/e2/upload/run.cpp b/lab07/aaron/e2/upload/run.cpp new file mode 100644 index 0000000..a66ab54 --- /dev/null +++ b/lab07/aaron/e2/upload/run.cpp @@ -0,0 +1,19 @@ +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + + +int main() { + + std::vector points = readPointsFromFile("data"); + + DBSCAN ds(5, 0.01); + ds.run(points); + + writePointsToFile(ds.getPoints(), "clustered"); + + return 0; +}