diff --git a/lab07/aaron/e1/jacobiWaveSplitWork.cpp b/lab07/aaron/e1/jacobiWaveSplitWork.cpp index 00d22b6..35567d0 100644 --- a/lab07/aaron/e1/jacobiWaveSplitWork.cpp +++ b/lab07/aaron/e1/jacobiWaveSplitWork.cpp @@ -66,7 +66,7 @@ void gauss_seidel(Matrix &phi, int maxNumIter) } std::atomic threadsCount(0); -#pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static, 10) for (int rowToCalculate = 1; rowToCalculate < (n - 1); rowToCalculate++) { int row = rowToCalculate; diff --git a/lab07/aaron/e2/compete.zip b/lab07/aaron/e2/compete.zip new file mode 100644 index 0000000..1913918 Binary files /dev/null and b/lab07/aaron/e2/compete.zip differ diff --git a/lab07/aaron/e2/compete/benchmark.cpp b/lab07/aaron/e2/compete/benchmark.cpp new file mode 100644 index 0000000..41aa205 --- /dev/null +++ b/lab07/aaron/e2/compete/benchmark.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + +static void BM_DBSCAN(benchmark::State& state) { + // Load points from file + std::vector points = readPointsFromFile("data"); + + // Create DBSCAN object with parameters from the benchmark state + DBSCAN ds(5, 0.01); + + // Measure the time taken to run DBSCAN + for (auto _ : state) { + ds.run(points); + } +} + +BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/lab07/aaron/e2/compete/create_data.py b/lab07/aaron/e2/compete/create_data.py new file mode 100644 index 0000000..145515a --- /dev/null +++ b/lab07/aaron/e2/compete/create_data.py @@ -0,0 +1,12 @@ +from sklearn.datasets import make_blobs +from sklearn.preprocessing import StandardScaler +import numpy as np + +centers = [[1, 1], [-1, -1], [1, -1], [-1.5, -1.5], [-2, 2], [1, 3]] +X, labels_true = make_blobs( + n_samples=27*1024, centers=centers, cluster_std=0.25, random_state=0 +) + +X = StandardScaler().fit_transform(X) + +np.savetxt("data", X) diff --git a/lab07/aaron/e2/compete/dbscan.cpp b/lab07/aaron/e2/compete/dbscan.cpp new file mode 100644 index 0000000..dff2f60 --- /dev/null +++ b/lab07/aaron/e2/compete/dbscan.cpp @@ -0,0 +1,68 @@ +#include "dbscan.h" +#include +#include +#include +#include + +namespace HPC { + +DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {} + +void DBSCAN::run(const std::vector &points) { + + dataset_ = points; + const int n = dataset_.size(); + + initializeNeighbors(); + + int clusterIndex = 0; + for (int i = 0; i < n; ++i) { + Point &point = dataset_[i]; + if (point.clusterID < 0) { + std::set neighbours = point.neighbors; + if (neighbours.size() < minPoints_) { + point.clusterID = noiseID; + } else { + clusterIndex++; + expandCluster(point, neighbours, clusterIndex); + } + } + } +} + +bool DBSCAN::expandCluster(Point &p, std::set &neighbours, int clusterID) { + p.clusterID = clusterID; + + std::set updatedNeighbours = neighbours; + + // Use of do-while instead of clearing neighbors + do { + neighbours = updatedNeighbours; + + for (int i : neighbours) { + Point &pPrime = dataset_[i]; + if (pPrime.clusterID < 0) { + pPrime.clusterID = clusterID; // serves as marking the point as visited + std::set newNeighbours = pPrime.neighbors; + if (newNeighbours.size() >= minPoints_) { + updatedNeighbours.merge(newNeighbours); + } + } + } + } while (updatedNeighbours.size() != neighbours.size()); + return true; +} + +void DBSCAN::initializeNeighbors() { +#pragma omp parallel for + for (int i = 0; i < dataset_.size(); ++i) { + Point &pointToCheckNeighborsFor = dataset_[i]; + for (int j = 0; j < dataset_.size(); ++j) { + if (pointToCheckNeighborsFor.distance(dataset_[j]) <= epsilon_) { + pointToCheckNeighborsFor.neighbors.insert(j); + } + } + } +} + +} // namespace HPC diff --git a/lab07/aaron/e2/compete/dbscan.h b/lab07/aaron/e2/compete/dbscan.h new file mode 100644 index 0000000..e13064d --- /dev/null +++ b/lab07/aaron/e2/compete/dbscan.h @@ -0,0 +1,37 @@ +#ifndef DBSCAN_H +#define DBSCAN_H + +#include +#include + +#include "point.h" + +namespace HPC { + +class DBSCAN { +public: + DBSCAN(int minPts, double eps); + + void run(const std::vector &points); + + const std::vector &getPoints() const { return dataset_; } + +private: + std::set regionQuery(const Point &point) const; + void initializeNeighbors(); + bool expandCluster(Point &point, std::set &neighbours, int clusterID); + + // void merge(std::vector& n, const std::vector& nPrime) const; + + const int unclassifiedID = -1; + const int noiseID = -2; + + const int minPoints_; + const double epsilon_; + + std::vector dataset_; +}; + +} // namespace HPC + +#endif // DBSCAN_H diff --git a/lab07/aaron/e2/compete/makefile b/lab07/aaron/e2/compete/makefile new file mode 100644 index 0000000..e1863e0 --- /dev/null +++ b/lab07/aaron/e2/compete/makefile @@ -0,0 +1,43 @@ +# Makefile for DBSCAN program + +# ---------------------------------------------------- +# Parameters +# Change these parameters according to your needs. + +# SOURCE_FILES: The source files of the algorithm, used for each build. +# You can add more source files here if needed. +SOURCE_FILES = dbscan.cpp point.cpp + +# Main rogram, used to cluster the data and save the result. +# PROGRAM_NAME: The name of the program that will be generated after compilation. +PROGRAM_NAME = dbscan +RUN_MAIN = run.cpp + +# Benchmark program: This program is used to benchmark the performance of the algorithm. +# It is not used for the actual clustering process. +BENCHMARK_PROGRAM_NAME = dbscan_bench +BENCHMARK_MAIN = benchmark.cpp + +COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread + +# ---------------------------------------------------- +# The actual makefile rules, only change these if you really need to. + +# Default target +# The default target is the one that will be executed when you run 'make' without any arguments. +default: release + +release: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3 + +debug: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g + +benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES) + g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark + +run_bench: benchmark + ./$(BENCHMARK_PROGRAM_NAME) + +run: release + ./$(PROGRAM_NAME) diff --git a/lab07/aaron/e2/compete/plot.py b/lab07/aaron/e2/compete/plot.py new file mode 100644 index 0000000..63e876f --- /dev/null +++ b/lab07/aaron/e2/compete/plot.py @@ -0,0 +1,14 @@ +import pylab as plt +import numpy as np + +plt.figure() +points = plt.loadtxt("clustered") +cluster_index_column = 2 +clusters = np.unique(points[:, cluster_index_column]) +print(clusters) +for c in clusters: + points_in_cluster = points[np.where( + points[:, cluster_index_column] == c)[0]] + plt.scatter(points_in_cluster[:, 0], points_in_cluster[:, 1], label=c) + +plt.show() diff --git a/lab07/aaron/e2/compete/point.cpp b/lab07/aaron/e2/compete/point.cpp new file mode 100644 index 0000000..13b4181 --- /dev/null +++ b/lab07/aaron/e2/compete/point.cpp @@ -0,0 +1,52 @@ +#include +#include + +#include "point.h" + +Point::Point(const std::vector &coordinatesIn) + : coordinates(coordinatesIn) {} + +double &Point::operator()(int i) { return coordinates[i]; } + +const double &Point::operator()(int i) const { return coordinates[i]; } + +double Point::distance(const Point &other) const { + double distance = 0; + for (int i = 0; i < coordinates.size(); ++i) { + const double p = coordinates[i]; + const double q = other.coordinates[i]; + distance += (p - q) * (p - q); + } + + return distance; +} + +std::vector readPointsFromFile(const std::string &filename) { + std::vector points; + std::ifstream fin(filename); + + double x, y; + + while (fin >> x >> y) { + Point point({x, y}); + points.push_back(point); + } + return points; +} + +std::ostream &operator<<(std::ostream &os, const Point &point) { + for (auto coordinate : point.coordinates) { + os << coordinate << "\t"; + } + os << point.clusterID; + os << "\t" << point.neighbors.size(); + return os; +} + +void writePointsToFile(const std::vector &points, + const std::string &filename) { + std::ofstream fout(filename); + for (auto point : points) { + fout << point << "\n"; + } +} diff --git a/lab07/aaron/e2/compete/point.h b/lab07/aaron/e2/compete/point.h new file mode 100644 index 0000000..c77bbc2 --- /dev/null +++ b/lab07/aaron/e2/compete/point.h @@ -0,0 +1,53 @@ +#ifndef POINT_H +#define POINT_H + +#include +#include +#include + +/** + * Class representing a point in the dataset. + * + * Stores the coordinates of the point, its cluster ID, and whether it is a core + * point. + */ +class Point { + public: + Point(const std::vector& coordinatesIn); + + double& operator()(int i); + const double& operator()(int i) const; + + double distance(const Point& other) const; + + std::vector coordinates; + int clusterID = -1; + bool isCorePoint = false; + std::set neighbors; +}; + +/** + * Read points from a file and return them as a vector of Point objects. + */ +std::vector readPointsFromFile(const std::string& filename); + +/** + * Print a point to an output stream. The + * coordinates are separated by tabs, and the + * cluster ID is printed at the end. + */ +std::ostream& operator<<(std::ostream& os, const Point& point); + +/** + * Write points to a file. + * + * Each point is written on a new line, with + * coordinates separated by tabs and the + * cluster ID at the end. + * + * Can be read with numpy.loadtxt, the last column give the cluster ID. + */ +void writePointsToFile(const std::vector& points, + const std::string& filename); + +#endif // POINT_H \ No newline at end of file diff --git a/lab07/aaron/e2/compete/run.cpp b/lab07/aaron/e2/compete/run.cpp new file mode 100644 index 0000000..7ca8e35 --- /dev/null +++ b/lab07/aaron/e2/compete/run.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + +int main() +{ + + std::vector points = readPointsFromFile("data"); + + DBSCAN ds(5, 0.01); + // Zeitmessung starten + auto start = std::chrono::high_resolution_clock::now(); + + ds.run(points); + + // Zeitmessung beenden + auto end = std::chrono::high_resolution_clock::now(); + + // Dauer berechnen in Millisekunden + auto duration = std::chrono::duration_cast(end - start).count(); + + std::cout << "Laufzeit: " << duration << " ms" << std::endl; + writePointsToFile(ds.getPoints(), "clustered"); + + return 0; +} diff --git a/lab07/results/plot_dbscan.py b/lab07/results/plot_dbscan.py new file mode 100644 index 0000000..36dcbcd --- /dev/null +++ b/lab07/results/plot_dbscan.py @@ -0,0 +1,26 @@ +import csv +import matplotlib.pyplot as plt + +# Read performance data from CSV file +with open('lab07\\results\\dbscna_results.csv', 'r') as f: + reader = csv.reader(f) + data = next(reader) # Read first line + times = list(map(int, data)) # Convert to integers + +# X: thread count (1 to n), Y: performance (1/time) +threads = list(range(1, len(times) + 1)) +performance = [1 / t for t in times] # You could multiply by a constant to scale if needed + +speedup = [times[0] / t for t in times] + +efficiency = [] +for i in range (1, len(speedup) + 1): + efficiency.append(speedup[i-1] / i) + +# Plot +plt.plot(threads, efficiency, marker='o') +plt.xlabel('Thread Count') +plt.ylabel('Efficiency (Speedup / Thread Count)') +plt.title('Thread Count vs Efficiency') +plt.grid(True) +plt.show()