From 31a5ec80465c2884440979976b004612b99064b2 Mon Sep 17 00:00:00 2001 From: WickedJack99 Date: Sat, 17 May 2025 11:52:48 +0200 Subject: [PATCH] lab07 --- lab07/aaron/e1/jacobiWaveSplitWork.cpp | 2 +- lab07/aaron/e2/compete.zip | Bin 0 -> 5511 bytes lab07/aaron/e2/compete/benchmark.cpp | 23 +++++++++ lab07/aaron/e2/compete/create_data.py | 12 +++++ lab07/aaron/e2/compete/dbscan.cpp | 68 +++++++++++++++++++++++++ lab07/aaron/e2/compete/dbscan.h | 37 ++++++++++++++ lab07/aaron/e2/compete/makefile | 43 ++++++++++++++++ lab07/aaron/e2/compete/plot.py | 14 +++++ lab07/aaron/e2/compete/point.cpp | 52 +++++++++++++++++++ lab07/aaron/e2/compete/point.h | 53 +++++++++++++++++++ lab07/aaron/e2/compete/run.cpp | 29 +++++++++++ lab07/results/plot_dbscan.py | 26 ++++++++++ 12 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 lab07/aaron/e2/compete.zip create mode 100644 lab07/aaron/e2/compete/benchmark.cpp create mode 100644 lab07/aaron/e2/compete/create_data.py create mode 100644 lab07/aaron/e2/compete/dbscan.cpp create mode 100644 lab07/aaron/e2/compete/dbscan.h create mode 100644 lab07/aaron/e2/compete/makefile create mode 100644 lab07/aaron/e2/compete/plot.py create mode 100644 lab07/aaron/e2/compete/point.cpp create mode 100644 lab07/aaron/e2/compete/point.h create mode 100644 lab07/aaron/e2/compete/run.cpp create mode 100644 lab07/results/plot_dbscan.py diff --git a/lab07/aaron/e1/jacobiWaveSplitWork.cpp b/lab07/aaron/e1/jacobiWaveSplitWork.cpp index 00d22b6..35567d0 100644 --- a/lab07/aaron/e1/jacobiWaveSplitWork.cpp +++ b/lab07/aaron/e1/jacobiWaveSplitWork.cpp @@ -66,7 +66,7 @@ void gauss_seidel(Matrix &phi, int maxNumIter) } std::atomic threadsCount(0); -#pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static, 10) for (int rowToCalculate = 1; rowToCalculate < (n - 1); rowToCalculate++) { int row = rowToCalculate; diff --git a/lab07/aaron/e2/compete.zip b/lab07/aaron/e2/compete.zip new file mode 100644 index 
0000000000000000000000000000000000000000..19139186b6614993cf75f9a9784a19108490a432 GIT binary patch literal 5511 zcma)=2UJtrwuY(F1VRZ-1f)stAP7hkq=?keJ49N59GcXKQUnbxfGE9*fPxY!(u;JY z8IUd@9i*dlxry(dYw+s1Z)dEXWMqtQtjzhZx&JwJweSgOaWEGYQe^hy!@qCDIBYof z9&S*uH(1!mj|>Nw0Q19-8_19xhlt=Rs}pAB%dk4>{-6-!5aal2TxWdiz6tZn zMyR{h(Q@;0v@yNvA!5+Wog#}8u?DNO1p$<_q)m!#Qam#9S0TtbK|$?dhemVh1Ca=i zbN#DLFMy=qj8dnwp%oFz#_AlbhAAw%=T_BTp!|+k0|>^{(j^sgTi|$d`s|TB`Am*3 zYL7#p%Q(JA8f^O?vcu}2gR)sQ*XT9FDrTQuxKyGvfAH?&Zo>C0u0@`p+08Vv>ipP| z?0Ys%k_#yuj-Up)z9wj<0{9x8#SaXb>L7sx3x*6t3^bd#IlvBjEjTAgolIk=Ooc#CCT0s zZ0ilScChuf6@vQzuPlM$wB5r*&WG>GgiG=lC=-)l|NuNn0p=9v918?Ne+XlS_zYx z+4&TFU-*0_DX+}KdWK1%+zQaYo(D&U~NN8?uw*IT4z_{ zzFO7rX=3>OpkvOZ9IoZDz2toNDGBDLNxe5V9tdYdnQytX7xUtBXg41BiVKKuXz^2D zCZFb;HQvRXWWm?`hmf5CVwnD+X6tf&V?so|p#mm^ZUT8JQ)uOzpu_kxgsH!r7HX^z zI@o#H+q(ZrGl^>B{8%x zTG=S#iNK4nH1;OVL0~rkc1?I%{NdxNyMpz_`KSd(FWaoS&!bBub4V+CVv}X6n-}>y zUs}($*-DghTd8j`$!7N1(_ZB4%bqnXj}+%E!*$zU-VJ#g@3LURGE?FS;V>sq>^+Zi zva+Qh&!8S@8AA}vq-*6ID8O$NlF#11&`XME)KOVeZa8LsUUy?GD}Y)C251VMpHL?( z(OL%tyjRQ2DtzeoUWt(rvBpxOgElU37Urxn-+ATm>Z%{@x-phxesX^{OD1 zWwG-GTi(Ei3MKY6$4_tY)3`H3<1A)T$Xr=GI;PM_C}8%lmA{S)o^;Shmpi*q)5;P!cy!*G=m1@R8vDGZV=!U%$}#^m)%+d=2f;NEmpMAp|cvSGdd8Z~sCkO2*g!SDiLhUutjzb*H)&`mU?%WSf zv(^(x>l5PDbNj19e8r2oul!uk`e&~qsgq>rSHUG7V2T-uNKu=_Re61S9pWLFId6p^ zWB+{ELtZ0VlM#@0)urdd%VVksg7mWQKLwSrw)iMfU2Cn^C~Ziyo6;H0d`8BINOOI6 zJ)%^n>~1!`_#XZ7mwF4kEi?5cLS^vhlk!w(ndkXaSGs|_7=6c_hP#EJ%fs+FY<)-n zqHn-|>)Yvf#X%OWY|Q$hZvAdd39mikH)6{+%u!v7*Uh7&fGQOp;_|elzSc*^K#0m! zj|10Q!b6sNR-cZJ4##;ue)4Yn#A~Z!w4|k9wZ-oMpz`@8gtPg5R{CBu{%xaW|5(ui zU}^9eW!Q9?vcYr4Wj z2_~V5yi#=VB$V^wHLcoNX0>9ktkXWe{t6u$d4i8r5@TxO9fSELO>XBeUvklYFqk9! 
zN_#fMG*!u7>7fGC;8gY#DeFy2joAi7RHwe#2TJ$2hbOn39ls2u>vK&Trz0&Aqe}~N zVHsP|tC6z)xsy;i!PV=l;NeY&4+o$EGWcP1-~q8;kRvy)rUb}O=vYpE;{&utqKjlc zfaKNYKC?BINqiH9f7QSts3PH$6=!m}exctu-#TQumgFzb#DtCYm6VC5OIls>fL?Dd z7%nDvNR9(ogv`g<<1qIH_XE1uFgQq1EmPKHr!yB$DYKa zjq{h{!$B29{5Phuo`MDwZ2&M_Id5;;SPu_~hzrGpGPL*(v*LnaXgTL^=f9 z&^aN;y`5!hzFhH8Fl~DCk}ylX?}MOBX2x#e9cGZ)v?r1Pv`45*?1oSVPfOG~c_>nG z`7%Q{jAK_1?7V!uYTjlxs@$@vQWDW#a8Y`0%bfMTjMu^{BeXJckcn88!rZ#U;k{SA z3nd=V2BBEkqhs&+Su_6Q+m@4H!?z(0t_SNThCst=U7<9*Dh4t_*;pnUI<~w@ffox& zSt^Q2A*~+eP!WeZ?|TE20X}sZZK?ATR$tH}$pa0V^f`KOTBwtl--pvOdc#AVLjct; zNux^oU6{Wv`lRK4o%N*A@px<@YE|Cin0MVO$KT~IynD^Q1woK`NX^F-I>v7m*SVJH zH$n?8(695YeG4oI4BO!`cWusR)QFqE*hklWu;^F)9qA8h%x~N9k+i#NCabXn3Tp2* z^C)qDK5PrlH8jraB31jyMQU-sQ4c8$6sI)y0rwZv1}Pf(rOCW|-*(QA z$x=*`Z8BIRJimB~2Q*u7Wmhx|hW3irS`i7##Lltd zavH=FY(m_%jp}}Ym%-i~ z7r4a~Y!@PgimnKHXr7nU&wG7&-cS@dbpy;@72Uj2#xU2{&XyYOiqMPYbnaXgmOS1j zH(8>}dXOBS`jt)bX(%N#$lSumHpO z!oeFSffZz{Z4Et8;KKDuO^~_mx?x+05k*vdCf@W($CEn|;EsEB{gQ<9jL*f@@$S+2 z_VbZ@3*QAQ_SQe9RPh@*#R~^qQs$2?265mSJBD+O-CE*hC?&f^MSf+ooNb3fa{qm= za)X$5mFRZtv?=b3p^jdk<**w%?v7mF$pYBp_?^5+t|paL<9>Q4DF=oE?{I|is6cKI z(8FIAmc-mqeO>Ki?)72|%D=~+j?2Vc$YN8)QBy?OY)}noi%#JE{Or6vv25Y~oW<*^ zSFc?CycUyVO71C=7Li)D_nCi58*Xb|*vM@c-4WbemO8Pio*J+ApX&v5=uFt_#ryja zar!dkAm<$EZ?WS>Q(0c=MSL4<2uT{K_8cxiRBJVNb zWZnvwmA)lWs$Dq9sW<+F5F;B%?@;3iS~3IZ?(C;*<}J_4=FGwRA)DVm!867f|09}k z!zOnoQ9#{>F+3~`M^E>a;x}dLaIHsXc`O_o0d&lQ-@3jj=N6rQa=g%?Or1oN!PwyM zvEUzA^Z~(dcGo(1TuuQj&z!i>EwjpN`LWo+jQQb;a^hIuu(Ml;Hni-M9>mpwSzKXSyu?q=i^IyvM!Xaint-hhCfLB@b$e|GS<#}ty z1CN&p8YG4-g>V`+W=K-Mz%rj zt(|z6+9A=4NmG+6LC?2nqK2#O`nb4Fp%XX5xF^?Bhc(>7b{n?vROGE_Ufy2uedYXu zB8_E@P^xFiCJ~;Nurbj#vA)+u_I&28XwbpThCdLdfgji=L0zJ0@K%=}jef8G6}fzGQ8I zHz%LDmI}P2vxMgdFMs~*4ug?B?nbFRfnUC?aZdVL1d%tQfSD#!l}JgGC@bmSp#dv9{p2wpV4+dq#Xaz_n2)lHTzkP&KopUZ8z>!M|?JG zZ%>v06RNV_wLN0?mWxzT-F7{iS0K9lM8|jILFls~H(jJ*3`Mp&YdDv-lR^oDt{vNx zn?tDqKXMk60n{=^7m#~Dd?YR&E&iWoLJXK8(2wg>3G?>H2@xTBV1!PbuGat2{+B)Uas(JSjq>F_WTS(b$bT-5K;M`mg=H(f={u_zfG=>SH%a)Mwa2_&?eI 
z(D!f^N($`&QiOGep3I~3ZSW& zHUOK7F=^Q5g!0T@{YZaO|76*I6IhI|!G3g6oq?v1VL|`sa?n_e%fa3&=DCMmgwF1D zfc#hNZ^Mdt3SvVs^$YuyoP}BeenS6L&Col=R5D#HLUd+va7fWFYqWN#sWIRF1(3Lp AI{*Lx literal 0 HcmV?d00001 diff --git a/lab07/aaron/e2/compete/benchmark.cpp b/lab07/aaron/e2/compete/benchmark.cpp new file mode 100644 index 0000000..41aa205 --- /dev/null +++ b/lab07/aaron/e2/compete/benchmark.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + +static void BM_DBSCAN(benchmark::State& state) { + // Load points from file + std::vector points = readPointsFromFile("data"); + + // Create DBSCAN object with parameters from the benchmark state + DBSCAN ds(5, 0.01); + + // Measure the time taken to run DBSCAN + for (auto _ : state) { + ds.run(points); + } +} + +BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/lab07/aaron/e2/compete/create_data.py b/lab07/aaron/e2/compete/create_data.py new file mode 100644 index 0000000..145515a --- /dev/null +++ b/lab07/aaron/e2/compete/create_data.py @@ -0,0 +1,12 @@ +from sklearn.datasets import make_blobs +from sklearn.preprocessing import StandardScaler +import numpy as np + +centers = [[1, 1], [-1, -1], [1, -1], [-1.5, -1.5], [-2, 2], [1, 3]] +X, labels_true = make_blobs( + n_samples=27*1024, centers=centers, cluster_std=0.25, random_state=0 +) + +X = StandardScaler().fit_transform(X) + +np.savetxt("data", X) diff --git a/lab07/aaron/e2/compete/dbscan.cpp b/lab07/aaron/e2/compete/dbscan.cpp new file mode 100644 index 0000000..dff2f60 --- /dev/null +++ b/lab07/aaron/e2/compete/dbscan.cpp @@ -0,0 +1,68 @@ +#include "dbscan.h" +#include +#include +#include +#include + +namespace HPC { + +DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {} + +void DBSCAN::run(const std::vector &points) { + + dataset_ = points; + const int n = dataset_.size(); + + initializeNeighbors(); + + int clusterIndex = 0; + 
for (int i = 0; i < n; ++i) { + Point &point = dataset_[i]; + if (point.clusterID < 0) { + std::set neighbours = point.neighbors; + if (neighbours.size() < minPoints_) { + point.clusterID = noiseID; + } else { + clusterIndex++; + expandCluster(point, neighbours, clusterIndex); + } + } + } +} + +bool DBSCAN::expandCluster(Point &p, std::set &neighbours, int clusterID) { + p.clusterID = clusterID; + + std::set updatedNeighbours = neighbours; + + // Use of do-while instead of clearing neighbors + do { + neighbours = updatedNeighbours; + + for (int i : neighbours) { + Point &pPrime = dataset_[i]; + if (pPrime.clusterID < 0) { + pPrime.clusterID = clusterID; // serves as marking the point as visited + std::set newNeighbours = pPrime.neighbors; + if (newNeighbours.size() >= minPoints_) { + updatedNeighbours.merge(newNeighbours); + } + } + } + } while (updatedNeighbours.size() != neighbours.size()); + return true; +} + +void DBSCAN::initializeNeighbors() { +#pragma omp parallel for + for (int i = 0; i < dataset_.size(); ++i) { + Point &pointToCheckNeighborsFor = dataset_[i]; + for (int j = 0; j < dataset_.size(); ++j) { + if (pointToCheckNeighborsFor.distance(dataset_[j]) <= epsilon_) { + pointToCheckNeighborsFor.neighbors.insert(j); + } + } + } +} + +} // namespace HPC diff --git a/lab07/aaron/e2/compete/dbscan.h b/lab07/aaron/e2/compete/dbscan.h new file mode 100644 index 0000000..e13064d --- /dev/null +++ b/lab07/aaron/e2/compete/dbscan.h @@ -0,0 +1,37 @@ +#ifndef DBSCAN_H +#define DBSCAN_H + +#include +#include + +#include "point.h" + +namespace HPC { + +class DBSCAN { +public: + DBSCAN(int minPts, double eps); + + void run(const std::vector &points); + + const std::vector &getPoints() const { return dataset_; } + +private: + std::set regionQuery(const Point &point) const; + void initializeNeighbors(); + bool expandCluster(Point &point, std::set &neighbours, int clusterID); + + // void merge(std::vector& n, const std::vector& nPrime) const; + + const int 
unclassifiedID = -1; + const int noiseID = -2; + + const int minPoints_; + const double epsilon_; + + std::vector dataset_; +}; + +} // namespace HPC + +#endif // DBSCAN_H diff --git a/lab07/aaron/e2/compete/makefile b/lab07/aaron/e2/compete/makefile new file mode 100644 index 0000000..e1863e0 --- /dev/null +++ b/lab07/aaron/e2/compete/makefile @@ -0,0 +1,43 @@ +# Makefile for DBSCAN program + +# ---------------------------------------------------- +# Parameters +# Change these parameters according to your needs. + +# SOURCE_FILES: The source files of the algorithm, used for each build. +# You can add more source files here if needed. +SOURCE_FILES = dbscan.cpp point.cpp + +# Main program, used to cluster the data and save the result. +# PROGRAM_NAME: The name of the program that will be generated after compilation. +PROGRAM_NAME = dbscan +RUN_MAIN = run.cpp + +# Benchmark program: This program is used to benchmark the performance of the algorithm. +# It is not used for the actual clustering process. +BENCHMARK_PROGRAM_NAME = dbscan_bench +BENCHMARK_MAIN = benchmark.cpp + +COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread + +# ---------------------------------------------------- +# The actual makefile rules, only change these if you really need to. + +# Default target +# The default target is the one that will be executed when you run 'make' without any arguments. 
+default: release + +release: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3 + +debug: $(RUN_MAIN) $(SOURCE_FILES) + g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g + +benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES) + g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark + +run_bench: benchmark + ./$(BENCHMARK_PROGRAM_NAME) + +run: release + ./$(PROGRAM_NAME) diff --git a/lab07/aaron/e2/compete/plot.py b/lab07/aaron/e2/compete/plot.py new file mode 100644 index 0000000..63e876f --- /dev/null +++ b/lab07/aaron/e2/compete/plot.py @@ -0,0 +1,14 @@ +import pylab as plt +import numpy as np + +plt.figure() +points = plt.loadtxt("clustered") +cluster_index_column = 2 +clusters = np.unique(points[:, cluster_index_column]) +print(clusters) +for c in clusters: + points_in_cluster = points[np.where( + points[:, cluster_index_column] == c)[0]] + plt.scatter(points_in_cluster[:, 0], points_in_cluster[:, 1], label=c) + +plt.show() diff --git a/lab07/aaron/e2/compete/point.cpp b/lab07/aaron/e2/compete/point.cpp new file mode 100644 index 0000000..13b4181 --- /dev/null +++ b/lab07/aaron/e2/compete/point.cpp @@ -0,0 +1,52 @@ +#include +#include + +#include "point.h" + +Point::Point(const std::vector &coordinatesIn) + : coordinates(coordinatesIn) {} + +double &Point::operator()(int i) { return coordinates[i]; } + +const double &Point::operator()(int i) const { return coordinates[i]; } + +double Point::distance(const Point &other) const { + double distance = 0; + for (int i = 0; i < coordinates.size(); ++i) { + const double p = coordinates[i]; + const double q = other.coordinates[i]; + distance += (p - q) * (p - q); + } + + return distance; +} + +std::vector readPointsFromFile(const std::string &filename) { + std::vector points; + std::ifstream fin(filename); + + double x, y; + + while (fin >> x >> y) { + Point point({x, y}); + points.push_back(point); + } + 
return points; +} + +std::ostream &operator<<(std::ostream &os, const Point &point) { + for (auto coordinate : point.coordinates) { + os << coordinate << "\t"; + } + os << point.clusterID; + os << "\t" << point.neighbors.size(); + return os; +} + +void writePointsToFile(const std::vector &points, + const std::string &filename) { + std::ofstream fout(filename); + for (auto point : points) { + fout << point << "\n"; + } +} diff --git a/lab07/aaron/e2/compete/point.h b/lab07/aaron/e2/compete/point.h new file mode 100644 index 0000000..c77bbc2 --- /dev/null +++ b/lab07/aaron/e2/compete/point.h @@ -0,0 +1,53 @@ +#ifndef POINT_H +#define POINT_H + +#include +#include +#include + +/** + * Class representing a point in the dataset. + * + * Stores the coordinates of the point, its cluster ID, and whether it is a core + * point. + */ +class Point { + public: + Point(const std::vector& coordinatesIn); + + double& operator()(int i); + const double& operator()(int i) const; + + double distance(const Point& other) const; + + std::vector coordinates; + int clusterID = -1; + bool isCorePoint = false; + std::set neighbors; +}; + +/** + * Read points from a file and return them as a vector of Point objects. + */ +std::vector readPointsFromFile(const std::string& filename); + +/** + * Print a point to an output stream. The + * coordinates are separated by tabs, followed by the + * cluster ID and the number of neighbors. + */ +std::ostream& operator<<(std::ostream& os, const Point& point); + +/** + * Write points to a file. + * + * Each point is written on a new line, with + * coordinates separated by tabs, then the + * cluster ID and the number of neighbors. + * + * Can be read with numpy.loadtxt; the second-to-last column gives the cluster ID. 
+ */ +void writePointsToFile(const std::vector& points, + const std::string& filename); + +#endif // POINT_H \ No newline at end of file diff --git a/lab07/aaron/e2/compete/run.cpp b/lab07/aaron/e2/compete/run.cpp new file mode 100644 index 0000000..7ca8e35 --- /dev/null +++ b/lab07/aaron/e2/compete/run.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include "dbscan.h" + +using namespace HPC; + +int main() +{ + + std::vector points = readPointsFromFile("data"); + + DBSCAN ds(5, 0.01); + // Zeitmessung starten + auto start = std::chrono::high_resolution_clock::now(); + + ds.run(points); + + // Zeitmessung beenden + auto end = std::chrono::high_resolution_clock::now(); + + // Dauer berechnen in Millisekunden + auto duration = std::chrono::duration_cast(end - start).count(); + + std::cout << "Laufzeit: " << duration << " ms" << std::endl; + writePointsToFile(ds.getPoints(), "clustered"); + + return 0; +} diff --git a/lab07/results/plot_dbscan.py b/lab07/results/plot_dbscan.py new file mode 100644 index 0000000..36dcbcd --- /dev/null +++ b/lab07/results/plot_dbscan.py @@ -0,0 +1,26 @@ +import csv +import matplotlib.pyplot as plt + +# Read performance data from CSV file +with open('lab07\\results\\dbscna_results.csv', 'r') as f: + reader = csv.reader(f) + data = next(reader) # Read first line + times = list(map(int, data)) # Convert to integers + +# X: thread count (1 to n), Y: performance (1/time) +threads = list(range(1, len(times) + 1)) +performance = [1 / t for t in times] # You could multiply by a constant to scale if needed + +speedup = [times[0] / t for t in times] + +efficiency = [] +for i in range (1, len(speedup) + 1): + efficiency.append(speedup[i-1] / i) + +# Plot +plt.plot(threads, efficiency, marker='o') +plt.xlabel('Thread Count') +plt.ylabel('Efficiency (Speedup / Thread Count)') +plt.title('Thread Count vs Efficiency') +plt.grid(True) +plt.show()