This commit is contained in:
WickedJack99
2025-05-17 11:52:48 +02:00
parent 6b64ed76bc
commit 31a5ec8046
12 changed files with 358 additions and 1 deletion

View File

@@ -66,7 +66,7 @@ void gauss_seidel(Matrix &phi, int maxNumIter)
}
std::atomic<int> threadsCount(0);
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(static, 10)
for (int rowToCalculate = 1; rowToCalculate < (n - 1); rowToCalculate++)
{
int row = rowToCalculate;
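For context on the change above: schedule(static) gives each thread one contiguous block of iterations, while schedule(static, 10) deals the iterations out in round-robin chunks of 10 rows. A minimal standalone sketch (not part of this commit) that prints the resulting assignment:

#include <cstdio>
#include <omp.h>

int main() {
    // Chunks of 10 iterations are handed to threads in round-robin order:
    // iterations 0-9 go to thread 0, 10-19 to thread 1, and so on, wrapping
    // around once every thread has received a chunk.
    #pragma omp parallel for schedule(static, 10)
    for (int i = 0; i < 40; ++i) {
        std::printf("iteration %d -> thread %d\n", i, omp_get_thread_num());
    }
    return 0;
}

(printf is used instead of std::cout because each call locks the stream, so output lines from different threads do not interleave.)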

BIN
lab07/aaron/e2/compete.zip Normal file

Binary file not shown.

View File

@@ -0,0 +1,23 @@
#include <iostream>
#include <fstream>
#include <chrono>
#include <benchmark/benchmark.h>
#include "dbscan.h"
using namespace HPC;
static void BM_DBSCAN(benchmark::State& state) {
// Load points from file
std::vector<Point> points = readPointsFromFile("data");
// Create DBSCAN object with fixed parameters (minPts = 5, eps = 0.01)
DBSCAN ds(5, 0.01);
// Measure the time taken to run DBSCAN
for (auto _ : state) {
ds.run(points);
}
}
BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10);
BENCHMARK_MAIN();
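The minPts and eps arguments are hardcoded above. If one wanted to sweep them from the benchmark itself, Google Benchmark's Args/state.range mechanism would be the usual route; a hypothetical variant (the name BM_DBSCAN_Param and the integer scaling of eps are assumptions, since Args only accepts integers):

static void BM_DBSCAN_Param(benchmark::State& state) {
    std::vector<Point> points = readPointsFromFile("data");
    // state.range(0) = minPts; state.range(1) = eps * 1000 (Args are integers)
    DBSCAN ds(static_cast<int>(state.range(0)), state.range(1) / 1000.0);
    for (auto _ : state) {
        ds.run(points);
    }
}
BENCHMARK(BM_DBSCAN_Param)->Args({5, 10})->Unit(benchmark::kMillisecond);

Args({5, 10}) then reproduces the hardcoded minPts = 5, eps = 0.01.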

View File

@@ -0,0 +1,12 @@
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import numpy as np
centers = [[1, 1], [-1, -1], [1, -1], [-1.5, -1.5], [-2, 2], [1, 3]]
X, labels_true = make_blobs(
    n_samples=27*1024, centers=centers, cluster_std=0.25, random_state=0
)
X = StandardScaler().fit_transform(X)
np.savetxt("data", X)

View File

@@ -0,0 +1,68 @@
#include "dbscan.h"
#include <atomic>
#include <cmath>
#include <iostream>
#include <omp.h>
namespace HPC {
DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {}
void DBSCAN::run(const std::vector<Point> &points) {
dataset_ = points;
const int n = static_cast<int>(dataset_.size());
initializeNeighbors();
int clusterIndex = 0;
for (int i = 0; i < n; ++i) {
Point &point = dataset_[i];
if (point.clusterID < 0) {
std::set<int> neighbours = point.neighbors;
if (neighbours.size() < static_cast<std::size_t>(minPoints_)) {
point.clusterID = noiseID;
} else {
clusterIndex++;
expandCluster(point, neighbours, clusterIndex);
}
}
}
}
bool DBSCAN::expandCluster(Point &p, std::set<int> &neighbours, int clusterID) {
p.clusterID = clusterID;
std::set<int> updatedNeighbours = neighbours;
// Fixed-point expansion: use a do-while that repeats until the neighbour set
// stops growing, instead of clearing and rebuilding it each pass
do {
neighbours = updatedNeighbours;
for (int i : neighbours) {
Point &pPrime = dataset_[i];
if (pPrime.clusterID < 0) {
pPrime.clusterID = clusterID; // serves as marking the point as visited
std::set<int> newNeighbours = pPrime.neighbors;
if (newNeighbours.size() >= static_cast<std::size_t>(minPoints_)) {
updatedNeighbours.merge(newNeighbours);
}
}
}
} while (updatedNeighbours.size() != neighbours.size());
return true;
}
void DBSCAN::initializeNeighbors() {
  const int n = static_cast<int>(dataset_.size());
  // O(n^2) all-pairs neighbourhood pass. Each point also records itself as a
  // neighbour, since distance(p, p) == 0 <= epsilon_. The rows are
  // independent, so the outer loop parallelizes without synchronization.
#pragma omp parallel for
  for (int i = 0; i < n; ++i) {
    Point &pointToCheckNeighborsFor = dataset_[i];
    for (int j = 0; j < n; ++j) {
      if (pointToCheckNeighborsFor.distance(dataset_[j]) <= epsilon_) {
        pointToCheckNeighborsFor.neighbors.insert(j);
      }
    }
  }
}
} // namespace HPC
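The do-while in expandCluster re-scans the entire neighbour set on every pass and stops once the set no longer grows (a fixed-point check on the size). An equivalent frontier-based formulation, closer to the textbook seed-set expansion, would visit each newly discovered point exactly once; a sketch using the same members (an alternative, not what this commit implements):

bool DBSCAN::expandCluster(Point &p, std::set<int> &neighbours, int clusterID) {
    p.clusterID = clusterID;
    // Seed the frontier with the core point's neighbourhood.
    std::vector<int> frontier(neighbours.begin(), neighbours.end());
    while (!frontier.empty()) {
        int idx = frontier.back();
        frontier.pop_back();
        Point &q = dataset_[idx];
        if (q.clusterID < 0) {  // unclassified or noise: absorb into cluster
            q.clusterID = clusterID;
            // Only core points spread the cluster further.
            if (q.neighbors.size() >= static_cast<std::size_t>(minPoints_)) {
                for (int j : q.neighbors) {
                    if (dataset_[j].clusterID < 0) {
                        frontier.push_back(j);
                    }
                }
            }
        }
    }
    return true;
}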

View File

@@ -0,0 +1,37 @@
#ifndef DBSCAN_H
#define DBSCAN_H
#include <set>
#include <vector>
#include "point.h"
namespace HPC {
class DBSCAN {
public:
DBSCAN(int minPts, double eps);
void run(const std::vector<Point> &points);
const std::vector<Point> &getPoints() const { return dataset_; }
private:
std::set<int> regionQuery(const Point &point) const;
void initializeNeighbors();
bool expandCluster(Point &point, std::set<int> &neighbours, int clusterID);
// void merge(std::vector<int>& n, const std::vector<int>& nPrime) const;
const int unclassifiedID = -1;
const int noiseID = -2;
const int minPoints_;
const double epsilon_;
std::vector<Point> dataset_;
};
} // namespace HPC
#endif // DBSCAN_H

View File

@@ -0,0 +1,43 @@
# Makefile for DBSCAN program
# ----------------------------------------------------
# Parameters
# Change these parameters according to your needs.
# SOURCE_FILES: The source files of the algorithm, used for each build.
# You can add more source files here if needed.
SOURCE_FILES = dbscan.cpp point.cpp
# Main program, used to cluster the data and save the result.
# PROGRAM_NAME: The name of the program that will be generated after compilation.
PROGRAM_NAME = dbscan
RUN_MAIN = run.cpp
# Benchmark program: This program is used to benchmark the performance of the algorithm.
# It is not used for the actual clustering process.
BENCHMARK_PROGRAM_NAME = dbscan_bench
BENCHMARK_MAIN = benchmark.cpp
COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread
# ----------------------------------------------------
# The actual makefile rules, only change these if you really need to.
# Default target
# The default target is the one that will be executed when you run 'make' without any arguments.
default: release
release: $(RUN_MAIN) $(SOURCE_FILES)
	g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3
debug: $(RUN_MAIN) $(SOURCE_FILES)
	g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g
benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES)
	g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark
run_bench: benchmark
	./$(BENCHMARK_PROGRAM_NAME)
run: release
	./$(PROGRAM_NAME)

View File

@@ -0,0 +1,14 @@
import matplotlib.pyplot as plt
import numpy as np
plt.figure()
points = np.loadtxt("clustered")
cluster_index_column = 2
clusters = np.unique(points[:, cluster_index_column])
print(clusters)
for c in clusters:
    points_in_cluster = points[np.where(
        points[:, cluster_index_column] == c)[0]]
    plt.scatter(points_in_cluster[:, 0], points_in_cluster[:, 1], label=int(c))
plt.legend()
plt.show()

View File

@@ -0,0 +1,52 @@
#include <fstream>
#include <iostream>
#include "point.h"
Point::Point(const std::vector<double> &coordinatesIn)
: coordinates(coordinatesIn) {}
double &Point::operator()(int i) { return coordinates[i]; }
const double &Point::operator()(int i) const { return coordinates[i]; }
// Returns the SQUARED Euclidean distance (no sqrt); DBSCAN compares this
// squared value directly against its epsilon parameter.
double Point::distance(const Point &other) const {
  double distance = 0;
  for (std::size_t i = 0; i < coordinates.size(); ++i) {
const double p = coordinates[i];
const double q = other.coordinates[i];
distance += (p - q) * (p - q);
}
return distance;
}
std::vector<Point> readPointsFromFile(const std::string &filename) {
std::vector<Point> points;
std::ifstream fin(filename);
double x, y;
while (fin >> x >> y) {
Point point({x, y});
points.push_back(point);
}
return points;
}
std::ostream &operator<<(std::ostream &os, const Point &point) {
for (auto coordinate : point.coordinates) {
os << coordinate << "\t";
}
os << point.clusterID;
os << "\t" << point.neighbors.size();
return os;
}
void writePointsToFile(const std::vector<Point> &points,
const std::string &filename) {
std::ofstream fout(filename);
for (const auto &point : points) {
fout << point << "\n";
}
}
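Note that Point::distance returns the squared Euclidean distance, so the eps handed to DBSCAN is compared against squared values; eps = 0.01 therefore corresponds to a radius of 0.1 in the standardized coordinates. If a true metric were ever needed, a hypothetical helper (euclideanDistance is an assumption, not part of this commit):

#include <cmath>
#include "point.h"

double euclideanDistance(const Point &a, const Point &b) {
    // Take the square root of the squared distance computed above.
    return std::sqrt(a.distance(b));
}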

View File

@@ -0,0 +1,53 @@
#ifndef POINT_H
#define POINT_H
#include <vector>
#include <set>
#include <string>
/**
 * Class representing a point in the dataset.
 *
 * Stores the coordinates of the point, its cluster ID, the indices of its
 * neighbours, and whether it is a core point.
 */
class Point {
public:
Point(const std::vector<double>& coordinatesIn);
double& operator()(int i);
const double& operator()(int i) const;
double distance(const Point& other) const;
std::vector<double> coordinates;
int clusterID = -1;
bool isCorePoint = false;
std::set<int> neighbors;
};
/**
* Read points from a file and return them as a vector of Point objects.
*/
std::vector<Point> readPointsFromFile(const std::string& filename);
/**
 * Print a point to an output stream. The coordinates are separated by
 * tabs, followed by the cluster ID and the neighbour count.
 */
std::ostream& operator<<(std::ostream& os, const Point& point);
/**
 * Write points to a file.
 *
 * Each point is written on a new line, with coordinates separated by tabs,
 * followed by the cluster ID and the neighbour count.
 *
 * Can be read with numpy.loadtxt; the second-to-last column gives the
 * cluster ID and the last column the neighbour count.
 */
void writePointsToFile(const std::vector<Point>& points,
const std::string& filename);
#endif // POINT_H

View File

@@ -0,0 +1,29 @@
#include <iostream>
#include <fstream>
#include <chrono>
#include "dbscan.h"
using namespace HPC;
int main()
{
std::vector<Point> points = readPointsFromFile("data");
DBSCAN ds(5, 0.01);
// Start timing
auto start = std::chrono::high_resolution_clock::now();
ds.run(points);
// Stop timing
auto end = std::chrono::high_resolution_clock::now();
// Compute the duration in milliseconds
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "Runtime: " << duration << " ms" << std::endl;
writePointsToFile(ds.getPoints(), "clustered");
return 0;
}
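One small caveat on the timing: std::chrono::high_resolution_clock is often just an alias for system_clock and is not guaranteed to be monotonic, so std::chrono::steady_clock is the usual recommendation for measuring intervals. A drop-in variant of the measurement above:

// steady_clock never jumps backwards (e.g. on NTP adjustments),
// which makes it the safer choice for interval measurements.
auto start = std::chrono::steady_clock::now();
ds.run(points);
auto end = std::chrono::steady_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();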

View File

@@ -0,0 +1,26 @@
import csv
import matplotlib.pyplot as plt
# Read performance data from the CSV file (one row: runtime per thread count)
with open('lab07\\results\\dbscna_results.csv', 'r') as f:
    reader = csv.reader(f)
    data = next(reader)  # read the first (and only) line
times = list(map(int, data))  # convert to integers
# X: thread count (1 to n); Y-metrics derived from the measured times
threads = list(range(1, len(times) + 1))
performance = [1 / t for t in times]  # raw throughput; not plotted below
speedup = [times[0] / t for t in times]
efficiency = [s / i for i, s in enumerate(speedup, start=1)]
# Plot
plt.plot(threads, efficiency, marker='o')
plt.xlabel('Thread Count')
plt.ylabel('Efficiency (Speedup / Thread Count)')
plt.title('Thread Count vs Efficiency')
plt.grid(True)
plt.show()