lab07
This commit is contained in:
@@ -66,7 +66,7 @@ void gauss_seidel(Matrix &phi, int maxNumIter)
|
||||
}
|
||||
std::atomic<int> threadsCount(0);
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
#pragma omp parallel for schedule(static, 10)
|
||||
for (int rowToCalculate = 1; rowToCalculate < (n - 1); rowToCalculate++)
|
||||
{
|
||||
int row = rowToCalculate;
|
||||
|
||||
BIN
lab07/aaron/e2/compete.zip
Normal file
BIN
lab07/aaron/e2/compete.zip
Normal file
Binary file not shown.
23
lab07/aaron/e2/compete/benchmark.cpp
Normal file
23
lab07/aaron/e2/compete/benchmark.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "dbscan.h"
|
||||
|
||||
using namespace HPC;
|
||||
|
||||
static void BM_DBSCAN(benchmark::State& state) {
|
||||
// Load points from file
|
||||
std::vector<Point> points = readPointsFromFile("data");
|
||||
|
||||
// Create DBSCAN object with parameters from the benchmark state
|
||||
DBSCAN ds(5, 0.01);
|
||||
|
||||
// Measure the time taken to run DBSCAN
|
||||
for (auto _ : state) {
|
||||
ds.run(points);
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10);
|
||||
BENCHMARK_MAIN();
|
||||
12
lab07/aaron/e2/compete/create_data.py
Normal file
12
lab07/aaron/e2/compete/create_data.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from sklearn.datasets import make_blobs
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import numpy as np
|
||||
|
||||
# Centres of the six Gaussian blobs the samples are drawn around.
centers = [
    [1, 1],
    [-1, -1],
    [1, -1],
    [-1.5, -1.5],
    [-2, 2],
    [1, 3],
]

# 27 * 1024 samples in total; the fixed random_state makes the data reproducible.
sample_count = 27 * 1024
X, labels_true = make_blobs(
    n_samples=sample_count,
    centers=centers,
    cluster_std=0.25,
    random_state=0,
)

# Standardise the features before writing them out.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One sample per line, coordinates separated by whitespace.
np.savetxt("data", X)
|
||||
68
lab07/aaron/e2/compete/dbscan.cpp
Normal file
68
lab07/aaron/e2/compete/dbscan.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
#include "dbscan.h"
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
|
||||
namespace HPC {
|
||||
|
||||
/// Stores the clustering parameters; the constructor body itself does no work.
DBSCAN::DBSCAN(int minPts, double eps)
    : minPoints_(minPts),
      epsilon_(eps) {
}
|
||||
|
||||
void DBSCAN::run(const std::vector<Point> &points) {
|
||||
|
||||
dataset_ = points;
|
||||
const int n = dataset_.size();
|
||||
|
||||
initializeNeighbors();
|
||||
|
||||
int clusterIndex = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
Point &point = dataset_[i];
|
||||
if (point.clusterID < 0) {
|
||||
std::set<int> neighbours = point.neighbors;
|
||||
if (neighbours.size() < minPoints_) {
|
||||
point.clusterID = noiseID;
|
||||
} else {
|
||||
clusterIndex++;
|
||||
expandCluster(point, neighbours, clusterIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Grow the cluster seeded by point `p`.
 *
 * Every point reachable from the seed's neighbours whose cluster ID is still
 * negative (unclassified or noise) is labelled with `clusterID`. A newly
 * labelled point that has at least minPoints_ neighbours contributes its own
 * neighbours to the search.
 *
 * @param p           Seed point; labelled with `clusterID` first.
 * @param neighbours  In: indices of the seed's neighbours. Out: all indices
 *                    that were considered during the expansion.
 * @param clusterID   Label to assign.
 * @return Always true.
 */
bool DBSCAN::expandCluster(Point &p, std::set<int> &neighbours, int clusterID) {
  p.clusterID = clusterID;

  const auto minPoints = static_cast<std::set<int>::size_type>(minPoints_);

  // Worklist of indices still to be examined. Each index enters the list at
  // most once (guarded by the insert into `neighbours`), so no point is
  // rescanned and no set has to be copied per round.
  std::vector<int> pending(neighbours.begin(), neighbours.end());
  while (!pending.empty()) {
    const int index = pending.back();
    pending.pop_back();

    Point &candidate = dataset_[index];
    if (candidate.clusterID >= 0) {
      continue;  // already belongs to a cluster
    }
    candidate.clusterID = clusterID;  // serves as marking the point as visited

    if (candidate.neighbors.size() < minPoints) {
      continue;  // border point: labelled, but does not extend the cluster
    }
    for (const int reachable : candidate.neighbors) {
      if (neighbours.insert(reachable).second) {
        pending.push_back(reachable);
      }
    }
  }
  return true;
}
|
||||
|
||||
void DBSCAN::initializeNeighbors() {
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < dataset_.size(); ++i) {
|
||||
Point &pointToCheckNeighborsFor = dataset_[i];
|
||||
for (int j = 0; j < dataset_.size(); ++j) {
|
||||
if (pointToCheckNeighborsFor.distance(dataset_[j]) <= epsilon_) {
|
||||
pointToCheckNeighborsFor.neighbors.insert(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace HPC
|
||||
37
lab07/aaron/e2/compete/dbscan.h
Normal file
37
lab07/aaron/e2/compete/dbscan.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef DBSCAN_H
|
||||
#define DBSCAN_H
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "point.h"
|
||||
|
||||
namespace HPC {
|
||||
|
||||
class DBSCAN {
|
||||
public:
|
||||
DBSCAN(int minPts, double eps);
|
||||
|
||||
void run(const std::vector<Point> &points);
|
||||
|
||||
const std::vector<Point> &getPoints() const { return dataset_; }
|
||||
|
||||
private:
|
||||
std::set<int> regionQuery(const Point &point) const;
|
||||
void initializeNeighbors();
|
||||
bool expandCluster(Point &point, std::set<int> &neighbours, int clusterID);
|
||||
|
||||
// void merge(std::vector<int>& n, const std::vector<int>& nPrime) const;
|
||||
|
||||
const int unclassifiedID = -1;
|
||||
const int noiseID = -2;
|
||||
|
||||
const int minPoints_;
|
||||
const double epsilon_;
|
||||
|
||||
std::vector<Point> dataset_;
|
||||
};
|
||||
|
||||
} // namespace HPC
|
||||
|
||||
#endif // DBSCAN_H
|
||||
43
lab07/aaron/e2/compete/makefile
Normal file
43
lab07/aaron/e2/compete/makefile
Normal file
@@ -0,0 +1,43 @@
|
||||
# Makefile for DBSCAN program
|
||||
|
||||
# ----------------------------------------------------
|
||||
# Parameters
|
||||
# Change these parameters according to your needs.
|
||||
|
||||
# SOURCE_FILES: The source files of the algorithm, used for each build.
|
||||
# You can add more source files here if needed.
|
||||
SOURCE_FILES = dbscan.cpp point.cpp
|
||||
|
||||
# Main program, used to cluster the data and save the result.
|
||||
# PROGRAM_NAME: The name of the program that will be generated after compilation.
|
||||
PROGRAM_NAME = dbscan
|
||||
RUN_MAIN = run.cpp
|
||||
|
||||
# Benchmark program: This program is used to benchmark the performance of the algorithm.
|
||||
# It is not used for the actual clustering process.
|
||||
BENCHMARK_PROGRAM_NAME = dbscan_bench
|
||||
BENCHMARK_MAIN = benchmark.cpp
|
||||
|
||||
COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread
|
||||
|
||||
# ----------------------------------------------------
|
||||
# The actual makefile rules, only change these if you really need to.
|
||||
|
||||
# Default target
|
||||
# The default target is the one that will be executed when you run 'make' without any arguments.
|
||||
default: release
|
||||
|
||||
release: $(RUN_MAIN) $(SOURCE_FILES)
|
||||
g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3
|
||||
|
||||
debug: $(RUN_MAIN) $(SOURCE_FILES)
|
||||
g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g
|
||||
|
||||
benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES)
|
||||
g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark
|
||||
|
||||
run_bench: benchmark
|
||||
./$(BENCHMARK_PROGRAM_NAME)
|
||||
|
||||
run: release
|
||||
./$(PROGRAM_NAME)
|
||||
14
lab07/aaron/e2/compete/plot.py
Normal file
14
lab07/aaron/e2/compete/plot.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import pylab as plt
|
||||
import numpy as np
|
||||
|
||||
# Draw the clustered points as one scatter series per cluster ID.
plt.figure()
points = plt.loadtxt("clustered")

# Column of the "clustered" file that holds the cluster ID.
cluster_index_column = 2
cluster_ids = points[:, cluster_index_column]

clusters = np.unique(cluster_ids)
print(clusters)

for c in clusters:
    # A boolean mask selects the rows that belong to this cluster.
    members = points[cluster_ids == c]
    plt.scatter(members[:, 0], members[:, 1], label=c)

plt.show()
|
||||
52
lab07/aaron/e2/compete/point.cpp
Normal file
52
lab07/aaron/e2/compete/point.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "point.h"
|
||||
|
||||
/// Creates a point holding a copy of the given coordinates.
Point::Point(const std::vector<double> &coordinatesIn)
    : coordinates(coordinatesIn) {
}
|
||||
|
||||
/// Mutable access to the i-th coordinate (plain vector indexing, unchecked).
double &Point::operator()(int i) {
  return coordinates[i];
}
|
||||
|
||||
/// Read-only access to the i-th coordinate (plain vector indexing, unchecked).
const double &Point::operator()(int i) const {
  return coordinates[i];
}
|
||||
|
||||
double Point::distance(const Point &other) const {
|
||||
double distance = 0;
|
||||
for (int i = 0; i < coordinates.size(); ++i) {
|
||||
const double p = coordinates[i];
|
||||
const double q = other.coordinates[i];
|
||||
distance += (p - q) * (p - q);
|
||||
}
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
std::vector<Point> readPointsFromFile(const std::string &filename) {
|
||||
std::vector<Point> points;
|
||||
std::ifstream fin(filename);
|
||||
|
||||
double x, y;
|
||||
|
||||
while (fin >> x >> y) {
|
||||
Point point({x, y});
|
||||
points.push_back(point);
|
||||
}
|
||||
return points;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const Point &point) {
|
||||
for (auto coordinate : point.coordinates) {
|
||||
os << coordinate << "\t";
|
||||
}
|
||||
os << point.clusterID;
|
||||
os << "\t" << point.neighbors.size();
|
||||
return os;
|
||||
}
|
||||
|
||||
void writePointsToFile(const std::vector<Point> &points,
|
||||
const std::string &filename) {
|
||||
std::ofstream fout(filename);
|
||||
for (auto point : points) {
|
||||
fout << point << "\n";
|
||||
}
|
||||
}
|
||||
53
lab07/aaron/e2/compete/point.h
Normal file
53
lab07/aaron/e2/compete/point.h
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef POINT_H
|
||||
#define POINT_H
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
/**
 * A single point of the data set.
 *
 * Holds the coordinates together with the clustering state attached to the
 * point: its cluster ID, a core-point flag and the indices of its neighbours.
 */
class Point {
public:
  /// Creates a point from the given coordinates.
  Point(const std::vector<double>& coordinatesIn);

  /// Access to the i-th coordinate.
  double& operator()(int i);
  const double& operator()(int i) const;

  /// Distance measure between this point and `other`.
  double distance(const Point& other) const;

  /// Coordinates of the point, one entry per dimension.
  std::vector<double> coordinates;

  /// Cluster label; starts out as -1.
  int clusterID = -1;

  /// Core-point flag; starts out as false.
  bool isCorePoint = false;

  /// Indices of neighbouring points.
  std::set<int> neighbors;
};
|
||||
|
||||
/**
|
||||
* Read points from a file and return them as a vector of Point objects.
|
||||
*/
|
||||
std::vector<Point> readPointsFromFile(const std::string& filename);
|
||||
|
||||
/**
 * Print a point to an output stream. The
 * coordinates are separated by tabs, followed by the
 * cluster ID and the number of neighbors.
 */
|
||||
std::ostream& operator<<(std::ostream& os, const Point& point);
|
||||
|
||||
/**
 * Write points to a file.
 *
 * Each point is written on a new line, with
 * coordinates separated by tabs, followed by the
 * cluster ID and the number of neighbors.
 *
 * Can be read with numpy.loadtxt; the second-to-last column gives the cluster ID.
 */
|
||||
void writePointsToFile(const std::vector<Point>& points,
|
||||
const std::string& filename);
|
||||
|
||||
#endif // POINT_H
|
||||
29
lab07/aaron/e2/compete/run.cpp
Normal file
29
lab07/aaron/e2/compete/run.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include "dbscan.h"
|
||||
|
||||
using namespace HPC;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
std::vector<Point> points = readPointsFromFile("data");
|
||||
|
||||
DBSCAN ds(5, 0.01);
|
||||
// Zeitmessung starten
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
ds.run(points);
|
||||
|
||||
// Zeitmessung beenden
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Dauer berechnen in Millisekunden
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
|
||||
|
||||
std::cout << "Laufzeit: " << duration << " ms" << std::endl;
|
||||
writePointsToFile(ds.getPoints(), "clustered");
|
||||
|
||||
return 0;
|
||||
}
|
||||
26
lab07/results/plot_dbscan.py
Normal file
26
lab07/results/plot_dbscan.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import csv
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Read performance data from CSV file: the first row holds one integer run
# time per thread count, starting with a single thread.
# Forward slashes keep the relative path usable on Windows and on POSIX
# systems alike (the backslash form only resolves on Windows).
# NOTE(review): the file name is spelled "dbscna" — confirm it matches the
# file on disk before renaming anything.
with open('lab07/results/dbscna_results.csv', 'r') as f:
    reader = csv.reader(f)
    data = next(reader)  # Read first line
    times = list(map(int, data))  # Convert to integers

# X: thread count (1 to n)
threads = list(range(1, len(times) + 1))

# Speedup of each measurement relative to the first one
speedup = [times[0] / t for t in times]

# Efficiency: speedup divided by the number of threads that produced it
efficiency = [s / count for s, count in zip(speedup, threads)]

# Plot
plt.plot(threads, efficiency, marker='o')
plt.xlabel('Thread Count')
plt.ylabel('Efficiency (Speedup / Thread Count)')
plt.title('Thread Count vs Efficiency')
plt.grid(True)
plt.show()
|
||||
Reference in New Issue
Block a user