add upload
This commit is contained in:
23
lab07/upload/benchmark.cpp
Normal file
23
lab07/upload/benchmark.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "dbscan.h"
|
||||
|
||||
using namespace HPC;
|
||||
|
||||
static void BM_DBSCAN(benchmark::State& state) {
|
||||
// Load points from file
|
||||
std::vector<Point> points = readPointsFromFile("data");
|
||||
|
||||
// Create DBSCAN object with parameters from the benchmark state
|
||||
DBSCAN ds(5, 0.01);
|
||||
|
||||
// Measure the time taken to run DBSCAN
|
||||
for (auto _ : state) {
|
||||
ds.run(points);
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(BM_DBSCAN)->Unit(benchmark::kMillisecond)->Iterations(10);
|
||||
BENCHMARK_MAIN();
|
||||
12
lab07/upload/create_data.py
Normal file
12
lab07/upload/create_data.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Generate a standardized blob dataset and save it as plain text ("data")."""
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import numpy as np

# Six blob centres in 2-D.
centers = [[1, 1], [-1, -1], [1, -1], [-1.5, -1.5], [-2, 2], [1, 3]]

X, labels_true = make_blobs(
    n_samples=27 * 1024,
    centers=centers,
    cluster_std=0.25,
    random_state=0,
)

# Zero-mean / unit-variance scaling of both axes.
X = StandardScaler().fit_transform(X)

np.savetxt("data", X)
|
||||
60
lab07/upload/dbscan.cpp
Normal file
60
lab07/upload/dbscan.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#include "dbscan.h"
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
|
||||
namespace HPC {
|
||||
|
||||
/// Construct DBSCAN with the core-point threshold and neighbourhood radius.
DBSCAN::DBSCAN(int minPts, double eps) : minPoints_(minPts), epsilon_(eps) {}
|
||||
|
||||
void DBSCAN::run(const std::vector<Point>& points) {
|
||||
dataset_ = points;
|
||||
const int n = dataset_.size();
|
||||
|
||||
int clusterIndex = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
Point& point = dataset_[i];
|
||||
if (point.clusterID < 0) {
|
||||
std::set<int> neighbours = regionQuery(point);
|
||||
if (neighbours.size() < minPoints_) {
|
||||
point.clusterID = noiseID;
|
||||
} else {
|
||||
clusterIndex++;
|
||||
expandCluster(point, neighbours, clusterIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Grow cluster `clusterID` outward from core point p.
 *
 * Every point that is density-reachable from p's neighbourhood and not
 * yet claimed (clusterID < 0, which includes points previously marked as
 * noise) is labelled with clusterID.
 *
 * Implementation note: the original fixed-point loop re-copied the whole
 * neighbour set on every round; this frontier-based expansion labels the
 * same points but examines each index at most once.
 */
bool DBSCAN::expandCluster(Point& p, std::set<int>& neighbours, int clusterID) {
  p.clusterID = clusterID;

  // Indices waiting to be examined this round (p itself is in here via
  // regionQuery, but is skipped because its clusterID is already set).
  std::set<int> frontier = neighbours;
  while (!frontier.empty()) {
    std::set<int> next;
    for (int idx : frontier) {
      Point& candidate = dataset_[idx];
      if (candidate.clusterID < 0) {
        candidate.clusterID = clusterID;  // also marks the point as visited
        std::set<int> candidateNeighbours = regionQuery(candidate);
        // Only core points propagate the cluster further.
        if (static_cast<int>(candidateNeighbours.size()) >= minPoints_) {
          for (int j : candidateNeighbours) {
            if (dataset_[j].clusterID < 0) {
              next.insert(j);
            }
          }
        }
      }
    }
    frontier = std::move(next);
  }
  return true;
}
|
||||
|
||||
std::set<int> DBSCAN::regionQuery(const Point& point) const {
|
||||
std::set<int> neighbours;
|
||||
for (int i = 0; i < dataset_.size(); ++i) {
|
||||
if (point.distance(dataset_[i]) <= epsilon_) {
|
||||
neighbours.insert(i);
|
||||
}
|
||||
}
|
||||
return neighbours;
|
||||
}
|
||||
|
||||
} // namespace HPC
|
||||
36
lab07/upload/dbscan.h
Normal file
36
lab07/upload/dbscan.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef DBSCAN_H
|
||||
#define DBSCAN_H
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
#include "point.h"
|
||||
|
||||
namespace HPC {
|
||||
|
||||
/**
 * Density-based spatial clustering of applications with noise (DBSCAN).
 *
 * Usage: construct with the parameters, call run() on a point set, then
 * read the labelled copy back with getPoints().
 */
class DBSCAN {
 public:
  /**
   * @param minPts minimum neighbourhood size for a point to be a core point
   * @param eps neighbourhood radius; compared against Point::distance,
   *            which returns a squared distance (see point.cpp), so this
   *            is effectively a squared radius — confirm intent
   */
  DBSCAN(int minPts, double eps);

  /// Cluster a copy of the given points; results are stored internally.
  void run(const std::vector<Point>& points);

  /// Points from the last run(), with clusterID fields filled in.
  const std::vector<Point>& getPoints() const { return dataset_; }

 private:
  /// Indices of all dataset points within epsilon_ of the given point.
  std::set<int> regionQuery(const Point& point) const;
  /// Grow cluster `clusterID` outward from core point `point`.
  bool expandCluster(Point& point, std::set<int>& neighbours, int clusterID);

  // void merge(std::vector<int>& n, const std::vector<int>& nPrime) const;

  // Sentinel cluster IDs: any negative ID means "not yet claimed".
  const int unclassifiedID = -1;  // NOTE(review): declared but never used here
  const int noiseID = -2;

  const int minPoints_;
  const double epsilon_;

  // Labelled copy of the input, populated by run().
  std::vector<Point> dataset_;
};
|
||||
|
||||
} // namespace HPC
|
||||
|
||||
#endif // DBSCAN_H
|
||||
43
lab07/upload/makefile
Normal file
43
lab07/upload/makefile
Normal file
@@ -0,0 +1,43 @@
|
||||
# Makefile for DBSCAN program

# ----------------------------------------------------
# Parameters
# Change these parameters according to your needs.

# SOURCE_FILES: The source files of the algorithm, used for each build.
# You can add more source files here if needed.
SOURCE_FILES = dbscan.cpp point.cpp

# Main program, used to cluster the data and save the result.
# PROGRAM_NAME: The name of the program that will be generated after compilation.
PROGRAM_NAME = dbscan
RUN_MAIN = run.cpp

# Benchmark program: This program is used to benchmark the performance of the algorithm.
# It is not used for the actual clustering process.
BENCHMARK_PROGRAM_NAME = dbscan_bench
BENCHMARK_MAIN = benchmark.cpp

COMPILER_FLAGS = -fopenmp -std=c++17 -lpthread

# ----------------------------------------------------
# The actual makefile rules, only change these if you really need to.

# These targets are command names, not files; declaring them .PHONY keeps
# them working even if a file named e.g. "run" or "debug" ever appears.
.PHONY: default release debug benchmark run_bench run

# Default target
# The default target is the one that will be executed when you run 'make' without any arguments.
default: release

release: $(RUN_MAIN) $(SOURCE_FILES)
	g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O3

debug: $(RUN_MAIN) $(SOURCE_FILES)
	g++ $(RUN_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(PROGRAM_NAME) -O0 -g

benchmark: $(BENCHMARK_MAIN) $(SOURCE_FILES)
	g++ $(BENCHMARK_MAIN) $(SOURCE_FILES) $(COMPILER_FLAGS) -o $(BENCHMARK_PROGRAM_NAME) -O3 -lbenchmark

run_bench: benchmark
	./$(BENCHMARK_PROGRAM_NAME)

run: release
	./$(PROGRAM_NAME)
|
||||
14
lab07/upload/plot.py
Normal file
14
lab07/upload/plot.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Plot the clustered points, one colour per cluster ID."""
import numpy as np
import matplotlib.pyplot as plt

plt.figure()
points = np.loadtxt("clustered")
# Last column written by writePointsToFile is the cluster ID.
cluster_index_column = 2
clusters = np.unique(points[:, cluster_index_column])
print(clusters)
for c in clusters:
    # Boolean mask is simpler than the original np.where(...)[0] indexing.
    points_in_cluster = points[points[:, cluster_index_column] == c]
    plt.scatter(points_in_cluster[:, 0], points_in_cluster[:, 1], label=c)

# Bug fix: labels were passed to scatter() but never shown without this.
plt.legend()
plt.show()
|
||||
55
lab07/upload/point.cpp
Normal file
55
lab07/upload/point.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include "point.h"
|
||||
|
||||
/// Construct a point from its coordinate vector (any dimensionality);
/// clusterID keeps its in-class default of -1 (unclassified).
Point::Point(const std::vector<double>& coordinatesIn)
    : coordinates(coordinatesIn) {}
|
||||
|
||||
/// Mutable, unchecked access to coordinate i.
double& Point::operator()(int i) {
  return coordinates[i];
}
|
||||
|
||||
/// Read-only, unchecked access to coordinate i.
const double& Point::operator()(int i) const {
  return coordinates[i];
}
|
||||
|
||||
double Point::distance(const Point& other) const {
|
||||
double distance = 0;
|
||||
for (int i = 0; i < coordinates.size(); ++i) {
|
||||
const double p = coordinates[i];
|
||||
const double q = other.coordinates[i];
|
||||
distance += (p - q) * (p - q);
|
||||
}
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
std::vector<Point> readPointsFromFile(const std::string& filename) {
|
||||
std::vector<Point> points;
|
||||
std::ifstream fin(filename);
|
||||
|
||||
double x, y;
|
||||
|
||||
while (fin >> x >> y) {
|
||||
Point point({x, y});
|
||||
points.push_back(point);
|
||||
}
|
||||
return points;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Point& point) {
|
||||
for (auto coordinate : point.coordinates) {
|
||||
os << coordinate << "\t";
|
||||
}
|
||||
os << point.clusterID;
|
||||
return os;
|
||||
}
|
||||
|
||||
void writePointsToFile(const std::vector<Point>& points,
|
||||
const std::string& filename) {
|
||||
std::ofstream fout(filename);
|
||||
for (auto point : points) {
|
||||
fout << point << "\n";
|
||||
}
|
||||
}
|
||||
51
lab07/upload/point.h
Normal file
51
lab07/upload/point.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef POINT_H
|
||||
#define POINT_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* Class representing a point in the dataset.
|
||||
*
|
||||
* Stores the coordinates of the point, its cluster ID, and whether it is a core
|
||||
* point.
|
||||
*/
|
||||
class Point {
 public:
  /// Construct from a coordinate vector (any dimensionality).
  Point(const std::vector<double>& coordinatesIn);

  /// Unchecked access to coordinate i (mutable / read-only overloads).
  double& operator()(int i);
  const double& operator()(int i) const;

  /// SQUARED Euclidean distance to `other` — no sqrt is taken
  /// (see the implementation in point.cpp).
  double distance(const Point& other) const;

  std::vector<double> coordinates;
  // Cluster label: -1 = unclassified, -2 = noise, >0 = cluster index
  // (assigned by DBSCAN::run / expandCluster).
  int clusterID = -1;
  // NOTE(review): never assigned anywhere in the visible code — confirm
  // whether this flag is still needed.
  bool isCorePoint = false;
};
|
||||
|
||||
/**
|
||||
* Read points from a file and return them as a vector of Point objects.
|
||||
*/
|
||||
std::vector<Point> readPointsFromFile(const std::string& filename);
|
||||
|
||||
/**
|
||||
* Print a point to an output stream. The
|
||||
* coordinates are separated by tabs, and the
|
||||
* cluster ID is printed at the end.
|
||||
*/
|
||||
std::ostream& operator<<(std::ostream& os, const Point& point);
|
||||
|
||||
/**
|
||||
* Write points to a file.
|
||||
*
|
||||
* Each point is written on a new line, with
|
||||
* coordinates separated by tabs and the
|
||||
* cluster ID at the end.
|
||||
*
|
||||
* Can be read with numpy.loadtxt, the last column give the cluster ID.
|
||||
*/
|
||||
void writePointsToFile(const std::vector<Point>& points,
|
||||
const std::string& filename);
|
||||
|
||||
#endif // POINT_H
|
||||
19
lab07/upload/run.cpp
Normal file
19
lab07/upload/run.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include "dbscan.h"
|
||||
|
||||
using namespace HPC;
|
||||
|
||||
|
||||
int main() {
|
||||
|
||||
std::vector<Point> points = readPointsFromFile("data");
|
||||
|
||||
DBSCAN ds(5, 0.01);
|
||||
ds.run(points);
|
||||
|
||||
writePointsToFile(ds.getPoints(), "clustered");
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user