diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d4c0b7174f856f03407cbcf43ee3ccbf820bb58 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.16) +project(grouper) + +set(CMAKE_CXX_STANDARD 14) + +find_package(Python 3.8 COMPONENTS Interpreter Development REQUIRED) +find_package(OpenMP) + +if(Python_FOUND) + message(STATUS "Python Found: ${Python_EXECUTABLE}") + message(STATUS "Python Found: ${Python_INCLUDE_DIRS}") + message(STATUS "Python Found: ${Python_LIBRARIES}") + message(STATUS "Python Found: ${Python_LIBRARY_DIRS}") + include_directories(${Python_INCLUDE_DIRS}) +endif() + +add_library(grouper SHARED meanshift.cpp) +SET_TARGET_PROPERTIES(grouper PROPERTIES PREFIX "") +if(Python_FOUND) + target_link_libraries(grouper ${Python_LIBRARIES} OpenMP::OpenMP_CXX) +endif() + diff --git a/lsmtool/operations/_meanshiftc.py b/lsmtool/operations/_meanshiftc.py new file mode 100644 index 0000000000000000000000000000000000000000..901579e01a4b641619e7672be8aa8e778dc9b6c9 --- /dev/null +++ b/lsmtool/operations/_meanshiftc.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# +# Defines meanshift grouper functions used by the group operation. Based on +# implementation of Francesco de Gasperin (see +# https://github.com/revoltek/LiLF/blob/master/lib_dd.py) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import numpy as np +import grouper +import logging +log = logging.getLogger('LSMTool.Group') + + +class Grouper(object): + """ + Class to group a list of coordinates and fluxes by the meanshift algorighm. + Based on: http://www.chioka.in/meanshift-algorithm-for-the-rest-of-us-python/ + Parameters + ---------- + coords : list + List of (x, y) pixel coordinates for source positions + fluxes: list + total flux density for each source + kernel_size : float + Kernel size in pixels + n_iterations : int + Number of iterations + look_distance : float + Look distance in pixels + grouping_distance : float + Grouping distance in pixels + """ + def __init__(self, coords, fluxes, kernel_size, n_iterations, look_distance, + grouping_distance): + self.coords = np.array(coords) + self.fluxes = fluxes + self.kernel_size = kernel_size + self.n_iterations = n_iterations + self.look_distance = look_distance + self.grouping_distance = grouping_distance + self.past_coords = [np.copy(self.coords)] + self.clusters = [] + self.g = grouper.Grouper() + self.g.readCoordinates(self.coords, self.fluxes) + self.g.setKernelSize(self.kernel_size) + self.g.setNumberOfIterations(self.n_iterations) + self.g.setLookDistance(self.look_distance) + self.g.setGroupingDistance(self.grouping_distance) + + + def run(self): + self.g.run() + + def grouping(self): + """ + Take the last coords set and group sources nearby, then return a list of lists. + Each list has the index of one cluster. + """ + + self.g.group(self.clusters) + + log.info('Creating %i groups.' % len(self.clusters)) + return self.clusters diff --git a/meanshift.cpp b/meanshift.cpp new file mode 100644 index 0000000000000000000000000000000000000000..45e8293fac33a8844a95becfeb7049e16a3bb8ac --- /dev/null +++ b/meanshift.cpp @@ -0,0 +1,177 @@ +#include <iostream> +#include <memory> +#include <vector> +#include <cmath> +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> + + +namespace py = pybind11; +typedef double cdt; // coordinate data type, float or double +typedef std::vector<std::pair<cdt,cdt>> ctype; // type for coordinates + +class Grouper{ + +public: +Grouper() : _kernelSize(1.0), _numberOfIterations(10), _lookDistance(1.0), _groupingDistance(1.0){ + +}; +void setKernelSize(double kernelSize){_kernelSize = kernelSize;} +void setNumberOfIterations (int numberOfIterations){_numberOfIterations = numberOfIterations;} +void setLookDistance (double lookDistance){_lookDistance = lookDistance;} +void setGroupingDistance (double groupingDistance){_groupingDistance = groupingDistance;} +void readCoordinates(py::array_t<cdt> array, py::array_t<cdt> farray); +void run(); +void group(py::list l); + +private: +double gaussian_kernel(double distance); + ctype _coordinates; + std::vector<cdt> _fluxes; + double _kernelSize; + int _numberOfIterations; + double _lookDistance; + double _groupingDistance; + +}; + + +void Grouper::readCoordinates(py::array_t<cdt> array, py::array_t<cdt> farray){ + auto arraybuf = array.request(); + auto farraybuf = farray.request(); + cdt* coordinates = (cdt*) arraybuf.ptr; + cdt* fluxes = (cdt*) farraybuf.ptr; + if (arraybuf.ndim != 2 || arraybuf.shape[1] != 2){ + py::print("Grouper::readCoordinates: expected 2D array with two columns!"); + } + if (farraybuf.ndim != 1){ + py::print("Grouper::readCoordinates: expected 1D array!"); + } + unsigned N = arraybuf.shape[0]; + if (farraybuf.shape[0] != N){ + py::print("Grouper::readCoordinates: array of fluxes does not have the same length as array of coordinates!"); + } + for (unsigned n=0; n<N; ++n){ + _coordinates.push_back(std::pair<cdt, cdt>(coordinates[2*n],coordinates[2*n+1])); + _fluxes.push_back(fluxes[n]); + } + +} + +static double euclid_distance(std::pair<cdt, cdt> coordinate1, std::pair<cdt,cdt> coordinate2){ +// Euclidian distance between two points + double result = 0.0; + double distx = coordinate1.first - coordinate2.first; + double disty = coordinate1.second - coordinate2.second; + return sqrt(distx * distx + disty * disty); +} + +static std::vector<unsigned> neighbourhood_points(std::pair<cdt,cdt> centroid, ctype coordinates, double max_distance){ +std::vector<unsigned> result; + for (unsigned n=0; n<coordinates.size(); ++n){ + double distance = euclid_distance(centroid, coordinates[n]); + if (distance < max_distance){ + result.push_back(n); + } + } +return result; +} + +double Grouper::gaussian_kernel(double distance){ +// Gaussian kernel + double result = 1.0 / (_kernelSize * std::sqrt(2.0*3.14159265359)); + result *= std::exp(-0.5 * distance * distance / (_kernelSize * _kernelSize)); + return result; +} + +void Grouper::run(){ + // run the algorithm + + if (_coordinates.size() == 0){ + py::print("Grouper::run: coordinates have not been read!"); + } + + + ctype newcoords = _coordinates; + + for (unsigned it = 0; it < _numberOfIterations; ++it){ + py::print("starting iteration ", it); +#pragma omp parallel + for (unsigned n=0; n<_coordinates.size(); ++n){ + std::vector<unsigned> idx_neighbours = neighbourhood_points(_coordinates[n], _coordinates, _lookDistance); + double denominator = 0.0; + double numx = 0.0; + double numy = 0.0; + for (auto i : idx_neighbours){ + double distance = euclid_distance(_coordinates[i], _coordinates[n]); + double weight = gaussian_kernel(distance); + weight *= _fluxes[i]; + numx += weight * _coordinates[i].first; + numy += weight * _coordinates[i].second; + denominator += weight; + } + newcoords[n] = std::pair<cdt,cdt>(numx/denominator, numy/denominator); + } + double maxdiff = 0.0; + unsigned n=0; + for (auto coordinate : _coordinates){ + double diff = euclid_distance(coordinate, newcoords[n]); + if (diff > maxdiff){ + maxdiff = diff; + } + ++n; + } + _coordinates = newcoords; + if ((it > 1) && (maxdiff < _groupingDistance / 2.0)){ + break; + } + + } + + +} + +void Grouper::group(py::list l){ + + ctype coords_to_check = _coordinates; + + while (coords_to_check.size() > 0){ + auto idx_cluster = neighbourhood_points(coords_to_check[0], _coordinates, _groupingDistance); + auto idx_cluster_to_remove = neighbourhood_points(coords_to_check[0], coords_to_check, _groupingDistance); + unsigned n = 0; + for (auto idx : idx_cluster_to_remove){ + coords_to_check.erase(coords_to_check.begin() + idx - n); + ++n; + } + + py::list newcluster; + for (auto idx : idx_cluster){ + newcluster.append(py::int_(idx)); + } + l.append(newcluster); + + } +} + +PYBIND11_MODULE(grouper, m) +{ + py::class_<Grouper>(m, "Grouper") + .def(py::init<>()) + .def("run", &Grouper::run) + .def("setKernelSize", &Grouper::setKernelSize) + .def("setNumberOfIterations", &Grouper::setNumberOfIterations) + .def("setLookDistance", &Grouper::setLookDistance) + .def("setGroupingDistance", &Grouper::setGroupingDistance) + .def("readCoordinates", &Grouper::readCoordinates) + .def("group", &Grouper::group) +; +} + + +int main(){ + + Py_Initialize(); + + return 0; +} + diff --git a/test.py b/test.py new file mode 100644 index 0000000000000000000000000000000000000000..dd5366e5c91ee0fce030b88adb240df26a03b535 --- /dev/null +++ b/test.py @@ -0,0 +1,18 @@ +import grouper +import numpy as np + +a = np.array([[1.0, 2.0],[3.0,4.0],[5.0,6.0]]) +f = np.array([1.0,2.0,3.0]) + +g = grouper.Grouper() + +g.readCoordinates(a,f) + +g.setLookDistance(5.0) +g.setGroupingDistance(5.0) + +g.run() +b = [] +g.group(b) + +print(b) diff --git a/testlsm.py b/testlsm.py new file mode 100644 index 0000000000000000000000000000000000000000..dfd4b4a37c95d91f5f1a65289a8e242ee5ed491b --- /dev/null +++ b/testlsm.py @@ -0,0 +1,6 @@ +import lsmtool + +s = lsmtool.load('test_large.sky') + +s.group('meanshift', byPatch=True,lookDistance=0.075,groupingDistance=0.01) +