Source code for rain.nodes.sklearn.cluster

"""
 Copyright (C) 2023 Università degli Studi di Camerino and Sigma S.p.A.
 Authors: Alessandro Antinori, Rosario Capparuccia, Riccardo Coltrinari, Flavio Corradini, Marco Piangerelli, Barbara Re, Marco Scarpetta

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """

import pandas

from rain.core.parameter import KeyValueParameter, Parameters
from rain.nodes.sklearn.node_structure import SklearnClusterer
from sklearn.cluster import KMeans


[docs]class SimpleKMeans(SklearnClusterer): """A clusterer for the sklearn KMeans that uses the 'sklearn.cluster.KMeans'. Input ----- fitted_model : sklearn.base.BaseEstimator A previously fitted model. dataset : pandas.DataFrame The dataset to be used by the estimator. score_targets : pandas.DataFrame The dataset that will be used as targets (labels) to perform the scoring. Output ------ fitted_model : sklearn.base.BaseEstimator The model that results from the fit of the estimator. predictions : pandas.DataFrame The predictions that result from the predict. score_value : float The score value that results from the scoring. transformed_dataset : pandas.DataFrame The dataset that results from the transform. labels : pandas.DataFrame Labels of each point. It corresponds to the 'labels_' attribute of the sklearn KMeans. Parameters ---------- node_id : str Id of the node. execute : [fit, predict, score, transform] List of strings to specify the methods to execute. The allowed strings are those from the _method attribute. n_clusters : int The number of clusters to form as well as the number of centroids to generate. """ _output_vars = {"labels": pandas.DataFrame} def __init__(self, node_id: str, execute: list, n_clusters: int = 8): super(SimpleKMeans, self).__init__(node_id, execute) self.parameters = Parameters( n_clusters=KeyValueParameter("n_clusters", int, n_clusters) ) self._estimator_or_function = KMeans(**self.parameters.get_dict())
[docs] def execute(self): super(SimpleKMeans, self).execute() self.labels = self.fitted_model.labels_