"""
Copyright (C) 2023 Università degli Studi di Camerino and Sigma S.p.A.
Authors: Alessandro Antinori, Rosario Capparuccia, Riccardo Coltrinari, Flavio Corradini, Marco Piangerelli, Barbara Re, Marco Scarpetta
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import pandas
from rain.core.parameter import KeyValueParameter, Parameters
from rain.nodes.sklearn.node_structure import SklearnClusterer
from sklearn.cluster import KMeans
class SimpleKMeans(SklearnClusterer):
    """Clustering node that wraps 'sklearn.cluster.KMeans'.

    Input
    -----
    fitted_model : sklearn.base.BaseEstimator
        A previously fitted model.
    dataset : pandas.DataFrame
        The dataset to be used by the estimator.
    score_targets : pandas.DataFrame
        The dataset that will be used as targets (labels) to perform the scoring.

    Output
    ------
    fitted_model : sklearn.base.BaseEstimator
        The model that results from the fit of the estimator.
    predictions : pandas.DataFrame
        The predictions that result from the predict.
    score_value : float
        The score value that results from the scoring.
    transformed_dataset : pandas.DataFrame
        The dataset that results from the transform.
    labels : pandas.DataFrame
        Labels of each point, as a single-column DataFrame (column 'labels').
        It is built from the 'labels_' attribute of the sklearn KMeans.

    Parameters
    ----------
    node_id : str
        Id of the node.
    execute : [fit, predict, score, transform]
        List of strings to specify the methods to execute.
        The allowed strings are those from the _method attribute.
    n_clusters : int
        The number of clusters to form as well as the number of centroids to generate.
    """

    # Extra output exposed by this node, on top of whatever the parent declares.
    _output_vars = {"labels": pandas.DataFrame}

    def __init__(self, node_id: str, execute: list, n_clusters: int = 8):
        super().__init__(node_id, execute)
        self.parameters = Parameters(
            n_clusters=KeyValueParameter("n_clusters", int, n_clusters)
        )
        # The estimator is configured once, from the parameter dictionary.
        self._estimator_or_function = KMeans(**self.parameters.get_dict())

    def execute(self):
        """Run the parent's execution, then publish the cluster labels.

        After the parent 'execute' returns, the 'labels_' attribute of
        'fitted_model' is exposed through the 'labels' output.
        """
        super().execute()
        # 'labels_' is a plain array on the sklearn estimator; wrap it so the
        # output matches the pandas.DataFrame type declared in _output_vars.
        # NOTE(review): the labels describe the data the model was fitted on,
        # so the default index is used rather than the index of 'dataset'.
        self.labels = pandas.DataFrame(
            self.fitted_model.labels_, columns=["labels"]
        )