Source code for rain.nodes.sklearn.decomposition

"""
 Copyright (C) 2023 Università degli Studi di Camerino and Sigma S.p.A.
 Authors: Alessandro Antinori, Rosario Capparuccia, Riccardo Coltrinari, Flavio Corradini, Marco Piangerelli, Barbara Re, Marco Scarpetta

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """

from sklearn.decomposition import PCA

from rain.core.parameter import Parameters, KeyValueParameter
from rain.nodes.sklearn.node_structure import (
    SklearnEstimator,
    TransformerMixin,
    ScorerMixin,
)


class SklearnPCA(SklearnEstimator, ScorerMixin, TransformerMixin):
    """
    Node representation of a sklearn PCA estimator that uses the
    'sklearn.decomposition.PCA'.

    Input
    -----
    fitted_model : sklearn.base.BaseEstimator
        A previously fitted model.
    dataset : pandas.DataFrame
        The dataset to be used by the estimator.
    score_targets : pandas.DataFrame
        The dataset that will be used as targets (labels) to perform the scoring.

    Output
    ------
    fitted_model : sklearn.base.BaseEstimator
        The model that results from the fit of the estimator.
    score_value : float
        The score value that results from the scoring.
    transformed_dataset : pandas.DataFrame
        The dataset that results from the transform.

    Parameters
    ----------
    execute : [fit, score, transform]
        List of strings to specify the methods to execute.
        The allowed strings are those from the _method attribute.
    n_components : int
        Number of components to keep.
    whiten : bool
        When True (False by default) the components_ vectors are multiplied
        by the square root of n_samples and then divided by the singular
        values to ensure uncorrelated outputs with unit component-wise
        variances.
    svd_solver : {auto, full, arpack, randomized}, default=auto
        Svd solver.
    tol : float, default=0.0
        Tolerance for singular values computed by svd_solver == 'arpack'.
        Must be non-negative.
    iterated_power : int or 'auto', default=auto
        Number of iterations for the power method computed by
        svd_solver == 'randomized'. Must be non-negative when given as a
        number. This node declares the parameter with type ``str``.
    random_state : int
        Used when the 'arpack' or 'randomized' solvers are used.
        Pass an int for reproducible results across multiple function calls.
    """

    def __init__(
        self,
        node_id: str,
        execute: list,
        n_components=None,
        *,
        whiten=False,
        svd_solver="auto",
        tol=0.0,
        iterated_power="auto",
        random_state=None
    ):
        super().__init__(node_id, execute)

        # Each entry records the parameter name, its declared type and the
        # value received by the constructor.
        self.parameters = Parameters(
            n_components=KeyValueParameter("n_components", int, n_components),
            whiten=KeyValueParameter("whiten", bool, whiten),
            svd_solver=KeyValueParameter("svd_solver", str, svd_solver),
            tol=KeyValueParameter("tol", float, tol),
            # NOTE(review): declared as ``str`` to match the "auto" default,
            # while PCA also accepts an int here -- confirm how
            # KeyValueParameter uses the declared type before passing ints.
            iterated_power=KeyValueParameter("iterated_power", str, iterated_power),
            random_state=KeyValueParameter("random_state", int, random_state),
        )

        # The estimator is built once, from the parameters declared above.
        self._estimator_or_function = PCA(**self.parameters.get_dict())