Estimator template

To add a multimodal estimator based on the groundwork of scikit-multimodallearn, please feel free to use the following template, while complying with the Developer’s Guide of the scikit-learn project to ensure full compatibility.

import numpy as np
from sklearn.base import ClassifierMixin, BaseEstimator
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted
from multimodal.boosting.boost import UBoosting


class NewMultiModalEstimator(BaseEstimator, ClassifierMixin, UBoosting):
    r""""
    Your documentation
    """

    def __init__(self, your_attributes=None, ):
        self.your_attributes = your_attributes

    def fit(self, X, y, views_ind=None):
        """Build a multimodal classifier from the training set (X, y).

        Parameters
        ----------
        X : dict dictionary with all views
            or
            `MultiModalData` ,  `MultiModalArray`, `MultiModalSparseArray`
            or
            {array-like, sparse matrix}, shape = (n_samples, n_features)
            Training multi-view input samples.
            Sparse matrix can be CSC, CSR, COO, DOK, or LIL.
            COO, DOK and LIL are converted to CSR.

        y : array-like, shape = (n_samples,)
            Target values (class labels).

        views_ind : array-like (default=[0, n_features//2, n_features])
            Paramater specifying how to extract the data views from X:

            - If views_ind is a 1-D array of sorted integers, the entries
              indicate the limits of the slices used to extract the views,
              where view ``n`` is given by
              ``X[:, views_ind[n]:views_ind[n+1]]``.

              With this convention each view is therefore a view (in the NumPy
              sense) of X and no copy of the data is done.

            - If views_ind is an array of arrays of integers, then each array
              of integers ``views_ind[n]`` specifies the indices of the view
              ``n``, which is then given by ``X[:, views_ind[n]]``.

              With this convention each view creates therefore a partial copy
              of the data in X. This convention is thus more flexible but less
              efficient than the previous one.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError  estimator must support sample_weight

        ValueError where `X` and `view_ind` are not compatibles
        """

        # _global_X_transform processes the multimodal dataset to transform the
        # in the MultiModalArray format.
        self.X_ = self._global_X_transform(X, views_ind=views_ind)

        # Ensure proper format for views_ind and return number of views.
        views_ind_, n_views = self.X_._validate_views_ind(self.X_.views_ind,
                                                          self.X_.shape[1])

        # According to scikit learn guidelines.
        check_X_y(self.X_, y)
        if not isinstance(y, np.ndarray):
            y = np.asarray(y)
        check_classification_targets(y)
        self._validate_estimator()

        return self


    def predict(self, X):
        """Predict classes for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Multi-view input samples.
            Sparse matrix can be CSC, CSR, COO, DOK, or LIL.
            COO, DOK and LIL are converted to CSR.

        Returns
        -------
        y : numpy.ndarray, shape = (n_samples,)
            Predicted classes.

        Raises
        ------
        ValueError   'X' input matrix must be have the same total number of features
                     of 'X' fit data
        """
        # According to scikit learn guidelines
        check_is_fitted(self, ("your_attributes"))

        # _global_X_transform processes the multimodal dataset to transform the
        # in the MultiModalArray format.
        X = self._global_X_transform(X, views_ind=self.X_.views_ind)

        # Ensure that X is in the proper format.
        X = self._validate_X_predict(X)

        # Returning fake multi-class labels
        return np.random.randint(0, 5, size=X.shape[0])