MuCombo 3 views, 3 classes example

In this toy example, we generate data from three classes, split across three two-dimensional views.

For each view, the data are generated so that the points of two of the classes are well separated, while the points of the third class overlap with both other classes. This means that, taken separately, no single view allows a good classification of the data.

Nevertheless, the MuCuMBo algorithm takes advantage of the complementarity of the views to classify the points correctly (a single-view baseline check is sketched in the code below).

[Figure: Representation of the data (View 0, View 1, View 2)]

Out:


After 4 iterations, the MuCuMBo classifier reaches exact classification for the
learning samples:
  - iteration 1, score: 0.6666666666666666
  - iteration 2, score: 0.6666666666666666
  - iteration 3, score: 1.0

The resulting MuCuMBo classifier uses four sub-classifiers that are weighted
using the following weights:
  estimator weights alpha: [[0.54930614 0.54930614 0.54930614]]

The first figure displays the data, splitting the representation between the
three views.

The second figure displays the classification results for the sub-classifiers
on the learning sample data.

import numpy as np
from multimodal.boosting.combo import MuComboClassifier
from matplotlib import pyplot as plt


def generate_data(n_samples, lim):
    """Generate random data in a rectangle"""
    lim = np.array(lim)
    n_features = lim.shape[0]
    data = np.random.random((n_samples, n_features))
    data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0]
    return data
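
# For example (illustration only): generate_data(4, [[0., 1.], [0., 2.]])
# returns a (4, 2) array whose first feature is drawn in [0, 1] and whose
# second feature is drawn in [0, 2].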


seed = 12
np.random.seed(seed)

n_samples = 300

# View 0: classes 0 and 1 occupy disjoint x ranges; class 2 spans both.
view_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]])))

# View 1: classes 0 and 2 are separated; class 1 spans both.
view_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]])))

# View 2: classes 1 and 2 are separated; class 0 spans both.
view_2 = np.concatenate((generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]])))

# Stack the three views side by side: each sample then has six features.
X = np.concatenate((view_0, view_1, view_2), axis=1)

# Labels: the first n_samples points are class 0, the next class 1, the
# last class 2.
y = np.zeros(3*n_samples, dtype=np.int64)
y[n_samples:2*n_samples] = 1
y[2*n_samples:] = 2

views_ind = np.array([0, 2, 4, 6])
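# views_ind delimits the views inside X: view v occupies columns
# views_ind[v]:views_ind[v + 1] (so view 1 is X[:, 2:4]).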

# Fit a MuCuMBo classifier built from four boosted sub-classifiers.
n_estimators = 4
clf = MuComboClassifier(n_estimators=n_estimators)
clf.fit(X, y, views_ind)
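
# Added sanity check (not part of the original example): each view taken
# alone should plateau near 2/3 accuracy, since it separates only two of
# the three classes; scikit-learn's DecisionTreeClassifier is assumed here
# as a simple single-view baseline.
from sklearn.tree import DecisionTreeClassifier

for ind_view in range(3):
    view_data = X[:, views_ind[ind_view]:views_ind[ind_view + 1]]
    baseline = DecisionTreeClassifier(max_depth=1).fit(view_data, y)
    print('View {} alone, decision stump score: {:.3f}'.format(
        ind_view, baseline.score(view_data, y)))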

print('\nAfter 4 iterations, the MuCuMBo classifier reaches exact '
      'classification for the\nlearning samples:')
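# staged_score yields the cumulative training score after each boosting
# iteration, one value per iteration performed.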
for ind, score in enumerate(clf.staged_score(X, y)):
    print('  - iteration {}, score: {}'.format(ind + 1, score))

print('\nThe resulting MuCuMBo classifier uses four sub-classifiers that are '
      'weighted\nusing the following weights:\n'
      '  estimator weights alpha: {}'.format(clf.estimator_weights_alpha_))

# print('\nThe first sub-classifier uses the data of view 0 to compute '
#       'its classification\nresults, the second and third sub-classifiers use '
#       'the data of view 1, while the\nfourth one uses the data of '
#       'view 2:\n'
#       '  best views: {}'. format(clf.best_views_))

print('\nThe first figure displays the data, splitting the representation '
      'between the\nthree views.')

styles = ('.b', '.r', '.g')  # one point style per class: blue, red, green
fig = plt.figure(figsize=(12., 11.))
fig.suptitle('Representation of the data', size=16)
for ind_view in range(3):
    ax = plt.subplot(3, 1, ind_view + 1)
    ax.set_title('View {}'.format(ind_view))
    ind_feature = ind_view * 2
    for ind_class in range(3):
        ind_samples = (y == ind_class)
        ax.plot(X[ind_samples, ind_feature],
                X[ind_samples, ind_feature + 1],
                styles[ind_class],
                label='Class {}'.format(ind_class))
    ax.legend(loc='upper left', framealpha=0.9)

print('\nThe second figure displays the classification results for the '
      'sub-classifiers\non the learning sample data.\n')

# fig = plt.figure(figsize=(14., 11.))
# fig.suptitle('Classification results on the learning data for the '
#              'sub-classifiers', size=16)
# for ind_estimator in range(n_estimators):
#     best_view = clf.best_views_[ind_estimator]
#     y_pred = clf.estimators_[ind_estimator].predict(
#         X[:, 2*best_view:2*best_view+2])
#     background_color = (1.0, 1.0, 0.9)
#     for ind_view in range(3):
#         ax = plt.subplot(3, 4, ind_estimator + 4*ind_view + 1)
#         if ind_view == best_view:
#             ax.set_facecolor(background_color)
#         ax.set_title(
#             'Sub-classifier {} - View {}'.format(ind_estimator, ind_view))
#         ind_feature = ind_view * 2
#         for ind_class in range(3):
#             ind_samples = (y_pred == ind_class)
#             ax.plot(X[ind_samples, ind_feature],
#                     X[ind_samples, ind_feature + 1],
#                     styles[ind_class],
#                     label='Class {}'.format(ind_class))
#         ax.legend(title='Predicted class:', loc='upper left', framealpha=0.9)

plt.show()

Total running time of the script: ( 0 minutes 0.263 seconds)
