Source code for hal.ml.features

#!/usr/bin/env python
# coding: utf-8


"""Collection of methods to find weights of features and select the best
ones"""

from sklearn.feature_selection import SelectKBest, chi2, RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC


class FeatureSelect:
    """Selects best features"""

    def __init__(self, x, y):
        """
        :param x: x matrix
        :param y: y array
        """
        self.x_train = x
        self.y_train = y

    def select_k_best(self, k):
        """Selects k best features in dataset

        Features are scored with the chi-squared statistic against
        ``self.y_train`` and the ``k`` highest-scoring ones are kept.

        :param k: features to select
        :return: ``self.x_train`` reduced to its k best features
        """
        selector = SelectKBest(chi2, k=k)
        return selector.fit_transform(self.x_train, self.y_train)

    def get_best(self):
        """Finds the optimal number of features

        Runs recursive feature elimination with cross-validation (RFECV)
        around a linear SVC, dropping one feature per step and scoring each
        candidate subset with negative log-loss on 2 stratified folds.

        :return: optimal number of features and ranking
        """
        # The log-loss scorer calls ``predict_proba``, which SVC only exposes
        # when probability estimates are enabled.
        svc = SVC(kernel="linear", probability=True)
        rfecv = RFECV(
            estimator=svc,
            step=1,
            # ``model_selection.StratifiedKFold`` takes the number of splits;
            # the labels are supplied later through ``fit``.
            cv=StratifiedKFold(n_splits=2),
            # "log_loss" is not a valid scorer name; scorers follow the
            # greater-is-better convention, hence the "neg_" prefix.
            scoring="neg_log_loss",
        )
        rfecv.fit(self.x_train, self.y_train)
        return rfecv.n_features_, rfecv.ranking_