
For a school project I've created the following AdaBoost classifier implementation in Python:

import numpy as np

from DecisionStump import *
from sklearn.base import BaseEstimator


class AdaBoost(BaseEstimator):
    def __init__(self, boosting_rounds):
        self.boosting_rounds = boosting_rounds  # number of weak learners used

    def fit(self, x, y):
        self.feature_names_in_ = np.array(x.columns)  # BaseEstimator required
        self.n_features_in_ = x.shape[1]  # BaseEstimator required

        self.weak_learners = []  # [ (learner, amount of say), ... ]
        self.decision_stumps = []

        n_rows = x.shape[0]  # number of examples in training set
        attributes = x.columns  # features in training set

        # dataset example weights
        d = np.full(n_rows, 1 / n_rows)

        # preparing all decision stumps
        for a in attributes:
            # finding possible thresholds for decision stump
            values = x[a].unique()
            values.sort()
            thresholds = []
            for i in range(1, len(values)):
                thresholds.append((values[i] + values[i - 1]) / 2)
            for threshold in thresholds:
                self.decision_stumps.append(DecisionStump(a, threshold))

        for t in range(0, self.boosting_rounds):
            # choosing decision stump that minimizes weights dependent error
            min_error = float("inf")
            h = None  # chosen weak learner

            for ds in self.decision_stumps:
                ds_error = 0
                predictions = ds.predict(x)
                for i in range(0, n_rows):
                    if predictions[i] != y.iloc[i, 0]:  # misclassification
                        ds_error += d[i]
                if ds_error < min_error:
                    min_error = ds_error
                    h = ds

            # amount of say
            small = 1e-10
            alpha = 0.5 * np.log((1 - min_error) / (min_error + small))  # small avoids division by 0

            # storing learner with corresponding amount of say
            self.weak_learners.append((h, alpha))
            print("Added weak learner: Decision stump of feature \"" + str(h.attribute) + "\" with threshold " + str(
                h.threshold) + " with error " + str(min_error))

            # updating weights
            wl_predictions = h.predict(x)
            for i in range(0, len(d)):
                d[i] = d[i] * np.exp(-alpha * y.iloc[i, 0] * wl_predictions[i])  # scalar label, same indexing as in the error computation
            z = np.sum(d)  # normalisation factor
            for i in range(0, len(d)):
                d[i] /= z

        return self  # BaseEstimator required

    def predict(self, x):
        ab_prediction = np.zeros(x.shape[0])  # AdaBoost prediction: weighted vote of all weak learners
        for wl, alpha in self.weak_learners:
            ab_prediction = ab_prediction + wl.predict(x) * alpha

        ab_prediction = np.sign(ab_prediction)
        # casting float items to int
        return [int(p) for p in ab_prediction]

    # Used when plotting learning curves with scikit-learn
    def score(self, x, y):
        predictions = self.predict(x)
        # flatten y so the comparison stays element-wise even if y is a one-column DataFrame
        accuracy = 100 * (np.array(predictions) == np.ravel(y.values)).sum() / len(y)
        return accuracy
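
For reference, this is roughly how I call it. The toy data below is just an illustration I made up for this question (my real dataset is much larger); the assumptions it makes explicit are that x is a pandas DataFrame, y is a one-column DataFrame and the labels are -1/+1:

import pandas as pd

# made-up toy data: x is a DataFrame of features, y a one-column DataFrame of -1/+1 labels
x = pd.DataFrame({"height": [150, 160, 170, 180], "weight": [50, 60, 70, 80]})
y = pd.DataFrame({"label": [-1, -1, 1, 1]})

model = AdaBoost(boosting_rounds=5)
model.fit(x, y)
print(model.predict(x))
print(model.score(x, y))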

As you can see, I create a DecisionStump for every possible threshold of every attribute, so when I call fit() on a dataset containing features with many distinct values (e.g. people's heights), training takes far too long. I think I should only use a subset of all possible decision stumps, but what criterion should I use to pick them? Or should I instead group the high-cardinality features into classes (e.g. class 1: height between 150 and 155, class 2: height between 155 and 160, ...)?
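
For example, one idea I had for picking a subset is to cap the number of candidate thresholds per feature by using quantile-based cut points instead of the midpoint between every pair of adjacent unique values. This is only a rough sketch of what I mean (candidate_thresholds and max_thresholds are names and a limit I made up):

import numpy as np


def candidate_thresholds(column, max_thresholds=10):
    # column is a pandas Series holding one feature of the training set
    values = np.sort(column.unique())
    if len(values) <= max_thresholds + 1:
        # few distinct values: keep the usual midpoints between adjacent values
        return (values[1:] + values[:-1]) / 2
    # many distinct values: use evenly spaced interior quantiles as thresholds,
    # which effectively bins the feature like the "classes" idea above
    quantiles = np.linspace(0, 1, max_thresholds + 2)[1:-1]
    return np.unique(np.quantile(column, quantiles))

The loop in fit() would then iterate over candidate_thresholds(x[a]) instead of all midpoints, so a feature like height contributes at most max_thresholds stumps rather than one per distinct value. Would that be a reasonable criterion, or is there a better one?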
