# Source code for bdranalytics.sklearn.preprocessing.tests.test_scaling

import unittest

import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from bdranalytics.sklearn.preprocessing import ScaledRegressor


[docs]class TestPreprocessing(unittest.TestCase):
[docs] @staticmethod def create_regression_dataset(n_rows=1000): """ Creates a data set with only numerical data """ X = np.random.rand(n_rows, 2) y = np.random.rand(n_rows)
return X, y
[docs] def test_dummy_pipeline(self): """ Just checking setup of a dummy regressor in a pipeline :return: None """ X, y = self.create_regression_dataset(n_rows=20) predictor_constant = 3 predictor = DummyRegressor( strategy="constant", constant=predictor_constant) y_hat = Pipeline([("predict", predictor)]).fit(X, y).predict(X)
np.allclose(y_hat, np.repeat(predictor_constant, len(y)))
[docs] def test_scaled_target(self): X, y = self.create_regression_dataset(n_rows=20) y_mean = np.mean(y) predictor_constant = 0 # 0 will be multiplied by std , and then added to the mean predictor = DummyRegressor( strategy="constant", constant=predictor_constant) scaler = StandardScaler() y_hat = Pipeline([("predict", ScaledRegressor(scaler, predictor))]).fit( X, y).predict(X)
np.allclose(y_hat, np.repeat(y_mean, len(y)))
[docs] def test_scaled_target_with_set_params(self): X, y = self.create_regression_dataset(n_rows=20) y_mean = np.mean(y) predictor_constant = 10 # 0 will be multiplied by std , and then added to the mean predictor = DummyRegressor( strategy="constant", constant=predictor_constant) scaler = StandardScaler() pipeline = Pipeline([("predict", ScaledRegressor(scaler, predictor))]) pipeline.set_params(predict__estimator__constant=0) y_hat = pipeline.fit(X, y).predict(X)
np.allclose(y_hat, np.repeat(y_mean, len(y))) if __name__ == '__main__': unittest.main()