# Source code for bdranalytics.pdlearn.tests.test_pipeline
import numpy as np
import pandas as pd
import unittest
from sklearn.pipeline import FeatureUnion, Pipeline
from bdranalytics.pdlearn.pipeline import PdFeatureUnion, PdFeatureChain
from bdranalytics.pdlearn.preprocessing import PdLagTransformer, PdWindowTransformer
class TestLagTransformer(unittest.TestCase):
    """Tests for the pandas-aware transformers and their compositions.

    Despite the class name, the cases here cover ``PdLagTransformer``,
    ``PdWindowTransformer``, ``PdFeatureUnion`` and ``PdFeatureChain``.
    """

    # Column labels shared by every DataFrame fixture in this test case.
    _COLUMNS = ["col1", "col2", "col3"]

    @classmethod
    def _frame(cls, values):
        """Return *values* reshaped to 5x3 as a DataFrame with the shared columns."""
        return pd.DataFrame(data=values.reshape(5, 3), columns=list(cls._COLUMNS))

    @staticmethod
    def _rolling_max():
        """Return a fresh ``PdWindowTransformer`` taking the max over a window of 2."""
        return PdWindowTransformer(lambda window: window.max(), window=2)

    def test_lagtransformer(self):
        """A lag of 1 on a DataFrame suffixes the columns and shifts the rows."""
        orig_data = self._frame(np.arange(15))
        lagged = PdLagTransformer(1).fit_transform(orig_data)
        np.testing.assert_array_equal(
            lagged.columns, ["col1_lag1", "col2_lag1", "col3_lag1"])
        # Row 1 of the lagged output must carry the values of original row 0.
        np.testing.assert_array_equal(lagged.iloc[1, :], orig_data.iloc[0, :])
        # Row 0 has no predecessor, so it is expected to be all NaN.
        np.testing.assert_array_equal(lagged.iloc[0, :], np.repeat(np.nan, 3))

    def test_lagtransformer_on_numpy(self):
        """A lag of 1 on a plain ndarray names the columns by position."""
        orig_data = np.arange(15).reshape(5, 3)
        lagged = PdLagTransformer(1).fit_transform(orig_data)
        np.testing.assert_array_equal(
            lagged.columns, ["0_lag1", "1_lag1", "2_lag1"])
        np.testing.assert_array_equal(lagged.iloc[1, :], orig_data[0, :])
        np.testing.assert_array_equal(lagged.iloc[0, :], np.repeat(np.nan, 3))

    def test_windowtransformer(self):
        """A rolling max over 2 rows of a DataFrame suffixes columns with the window."""
        orig_data = self._frame(np.arange(14, -1, -1))
        result = self._rolling_max().fit_transform(orig_data)
        np.testing.assert_array_equal(
            result.columns, ["col1_window2", "col2_window2", "col3_window2"])
        np.testing.assert_array_equal(result.iloc[0, :], np.repeat(np.nan, 3))
        # orig data is [[14, 13, 12], [11, 10, 9], ...] (descending), thus the
        # rolling max at row 1 should be the values of row 0.
        np.testing.assert_array_equal(result.iloc[1, :], orig_data.iloc[0, :])

    def test_windowtransformer_on_numpy(self):
        """A rolling max over 2 rows of an ndarray names the columns by position."""
        orig_data = np.arange(14, -1, -1).reshape(5, 3)
        result = self._rolling_max().fit_transform(orig_data)
        np.testing.assert_array_equal(
            result.columns, ["0_window2", "1_window2", "2_window2"])
        np.testing.assert_array_equal(result.iloc[0, :], np.repeat(np.nan, 3))
        # orig data is [[14, 13, 12], [11, 10, 9], ...] (descending), thus the
        # rolling max at row 1 should be the values of row 0.
        np.testing.assert_array_equal(result.iloc[1, :], orig_data[0, :])

    def test_featureunion(self):
        """``PdFeatureUnion`` places each transformer's columns side by side."""
        orig_data = self._frame(np.arange(15))
        result = PdFeatureUnion([
            ("lag", PdLagTransformer(1)),
            ("window", self._rolling_max()),
        ]).fit_transform(orig_data)
        np.testing.assert_array_equal(
            result.columns,
            ["col1_lag1", "col2_lag1", "col3_lag1",
             "col1_window2", "col2_window2", "col3_window2"])
        # Each column slice must equal the output of its transformer run alone.
        np.testing.assert_array_equal(
            result.iloc[:, 0:3],
            PdLagTransformer(1).fit_transform(orig_data))
        np.testing.assert_array_equal(
            result.iloc[:, 3:6],
            self._rolling_max().fit_transform(orig_data))
        # The values must agree with scikit-learn's own FeatureUnion.
        np.testing.assert_array_equal(
            result,
            FeatureUnion([
                ("lag", PdLagTransformer(1)),
                ("window", self._rolling_max()),
            ]).fit_transform(orig_data))

    def test_featurechain(self):
        """``PdFeatureChain`` applies its steps in order and chains the suffixes."""
        orig_data = self._frame(np.arange(15))
        result = PdFeatureChain([
            ("lag", PdLagTransformer(1)),
            ("window", self._rolling_max()),
        ]).fit_transform(orig_data)
        np.testing.assert_array_equal(
            result.columns,
            ["col1_lag1_window2", "col2_lag1_window2", "col3_lag1_window2"])
        # The chain must equal applying the window step to the lag step's output.
        np.testing.assert_array_equal(
            result,
            self._rolling_max().fit_transform(
                PdLagTransformer(1).fit_transform(orig_data)))
        # The values must agree with scikit-learn's own Pipeline.
        np.testing.assert_array_equal(
            result,
            Pipeline(steps=[
                ("lag", PdLagTransformer(1)),
                ("window", self._rolling_max()),
            ]).fit_transform(orig_data))