Skip to content

Commit

Permalink
fix for parameter uncertainty in additive generic models
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixWick committed Nov 25, 2023
1 parent e7e16cf commit d28924b
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
27 changes: 26 additions & 1 deletion cyclic_boosting/generic_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,32 @@ def uncertainty_gamma(y: np.ndarray, weights: np.ndarray) -> float:


def uncertainty_gaussian(y: np.ndarray, weights: np.ndarray) -> float:
    """Estimate the uncertainty (standard error) of the weighted mean of ``y``.

    The observation variance is estimated via a Bayesian update of an
    inverse-gamma prior (normal/inverse-gamma conjugacy), which keeps the
    estimate well-behaved for small or near-degenerate samples. A small
    regularization weight ``w0`` guards against zero total weight.

    Parameters
    ----------
    y : np.ndarray
        Observed target values.
    weights : np.ndarray
        Non-negative sample weights, same shape as ``y``.

    Returns
    -------
    float
        Estimated standard deviation of the weighted mean of ``y``.
    """
    sum_weights = np.sum(weights)
    mean_y = np.sum(weights * y) / sum_weights
    weighted_squared_residual_sum = np.sum(weights * (y - mean_y) ** 2)

    # Empirical weighted variance serves as the prior scale; fall back to 1.0
    # for (near-)constant samples so the prior stays proper.
    variance_prior = weighted_squared_residual_sum / sum_weights
    if variance_prior <= 1e-9:
        variance_prior = 1.0

    # Inverse-gamma posterior for the observation variance: prior with
    # pseudo-count n_prior, updated with the weighted squared residuals.
    n_prior = 1
    a_0 = 0.5 * n_prior
    b_0 = a_0 * variance_prior
    a = a_0 + 0.5 * sum_weights
    b = b_0 + 0.5 * weighted_squared_residual_sum
    variance_y = b / a  # point estimate of the observation variance

    # Variance of the weighted mean: sum(w_i^2 * var) / (sum w_i)^2, with a
    # small regularization weight w0 added to both sums to avoid division by
    # zero for empty/zero-weight input.
    w = weights / variance_y
    w0 = 1e-2
    sum_w = np.sum(w) + w0
    sum_vw = np.sum(weights * w) + w0
    variance_weighted_mean = sum_vw / sum_w**2

    return np.sqrt(variance_weighted_mean)


def uncertainty_beta(y: np.ndarray, weights: np.ndarray, link_func) -> float:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyclic-boosting"
version = "1.2.2"
version = "1.2.3"
description = "Implementation of Cyclic Boosting machine learning algorithms"
authors = ["Blue Yonder GmbH"]
packages = [{include = "cyclic_boosting"}]
Expand Down
19 changes: 14 additions & 5 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,10 +724,10 @@ def test_additive_quantile_regression_median(is_plot, prepare_data, default_feat
yhat = CB_est.predict(X.copy())

quantile_acc = evaluate_quantile(y, yhat)
np.testing.assert_almost_equal(quantile_acc, 0.4973, 3)
np.testing.assert_almost_equal(quantile_acc, 0.4950, 3)

mad = np.nanmean(np.abs(y - yhat))
np.testing.assert_almost_equal(mad, 1.6991, 3)
np.testing.assert_almost_equal(mad, 1.7062, 3)


def test_additive_quantile_regression_90(is_plot, prepare_data, default_features, feature_properties):
Expand All @@ -743,23 +743,32 @@ def test_additive_quantile_regression_90(is_plot, prepare_data, default_features
yhat = CB_est.predict(X.copy())

quantile_acc = evaluate_quantile(y, yhat)
np.testing.assert_almost_equal(quantile_acc, 0.8969, 3)
np.testing.assert_almost_equal(quantile_acc, 0.8934, 3)


def test_additive_regression_mad(is_plot, prepare_data, default_features, feature_properties):
    """Integration test: additive CB generic regression with MAD costs
    reproduces the expected mean absolute deviation on the fixture data."""
    X, y = prepare_data
    X = X[default_features]

    plobs = [
        observers.PlottingObserver(iteration=1),
        observers.PlottingObserver(iteration=-1),
    ]

    CB_est = pipeline_CBAdditiveGenericCRegressor(
        feature_properties=feature_properties,
        costs=costs_mad,
        observers=plobs,
    )
    CB_est.fit(X.copy(), y)

    if is_plot:
        plot_CB("analysis_CB_iterlast", [CB_est[-1].observers[-1]], CB_est[-2])

    yhat = CB_est.predict(X.copy())

    mad = np.nanmean(np.abs(y - yhat))
    # Expected value updated for the Bayesian variance estimate in
    # uncertainty_gaussian (previously 1.6991).
    np.testing.assert_almost_equal(mad, 1.7062, 3)


def test_additive_regression_mse(is_plot, prepare_data, default_features, feature_properties):
Expand All @@ -775,7 +784,7 @@ def test_additive_regression_mse(is_plot, prepare_data, default_features, featur
yhat = CB_est.predict(X.copy())

mad = np.nanmean(np.abs(y - yhat))
np.testing.assert_almost_equal(mad, 1.748, 3)
np.testing.assert_almost_equal(mad, 1.738, 3)


def test_multiplicative_regression_mad(is_plot, prepare_data, default_features, feature_properties):
Expand Down

0 comments on commit d28924b

Please sign in to comment.