diff --git a/cyclic_boosting/generic_loss.py b/cyclic_boosting/generic_loss.py index a475fbf..766cfb2 100644 --- a/cyclic_boosting/generic_loss.py +++ b/cyclic_boosting/generic_loss.py @@ -13,7 +13,7 @@ from cyclic_boosting.base import CyclicBoostingBase, gaussian_matching_by_quantiles, Feature, CBLinkPredictionsFactors from cyclic_boosting.link import LogLinkMixin, IdentityLinkMixin, LogitLinkMixin -from cyclic_boosting.utils import continuous_cdf_from_discrete_pdf, get_X_column +from cyclic_boosting.utils import continuous_quantile_from_discrete_pdf, get_X_column from cyclic_boosting.classification import get_beta_priors from typing import Tuple, Union @@ -408,7 +408,7 @@ def quantile_global_scale( if weights is None: raise RuntimeError("The weights have to be initialized.") - global_scale_link_ = link_func(continuous_cdf_from_discrete_pdf(y, quantile)) + global_scale_link_ = link_func(continuous_quantile_from_discrete_pdf(y, quantile)) prior_pred_link_offset_ = None if prior_prediction_column is not None: diff --git a/cyclic_boosting/quantile_matching.py b/cyclic_boosting/quantile_matching.py index d1f065f..3d879c6 100644 --- a/cyclic_boosting/quantile_matching.py +++ b/cyclic_boosting/quantile_matching.py @@ -6,24 +6,24 @@ from typing import Optional -def cdf_fit_gaussian(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: +def quantile_fit_gaussian(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: """ - Interpolation between pairs of CDF values (potentially estimated by means - of quantile regression) and corresponding quantiles according to a - Gaussian distribution as assumed PDF. + Interpolation of a quantile function (with quantiles estimated, e.g., by + means of quantile regression) according to a Gaussian distribution as + assumed PDF. 
Parameters ---------- quantiles : np.ndarray - quantile values - cdf_values : np.ndarray - CDF values corresponding to quantile values + quantiles (x values of quantile function) + quantile_values : np.ndarray + quantile values (y values of quantile function) mode : str decides about kind of returned callable, possible values are: - ``ppf``: input quantile, output CDF value (default) - ``dist``: fitted Gaussian (scipy function) - ``cdf``: input CDF value, output quantile + - ``ppf``: quantile (default) + - ``dist``: fitted Gaussian (scipy function) + - ``cdf``: CDF function Returns ------- @@ -32,9 +32,9 @@ def cdf_fit_gaussian(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Option """ def f(x, mu, sigma): - return norm(loc=mu, scale=sigma).cdf(x) + return norm(loc=mu, scale=sigma).ppf(x) - mu, sigma = curve_fit(f, cdf_values, quantiles)[0] + mu, sigma = curve_fit(f, quantiles, quantile_values)[0] if mode == "ppf": return norm(mu, sigma).ppf elif mode == "dist": @@ -45,25 +45,24 @@ def f(x, mu, sigma): raise Exception("Invalid mode.") -def cdf_fit_gamma(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: +def quantile_fit_gamma(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: """ - Interpolation between pairs of CDF values (potentially estimated by means - of quantile regression) and corresponding quantiles according to a - Gamma distribution as assumed PDF (i.e., continuous, non-negative target - values). + Interpolation of a quantile function (with quantiles estimated, e.g., by + means of quantile regression) according to a Gamma distribution as assumed + PDF (i.e., continuous, non-negative target values). 
Parameters ---------- quantiles : np.ndarray - quantile values - cdf_values : np.ndarray - CDF values corresponding to quantile values + quantiles (x values of quantile function) + quantile_values : np.ndarray + quantile values (y values of quantile function) mode : str decides about kind of returned callable, possible values are: - ``ppf``: input quantile, output CDF value (default) - ``dist``: fitted Gamma (scipy function) - ``cdf``: input CDF value, output quantile + - ``ppf``: quantile (default) + - ``dist``: fitted Gamma (scipy function) + - ``cdf``: CDF function Returns ------- @@ -72,9 +71,9 @@ def cdf_fit_gamma(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[ """ def f(x, alpha, beta): - return gamma(alpha, scale=1 / beta).cdf(x) + return gamma(alpha, scale=1 / beta).ppf(x) - alpha, beta = curve_fit(f, cdf_values, quantiles, p0=[2.0, 0.9])[0] + alpha, beta = curve_fit(f, quantiles, quantile_values, p0=[2.0, 0.9])[0] if mode == "ppf": return gamma(alpha, scale=1 / beta).ppf elif mode == "dist": @@ -88,47 +87,52 @@ def f(x, alpha, beta): def _nbinom_cdf_mu_var(x: float, mu: float, var: float) -> callable: """ Calculation of negative binomial parameters n and p from given mean and - variance, and subsequent call of cumulative distribution function. + variance, and subsequent call of its cumulative distribution function. 
+ Parameters ---------- x : float - value of random variable following a negative binomial distribution + value of random variable following negative binomial distribution mu : float mean of negative binomial distribution var : float variance of negative binomial distribution + + Returns + ------- + float + value of the negative binomial cumulative distribution function at x """ n = mu * mu / (var - mu) p = mu / var return nbinom(n, p).cdf(x) -def cdf_fit_nbinom(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: +def quantile_fit_nbinom(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable: """ - Interpolation between pairs of CDF values (potentially estimated by means - of quantile regression) and corresponding quantiles according to a - negative binomial distribution as assumed PDF (i.e., discrete, non-negative - target values). + Interpolation of a quantile function (with quantiles estimated, e.g., by + means of quantile regression) according to a negative binomial distribution + as assumed PDF (i.e., discrete, non-negative target values). 
Parameters ---------- quantiles : np.ndarray - quantile values - cdf_values : np.ndarray - CDF values corresponding to quantile values + quantiles (x values of quantile function) + quantile_values : np.ndarray + quantile values (y values of quantile function) mode : str decides about kind of returned callable, possible values are: - - ``ppf``: input quantile, output CDF value (default) + - ``ppf``: quantile (default) - ``dist``: fitted negative binomial (scipy function) - - ``cdf``: input CDF value, output quantile + - ``cdf``: CDF function Returns ------- callable fitted negative binomial function (see mode) """ - mu, var = curve_fit(_nbinom_cdf_mu_var, cdf_values, quantiles, p0=[1.2, 1.4])[0] + mu, var = curve_fit(_nbinom_cdf_mu_var, quantile_values, quantiles, p0=[2.2, 2.4])[0] n = mu * mu / (var - mu) p = mu / var if mode == "ppf": @@ -141,23 +145,23 @@ def cdf_fit_nbinom(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional raise Exception("Invalid mode.") -def cdf_fit_spline(quantiles: np.ndarray, cdf_values: np.ndarray) -> callable: +def quantile_fit_spline(quantiles: np.ndarray, quantile_values: np.ndarray) -> callable: """ - Interpolation between pairs of CDF values (potentially estimated by means - of quantile regression) and corresponding quantiles according to a - smoothing spline (i.e., arbitrary target distribution). + Interpolation of a quantile function (with quantiles estimated, e.g., by + means of quantile regression) according to a smoothing spline (i.e., + arbitrary target distribution). 
Parameters ---------- quantiles : np.ndarray - quantile values - cdf_values : np.ndarray - CDF values corresponding to quantile values + quantiles (x values of quantile function) + quantile_values : np.ndarray + quantile values (y values of quantile function) Returns ------- callable - fitted spline function (input quantile, output CDF value) + spline fitted to quantile function """ - spl = InterpolatedUnivariateSpline(quantiles, cdf_values, k=3, bbox=[0, 1], ext=3) + spl = InterpolatedUnivariateSpline(quantiles, quantile_values, k=3, bbox=[0, 1], ext=3) return spl diff --git a/cyclic_boosting/utils.py b/cyclic_boosting/utils.py index e99431a..ffb3416 100644 --- a/cyclic_boosting/utils.py +++ b/cyclic_boosting/utils.py @@ -981,10 +981,10 @@ def get_feature_column_names(X, exclude_columns=[]): return features -def continuous_cdf_from_discrete_pdf(y, quantile): +def continuous_quantile_from_discrete_pdf(y, quantile): """ - Calculates a continous CDF value approximation for a given quantile from an - array of potentially discrete values. + Calculates a continuous quantile value approximation for a given quantile + from an array of potentially discrete values. Parameters ---------- @@ -996,7 +996,7 @@ def continuous_cdf_from_discrete_pdf(y, quantile): Returns ------- float - calculated CDF value + calculated quantile value """ sorted_y = np.sort(y) quantile_index = int(quantile * (len(y) - 1)) @@ -1032,7 +1032,7 @@ def get_normalized_values(values: Iterable) -> List[float]: def smear_discrete_cdftruth(cdf_func: callable, y: int) -> float: """ Smearing of the CDF value of a sample from a discrete random variable. Main - usage is for a histogram of CDF values to check estimated individual + usage is for a histogram of CDF values to check an estimated individual probability distribution (should be flat). 
Parameters diff --git a/pyproject.toml b/pyproject.toml index 010c55c..40831a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cyclic-boosting" -version = "1.1.1" +version = "1.1.2" description = "Implementation of Cyclic Boosting machine learning algorithms" authors = ["Blue Yonder GmbH"] packages = [{include = "cyclic_boosting"}] diff --git a/tests/test_integration.py b/tests/test_integration.py index 097e6ff..8877b6e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -20,7 +20,7 @@ pipeline_CBAdditiveGenericCRegressor, pipeline_CBGenericClassifier, ) -from cyclic_boosting.quantile_matching import cdf_fit_gamma, cdf_fit_nbinom, cdf_fit_spline +from cyclic_boosting.quantile_matching import quantile_fit_gamma, quantile_fit_nbinom, quantile_fit_spline from cyclic_boosting.utils import smear_discrete_cdftruth from tests.utils import plot_CB, costs_mad, costs_mse @@ -429,29 +429,28 @@ def test_multiplicative_quantile_regression_spline(is_plot, prepare_data, featur X, y = prepare_data quantiles = [] - cdf_values = [] + quantile_values = [] for quantile in [0.1, 0.3, 0.5, 0.7, 0.9]: CB_est = cb_multiplicative_quantile_regressor_model( quantile=quantile, features=features, feature_properties=feature_properties ) CB_est.fit(X.copy(), y) yhat = CB_est.predict(X.copy()) - cdf_values.append(yhat) + quantile_values.append(yhat) quantiles.append(quantile) quantiles = np.asarray(quantiles) - cdf_values = np.asarray(cdf_values) + quantile_values = np.asarray(quantile_values) i = 24 - spl_fit = cdf_fit_spline(quantiles, cdf_values[:, i]) - + spl_fit = quantile_fit_spline(quantiles, quantile_values[:, i]) np.testing.assert_almost_equal(spl_fit(0.2), 0.679, 3) np.testing.assert_almost_equal(spl_fit(0.5), 2.202, 3) np.testing.assert_almost_equal(spl_fit(0.8), 4.297, 3) if is_plot: - plt.plot(quantiles, cdf_values[:, i], "ro") - xs = np.linspace(0, 1, 100) + plt.plot(quantiles, quantile_values[:, i], "ro") + xs = 
np.linspace(0.0, 1.0, 100) plt.plot(xs, spl_fit(xs)) plt.savefig("spline_integration" + str(i) + ".png") plt.clf() @@ -462,40 +461,40 @@ def test_multiplicative_quantile_regression_pdf_gamma(is_plot, prepare_data, fea X, y = prepare_data quantiles = [] - cdf_values = [] + quantile_values = [] for quantile in [0.1, 0.3, 0.5, 0.7, 0.9]: CB_est = cb_multiplicative_quantile_regressor_model( quantile=quantile, features=features, feature_properties=feature_properties ) CB_est.fit(X.copy(), y) yhat = CB_est.predict(X.copy()) - cdf_values.append(yhat) + quantile_values.append(yhat) quantiles.append(quantile) quantiles = np.asarray(quantiles) - cdf_values = np.asarray(cdf_values) + quantile_values = np.asarray(quantile_values) cdf_truth_list = [] n_samples = len(X) for i in range(n_samples): - gamma_fit_cdf = cdf_fit_gamma(quantiles, cdf_values[:, i], mode="cdf") if i == 24: - gamma_fit = cdf_fit_gamma(quantiles, cdf_values[:, i]) - np.testing.assert_almost_equal(gamma_fit(0.2), 0.877, 3) - np.testing.assert_almost_equal(gamma_fit(0.5), 2.14, 3) - np.testing.assert_almost_equal(gamma_fit(0.8), 4.296, 3) + gamma_fit = quantile_fit_gamma(quantiles, quantile_values[:, i]) + np.testing.assert_almost_equal(gamma_fit(0.2), 0.829, 3) + np.testing.assert_almost_equal(gamma_fit(0.5), 2.049, 3) + np.testing.assert_almost_equal(gamma_fit(0.8), 4.147, 3) + + if is_plot: + plt.plot(quantiles, quantile_values[:, i], "ro") + xs = np.linspace(0.0, 1.0, 100) + plt.plot(xs, gamma_fit(xs)) + plt.savefig("gamma_integration_" + str(i) + ".png") + plt.clf() if is_plot: + gamma_fit_cdf = quantile_fit_gamma(quantiles, quantile_values[:, i], mode="cdf") cdf_truth = smear_discrete_cdftruth(gamma_fit_cdf, y[i]) cdf_truth_list.append(cdf_truth) - if i == 24: - plt.plot(cdf_values[:, i], quantiles, "ro") - xs = np.linspace(0, cdf_values[:, i].max(), 100) - plt.plot(xs, gamma_fit_cdf(xs)) - plt.savefig("gamma_integration_" + str(i) + ".png") - plt.clf() - cdf_truth = np.asarray(cdf_truth_list) if 
is_plot: plt.hist(cdf_truth[cdf_truth > 0], bins=30) @@ -508,40 +507,40 @@ def test_multiplicative_quantile_regression_pdf_nbinom(is_plot, prepare_data, fe X, y = prepare_data quantiles = [] - cdf_values = [] + quantile_values = [] for quantile in [0.1, 0.3, 0.5, 0.7, 0.9]: CB_est = cb_multiplicative_quantile_regressor_model( quantile=quantile, features=features, feature_properties=feature_properties ) CB_est.fit(X.copy(), y) yhat = CB_est.predict(X.copy()) - cdf_values.append(yhat) + quantile_values.append(yhat) quantiles.append(quantile) quantiles = np.asarray(quantiles) - cdf_values = np.asarray(cdf_values) + quantile_values = np.asarray(quantile_values) cdf_truth_list = [] n_samples = len(X) for i in range(n_samples): - nbinom_fit_cdf = cdf_fit_nbinom(quantiles, cdf_values[:, i], mode="cdf") if i == 24: - nbinom_fit = cdf_fit_nbinom(quantiles, cdf_values[:, i]) + nbinom_fit = quantile_fit_nbinom(quantiles, quantile_values[:, i]) np.testing.assert_equal(nbinom_fit(0.2), 1) np.testing.assert_equal(nbinom_fit(0.5), 2) np.testing.assert_equal(nbinom_fit(0.8), 4) + if is_plot: + plt.plot(quantiles, quantile_values[:, i], "ro") + xs = np.linspace(0.0, 1.0, 100) + plt.plot(xs, nbinom_fit(xs)) + plt.savefig("nbinom_integration_" + str(i) + ".png") + plt.clf() + if is_plot: + nbinom_fit_cdf = quantile_fit_nbinom(quantiles, quantile_values[:, i], mode="cdf") cdf_truth = smear_discrete_cdftruth(nbinom_fit_cdf, y[i]) cdf_truth_list.append(cdf_truth) - if i == 24: - plt.plot(cdf_values[:, i], quantiles, "ro") - xs = np.linspace(0, cdf_values[:, i].max(), 100) - plt.plot(xs, nbinom_fit_cdf(xs)) - plt.savefig("nbinom_integration_" + str(i) + ".png") - plt.clf() - cdf_truth = np.asarray(cdf_truth_list) if is_plot: plt.hist(cdf_truth, bins=30) diff --git a/tests/test_quantile_matching.py b/tests/test_quantile_matching.py index d32749f..2b4f222 100644 --- a/tests/test_quantile_matching.py +++ b/tests/test_quantile_matching.py @@ -2,33 +2,38 @@ from scipy.stats import norm, 
gamma, nbinom import matplotlib.pyplot as plt -from cyclic_boosting.quantile_matching import cdf_fit_gaussian, cdf_fit_gamma, cdf_fit_nbinom, cdf_fit_spline +from cyclic_boosting.quantile_matching import ( + quantile_fit_gaussian, + quantile_fit_gamma, + quantile_fit_nbinom, + quantile_fit_spline, +) def test_cdf_fit_gaussian(is_plot): quantiles = np.array([0.1, 0.3, 0.5, 0.7, 0.9]) mu_exp = 0.3 sigma_exp = 1.4 - cdf_values = norm.ppf(quantiles, mu_exp, sigma_exp) + quantile_values = norm.ppf(quantiles, mu_exp, sigma_exp) - gaussian_fit_quantiles = cdf_fit_gaussian(quantiles, cdf_values) + gaussian_fit_quantiles = quantile_fit_gaussian(quantiles, quantile_values) np.testing.assert_almost_equal(gaussian_fit_quantiles(0.2), -0.878, 3) np.testing.assert_almost_equal(gaussian_fit_quantiles(0.5), 0.3, 3) np.testing.assert_almost_equal(gaussian_fit_quantiles(0.8), 1.478, 3) - gaussian_fit_quantiles_pdf = cdf_fit_gaussian(quantiles, cdf_values, mode="dist") + gaussian_fit_quantiles_pdf = quantile_fit_gaussian(quantiles, quantile_values, mode="dist") np.testing.assert_almost_equal(gaussian_fit_quantiles_pdf.mean(), mu_exp, 3) np.testing.assert_almost_equal(gaussian_fit_quantiles_pdf.std(), sigma_exp, 3) - gaussian_fit_quantiles_cdf = cdf_fit_gaussian(quantiles, cdf_values, mode="cdf") + gaussian_fit_quantiles_cdf = quantile_fit_gaussian(quantiles, quantile_values, mode="cdf") np.testing.assert_almost_equal(gaussian_fit_quantiles_cdf(-0.9), 0.196, 3) np.testing.assert_almost_equal(gaussian_fit_quantiles_cdf(0.3), 0.5, 3) np.testing.assert_almost_equal(gaussian_fit_quantiles_cdf(1.5), 0.804, 3) if is_plot: - plt.plot(cdf_values, quantiles, "ro") - xs = np.linspace(cdf_values.min(), cdf_values.max(), 100) - plt.plot(xs, gaussian_fit_quantiles_cdf(xs)) + plt.plot(quantiles, quantile_values, "ro") + xs = np.linspace(0.0, 1.0, 100) + plt.plot(xs, gaussian_fit_quantiles(xs)) plt.savefig("gaussian.png") plt.clf() @@ -39,26 +44,26 @@ def test_cdf_fit_gamma(is_plot): sigma_exp = 1.4 
alpha_exp = mu_exp * mu_exp / (sigma_exp * sigma_exp) beta_exp = mu_exp / (sigma_exp * sigma_exp) - cdf_values = gamma.ppf(quantiles, alpha_exp, scale=1 / beta_exp) + quantile_values = gamma.ppf(quantiles, alpha_exp, scale=1 / beta_exp) - gamma_fit_quantiles = cdf_fit_gamma(quantiles, cdf_values) + gamma_fit_quantiles = quantile_fit_gamma(quantiles, quantile_values) np.testing.assert_almost_equal(gamma_fit_quantiles(0.2), 1.12, 3) np.testing.assert_almost_equal(gamma_fit_quantiles(0.5), 2.023, 3) np.testing.assert_almost_equal(gamma_fit_quantiles(0.8), 3.322, 3) - gamma_fit_quantiles_pdf = cdf_fit_gamma(quantiles, cdf_values, mode="dist") + gamma_fit_quantiles_pdf = quantile_fit_gamma(quantiles, quantile_values, mode="dist") np.testing.assert_almost_equal(gamma_fit_quantiles_pdf.mean(), mu_exp, 3) np.testing.assert_almost_equal(gamma_fit_quantiles_pdf.std(), sigma_exp, 3) - gamma_fit_quantiles_cdf = cdf_fit_gamma(quantiles, cdf_values, mode="cdf") + gamma_fit_quantiles_cdf = quantile_fit_gamma(quantiles, quantile_values, mode="cdf") np.testing.assert_almost_equal(gamma_fit_quantiles_cdf(1.1), 0.194, 3) np.testing.assert_almost_equal(gamma_fit_quantiles_cdf(2.0), 0.493, 3) np.testing.assert_almost_equal(gamma_fit_quantiles_cdf(3.3), 0.796, 3) if is_plot: - plt.plot(cdf_values, quantiles, "ro") - xs = np.linspace(0, cdf_values.max(), 100) - plt.plot(xs, gamma_fit_quantiles_cdf(xs)) + plt.plot(quantiles, quantile_values, "ro") + xs = np.linspace(0.0, 1.0, 100) + plt.plot(xs, gamma_fit_quantiles(xs)) plt.savefig("gamma.png") plt.clf() @@ -69,43 +74,43 @@ def test_cdf_fit_nbinom(is_plot): sigma_exp = 3.1 n_exp = mu_exp * mu_exp / (sigma_exp * sigma_exp - mu_exp) p_exp = mu_exp / (sigma_exp * sigma_exp) - cdf_values = nbinom.ppf(quantiles, n_exp, p_exp) + quantile_values = nbinom.ppf(quantiles, n_exp, p_exp) np.random.seed(42) - nbinom_fit_quantiles = cdf_fit_nbinom(quantiles, cdf_values) + nbinom_fit_quantiles = quantile_fit_nbinom(quantiles, quantile_values) 
np.testing.assert_equal(nbinom_fit_quantiles(0.2), 3.0) np.testing.assert_equal(nbinom_fit_quantiles(0.5), 5.0) np.testing.assert_equal(nbinom_fit_quantiles(0.8), 8.0) - nbinom_fit_quantiles_pdf = cdf_fit_nbinom(quantiles, cdf_values, mode="dist") + nbinom_fit_quantiles_pdf = quantile_fit_nbinom(quantiles, quantile_values, mode="dist") np.testing.assert_almost_equal(nbinom_fit_quantiles_pdf.mean(), 5.850, 3) np.testing.assert_almost_equal(nbinom_fit_quantiles_pdf.std(), 3.234, 3) - nbinom_fit_quantiles_cdf = cdf_fit_nbinom(quantiles, cdf_values, mode="cdf") + nbinom_fit_quantiles_cdf = quantile_fit_nbinom(quantiles, quantile_values, mode="cdf") np.testing.assert_almost_equal(nbinom_fit_quantiles_cdf(3), 0.251, 3) np.testing.assert_almost_equal(nbinom_fit_quantiles_cdf(5), 0.51, 3) np.testing.assert_almost_equal(nbinom_fit_quantiles_cdf(8), 0.809, 3) np.testing.assert_almost_equal(nbinom_fit_quantiles_cdf(8.2), 0.809, 3) if is_plot: - plt.plot(cdf_values, quantiles, "ro") - xs = np.linspace(0, cdf_values.max(), 100) - plt.plot(xs, nbinom_fit_quantiles_cdf(xs)) + plt.plot(quantiles, quantile_values, "ro") + xs = np.linspace(0.0, 1.0, 100) + plt.plot(xs, nbinom_fit_quantiles(xs)) plt.savefig("nbinom.png") plt.clf() -def test_cdf_fit_spline(is_plot): +def test_quantile_fit_spline(is_plot): quantiles = np.array([0.1, 0.3, 0.5, 0.7, 0.9]) - cdf_values = np.array([-1.7, 0.3, 1.5, 2.7, 5.8]) + quantile_values = np.array([-1.7, 0.3, 1.5, 2.7, 5.8]) - spl = cdf_fit_spline(quantiles, cdf_values) + spl = quantile_fit_spline(quantiles, quantile_values) np.testing.assert_almost_equal(spl(0.2), -0.567, 3) np.testing.assert_almost_equal(spl(0.5), 1.5, 3) np.testing.assert_almost_equal(spl(0.8), 3.877, 3) if is_plot: - plt.plot(quantiles, cdf_values, "ro") + plt.plot(quantiles, quantile_values, "ro") xs = np.linspace(0, 1, 100) plt.plot(xs, spl(xs)) plt.savefig("spline.png") diff --git a/tests/test_utils.py b/tests/test_utils.py index 35d2dc8..b5d59a7 100644 --- 
a/tests/test_utils.py +++ b/tests/test_utils.py @@ -39,7 +39,7 @@ def test_get_feature_column_names(): np.testing.assert_equal(features, ["b", "c"]) -def continuous_cdf_from_discrete_pdf(): +def continuous_quantile_from_discrete_pdf(): y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) quantile_value = utils.continuous_quantile_from_discrete(y, 0.8) assert quantile_value == 8.0