Skip to content

Commit

Permalink
fixed some misnomers
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixWick committed Oct 8, 2023
1 parent 8898382 commit 37873ab
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 117 deletions.
4 changes: 2 additions & 2 deletions cyclic_boosting/generic_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from cyclic_boosting.base import CyclicBoostingBase, gaussian_matching_by_quantiles, Feature, CBLinkPredictionsFactors
from cyclic_boosting.link import LogLinkMixin, IdentityLinkMixin, LogitLinkMixin
from cyclic_boosting.utils import continuous_cdf_from_discrete_pdf, get_X_column
from cyclic_boosting.utils import continuous_quantile_from_discrete_pdf, get_X_column
from cyclic_boosting.classification import get_beta_priors

from typing import Tuple, Union
Expand Down Expand Up @@ -408,7 +408,7 @@ def quantile_global_scale(
if weights is None:
raise RuntimeError("The weights have to be initialized.")

global_scale_link_ = link_func(continuous_cdf_from_discrete_pdf(y, quantile))
global_scale_link_ = link_func(continuous_quantile_from_discrete_pdf(y, quantile))

prior_pred_link_offset_ = None
if prior_prediction_column is not None:
Expand Down
98 changes: 51 additions & 47 deletions cyclic_boosting/quantile_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
from typing import Optional


def cdf_fit_gaussian(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
def quantile_fit_gaussian(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
"""
Interpolation between pairs of CDF values (potentially estimated by means
of quantile regression) and corresponding quantiles according to a
Gaussian distribution as assumed PDF.
Interpolation of a quantile function (with quantiles estimated, e.g., by
means of quantile regression) according to a Gaussian distribution as
assumed PDF.
Parameters
----------
quantiles : np.ndarray
quantile values
cdf_values : np.ndarray
CDF values corresponding to quantile values
quantiles (x values of quantile function)
quantile_values : np.ndarray
quantile values (y values of quantile function)
mode : str
decides about kind of returned callable, possible values are:
- ``ppf``: input quantile, output CDF value (default)
- ``dist``: fitted Gaussian (scipy function)
- ``cdf``: input CDF value, output quantile
- ``ppf``: quantile (default)
- ``dist``: fitted Gaussian (scipy function)
- ``cdf``: CDF function
Returns
-------
Expand All @@ -32,9 +32,9 @@ def cdf_fit_gaussian(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Option
"""

def f(x, mu, sigma):
return norm(loc=mu, scale=sigma).cdf(x)
return norm(loc=mu, scale=sigma).ppf(x)

mu, sigma = curve_fit(f, cdf_values, quantiles)[0]
mu, sigma = curve_fit(f, quantiles, quantile_values)[0]
if mode == "ppf":
return norm(mu, sigma).ppf
elif mode == "dist":
Expand All @@ -45,25 +45,24 @@ def f(x, mu, sigma):
raise Exception("Invalid mode.")


def cdf_fit_gamma(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
def quantile_fit_gamma(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
"""
Interpolation between pairs of CDF values (potentially estimated by means
of quantile regression) and corresponding quantiles according to a
Gamma distribution as assumed PDF (i.e., continuous, non-negative target
values).
Interpolation of a quantile function (with quantiles estimated, e.g., by
means of quantile regression) according to a Gamma distribution as assumed
PDF (i.e., continuous, non-negative target values).
Parameters
----------
quantiles : np.ndarray
quantile values
cdf_values : np.ndarray
CDF values corresponding to quantile values
quantiles (x values of quantile function)
quantile_values : np.ndarray
quantile values (y values of quantile function)
mode : str
decides about kind of returned callable, possible values are:
- ``ppf``: input quantile, output CDF value (default)
- ``dist``: fitted Gamma (scipy function)
- ``cdf``: input CDF value, output quantile
- ``ppf``: quantile (default)
- ``dist``: fitted Gamma (scipy function)
- ``cdf``: CDF function
Returns
-------
Expand All @@ -72,9 +71,9 @@ def cdf_fit_gamma(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[
"""

def f(x, alpha, beta):
return gamma(alpha, scale=1 / beta).cdf(x)
return gamma(alpha, scale=1 / beta).ppf(x)

alpha, beta = curve_fit(f, cdf_values, quantiles, p0=[2.0, 0.9])[0]
alpha, beta = curve_fit(f, quantiles, quantile_values, p0=[2.0, 0.9])[0]
if mode == "ppf":
return gamma(alpha, scale=1 / beta).ppf
elif mode == "dist":
Expand All @@ -88,47 +87,52 @@ def f(x, alpha, beta):
def _nbinom_cdf_mu_var(x: float, mu: float, var: float) -> float:
    """
    Calculation of negative binomial parameters n and p from given mean and
    variance, and subsequent call of its cumulative distribution function.

    Parameters
    ----------
    x : float
        value of random variable following negative binomial distribution
    mu : float
        mean of negative binomial distribution
    var : float
        variance of negative binomial distribution

    Returns
    -------
    float
        value of the negative binomial cumulative distribution function
        evaluated at ``x`` (not a callable)
    """
    # Moment matching to scipy's (n, p) parametrization, where
    # mean = n * (1 - p) / p and variance = n * (1 - p) / p**2.
    # The conversion is only well-defined for var > mu (over-dispersion),
    # because n divides by (var - mu).
    n = mu * mu / (var - mu)
    p = mu / var
    return nbinom(n, p).cdf(x)


def cdf_fit_nbinom(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
def quantile_fit_nbinom(quantiles: np.ndarray, quantile_values: np.ndarray, mode: Optional[str] = "ppf") -> callable:
"""
Interpolation between pairs of CDF values (potentially estimated by means
of quantile regression) and corresponding quantiles according to a
negative binomial distribution as assumed PDF (i.e., discrete, non-negative
target values).
Interpolation of a quantile function (with quantiles estimated, e.g., by
means of quantile regression) according to a negative binomial distribution
as assumed PDF (i.e., discrete, non-negative target values).
Parameters
----------
quantiles : np.ndarray
quantile values
cdf_values : np.ndarray
CDF values corresponding to quantile values
quantiles (x values of quantile function)
quantile_values : np.ndarray
quantile values (y values of quantile function)
mode : str
decides about kind of returned callable, possible values are:
- ``ppf``: input quantile, output CDF value (default)
- ``ppf``: quantile (default)
- ``dist``: fitted negative binomial (scipy function)
- ``cdf``: input CDF value, output quantile
- ``cdf``: CDF function
Returns
-------
callable
fitted negative binomial function (see mode)
"""
mu, var = curve_fit(_nbinom_cdf_mu_var, cdf_values, quantiles, p0=[1.2, 1.4])[0]
mu, var = curve_fit(_nbinom_cdf_mu_var, quantile_values, quantiles, p0=[2.2, 2.4])[0]
n = mu * mu / (var - mu)
p = mu / var
if mode == "ppf":
Expand All @@ -141,23 +145,23 @@ def cdf_fit_nbinom(quantiles: np.ndarray, cdf_values: np.ndarray, mode: Optional
raise Exception("Invalid mode.")


def quantile_fit_spline(quantiles: np.ndarray, quantile_values: np.ndarray) -> callable:
    """
    Interpolation of a quantile function (with quantiles estimated, e.g., by
    means of quantile regression) according to a smoothing spline (i.e.,
    arbitrary target distribution).

    Parameters
    ----------
    quantiles : np.ndarray
        quantiles (x values of quantile function); may be given in any order,
        but must be distinct
    quantile_values : np.ndarray
        quantile values (y values of quantile function), aligned
        element-wise with ``quantiles``

    Returns
    -------
    callable
        spline fitted to quantile function
    """
    x = np.asarray(quantiles, dtype=float)
    y = np.asarray(quantile_values, dtype=float)
    # The spline constructor requires strictly increasing x values, so the
    # (quantile, value) pairs are sorted jointly by quantile. Input that is
    # already sorted is left unchanged by this step.
    order = np.argsort(x, kind="stable")
    # Cubic interpolating spline on [0, 1]; ext=3 returns the boundary value
    # for evaluation points outside the fitted range.
    return InterpolatedUnivariateSpline(x[order], y[order], k=3, bbox=[0, 1], ext=3)
10 changes: 5 additions & 5 deletions cyclic_boosting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,10 +981,10 @@ def get_feature_column_names(X, exclude_columns=[]):
return features


def continuous_cdf_from_discrete_pdf(y, quantile):
def continuous_quantile_from_discrete_pdf(y, quantile):
"""
Calculates a continous CDF value approximation for a given quantile from an
array of potentially discrete values.
Calculates a continuous quantile value approximation for a given quantile
from an array of potentially discrete values.
Parameters
----------
Expand All @@ -996,7 +996,7 @@ def continuous_cdf_from_discrete_pdf(y, quantile):
Returns
-------
float
calculated CDF value
calculated quantile value
"""
sorted_y = np.sort(y)
quantile_index = int(quantile * (len(y) - 1))
Expand Down Expand Up @@ -1032,7 +1032,7 @@ def get_normalized_values(values: Iterable) -> List[float]:
def smear_discrete_cdftruth(cdf_func: callable, y: int) -> float:
"""
Smearing of the CDF value of a sample from a discrete random variable. Main
usage is for a histogram of CDF values to check estimated individual
usage is for a histogram of CDF values to check an estimated individual
probability distribution (should be flat).
Parameters
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyclic-boosting"
version = "1.1.1"
version = "1.1.2"
description = "Implementation of Cyclic Boosting machine learning algorithms"
authors = ["Blue Yonder GmbH"]
packages = [{include = "cyclic_boosting"}]
Expand Down
Loading

0 comments on commit 37873ab

Please sign in to comment.