added cosine decaying learning rate option #142

Closed · wants to merge 1 commit
28 changes: 16 additions & 12 deletions dadapy/_utils/differentiable_imbalance.py
@@ -391,7 +391,7 @@ def _optimize_dii(
l_rate: float = None,
constrain: bool = False,
l1_penalty: float = 0.0,
decaying_lr: bool = True,
decaying_lr: str = "exp",
period: np.ndarray = None,
groundtruthperiod: np.ndarray = None,
cythond: bool = True,
@@ -416,8 +416,8 @@ def _optimize_dii(
Constrain the weights to sum up to the number of weights. Default: False
l1_penalty: float
The l1-regularization strength, if sparsity is needed. Default: 0 (l1-regularization turned off).
decaying_lr: bool
Use exponentially decaying learning rate in gradient descent or not. Default: True.
decaying_lr: str
"exp" for an exponentially decaying learning rate (halved every 10 epochs), "cos" for a cosine decaying learning rate, or "static" for no decay. Default: "exp"
period : float or numpy.ndarray/list, optional
D(input data) periods (input should be formatted to be periodic starting at 0). If not a list, the same period is assumed for all D features
Default is None, which means no periodic boundary conditions are applied. If some of the input features do not have a period, set those to 0.
@@ -483,7 +483,7 @@ def _optimize_dii(

diis[0] = _return_dii(dists_rescaled_A, rank_matrix_B, lambd)
l1_penalties[0] = l1_penalty * np.sum(np.abs(weights))
lrate = l_rate # for not expon. decaying learning rates
lrate = 1 * l_rate # for not expon. decaying learning rates; "1*" ensures deepcopy

for i_epoch in range(n_epochs):
# compute gradient * SCALING!!!! to be scale invariant in case of no adaptive lambda
@@ -509,8 +509,10 @@ def _optimize_dii(
)
break
else:
# exponentially decaying lr
if decaying_lr == True:
# exponentially or cosine decaying lr
if decaying_lr == "cos":
lrate = l_rate * 0.5 * (1 + np.cos((np.pi * i_epoch) / n_epochs))
elif decaying_lr == "exp":
lrate = l_rate * 2 ** (
-i_epoch / 10
) # every 10 epochs the learning rate will be halved
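For reference, a minimal standalone sketch of the three learning-rate schedules this branch switches between (the function name is illustrative, not part of dadapy's API):

```python
import numpy as np

def scheduled_lrate(l_rate: float, i_epoch: int, n_epochs: int, decaying_lr: str = "exp") -> float:
    """Learning rate at a given epoch under the "exp", "cos", or "static" schedule."""
    if decaying_lr == "cos":
        # cosine decay: starts at l_rate and falls smoothly to 0 at the final epoch
        return l_rate * 0.5 * (1 + np.cos((np.pi * i_epoch) / n_epochs))
    elif decaying_lr == "exp":
        # exponential decay: halved every 10 epochs
        return l_rate * 2 ** (-i_epoch / 10)
    return l_rate  # "static": no decay

# Halfway through 100 epochs: "exp" gives l_rate/32, while "cos" still gives l_rate/2.
print(scheduled_lrate(0.1, 50, 100, "exp"), scheduled_lrate(0.1, 50, 100, "cos"))
```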
@@ -576,7 +578,7 @@ def _optimize_dii_static_zeros(
n_epochs: int = 100,
l_rate: float = 0.1,
constrain: bool = False,
decaying_lr: bool = True,
decaying_lr: str = "exp",
period: np.ndarray = None,
groundtruthperiod: np.ndarray = None,
cythond: bool = True,
@@ -591,7 +593,7 @@ def _optimize_dii_static_zeros(
l_rate (float): learning rate. Has to be tuned, especially if constrain=True (otherwise optimization could fail)
constrain (bool): if True, rescale the weights so the biggest weight = 1
l1_penalty (float): l1 regularization strength
decaying_lr (bool): default: True. Apply decaying learning rate = l_rate * 2**(-i_epoch/10) - every 10 epochs the learning rate will be halfed
decaying_lr (string): "exp" for exponentially decaying learning rate (cut in half every 10 epochs) or "cos" for cosine decaying learning rate. "static" for no decay in the learning rate. Default: "exp"
period (float or np.ndarray/list): D(input) periods (input formatted to be 0-period). If not a list, the same period is assumed for all D features
groundtruthperiod (float or np.ndarray/list): D(groundtruth) periods (groundtruth formatted to be 0-period).
If not a list, the same period is assumed for all D(groundtruth) features
@@ -672,7 +674,9 @@ def _optimize_dii_static_zeros(
gradient[weights == 0] = 0

# exponentially decaying lr
if decaying_lr == True:
if decaying_lr == "cos":
lrate = l_rate * 0.5 * (1 + np.cos((np.pi * i_epoch) / n_epochs))
elif decaying_lr == "exp":
lrate = l_rate * 2 ** (
-i_epoch / 10
) # every 10 epochs the learning rate will be halved
@@ -732,7 +736,7 @@ def _refine_lasso_optimization(
n_epochs=50,
l_rate=None,
constrain=False,
decaying_lr=True,
decaying_lr="exp",
period=None,
groundtruthperiod=None,
cythond=True,
@@ -752,8 +756,8 @@ def _refine_lasso_optimization(
l_rate (float or None): if None, the learning rate is determined automatically with optimize_learning_rate
n_epochs (int): number of epochs in each optimization cycle
constrain (bool): if True, rescale the weights so the biggest weight = 1
decaying_lr (bool): default: True. Apply decaying learning rate = l_rate * 2**(-i_epoch/10) - every 10 epochs the learning rate will be halfed
period (float or np.ndarray/list): D(input) periods (input formatted to be 0-period). If not a list, the same period is assumed for all D features
decaying_lr (string): "exp" for an exponentially decaying learning rate (halved every 10 epochs), "cos" for a cosine decaying learning rate, or "static" for no decay. Default: "exp"
period (float or np.ndarray/list): D(input) periods (input formatted to be 0-period). If not a list, the same period is assumed for all D features
groundtruthperiod (float or np.ndarray/list): D(groundtruth) periods (groundtruth formatted to be 0-period). If not a list, the same period is assumed for all D(groundtruth) features
cythond (bool): Flag indicating whether to use Cython-based distance computation methods.
Should be True (default) unless you want to test the Python-based methods.
38 changes: 24 additions & 14 deletions dadapy/feature_weighting.py
@@ -185,7 +185,7 @@ def return_optimal_learning_rate(
n_samples: int = 200,
initial_weights: Union[np.ndarray, int, float] = None,
lambd: float = None,
decaying_lr: bool = True,
decaying_lr: str = "exp",
trial_learning_rates: np.ndarray = None,
):
"""Find the optimal learning rate for the optimization of the DII by testing several on a reduced set
@@ -197,9 +197,11 @@ def return_optimal_learning_rate(
initial_weights (np.ndarray or list): D(input) initial weights for the input features. No zeros allowed here
lambd (float): softmax scaling. If None (preferred),
this is chosen automatically with compute_optimial_lambda
decaying_lr (bool): default: True.
Apply decaying learning rate = l_rate * 2**(-i_epoch/10)
- every 10 epochs the learning rate will be halfed
decaying_lr (string): Default: "exp".
"exp" for exponentially decaying learning rate (cut in half every 10 epochs):
lrate = l_rate_initial * 2**(-i_epoch/10),
or "cos" for cosine decaying learning rate: lrate = l_rate_initial * 0.5 * (1+ cos((pi * i_epoch)/n_epochs)).
"static" for no decay in the learning rate.
trial_learning_rates (np.ndarray or list or None): learning rates to try.
If None are given, a sensible set of learning rates is tested.
Returns:
@@ -366,7 +368,7 @@ def return_weights_optimize_dii(
lambd: float = None,
learning_rate: float = None,
l1_penalty: float = 0.0,
decaying_lr: bool = True,
decaying_lr: str = "exp",
):
"""Optimize the differentiable information imbalance using gradient descent
of the DII between input data object A and groundtruth data object B.
@@ -389,9 +391,11 @@ def return_weights_optimize_dii(
The learning rate of the gradient descent. If None, automatically estimated to be fast.
l1_penalty: float, optional
The l1-regularization strength, if sparsity is needed. Default: 0 (l1-regularization turned off).
decaying_lr: bool
Use exponentially decaying learning rate in gradient descent or not. Default: True.

decaying_lr (string): Default: "exp".
"exp" for exponentially decaying learning rate (cut in half every 10 epochs):
lrate = l_rate_initial * 2**(-i_epoch/10),
or "cos" for cosine decaying learning rate: lrate = l_rate_initial * 0.5 * (1+ cos((pi * i_epoch)/n_epochs)).
"static" for no decay in the learning rate.
Returns:
final_weights: np.ndarray, shape (D). Array of the optimized weights.
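A hedged usage sketch of the new option from the caller's side; the FeatureWeighting class name, its constructor keyword, and the target_data parameter are assumptions inferred from this diff rather than verified against dadapy's documentation:

```python
import numpy as np
from dadapy.feature_weighting import FeatureWeighting  # assumed import path

rng = np.random.default_rng(0)
X = rng.random((200, 5))  # input features
Y = rng.random((200, 2))  # ground-truth features

f_in = FeatureWeighting(coordinates=X)  # assumed constructor keyword
f_gt = FeatureWeighting(coordinates=Y)

# decaying_lr accepts "exp" (default), "cos", or "static" after this PR
weights = f_in.return_weights_optimize_dii(
    target_data=f_gt,  # assumed parameter name for the ground-truth object
    n_epochs=100,
    decaying_lr="cos",
)
print(weights)
```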

@@ -454,7 +458,7 @@ def return_backward_greedy_dii_elimination(
n_epochs: int = 100,
learning_rate: float = None,
constrain: bool = False,
decaying_lr: bool = True,
decaying_lr: str = "exp",
):
"""Do a stepwise backward elimination of feature weights, always eliminating the lowest weight;
after each elimination the DII is optimized by gradient descent using the remaining features
@@ -469,8 +473,11 @@ def return_backward_greedy_dii_elimination(
Has to be tuned, especially if constrain=True (otherwise optimization could fail)
constrain (bool): if True, rescale the weights so the biggest weight = 1
l1_penalty (float): l1 regularization strength
decaying_lr (bool): default: True. Apply decaying learning rate = l_rate * 2**(-i_epoch/10)
- every 10 epochs the learning rate will be halfed
decaying_lr (string): Default: "exp".
"exp" for exponentially decaying learning rate (cut in half every 10 epochs):
lrate = l_rate_initial * 2**(-i_epoch/10),
or "cos" for cosine decaying learning rate: lrate = l_rate_initial * 0.5 * (1+ cos((pi * i_epoch)/n_epochs)).
"static" for no decay in the learning rate.

Returns:
final_diis: np.ndarray, shape (D). Array of the optimized DII for each of the corresponding weights.
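A minimal sketch of the greedy loop described above; optimize_weights is a hypothetical stand-in for the DII gradient-descent step, not dadapy's API:

```python
import numpy as np

def backward_greedy_elimination(optimize_weights, n_features):
    """Optimize the weights, record the DII, then zero out the smallest remaining weight."""
    active = np.ones(n_features, dtype=bool)
    history = []
    while active.any():
        weights, dii = optimize_weights(active)  # hypothetical optimizer over the active features
        history.append((active.copy(), weights.copy(), dii))
        if active.sum() == 1:
            break
        smallest = np.argmin(np.where(active, np.abs(weights), np.inf))
        active[smallest] = False  # eliminate the feature with the lowest optimized weight
    return history
```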
@@ -571,7 +578,7 @@ def return_lasso_optimization_dii_search(
learning_rate: float = None,
l1_penalties: Union[list, float] = None,
constrain: bool = False,
decaying_lr: bool = True,
decaying_lr: str = "exp",
refine: bool = False,
plotlasso: bool = True,
):
@@ -591,8 +598,11 @@ def return_lasso_optimization_dii_search(
l1_penalties (list or None): l1 regularization strengths to be tested.
If None (default), a list of 10 sensible l1-penalties is tested,
which are chosen depending on the learning rate.
decaying_lr (bool): default: True. Apply decaying learning rate = l_rate * 2**(-i_epoch/10)
- every 10 epochs the learning rate will be halfed.
decaying_lr (string): Default: "exp".
"exp" for exponentially decaying learning rate (cut in half every 10 epochs):
lrate = l_rate_initial * 2**(-i_epoch/10),
or "cos" for cosine decaying learning rate: lrate = l_rate_initial * 0.5 * (1+ cos((pi * i_epoch)/n_epochs)).
"static" for no decay in the learning rate.
refine (bool): default: False. If True, the l1-penalties are added in between penalties
where the number of non-zero weights changes by more than one.
This is done to find the optimal l1-penalty for each number of non-zero weights.
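A hedged sketch of the refine step just described: insert extra penalties between neighbouring values whose non-zero-weight counts differ by more than one (midpoint spacing is an assumption; dadapy's actual choice may differ):

```python
def refine_penalties(penalties, n_nonzero_weights):
    """Add intermediate l1-penalties where the number of non-zero weights jumps by more than one."""
    pairs = list(zip(penalties, n_nonzero_weights))
    extra = [
        0.5 * (p0 + p1)  # assumed midpoint spacing
        for (p0, n0), (p1, n1) in zip(pairs, pairs[1:])
        if abs(n0 - n1) > 1
    ]
    return sorted(set(penalties) | set(extra))
```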