Commit

Modify ConditionalGradient optimizer to handle the case of the gradient being 0 (#558)

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* add CG optimizer

* Revert "add CG optimizer"

This reverts commit 953fa39.

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Revert "Add files via upload"

This reverts commit de3cf0b.

* Handle the case of the gradient being 0

* Handle the case of the gradient being 0

* Modify the format of epsilon in the argument

* Add missing epsilon handling in the init function

* Fix overlong line issue
pkan2 authored and WindQAQ committed Oct 2, 2019
1 parent d20e803 commit 813e88d
Showing 1 changed file with 23 additions and 7 deletions.
tensorflow_addons/optimizers/conditional_gradient.py (23 additions, 7 deletions)
@@ -31,16 +31,23 @@ class ConditionalGradient(tf.keras.optimizers.Optimizer):
     See https://arxiv.org/pdf/1803.06453.pdf
     ```
-    variable -= (1-learning_rate)
-        * (variable + lambda_ * gradient / frobenius_norm(gradient))
+    variable -= (1-learning_rate) * (variable + lambda_ * gradient
+        / (frobenius_norm(gradient) + epsilon))
     ```
-    Note that we choose "lambda_" here to refer to the constraint "lambda" in the paper.
+    Note that we choose "lambda_" here to refer to the constraint "lambda" in
+    the paper.
+    And 'epsilon' is constant with tiny value as compared to the value of
+    frobenius_norm of gradient. The purpose of 'epsilon' here is to avoid the
+    case that the value of frobenius_norm of gradient is 0.
+    In this implementation, we choose 'epsilon' with value of 10^-7.
     """

     def __init__(self,
                  learning_rate,
                  lambda_,
+                 epsilon=1e-7,
                  use_locking=False,
                  name='ConditionalGradient',
                  **kwargs):
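The formula above is algebraically the same update that `_resource_apply_dense` computes further down (`lr * var - (1 - lr) * lambda_ * grad / (norm + epsilon)`). A minimal standalone sketch of that rule, not part of the commit and using plain NumPy purely for illustration, shows why the added `epsilon` matters when the gradient is all zeros:

```
import numpy as np

def cg_update(variable, gradient, learning_rate, lambda_, epsilon=1e-7):
    # Frobenius norm of the gradient; exactly 0 for an all-zero gradient.
    norm = np.sqrt(np.sum(np.square(gradient)))
    # Equivalent form of: variable -= (1 - lr) * (variable + lambda_ * grad / (norm + eps))
    return learning_rate * variable - (1 - learning_rate) * lambda_ * gradient / (norm + epsilon)

var = np.array([0.5, -0.25])
zero_grad = np.zeros_like(var)
print(cg_update(var, zero_grad, learning_rate=0.1, lambda_=0.01))
# [0.05, -0.025] -- finite; dividing by the bare norm would give 0/0 = nan
```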
@@ -50,19 +57,24 @@ def __init__(self,
             learning_rate: A `Tensor` or a floating point value.
                 The learning rate.
             lambda_: A `Tensor` or a floating point value. The constraint.
+            epsilon: A `Tensor` or a floating point value. A small constant
+                for numerical stability when handling the case of norm of
+                gradient to be zero.
             use_locking: If `True` use locks for update operations.
             name: Optional name prefix for the operations created when
                 applying gradients. Defaults to 'ConditionalGradient'
         """
         super(ConditionalGradient, self).__init__(name=name, **kwargs)
         self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
         self._set_hyper('lambda_', lambda_)
+        self.epsilon = epsilon or tf.keras.backend.epsilon()
         self._set_hyper('use_locking', use_locking)

     def get_config(self):
         config = {
             'learning_rate': self._serialize_hyperparameter('learning_rate'),
             'lambda_': self._serialize_hyperparameter('lambda_'),
+            'epsilon': self.epsilon,
             'use_locking': self._serialize_hyperparameter('use_locking')
         }
         base_config = super(ConditionalGradient, self).get_config()
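With `epsilon` wired through `__init__` and `get_config`, the new argument is both constructible and serializable. A hedged usage sketch, assuming the optimizer is exported as `tfa.optimizers.ConditionalGradient` in an installed `tensorflow_addons` build that contains this change:

```
import tensorflow_addons as tfa  # assumes TensorFlow Addons with this change is installed

# epsilon defaults to 1e-7, so passing it explicitly is optional.
opt = tfa.optimizers.ConditionalGradient(learning_rate=0.1, lambda_=0.01, epsilon=1e-7)

# get_config() now includes epsilon, so the optimizer round-trips through its config.
config = opt.get_config()
restored = tfa.optimizers.ConditionalGradient.from_config(config)
assert restored.epsilon == 1e-7
```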
@@ -79,6 +91,8 @@ def _prepare_local(self, var_device, var_dtype, apply_state):
             self._get_hyper('learning_rate', var_dtype))
         apply_state[(var_device, var_dtype)]['lambda_'] = tf.identity(
             self._get_hyper('lambda_', var_dtype))
+        apply_state[(var_device, var_dtype)]['epsilon'] = tf.convert_to_tensor(
+            self.epsilon, var_dtype)

     def _resource_apply_dense(self, grad, var, apply_state=None):
         def frobenius_norm(m):
@@ -91,8 +105,9 @@ def frobenius_norm(m):
             frobenius_norm(grad), name='norm', dtype=var.dtype.base_dtype)
         lr = coefficients['learning_rate']
         lambda_ = coefficients['lambda_']
-        var_update_tensor = (
-            tf.math.multiply(var, lr) - (1 - lr) * lambda_ * grad / norm)
+        epsilon = coefficients['epsilon']
+        var_update_tensor = (tf.math.multiply(var, lr) -
+                             (1 - lr) * lambda_ * grad / (norm + epsilon))
         var_update_kwargs = {
             'resource': var.handle,
             'value': var_update_tensor,
@@ -111,9 +126,10 @@ def frobenius_norm(m):
             frobenius_norm(grad), name='norm', dtype=var.dtype.base_dtype)
         lr = coefficients['learning_rate']
         lambda_ = coefficients['lambda_']
+        epsilon = coefficients['epsilon']
         var_slice = tf.gather(var, indices)
-        var_update_value = (
-            tf.math.multiply(var_slice, lr) - (1 - lr) * lambda_ * grad / norm)
+        var_update_value = (tf.math.multiply(var_slice, lr) -
+                            (1 - lr) * lambda_ * grad / (norm + epsilon))
         var_update_kwargs = {
             'resource': var.handle,
             'indices': indices,
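Since both the dense and the sparse apply paths now divide by `norm + epsilon`, a step whose gradient is exactly zero no longer yields NaN updates. A small smoke-test sketch (variable values and hyperparameters are illustrative, not taken from the project's test suite):

```
import tensorflow as tf
import tensorflow_addons as tfa  # assumes TensorFlow Addons with this change is installed

var = tf.Variable([1.0, 2.0])
opt = tfa.optimizers.ConditionalGradient(learning_rate=0.1, lambda_=0.01)

# An all-zero gradient has Frobenius norm 0; before this change the update
# divided by that norm and produced NaN, now it stays finite.
opt.apply_gradients([(tf.zeros_like(var), var)])
print(var.numpy())  # expected: 0.1 * [1.0, 2.0] = [0.1, 0.2], with no NaNs
```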
