"""Ftrl-proximal optimizer implementation."""

from tensorflow.python.keras.optimizer_v2 import optimizer_v2
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import gen_training_ops
from tensorflow.python.util.tf_export import keras_export


@keras_export('keras.optimizers.Ftrl')
class Ftrl(optimizer_v2.OptimizerV2):
  r"""Optimizer that implements the FTRL algorithm.

  "Follow The Regularized Leader" (FTRL) is an optimization algorithm developed
  at Google for click-through rate prediction in the early 2010s. It is most
  suitable for shallow models with large and sparse feature spaces.
  The algorithm is described by
  [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
  The Keras version has support for both online L2 regularization
  (the L2 regularization described in the paper
  above) and shrinkage-type L2 regularization
  (which is the addition of an L2 penalty to the loss function).

  Initialization:

  ```python
  n = 0
  sigma = 0
  z = 0
  ```

  Update rule for one variable `w`:

  ```python
  prev_n = n
  n = n + g ** 2
  sigma = (sqrt(n) - sqrt(prev_n)) / lr
  z = z + g - sigma * w
  if abs(z) < lambda_1:
    w = 0
  else:
    w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2)
  ```

  Notation:

  - `lr` is the learning rate (the `alpha` in the last line above denotes the
    same learning rate)
  - `g` is the gradient for the variable
  - `lambda_1` is the L1 regularization strength
  - `lambda_2` is the L2 regularization strength
  - `beta` is the `beta` argument of the optimizer
  - `sgn(z)` is the sign of `z`

  Check the documentation for the `l2_shrinkage_regularization_strength`
  parameter for more details when shrinkage is enabled, in which case the
  gradient `g` above is replaced with a gradient with shrinkage (see the
  sketch below).
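
  As a rough, illustrative sketch only (not the fused implementation used by
  this optimizer), a single update step for one scalar weight can be written
  in plain Python. Here `alpha` is taken to be the same quantity as `lr`, and
  the `g + 2 * l2_shrinkage * w` form of the shrinkage gradient is an
  assumption rather than something stated above:

  ```python
  import math

  def ftrl_update(w, n, z, g, lr, beta, lambda_1, lambda_2, l2_shrinkage=0.0):
    # Gradient used for the linear term; the accumulator keeps the raw g ** 2.
    g_shrink = g + 2.0 * l2_shrinkage * w
    prev_n = n
    n = n + g ** 2
    sigma = (math.sqrt(n) - math.sqrt(prev_n)) / lr
    z = z + g_shrink - sigma * w
    if abs(z) < lambda_1:
      w = 0.0
    else:
      # math.copysign(lambda_1, z) == sgn(z) * lambda_1 for lambda_1 >= 0.
      w = (math.copysign(lambda_1, z) - z) / (
          (beta + math.sqrt(n)) / lr + lambda_2)
    return w, n, z
  ```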

  Args:
    learning_rate: A `Tensor`, floating point value, or a schedule that is a
      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
    learning_rate_power: A float value, must be less than or equal to zero.
      Controls how the learning rate decreases during training. Use zero for
      a fixed learning rate. Defaults to -0.5.
    initial_accumulator_value: The starting value for accumulators.
      Only zero or positive values are allowed. Defaults to 0.1.
    l1_regularization_strength: A float value, must be greater than or
      equal to zero. Defaults to 0.0.
    l2_regularization_strength: A float value, must be greater than or
      equal to zero. Defaults to 0.0.
    name: Optional name prefix for the operations created when applying
      gradients.  Defaults to `"Ftrl"`.
    l2_shrinkage_regularization_strength: A float value, must be greater than
      or equal to zero. This differs from L2 above in that the L2 above is a
      stabilization penalty, whereas this L2 shrinkage is a magnitude penalty.
      When the input is sparse, shrinkage will only happen on the active
      weights.
    beta: A float value, representing the beta value from the paper.
      Defaults to 0.0.
    **kwargs: Keyword arguments. Allowed to be one of
      `"clipnorm"` or `"clipvalue"`.
      `"clipnorm"` (float) clips gradients by norm; `"clipvalue"` (float) clips
      gradients by value.

  Reference:
    - [McMahan et al., 2013](
      https://research.google.com/pubs/archive/41159.pdf)
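
  Usage (an illustrative sketch; the scalar variable and loss below are
  placeholders, not part of the optimizer itself):

  ```python
  opt = tf.keras.optimizers.Ftrl(learning_rate=0.1,
                                 l1_regularization_strength=0.01)
  var1 = tf.Variable(10.0)
  loss = lambda: (var1 ** 2) / 2.0   # d(loss) / d(var1) == var1
  opt.minimize(loss, var_list=[var1])
  var1.numpy()  # var1 should have moved toward zero
  ```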
  """

  def __init__(self,
               learning_rate=0.001,
               learning_rate_power=-0.5,
               initial_accumulator_value=0.1,
               l1_regularization_strength=0.0,
               l2_regularization_strength=0.0,
               name='Ftrl',
               l2_shrinkage_regularization_strength=0.0,
               beta=0.0,
               **kwargs):
    super(Ftrl, self).__init__(name, **kwargs)

    # Validate the hyperparameters before storing them.
    if initial_accumulator_value < 0.0:
      raise ValueError(
          'initial_accumulator_value %f needs to be positive or zero' %
          initial_accumulator_value)
    if learning_rate_power > 0.0:
      raise ValueError('learning_rate_power %f needs to be negative or zero' %
                       learning_rate_power)
    if l1_regularization_strength < 0.0:
      raise ValueError(
          'l1_regularization_strength %f needs to be positive or zero' %
          l1_regularization_strength)
    if l2_regularization_strength < 0.0:
      raise ValueError(
          'l2_regularization_strength %f needs to be positive or zero' %
          l2_regularization_strength)
    if l2_shrinkage_regularization_strength < 0.0:
      raise ValueError(
          'l2_shrinkage_regularization_strength %f needs to be positive'
          ' or zero' % l2_shrinkage_regularization_strength)

    self._set_hyper('learning_rate', learning_rate)
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('learning_rate_power', learning_rate_power)
    self._set_hyper('l1_regularization_strength', l1_regularization_strength)
    self._set_hyper('l2_regularization_strength', l2_regularization_strength)
    self._set_hyper('beta', beta)
    self._initial_accumulator_value = initial_accumulator_value
    self._l2_shrinkage_regularization_strength = (
        l2_shrinkage_regularization_strength)

  def _create_slots(self, var_list):
    # Create the "accumulator" and "linear" slots for each trainable variable.
    for var in var_list:
      dtype = var.dtype.base_dtype
      init = init_ops.constant_initializer(
          self._initial_accumulator_value, dtype=dtype)
      self.add_slot(var, 'accumulator', init)
      self.add_slot(var, 'linear')

  def _prepare_local(self, var_device, var_dtype, apply_state):
    super(Ftrl, self)._prepare_local(var_device, var_dtype, apply_state)
    apply_state[(var_device, var_dtype)].update(
        dict(
            learning_rate_power=array_ops.identity(
                self._get_hyper('learning_rate_power', var_dtype)),
            l1_regularization_strength=array_ops.identity(
                self._get_hyper('l1_regularization_strength', var_dtype)),
            l2_regularization_strength=array_ops.identity(
                self._get_hyper('l2_regularization_strength', var_dtype)),
            beta=array_ops.identity(self._get_hyper('beta', var_dtype)),
            l2_shrinkage_regularization_strength=math_ops.cast(
                self._l2_shrinkage_regularization_strength, var_dtype)))

  def _resource_apply_dense(self, grad, var, apply_state=None):
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))

    # Fold beta into the L2 term so the fused training op does not need a
    # separate beta input.
    adjusted_l2_regularization_strength = (
        coefficients['l2_regularization_strength'] +
        coefficients['beta'] / (2. * coefficients['lr_t']))

    accum = self.get_slot(var, 'accumulator')
    linear = self.get_slot(var, 'linear')

    if self._l2_shrinkage_regularization_strength <= 0.0:
      return gen_training_ops.ResourceApplyFtrl(
          var=var.handle, accum=accum.handle, linear=linear.handle,
          grad=grad, lr=coefficients['lr_t'],
          l1=coefficients['l1_regularization_strength'],
          l2=adjusted_l2_regularization_strength,
          lr_power=coefficients['learning_rate_power'],
          use_locking=self._use_locking)
    else:
      return gen_training_ops.ResourceApplyFtrlV2(
          var=var.handle, accum=accum.handle, linear=linear.handle,
          grad=grad, lr=coefficients['lr_t'],
          l1=coefficients['l1_regularization_strength'],
          l2=adjusted_l2_regularization_strength,
          l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'],
          lr_power=coefficients['learning_rate_power'],
          use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))

    # Same beta folding as in the dense case.
    adjusted_l2_regularization_strength = (
        coefficients['l2_regularization_strength'] +
        coefficients['beta'] / (2. * coefficients['lr_t']))

    accum = self.get_slot(var, 'accumulator')
    linear = self.get_slot(var, 'linear')

    if self._l2_shrinkage_regularization_strength <= 0.0:
      return gen_training_ops.ResourceSparseApplyFtrl(
          var=var.handle, accum=accum.handle, linear=linear.handle,
          grad=grad, indices=indices, lr=coefficients['lr_t'],
          l1=coefficients['l1_regularization_strength'],
          l2=adjusted_l2_regularization_strength,
          lr_power=coefficients['learning_rate_power'],
          use_locking=self._use_locking)
    else:
      return gen_training_ops.ResourceSparseApplyFtrlV2(
          var=var.handle, accum=accum.handle, linear=linear.handle,
          grad=grad, indices=indices, lr=coefficients['lr_t'],
          l1=coefficients['l1_regularization_strength'],
          l2=adjusted_l2_regularization_strength,
          l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'],
          lr_power=coefficients['learning_rate_power'],
          use_locking=self._use_locking)

  def get_config(self):
    config = super(Ftrl, self).get_config()
    config.update({
        'learning_rate': self._serialize_hyperparameter('learning_rate'),
        'decay': self._initial_decay,
        'initial_accumulator_value': self._initial_accumulator_value,
        'learning_rate_power':
            self._serialize_hyperparameter('learning_rate_power'),
        'l1_regularization_strength':
            self._serialize_hyperparameter('l1_regularization_strength'),
        'l2_regularization_strength':
            self._serialize_hyperparameter('l2_regularization_strength'),
        'beta': self._serialize_hyperparameter('beta'),
        'l2_shrinkage_regularization_strength':
            self._l2_shrinkage_regularization_strength,
    })
    return config