"""Utilities related to loss functions."""

from tensorflow.python.distribute import distribution_strategy_context
from tensorflow.python.framework import ops
from tensorflow.python.keras import backend
from tensorflow.python.keras.engine import keras_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.util.tf_export import keras_export


@keras_export('keras.losses.Reduction', v1=[])
class ReductionV2(object):
  """Types of loss reduction.

  Contains the following values:

  * `AUTO`: Indicates that the reduction option will be determined by the usage
     context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
     used with `tf.distribute.Strategy`, outside of built-in training loops such
     as `tf.keras` `compile` and `fit`, we expect reduction value to be
     `SUM` or `NONE`. Using `AUTO` in that case will raise an error.
  * `NONE`: No **additional** reduction is applied to the output of the wrapped
     loss function. When non-scalar losses are returned to Keras functions like
     `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer
     but the reported loss will be a scalar value.

     Caution: **Verify the shape of the outputs when using** `Reduction.NONE`.
     The builtin loss functions wrapped by the loss classes reduce
     one dimension (`axis=-1`, or `axis` if specified by loss function).
     `Reduction.NONE` just means that no **additional** reduction is applied by
     the class wrapper. For categorical losses with an example input shape of
     `[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For
     pointwise losses you must include a dummy axis so that `[batch, W, H, 1]`
     is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]`
     will be incorrectly reduced to `[batch, W]`.

  * `SUM`: Scalar sum of weighted losses.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
     This reduction type is not supported when used with
     `tf.distribute.Strategy` outside of built-in training loops like `tf.keras`
     `compile`/`fit`.

     You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
     ```
     with strategy.scope():
       loss_obj = tf.keras.losses.CategoricalCrossentropy(
           reduction=tf.keras.losses.Reduction.NONE)
       ....
       loss = tf.reduce_sum(loss_obj(labels, predictions)) *
           (1. / global_batch_size)
     ```

  Please see the [custom training guide](
  https://www.tensorflow.org/tutorials/distribute/custom_training) for more
  details on this.
  autononesumZsum_over_batch_sizec             C   s   | j | j| j| jfS )N)AUTONONESUMSUM_OVER_BATCH_SIZE)cls r   \/var/www/html/venv/lib/python3.7/site-packages/tensorflow/python/keras/utils/losses_utils.pyallQ   s    zReductionV2.allc             C   s   ||   krtd| d S )NzInvalid Reduction Key %s.)r   
ValueError)r   keyr   r   r   validateU   s    zReductionV2.validateN)__name__
__module____qualname____doc__r   r   r   r   classmethodr   r   r   r   r   r   r      s   -r   Nc       	   	      s|  t |p
db ttjs(tt tjs>t  j}|j} j}|j}|dk	r|dk	r|| }||d kr|j	d 
drtdgn*||d kr|j	d 
drt dg  fS tt  }|dks|j	d 
dr$tt|d |fddfdd|dks@|j	d 
drjtt|d | fdd fd	d  fS Q R X dS )


def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None):
  """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
  with backend.name_scope(name or 'remove_squeezable_dimensions'):
    if not isinstance(predictions, ragged_tensor.RaggedTensor):
      predictions = ops.convert_to_tensor_v2_with_dispatch(predictions)
    if not isinstance(labels, ragged_tensor.RaggedTensor):
      labels = ops.convert_to_tensor_v2_with_dispatch(labels)

    predictions_shape = predictions.shape
    predictions_rank = predictions_shape.ndims
    labels_shape = labels.shape
    labels_rank = labels_shape.ndims
    if (labels_rank is not None) and (predictions_rank is not None):
      # Use static rank.
      rank_diff = predictions_rank - labels_rank
      if (rank_diff == expected_rank_diff + 1 and
          predictions_shape.dims[-1].is_compatible_with(1)):
        predictions = array_ops.squeeze(predictions, [-1])
      elif (rank_diff == expected_rank_diff - 1 and
            labels_shape.dims[-1].is_compatible_with(1)):
        labels = array_ops.squeeze(labels, [-1])
      return labels, predictions

    # Use dynamic rank.
    rank_diff = array_ops.rank(predictions) - array_ops.rank(labels)
    if (predictions_rank is None or
        predictions_shape.dims[-1].is_compatible_with(1)):
      predictions = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff + 1, rank_diff),
          lambda: array_ops.squeeze(predictions, [-1]),
          lambda: predictions)
    if (labels_rank is None or
        labels_shape.dims[-1].is_compatible_with(1)):
      labels = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff - 1, rank_diff),
          lambda: array_ops.squeeze(labels, [-1]),
          lambda: labels)
    return labels, predictions
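

# Illustrative example for `remove_squeezable_dimensions` above (comments
# only, not part of the original module; assumes `import tensorflow as tf`).
# With the default `expected_rank_diff=0`, the higher-rank argument loses its
# trailing size-1 dimension:
#
#   labels = tf.zeros([8])            # rank 1
#   predictions = tf.zeros([8, 1])    # rank 2, last dim 1
#   labels, predictions = remove_squeezable_dimensions(labels, predictions)
#   # predictions now has shape [8]; labels is unchanged.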
dgfS t}
|
t fddfd	d
  fdd}tt|
dfdd|fS )a  Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    its last dimension squeezed, and `sample_weight` may have been extended by
    one dimension. If `sample_weight` is None, only (y_pred, y_true) is
    returned.
  """
  y_pred_shape = y_pred.shape
  y_pred_rank = y_pred_shape.ndims
  if y_true is not None:
    # If sparse class IDs are provided as `y_true`, the last dimension of
    # `y_pred` may be > 1, e.g. `y_true` of shape [3] against `y_pred` of
    # shape [3, n_classes]; in that case do not try to squeeze.
    y_true_shape = y_true.shape
    y_true_rank = y_true_shape.ndims
    if (y_true_rank is not None) and (y_pred_rank is not None):
      # Use static rank for `y_true` and `y_pred`.
      if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
        y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
    else:
      # Use dynamic rank.
      rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)
      squeeze_dims = lambda: remove_squeezable_dimensions(y_true, y_pred)
      is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])
      maybe_squeeze_dims = lambda: control_flow_ops.cond(  # pylint: disable=g-long-lambda
          is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
      y_true, y_pred = control_flow_ops.cond(
          math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)

  if sample_weight is None:
    return y_pred, y_true

  weights_shape = sample_weight.shape
  weights_rank = weights_shape.ndims
  if weights_rank == 0:  # If weights is scalar, do nothing.
    return y_pred, y_true, sample_weight

  if (y_pred_rank is not None) and (weights_rank is not None):
    # Use static rank.
    if weights_rank - y_pred_rank == 1:
      sample_weight = array_ops.squeeze(sample_weight, [-1])
    elif y_pred_rank - weights_rank == 1:
      sample_weight = array_ops.expand_dims(sample_weight, [-1])
    return y_pred, y_true, sample_weight

  # Use dynamic rank.
  weights_rank_tensor = array_ops.rank(sample_weight)
  rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
  maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

  def _maybe_expand_weights():
    expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1])
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, -1), expand_weights, lambda: sample_weight)

  def _maybe_adjust_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, 1), maybe_squeeze_weights,
        _maybe_expand_weights)

  # Squeeze or expand last dim of `sample_weight` if its rank differs by 1
  # from the new rank of `y_pred`.
  sample_weight = control_flow_ops.cond(
      math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
      _maybe_adjust_weights)
  return y_pred, y_true, sample_weight
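

# Illustrative example for `squeeze_or_expand_dimensions` above (comments
# only, not part of the original module; assumes `import tensorflow as tf`):
#
#   y_pred, y_true, w = squeeze_or_expand_dimensions(
#       tf.zeros([8, 1]), tf.zeros([8, 1]), sample_weight=tf.ones([8]))
#   # w was expanded from shape [8] to [8, 1] to match y_pred; a scalar
#   # sample_weight would have been returned unchanged.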


def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name='value')
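

# Note on `_safe_mean` above (illustrative, not part of the original module):
# `div_no_nan` is what makes the mean "safe" -- a zero `num_present` yields
# 0.0 rather than NaN:
#
#   _safe_mean(tf.constant([1., 3.]), 2.)   # -> 2.0
#   _safe_mean(tf.constant([]), 0.)         # -> 0.0, not NaN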


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with backend.name_scope('num_elements') as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


def reduce_weighted_loss(weighted_losses,
                         reduction=ReductionV2.SUM_OVER_BATCH_SIZE):
  """Reduces the individual weighted loss measurements."""
  if reduction == ReductionV2.NONE:
    loss = weighted_losses
  else:
    loss = math_ops.reduce_sum(weighted_losses)
    if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
      loss = _safe_mean(loss, _num_elements(weighted_losses))
  return loss
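

# Illustrative example for `reduce_weighted_loss` above (comments only, not
# part of the original module): for weighted losses [1., 2., 3.], `SUM`
# returns 6.0, `SUM_OVER_BATCH_SIZE` returns 6.0 / 3 = 2.0, and `NONE`
# returns the vector unchanged.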


@keras_export('keras.__internal__.losses.compute_weighted_loss', v1=[])
def compute_weighted_loss(losses,
                          sample_weight=None,
                          reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
                          name=None):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
      as `losses`, or is broadcastable to `losses`.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.

  Raises:
    ValueError: If the shape of `sample_weight` is not compatible with `losses`.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
  """
  ReductionV2.validate(reduction)
  # If this function is called directly, default `AUTO` to
  # `SUM_OVER_BATCH_SIZE`.
  if reduction == ReductionV2.AUTO:
    reduction = ReductionV2.SUM_OVER_BATCH_SIZE
  if sample_weight is None:
    sample_weight = 1.0
  with backend.name_scope(name or 'weighted_loss'):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    if not isinstance(losses,
                      (keras_tensor.KerasTensor, ragged_tensor.RaggedTensor)):
      losses = ops.convert_to_tensor_v2_with_dispatch(losses)
    input_dtype = losses.dtype

    if not isinstance(sample_weight, keras_tensor.KerasTensor):
      sample_weight = ops.convert_to_tensor_v2_with_dispatch(sample_weight)

    losses = math_ops.cast(losses, 'float32')
    sample_weight = math_ops.cast(sample_weight, 'float32')
    # Update dimensions of `sample_weight` to match `losses` if possible.
    losses, _, sample_weight = squeeze_or_expand_dimensions(  # pylint: disable=unbalanced-tuple-unpacking
        losses, None, sample_weight)
    weighted_losses = math_ops.multiply(losses, sample_weight)

    # Apply reduction function to the individual weighted losses.
    loss = reduce_weighted_loss(weighted_losses, reduction)
    # Convert the result back to the input type.
    loss = math_ops.cast(loss, input_dtype)
    return loss
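

# Illustrative example for `compute_weighted_loss` above (comments only, not
# part of the original module; assumes `import tensorflow as tf`):
#
#   compute_weighted_loss(tf.constant([1., 2., 3.]),
#                         sample_weight=tf.constant([1., 0., 1.]))
#   # Default reduction is SUM_OVER_BATCH_SIZE:
#   # (1*1 + 2*0 + 3*1) / 3 elements = 4/3.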


def scale_loss_for_distribution(loss_value):
  """Scales and returns the given loss value by the number of replicas."""
  num_replicas = (
      distribution_strategy_context.get_strategy().num_replicas_in_sync)
  if num_replicas > 1:
    loss_value *= (1. / num_replicas)
  return loss_value
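

# Note on `scale_loss_for_distribution` above (illustrative, not part of the
# original module): with 4 replicas in sync, a loss of 8.0 is scaled to
# 8.0 * (1/4) = 2.0, so that contributions summed across replicas match the
# single-replica value.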


def cast_losses_to_common_dtype(losses):
  """Cast a list of losses to a common dtype.

  If any loss is floating-point, they will all be cast to the most precise
  floating-point dtype among them. Otherwise the losses are left unchanged.
  Casting is also skipped if any of the losses is complex.

  Args:
    losses: A list of losses.

  Returns:
    `losses`, but cast to a common dtype when applicable.
  NZbfloat16Zfloat16rI   c                s   g | ]}t | qS r   )r   rA   ).0rG   )highest_floatr   r   
<listcomp>o  s    z/cast_losses_to_common_dtype.<locals>.<listcomp>)r@   Zis_floatingrB   Z
is_complex)r>   rG   r   )rO   r   cast_losses_to_common_dtypeX  s    
rQ   )r   N)NN)r   Ztensorflow.python.distributer   Ztensorflow.python.frameworkr   Ztensorflow.python.kerasr   Ztensorflow.python.keras.enginer   Ztensorflow.python.opsr   r   r   Ztensorflow.python.ops.raggedr	   Z tensorflow.python.util.tf_exportr
   objectr   r   r;   r?   rD   r   rH   rL   rM   rQ   r   r   r   r   <module>   s,   >
>
Y
7	