"""Base class for attention layers that can be used in sequence DNN/CNN models.

This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
Attention is formed by three tensors: Query, Key and Value.
    N)logging)backend)
base_layer)control_flow_utilc                   sp   e Zd ZdZd fdd	Z fddZdd Zdd
dZdddZdddZ	dd Z
dd Z fddZ  ZS )BaseDenseAttentionaX  Base Attention class for Dense networks.

    This class is suitable for Dense or CNN networks, and not for RNN networks.

    Implementations of attention mechanisms should inherit from this class, and
    reuse the `_apply_scores()` method.

    Args:
      dropout: Float between 0 and 1. Fraction of the units to drop for the
        attention scores.

    Call Args:
      inputs: List of the following tensors:
        * query: Query `Tensor` of shape `[batch_size, Tq, dim]`.
        * value: Value `Tensor` of shape `[batch_size, Tv, dim]`.
        * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If not
          given, will use `value` for both `key` and `value`, which is the most
          common case.
      mask: List of the following tensors:
        * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`. If
          given, the output will be zero at the positions where `mask==False`.
        * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. If
          given, will apply the mask such that values at positions where
          `mask==False` do not contribute to the result.
      training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (no dropout).
      return_attention_scores: bool, if `True`, returns the attention scores
        (after masking and softmax) as an additional output argument.

    Output:

      Attention outputs of shape `[batch_size, Tq, dim]`.
      [Optional] Attention scores after masking and softmax with shape
        `[batch_size, Tq, Tv]`.
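
    Example:

    A minimal sketch of the intended subclassing pattern. The dot-product
    subclass `MyDotProductAttention` below is illustrative only (it is not
    part of this module): it implements `_calculate_scores()` and lets the
    base class handle masking, softmax, dropout, and the weighted sum.

    >>> class MyDotProductAttention(BaseDenseAttention):
    ...     def _calculate_scores(self, query, key):
    ...         # [batch_size, Tq, dim] x [batch_size, Tv, dim] -> scores
    ...         # of shape [batch_size, Tq, Tv].
    ...         return tf.matmul(query, key, transpose_b=True)
    >>> layer = MyDotProductAttention(dropout=0.1)
    >>> query = tf.random.normal([2, 4, 8])  # [batch_size, Tq, dim]
    >>> value = tf.random.normal([2, 6, 8])  # [batch_size, Tv, dim]
    >>> output = layer([query, value])
    >>> output.shape
    TensorShape([2, 4, 8])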
    """

    def __init__(self, dropout=0.0, **kwargs):
        # `causal` is deprecated in favor of the `use_causal_mask` call
        # argument, but is still popped from kwargs so older call sites
        # keep working.
        if "causal" in kwargs:
            logging.warning(
                "`causal` argument is deprecated. Please use `use_causal_mask` "
                "in call() method to specify causal masking."
            )
        self.causal = kwargs.pop("causal", False)
        super().__init__(**kwargs)
        self.dropout = dropout
        self.supports_masking = True

    def build(self, input_shape):
        # The random generator inherited from `BaseRandomLayer` is only needed
        # for dropout, so skip building it when dropout is disabled.
        if self.dropout > 0:
            super().build(input_shape)
        self.built = True
zBaseDenseAttention.buildc             C   s   t S )zCalculates attention scores.

        Args:
          query: Query tensor of shape `[batch_size, Tq, dim]`.
          key: Key tensor of shape `[batch_size, Tv, dim]`.

        Returns:
          Tensor of shape `[batch_size, Tq, Tv]`.
        """
        raise NotImplementedError

    def _apply_scores(self, scores, value, scores_mask=None, training=None):
        """Applies attention scores to the given value tensor.

        To use this method in your attention layer, follow the steps:

        * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of
          shape `[batch_size, Tv]` to calculate the attention `scores`.
        * Pass `scores` and `value` tensors to this method. The method applies
          `scores_mask`, calculates `attention_distribution = softmax(scores)`,
          then returns `matmul(attention_distribution, value)`.
        * Apply `query_mask` and return the result.

        Args:
          scores: Scores float tensor of shape `[batch_size, Tq, Tv]`.
          value: Value tensor of shape `[batch_size, Tv, dim]`.
          scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` or
            `[batch_size, Tq, Tv]`. If given, scores at positions where
            `scores_mask==False` do not contribute to the result. It must
            contain at least one `True` value in each row along the last
            dimension.
          training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).

        Returns:
          Tensor of shape `[batch_size, Tq, dim]`.
          Attention scores after masking and softmax with shape
            `[batch_size, Tq, Tv]`.
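
        For example (an illustrative walkthrough, not extra API): with
        `scores` of shape `[2, 4, 6]` and `value` of shape `[2, 6, 8]`, the
        softmax normalizes over the last axis of `scores`, the returned
        output has shape `[2, 4, 8]`, and the returned attention weights
        keep shape `[2, 4, 6]`.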
        """
        if scores_mask is not None:
            padding_mask = tf.logical_not(scores_mask)
            # Bias the scores so that padded positions receive a large
            # negative value and vanish after the softmax.
            # Note 65504. is the max float16 value.
            if scores.dtype is tf.float16:
                scores -= 65504.0 * tf.cast(padding_mask, dtype=scores.dtype)
            else:
                scores -= 1.0e9 * tf.cast(padding_mask, dtype=scores.dtype)
        if training is None:
            training = backend.learning_phase()
        weights = tf.nn.softmax(scores)
        if self.dropout > 0:

            def dropped_weights():
                return self._random_generator.dropout(
                    weights, rate=self.dropout
                )

            weights = control_flow_util.smart_cond(
                training, dropped_weights, lambda: tf.identity(weights)
            )
        return tf.matmul(weights, value), weights
    def call(
        self,
        inputs,
        mask=None,
        training=None,
        return_attention_scores=False,
        use_causal_mask=False,
    ):
        self._validate_call_args(inputs=inputs, mask=mask)
        q = inputs[0]
        v = inputs[1]
        k = inputs[2] if len(inputs) > 2 else v
        q_mask = mask[0] if mask else None
        v_mask = mask[1] if mask else None
        scores = self._calculate_scores(query=q, key=k)
        if v_mask is not None:
            # Mask of shape [batch_size, 1, Tv].
            v_mask = tf.expand_dims(v_mask, axis=-2)
        if self.causal or use_causal_mask:
            # Creates a lower triangular mask, so position i cannot attend to
            # positions j > i. This prevents the flow of information from the
            # future into the past.
            scores_shape = tf.shape(scores)
            # causal_mask_shape = [1, Tq, Tv].
            causal_mask_shape = tf.concat(
                [tf.ones_like(scores_shape[:-2]), scores_shape[-2:]], axis=0
            )
            causal_mask = _lower_triangular_mask(causal_mask_shape)
        else:
            causal_mask = None
        scores_mask = _merge_masks(v_mask, causal_mask)
        result, attention_scores = self._apply_scores(
            scores=scores, value=v, scores_mask=scores_mask, training=training
        )
        if q_mask is not None:
            # Mask of shape [batch_size, Tq, 1].
            q_mask = tf.expand_dims(q_mask, axis=-1)
            result *= tf.cast(q_mask, dtype=result.dtype)
        if return_attention_scores:
            return result, attention_scores
        return result

    def compute_mask(self, inputs, mask=None):
        self._validate_call_args(inputs=inputs, mask=mask)
        if mask:
            q_mask = mask[0]
            if q_mask is None:
                return None
            return tf.convert_to_tensor(q_mask)
        return None
zBaseDenseAttention.compute_maskc             C   s   t |d S )Nr   )r   ZTensorShape)r   r   r   r   r   compute_output_shape   s    z'BaseDenseAttention.compute_output_shapec             C   s   | j j}t|ts&t| d| dt|dk s>t|dkrVt| dt| d|rt|tsxt| d| dt|dk st|t|krt| dt| ddS )	z'Validates arguments of the call method.zc layer must be called on a list of inputs, namely [query, value] or [query, value, key]. Received: .r+      zl layer accepts inputs list of length 2 or 3, namely [query, value] or [query, value, key]. Received length: zG layer mask must be a list, namely [query_mask, value_mask]. Received: zZ layer mask must be a list of length 2, namely [query_mask, value_mask]. Received length: N)r   __name__
isinstancelist
ValueErrorr0   )r   r(   r)   
class_namer   r   r   r/      s    

z&BaseDenseAttention._validate_call_argsc                s0   d| j i}t  }tt| t|  S )Nr   )r   r   
get_configdictrA   items)r   configZbase_config)r   r   r   rD      s    

zBaseDenseAttention.get_config)r   )NN)NNFF)N)r?   
__module____qualname____doc__r   r   r   r'   r:   r;   r<   r/   rD   __classcell__r   r   )r   r   r      s   #
8   
$
def _lower_triangular_mask(shape):
    """Creates a lower-triangular boolean mask over the last 2 dimensions."""
    row_index = tf.cumsum(tf.ones(shape=shape, dtype=tf.int32), axis=-2)
    col_index = tf.cumsum(tf.ones(shape=shape, dtype=tf.int32), axis=-1)
    return tf.greater_equal(row_index, col_index)


def _merge_masks(x, y):
    if x is None:
        return y
    if y is None:
        return x
    return tf.logical_and(x, y)
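

# A small illustration of the helper above (comment only; the values shown
# are an assumed worked example, not output captured from this module). For
# shape [1, 3, 3], `_lower_triangular_mask` produces
#
#   [[[ True, False, False],
#     [ True,  True, False],
#     [ True,  True,  True]]]
#
# i.e. position i may only attend to positions j <= i, which is exactly the
# causal mask that `BaseDenseAttention.call()` merges with the value mask via
# `_merge_masks()`.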