"""Recurrent layers and their base classes."""

import collections
import warnings

import numpy as np

from tensorflow.python.distribute import distribution_strategy_context as ds_context
from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import activations
from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.saving.saved_model import layer_serialization
from tensorflow.python.keras.utils import control_flow_util
from tensorflow.python.keras.utils import generic_utils
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training.tracking import base as trackable
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import keras_export
from tensorflow.tools.docs import doc_controls

RECURRENT_DROPOUT_WARNING_MSG = (
    'RNN `implementation=2` is not supported when `recurrent_dropout` is set. '
    'Using `implementation=1`.')


@keras_export('keras.layers.StackedRNNCells')
class StackedRNNCells(Layer):
  """Wrapper allowing a stack of RNN cells to behave as a single cell.

  Used to implement efficient stacked RNNs.

  Args:
    cells: List of RNN cell instances.

  Examples:

  ```python
  batch_size = 3
  sentence_max_length = 5
  n_features = 2
  new_shape = (batch_size, sentence_max_length, n_features)
  x = tf.constant(np.reshape(np.arange(30), new_shape), dtype = tf.float32)

  rnn_cells = [tf.keras.layers.LSTMCell(128) for _ in range(2)]
  stacked_lstm = tf.keras.layers.StackedRNNCells(rnn_cells)
  lstm_layer = tf.keras.layers.RNN(stacked_lstm)

  result = lstm_layer(x)
  ```
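  With `return_state=True`, the wrapping RNN layer also returns the final state
  of every cell in the stack (a minimal sketch continuing the example above):

  ```python
  stacked_lstm = tf.keras.layers.StackedRNNCells(
      [tf.keras.layers.LSTMCell(128) for _ in range(2)])
  lstm_layer = tf.keras.layers.RNN(stacked_lstm, return_state=True)
  output, *final_states = lstm_layer(x)  # one [h, c] state pair per LSTMCell
  ```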
  """

  def __init__(self, cells, **kwargs):
    for cell in cells:
      if not 'call' in dir(cell):
        raise ValueError('All cells must have a `call` method. '
                         'received cells:', cells)
      if not 'state_size' in dir(cell):
        raise ValueError('All cells must have a `state_size` attribute. '
                         'received cells:', cells)
    self.cells = cells
    # reverse_state_order determines whether the state sizes are reported in
    # the reversed order of the cells. Kept only for backwards compatibility.
    self.reverse_state_order = kwargs.pop('reverse_state_order', False)
    if self.reverse_state_order:
      logging.warning('reverse_state_order=True in StackedRNNCells will soon '
                      'be deprecated. Please update the code to work with the '
                      'natural order of states if you rely on the RNN states, '
                      'eg RNN(return_state=True).')
    super(StackedRNNCells, self).__init__(**kwargs)

  @property
  def state_size(self):
    return tuple(c.state_size for c in
                 (self.cells[::-1] if self.reverse_state_order else self.cells))

  @property
  def output_size(self):
    if getattr(self.cells[-1], 'output_size', None) is not None:
      return self.cells[-1].output_size
    elif _is_multiple_state(self.cells[-1].state_size):
      return self.cells[-1].state_size[0]
    else:
      return self.cells[-1].state_size

  def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    initial_states = []
    for cell in self.cells[::-1] if self.reverse_state_order else self.cells:
      get_initial_state_fn = getattr(cell, 'get_initial_state', None)
      if get_initial_state_fn:
        initial_states.append(get_initial_state_fn(
            inputs=inputs, batch_size=batch_size, dtype=dtype))
      else:
        initial_states.append(_generate_zero_filled_state_for_cell(
            cell, inputs, batch_size, dtype))
    return tuple(initial_states)

  def call(self, inputs, states, constants=None, training=None, **kwargs):
    # Recover the per-cell states from the flat state structure.
    state_size = (self.state_size[::-1]
                  if self.reverse_state_order else self.state_size)
    nested_states = nest.pack_sequence_as(state_size, nest.flatten(states))

    # Call the cells in order and collect the new states they return.
    new_nested_states = []
    for cell, states in zip(self.cells, nested_states):
      states = states if nest.is_nested(states) else [states]
      # TF RNN cells expect a single tensor as state instead of a list-wrapped
      # tensor.
      is_tf_rnn_cell = getattr(cell, '_is_tf_rnn_cell', None) is not None
      states = states[0] if len(states) == 1 and is_tf_rnn_cell else states
      if generic_utils.has_arg(cell.call, 'training'):
        kwargs['training'] = training
      else:
        kwargs.pop('training', None)
      # Use __call__ for callable objects, eg layers, so that proper name
      # scopes are created for the ops.
      cell_call_fn = cell.__call__ if callable(cell) else cell.call
      if generic_utils.has_arg(cell.call, 'constants'):
        inputs, states = cell_call_fn(inputs, states,
                                      constants=constants, **kwargs)
      else:
        inputs, states = cell_call_fn(inputs, states, **kwargs)
      new_nested_states.append(states)

    return inputs, nest.pack_sequence_as(state_size,
                                         nest.flatten(new_nested_states))

  @tf_utils.shape_type_conversion
  def build(self, input_shape):
    if isinstance(input_shape, list):
      input_shape = input_shape[0]
    for cell in self.cells:
      if isinstance(cell, Layer) and not cell.built:
        with backend.name_scope(cell.name):
          cell.build(input_shape)
          cell.built = True
      if getattr(cell, 'output_size', None) is not None:
        output_dim = cell.output_size
      elif _is_multiple_state(cell.state_size):
        output_dim = cell.state_size[0]
      else:
        output_dim = cell.state_size
      input_shape = tuple([input_shape[0]] +
                          tensor_shape.TensorShape(output_dim).as_list())
    self.built = True

  def get_config(self):
    cells = []
    for cell in self.cells:
      cells.append(generic_utils.serialize_keras_object(cell))
    config = {'cells': cells}
    base_config = super(StackedRNNCells, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    from tensorflow.python.keras.layers import deserialize as deserialize_layer  # pylint: disable=g-import-not-at-top
    cells = []
    for cell_config in config.pop('cells'):
      cells.append(
          deserialize_layer(cell_config, custom_objects=custom_objects))
    return cls(cells, **config)


@keras_export('keras.layers.RNN')
class RNN(Layer):
  """Base class for recurrent layers.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  Args:
    cell: An RNN cell instance or a list of RNN cell instances.
      An RNN cell is a class that has:
      - A `call(input_at_t, states_at_t)` method, returning
        `(output_at_t, states_at_t_plus_1)`. The call method of the
        cell can also take the optional argument `constants`, see
        section "Note on passing external constants" below.
      - A `state_size` attribute. This can be a single integer
        (single state) in which case it is the size of the recurrent
        state. This can also be a list/tuple of integers (one size per state).
        The `state_size` can also be TensorShape or tuple/list of
        TensorShape, to represent high dimension state.
      - An `output_size` attribute. This can be a single integer or a
        TensorShape, which represents the shape of the output. For backward
        compatibility, if this attribute is not available for the
        cell, the value will be inferred from the first element of the
        `state_size`.
      - A `get_initial_state(inputs=None, batch_size=None, dtype=None)`
        method that creates a tensor meant to be fed to `call()` as the
        initial state, if the user didn't specify any initial state via other
        means. The returned initial state should have a shape of
        [batch_size, cell.state_size]. The cell might choose to create a
        tensor full of zeros, or full of other values based on the cell's
        implementation.
        `inputs` is the input tensor to the RNN layer, which should
        contain the batch size as its shape[0], and also dtype. Note that
        the shape[0] might be `None` during the graph construction. Either
        the `inputs` or the pair of `batch_size` and `dtype` are provided.
        `batch_size` is a scalar tensor that represents the batch size
        of the inputs. `dtype` is `tf.DType` that represents the dtype of
        the inputs.
        For backward compatibility, if this method is not implemented
        by the cell, the RNN layer will create a zero filled tensor with the
        size of [batch_size, cell.state_size].
      In the case that `cell` is a list of RNN cell instances, the cells
      will be stacked on top of each other in the RNN, resulting in an
      efficient stacked RNN.
    return_sequences: Boolean (default `False`). Whether to return the last
      output in the output sequence, or the full sequence.
    return_state: Boolean (default `False`). Whether to return the last state
      in addition to the output.
    go_backwards: Boolean (default `False`).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default `False`). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default `False`).
      If True, the network will be unrolled, else a symbolic loop will be used.
      Unrolling can speed-up a RNN, although it tends to be more
      memory-intensive. Unrolling is only suitable for short sequences.
    time_major: The shape format of the `inputs` and `outputs` tensors.
      If True, the inputs and outputs will be in shape
      `(timesteps, batch, ...)`, whereas in the False case, it will be
      `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
      efficient because it avoids transposes at the beginning and end of the
      RNN calculation. However, most TensorFlow data is batch-major, so by
      default this function accepts input and emits output in batch-major
      form.
    zero_output_for_mask: Boolean (default `False`).
      Whether the output should use zeros for the masked timesteps. Note that
      this field is only used when `return_sequences` is True and mask is
      provided. It can be useful if you want to reuse the raw output sequence of
      the RNN without interference from the masked timesteps, e.g., merging
      bidirectional RNNs.

  Call arguments:
    inputs: Input tensor.
    mask: Binary tensor of shape `[batch_size, timesteps]` indicating whether
      a given timestep should be masked. An individual `True` entry indicates
      that the corresponding timestep should be utilized, while a `False`
      entry indicates that the corresponding timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is for use with cells that use dropout.
    initial_state: List of initial state tensors to be passed to the first
      call of the cell.
    constants: List of constant tensors to be passed to the cell at each
      timestep.

  Input shape:
    N-D tensor with shape `[batch_size, timesteps, ...]` or
    `[timesteps, batch_size, ...]` when time_major is True.

  Output shape:
    - If `return_state`: a list of tensors. The first tensor is
      the output. The remaining tensors are the last states,
      each with shape `[batch_size, state_size]`, where `state_size` could
      be a high dimension tensor shape.
    - If `return_sequences`: N-D tensor with shape
      `[batch_size, timesteps, output_size]`, where `output_size` could
      be a high dimension tensor shape, or
      `[timesteps, batch_size, output_size]` when `time_major` is True.
    - Else, N-D tensor with shape `[batch_size, output_size]`, where
      `output_size` could be a high dimension tensor shape.

  Masking:
    This layer supports masking for input data with a variable number
    of timesteps. To introduce masks to your data,
    use an [tf.keras.layers.Embedding] layer with the `mask_zero` parameter
    set to `True`.
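    For example (a minimal sketch; the vocabulary and layer sizes here are
    arbitrary), timesteps whose input id is 0 are ignored by the LSTM:

    ```python
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=1000, output_dim=16, mask_zero=True),
        tf.keras.layers.LSTM(32),
    ])
    ```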

  Note on using statefulness in RNNs:
    You can set RNN layers to be 'stateful', which means that the states
    computed for the samples in one batch will be reused as initial states
    for the samples in the next batch. This assumes a one-to-one mapping
    between samples in different successive batches.

    To enable statefulness:
      - Specify `stateful=True` in the layer constructor.
      - Specify a fixed batch size for your model, by passing
        If sequential model:
          `batch_input_shape=(...)` to the first layer in your model.
        Else for functional model with 1 or more Input layers:
          `batch_shape=(...)` to all the first layers in your model.
        This is the expected shape of your inputs
        *including the batch size*.
        It should be a tuple of integers, e.g. `(32, 10, 100)`.
      - Specify `shuffle=False` when calling `fit()`.

    To reset the states of your model, call `.reset_states()` on either
    a specific layer, or on your entire model.
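    For example (a minimal sketch with an arbitrary fixed batch size of 32):

    ```python
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(16, stateful=True, batch_input_shape=(32, 10, 8)),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer='sgd', loss='mse')
    # model.fit(x, y, batch_size=32, shuffle=False)
    model.reset_states()  # drop the states carried over from the last batch
    ```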

  Note on specifying the initial state of RNNs:
    You can specify the initial state of RNN layers symbolically by
    calling them with the keyword argument `initial_state`. The value of
    `initial_state` should be a tensor or list of tensors representing
    the initial state of the RNN layer.

    You can specify the initial state of RNN layers numerically by
    calling `reset_states` with the keyword argument `states`. The value of
    `states` should be a numpy array or list of numpy arrays representing
    the initial state of the RNN layer.
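    For example (a minimal sketch; the LSTM's `[h, c]` state is provided through
    two extra `Input` tensors):

    ```python
    inputs = tf.keras.Input((None, 8))
    initial_h = tf.keras.Input((4,))
    initial_c = tf.keras.Input((4,))
    outputs = tf.keras.layers.LSTM(4)(inputs, initial_state=[initial_h, initial_c])
    ```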

  Note on passing external constants to RNNs:
    You can pass "external" constants to the cell using the `constants`
    keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This
    requires that the `cell.call` method accepts the same keyword argument
    `constants`. Such constants can be used to condition the cell
    transformation on additional static inputs (not changing over time),
    a.k.a. an attention mechanism.
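    For illustration, a sketch of a cell that consumes such a constant. The
    `ContextCell` below is hypothetical, not part of the API:

    ```python
    class ContextCell(tf.keras.layers.SimpleRNNCell):

      def call(self, inputs, states, constants=None, training=None):
        [context] = constants  # static tensor, identical at every timestep
        return super(ContextCell, self).call(
            inputs + context, states, training=training)

    x = tf.keras.Input((None, 4))
    context = tf.keras.Input((4,))
    y = tf.keras.layers.RNN(ContextCell(4))(x, constants=[context])
    ```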

  Examples:

  ```python
  # First, let's define a RNN Cell, as a layer subclass.

  class MinimalRNNCell(keras.layers.Layer):

      def __init__(self, units, **kwargs):
          self.units = units
          self.state_size = units
          super(MinimalRNNCell, self).__init__(**kwargs)

      def build(self, input_shape):
          self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                        initializer='uniform',
                                        name='kernel')
          self.recurrent_kernel = self.add_weight(
              shape=(self.units, self.units),
              initializer='uniform',
              name='recurrent_kernel')
          self.built = True

      def call(self, inputs, states):
          prev_output = states[0]
          h = backend.dot(inputs, self.kernel)
          output = h + backend.dot(prev_output, self.recurrent_kernel)
          return output, [output]

  # Let's use this cell in a RNN layer:

  cell = MinimalRNNCell(32)
  x = keras.Input((None, 5))
  layer = RNN(cell)
  y = layer(x)

  # Here's how to use the cell to build a stacked RNN:

  cells = [MinimalRNNCell(32), MinimalRNNCell(64)]
  x = keras.Input((None, 5))
  layer = RNN(cells)
  y = layer(x)
  ```
  Fc       
         s   t |ttfrt|}dt|kr,td|dt|kr@td|dd| _d|krd|ksfd	|kr|d	d |dd f}	|	|d< tt	| j
f | || _|| _|| _|| _|| _|| _|| _d
| _d | _d | _d | _d | _d| _|rt rtdd S )Nr   z7`cell` should have a `call` method. The RNN was passed:r   zbThe RNN cell should have an attribute `state_size` (tuple of integers, one integer per RNN state).zero_output_for_maskFrR   	input_diminput_lengthTr   zFRNNs with stateful=True not yet supported with tf.distribute.Strategy.)rJ   rK   r0   r   r   r   r!   ri   r$   rh   r%   r(   return_sequencesreturn_statego_backwardsstatefulunroll
time_majorZsupports_masking
input_spec
state_spec_statesconstants_spec_num_constants
ds_contextZhas_strategy)
r&   r(   rl   rm   rn   ro   rp   rq   r'   rR   )r)   r*   r+   r%     s<    	
zRNN.__init__c                s   | j r
dS tt| jS )NF)rp   r$   rh   !_use_input_spec_as_call_signature)r&   )r)   r*   r+   rx     s    z%RNN._use_input_spec_as_call_signaturec             C   s<   | j d kr6tdd | jj}t| jjr0|S |gS | j S )Nc             S   s   d S )Nr*   )_r*   r*   r+   <lambda>      zRNN.states.<locals>.<lambda>)rt   r   map_structurer(   r   rB   )r&   stater*   r*   r+   rG     s    
z
RNN.statesc             C   s
   || _ d S )N)rt   )r&   rG   r*   r*   r+   rG     s    c          	      s"  t |tr|d }yt|}W n& ttfk
rF   t|d }Y nX |d  |d jrh   t	j
jr~j
j}n
j
jg} fdd}tj
dd d k	rtt|j
j}t|dkr|d n|}n||d }jr fdd}t||}t|t| S |S d S )Nr   r<   c                sV   t |  }jrBjr.t  g| }qRt  g| }nt  g| }|S )N)r   rP   rQ   rl   rq   )Zflat_output_sizerS   output_shape)batchr&   	time_stepr*   r+   _get_output_shape  s    z3RNN.compute_output_shape.<locals>._get_output_shaper1   c                s    gt |   }t |S )N)r   rP   rQ   )Z
flat_statestate_shape)r   r*   r+   _get_state_shape   s    z2RNN.compute_output_shape.<locals>._get_state_shape)rJ   rK   r   rP   r   	TypeErrorr   r@   rq   r3   r(   r   r2   r|   r1   rC   rm   r   to_list)r&   rR   r   r   r~   r   r   r*   )r   r&   r   r+   compute_output_shape  s0    



zRNN.compute_output_shapec             C   sD   t |d }| jr|nd }| jr<dd | jD }|g| S |S d S )Nr   c             S   s   g | ]}d qS )Nr*   )r,   ry   r*   r*   r+   
<listcomp>  s    z$RNN.compute_mask.<locals>.<listcomp>)r   r@   rl   rm   rG   )r&   r5   maskZoutput_maskZ
state_maskr*   r*   r+   compute_mask  s    
zRNN.compute_maskc          	      s|  t |tr|d } fdd} fdd}yt|}W n ttfk
rP   Y nX t|s jd k	rv|| jd< n||g _||}n> jd k	rt	|| jd< nt
t	|| _t	||}t  jtr jjst jj  j| d j_W d Q R X t jjr,t jj}n
 jjg} jd k	rR | j ndd |D  _ jrr   d _d S )	Nr   c                sT   t | tjr|  }nt| } jr(dnd\}} js>d||< d||< tt|dS )z!Convert input shape to InputSpec.)r<   r   )r   r<   N)shape)	rJ   r   rP   rQ   rK   rq   ro   r   r0   )r   Zinput_spec_shapeZbatch_indexZtime_step_index)r&   r*   r+   get_input_spec  s    
z!RNN.build.<locals>.get_input_specc                s@   t | tjrt|  }  jr*| dd  S | d f| dd   S )Nr<   r      )rJ   r   rP   r0   rQ   rq   )r   )r&   r*   r+   get_step_input_shape)  s    z'RNN.build.<locals>.get_step_input_shapeTc             S   s&   g | ]}t d gt|  dqS )N)r   )r   r   rP   rQ   )r,   dimr*   r*   r+   r   X  s   zRNN.build.<locals>.<listcomp>)rJ   rK   r   rP   r   r   r   rB   rr   r|   r   r   r(   r   rL   r   rM   rN   rO   r3   r   rs   _validate_state_specro   reset_states)r&   rR   r   r   Zstep_input_shaper   r*   )r&   r+   rO     s@    
	





z	RNN.buildc             C   sx   t d|| }t| }t|}t|t|kr8|x:t||D ],\}}t|jdd 	t|sD|qDW dS )ae  Validate the state spec between the initial_state and the state_size.

    Args:
      cell_state_sizes: list, the `state_size` attribute from the cell.
      init_state_specs: list, the `state_spec` from the initial_state that is
        passed in `call()`.

    Raises:
      ValueError: When initial state spec is not compatible with the state size.
    zAn `initial_state` was passed that is not compatible with `cell.state_size`. Received `state_spec`={}; however `cell.state_size` is {}r<   N)
r   formatr   r@   rC   rA   r   rP   r   Zis_compatible_with)Zcell_state_sizesZinit_state_specsZvalidation_errorZflat_cell_state_sizesZflat_state_specsZcell_state_specZcell_state_sizer*   r*   r+   r   _  s    


zRNN._validate_state_specc             C   s   t | jdd }t|r&t|d }t|}| jr>|d n|d }|j}|r`|d ||d}nt	|| jj
|}t|s|g}t|S )Nr4   r   r<   )r5   r6   r7   )r2   r(   r   rB   r@   r   r   rq   r7   _generate_zero_filled_stater   rK   )r&   r5   r:   rR   r6   r7   Z
init_stater*   r*   r+   r4   }  s    



zRNN.get_initial_stateNc                s  t |||| j\}}}|d kr:|d kr:tt| j|f|S g }g }|d k	rn||7 }tdd || _|| j7 }|d k	r||7 }dd |D | _t	|| _|| j7 }t
|}|rt|d nd}x"|D ]}	t|	|krtdqW |rX|g| }
| jr
| j| }nttdd || }|| _tt| j|
f|}| jd t	|  | _|S |d k	rj||d	< |d k	r|||d
< tt| j|f|S d S )Nc             S   s   t t| dS )N)r   )r   r   	int_shape)sr*   r*   r+   rz     r{   zRNN.__call__.<locals>.<lambda>c             S   s   g | ]}t t|d qS ))r   )r   r   r   )r,   Zconstantr*   r*   r+   r     s    z RNN.__call__.<locals>.<listcomp>r   TzThe initial state or constants of an RNN layer cannot be specified with a mix of Keras tensors and non-Keras tensors (a "Keras tensor" is a tensor that was returned by a Keras layer, or by `Input`)c             S   s   d S )Nr*   )ry   r*   r*   r+   rz     r{   initial_stater>   )_standardize_argsrv   r$   rh   rF   r   r|   rs   ru   rC   r@   r   is_keras_tensorr   rL   rr   r   r   )r&   r5   r   r>   r'   Zadditional_inputsZadditional_specsZflat_additional_inputsr   ZtensorZ
full_inputZfull_input_specoutput)r)   r*   r+   rF     sL    







zRNN.__call__c                s2  t |\}}|d k	}|| |||\}}}j tjtrjxjjD ]}| qXW |d k	rt	
|d }t	|rt t	
|d }	n
t |}	jr|	d n|	d }
jr|
d krtdi tjjdr|d< tjdd d k	tjrjjnjj |rTtjjds@td fdd	}n fd
d	}t j||||j|j|d k	r|n|
jjd
\}}}jrdd tt	
jt	
|D }| jrt j|||jd}n|}jr*t|t t!fs|g}nt |}t"|| S |S d S )Nr   r<   a  Cannot unroll a RNN if the time dimension is undefined. 
- If using a Sequential model, specify the time dimension by passing an `input_shape` or `batch_input_shape` argument to your first layer. If your first layer is an Embedding, you can also use the `input_length` argument.
- If using the functional API, specify the time dimension by passing a `shape` or `batch_shape` argument to your Input layer.r=   r;   r>   z#RNN cell does not support constantsc                sn   |j  d  }|d j   }t|dkr8r8|d n|} | |fd|i\}}t|sf|g}||fS )Nr<   r   r>   )rv   rC   r   rB   )r5   rG   r>   r   
new_states)rI   rH   r'   r&   r*   r+   step  s    
zRNN.call.<locals>.stepc                sF   t |dkrr|d n|} | |f\}}t|s>|g}||fS )Nr<   r   )rC   r   rB   )r5   rG   r   r   )rI   rH   r'   r*   r+   r     s
    
)r>   rn   r   rp   rk   rq   ri   c             S   s   g | ]\}}t ||qS r*   )r   Zassign)r,   Z
self_stater}   r*   r*   r+   r   0  s    zRNN.call.<locals>.<listcomp>)rn   )#r   Zconvert_inputs_if_ragged_validate_args_if_ragged_process_inputs_maybe_reset_cell_dropout_maskr(   rJ   r   r    r   r@   rB   r   rq   rp   r   r   rD   r   r2   rE   rF   Zrnnrn   ri   ro   rA   rG   Z
add_updaterl   Zmaybe_convert_to_raggedrm   rK   r0   r   )r&   r5   r   r=   r   r>   Zrow_lengthsis_ragged_inputr(   rR   Z	timestepsr   Zlast_outputoutputsrG   Zupdatesr   r*   )rI   rH   r'   r&   r+   r     sh    


zRNN.callc                s  t |tjjrdt |tsdjs,|dd   n |dj   |j d  }t dkr\d  |d }jr d k	rt	dd t
jD }tj|dkfdd fdddd	 qЈj n d krЈ| t tjkrtd
ttj d tt  d | |fS )Nr<   r   c             S   s   g | ]}t |qS r*   )r   Zcount_nonzero_v2)r,   r   r*   r*   r+   r   Z  s   z'RNN._process_inputs.<locals>.<listcomp>c                  s    j S )N)rG   r*   )r&   r*   r+   rz   ^  r{   z%RNN._process_inputs.<locals>.<lambda>c                  s    S )Nr*   r*   )r   r*   r+   rz   _  r{   T)Ztrue_fnZfalse_fnstrictz
Layer has z states but was passed z initial states.)rJ   collectionsabcSequencer0   rv   rC   ro   r   Zadd_nr   r@   rG   r   Zcondr4   r   str)r&   r5   r   r>   Znon_zero_countr*   )r   r&   r+   r   D  s2    






 zRNN._process_inputsc             C   s6   |sd S |d k	r$t dt| d | jr2t dd S )Nz The mask that was passed in was zs and cannot be applied to RaggedTensor inputs. Please make sure that there is no mask passed in by upstream layers.zThe input received contains RaggedTensors and does not support unrolling. Disable unrolling by passing `unroll=False` in the RNN Layer constructor.)r   r   rp   )r&   r   r   r*   r*   r+   r   l  s    zRNN._validate_args_if_raggedc             C   s   t |tr|  |  d S )N)rJ   DropoutRNNCellMixinreset_dropout_maskreset_recurrent_dropout_mask)r&   r(   r*   r*   r+   r   z  s    
z"RNN._maybe_reset_cell_dropout_maskc          
   C   sJ  | j stdd}| jdk	r2t| jd d j}|dkr@d}n| jrN|d n|d }|sbtdt| jd dkrt	| j
ddrt| j
jd|| jpt d}n tt|| j
j| jpt }ttj|}t| j
j|| _t| jsF| jg| _n@|dkr`xLtt| jt| j
jD ],\}}t|t|gt|   q.W nt| j}t|}	t|	t|krtd| j d	 tt| d
 tt|	 d t| g }
xttt|	|D ]b\}\}}|j|jkr(tdt| d | j d t||f d t|j |
 ||f qW t!|
 dS )a  Reset the recorded states for the stateful RNN layer.

    Can only be used when RNN layer is constructed with `stateful` = `True`.
    Args:
      states: Numpy arrays that contains the value for the initial state, which
        will be feed to cell at the first time step. When the value is None,
        zero filled numpy array will be created based on the cell state size.

    Raises:
      AttributeError: When the RNN layer is not stateful.
      ValueError: When the batch size of the RNN layer is unknown.
      ValueError: When the input numpy array is not compatible with the RNN
        layer state, either size wise or dtype wise.
    zLayer must be stateful.Nr   r<   aI  If a RNN is stateful, it needs to know its batch size. Specify the batch size of your input tensors: 
- If using a Sequential model, specify the batch size by passing a `batch_input_shape` argument to your first layer.
- If using the functional API, specify the batch size by passing a `batch_shape` argument to your Input layer.r4   )r5   r6   r7   zLayer z	 expects z states, but it received z state values. Input received: zState z is incompatible with layer z: expected shape=z, found shape=)"ro   AttributeErrorrr   r   r@   r   rq   r   rG   r2   r(   r4   r7   r   Zfloatxr   r   r|   variabler?   rB   rA   	set_valuenpzerosr   rP   rQ   rC   rN   r   	enumerater8   Zbatch_set_value)r&   rG   Z
spec_shaper6   Zflat_init_state_valuesZflat_states_variablesr}   sizeZflat_statesZflat_input_statesZset_value_tuplesivaluer*   r*   r+   r     sT    




$
86zRNN.reset_statesc                sx   | j | j| j| j| j| jd}| jr.| j|d< | jr>| j|d< t	| j
|d< tt|  }tt| t|  S )N)rl   rm   rn   ro   rp   rq   num_constantsri   r(   )rl   rm   rn   ro   rp   rq   rv   ri   r   rT   r(   r$   rh   rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU     s    


zRNN.get_configc             C   s@   ddl m} ||d|d}|dd}| |f|}||_|S )Nr   )rZ   r(   )r[   r   )r\   rZ   r!   rv   )r]   rX   r[   r^   r(   r   layerr*   r*   r+   r_     s    zRNN.from_configc             C   s
   t | S )N)r   ZRNNSavedModelSaver)r&   r*   r*   r+   _trackable_saved_model_saver  s    z RNN._trackable_saved_model_saver)FFFFFF)NN)NNNN)N)N)r`   ra   rb   rc   r%   rd   rx   rG   setter	trackable no_automatic_dependency_trackingr   r   rO   staticmethodr   r   Zdo_not_doc_inheritabler4   rF   r   r   r   r   r   rU   rf   r_   r   rg   r*   r*   )r)   r+   rh      s<    @     .
5ID   
h(
Nrh   zkeras.layers.AbstractRNNCellc               @   s:   e Zd ZdZdd Zedd Zedd Zdd	d
ZdS )AbstractRNNCella   Abstract object representing an RNN cell.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  This is the base class for implementing RNN cells with custom behavior.

  Every `RNNCell` must have the properties below and implement `call` with
  the signature `(output, next_state) = call(input, state)`.

  Examples:

  ```python
    class MinimalRNNCell(AbstractRNNCell):

      def __init__(self, units, **kwargs):
        self.units = units
        super(MinimalRNNCell, self).__init__(**kwargs)

      @property
      def state_size(self):
        return self.units

      def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

      def call(self, inputs, states):
        prev_output = states[0]
        h = backend.dot(inputs, self.kernel)
        output = h + backend.dot(prev_output, self.recurrent_kernel)
        return output, output
  ```
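  The cell above can then be wrapped in a `tf.keras.layers.RNN` layer to run it
  over a whole sequence (a minimal usage sketch):

  ```python
  layer = tf.keras.layers.RNN(MinimalRNNCell(32))
  y = layer(tf.keras.Input((None, 5)))  # y has shape `[batch, 32]`
  ```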

  This definition of cell differs from the definition used in the literature.
  In the literature, 'cell' refers to an object with a single scalar output.
  This definition refers to a horizontal array of such units.

  An RNN cell, in the most abstract setting, is anything that has
  a state and performs some operation that takes a matrix of inputs.
  This operation results in an output matrix with `self.output_size` columns.
  If `self.state_size` is an integer, this operation also results in a new
  state matrix with `self.state_size` columns.  If `self.state_size` is a
  (possibly nested tuple of) TensorShape object(s), then it should return a
  matching structure of Tensors having shape `[batch_size].concatenate(s)`
  for each `s` in `self.state_size`.
  """

  def call(self, inputs, states):
    """The function that contains the logic for one RNN step calculation.

    Args:
      inputs: the input tensor, which is a slide from the overall RNN input by
        the time dimension (usually the second dimension).
      states: the state tensor from previous step, which has the same shape
        as `(batch, state_size)`. In the case of timestep 0, it will be the
        initial state user specified, or zero filled tensor otherwise.

    Returns:
      A tuple of two tensors:
        1. output tensor for the current timestep, with size `output_size`.
        2. state tensor for next step, which has the shape of `state_size`.
    """
    raise NotImplementedError('Abstract method')

  @property
  def state_size(self):
    """size(s) of state(s) used by this cell.

    It can be represented by an Integer, a TensorShape or a tuple of Integers
    or TensorShapes.
    """
    raise NotImplementedError('Abstract method')

  @property
  def output_size(self):
    """Integer or TensorShape: size of outputs produced by this cell."""
    raise NotImplementedError('Abstract method')

  def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)
   6	r   c                   s~   e Zd ZdZ fddZejdd Zdd Zdd	 Z	dddZ
dddZdddZdddZ fddZ fddZ  ZS )r   a_  Object that hold dropout related fields for RNN Cell.

  This class is not a standalone RNN cell. It suppose to be used with a RNN cell
  by multiple inheritance. Any cell that mix with class should have following
  fields:
    dropout: a float number within range [0, 1). The ratio that the input
      tensor need to dropout.
    recurrent_dropout: a float number within range [0, 1). The ratio that the
      recurrent state weights need to dropout.
  This object will create and cache created dropout masks, and reuse them for
  the incoming data, so that the same mask is used for every batch input.
  c                s   |    tt| j|| d S )N) _create_non_trackable_mask_cacher$   r   r%   )r&   argsr'   )r)   r*   r+   r%   W  s    zDropoutRNNCellMixin.__init__c             C   s    t | j| _t | j| _dS )a  Create the cache for dropout and recurrent dropout mask.

    Note that the following two masks will be used in "graph function" mode,
    e.g. these masks are symbolic tensors. In eager mode, the `eager_*_mask`
    tensors will be generated differently than in the "graph function" case,
    and they will be cached.

    Also note that in graph mode, we still cache those masks only because the
    RNN could be created with `unroll=True`. In that case, the `cell.call()`
    function will be invoked multiple times, and we want to ensure same mask
    is used every time.

    Also the caches are created without tracking. Since they are not picklable
    by python when deepcopy, we don't want `layer._obj_reference_counts_dict`
    to track it by default.
    N)r   ContextValueCache_create_dropout_mask_dropout_mask_cache_create_recurrent_dropout_mask_recurrent_dropout_mask_cache)r&   r*   r*   r+   r   [  s    
z4DropoutRNNCellMixin._create_non_trackable_mask_cachec             C   s   | j   dS )a  Reset the cached dropout masks if any.

    This is important for the RNN layer to invoke this in it `call()` method so
    that the cached mask is cleared before calling the `cell.call()`. The mask
    should be cached across the timestep within the same batch, but shouldn't
    be cached between batches. Otherwise it will introduce unreasonable bias
    against certain index of data within the batch.
    N)r   clear)r&   r*   r*   r+   r   r  s    	z&DropoutRNNCellMixin.reset_dropout_maskc             C   s   | j   dS )a  Reset the cached recurrent dropout masks if any.

    This is important for the RNN layer to invoke this in it call() method so
    that the cached mask is cleared before calling the cell.call(). The mask
    should be cached across the timestep within the same batch, but shouldn't
    be cached between batches. Otherwise it will introduce unreasonable bias
    against certain index of data within the batch.
    N)r   r   )r&   r*   r*   r+   r   }  s    	z0DropoutRNNCellMixin.reset_recurrent_dropout_maskr<   c             C   s   t t|| j||dS )N)r=   count)_generate_dropout_maskr   	ones_likedropout)r&   r5   r=   r   r*   r*   r+   r     s
    z(DropoutRNNCellMixin._create_dropout_maskc             C   s   t t|| j||dS )N)r=   r   )r   r   r   recurrent_dropout)r&   r5   r=   r   r*   r*   r+   r     s
    z2DropoutRNNCellMixin._create_recurrent_dropout_maskc             C   s*   | j dkrdS t|||d}| jj|dS )aw  Get the dropout mask for RNN cell's input.

    It will create mask based on context if there isn't any existing cached
    mask. If a new mask is generated, it will update the cache in the cell.

    Args:
      inputs: The input tensor whose shape will be used to generate dropout
        mask.
      training: Boolean tensor, whether its in training mode, dropout will be
        ignored in non-training mode.
      count: Int, how many dropout mask will be generated. It is useful for cell
        that has internal weights fused together.
    Returns:
      List of mask tensor, generated or cached mask based on context.
    r   N)r5   r=   r   )r'   )r   rV   r   
setdefault)r&   r5   r=   r   init_kwargsr*   r*   r+   get_dropout_mask_for_cell  s    
z-DropoutRNNCellMixin.get_dropout_mask_for_cellc             C   s*   | j dkrdS t|||d}| jj|dS )ay  Get the recurrent dropout mask for RNN cell.

    It will create mask based on context if there isn't any existing cached
    mask. If a new mask is generated, it will update the cache in the cell.

    Args:
      inputs: The input tensor whose shape will be used to generate dropout
        mask.
      training: Boolean tensor, whether its in training mode, dropout will be
        ignored in non-training mode.
      count: Int, how many dropout mask will be generated. It is useful for cell
        that has internal weights fused together.
    Returns:
      List of mask tensor, generated or cached mask based on context.
    r   N)r5   r=   r   )r'   )r   rV   r   r   )r&   r5   r=   r   r   r*   r*   r+   #get_recurrent_dropout_mask_for_cell  s    
z7DropoutRNNCellMixin.get_recurrent_dropout_mask_for_cellc                s*   t t|  }|dd  |dd  |S )Nr   r   )r$   r   __getstate__r!   )r&   r}   )r)   r*   r+   r     s    z DropoutRNNCellMixin.__getstate__c                s4   t | j|d< t | j|d< tt| | d S )Nr   r   )r   r   r   r   r$   r   __setstate__)r&   r}   )r)   r*   r+   r     s
    z DropoutRNNCellMixin.__setstate__)r<   )r<   )r<   )r<   )r`   ra   rb   rc   r%   r   r   r   r   r   r   r   r   r   r   r   rg   r*   r*   )r)   r+   r   H  s   



r   zkeras.layers.SimpleRNNCellc                   sP   e Zd ZdZd fd	d
	Zejdd ZdddZdddZ	 fddZ
  ZS )SimpleRNNCella:  Cell class for SimpleRNN.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  This class processes one step within the whole time sequence input, whereas
  `tf.keras.layer.SimpleRNN` processes the whole sequence.
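  For example, a single step can be run directly on a batch of features and a
  previous state (a minimal sketch):

  ```python
  cell = tf.keras.layers.SimpleRNNCell(4)
  inputs = tf.random.normal([32, 8])         # one timestep: [batch, feature]
  states = [tf.zeros([32, 4])]               # previous state: [batch, units]
  output, new_states = cell(inputs, states)  # output has shape [32, 4]
  ```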

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs. Default:
      `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix, used for the linear transformation of the recurrent state.
      Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to the
      `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix. Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
      transformation of the inputs. Default: 0.
    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
      the linear transformation of the recurrent state. Default: 0.

  Call arguments:
    inputs: A 2D tensor, with shape of `[batch, feature]`.
    states: A 2D tensor with shape of `[batch, units]`, which is the state from
      the previous time step. For timestep 0, the initial state provided by user
      will be feed to cell.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. Only relevant when `dropout` or
      `recurrent_dropout` is used.

  Examples:

  ```python
  inputs = np.random.random([32, 10, 8]).astype(np.float32)
  rnn = tf.keras.layers.RNN(tf.keras.layers.SimpleRNNCell(4))

  output = rnn(inputs)  # The output has shape `[32, 4]`.

  rnn = tf.keras.layers.RNN(
      tf.keras.layers.SimpleRNNCell(4),
      return_sequences=True,
      return_state=True)

  # whole_sequence_output has shape `[32, 10, 4]`.
  # final_state has shape `[32, 4]`.
  whole_sequence_output, final_state = rnn(inputs)
  ```
  tanhTglorot_uniform
orthogonalr   N        c                s  |dk rt d| dt r0|dd| _n|dd| _tt| jf | || _t	
|| _|| _t
|| _t
|| _t
|| _t
|| _t
|| _t
|	| _t
|
| _t
|| _t
|| _tdtd|| _tdtd|| _| j| _| j| _d S )	Nr   zFReceived an invalid value for units, expected a positive integer, got .enable_caching_deviceTFg      ?g        )r   r   #executing_eagerly_outside_functionsr!   _enable_caching_devicer$   r   r%   unitsr   get
activationuse_biasr	   kernel_initializerrecurrent_initializerbias_initializerr
   kernel_regularizerrecurrent_regularizerbias_regularizerr   kernel_constraintrecurrent_constraintbias_constraintminmaxr   r   r   r1   )r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r'   )r)   r*   r+   r%     s,    zSimpleRNNCell.__init__c             C   s   t | }| j|d | jfd| j| j| j|d| _| j| j| jfd| j| j| j	|d| _
| jr| j| jfd| j| j| j|d| _nd | _d| _d S )Nr/   kernel)r   rN   initializerregularizer
constraintcaching_devicerecurrent_kernelbiasT)_caching_device
add_weightr   r   r   r   r   r   r   r   r   r   r   r   r   r   rL   )r&   rR   default_caching_devicer*   r*   r+   rO   C  s2    


zSimpleRNNCell.buildc       
      C   s   t |r|d n|}| ||}| ||}|d k	rJt|| | j}nt|| j}| jd k	rpt|| j}|d k	r|| }|t|| j	 }| j
d k	r| 
|}t |r|gn|}	||	fS )Nr   )r   rB   r   r   r   dotr   r   bias_addr   r   )
r&   r5   rG   r=   Zprev_outputdp_maskrec_dp_maskhr   	new_stater*   r*   r+   r   `  s     


zSimpleRNNCell.callc             C   s   t | |||S )N)r9   )r&   r5   r6   r7   r*   r*   r+   r4   v  s    zSimpleRNNCell.get_initial_statec                s   | j t| j| jt| jt| jt| jt	| j
t	| jt	| jt| jt| jt| j| j| jd}|t|  tt|  }tt| t|  S )N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   	serializer   r   r	   r   r   r   r
   r   r   r   r   r   r   r   r   r   update!_config_for_enable_caching_devicer$   r   rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU   y  s"    










zSimpleRNNCell.get_config)r   Tr   r   r   NNNNNNr   r   )N)NNN)r`   ra   rb   rc   r%   r   re   rO   r   r4   rU   rg   r*   r*   )r)   r+   r     s$   C             

r   zkeras.layers.SimpleRNNc                   s   e Zd ZdZd. fd
d	Zd/ fdd	Zedd Zedd Zedd Z	edd Z
edd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zed&d' Zed(d) Z fd*d+Zed,d- Z  ZS )0	SimpleRNNa  Fully-connected RNN where the output is to be fed back to input.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass None, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs. Default:
      `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix, used for the linear transformation of the recurrent state.
      Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to the
      `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation"). Default: `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix.  Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for the linear transformation of the inputs.
      Default: 0.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for the linear transformation of the
      recurrent state. Default: 0.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence. Default: `False`.
    return_state: Boolean. Whether to return the last state
      in addition to the output. Default: `False`
    go_backwards: Boolean (default False).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed-up a RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.

  Call arguments:
    inputs: A 3D tensor, with shape `[batch, timesteps, feature]`.
    mask: Binary tensor of shape `[batch, timesteps]` indicating whether
      a given timestep should be masked. An individual `True` entry indicates
      that the corresponding timestep should be utilized, while a `False` entry
      indicates that the corresponding timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is only relevant if `dropout` or
      `recurrent_dropout` is used.
    initial_state: List of initial state tensors to be passed to the first
      call of the cell.

  Examples:

  ```python
  inputs = np.random.random([32, 10, 8]).astype(np.float32)
  simple_rnn = tf.keras.layers.SimpleRNN(4)

  output = simple_rnn(inputs)  # The output has shape `[32, 4]`.

  simple_rnn = tf.keras.layers.SimpleRNN(
      4, return_sequences=True, return_state=True)

  # whole_sequence_output has shape `[32, 10, 4]`.
  # final_state has shape `[32, 4]`.
  whole_sequence_output, final_state = simple_rnn(inputs)
  ```
  r   Tr   r   r   N        Fc                s   d|kr| d td d|kr4d| di}ni }t|f||||||||	||||||d|ddd|}tt| j|f|||||d| t|
| _	t
d	d
g| _d S )NimplementationzhThe `implementation` argument in `SimpleRNN` has been deprecated. Please remove it from your layer call.r   r7   	trainableT)r   r   r   r   r   r   r   r   r   r   r   r   r   r7   r   )rl   rm   rn   ro   rp      )ndim)r!   r"   r#   r   r   r$   r   r%   r
   activity_regularizerr   rr   )r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rl   rm   rn   ro   rp   r'   cell_kwargsr(   )r)   r*   r+   r%     sF    


zSimpleRNN.__init__c                s   t t| j||||dS )N)r   r=   r   )r$   r   r   )r&   r5   r   r=   r   )r)   r*   r+   r   2  s    
zSimpleRNN.callc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   6  s    zSimpleRNN.unitsc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   :  s    zSimpleRNN.activationc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   >  s    zSimpleRNN.use_biasc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   B  s    zSimpleRNN.kernel_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   F  s    zSimpleRNN.recurrent_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   J  s    zSimpleRNN.bias_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   N  s    zSimpleRNN.kernel_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   R  s    zSimpleRNN.recurrent_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   V  s    zSimpleRNN.bias_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   Z  s    zSimpleRNN.kernel_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   ^  s    zSimpleRNN.recurrent_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   b  s    zSimpleRNN.bias_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   f  s    zSimpleRNN.dropoutc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   j  s    zSimpleRNN.recurrent_dropoutc                s   | j t| j| jt| jt| jt| jt	| j
t	| jt	| jt	| jt| jt| jt| j| j| jd}tt|  }|t| j |d= tt| t|  S )N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   )r   r   r   r   r   r	   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r$   r   rU   r   r   r(   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU   n  s&    











zSimpleRNN.get_configc             C   s   d|kr| d | f |S )Nr   )r!   )r]   rX   r*   r*   r+   r_     s    
zSimpleRNN.from_config)r   Tr   r   r   NNNNNNNr   r   FFFFF)NNN)r`   ra   rb   rc   r%   r   rd   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rU   rf   r_   rg   r*   r*   )r)   r+   r     sJ   V                  )&r   zkeras.layers.GRUCell)Zv1c                   sP   e Zd ZdZd fdd	Zejdd ZdddZ fddZ	dddZ
  ZS )GRUCellaz  Cell class for the GRU layer.

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass None, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix,
      used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.
    reset_after: GRU convention (whether to apply reset gate after or
      before matrix multiplication). False = "before" (default),
      True = "after" (CuDNN compatible).

  Call arguments:
    inputs: A 2D tensor.
    states: List of state tensors corresponding to the previous timestep.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. Only relevant when `dropout` or
      `recurrent_dropout` is used.
  r   hard_sigmoidTr   r   r   N        Fc                sX  |dk rt d| dt r0|dd| _n|dd| _tt| jf | || _t	
|| _t	
|| _|| _t
|| _t
|| _t
|| _t
|| _t
|	| _t
|
| _t
|| _t
|| _t
|| _tdtd|| _tdtd|| _|d	d
}| jdkr8|d
kr8tt  d
| _!n|| _!|| _"| j| _#| j| _$d S )Nr   zFReceived an invalid value for units, expected a positive integer, got r   r   TFg      ?g        r   r<   )%r   r   r   r!   r   r$   r   r%   r   r   r   r   recurrent_activationr   r	   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r   r"   debugRECURRENT_DROPOUT_WARNING_MSGr   reset_afterr   r1   )r&   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r  r'   r   )r)   r*   r+   r%     s:    
zGRUCell.__init__c             C   s   |d }t | }| j|| jd fd| j| j| j|d| _| j| j| jd fd| j| j| j	|d| _
| jr| js|d| j f}ndd| j f}| j|d| j| j| j|d| _nd | _d| _d S )	Nr/   r   r   )r   rN   r   r   r   r   r   r   r   T)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   rL   )r&   rR   rj   r   Z
bias_shaper*   r*   r+   rO     s8    

zGRUCell.buildc             C   sd  t |r|d n|}| j||dd}| j||dd}| jr`| jsP| jd  }}nt| j\}}| j	dkrd| j
  k rdk rn n&||d  }	||d  }
||d  }n|}	|}
|}t|	| jd d d | jf }t|
| jd d | j| jd f }t|| jd d | jd d f }| jrvt||d | j }t||| j| jd  }t||| jd d  }d| j  k rdk rn n&||d  }||d  }||d  }n|}|}|}t|| jd d d | jf }t|| jd d | j| jd f }| jrN| jrNt||d | j }t||| j| jd  }| || }| || }| jrt|| jd d | jd d f }| jrt||| jd d  }|| }n(t|| | jd d | jd d f }| || }n6d| j
  k rdk r&n n||d  }t|| j}| jrHt||}tj|ddd	\}}}| jrt|| j}| jrt||}n$t|| jd d d d| j f }tj|| j| jdgdd	\}}}| || }| || }| jr|| }n(t|| | jd d d| j d f }| || }|| d| |  }t |rX|gn|}||fS )
Nr   r   )r   r<   g        g      ?r   r/   )axis)r   rB   r   r   r   r  r   r   Zunstackr   r   r   r   r   r   r   r   r   r  r   split)r&   r5   rG   r=   h_tm1r   r   Z
input_biasZrecurrent_biasZinputs_zZinputs_rZinputs_hZx_zZx_rZx_hZh_tm1_zZh_tm1_rZh_tm1_hZrecurrent_zZrecurrent_rzrZrecurrent_hhhZmatrix_xZmatrix_innerr   r   r*   r*   r+   r   +  s     &$ " 
$ 
$zGRUCell.callc                s   | j t| jt| j| jt| jt| jt| j	t
| jt
| jt
| jt| jt| jt| j| j| j| j| jd}|t|  tt|  }tt| t|  S )N)r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  )r   r   r   r   r  r   r	   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r  r   r   r$   r   rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU     s(    











zGRUCell.get_configc             C   s   t | |||S )N)r9   )r&   r5   r6   r7   r*   r*   r+   r4     s    zGRUCell.get_initial_state)r   r  Tr   r   r   NNNNNNr  r  F)N)NNN)r`   ra   rb   rc   r%   r   re   rO   r   rU   r4   rg   r*   r*   )r)   r+   r     s(   /              )&
ir   zkeras.layers.GRUc                   s  e Zd ZdZd5 fdd	Zd6 fdd	Zedd Zedd Zedd Z	edd Z
edd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zed%d& Zed'd( Zed)d* Zed+d, Zed-d. Zed/d0 Z fd1d2Zed3d4 Z  ZS )7GRUaU  Gated Recurrent Unit - Cho et al. 2014.

  There are two variants. The default one is based on 1406.1078v3 and
  has reset gate applied to hidden state before matrix multiplication. The
  other one is based on original 1406.1078v1 and has the order reversed.

  The second variant is compatible with CuDNNGRU (GPU-only) and allows
  inference on CPU. Thus it has separate biases for `kernel` and
  `recurrent_kernel`. Use `reset_after=True` and
  `recurrent_activation='sigmoid'`.
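
  For example, a CuDNN-compatible configuration can be requested as in the
  sketch below (other argument combinations are also valid):

  ```python
  gru = tf.keras.layers.GRU(
      4, reset_after=True, recurrent_activation='sigmoid')
  ```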

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix, used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to
      the output of the layer (its "activation").
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence.
    return_state: Boolean. Whether to return the last state
      in addition to the output.
    go_backwards: Boolean (default False).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed-up a RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.
    time_major: The shape format of the `inputs` and `outputs` tensors.
      If True, the inputs and outputs will be in shape
      `(timesteps, batch, ...)`, whereas in the False case, it will be
      `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
      efficient because it avoids transposes at the beginning and end of the
      RNN calculation. However, most TensorFlow data is batch-major, so by
      default this function accepts input and emits output in batch-major
      form.
    reset_after: GRU convention (whether to apply reset gate after or
      before matrix multiplication). False = "before" (default),
      True = "after" (CuDNN compatible).

  Call arguments:
    inputs: A 3D tensor.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked. An individual `True` entry indicates
      that the corresponding timestep should be utilized, while a `False`
      entry indicates that the corresponding timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is only relevant if `dropout` or
      `recurrent_dropout` is used.
    initial_state: List of initial state tensors to be passed to the first
      call of the cell.
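
  Example (a short usage sketch; the input shape is an illustrative
  assumption):

  ```python
  inputs = np.random.random((32, 10, 8)).astype(np.float32)
  gru = tf.keras.layers.GRU(4)
  output = gru(inputs)  # output shape: `(32, 4)`

  gru = tf.keras.layers.GRU(4, return_sequences=True, return_state=True)
  whole_sequence_output, final_state = gru(inputs)
  # whole_sequence_output shape: `(32, 10, 4)`, final_state shape: `(32, 4)`
  ```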
  r   r  Tr   r   r   N        Fc                s   | dd}|dkrtd d|kr6d| di}ni }t|f||||||||	|
||||||||d|ddd	|}tt| j|f|||||d
| t|| _	t
ddg| _d S )Nr   r<   r   zm`implementation=0` has been deprecated, and now defaults to `implementation=1`.Please update your layer call.r   r7   r   T)r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  r7   r   )rl   rm   rn   ro   rp   r   )r   )r!   r"   r#   r   r   r$   r  r%   r
   r   r   rr   )r&   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   rl   rm   rn   ro   rp   r  r'   r   r   r(   )r)   r*   r+   r%     sL    

zGRU.__init__c                s   t t| j||||dS )N)r   r=   r   )r$   r  r   )r&   r5   r   r=   r   )r)   r*   r+   r   P  s    
zGRU.callc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   T  s    z	GRU.unitsc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   X  s    zGRU.activationc             C   s   | j jS )N)r(   r  )r&   r*   r*   r+   r  \  s    zGRU.recurrent_activationc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   `  s    zGRU.use_biasc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   d  s    zGRU.kernel_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   h  s    zGRU.recurrent_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   l  s    zGRU.bias_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   p  s    zGRU.kernel_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   t  s    zGRU.recurrent_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   x  s    zGRU.bias_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   |  s    zGRU.kernel_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zGRU.recurrent_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zGRU.bias_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zGRU.dropoutc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zGRU.recurrent_dropoutc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zGRU.implementationc             C   s   | j jS )N)r(   r  )r&   r*   r*   r+   r    s    zGRU.reset_afterc                s   | j t| jt| j| jt| jt| jt| j	t
| jt
| jt
| jt
| jt| jt| jt| j| j| j| j| jd}|t| j tt|  }|d= tt| t|  S )N)r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r(   ) r   r   r   r   r  r   r	   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r(   r$   r  rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU     s,    












zGRU.get_configc             C   s&   d|kr|d dkrd|d< | f |S )Nr   r   r<   r*   )r]   rX   r*   r*   r+   r_     s    zGRU.from_config)r   r  Tr   r   r   NNNNNNNr  r  FFFFFF)NNN)r`   ra   rb   rc   r%   r   rd   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  rU   rf   r_   rg   r*   r*   )r)   r+   r    sT   X                    ,,r  zkeras.layers.LSTMCellc                   s`   e Zd ZdZd fd
d	Zejdd Zdd Zdd Z	dddZ
 fddZdddZ  ZS )LSTMCella  Cell class for the LSTM layer.

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix,
      used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    unit_forget_bias: Boolean.
      If True, add 1 to the bias of the forget gate at initialization.
      Setting it to true will also force `bias_initializer="zeros"`.
      This is recommended in [Jozefowicz et al., 2015](
        http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.

  Call arguments:
    inputs: A 2D tensor.
    states: List of state tensors corresponding to the previous timestep.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. Only relevant when `dropout` or
      `recurrent_dropout` is used.
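
  Example (a minimal usage sketch; the input shape is an illustrative
  assumption):

  ```python
  inputs = np.random.random((32, 10, 8)).astype(np.float32)
  rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(4))
  output = rnn(inputs)  # output shape: `(32, 4)`

  rnn = tf.keras.layers.RNN(
      tf.keras.layers.LSTMCell(4), return_sequences=True, return_state=True)
  whole_seq_output, final_memory_state, final_carry_state = rnn(inputs)
  # whole_seq_output: `(32, 10, 4)`; final_memory_state and final_carry_state:
  # `(32, 4)` each
  ```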
  r   r  Tr   r   r   N        c                s^  |dk rt d| dt r0|dd| _n|dd| _tt| jf | || _t	
|| _t	
|| _|| _t
|| _t
|| _t
|| _|| _t
|	| _t
|
| _t
|| _t
|| _t
|| _t
|| _tdtd|| _tdtd|| _|d	d
}| jdkr>|d
kr>t t! d
| _"n|| _"| j| jg| _#| j| _$d S )Nr   zFReceived an invalid value for units, expected a positive integer, got r   r   TFg      ?g        r   r<   )%r   r   r   r!   r   r$   r  r%   r   r   r   r   r  r   r	   r   r   r   unit_forget_biasr
   r   r   r   r   r   r   r   r   r   r   r   r"   r  r  r   r   r1   )r&   r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r'   r   )r)   r*   r+   r%     s:    
zLSTMCell.__init__c                s   t  }|d } j| jd fd j j j|d _ j j jd fd j j j	|d _
 jr jr| fdd}n j} j jd fd| j j|d _nd  _d	 _d S )
Nr/      r   )r   rN   r   r   r   r   r   c                sN   t  j jff||td jff|| j jd ff||gS )Nonesr   )r   Zconcatenater   r   r	   r   )ry   r   r'   )r&   r*   r+   r   M	  s    z(LSTMCell.build.<locals>.bias_initializerr   T)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   rL   )r&   rR   r   rj   r   r*   )r&   r+   rO   7	  s:    


zLSTMCell.buildc             C   s   |\}}}}|\}}	}
}|  |t|| jddd| jf  }|  |t|	| jdd| j| jd f  }|| || |t|
| jdd| jd | jd f    }|  |t|| jdd| jd df  }||fS )z.Computes carry and output using split kernels.Nr   r   )r  r   r   r   r   r   )r&   xr	  c_tm1x_ix_fx_cx_oh_tm1_ih_tm1_fh_tm1_ch_tm1_or   fr-   or*   r*   r+   _compute_carry_and_output`	  s    &
&.*z"LSTMCell._compute_carry_and_outputc             C   sH   |\}}}}|  |}|  |}|| || |  }	|  |}
|	|
fS )z.Computes carry and output using fused kernels.)r  r   )r&   r
  r  z0z1z2z3r   r  r-   r  r*   r*   r+   _compute_carry_and_output_fusedn	  s    


z(LSTMCell._compute_carry_and_output_fusedc       !      C   sZ  |d }|d }| j ||dd}| j||dd}| jdkrd| j  k rRdk rn n2||d  }||d  }	||d  }
||d  }n|}|}	|}
|}tj| jddd\}}}}t||}t|	|}t|
|}t||}| j	r4tj| j
ddd\}}}}t||}t||}t||}t||}d| j  k rNdk rn n2||d  }||d  }||d  }||d  }n|}|}|}|}||||f}||||f}| |||\}}nd	| j  k rdk rn n||d  }t|| j}|t|| j7 }| j	r t|| j
}tj|ddd}| ||\}}|| | } | | |gfS )
Nr   r<   r  )r   g      ?r   r   )Znum_or_size_splitsr  g        )r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r%  r   )!r&   r5   rG   r=   r	  r  r   r   Zinputs_iZinputs_fZinputs_cZinputs_oZk_iZk_fZk_cZk_or  r  r  r  Zb_iZb_fZb_cZb_or  r  r  r  r  r-   r  r
  r   r*   r*   r+   r   w	  sd    zLSTMCell.callc                s   | j t| jt| j| jt| jt| jt| j	| j
t| jt| jt| jt| jt| jt| j| j| j| jd}|t|  tt|  }tt| t|  S )N)r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r   )r   r   r   r   r  r   r	   r   r   r   r  r
   r   r   r   r   r   r   r   r   r   r   r   r   r$   r  rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU   	  s(    











zLSTMCell.get_configc             C   s   t t| |||S )N)rK   r9   )r&   r5   r6   r7   r*   r*   r+   r4   	  s    zLSTMCell.get_initial_state)r   r  Tr   r   r   TNNNNNNr  r  )N)NNN)r`   ra   rb   rc   r%   r   re   rO   r   r%  r   rU   r4   rg   r*   r*   )r)   r+   r    s,   2              ()	
<)r  z#keras.experimental.PeepholeLSTMCellc                   s>   e Zd ZdZd fd
d	Z fddZdd Zdd Z  ZS )PeepholeLSTMCella8  Equivalent to LSTMCell class but adds peephole connections.

  Peephole connections allow the gates to utilize the previous internal state as
  well as the previous hidden state (which is what LSTMCell is limited to).
  This allows PeepholeLSTMCell to better learn precise timings over LSTMCell.

  From [Gers et al., 2002](
    http://www.jmlr.org/papers/volume3/gers02a/gers02a.pdf):

  "We find that LSTM augmented by 'peephole connections' from its internal
  cells to its multiplicative gates can learn the fine distinction between
  sequences of spikes spaced either 50 or 49 time steps apart without the help
  of any short training exemplars."

  The peephole implementation is based on:

  [Sak et al., 2014](https://research.google.com/pubs/archive/43905.pdf)

  Example:

  ```python
  timesteps, input_dim = 10, 8  # illustrative sizes
  # Create 2 PeepholeLSTMCells.
  peephole_lstm_cells = [PeepholeLSTMCell(size) for size in [128, 256]]
  # Create a layer composed sequentially of the peephole LSTM cells.
  layer = RNN(peephole_lstm_cells)
  inputs = keras.Input((timesteps, input_dim))
  outputs = layer(inputs)
  ```
  r   r  Tr   r   r   N        c                sP   t d tt| jf |||||||||	|
|||||||ddd| d S )Nz`tf.keras.experimental.PeepholeLSTMCell` is deprecated and will be removed in a future version. Please use tensorflow_addons.rnn.PeepholeLSTMCell instead.r   r<   )r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r   )warningswarnr$   r&  r%   r!   )r&   r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r'   )r)   r*   r+   r%   
  s(    
zPeepholeLSTMCell.__init__c                s\   t t| | | j| jfd| jd| _| j| jfd| jd| _| j| jfd| jd| _d S )Ninput_gate_peephole_weights)r   rN   r   forget_gate_peephole_weightsoutput_gate_peephole_weights)	r$   r&  rO   r   r   r   r*  r+  r,  )r&   rR   )r)   r*   r+   rO   +
  s    zPeepholeLSTMCell.buildc             C   s  |\}}}}|\}}	}
}|  |t|| jd d d | jf  | j|  }|  |t|	| jd d | j| jd f  | j|  }|| || |t|
| jd d | jd | jd f    }|  |t|| jd d | jd d f  | j|  }||fS )Nr   r   )	r  r   r   r   r   r*  r+  r   r,  )r&   r  r	  r  r  r  r  r  r  r  r  r  r   r  r-   r  r*   r*   r+   r   =
  s    "(.&z*PeepholeLSTMCell._compute_carry_and_outputc             C   sf   |\}}}}|  || j|  }|  || j|  }|| || |  }	|  || j|	  }
|	|
fS )N)r  r*  r+  r   r,  )r&   r
  r  r!  r"  r#  r$  r   r  r-   r  r*   r*   r+   r%  M
  s    z0PeepholeLSTMCell._compute_carry_and_output_fused)r   r  Tr   r   r   TNNNNNNr'  r'  )	r`   ra   rb   rc   r%   rO   r   r%  rg   r*   r*   )r)   r+   r&  	  s&                 r&  zkeras.layers.LSTMc                   s  e Zd ZdZd5 fdd	Zd6 fdd	Zedd Zedd Zedd Z	edd Z
edd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zed%d& Zed'd( Zed)d* Zed+d, Zed-d. Zed/d0 Z fd1d2Zed3d4 Z  ZS )7LSTMa  Long Short-Term Memory layer - Hochreiter 1997.

   Note that this cell is not optimized for performance on GPU. Please use
  `tf.compat.v1.keras.layers.CuDNNLSTM` for better performance on GPU.

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix,
      used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    unit_forget_bias: Boolean.
      If True, add 1 to the bias of the forget gate at initialization.
      Setting it to true will also force `bias_initializer="zeros"`.
      This is recommended in [Jozefowicz et al., 2015](
        http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to
      the output of the layer (its "activation").
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence.
    return_state: Boolean. Whether to return the last state
      in addition to the output.
    go_backwards: Boolean (default False).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed-up a RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.
    time_major: The shape format of the `inputs` and `outputs` tensors.
      If True, the inputs and outputs will be in shape
      `(timesteps, batch, ...)`, whereas in the False case, it will be
      `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
      efficient because it avoids transposes at the beginning and end of the
      RNN calculation. However, most TensorFlow data is batch-major, so by
      default this function accepts input and emits output in batch-major
      form.

  Call arguments:
    inputs: A 3D tensor.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked. An individual `True` entry indicates
      that the corresponding timestep should be utilized, while a `False`
      entry indicates that the corresponding timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is only relevant if `dropout` or
      `recurrent_dropout` is used.
    initial_state: List of initial state tensors to be passed to the first
      call of the cell.
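
  Example (a short usage sketch; the input shape is an illustrative
  assumption):

  ```python
  inputs = np.random.random((32, 10, 8)).astype(np.float32)
  lstm = tf.keras.layers.LSTM(4)
  output = lstm(inputs)  # output shape: `(32, 4)`

  lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
  whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
  # whole_seq_output: `(32, 10, 4)`; final_memory_state and final_carry_state:
  # `(32, 4)` each
  ```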
  r   r  Tr   r   r   N        Fc                s   | dd}|dkrtd d|kr6d| di}ni }t|f||||||||	|
||||||||d|ddd	|}tt| j|f|||||d
| t|| _	t
ddg| _d S )Nr   r<   r   zm`implementation=0` has been deprecated, and now defaults to `implementation=1`.Please update your layer call.r   r7   r   T)r   r  r   r   r   r  r   r   r   r   r   r   r   r   r   r   r7   r   )rl   rm   rn   ro   rp   r   )r   )r!   r"   r#   r  r   r$   r-  r%   r
   r   r   rr   )r&   r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r   rl   rm   rn   ro   rp   r'   r   r   r(   )r)   r*   r+   r%   
  sL    

zLSTM.__init__c                s   t t| j||||dS )N)r   r=   r   )r$   r-  r   )r&   r5   r   r=   r   )r)   r*   r+   r   
  s    
z	LSTM.callc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   
  s    z
LSTM.unitsc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   
  s    zLSTM.activationc             C   s   | j jS )N)r(   r  )r&   r*   r*   r+   r  
  s    zLSTM.recurrent_activationc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.use_biasc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.kernel_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   	  s    zLSTM.recurrent_initializerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.bias_initializerc             C   s   | j jS )N)r(   r  )r&   r*   r*   r+   r    s    zLSTM.unit_forget_biasc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.kernel_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.recurrent_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r     s    zLSTM.bias_regularizerc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   !  s    zLSTM.kernel_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   %  s    zLSTM.recurrent_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   )  s    zLSTM.bias_constraintc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   -  s    zLSTM.dropoutc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   1  s    zLSTM.recurrent_dropoutc             C   s   | j jS )N)r(   r   )r&   r*   r*   r+   r   5  s    zLSTM.implementationc                s   | j t| jt| j| jt| jt| jt| j	| j
t| jt| jt| jt| jt| jt| jt| j| j| j| jd}|t| j tt|  }|d= tt| t|  S )N)r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r(   ) r   r   r   r   r  r   r	   r   r   r   r  r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   r$   r-  rU   rV   rK   rW   )r&   rX   rY   )r)   r*   r+   rU   9  s,    












zLSTM.get_configc             C   s&   d|kr|d dkrd|d< | f |S )Nr   r   r<   r*   )r]   rX   r*   r*   r+   r_   e  s    zLSTM.from_config)r   r  Tr   r   r   TNNNNNNNr.  r.  FFFFF)NNN)r`   ra   rb   rc   r%   r   rd   r   r   r  r   r   r   r   r  r   r   r   r   r   r   r   r   r   rU   rf   r_   rg   r*   r*   )r)   r+   r-  X
  sT   U                    ,,r-  r<   c                s@   fdd |dkr0 fddt |D S tj dS )Nc                  s   t  S )N)r   r   r*   )r  rater*   r+   dropped_inputsm  s    z._generate_dropout_mask.<locals>.dropped_inputsr<   c                s   g | ]}t j d qS ))r=   )r   in_train_phase)r,   ry   )r0  r  r=   r*   r+   r   r  s   z*_generate_dropout_mask.<locals>.<listcomp>)r=   )ranger   r1  )r  r/  r=   r   r*   )r0  r  r/  r=   r+   r   l  s
    r   c             C   s   t | tr|dkr|dkst|r>| | d }| d|  } t| dkrb| dd }| dd } t| dkrxt| } n| d } dd }||}||}| ||fS )a  Standardizes `__call__` to a single list of tensor inputs.

  When running a model loaded from a file, the input tensors
  `initial_state` and `constants` can be passed to `RNN.__call__()` as part
  of `inputs` instead of by the dedicated keyword arguments. This method
  makes sure the arguments are separated and that `initial_state` and
  `constants` are lists of tensors (or None).

  Args:
    inputs: Tensor or list/tuple of tensors, which may include constants
      and initial states. In that case `num_constants` must be specified.
    initial_state: Tensor or list of tensors or None, initial states.
    constants: Tensor or list of tensors or None, constant tensors.
    num_constants: Expected number of constants (if constants are passed as
      part of the `inputs` list).

  Returns:
    inputs: Single tensor or tuple of tensors.
    initial_state: List of tensors or None.
    constants: List of tensors or None.
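
  Example (an illustrative sketch of the intended behavior; `x`, `s0`, `s1`
  and `c0` stand for arbitrary tensors and are not part of the API):

  ```python
  inputs, initial_state, constants = _standardize_args(
      [x, s0, s1, c0], None, None, num_constants=1)
  # inputs -> x, initial_state -> [s0, s1], constants -> [c0]
  ```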
  """
  # The body of `_standardize_args` (including its local `to_list_or_none`
  # helper) survives only as compiled bytecode.


def _is_multiple_state(state_size):
  """Check whether the state_size contains multiple states."""
  return (hasattr(state_size, '__len__') and
          not isinstance(state_size, tensor_shape.TensorShape))


# `_generate_zero_filled_state_for_cell(cell, inputs, batch_size, dtype)` and
# `_generate_zero_filled_state(batch_size_tensor, state_size, dtype)`
# ("Generate a zero filled tensor with shape [batch_size, state_size]") also
# survive only as compiled bytecode.


def _caching_device(rnn_cell):
  """Returns the caching device for the RNN variable.

  This is useful for distributed training, when the variable is not located on
  the same device as the training worker. By enabling the device cache, the
  worker reads the variable once and caches it locally, rather than reading it
  from the remote device at every time step.

  Note that this assumes the variable the cell needs for each time step keeps
  the same value throughout the forward pass and is only updated during
  backprop. This holds for all the default cells (SimpleRNN, GRU, LSTM). If the
  cell body relies on any variable that gets updated at every time step, the
  caching device will cause it to read a stale value.

  Args:
    rnn_cell: the rnn cell instance.
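
  Example (a hedged sketch of how a cell's `build()` can opt into the cache;
  the weight name and shape are illustrative):

  ```python
  def build(self, input_shape):
    default_caching_device = _caching_device(self)
    self.kernel = self.add_weight(
        shape=(input_shape[-1], self.units),
        name='kernel',
        initializer=self.kernel_initializer,
        caching_device=default_caching_device)
  ```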
  Nr   Fa$  Variable read device caching has been disabled because the RNN is in tf.while_loop loop context, which will cause reading stalled value in forward path. This could slow down the training due to duplicated variable reads. Please consider updating your code to remove tf.while_loop if possible.a\  Variable read device caching has been disabled since it doesn't work with the mixed precision API. This is likely to cause a slowdown for RNN training due to duplicated read of variable for each timestep, which will be significant in a multi remote worker setting. Please consider disabling mixed precision API if the performance has been affected.c             S   s   | j S )N)Zdevice)opr*   r*   r+   rz     r{   z!_caching_device.<locals>.<lambda>)r   Zexecuting_eagerlyr2   r   ZIsInWhileLoopr   Zget_default_graphr"   r#   Z_dtype_policyZcompute_dtypeZvariable_dtype)rnn_cellr*   r*   r+   r     s    
r   c             C   s    t  }| j|krd| jiS i S )a]  Return the dict config for RNN cell wrt to enable_caching_device field.

  Since `enable_caching_device` is an internal implementation detail used to
  speed up RNN variable reads in a multi remote worker setting, we don't want
  this config to be serialized constantly in the JSON. We will only serialize
  this field when a non-default value is used to create the cell.

  Args:
    rnn_cell: the RNN cell to serialize.

  Returns:
    A dict which contains the JSON config for the `enable_caching_device`
    value, or an empty dict if that value is the same as the default.
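
  Example (a hedged sketch of the intended use inside a cell's `get_config`):

  ```python
  def get_config(self):
    config = {'units': self.units}
    config.update(_config_for_enable_caching_device(self))
    return config
  ```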
  r   )r   r   r   )r:  Zdefault_enable_caching_devicer*   r*   r+   r     s    

r   )Nr<   )Crc   r   r(  numpyr   Ztensorflow.python.distributer   rw   Ztensorflow.python.eagerr   Ztensorflow.python.frameworkr   r   Ztensorflow.python.kerasr   r   r   r	   r
   Z)tensorflow.python.keras.engine.base_layerr   Z)tensorflow.python.keras.engine.input_specr   Z*tensorflow.python.keras.saving.saved_modelr   Ztensorflow.python.keras.utilsr   r   r   Ztensorflow.python.opsr   r   r   r   Ztensorflow.python.platformr   r"   Ztensorflow.python.trackabler   r   Ztensorflow.python.utilr   Z tensorflow.python.util.tf_exportr   Ztensorflow.tools.docsr   r  r   rh   r   Zdo_not_generate_docsobjectr   r   r   r   r  r  r&  r-  r   r   r3   r9   r   r   r   r*   r*   r*   r+   <module>   s          -Z 
 M ~
    
  v  
=1