B
    ӻd&                @   sV  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)d Z*d!d" Z+G d#d$ d$e,Z-e!d%g d&G d'd( d(ej.Z.e!d)g d&G d*d+ d+ej/ej0Z0d,d- Z1d.d/ Z2d0d1 Z3e!d2g d&G d3d4 d4ej4Z4e!d5g d&G d6d7 d7ej/ej5Z5dSd9d:Z6d;d< Z7d=d> Z8d?d@ Z9dAdB Z:dCdD Z;dEdF Z<dGdH Z=dIdJ Z>dKdL Z?dMdN Z@dOdP ZAdQdR ZBdS )TzRecurrent layers for TF 2.    N)context)function)get_device_name)config)constant_op)device)dtypes)ops)activations)backend)	InputSpec)	recurrent)	array_ops)control_flow_ops)gen_cudnn_rnn_ops)math_ops)nn)	state_ops)	variables)	sysconfig)
tf_logging)keras_exportZapi_implementsZapi_preferred_deviceZCPUGPU      z4Layer %s will use cuDNN kernels when running on GPU.zLayer %s will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.c               C   s   dS )NF r   r   r   ]/var/www/html/venv/lib/python3.7/site-packages/tensorflow/python/keras/layers/recurrent_v2.py_use_new_codeB   s    r   c               @   s    e Zd ZdZdd Zdd ZdS )_DefunWrapperz;A wrapper with no deep copy of the Defun in LSTM/GRU layer.c          	   C   s|   || _ || _|| _| jdkr,td| jd| j d| jt| jd tt  i}| jdkrbt	}nt
}tj||dd| _d S )	N)lstmgruz>Defun wrapper only applies to LSTM and GRU layer, but given {}
time_majorgo_backwards_r   F)
attributes	autograph)r!   r"   
layer_name
ValueErrorformat_FUNCTION_API_NAME_ATTRIBUTEstruuiduuid4lstm_with_backend_selectiongru_with_backend_selectionr   defun_with_attributesdefun_layer)selfr!   r"   r&   supportive_attributesZ
layer_funcr   r   r   __init__K   s     


z_DefunWrapper.__init__c             C   s&   t | | j| j| j}||t| < |S )N)typer!   r"   r&   id)r1   memoZnew_wrapperr   r   r   __deepcopy__b   s    z_DefunWrapper.__deepcopy__N)__name__
__module____qualname____doc__r3   r7   r   r   r   r   r   H   s   r   zkeras.layers.GRUCell)Zv1c                   s"   e Zd ZdZd fd
d	Z  ZS )GRUCella  Cell class for the GRU layer.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  This class processes one step within the whole time sequence input, whereas
  `tf.keras.layer.GRU` processes the whole sequence.

  For example:

  >>> inputs = tf.random.normal([32, 10, 8])
  >>> rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4))
  >>> output = rnn(inputs)
  >>> print(output.shape)
  (32, 4)
  >>> rnn = tf.keras.layers.RNN(
  ...    tf.keras.layers.GRUCell(4),
  ...    return_sequences=True,
  ...    return_state=True)
  >>> whole_sequence_output, final_state = rnn(inputs)
  >>> print(whole_sequence_output.shape)
  (32, 10, 4)
  >>> print(final_state.shape)
  (32, 4)

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use. Default: hyperbolic tangent
      (`tanh`). If you pass None, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use for the recurrent step.
      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
      applied (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs. Default:
      `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix, used for the linear transformation of the recurrent state.
      Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to the
      `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix. Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1. Fraction of the units to drop for the
      linear transformation of the inputs. Default: 0.
    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
      the linear transformation of the recurrent state. Default: 0.
    reset_after: GRU convention (whether to apply reset gate after or
      before matrix multiplication). False = "before",
      True = "after" (default and CuDNN compatible).

  Call arguments:
    inputs: A 2D tensor, with shape of `[batch, feature]`.
    states: A 2D tensor with shape of `[batch, units]`, which is the state from
      the previous time step. For timestep 0, the initial state provided by user
      will be feed to cell.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. Only relevant when `dropout` or
      `recurrent_dropout` is used.
  tanhsigmoidTglorot_uniform
orthogonalzerosN        c                sF   t t| j|f||||||||	|
||||||dd|d| d S )Nimplementationr   )
activationrecurrent_activationuse_biaskernel_initializerrecurrent_initializerbias_initializerkernel_regularizerrecurrent_regularizerbias_regularizerkernel_constraintrecurrent_constraintbias_constraintdropoutrecurrent_dropoutrC   reset_after)superr<   r3   pop)r1   unitsrD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   kwargs)	__class__r   r   r3      s&    

zGRUCell.__init__)r=   r>   Tr?   r@   rA   NNNNNNrB   rB   T)r8   r9   r:   r;   r3   __classcell__r   r   )rW   r   r<   i   s    G              r<   zkeras.layers.GRUc                   s4   e Zd ZdZd fdd	ZdddZdd Z  ZS )GRUaX  Gated Recurrent Unit - Cho et al. 2014.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  Based on available runtime hardware and constraints, this layer
  will choose different implementations (cuDNN-based or pure-TensorFlow)
  to maximize the performance. If a GPU is available and all
  the arguments to the layer meet the requirement of the CuDNN kernel
  (see below for details), the layer will use a fast cuDNN implementation.

  The requirements to use the cuDNN implementation are:

  1. `activation` == `tanh`
  2. `recurrent_activation` == `sigmoid`
  3. `recurrent_dropout` == 0
  4. `unroll` is `False`
  5. `use_bias` is `True`
  6. `reset_after` is `True`
  7. Inputs, if use masking, are strictly right-padded.
  8. Eager execution is enabled in the outermost context.

  There are two variants of the GRU implementation. The default one is based on
  [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to hidden
  state before matrix multiplication. The other one is based on
  [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed.

  The second variant is compatible with CuDNNGRU (GPU-only) and allows
  inference on CPU. Thus it has separate biases for `kernel` and
  `recurrent_kernel`. To use this variant, set `'reset_after'=True` and
  `recurrent_activation='sigmoid'`.

  For example:

  >>> inputs = tf.random.normal([32, 10, 8])
  >>> gru = tf.keras.layers.GRU(4)
  >>> output = gru(inputs)
  >>> print(output.shape)
  (32, 4)
  >>> gru = tf.keras.layers.GRU(4, return_sequences=True, return_state=True)
  >>> whole_sequence_output, final_state = gru(inputs)
  >>> print(whole_sequence_output.shape)
  (32, 10, 4)
  >>> print(final_state.shape)
  (32, 4)

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: sigmoid (`sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs. Default:
      `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel`
       weights matrix, used for the linear transformation of the recurrent
       state. Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to the
      `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation"). Default: `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix. Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
      transformation of the inputs. Default: 0.
    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
      the linear transformation of the recurrent state. Default: 0.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence. Default: `False`.
    return_state: Boolean. Whether to return the last state in addition to the
      output. Default: `False`.
    go_backwards: Boolean (default `False`).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed-up a RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.
    time_major: The shape format of the `inputs` and `outputs` tensors.
      If True, the inputs and outputs will be in shape
      `[timesteps, batch, feature]`, whereas in the False case, it will be
      `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
      efficient because it avoids transposes at the beginning and end of the
      RNN calculation. However, most TensorFlow data is batch-major, so by
      default this function accepts input and emits output in batch-major
      form.
    reset_after: GRU convention (whether to apply reset gate after or
      before matrix multiplication). False = "before",
      True = "after" (default and CuDNN compatible).

  Call arguments:
    inputs: A 3D tensor, with shape `[batch, timesteps, feature]`.
    mask: Binary tensor of shape `[samples, timesteps]` indicating whether
      a given timestep should be masked  (optional, defaults to `None`).
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the
      corresponding timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is only relevant if `dropout` or
      `recurrent_dropout` is used  (optional, defaults to `None`).
    initial_state: List of initial state tensors to be passed to the first
      call of the cell  (optional, defaults to `None` which causes creation
      of zero-filled initial state tensors).
  r=   r>   Tr?   r@   rA   N        Fc                s   | dd| _tt| j|f||||||||	|
||||||| dd|||||||d| | jtjtjfko| j	tj
tj
fko|dko| o|o|ot | _tdr| jrtt| j  ntt| j  t rt||d| _d S )	Nreturn_runtimeFrC   r   )rD   rE   rF   rG   rH   rI   rJ   rK   rL   activity_regularizerrM   rN   rO   rP   rQ   rC   return_sequencesreturn_stater"   statefulunrollr!   rR   r   r   r    )rT   _return_runtimerS   rY   r3   rD   r
   r=   r   rE   r>   r	   #executing_eagerly_outside_functions_could_use_gpu_kernelr   list_logical_devicesloggingdebug_CUDNN_AVAILABLE_MSGnamewarning_CUDNN_NOT_AVAILABLE_MSGr   r   _defun_wrapper)r1   rU   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r\   rM   rN   rO   rP   rQ   r]   r^   r"   r_   r`   r!   rR   rV   )rW   r   r   r3   Z  sJ    


zGRU.__init__c                sj  t |\}}|d k	}|| ||d \}}}t|trH|d }t |}jr`|d n|d }	|srjsd|i 	j
  fdd}
t j|
||d j|j|d k	r|n|	jjd
\}}}tt}n|||||\}}}}jrtjd |d g}| jr8t j|||jd}n|}jrR|gt| S jrb||fS |S d S )Nr   r   trainingc                s   j | |f S )N)cell)cell_inputscell_states)rV   r1   r   r   step  s    zGRU.call.<locals>.step)	constantsr"   maskr`   input_lengthr!   zero_output_for_mask)r"   )r   convert_inputs_if_ragged_validate_args_if_ragged_process_inputs
isinstancelist	int_shaper!   rc   _maybe_reset_cell_dropout_maskrm   rnnr"   r`   rt   _runtime_RUNTIME_UNKNOWN_defun_gru_callr_   r   assignstates
add_updater]   maybe_convert_to_raggedr^   ra   )r1   inputsrr   rl   initial_staterow_lengthsis_ragged_inputr#   input_shape	timestepsrp   last_outputoutputsr   runtimeupdatesoutputr   )rV   r1   r   call  sL    




zGRU.callc             C   sv  |    | j||dd}|d k	r,||d  }t r|t|d t| jjt| jjt| jj|| j| j	|| j
d
}| jjf |\}}	}
}n|t|d t| jjt| jjt| jj|| j| j	|d	}| }|d| j
i t rRt }|tks|d ko"tdo"|d kp"t|| j}|r>tf |\}}	}
}ntf |\}}	}
}ntf |\}}	}
}|
g}||	||fS )N   )countr   )
r   init_hkernelrecurrent_kernelbiasrr   r!   r"   sequence_lengthsrt   )	r   r   r   r   r   rr   r!   r"   r   rt   r   )reset_dropout_maskget_dropout_mask_for_cellr   _read_variable_valuerm   r   r   r   r!   r"   rt   rk   r0   copyupdater   executing_eagerly_get_context_device_type_GPU_DEVICE_NAMEr   rd   is_cudnn_supported_inputsgpu_grustandard_grur.   )r1   r   r   rl   rr   r   dropout_maskZ
gru_kwargsr   r   new_hr   Zgpu_gru_kwargsZnormal_gru_kwargsdevice_typecan_use_gpur   r   r   r   r     sR    










zGRU._defun_gru_call)r=   r>   Tr?   r@   rA   NNNNNNNrZ   rZ   FFFFFFT)NNN)r8   r9   r:   r;   r3   r   r   rX   r   r   )rW   r   rY      s2                        1
;rY   c
                s   t | }
|r|
d n|
d }t|\  fdd}t j|| |gdd||||dk	r`|n||	d
\}}}|||d ttfS )a  GRU with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and make them function
  parameters. It has same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, eg dropout and mask is
  removed since CuDNN implementation does not support that.

  Args:
    inputs: Input tensor of GRU layer.
    init_h: Initial state tensor for the cell output.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. The bias contains the
      combined input_bias and recurrent_bias.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked. An individual `True` entry indicates
      that the corresponding timestep should be utilized, while a `False` entry
      indicates that the corresponding timestep should be ignored.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.
    zero_output_for_mask: Boolean, whether to output zero for masked timestep.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has same shape as init_h.
    runtime: constant string tensor which indicate real runtime hardware. This
      value is for testing purpose and should be used by user.
  r   r   c                s   |d }t | }t | }tj|ddd\}}}t |}t |}tj|ddd\}}	}
t|| }t||	 }t|||
  }|| d| |  }||gfS )z5Step function that will be used by Keras RNN backend.r   r   r   )axis)r   dotbias_addr   splitr   r>   r=   )rn   ro   h_tm1Zmatrix_xZx_zZx_rZx_hZmatrix_innerZrecurrent_zZrecurrent_rZrecurrent_hzrhhh)
input_biasr   recurrent_biasr   r   r   rp   K  s    zstandard_gru.<locals>.stepNF)rq   r`   r!   rr   r"   rs   rt   )r   rz   r   Zunstackr|   r}   _RUNTIME_CPU)r   r   r   r   r   rr   r!   r"   r   rt   r   r   rp   r   r   
new_statesr   )r   r   r   r   r   r     s    )
r   c	          
   C   s  |s$|dkr$t j| dd} d\}	}
n|r,dnd\}	}
t j||	d}t j|ddd}|t j|ddd7 }t t|d	}t d
 r|d |d  |d< |d< |d |d  |d< |d< |d |d  |d< |d< |d |d  |d< |d< t||t	
dgdd}|dk	rt||}|dk	r|r:t j| ||	|
d} tj| |d|dd||d\}}}}}|rt j|||	|
d}t j||	gd}n4|rt j| dgd} tj| |d|ddd\}}}}|d }|s|dkrt j|dddgd}t j||	d}|dk	r|}|||ttfS )z>GRU with CuDNN implementation which is only available for GPU.N)r   r   r   )perm)r   r   )r   r   )r   r   r      Zis_cuda_buildr      T)weightsbiasesshapetranspose_weights)seq_axis
batch_axisr    )inputinput_hinput_cparamsis_trainingrnn_moder   r!   )r   r   r   r   r   r   r   )r   	transposeexpand_dimsr   r   flattenr   get_build_info_canonical_to_paramsr   constantcalculate_sequence_by_maskreverse_sequence_v2r   
CudnnRNNV3reverseCudnnRNNsqueezer}   _RUNTIME_GPU)r   r   r   r   r   rr   r!   r"   r   r   r   r   r   r   r   r#   r   r   r   r   r   q  sb    





r   c
                s   | |||||||||	d
dd  t  r`ttfddt fddifdd\}
}}}nTdtt  }||d	}t|tt	|}t|t |}|f \}
}}}t
|f |
|||fS )
aw  Call the GRU with optimized backend kernel selection.

  Under the hood, this function will create two TF function, one with the most
  generic kernel and can run on all device condition, and the second one with
  CuDNN specific kernel, which can only run on GPU.

  The first function will be called with normal_lstm_params, while the second
  function is not called, but only registered in the graph. The Grappler will
  do the proper graph rewrite and swap the optimized TF function based on the
  device placement.

  Args:
    inputs: Input tensor of GRU layer.
    init_h: Initial state tensor for the cell output.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor for mask out the steps within sequence.
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.
    zero_output_for_mask: Boolean, whether to output zero for masked timestep.

  Returns:
    List of output tensors, same as standard_gru.
  )
r   r   r   r   r   rr   r!   r"   r   rt   c
                sr   dkr"t  d	S  f	dd}
 	f
dd}tjt|
|dS )z<Use CuDNN kernel when mask is none or strictly right padded.N)	r   r   r   r   r   rr   r!   r"   r   c                  s   t  d	S )N)	r   r   r   r   r   rr   r!   r"   r   )r   r   )	r   r"   r   r   r   rr   r   r   r!   r   r   cudnn_gru_fn	  s    zOgru_with_backend_selection.<locals>.gpu_gru_with_fallback.<locals>.cudnn_gru_fnc                  s   t  	d
S )N)
r   r   r   r   r   rr   r!   r"   r   rt   )r   r   )
r   r"   r   r   r   rr   r   r   r!   rt   r   r   standard_gru_fn  s    zRgru_with_backend_selection.<locals>.gpu_gru_with_fallback.<locals>.standard_gru_fn)true_fnfalse_fn)r   r   condr   )r   r   r   r   r   rr   r!   r"   r   rt   r   r   r   )
r   r"   r   r   r   rr   r   r   r!   rt   r   gpu_gru_with_fallback  s"    z9gru_with_backend_selection.<locals>.gpu_gru_with_fallbackc                  s
   t f  S )N)r   r   )r   r   r   <lambda>,      z,gru_with_backend_selection.<locals>.<lambda>c                  s
    f S )Nr   r   )r   r   r   r   r   -  r   c                  s
   t f  S )N)r   r   )r   r   r   r   .  r   Zgru_)r!   r"   )r   r   execute_fn_for_device_CPU_DEVICE_NAMEr   r*   r+   r,   _generate_defun_backendr   _function_register)r   r   r   r   r   rr   r!   r"   r   rt   r   r   r   r   api_namesupportive_attributeZdefun_standard_gruZdefun_gpu_grur   )r   r   r   r.     s8    &.r.   zkeras.layers.LSTMCellc                   s"   e Zd ZdZd fd
d	Z  ZS )LSTMCella  Cell class for the LSTM layer.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  This class processes one step within the whole time sequence input, whereas
  `tf.keras.layer.LSTM` processes the whole sequence.

  For example:

  >>> inputs = tf.random.normal([32, 10, 8])
  >>> rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(4))
  >>> output = rnn(inputs)
  >>> print(output.shape)
  (32, 4)
  >>> rnn = tf.keras.layers.RNN(
  ...    tf.keras.layers.LSTMCell(4),
  ...    return_sequences=True,
  ...    return_state=True)
  >>> whole_seq_output, final_memory_state, final_carry_state = rnn(inputs)
  >>> print(whole_seq_output.shape)
  (32, 10, 4)
  >>> print(final_memory_state.shape)
  (32, 4)
  >>> print(final_carry_state.shape)
  (32, 4)

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use. Default: hyperbolic tangent
      (`tanh`). If you pass `None`, no activation is applied (ie. "linear"
      activation: `a(x) = x`).
    recurrent_activation: Activation function to use for the recurrent step.
      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix, used for
      the linear transformation of the inputs. Default: `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel` weights
      matrix, used for the linear transformation of the recurrent state.
      Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
      the forget gate at initialization. Setting it to true will also force
      `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
        al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix. Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
      transformation of the inputs. Default: 0.
    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
      the linear transformation of the recurrent state. Default: 0.

  Call arguments:
    inputs: A 2D tensor, with shape of `[batch, feature]`.
    states: List of 2 tensors that corresponding to the cell's units. Both of
      them have shape `[batch, units]`, the first tensor is the memory state
      from previous time step, the second tensor is the carry state from
      previous time step. For timestep 0, the initial state provided by user
      will be feed to cell.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. Only relevant when `dropout` or
      `recurrent_dropout` is used.
  r=   r>   Tr?   r@   rA   N        c                sF   t t| j|f||||||||	|
|||||||ddd| d S )NrC   r   )rD   rE   rF   rG   rH   rI   unit_forget_biasrJ   rK   rL   rM   rN   rO   rP   rQ   rC   )rS   r   r3   rT   )r1   rU   rD   rE   rF   rG   rH   rI   r   rJ   rK   rL   rM   rN   rO   rP   rQ   rV   )rW   r   r   r3     s&    
zLSTMCell.__init__)r=   r>   Tr?   r@   rA   TNNNNNNr   r   )r8   r9   r:   r;   r3   rX   r   r   )rW   r   r   H  s    K              r   zkeras.layers.LSTMc                   s,   e Zd ZdZd fdd	ZdddZ  ZS )LSTMa  Long Short-Term Memory layer - Hochreiter 1997.

  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
  for details about the usage of RNN API.

  Based on available runtime hardware and constraints, this layer
  will choose different implementations (cuDNN-based or pure-TensorFlow)
  to maximize the performance. If a GPU is available and all
  the arguments to the layer meet the requirement of the CuDNN kernel
  (see below for details), the layer will use a fast cuDNN implementation.

  The requirements to use the cuDNN implementation are:

  1. `activation` == `tanh`
  2. `recurrent_activation` == `sigmoid`
  3. `recurrent_dropout` == 0
  4. `unroll` is `False`
  5. `use_bias` is `True`
  6. Inputs, if use masking, are strictly right-padded.
  7. Eager execution is enabled in the outermost context.

  For example:

  >>> inputs = tf.random.normal([32, 10, 8])
  >>> lstm = tf.keras.layers.LSTM(4)
  >>> output = lstm(inputs)
  >>> print(output.shape)
  (32, 4)
  >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
  >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
  >>> print(whole_seq_output.shape)
  (32, 10, 4)
  >>> print(final_memory_state.shape)
  (32, 4)
  >>> print(final_carry_state.shape)
  (32, 4)

  Args:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
      is applied (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use for the recurrent step.
      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
      applied (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean (default `True`), whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix, used for
      the linear transformation of the inputs. Default: `glorot_uniform`.
    recurrent_initializer: Initializer for the `recurrent_kernel` weights
      matrix, used for the linear transformation of the recurrent state.
      Default: `orthogonal`.
    bias_initializer: Initializer for the bias vector. Default: `zeros`.
    unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
      the forget gate at initialization. Setting it to true will also force
      `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
          al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_regularizer: Regularizer function applied to the
      `recurrent_kernel` weights matrix. Default: `None`.
    bias_regularizer: Regularizer function applied to the bias vector. Default:
      `None`.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation"). Default: `None`.
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix. Default: `None`.
    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
      weights matrix. Default: `None`.
    bias_constraint: Constraint function applied to the bias vector. Default:
      `None`.
    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
      transformation of the inputs. Default: 0.
    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
      the linear transformation of the recurrent state. Default: 0.
    return_sequences: Boolean. Whether to return the last output. in the output
      sequence, or the full sequence. Default: `False`.
    return_state: Boolean. Whether to return the last state in addition to the
      output. Default: `False`.
    go_backwards: Boolean (default `False`). If True, process the input sequence
      backwards and return the reversed sequence.
    stateful: Boolean (default `False`). If True, the last state for each sample
      at index i in a batch will be used as initial state for the sample of
      index i in the following batch.
    time_major: The shape format of the `inputs` and `outputs` tensors.
      If True, the inputs and outputs will be in shape
      `[timesteps, batch, feature]`, whereas in the False case, it will be
      `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
      efficient because it avoids transposes at the beginning and end of the
      RNN calculation. However, most TensorFlow data is batch-major, so by
      default this function accepts input and emits output in batch-major
      form.
    unroll: Boolean (default `False`). If True, the network will be unrolled,
      else a symbolic loop will be used. Unrolling can speed-up a RNN, although
      it tends to be more memory-intensive. Unrolling is only suitable for short
      sequences.

  Call arguments:
    inputs: A 3D tensor with shape `[batch, timesteps, feature]`.
    mask: Binary tensor of shape `[batch, timesteps]` indicating whether
      a given timestep should be masked (optional, defaults to `None`).
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    training: Python boolean indicating whether the layer should behave in
      training mode or in inference mode. This argument is passed to the cell
      when calling it. This is only relevant if `dropout` or
      `recurrent_dropout` is used (optional, defaults to `None`).
    initial_state: List of initial state tensors to be passed to the first
      call of the cell (optional, defaults to `None` which causes creation
      of zero-filled initial state tensors).
  r=   r>   Tr?   r@   rA   N        Fc                s   | dd| _tt| j|f||||||||	|
|||||||| dd||||||d| dd | j| jfD | _| jtj	t
j	fko| jtjt
jfko|dko| o|ot | _td	r| jrtt| j  ntt| j  t rt||d
| _d S )Nr[   FrC   r   )rD   rE   rF   rG   rH   rI   r   rJ   rK   rL   r\   rM   rN   rO   rP   rQ   rC   r]   r^   r"   r_   r!   r`   c             S   s   g | ]}t d |fdqS )N)r   )r   ).0dimr   r   r   
<listcomp>g  s    z!LSTM.__init__.<locals>.<listcomp>r   r   r   )rT   r[   rS   r   r3   rU   Z
state_specrD   r
   r=   r   rE   r>   r	   rb   rc   r   rd   re   rf   rg   rh   ri   rj   r   r   rk   )r1   rU   rD   rE   rF   rG   rH   rI   r   rJ   rK   rL   r\   rM   rN   rO   rP   rQ   r]   r^   r"   r_   r!   r`   rV   )rW   r   r   r3   .  sL    



zLSTM.__init__c                s  t |\}}|d k	}|| ||d \}}}t|trH|d }t |}jr`|d n|d }	|srjsd|i 	j
  fdd}
t j|
||d j|j|d k	r|n|	jjd
\}}}tt}n  j||dd}|d k	r||d  }t rr|t|d t|d tj
jtj
jtj
j|jj|jd	}jjf |\}}}}}n|t|d t|d tj
jtj
jtj
j|jj|d

}| }|dji t rHt }|tks |d kot !do|d kpt"|j}|r2t#f |\}}}}}nt$f |\}}}}}nt%f |\}}}}}||g}j&rdd t'j(|D })| j*rt j+|||jd}n|}j,r|gt| S j-r||fS |S d S )Nr   r   rl   c                s   j | |f S )N)rm   )r   r   )rV   r1   r   r   rp     s    zLSTM.call.<locals>.step)rq   r"   rr   r`   rs   r!   rt   r   )r   )r   r   init_cr   r   r   rr   r!   r"   r   rt   )
r   r   r   r   r   r   rr   r!   r"   r   rt   r   c             S   s   g | ]\}}t ||qS r   )r   r   )r   Z
self_statestater   r   r   r     s   zLSTM.call.<locals>.<listcomp>)r"   ).r   ru   rv   rw   rx   ry   rz   r!   rc   r{   rm   r|   r"   r`   rt   r}   r~   r   r   r   r   r   r   r   rk   r0   r   r   r   r   r   r   r   rd   r   gpu_lstmstandard_lstmr-   r_   zipr   r   r]   r   r^   r[   )r1   r   rr   rl   r   r   r   r#   r   r   rp   r   r   r   r   r   Zlstm_kwargsr   new_cZgpu_lstm_kwargsZnormal_lstm_kwargsr   r   r   r   r   )rV   r1   r   r   y  s    


















z	LSTM.call)r=   r>   Tr?   r@   rA   TNNNNNNNr   r   FFFFFF)NNN)r8   r9   r:   r;   r3   r   rX   r   r   )rW   r   r     s0   p                     4r   Fc                sD   fdd  fdd| D } fdd|D }t j| | ddS )a  Utility function convert variable to CuDNN compatible parameter.

  Note that Keras weights for kernels are different from the CuDNN format. Eg.:

  ```
    Keras                 CuDNN
    [[0, 1, 2],  <--->  [[0, 2, 4],
     [3, 4, 5]]          [1, 3, 5]]
  ```

  If the input weights need to be in a unified format, then set
  `transpose_weights=True` to convert the weights.

  Args:
    weights: list of weights for the individual kernels and recurrent kernels.
    biases: list of biases for individual gate.
    shape: the shape for the converted variables that will be feed to CuDNN.
    transpose_weights: boolean, whether to transpose the weights.

  Returns:
    The converted weights that can be feed to CuDNN ops as param.
  c                s    rt | S | S )N)r   r   )w)r   r   r   convert"  s    z%_canonical_to_params.<locals>.convertc                s   g | ]}t  |qS r   )r   reshape)r   x)r   r   r   r   r   %  s    z(_canonical_to_params.<locals>.<listcomp>c                s   g | ]}t | qS r   )r   r   )r   r   )r   r   r   r   &  s    r   )r   )r   concat)r   r   r   r   r   )r   r   r   r   r     s    r   c                s|   t | }|r|d n|d } fdd}t j|| ||gdd||||	dk	rR|	n||
d
\}}}|||d |d ttfS )a  LSTM with standard kernel implementation.

  This implementation can be run on all types for hardware.

  This implementation lifts out all the layer weights and make them function
  parameters. It has same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, eg dropout and mask is
  removed since CuDNN implementation does not support that.

  Note that the first half of the bias tensor should be ignored by this impl.
  The CuDNN impl need an extra set of input gate bias. In order to make the both
  function take same shape of parameter, that extra set of bias is also feed
  here.

  Args:
    inputs: input tensor of LSTM layer.
    init_h: initial state tensor for the cell output.
    init_c: initial state tensor for the cell hidden state.
    kernel: weights for cell kernel.
    recurrent_kernel: weights for cell recurrent kernel.
    bias: weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor for mask out the steps within sequence.
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    time_major: boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.
    zero_output_for_mask: Boolean, whether to output zero for masked timestep.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has same shape as init_h.
    state_1: the cell hidden state, which has same shape as init_c.
    runtime: constant string tensor which indicate real runtime hardware. This
      value is for testing purpose and should be used by user.
  r   r   c                s   |d }|d }t | }|t |7 }t | }tj|ddd\}}}}t|}	t|}
|
| |	t|  }t|}|t| }|||gfS )z5Step function that will be used by Keras RNN backend.r   r   r   )r   )r   r   r   r   r   r   r>   r=   )rn   ro   r   Zc_tm1r   Zz0Zz1Zz2Zz3ifcor   )r   r   r   r   r   rp   ]  s    


zstandard_lstm.<locals>.stepNF)rq   r`   r!   rr   r"   rs   rt   )r   rz   r|   r}   r   )r   r   r   r   r   r   rr   r!   r"   r   rt   r   r   rp   r   r   r   r   )r   r   r   r   r   *  s    0
r   c
          
      s  |s$|dkr$t j| dd} d\}
}n|r,dnd\}
}t j||
d}t j||
d}t j|dddt j|ddd7 t t ||fd	 t d
 rʇfdddD t j dd	d  fdddD  tt  dt	
dgdd}|dk	rt||}	|	dk	rl|rt j| |	|
|d} tj| |||dd|	|d\}}}}}|rt j||	|
|d}t j||
gd}n4|rt j| d	gd} tj| |||ddd\}}}}|d }|s|dkrt j|dd	dgd}t j||
d}t j||
d}|dk	r|}||||ttfS )a  LSTM with either CuDNN or ROCm implementation which is only available for GPU.

  Note that currently only right padded data is supported, or the result will be
  polluted by the unmasked data which should be filtered.

  Args:
    inputs: Input tensor of LSTM layer.
    init_h: Initial state tensor for the cell output.
    init_c: Initial state tensor for the cell hidden state.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor for mask out the steps within sequence.
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    time_major: Boolean, whether the inputs are in the format of [time, batch,
      feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.

  Returns:
    last_output: Output tensor for the last timestep, which has shape
      [batch, units].
    outputs: Output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: The cell output, which has same shape as init_h.
    state_1: The cell hidden state, which has same shape as init_c.
    runtime: Constant string tensor which indicate real runtime hardware. This
      value is for testing purpose and should not be used by user.
  N)r   r   r   )r   )r   r   )r   r   )r   r   r   r   Zis_rocm_buildc                s   g | ]} | qS r   r   )r   r   )r   r   r   r     s    zgpu_lstm.<locals>.<listcomp>)r   r   r   r   r         r      c                s   g | ]} | qS r   r   )r   r   )	full_biasr   r   r     s    r   T)r   r   r   r   )r   r   r   )r   r   r   r   r   r   r   r!   )r   r   r   r   r   r   r   )r   r   r   r   r   Z
zeros_liker   r   r   r   r   r   r   r   r   r   r   r   r}   r   )r   r   r   r   r   r   rr   r!   r"   r   r   r   r   r   r   r   r#   r   r   )r   r   r   r     sd    %





r   c                s   | |||||||||	|
ddd  t  rdttfddt fddifdd\}}}}}nVdtt  }||d	}t|tt	|}t|t |}|f \}}}}}t
|f |||||fS )
a  Call the LSTM with optimized backend kernel selection.

  Under the hood, this function will create two TF function, one with the most
  generic kernel and can run on all device condition, and the second one with
  CuDNN specific kernel, which can only run on GPU.

  The first function will be called with normal_lstm_params, while the second
  function is not called, but only registered in the graph. The Grappler will
  do the proper graph rewrite and swap the optimized TF function based on the
  device placement.

  Args:
    inputs: Input tensor of LSTM layer.
    init_h: Initial state tensor for the cell output.
    init_c: Initial state tensor for the cell hidden state.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor for mask out the steps within sequence.
      An individual `True` entry indicates that the corresponding timestep
      should be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.
    zero_output_for_mask: Boolean, whether to output zero for masked timestep.

  Returns:
    List of output tensors, same as standard_lstm.
  )r   r   r   r   r   r   rr   r!   r"   r   rt   c                sx   dkr$t  	d
S  	f
dd} 	
fdd}tjt	||dS )z<Use CuDNN kernel when mask is none or strictly right padded.N)
r   r   r   r   r   r   rr   r!   r"   r   c                  s   t  	d
S )N)
r   r   r   r   r   r   rr   r!   r"   r   )r   r   )
r   r"   r   r   r   r   rr   r   r   r!   r   r   cudnn_lstm_fnB  s    zRlstm_with_backend_selection.<locals>.gpu_lstm_with_fallback.<locals>.cudnn_lstm_fnc                  s   t  	
dS )N)r   r   r   r   r   r   rr   r!   r"   r   rt   )r   r   )r   r"   r   r   r   r   rr   r   r   r!   rt   r   r   stardard_lstm_fnO  s    zUlstm_with_backend_selection.<locals>.gpu_lstm_with_fallback.<locals>.stardard_lstm_fn)r   r   )r   r   r   r   )r   r   r   r   r   r   rr   r!   r"   r   rt   r   r   r   )r   r"   r   r   r   r   rr   r   r   r!   rt   r   gpu_lstm_with_fallback1  s$     z;lstm_with_backend_selection.<locals>.gpu_lstm_with_fallbackc                  s
   t f  S )N)r   r   )r   r   r   r   g  r   z-lstm_with_backend_selection.<locals>.<lambda>c                  s
    f S )Nr   r   )r   r   r   r   r   h  r   c                  s
   t f  S )N)r   r   )r   r   r   r   i  r   Zlstm_)r!   r"   )r   r   r   r   r   r*   r+   r,   r   r   r   )r   r   r   r   r   r   rr   r!   r"   r   rt   r   r   r   r   r   r   r   Zdefun_standard_lstmZdefun_gpu_lstmr   )r   r   r   r-     s:    (1r-   c             C   sF   t | d }tjt| tjdd}t j||d}tt	| |S )aA  Check the mask tensor and see if it right padded.

  For CuDNN kernel, it uses the sequence length param to skip the tailing
  timestep. If the data is left padded, or not a strict right padding (has
  masked value in the middle of the sequence), then CuDNN kernel won't be work
  properly in those cases.

  Left padded data: [[False, False, True, True, True]].
  Right padded data: [[True, True, True, False, False]].
  Mixture of mask/unmasked data: [[True, False, True, False, False]].

  Note that for the mixed data example above, the actually data RNN should see
  are those 2 Trues (index 0 and 2), the index 1 False should be ignored and not
  pollute the internal states.

  Args:
    mask: the Boolean tensor with shape [batch, timestep]

  Returns:
    boolean scalar tensor, whether the mask is strictly right padded.
  r   )r   )maxlen)
r   r   r   
reduce_sumcastr   int32Zsequence_mask
reduce_allequal)rr   Zmax_seq_lengthZcount_of_trueZright_padded_maskr   r   r   is_sequence_right_padded  s
    
r  c             C   s   t t jt | ddS )Nr   )r   )r   Z
reduce_anyr   logical_not)rr   r   r   r   has_fully_masked_sequence  s    r  c             C   s(   |rt | } tt| tt| S )N)r   r   r   logical_andr  r  r  )rr   r!   r   r   r   r     s
    
r   c             C   s$   |rdnd}t jt | tj|dS )a/  Calculate the sequence length tensor (1-D) based on the masking tensor.

  The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For
  any timestep that should be masked, the corresponding field will be False.
  Consider the following example:
    a = [[True, True, False, False],
         [True, True, True, False]]
  It is a (2, 4) tensor, and the corresponding sequence length result should be
  1D tensor with value [2, 3]. Note that the masking tensor must be right
  padded that could be checked by, e.g., `is_sequence_right_padded()`.

  Args:
    mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if
      time_major=True.
    time_major: Boolean, which indicates whether the mask is time major or batch
      major.
  Returns:
    sequence_length: 1D int32 tensor.
  r   r   )r   )r   r   r   r   r   )rr   r!   Ztimestep_indexr   r   r   r     s    r   c             C   s&   t | t|i}|| tj||ddS )NF)funcr$   r%   )r)   _FUNCTION_DEVICE_ATTRIBUTEr   r   r/   )Zunique_api_nameZpreferred_devicer  r2   Zfunction_attributesr   r   r   r     s    
r   c              C   s    t  } | dkrdS tj| jS )zAParse the current context and return the device type, eg CPU/GPU.N)r   r   Z
DeviceSpecZfrom_stringr   )Zcurrent_devicer   r   r   r     s    r   c          	   C   s(   t d tj| tjddS Q R X d S )Nz/cpu:0r   )Zdtyperh   )r	   r   r   r   r   Zfloat32)Zruntime_namer   r   r   r}     s    r}   c             C   s   t | tjr|  S | S )z/Read the value of a variable if it is variable.)rx   r   VariableZ
read_value)vr   r   r   r     s    r   c             O   s    | j ||}|  |  |S )a  Register a specialization of a `Function` into the graph.

  This won't actually call the function with the inputs, and only put the
  function definition into graph. Register function with different input param
  will result into multiple version of functions registered in graph.

  Args:
    func: the `Function` instance that generated by a @defun
    *args: input arguments for the Python function.
    **kwargs: input keyword arguments for the Python function.

  Returns:
    a `ConcreteFunction` object specialized to inputs and execution context.

  Raises:
    ValueError: When the input function is not a defun wrapped python function.
  )Zget_concrete_functionZadd_to_graphZadd_gradient_functions_to_graph)r  argsrV   Zconcrete_funcr   r   r   r     s    r   )F)Cr;   r+   Ztensorflow.python.eagerr   r   Ztensorflow.python.eager.contextr   Ztensorflow.python.frameworkr   r   r   r   r	   Ztensorflow.python.kerasr
   r   Z)tensorflow.python.keras.engine.input_specr   Ztensorflow.python.keras.layersr   Ztensorflow.python.opsr   r   r   r   r   r   r   Ztensorflow.python.platformr   r   re   Z tensorflow.python.util.tf_exportr   r)   r  r   r   r~   r   r   rg   rj   r   objectr   r<   ZDropoutRNNCellMixinrY   r   r   r.   r   r   r   r   r   r-   r  r  r   r   r   r   r}   r   r   r   r   r   r   <module>   s   !
o  ETV 
s  P
U} 	