B
    ٻdf                 @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	l
mZ dd
l
mZ ddl
mZ ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZdd Zd3ddZd4ddZdd ZG dd dejZedg dG dd  d ejZ edgdG d!d" d"ej!Z"G d#d$ d$ejZ#G d%d& d&ej$Z%ed'g dG d(d) d)ej&Z'ed'gdG d*d+ d+ej!Z(G d,d- d-ejZ)ed.g dG d/d0 d0ejZ*ed.gdG d1d2 d2ej!Z+e, re*Z-e'Z.e Z/ne+Z-e(Z.e"Z/dS )5z$Python wrappers for reader Datasets.    N)tf2)dataset_ops)from_tensor_slices_op)structured_function)convert)dtypes)ops)tensor_shape)tensor_spec)	type_spec)	array_ops)gen_dataset_ops)gen_experimental_dataset_ops)nest)	tf_exporti   c             C   s   t | tjrt| S | S )zHConvert pathlib-like objects to str (__fspath__ compatibility, PEP 519).)
isinstanceosPathLikefspath)path r   T/var/www/html/venv/lib/python3.7/site-packages/tensorflow/python/data/ops/readers.py_normalise_fspath%   s    r   c             C   s   t | tjr\t| }|tjkr0td|dt| }|t	
g std|dn\tt| } tj| tjd} | jtjkrtd| jdtj| dgdd	} tj| d
|d} | S )a8  Creates (or validates) a dataset of filenames.

  Args:
    filenames: Either a list or dataset of filenames. If it is a list, it is
      convert to a dataset. If it is a dataset, its type and shape is validated.
    name: (Optional.) A name for the tf.data operation.

  Returns:
    A dataset of filenames.
  zNThe `filenames` argument must contain `tf.string` elements. Got a dataset of `z` elements.zvThe `filenames` argument must contain `tf.string` elements of shape [] (i.e. scalars). Got a dataset of element shape .)Z
dtype_hintzAThe `filenames` argument must contain `tf.string` elements. Got `Zflat_filenames)nameT)Zis_filesr   )r   r   	DatasetV2Zget_legacy_output_typesr   string	TypeErrorZget_legacy_output_shapesZis_compatible_withr	   ZTensorShaper   Zmap_structurer   r   convert_to_tensordtyper   Zreshaper   ZTensorSliceDataset)	filenamesr   Zelement_typeZelement_shaper   r   r   %_create_or_validate_filenames_dataset*   s$    


r"   c          
      sX    fdd}|dkr"|j ||dS |tjkr<|j|||dS t|||dddd|dS dS )	a  Creates a dataset that reads the given files using the given reader.

  Args:
    dataset_creator: A function that takes in a single file name and returns a
      dataset.
    filenames: A `tf.data.Dataset` containing one or more filenames.
    num_parallel_reads: The number of parallel reads we should do.
    name: (Optional.) A name for the tf.data operation.

  Returns:
    A `Dataset` that reads data from `filenames`.
  c                s   t j| tjdd}  | S )Nfilename)r   )r   r   r   r   )r#   )dataset_creatorr   r   read_one_file_   s    z-_create_dataset_reader.<locals>.read_one_fileN)r   )Znum_parallel_callsr      F)cycle_lengthblock_lengthsloppybuffer_output_elementsprefetch_input_elementsr   )Zflat_mapr   ZAUTOTUNEZ
interleaveParallelInterleaveDataset)r$   r!   num_parallel_readsr   r%   r   )r$   r   _create_dataset_readerN   s    
r.   c             C   s    t | tjr|  S t| S dS )z0Returns the type of `value` if it is a TypeSpec.N)r   r   ZTypeSpecZ
value_typetype)valuer   r   r   	_get_typet   s    r1   c                   s.   e Zd ZdZd fdd	Zedd Z  ZS )_TextLineDatasetz;A `Dataset` comprising records from one or more text files.Nc                sf   || _ tjd|dtjd| _tjd|td| _|| _t	j
| j | j| j| j d}tt| | dS )a  Creates a `TextLineDataset`.

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
        to buffer. A value of 0 results in the default buffering values chosen
        based on the compression type.
      name: (Optional.) A name for the tf.data operation.
    compression_type )argument_defaultargument_dtypebuffer_size)r5   )metadataN)
_filenamesr   optional_param_to_tensorr   r   _compression_type!_DEFAULT_READER_BUFFER_SIZE_BYTES_buffer_size_namer   Ztext_line_dataset	_metadataSerializeToStringsuperr2   __init__)selfr!   r3   r7   r   variant_tensor)	__class__r   r   rB      s"    
z_TextLineDataset.__init__c             C   s   t g tjS )N)r
   
TensorSpecr   r   )rC   r   r   r   element_spec   s    z_TextLineDataset.element_spec)NNN)__name__
__module____qualname____doc__rB   propertyrG   __classcell__r   r   )rE   r   r2   }   s
     r2   zdata.TextLineDataset)Zv1c                   s.   e Zd ZdZd fdd	Zedd Z  ZS )TextLineDatasetV2a  Creates a `Dataset` comprising lines from one or more text files.

  The `tf.data.TextLineDataset` loads text from text files and creates a dataset
  where each line of the files becomes an element of the dataset.

  For example, suppose we have 2 files "text_lines0.txt" and "text_lines1.txt"
  with the following lines:

  >>> with open('/tmp/text_lines0.txt', 'w') as f:
  ...   f.write('the cow\n')
  ...   f.write('jumped over\n')
  ...   f.write('the moon\n')
  >>> with open('/tmp/text_lines1.txt', 'w') as f:
  ...   f.write('jack and jill\n')
  ...   f.write('went up\n')
  ...   f.write('the hill\n')

  We can construct a TextLineDataset from them as follows:

  >>> dataset = tf.data.TextLineDataset(['/tmp/text_lines0.txt',
  ...                                    '/tmp/text_lines1.txt'])

  The elements of the dataset are expected to be:

  >>> for element in dataset.as_numpy_iterator():
  ...   print(element)
  b'the cow'
  b'jumped over'
  b'the moon'
  b'jack and jill'
  b'went up'
  b'the hill'
  Nc                s\   t |d}|| _| _ | _ fdd}t|||d| _| jj}tt| 	| dS )a  Creates a `TextLineDataset`.

    The elements of the dataset will be the lines of the input files, using
    the newline character '\n' to denote line splits. The newline characters
    will be stripped off of each element.

    Args:
      filenames: A `tf.data.Dataset` whose elements are `tf.string` scalars, a
        `tf.string` tensor, or a value that can be converted to a `tf.string`
        tensor (such as a list of Python strings).
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
        to buffer. A value of 0 results in the default buffering values chosen
        based on the compression type.
      num_parallel_reads: (Optional.) A `tf.int64` scalar representing the
        number of files to read in parallel. If greater than one, the records of
        files read in parallel are outputted in an interleaved order. If your
        input pipeline is I/O bottlenecked, consider setting this parameter to a
        value greater than one to parallelize the I/O. If `None`, files will be
        read sequentially.
      name: (Optional.) A name for the tf.data operation.
    )r   c                s   t |  dS )N)r   )r2   )r#   )r7   r3   r   r   r   
creator_fn   s    z.TextLineDatasetV2.__init__.<locals>.creator_fnN)
r"   r9   r;   r=   r.   _impl_variant_tensorrA   rN   rB   )rC   r!   r3   r7   r-   r   rO   rD   )rE   )r7   r3   r   r   rB      s    zTextLineDatasetV2.__init__c             C   s   t g tjS )N)r
   rF   r   r   )rC   r   r   r   rG      s    zTextLineDatasetV2.element_spec)NNNN)rH   rI   rJ   rK   rB   rL   rG   rM   r   r   )rE   r   rN      s   "   'rN   c                   sF   e Zd ZdZd fdd	Zejje_edd Zejdd Z  Z	S )	TextLineDatasetV1z9A `Dataset` comprising lines from one or more text files.Nc                s$   t |||||}tt| | d S )N)rN   rA   rR   rB   )rC   r!   r3   r7   r-   r   wrapped)rE   r   r   rB     s    zTextLineDatasetV1.__init__c             C   s   | j jS )N)_datasetr9   )rC   r   r   r   r9     s    zTextLineDatasetV1._filenamesc             C   s   || j _d S )N)rT   r9   )rC   r0   r   r   r   r9     s    )NNNN)
rH   rI   rJ   rK   rB   rN   rL   r9   setterrM   r   r   )rE   r   rR      s      
rR   c                   s.   e Zd ZdZd fdd	Zedd Z  ZS )_TFRecordDatasetz?A `Dataset` comprising records from one or more TFRecord files.Nc                sf   || _ tjd|dtjd| _tjd|td| _|| _t	j
| j | j| j| j d}tt| | dS )a  Creates a `TFRecordDataset`.

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      buffer_size: (Optional.) A `tf.int64` scalar representing the number of
        bytes in the read buffer. 0 means no buffering.
      name: (Optional.) A name for the tf.data operation.
    r3   r4   )r5   r6   r7   )r5   )r8   N)r9   r   r:   r   r   r;   r<   r=   r>   r   Ztf_record_datasetr?   r@   rA   rV   rB   )rC   r!   r3   r7   r   rD   )rE   r   r   rB     s    
z_TFRecordDataset.__init__c             C   s   t g tjS )N)r
   rF   r   r   )rC   r   r   r   rG   9  s    z_TFRecordDataset.element_spec)NNN)rH   rI   rJ   rK   rB   rL   rG   rM   r   r   )rE   r   rV     s
     rV   c                   s>   e Zd ZdZd fdd	Zdd Zedd Zd	d
 Z  Z	S )r,   zHA `Dataset` that maps a function over its input and flattens the result.Nc	       
         s  || _ tj||  |d| _t| jjtjsDt	dt
| jjd| jjj| _tj|tjdd| _tj|tjdd| _tjd|d| d	| _tjd
|d| d	| _|dkrd| _n|rd| _nd| _|| _tj| j j| jjj| j| j| j| jf| jj| jd| j}	tt|  ||	 dS )z=See `tf.data.experimental.parallel_interleave()` for details.)Zdatasetz<The `map_func` argument must return a `Dataset` object. Got r   r'   )r    r   r(   r*      )r5   r+   Ndefaultfalsetrue)fZdeterministic)!Z_input_datasetr   ZStructuredFunctionWrapper_transformation_name	_map_funcr   Zoutput_structurer   ZDatasetSpecr   r1   _element_specr   r   r   int64Z_cycle_lengthZ_block_lengthr   r:   Z_buffer_output_elementsZ_prefetch_input_elementsZ_deterministicr>   ged_opsZ%legacy_parallel_interleave_dataset_v2rQ   functionZcaptured_inputsZ_common_argsrA   r,   rB   )
rC   Zinput_datasetZmap_funcr'   r(   r)   r*   r+   r   rD   )rE   r   r   rB   A  sJ    

z"ParallelInterleaveDataset.__init__c             C   s   | j gS )N)r]   )rC   r   r   r   
_functionst  s    z$ParallelInterleaveDataset._functionsc             C   s   | j S )N)r^   )rC   r   r   r   rG   w  s    z&ParallelInterleaveDataset.element_specc             C   s   dS )Nz*tf.data.experimental.parallel_interleave()r   )rC   r   r   r   r\   {  s    z.ParallelInterleaveDataset._transformation_name)N)
rH   rI   rJ   rK   rB   rb   rL   rG   r\   rM   r   r   )rE   r   r,   >  s
   
+r,   zdata.TFRecordDatasetc                   s6   e Zd ZdZd	 fdd	Zdd Zedd Z  ZS )
TFRecordDatasetV2a  A `Dataset` comprising records from one or more TFRecord files.

  This dataset loads TFRecords from the files as bytes, exactly as they were
  written.`TFRecordDataset` does not do any parsing or decoding on its own.
  Parsing and decoding can be done by applying `Dataset.map` transformations
  after the `TFRecordDataset`.

  A minimal example is given below:

  >>> import tempfile
  >>> example_path = os.path.join(tempfile.gettempdir(), "example.tfrecords")
  >>> np.random.seed(0)

  >>> # Write the records to a file.
  ... with tf.io.TFRecordWriter(example_path) as file_writer:
  ...   for _ in range(4):
  ...     x, y = np.random.random(), np.random.random()
  ...
  ...     record_bytes = tf.train.Example(features=tf.train.Features(feature={
  ...         "x": tf.train.Feature(float_list=tf.train.FloatList(value=[x])),
  ...         "y": tf.train.Feature(float_list=tf.train.FloatList(value=[y])),
  ...     })).SerializeToString()
  ...     file_writer.write(record_bytes)

  >>> # Read the data back out.
  >>> def decode_fn(record_bytes):
  ...   return tf.io.parse_single_example(
  ...       # Data
  ...       record_bytes,
  ...
  ...       # Schema
  ...       {"x": tf.io.FixedLenFeature([], dtype=tf.float32),
  ...        "y": tf.io.FixedLenFeature([], dtype=tf.float32)}
  ...   )

  >>> for batch in tf.data.TFRecordDataset([example_path]).map(decode_fn):
  ...   print("x = {x:.4f},  y = {y:.4f}".format(**batch))
  x = 0.5488,  y = 0.7152
  x = 0.6028,  y = 0.5449
  x = 0.4237,  y = 0.6459
  x = 0.4376,  y = 0.8918
  Nc                sb   t |d}|| _| _ | _|| _ fdd}t|||d| _| jj}tt	| 
| dS )a  Creates a `TFRecordDataset` to read one or more TFRecord files.

    Each element of the dataset will contain a single TFRecord.

    Args:
      filenames: A `tf.string` tensor or `tf.data.Dataset` containing one or
        more filenames.
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      buffer_size: (Optional.) A `tf.int64` scalar representing the number of
        bytes in the read buffer. If your input pipeline is I/O bottlenecked,
        consider setting this parameter to a value 1-100 MBs. If `None`, a
        sensible default for both local and remote file systems is used.
      num_parallel_reads: (Optional.) A `tf.int64` scalar representing the
        number of files to read in parallel. If greater than one, the records of
        files read in parallel are outputted in an interleaved order. If your
        input pipeline is I/O bottlenecked, consider setting this parameter to a
        value greater than one to parallelize the I/O. If `None`, files will be
        read sequentially.
      name: (Optional.) A name for the tf.data operation.

    Raises:
      TypeError: If any argument does not have the expected type.
      ValueError: If any argument does not have the expected shape.
    )r   c                s   t |  dS )N)r   )rV   )r#   )r7   r3   r   r   r   rO     s    z.TFRecordDatasetV2.__init__.<locals>.creator_fnN)r"   r9   r;   r=   Z_num_parallel_readsr.   rP   rQ   rA   rc   rB   )rC   r!   r3   r7   r-   r   rO   rD   )rE   )r7   r3   r   r   rB     s    zTFRecordDatasetV2.__init__c             C   s
   | j  S )N)rP   _inputs)rC   r   r   r   rd     s    zTFRecordDatasetV2._inputsc             C   s   t g tjS )N)r
   rF   r   r   )rC   r   r   r   rG     s    zTFRecordDatasetV2.element_spec)NNNN)	rH   rI   rJ   rK   rB   rd   rL   rG   rM   r   r   )rE   r   rc     s   +   *rc   c                   sF   e Zd ZdZd fdd	Zejje_edd Zejdd Z  Z	S )	TFRecordDatasetV1z?A `Dataset` comprising records from one or more TFRecord files.Nc                s&   t |||||d}tt| | d S )N)r   )rc   rA   re   rB   )rC   r!   r3   r7   r-   r   rS   )rE   r   r   rB     s    zTFRecordDatasetV1.__init__c             C   s   | j jS )N)rT   r9   )rC   r   r   r   r9     s    zTFRecordDatasetV1._filenamesc             C   s   || j _d S )N)rT   r9   )rC   r0   r   r   r   r9     s    )NNNN)
rH   rI   rJ   rK   rB   rc   rL   r9   rU   rM   r   r   )rE   r   re     s      
re   c                   s.   e Zd ZdZd fdd	Zedd Z  ZS )_FixedLengthRecordDatasetzBA `Dataset` of fixed-length records from one or more binary files.Nc       	   	      s   || _ tj|tjdd| _td|| _td|| _	td|t
| _tjd|dtjd| _|| _tj| j | j| j| j	| j| j| j d	}tt| | d
S )a
  Creates a `FixedLengthRecordDataset`.

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
      record_bytes: A `tf.int64` scalar representing the number of bytes in each
        record.
      header_bytes: (Optional.) A `tf.int64` scalar representing the number of
        bytes to skip at the start of a file.
      footer_bytes: (Optional.) A `tf.int64` scalar representing the number of
        bytes to ignore at the end of a file.
      buffer_size: (Optional.) A `tf.int64` scalar representing the number of
        bytes to buffer when reading.
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      name: (Optional.) A name for the tf.data operation.
    record_bytes)r    r   header_bytesfooter_bytesr7   r3   r4   )r5   r6   )r8   N)r9   r   r   r   r_   _record_bytesr   r:   _header_bytes_footer_bytesr<   r=   r   r;   r>   r   Zfixed_length_record_dataset_v2r?   r@   rA   rf   rB   )	rC   r!   rg   rh   ri   r7   r3   r   rD   )rE   r   r   rB     s0    

z"_FixedLengthRecordDataset.__init__c             C   s   t g tjS )N)r
   rF   r   r   )rC   r   r   r   rG   1  s    z&_FixedLengthRecordDataset.element_spec)NNNNN)rH   rI   rJ   rK   rB   rL   rG   rM   r   r   )rE   r   rf     s       +rf   zdata.FixedLengthRecordDatasetc                   s.   e Zd ZdZd fdd	Zedd Z  ZS )FixedLengthRecordDatasetV2a  A `Dataset` of fixed-length records from one or more binary files.

  The `tf.data.FixedLengthRecordDataset` reads fixed length records from binary
  files and creates a dataset where each record becomes an element of the
  dataset. The binary files can have a fixed length header and a fixed length
  footer, which will both be skipped.

  For example, suppose we have 2 files "fixed_length0.bin" and
  "fixed_length1.bin" with the following content:

  >>> with open('/tmp/fixed_length0.bin', 'wb') as f:
  ...   f.write(b'HEADER012345FOOTER')
  >>> with open('/tmp/fixed_length1.bin', 'wb') as f:
  ...   f.write(b'HEADER6789abFOOTER')

  We can construct a `FixedLengthRecordDataset` from them as follows:

  >>> dataset1 = tf.data.FixedLengthRecordDataset(
  ...     filenames=['/tmp/fixed_length0.bin', '/tmp/fixed_length1.bin'],
  ...     record_bytes=2, header_bytes=6, footer_bytes=6)

  The elements of the dataset are:

  >>> for element in dataset1.as_numpy_iterator():
  ...   print(element)
  b'01'
  b'23'
  b'45'
  b'67'
  b'89'
  b'ab'
  Nc	                st   t |d}|| _| _| _| _ | _| _ fdd}	t|	||d| _| jj	}
t
t| |
 dS )a  Creates a `FixedLengthRecordDataset`.

    Args:
      filenames: A `tf.string` tensor or `tf.data.Dataset` containing one or
        more filenames.
      record_bytes: A `tf.int64` scalar representing the number of bytes in each
        record.
      header_bytes: (Optional.) A `tf.int64` scalar representing the number of
        bytes to skip at the start of a file.
      footer_bytes: (Optional.) A `tf.int64` scalar representing the number of
        bytes to ignore at the end of a file.
      buffer_size: (Optional.) A `tf.int64` scalar representing the number of
        bytes to buffer when reading.
      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
        `""` (no compression), `"ZLIB"`, or `"GZIP"`.
      num_parallel_reads: (Optional.) A `tf.int64` scalar representing the
        number of files to read in parallel. If greater than one, the records of
        files read in parallel are outputted in an interleaved order. If your
        input pipeline is I/O bottlenecked, consider setting this parameter to a
        value greater than one to parallelize the I/O. If `None`, files will be
        read sequentially.
      name: (Optional.) A name for the tf.data operation.
    )r   c          	      s   t |  dS )N)r   )rf   )r#   )r7   r3   ri   rh   r   rg   r   r   rO     s    z7FixedLengthRecordDatasetV2.__init__.<locals>.creator_fnN)r"   r9   rj   rk   rl   r=   r;   r.   rP   rQ   rA   rm   rB   )rC   r!   rg   rh   ri   r7   r3   r-   r   rO   rD   )rE   )r7   r3   ri   rh   r   rg   r   rB   Y  s     
z#FixedLengthRecordDatasetV2.__init__c             C   s   t g tjS )N)r
   rF   r   r   )rC   r   r   r   rG     s    z'FixedLengthRecordDatasetV2.element_spec)NNNNNN)rH   rI   rJ   rK   rB   rL   rG   rM   r   r   )rE   r   rm   6  s   !     0rm   c                   sF   e Zd ZdZd fdd	Zejje_edd Zejdd Z  Z	S )	FixedLengthRecordDatasetV1zBA `Dataset` of fixed-length records from one or more binary files.Nc	       
   
      s,   t ||||||||d}	tt| |	 d S )N)r   )rm   rA   rn   rB   )
rC   r!   rg   rh   ri   r7   r3   r-   r   rS   )rE   r   r   rB     s    	z#FixedLengthRecordDatasetV1.__init__c             C   s   | j jS )N)rT   r9   )rC   r   r   r   r9     s    z%FixedLengthRecordDatasetV1._filenamesc             C   s   || j _d S )N)rT   r9   )rC   r0   r   r   r   r9     s    )NNNNNN)
rH   rI   rJ   rK   rB   rm   rL   r9   rU   rM   r   r   )rE   r   rn     s        
rn   )N)NN)0rK   r   Ztensorflow.pythonr   Ztensorflow.python.data.opsr   r   r   Ztensorflow.python.data.utilr   Ztensorflow.python.frameworkr   r   r	   r
   r   Ztensorflow.python.opsr   r   r   r`   Ztensorflow.python.utilr   Z tensorflow.python.util.tf_exportr   r<   r   r"   r.   r1   ZDatasetSourcer2   rN   ZDatasetV1AdapterrR   rV   ZUnaryDatasetr,   r   rc   re   rf   rm   rn   enabledZFixedLengthRecordDatasetZTFRecordDatasetZTextLineDatasetr   r   r   r   <module>   sZ   
& 
#	+
T
(A
c
:
_
"
