B
    W0d&                 @  sN  d Z ddlmZ ddlZddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZmZmZ erdd
lmZmZmZmZ dZ ddddddZ!dde dfdddddddddZ"de fdddddddZ#d dddd!d"d#Z$de dfd$ddddd%d&d'Z%de dfdddddd%d(d)Z&dS )*z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKINGHashableIterableIteratorcast)lib)hash_object_array)	ArrayLikeFrameOrSeriesUnion)is_categorical_dtypeis_list_like)ABCDataFrameABCIndexABCMultiIndex	ABCSeries)CategoricalIndex
MultiIndexSeriesZ0123456789123456zIterator[np.ndarray]intz
np.ndarray)arrays	num_itemsreturnc             C  s   yt | }W n  tk
r,   tjg tjdS X t|g| } td}t|td }xBt| D ]6\}}|| }||N }||9 }|td| | 7 }qdW |d |kst	d|td7 }|S )z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )
nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerateAssertionError)r   r   firstZmultoutiaZ	inverse_i r)   J/var/www/html/venv/lib/python3.7/site-packages/pandas/core/util/hashing.pycombine_hash_arrays0   s    
r+   Tutf8zIndex | FrameOrSeriesUnionboolstrz
str | Noner   )objindexencodinghash_key
categorizer   c               s  ddl m} dkrtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }t|g|}	t|	d}||jddd}nttrj fdd
 D }
tj}|rL fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}ntdt |S )a~  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )r   Nr    F)r   copy)r4   )r0   r   r4   c             3  s$   | ]}t jd  djV  qdS )F)r0   r1   r2   r3   N)hash_pandas_objectr0   _values).0_)r3   r1   r2   r/   r)   r*   	<genexpr>   s   z%hash_pandas_object.<locals>.<genexpr>)N   c             3  s"   | ]\}}t |j V  qd S )N)
hash_arrayr6   )r7   r8   Zseries)r3   r1   r2   r)   r*   r9      s   c             3  s$   | ]}t jd  djV  qdS )F)r0   r1   r2   r3   N)r5   r0   r6   )r7   r8   )r3   r1   r2   r/   r)   r*   r9      s   r   c             s  s   | ]
}|V  qd S )Nr)   )r7   xr)   r)   r*   r9      s    zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr   r;   r6   astyper   r!   r"   r+   r0   r   itemslencolumns	TypeErrortype)r/   r0   r1   r2   r3   r   hZserZ
index_iterr   hashesr   Zindex_hash_generator_hashesr)   )r3   r1   r2   r/   r*   r5   P   s@    







r5   z+MultiIndex | Iterable[tuple[Hashable, ...]])valsr1   r2   r   c               sz   t | stdddlm m} t| ts6|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )r   r   c               s(   g | ] } j | j| d ddqS )FT)orderedfastpath)codesZlevels)r7   level)r   mir)   r*   
<listcomp>   s   zhash_tuples.<locals>.<listcomp>c             3  s   | ]}t | d V  qdS ))r1   r2   N)_hash_categorical)r7   cat)r1   r2   r)   r*   r9      s    zhash_tuples.<locals>.<genexpr>)r   rE   r=   r   r   r?   r   from_tuplesrangeZnlevelsr+   rC   )rJ   r1   r2   r   Zcat_valsrH   rG   r)   )r   r1   r2   rO   r*   r@      s    
r@   r   )rR   r1   r2   r   c             C  sd   t | jj}t|||dd}|  }t|r<|| j}nt j	t|dd}|
 r`tj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    cat : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray[np.uint64] of hashed values, same size as len(c)
    F)r3   r    )r   )r   Zasarray
categoriesr6   r;   ZisnarC   ZtakerM   Zzerosanyr   Zu8max)rR   r1   r2   valueshashedmaskresultr)   r)   r*   rQ      s    	
rQ   r
   )rJ   r1   r2   r3   r   c             C  s\   t | dstd| j}t|r6td| } t| ||S t| tjsN| 	 \} }t
| |||S )aK  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    r   zmust pass a ndarray-liker   )hasattrrE   r   r   r   rQ   r?   r   ZndarrayZ_values_for_factorize_hash_ndarray)rJ   r1   r2   r3   r   r8   r)   r)   r*   r;      s    

r;   c             C  st  | j }t|tjr4tt| dtt|   S t|trJ| 	d} nt
|jtjtjfrt| dj	ddd} nt
|jtjr|jdkr| d| j j 	d} n|rdd	lm}m}m} || dd
\}}	||||	ddd}
t|
||S yt| ||} W n. tk
r.   t| 	t	t||} Y nX | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8F)r4      ur   )r   r   	factorize)sortT)rK   rL      l   e9z    l   b&&&	    )r   r   Z
issubdtypeZ
complex128r;   realimagr?   r-   rA   
issubclassrF   Z
datetime64Ztimedelta64viewnumberitemsizer=   r   r   rb   rQ   r	   rE   r.   objectr    )rJ   r1   r2   r3   r   r   r   rb   rM   rU   rR   r)   r)   r*   r\   *  s2    	 
r\   )'__doc__
__future__r   r!   typingr   r   r   r   r   numpyr   Zpandas._libsr   Zpandas._libs.hashingr	   Zpandas._typingr
   r   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.genericr   r   r   r   r=   r   r   r   r   r>   r+   r5   r@   rQ   r;   r\   r)   r)   r)   r*   <module>   s4   	"Y+((