B
    0d                 @   s   d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ d
d Zdd Zdd Zdd Zdd Zdd Zdd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for travis cron job).    )partial)patchN)check_as_frame)check_pandas_dependency_message)check_return_X_y)assert_allclose_dense_sparse)	normalizec       	      C   s,  | ddd}|j dst| d|jddd dd}|j|jdd  ksNtt|j d	d
gksjtt|j	t|jkstt|j	t|j
kst|j
d	 }|j|jd	  }|j|}|j
t|j|kd	 d	  }||kst| dddd\}}t|t|j
kst|j|jjks(td S )NallF)subsetshufflez.. _20newsgroups_dataset:)r
   
categoriesr   r      T)r
   r   Z
return_X_y)DESCR
startswithAssertionErrorZtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   Z	data2catsZentry1categorylabelZentry2Xy r#   T/var/www/html/venv/lib/python3.7/site-packages/sklearn/datasets/tests/test_20news.pytest_20news   s     
r%   c             C   s\   | dd}t |d t |jks$tt |d t |jks>tt |d t |jksXtdS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r	   )r
   r   r   r   N)r   r   r   r   r   )r   r   r#   r#   r$   test_20news_length_consistency3   s    
r&   c             C   sB  | dd}t |jst|jjdks*t|jjd dks>t|jjtjksPt|j	
ds`t| dd}t |jszt|jjdkst|jjd d	kst|jjtjkst|j	
dstt| dd}t|| | d
d}t |jst|jjdkst|jjd dkst|jjtjks,t|j	
ds>td S )Ntrain)r
   )i2,  i; r   i2,  z.. _20newsgroups_dataset:test)il  i; il  r	   )iI  i; iI  )spZisspmatrix_csrr   r   r   r   Zdtyper   Zfloat64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunchZ
fetch_funcr#   r#   r$   test_20news_vectorized?   s(    



r,   c             C   sf   | dd}| dd}|d d d }|d d d }t |t| ttjj| dddsbtd S )NF)r   Tr   d   r   )Zaxis)r   r   r   ZallcloseZlinalgZnormZtodenser   )r*   r!   ZX_ZX_normr#   r#   r$   test_20news_normalization]   s    

r.   c                s   t d | dd}t||  |j}|jdks2tt fdd|jjD sPtxdD ]}||	 ksVtqVW d|	 ks~t|j
jdkstd S )	NZpandasT)as_frame)i2,  i< c                s   g | ]}t | jqS r#   )
isinstanceZSparseDtype).0col)pdr#   r$   
<listcomp>o   s    z(test_20news_as_frame.<locals>.<listcomp>)beginnerZ	beginnersZ	beginningZ
beginningsZbeginsZbegleyZbegoneZcategory_class)pytestZimportorskipr   framer   r   r	   r   Zdtypeskeysr   name)r*   r+   r7   Zexpected_featurer#   )r3   r$   test_20news_as_frameg   s    



	r:   c             C   s   t |  d S )N)r   )r*   Zhide_available_pandasr#   r#   r$   test_as_frame_no_pandas   s    r;   c             C   s`   t dN}t d:}d|_d|_d}tjt|d | dd W d Q R X W d Q R X W d Q R X d S )Nzos.path.existszjoblib.loadT)r!   r"   zThe cached dataset located in)match)r/   )r   Zreturn_valuer6   Zraises
ValueError)r*   Zmock_is_existZ	mock_loaderr_msgr#   r#   r$   test_outdated_pickle   s    

r?   )__doc__	functoolsr   Zunittest.mockr   r6   numpyr   Zscipy.sparsesparser)   Z"sklearn.datasets.tests.test_commonr   r   r   Zsklearn.utils._testingr   Zsklearn.preprocessingr   r%   r&   r,   r.   r:   r;   r?   r#   r#   r#   r$   <module>   s"    
