B
    0d                 @   s\  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
ZddlZddlZddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZ dd	l m!Z!m"Z" dd
l#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl m+Z+ dZ,dZ-e*eddZdd Z.dd Z/G dd dZ0dd Z1ej23dddde4fddde4fdddej5fdddej5fd ddej5fd ddej5fd!ddej6fd!ddej5fd"ddd#fd"ddd#fg
d$d% Z7ej23d&d'ddgd(d) Z8e+d*d+ Z9e+d,d- Z:e+d.d/ Z;e+d0d1 Z<e+d2d3 Z=d4d5 Z>e+d6d7 Z?e+d8d9 Z@e+d:d; ZAe+d<d= ZBe+d>d? ZCe+d@dA ZDe+dBdC ZEej23dDddEgdFdG ZFdHdI ZGej23dDddEgdJdK ZHej23dDddEgdLdM ZIdNdO ZJej23dDddEgdPdQ ZKej23dDddEgdRdS ZLdTdU ZMej23dDddEgdVdW ZNej23dDddEgdXdY ZOej23dDddEgdZd[ ZPej23dDddEgd\d] ZQd^d_ ZRej23dDddEgd`da ZSej23dDddEgej23dbddEgdcdd ZTdedf ZUdgdh ZVej23dDddEgdidj ZWej23dDddEgdkdl ZXej23dDddEgdmdn ZYej23dDddEgdodp ZZej23dDddEgdqdr Z[ej23dDddEgdsdt Z\ej23dDddEgdudv Z]ej23dDddEgdwdx Z^ej23dDddEgdydz Z_ej23dDddEgd{d| Z`ej23dDddEgd}d~ Zadd Zbej23dDddEgdd Zce+ej23dddEgdd Zddd Zedd ZfdS )zTest the openml loader.
    N)	resources)BytesIO)config_context)fetch_openml)	_open_openml_url_arff
_DATA_FILE_convert_arff_data_convert_arff_data_dataframe_get_data_description_by_id_get_local_path_retry_with_clean_cache_feature_to_dtype)is_scalar_nan)assert_allcloseassert_array_equal)	HTTPError)check_return_X_y)ArffContainerType)partial)fails_if_pypyz"sklearn.datasets.tests.data.openmlT)	data_homec       
   	   C   s   dd }t | dd dd}t| d }|d  dk}|dkrBtdt|d	 }t|d d
,}tjdd |D |rxtj	ntj
dd}W d Q R X tjt|d dd}x6tt|jD ]$}	tj|d d |	f |||	 qW d S )Nc                sb   | j | }|| jkrL| j|   fdd| jd d |f D }tj|ddS | jd d |f S d S )Nc                s$   g | ]}t |rd n
 t| qS )N)r   int).0idx)cat T/var/www/html/venv/lib/python3.7/site-packages/sklearn/datasets/tests/test_openml.py
<listcomp>=   s   z>_test_features_list.<locals>.decode_column.<locals>.<listcomp>O)dtype)feature_names
categoriesdatanparray)
data_bunchZcol_idxZcol_nameresultr   )r   r   decode_column6   s    



z*_test_features_list.<locals>.decode_columnF)data_idcachetarget_columnas_frameformatZsparse_arffTzIThis test is not intended for sparse data, to keep code relatively simpleZfile_id)r   c             s   s   | ]}| d V  qdS )zutf-8N)decode)r   liner   r   r   	<genexpr>S   s    z&_test_features_list.<locals>.<genexpr>)return_typeZencode_nominalr#   r   )r    )r   r   lower
ValueErrorr   r-   r   r   loadZCOOZ	DENSE_GENr$   r%   listrangelenr!   testingr   )
r)   r(   r&   Zdata_descriptionsparseurlfZ	data_arffZdata_downloadedir   r   r   _test_features_list2   s$    
r=   c          
   C   s  t ||ddd}t|jd | ks&tt " tjdtd t |ddd W d Q R X t | d|dd}|jd |ksxt|jj	||fkstt
|tr|jj	|fkst|j|gkstn0t
|tr|jj	|t|fkst|j|kst|jj|kst|jj|kstt|j|ks tx |jD ]}t
|ts(tq(W x|j D ]~\}}|j|}t . tjdtd	d
 t|jd d |f }W d Q R X |t| }t|ttt|ksNtqNW |
r0t | ddd}tj|j|j |jjtjkrtj|j|j nt|j|js0t|	rNt
|jt j!j"s~tn0t
|jtj#sbtt$t%|j|ks~tt&t | d|dd}t'|| |S )NF)nameversionr*   r,   idignore)category)r>   r*   r,   )r)   r*   r+   r,   r>   zelementwise comparison failed)rB   message)r)   r*   r,   )(r   r   detailsAssertionErrorwarningscatch_warningssimplefilterUserWarningr#   shape
isinstancestrtargettarget_namesr5   r7   r    r!   r"   itemsindexfilterwarningsDeprecationWarningr$   uniqueisfinitesetr6   r8   r   float64Zarray_equalscipyr9   
csr_matrixZndarrayZcount_nonzeroisnanr   r   )r)   	data_namedata_versionr+   expected_observationsexpected_featuresexpected_missingexpected_data_dtypeexpected_target_dtypeexpect_sparsecompare_default_targetZdata_by_name_idZ
data_by_idfeaturer"   Zfeature_idxvaluesZdata_by_id_defaultZ
fetch_funcr   r   r   _fetch_dataset_from_openmlc   sb    



"$
re   c               @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec             C   s   || _ || _d S )N)r#   is_gzip)selfr#   rg   r   r   r   __init__   s    z_MockHTTPResponse.__init__c             C   s   | j |S )N)r#   read)rh   amtr   r   r   rk      s    z_MockHTTPResponse.readc             C   s   | j   d S )N)r#   close)rh   r   r   r   rm      s    z_MockHTTPResponse.closec             C   s   | j rddiS i S )NzContent-Encodinggzip)rg   )rh   r   r   r   info   s    z_MockHTTPResponse.infoc             C   s
   t | jS )N)iterr#   )rh   r   r   r   __iter__   s    z_MockHTTPResponse.__iter__c             C   s   | S )Nr   )rh   r   r   r   	__enter__   s    z_MockHTTPResponse.__enter__c             C   s   dS )NFr   )rh   exc_typeexc_valexc_tbr   r   r   __exit__   s    z_MockHTTPResponse.__exit__N)rj   )
__name__
__module____qualname__ri   rk   rm   ro   rq   rr   rv   r   r   r   r   rf      s   
rf   c                s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr| tjjd| d S )Nz$https://openml.org/api/v1/json/data/z-https://openml.org/api/v1/json/data/features/zhttps://openml.org/data/v1/z)https://openml.org/api/v1/json/data/list/z.gz.id_c                s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resubr7   replace)r:   suffixoutput)path_suffixr   r   
_file_name   s    "z4_monkey_patch_webbased_functions.<locals>._file_namec          	      sr   |  |st | |}t|D}|rDrDt| }t|dS |d}t| }t|dS W d Q R X d S )NTrbF)
startswithrE   r   open_binaryr   rk   rf   )r:   has_gzip_headerexpected_prefixr   data_file_namer;   fpdecompressed_f)r   data_modulegzip_responseread_fnr   r   _mock_urlopen_shared   s    


z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                s    | |ddS )Nz.json)r:   r   r   r   r   )r:   r   )r   url_prefix_data_descriptionr   r   _mock_urlopen_data_description  s
    zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                s    | |ddS )Nz.json)r:   r   r   r   r   )r:   r   )r   url_prefix_data_featuresr   r   _mock_urlopen_data_features  s
    zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                s    | |ddS )Nz.arff)r:   r   r   r   r   )r:   r   )r   url_prefix_download_datar   r   _mock_urlopen_download_data  s
    zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac          	      s   |  st | d}t|(}|d}| d}t|}W d Q R X d|krltd ddd d dt|@}|rt	| }t
|dS |d}t	| }t
|d	S W d Q R X d S )
Nz.jsonr   zutf-8errori  zSimulated mock error)r:   codemsghdrsr   TF)r   rE   r   r   rk   r.   jsonloadsr   r   rf   )r:   r   r   r;   r   Z	decoded_sZ	json_datar   )r   r   r   url_prefix_data_listr   r   _mock_urlopen_data_list$  s     



zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                sv   |   }| ddk}|r*||S |r>||S |rR||S |rf ||S td| d S )NzAccept-encodingrn   zUnknown mocking URL pattern: %s)get_full_url
get_headerr   r3   )requestr:   r   )r   r   r   r   r   r   r   r   r   r   _mock_urlopen<  s    







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)rn   openOPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextr)   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    _monkey_patch_webbased_functions   s     r   zfeature, expected_dtypestring0)Z	data_typeZnumber_of_missing_values1numericrealintegerZnominalrB   c             C   s   t | |kstd S )N)r   rE   )rc   Zexpected_dtyper   r   r   test_feature_to_dtypeO  s    r   rc   Zdatatimec          	   C   s0   d | }tjt|d t|  W d Q R X d S )NzUnsupported feature: {})match)r-   pytestraisesr3   r   )rc   r   r   r   r   test_feature_to_dtype_errorb  s    
r   c             C   s  t d}|jjj}d}d}d}d}|dddg}tjgd	 }d
dddg}	d}
t| |d t|ddd}|j	}|j
}|j}t||jstt|j|kst|j|kstt|j|	kstt|j|	kst|j|
gkstt||jst|j|kst|j|kst|j|
ks$t|jjs2tt||jsDt|j|ksTtt|j||g kspt|jjs~td S )Npandas=   )      )r   )r      zIris-setosazIris-versicolorzIris-virginicar   sepallength
sepalwidthpetallength
petalwidthclassTF)r)   r,   r*   )r   importorskipapitypesCategoricalDtyper$   rV   r   r   r#   rM   framerK   	DataFramerE   alldtypesrJ   columnsr!   rN   Seriesr    r>   rP   Z	is_unique)monkeypatchpdr   r)   
data_shapetarget_shapeframe_shapeZtarget_dtypedata_dtypes
data_namestarget_namebunchr#   rM   r   r   r   r   test_fetch_openml_iris_pandasm  s>    

r   c             C   sf   t d d}t| |d t|ddd}|j}|j}t|ddd}|j}|j}t|| t|| d S )Nr   r   TF)r)   r,   r*   )r   r   r   r   r#   rM   r   r   )r   r)   Zframe_bunchZ
frame_dataZframe_targetZ
norm_bunchZ	norm_dataZnorm_targetr   r   r   /test_fetch_openml_iris_pandas_equal_to_no_frame  s    

r   c             C   s  t d}|jjj}d}d}d}d}ddg}|dd	d
g}tjtjg|g }	dddg}
tjtjg}ddg}t| |d t|dd|d}|j	}|j
}|j}t||jstt|j|	kst|j|kstt|j|
kstt|j|
kst|j|kstt||jstt|j|ks*t|j|ks:tt|j|ksPtt||jsbt|j|ksrtt|jtjgd |g kstd S )Nr   r   )r      )r      )r   r   r   r   zIris-setosazIris-versicolorzIris-virginicar   r   r   TF)r)   r,   r*   r+   r   )r   r   r   r   r   r$   rV   r   r   r#   rM   r   rK   r   rE   r   r   rJ   r   r!   rN   )r   r   r   r)   r   r   r   r+   	cat_dtyper   r   Ztarget_dtypesrN   r   r#   rM   r   r   r   r   )test_fetch_openml_iris_multitarget_pandas  s>    


r   c                s  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	|d
d}	|	j}
|	j}|	j	}t
|
|jsnt|
j|ks|tt fdd|
jD }tdd |
jD }||kst||kstt
||jst|j|kstt
|j stt
||jst|j|kstd S )Nr   r   r   )   &   )r   )r   '          TF)r)   r,   r+   r*   c                s   g | ]}t | r|qS r   )rK   )r   r    )r   r   r   r     s    z3test_fetch_openml_anneal_pandas.<locals>.<listcomp>c             S   s   g | ]}|j d kr|qS )r;   )kind)r   r    r   r   r   r     s    )r   r   r   r   r   r   r   r#   rM   r   rK   r   rE   rJ   r7   r   r   r    )r   r   r)   r+   r   r   r   expected_data_categoriesexpected_data_floatsr   r#   rM   r   n_categoriesn_floatsr   )r   r   test_fetch_openml_anneal_pandas  s6    

r   c             C   s  t d}|jjj}d}d}d}d}|dddd	d
dddddddddddddddddddddd d!d"d#g}|gtjgd$  }d%d&d'd(d)d*d+g}	d,}
t| |d- t|d-d.d/}|j	}|j
}|j}t||jst|j|kstt|j|kstt|j|	kstt|j|	kst|j|
gks,tt||js>t|j|ksNt|jtjks`t|j|
ksptt||jst|j|kstd S )0Nr   i1  )      )r   )r      ZadviserZamdahlZapolloZbasfZbtiZ	burroughszc.r.dZcdcZcambexdecdgZ	formationz
four-phaseZgouldhpZharrisZ	honeywellZibmZiplZmagnusonZ	microdataZnasZncrZnixdorfzperkin-elmerprimeZsiemensZsperryZsratusZwangr   vendorZMYCTZMMINZMMAXZCACHZCHMINZCHMAXr   TF)r)   r,   r*   )r   r   r   r   r   r$   rV   r   r   r#   rM   r   rK   r   rE   rJ   r   r   r   r!   rN   r   r    r>   )r   r   r   r)   r   r   r   r   r   r!   r   r   r#   rM   r   r   r   r   test_fetch_openml_cpu_pandas  sr    

r   c          	   C   s@   d}t | |d d}tjt|d t|ddd W d Q R X d S )Ni$  Tz(Cannot return dataframe with sparse data)r   F)r)   r,   r*   )r   r   r   r3   r   )r   r)   r   r   r   r   0test_fetch_openml_australian_pandas_error_sparseM  s
    r   c             C   sp   t d}d}t| |d t|ddd}t|j|js:td}t| |d t|ddd}t|jtj	j
sltd S )Nr   r   TautoF)r)   r,   r*   i$  )r   r   r   r   rK   r#   r   rE   rW   r9   rX   )r   r   r)   r#   r   r   r   test_fetch_openml_as_frame_autoY  s    
r   c          
   C   s`   t d d}t| |d d}t jt|d* tdd t|ddd	 W d Q R X W d Q R X d S )
Nr   i_  Tz*Could not adhere to working_memory config.)r   gư>)Zworking_memoryF)r)   r,   r*   )r   r   r   warnsrI   r   r   )r   r)   r   r   r   r   :test_convert_arff_data_dataframe_warning_low_memory_pandasj  s    
r   c                s   t d}|jjj d}d}d}d}d}d}t| |d t|dd	dd
\}}	t||js\t	|j
|ksjt	t fdd|jD }
tdd |jD }||
kst	||kst	t|	|jst	|	j
|kst	|	j|kst	d S )Nr   i_  )
      )r   r   r   r   TF)r)   r,   r*   
return_X_yc                s   g | ]}t | r|qS r   )rK   )r   r    )r   r   r   r     s    zCtest_fetch_openml_adultcensus_pandas_return_X_y.<locals>.<listcomp>c             S   s   g | ]}|j d kr|qS )r;   )r   )r   r    r   r   r   r     s    )r   r   r   r   r   r   r   rK   r   rE   rJ   r7   r   r   r>   )r   r   r)   r   r   r   r   r+   Xyr   r   r   )r   r   /test_fetch_openml_adultcensus_pandas_return_X_yy  s(    

r   c                s  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	d
d}	|	j}
|	j}|	j	}t
|
|jslt|
j|ksztt fdd|
jD }tdd |
jD }||kst||kstt
||jst|j|kst|j|kstt
||jst|j|ks
td S )Nr   i_  )r   r   )r   )r      r   r   r   TF)r)   r,   r*   c                s   g | ]}t | r|qS r   )rK   )r   r    )r   r   r   r     s    z8test_fetch_openml_adultcensus_pandas.<locals>.<listcomp>c             S   s   g | ]}|j d kr|qS )r;   )r   )r   r    r   r   r   r     s    )r   r   r   r   r   r   r   r#   rM   r   rK   r   rE   rJ   r7   r   r   r>   )r   r   r)   r   r   r   r   r   r+   r   r#   rM   r   r   r   r   )r   r   $test_fetch_openml_adultcensus_pandas  s4    

r   c                s6  t d}|jjj d}d}d}d}d}d}d}t| |d	 t|d	d
d}	|	j}
|	j}|	j	}t
|
|jslt|
j|ksztt|
jtjkstt
||jstt
|j st|j|kst|j|kstt
||jst|j|kstt fdd|jD }tdd |jD }||ks$t||ks2td S )Nr   i  )r   M   )r   )r   N   r      r   TF)r)   r,   r*   c                s   g | ]}t | r|qS r   )rK   )r   r    )r   r   r   r     s    z8test_fetch_openml_miceprotein_pandas.<locals>.<listcomp>c             S   s   g | ]}|j d kr|qS )r;   )r   )r   r    r   r   r   r     s    )r   r   r   r   r   r   r   r#   rM   r   rK   r   rE   rJ   r$   r   r   rV   r   r    r>   r7   )r   r   r)   r   r   r   r+   Zframe_n_categoriesZframe_n_floatsr   r#   rM   r   r   r   r   )r   r   $test_fetch_openml_miceprotein_pandas  s8    

r   c                s$  t d}|jjj d}ddddddg}d	}d
}d}d}d}t| |d t|dd|d}	|	j}
|	j}|	j	}t
|
|jszt|
j|kstt
||jst|j|kstt|j|kstt
||jst|j|kstt fdd|jD }tdd |jD }||kst||ks td S )Nr   i  zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)   H   )r   r   )r   r   r   r   TF)r)   r,   r*   r+   c                s   g | ]}t | r|qS r   )rK   )r   r    )r   r   r   r     s    z5test_fetch_openml_emotions_pandas.<locals>.<listcomp>c             S   s   g | ]}|j d kr|qS )r;   )r   )r   r    r   r   r   r     s    )r   r   r   r   r   r   r   r#   rM   r   rK   r   rE   rJ   r$   r   r   r7   r   )r   r   r)   r+   r   r   r   Zexpected_frame_categoriesZexpected_frame_floatsr   r#   rM   r   r   r   r   )r   r   !test_fetch_openml_emotions_pandas  s@    

r  c                s  t d}|jjj}d}d}d}d}tjt|ddgtjtjtjttjt|dd	d
gttjt|ddgd ddddddddddddddg} fdd|D }dddddddddddddg}	d}
t| |d t	|ddd }|j
}|j}|j}t||jst|j|kstt|j|	ks"t|j|
gks4tt||jsFt|j|ksVt|j|
ksft|j |
 ksztt||jst|j|kstt|j|kstd S )!Nr   i  )i  r   )i  )i  r   ZfemaleZmaleCQSr   r   )pclassr>   sexagesibspparchticketfarecabinembarkedboatbodyz	home.destsurvivedr  r  r>   r  r  r  r	  r
  r  r  r  r  r  z	home.destc                s   g | ]} | qS r   r   )r   col)name_to_dtyper   r   r   H  s    z4test_fetch_openml_titanic_pandas.<locals>.<listcomp>TF)r)   r,   r*   )r   r   r   r   r   r$   rV   objectr   r   r#   rM   r   rK   r   rE   rJ   r   r   rN   r   r>   r    r   )r   r   r   r)   r   r   r   Zframe_columnsZframe_dtypesr!   r   r   r#   rM   r   r   )r  r    test_fetch_openml_titanic_pandas  s    


r  r   Fc          	   C   sD   d}d}t | || d}tjt|d t|ddd W d Q R X d S )Nr   iriszMultiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1.)r   F)r>   r,   r*   )r   r   r   rI   r   )r   r   r)   rZ   r   r   r   r   test_fetch_openml_iriso  s    r  c             C   s   d}t | |d t| d S )Nr   F)r   r=   )r   r)   r   r   r   test_decode_iris  s    r  c       	      C   sR   d}d}d}ddg}d}d}d}t | || t|||||||tjtjd	d	d
 d S )Nr   r  r   r   r   r   r   r   F)ra   rb   )r   re   r$   rV   )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   "test_fetch_openml_iris_multitarget  s(    r  c       	      C   sL   d}d}d}d}d}d}d}t | || t|||||||tjtdd	d
 d S )Nr   annealr   r   r   r   i  FT)ra   rb   )r   re   r$   rV   r  )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   test_fetch_openml_anneal  s(    r  c             C   s   d}t | |d t| d S )Nr   F)r   r=   )r   r)   r   r   r   test_decode_anneal  s    r  c       	      C   sR   d}d}d}dddg}d}d}d	}t | || t|||||||tjtd
d
d d S )Nr   r  r   r   zproduct-typerJ   r   $   i  F)ra   rb   )r   re   r$   rV   r  )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   $test_fetch_openml_anneal_multitarget  s(    
r  c       	      C   sN   d}d}d}d}d}d}d}t | || t|||||||tjtjdd	d
 d S )Ni1  cpur   r   r   r   r   FT)ra   rb   )r   re   r$   rV   )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   test_fetch_openml_cpu  s(    r  c             C   s   d}t | |d t| d S )Ni1  F)r   r=   )r   r)   r   r   r   test_decode_cpu  s    r   c       
      C   sn   d}d}d}d}d}d}d}t | || d}	tjt|	d	* tf |||||||d
tjtdd W d Q R X d S )Ni$  Z
Australianr   YU   r   r   z,Version 1 of dataset Australian is inactive,)r   TF)r)   rZ   r[   r+   r\   r]   r^   ra   r_   r`   rb   )r   r   r   rI   re   r$   rV   r  )
r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   r   test_fetch_openml_australian  s,    r#  c       	      C   sL   d}d}d}d}d}d}d}t | || t|||||||tjtdd	d
 d S )Ni_  zadult-censusr   r   r   r   r   FT)ra   rb   )r   re   r$   rV   r  )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   test_fetch_openml_adultcensus  s(    r$  c       	      C   sL   d}d}d}d}d}d}d}t | || t|||||||tjtddd	 d S )
Ni  ZMiceProteinr   r   r   r   FT)ra   rb   )r   re   r$   rV   r  )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   test_fetch_openml_miceprotein:  s(    r%  c       	      C   sX   d}d}d}dddddd	g}d
}d}d}t | || t|||||||tjtddd d S )Ni  Zemotionsr   zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresiver   r   r   FT)ra   rb   )r   re   r$   rV   r  )	r   r   r)   rZ   r[   r+   r\   r]   r^   r   r   r   test_fetch_openml_emotionsX  s2    r&  c             C   s   d}t | |d t| d S )Ni  F)r   r=   )r   r)   r   r   r   test_decode_emotionsz  s    r'  c       	      C   st   d}t | || tjjj|}t|d}t||}t	||}t
j|sRtt||}| | ksptd S )Nr   scikit_learn_data)r   r   r   r   r   r-   rL   mkdirr   r   ospathisfilerE   rk   )	r   r   tmpdirr)   openml_pathcache_directoryZ	response1locationZ	response2r   r   r   test_open_openml_url_cache  s    


r1  write_to_diskc          	      s   d}t jjj|}t|d}t||  fdd}| t jjd| t	j
tdd t|| W d Q R X tj rtd S )Nr   r(  c          	      s0   r$t  d}|d W d Q R X tdd S )Nw zInvalid request)r   writer3   )r   r;   )r0  r2  r   r   r     s    z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenr   zInvalid request)r   )r   r   r   r   r-   rL   r)  r   r   r   r   r3   r   r*  r+  existsrE   )r   r   r-  r2  r)   r.  r/  r   r   )r0  r2  r   'test_open_openml_url_unlinks_local_path  s    
r7  c          	      s   d}t jjj|}t| d}t|| t	tj
  t d}|d W d Q R X t|| fdd}d}tjt|d | }W d Q R X |d	kstd S )
Nr   r(  r3  r4  c                  s   t j rtddS )NzFile exist!r   )r*  r+  r6  	Exceptionr   )r0  r   r   
_load_data  s    z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading file)r   r   )r   r   r   r   r-   rL   r)  r   r*  makedirsr+  dirnamer   r5  r   r   r   RuntimeWarningrE   )r-  r)   r.  r/  r;   r9  Zwarn_msgr'   r   )r0  r   test_retry_with_clean_cache  s    
r=  c          	   C   s\   d}t jjj|}t| d}t||dd }d}tj	t
|d |  W d Q R X d S )Nr   r(  c               S   s   t d ddd d dd S )Ni  zSimulated mock error)r:   r   r   r   r   )r   r   r   r   r   r9    s    z:test_retry_with_clean_cache_http_error.<locals>._load_datazSimulated mock error)r   )r   r   r   r   r-   rL   r)  r   r   r   r   )r-  r)   r.  r/  r9  	error_msgr   r   r   &test_retry_with_clean_cache_http_error  s    r?  c       
      C   s   dd }d}t |d}t| || t|d|ddd\}}| tjjd| t|d|ddd\}}	tj	
|| tj	
||	 d S )	Nc             S   s   t d|   d S )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)r3   r   )r   r   r   r   _mock_urlopen_raise  s    z4test_fetch_openml_cache.<locals>._mock_urlopen_raiser   r(  TF)r)   r*   r   r   r,   r   )rL   r)  r   r   r   r   r   r   r$   r8   r   )
r   r   r-  r@  r)   r/  Z	X_fetchedZ	y_fetchedZX_cachedZy_cachedr   r   r   test_fetch_openml_cache  s&    rA  c             C   sR   d}d }d}d}t | || t||ddd}|jj||fks@t|jd ksNtd S )Nr   r   r   F)r)   r+   r*   r,   )r   r   r#   rJ   rE   rM   )r   r   r)   r+   r\   r]   r#   r   r   r   test_fetch_openml_notarget  s    rB  c          	   C   s   d}t | || d}tjt|d t|ddd}W d Q R X |jjdksLttjt|d td ddddd	}W d Q R X t|j	d
 |kstd S )Ni  z(Version 1 of dataset glass2 is inactive,)r   F)r)   r*   r,   )   	   glass2r   )r)   r>   r*   r?   r,   r@   )
r   r   r   rI   r   r#   rJ   rE   r   rD   )r   r   r)   r   Zglas2Zglas2_by_versionr   r   r   test_fetch_openml_inactive  s    rF  c          	   C   s>   d}t | || d}tjt|d tddd W d Q R X d S )Ni  zNo active dataset glass2 found)r   rE  F)r>   r*   )r   r   r   r3   r   )r   r   r)   r   r   r   r   test_fetch_nonexiting  s
    rG  c          	   C   sH   d}ddg}t | || d}tjt|d t||dd W d Q R X d S )Nr   r   r   z2Can only handle homogeneous multi-target datasets,)r   F)r)   r+   r*   )r   r   r   r3   r   )r   r   r)   targetsr   r   r   r   test_raises_illegal_multitarget  s    rI  c          	   C   s  d}d}d}t | || d}||}tjt|d t||ddd W d Q R X d}||}tjt|d t||ddd W d Q R X d}||}tjt|d t||d	gddd W d Q R X d}||}tjt|d t||d	gddd W d Q R X d S )
Ni  z,target_column={} has flag is_row_identifier.z$target_column={} has flag is_ignore.ZMouseID)r   F)r)   r+   r*   r,   ZGenotyper   )r   r-   r   r   rI   r   )r   r   r)   Zexpected_row_id_msgZexpected_ignore_msgZ
target_colr   r   r   r   test_warn_ignore_attribute(  s<    



rJ  c          	   C   s@   d}t | || d}tjt|d t|ddd W d Q R X d S )Ni  zOSTRING attributes are not supported for array representation. Try as_frame=True)r   F)r)   r*   r,   )r   r   r   r3   r   )r   r   r)   r   r   r   r   'test_string_attribute_without_dataframeP  s
    rK  c          	   C   s@   d}t | || d}tjt|d t|ddd W d Q R X d S )Nr   zJOpenML registered a problem with the dataset. It might be unusable. Error:)r   F)r)   r*   r,   )r   r   r   rI   r   )r   r   r)   r   r   r   r   test_dataset_with_openml_error]  s
    rL  c          	   C   s@   d}t | || d}tjt|d t|ddd W d Q R X d S )Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:)r   F)r)   r*   r,   )r   r   r   rI   r   )r   r   r)   r   r   r   r    test_dataset_with_openml_warningf  s
    rM  c          	   C   sl   d}t | || d}tjt|d t|ddd W d Q R X tjt|d t|ddgdd W d Q R X d S )Nr   zCould not find target_column=)r   Z	undefinedF)r)   r+   r*   r   )r   r   r   KeyErrorr   )r   r   r)   r   r   r   r   test_illegal_columno  s    rO  c          	   C   s>   d}t | || d}tjt|d t|dd W d Q R X d S )Nr   zTarget column )r   family)r)   r+   )r   r   r   r3   r   )r   r   r)   r   r   r   r   .test_fetch_openml_raises_missing_values_target{  s
    rQ  c           	   C   s   d} t jt| d tdd dd W d Q R X d} t jt| d tddd W d Q R X t jt| d tdddd W d Q R X d	} t jt| d t  W d Q R X d S )
Nz?Dataset data_id=-1 and version=version passed, but you can only)r   rj   r?   )r)   r>   r?   z9Dataset data_id=-1 and name=name passed, but you can onlyZnAmE)r)   r>   zFNeither name nor data_id are provided. Please provide name or data_id.)r   r   r3   r   )r   r   r   r   )test_fetch_openml_raises_illegal_argument  s    rR  c             C   sT   d}t | || tjj|ddd}|d k	s.t|d jdks@td|d ksPtd S )N>   F)r)   r*   r,   r#   )e      Zanimalr!   )r   r   r   r   rE   rJ   )r   r   r)   Zdatasetr   r   r   &test_fetch_openml_with_ignored_feature  s    rV  r,   c          	      s  |rt d d}t| |d td d|  }d}|d  t||.}t|d}t|	 }	d	|	t
|	d
 < W d Q R X t d}
|
|	 W d Q R X tjjj fdd}| tjjd| t t}tjj|d|d W d Q R X |dstd S )Nr   r   Trz   r{   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr   %   r   wbc          	      sL   |   }|dr@t d}| }W d Q R X tt|ddS | S d S )Nzdata/v1/download/1666876r   T)rg   )r   endswithr   rk   rf   r   )r   r:   r;   Zcorrupted_data)corrupt_copy_pathmocked_openml_urlr   r   swap_file_mock  s    
z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockr   F)r)   r*   r,   Z1666876)r   r   r   r   r   r   rn   r   	bytearrayrk   r7   GzipFiler5  r   r   r   r   r   r   r3   r   r   rE   )r   r,   r*   r-  r)   Zoriginal_data_moduleZoriginal_data_file_nameZ	orig_fileZ	orig_gzipr#   Zmodified_gzipr\  excr   )rZ  r[  r   !test_fetch_openml_verify_checksum  s*    

	r`  c           	   C   s   t d dd tdD ddg d} d}t jt|d t| d	gd	gd d
 W d Q R X ttdddg d} d}t jt|d t| dgi  W d Q R X d S )Nr   c             s   s   | ]
}|V  qd S )Nr   )r   elr   r   r   r0     s    z.test_convert_arff_data_type.<locals>.<genexpr>r   r4  )r#   descriptionZrelation
attributesz8shape must be provided when arr\['data'\] is a Generator)r   r   )rJ   zBarff\['data'\] must be a generator when converting to pd.DataFramea)r   r   r6   r   r3   r	   r5   r
   )Zarffr   r   r   r   test_convert_arff_data_type  s    
re  c             C   s`   t d d}t| |d t|ddd}|jjd }|jd   sJtt	|j
ddd	g d
S )zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  TF)r)   r*   r,   r  ZFEMALEZMALE_N)r   r   r   r   r#   r   ZisnaanyrE   r   r"   )r   r)   Zpenguinsr   r   r   r   test_missing_values_pandas  s    
rh  )g__doc__rn   rF   r   r*  r}   	importlibr   ior   numpyr$   Zscipy.sparserW   r   r   r   Zsklearn.datasetsr   Zfetch_openml_origZsklearn.datasets._openmlr   r   r   r	   r
   r   r   r   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   urllib.errorr   Z"sklearn.datasets.tests.test_commonr   Zsklearn.externals._arffr   	functoolsr   r   r   r   r=   re   rf   r   markZparametrizer  rV   Zint64r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r   r#  r$  r%  r&  r'  r1  r7  r=  r?  rA  rB  rF  rG  rI  rJ  rK  rL  rM  rO  rQ  rR  rV  r`  re  rh  r   r   r   r   <module>   s   ,1_s
--*F(,1R"""(			-