B
    0d.                 @   s
  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
l	mZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ edZe Zeej j!Z"ej#e" e_#ej e" e_ e Z$ee$j j!Z"e$j#e" e$_#e$j e" e$_ dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+ej,-ddd gd!d" Z.d#d$ Z/d%d& Z0d'd( Z1d)d* Z2d+d, Z3ed-ef d.d/id0ej,-d1d2d3gd4d5 Z4ed-ef d.d6id0ej,-d1d2d3gd7d8 Z5d9d: Z6d;d< Z7dS )=zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)assert_array_equal)assert_array_almost_equal)ignore_warnings)assert_allclose)ParameterGrid)IsolationForest)_average_path_length)train_test_split)load_diabetes	load_iris)check_random_state)roc_auc_score)
csc_matrix
csr_matrix)Mockpatchc           	   C   s   t ddgddgg} t ddgddgg}tdgdddgddgd	}t 2 x*|D ]"}tf d
ti|| | qVW W dQ R X dS )z6Check Isolation Forest for various parameter settings.r            g      ?g      ?TF)n_estimatorsmax_samples	bootstraprandom_stateN)nparrayr   r   r   rngfitpredict)X_trainX_testgridparams r"   U/var/www/html/venv/lib/python3.7/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforest.   s    
r$   c              C   s   t d} ttjdd tjdd | d\}}}}tddgddgd	}xttgD ]t}||}||}x^|D ]V}	tf d
dd|		|}
|

|}tf d
dd|		|}|
|}t|| qlW qRW dS )z=Check IForest for various parameter settings on sparse input.r   N2   )r   g      ?g      ?TF)r   r   
   r   )r   r   )r   r	   diabetesdatatargetr   r   r   r   r   r   r   )r   r   r   y_trainy_testr    Zsparse_formatZX_train_sparseZX_test_sparser!   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr"   r"   r#   test_iforest_sparse<   s"    (


r,   c           	   C   s  t j} tt tdd|  W dQ R X tt tdd|  W dQ R X tt tdd|  W dQ R X d}tjt|d tdd|  W dQ R X td}td	d|  W dQ R X d
d |D }t	|dkst
td}ttdd|  W dQ R X dd |D }t	|dks6t
tt tdd|  W dQ R X tt tdd|  W dQ R X tt( t | | ddddf  W dQ R X dS )z7Test that it gives proper exception on deficient input.)r   Ng        g       @z3max_samples will be set to n_samples for estimation)matchi  autoc             S   s   g | ]}t |jtr|qS r"   )
issubclasscategoryUserWarning).0eachr"   r"   r#   
<listcomp>n   s    z&test_iforest_error.<locals>.<listcomp>r   r   c             S   s   g | ]}t |jtr|qS r"   )r0   r1   r2   )r3   r4   r"   r"   r#   r5   r   s    Zfoobarg      ?r   )irisr(   pytestZraises
ValueErrorr   r   warnsr2   lenAssertionErrorr   Zint64r   )Xwarn_msgrecordZuser_warningsr"   r"   r#   test_iforest_errorX   s0     r?   c           	   C   sJ   t j} t | }x2|jD ](}|jttt	| j
d kstqW dS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r6   r(   r   r   estimators_	max_depthintr   ceillog2shaper;   )r<   clfestr"   r"   r#   test_recalculate_max_depth   s    rH   c           	   C   s   t j} t | }|j| jd ks&ttdd}d}tjt	|d ||  W d Q R X |j| jd kslttdd| }|jd| jd  kstd S )Nr   i  )r   z3max_samples will be set to n_samples for estimation)r.   g?)
r6   r(   r   r   max_samples_rE   r;   r7   r9   r2   )r<   rF   r=   r"   r"   r#   test_max_samples_attribute   s    
rJ   c        	      C   s   t d} ttjtj| d\}}}}tddd|}|jdd ||}|jdd ||}t	|| tddd|}||}t	|| dS )	zCheck parallel regression.r   )r   r   )n_jobsr   r   )rK   r   N)
r   r	   r'   r(   r)   r   r   
set_paramsr   r   )	r   r   r   r*   r+   Zensembley1y2Zy3r"   r"   r#    test_iforest_parallel_regression   s    



rO   c              C   s   t d} d| dd }tj|d |d f }|dd }| jdddd	}tj|dd |f }td
gd dgd  }td| d|}|| }t	||dkst
dS )z#Test Isolation Forest performs wellr   g333333?x   Nd      )   r   )lowhighsizer   rT   r   )r   r   g\(\?)r   randnr   Zr_uniformr   r   r   decision_functionr   r;   )r   r<   r   Z
X_outliersr   r+   rF   Zy_predr"   r"   r#   test_iforest_performance   s    r[   contaminationg      ?r/   c          	   C   s   ddgddgddgddgddgddgddgddgg}t t| d	}|| || }||}t|dd  t|d d kstt	|ddg ddg   d S )
Nr-   r   r      r   rR      )r   r\   )
r   r   r   rZ   r   r   minmaxr;   r   )r\   r<   rF   Zdecision_funcpredr"   r"   r#   test_iforest_works   s    4

(rc   c              C   s&   t j} t | }|j|jks"td S )N)r6   r(   r   r   rI   Z_max_samplesr;   )r<   rF   r"   r"   r#   test_max_samples_consistency   s    rd   c              C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r%   )r   g?)Zmax_features)r   r	   r'   r(   r)   r   r   r   )r   r   r   r*   r+   rF   r"   r"   r#    test_iforest_subsampled_features   s    (
re   c              C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt dd	ddgdd
| |g tt d}t|t | d S )Ng       @g      @g?g     0@g}?r   g        r   r   g      ?   i  )	r   logZeuler_gammar   r   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr"   r"   r#    test_iforest_average_path_length   s    ri   c              C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)r\   g       @)r   r   r   Zscore_samplesrZ   Zoffset_)r   Zclf1Zclf2r"   r"   r#   test_score_samples   s    rj   c              C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks`t|jd |ksrtdS )	z/Test iterative addition of iTrees to an iForestr   rT   r   r&   T)r   r   r   Z
warm_start)r   N)r   rX   r   r   r@   rL   r:   r;   )r   r<   rF   Ztree_1r"   r"   r#   test_iforest_warm_start
  s    


rk   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)g      ?r   )r/   r   c             C   s   t | | j|kstd S )N)rc   
call_countr;   )mocked_get_chunkr\   n_predict_callsr"   r"   r#   test_iforest_chunks_works1!  s    ro   r&   c             C   s   t | | j|kstd S )N)rc   rl   r;   )rm   r\   rn   r"   r"   r#   test_iforest_chunks_works2,  s    rp   c              C   s|  t d} t }||  t jd}t|| dks<tt||	dddksZtt|| d dksttt|| d dkstt 
|	dddd} t }||  t|| dkstt||	dddkstt|t ddkst|	dd} t }||  t|| dks:tt||	dddksZtt|t ddksxtdS )z=Test whether iforest predicts inliers when using uniform data)rQ   r&   r   r   rQ   r&   N)r   Zonesr   r   randomZRandomStateallr   r;   rX   repeat)r<   Ziforestr   r"   r"   r#   test_iforest_with_uniform_data6  s(    



 rt   c           	   C   sV   t ddgddgg} t ddg}t | |}tjtdd |j W d Q R X d S )Nr   r   r   rS   r   z`n_features_` was deprecated)r.   )r   r   r   r   r7   r9   FutureWarningZn_features_)r<   yrG   r"   r"   r#   test_n_features_deprecationY  s
    rw   )8__doc__r7   numpyr   Zsklearn.utils._testingr   r   r   r   Zsklearn.model_selectionr   Zsklearn.ensembler   Zsklearn.ensemble._iforestr   r	   Zsklearn.datasetsr
   r   Zsklearn.utilsr   Zsklearn.metricsr   Zscipy.sparser   r   Zunittest.mockr   r   r   r6   Zpermutationr)   rW   Zpermr(   r'   r$   r,   r?   rH   rJ   rO   r[   markZparametrizerc   rd   re   ri   rj   rk   ro   rp   rt   rw   r"   r"   r"   r#   <module>   s^   '#