B
    0doz                 @   s.  d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z
 ddl	mZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddlm'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddlm1Z1 ddl2m3Z3m4Z4 e-dZ5e* Z6e57e6j8j9Z:e6j;e: e6_;e6j8e: e6_8e) Z<e57e<j8j9Z:e<j;e: e<_;e<j8e: e<_8dd Z=ej>?dee3e4gdddddddddddd dd!ddd d"gd#d$d%d&gd'd( Z@d)d* ZAd+d, ZBG d-d. d.eZCd/d0 ZDd1d2 ZEd3d4 ZFd5d6 ZGd7d8 ZHd9d: ZId;d< ZJd=d> ZKd?d@ ZLdAdB ZMdCdD ZNdEdF ZOG dGdH dHeZPdIdJ ZQdodLdMZRdNdO ZSdPdQ ZTdRdS ZUdTdU ZVdVdW ZWdXdY ZXdZd[ ZYd\d] ZZd^d_ Z[d`da Z\dbdc Z]ddde Z^dfdg Z_dhdi Z`djdk Zaej>?dleegdmdn ZbdS )pzE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )productN)BaseEstimator)assert_array_equal)assert_array_almost_equal)DummyClassifierDummyRegressor)GridSearchCVParameterGrid)BaggingClassifierBaggingRegressor)
PerceptronLogisticRegression)KNeighborsClassifierKNeighborsRegressor)DecisionTreeClassifierDecisionTreeRegressor)SVCSVR)SparseRandomProjection)make_pipeline)SelectKBest)train_test_split)load_diabetes	load_irismake_hastie_10_2)check_random_state)FunctionTransformerscale)cycle)
csc_matrix
csr_matrixc        	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}x>t|t|D ],\}}tf || dd|||| qvW d S )Nr   )random_stateg      ?g      ?      TF)max_samplesmax_features	bootstrapbootstrap_features   )Zmax_iter   )	max_depth)base_estimatorr!   n_estimators)r   r   irisdatatargetr	   r   r   r   r   r   zipr   r
   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr+    r;   U/var/www/html/venv/lib/python3.7/site-packages/sklearn/ensemble/tests/test_bagging.pytest_classification4   s*    r=   zsparse_format, params, methodg      ?r)   T)r$   r%   r&   r'   g      ?r#   F)r%   r&   r'   )r$   r&   r'   r2   predict_probapredict_log_probadecision_functionc                s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
tf |ddddd	||	|}t	|||
}tf |ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D std S )Nc                   s    e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVCz7SVC variant that records the nature of the training setc                s   t  || t|| _| S )N)superr1   type
data_type_)selfXy)	__class__r;   r<   r1   r   s    
z1test_sparse_classification.<locals>.CustomSVC.fit)__name__
__module____qualname____doc__r1   __classcell__r;   r;   )rG   r<   	CustomSVCo   s   rM   r   )r!   Zlinearovr)Zkerneldecision_function_shaper"   )r+   r!   c             S   s   g | ]
}|j qS r;   )rC   ).0ir;   r;   r<   
<listcomp>   s    z.test_sparse_classification.<locals>.<listcomp>c                s   g | ]}| kqS r;   r;   )rP   t)sparse_typer;   r<   rR      s    )r   r   r   r   r-   r.   r/   r
   r1   getattrr   rB   estimators_allAssertionError)sparse_formatr:   methodrM   r3   r4   r5   r6   r7   X_train_sparseX_test_sparsesparse_classifiersparse_resultsZdense_classifierdense_resultstypesr;   )rT   r<   test_sparse_classificationU   s,    


ra   c              C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}xNd t t t t	 gD ]4}x.|D ]&}t
f || d	|||| qvW qlW d S )
Nr   2   )r!   g      ?g      ?TF)r$   r%   r&   r'   )r+   r!   )r   r   diabetesr.   r/   r	   r   r   r   r   r   r1   r2   )r3   r4   r5   r6   r7   r8   r+   r:   r;   r;   r<   test_regression   s$    (
rd   c                 s:  t d} ttjd d tjd d | d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}xttgD ]}||}||}	x|D ]}
tf | dd|
	||}|
|	}tf | dd|
	||
|}t| dd |jD }t|| t fdd|D s$tt|| qW q|W d S )Nr   rb   )r!   c                   s    e Zd ZdZ fddZ  ZS )z)test_sparse_regression.<locals>.CustomSVRz7SVC variant that records the nature of the training setc                s   t  || t|| _| S )N)rA   r1   rB   rC   )rD   rE   rF   )rG   r;   r<   r1      s    
z-test_sparse_regression.<locals>.CustomSVR.fit)rH   rI   rJ   rK   r1   rL   r;   r;   )rG   r<   	CustomSVR   s   re   g      ?r)   T)r$   r%   r&   r'   g      ?r#   F)r%   r&   r'   )r$   r&   r'   r"   )r+   r!   c             S   s   g | ]
}|j qS r;   )rC   )rP   rQ   r;   r;   r<   rR      s    z*test_sparse_regression.<locals>.<listcomp>c                s   g | ]}| kqS r;   r;   )rP   rS   )rT   r;   r<   rR      s    )r   r   rc   r.   r/   r   r   r    r   r1   r2   rB   rV   r   rW   rX   )r3   r4   r5   r6   r7   re   Zparameter_setsrY   r[   r\   r:   r]   r^   r_   r`   r;   )rT   r<   test_sparse_regression   s<    (




rf   c               @   s   e Zd Zdd ZdS )DummySizeEstimatorc             C   s   |j d | _t|| _d S )Nr   )shapetraining_size_joblibhashtraining_hash_)rD   rE   rF   r;   r;   r<   r1      s    zDummySizeEstimator.fitN)rH   rI   rJ   r1   r;   r;   r;   r<   rg      s   rg   c        	      C   s  t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ksft	tt dd| d||}||||||kst	tt
 dd||}g }x.|jD ]$}|j|jd kst	||j qW tt|t|kst	d S )Nr   )r!   g      ?F)r+   r$   r&   r!   T)r+   r&   )r   r   rc   r.   r/   r   r1   r   scorerX   rg   rV   ri   rh   appendrl   lenset)	r3   r4   r5   r6   r7   r+   ensembleZtraining_hash	estimatorr;   r;   r<   test_bootstrap_samples   s4    rs   c              C   s   t d} ttjtj| d\}}}}tt dd| d||}x.|jD ]$}tjj	d t
|j	d ksDtqDW tt dd| d||}x.|jD ]$}tjj	d t
|j	d kstqW d S )Nr   )r!   g      ?F)r+   r%   r'   r!   r"   T)r   r   rc   r.   r/   r   r   r1   estimators_features_rh   npuniquerX   )r3   r4   r5   r6   r7   rq   featuresr;   r;   r<   test_bootstrap_features  s&    $rx   c           	   C   s   t d} ttjtj| d\}}}}tjddd tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W d Q R X d S )
Nr   )r!   ignore)divideinvalid)r+   r!   r"   )Zaxis   )r+   r!   r$   )r   r   r-   r.   r/   ru   Zerrstater
   r   r1   r   sumr>   Zonesro   expr?   r   )r3   r4   r5   r6   r7   rq   r;   r;   r<   test_probability7  s$    ""r   c        	   
   C   s   t d} ttjtj| d\}}}}xt t gD ]|}t|ddd| d||}|	||}t
||j dk sptd}tjt|d$ t|d	dd| d}||| W d Q R X q0W d S )
Nr   )r!   d   T)r+   r,   r&   	oob_scorer!   g?z{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.)matchr"   )r   r   r-   r.   r/   r   r   r
   r1   rm   abs
oob_score_rX   pytestwarnsUserWarning)	r3   r4   r5   r6   r7   r+   clf
test_scorewarn_msgr;   r;   r<   test_oob_score_classificationZ  s,    r   c        	   	   C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s`td}tjt|d& tt d	dd| d}||| W d Q R X d S )
Nr   )r!   rb   T)r+   r,   r&   r   r!   g?z{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.)r   r"   )r   r   rc   r.   r/   r   r   r1   rm   r   r   rX   r   r   r   )	r3   r4   r5   r6   r7   r   r   r   Zregrr;   r;   r<   test_oob_score_regression  s*    r   c              C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   )r!   r"   F)r+   r,   r&   r'   r!   )
r   r   rc   r.   r/   r   r   r1   r   r2   )r3   r4   r5   r6   r7   Zclf1Zclf2r;   r;   r<   test_single_estimator  s    r   c           	   C   s  t jt j } }t }tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X tt t|dd| | W d Q R X t	t|| |d	rt
d S )
N)r$   g        g       @i  Zfoobar)r%   r|   r@   )r-   r.   r/   r   r   raises
ValueErrorr
   r1   hasattrrX   )rE   rF   baser;   r;   r<   
test_error  s.    r   c              C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   )r!      )n_jobsr!   r"   )r   rN   )rO   )r   r-   r.   r/   r
   r   r1   r>   
set_paramsr   r   r@   )r4   r5   r6   r7   rq   y1y2y3Z
decisions1Z
decisions2Z
decisions3r;   r;   r<   test_parallel_classification  s4    








r   c        	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   )r!   r   )r   r!   r"   )r   r)   )r   r   rc   r.   r/   r   r   r1   r   r2   r   )	r3   r4   r5   r6   r7   rq   r   r   r   r;   r;   r<   test_parallel_regression  s    



r   c              C   sD   t jt j } }d||dk< ddd}ttt |dd| | d S )Nr"   r)   )r"   r)   )r,   Zbase_estimator__CZroc_auc)Zscoring)r-   r.   r/   r   r
   r   r1   )rE   rF   
parametersr;   r;   r<   test_gridsearch  s    
r   c              C   s0  t d} ttjtj| d\}}}}td ddd||}t|jt	sHt
tt	 ddd||}t|jt	spt
tt ddd||}t|jtst
ttjtj| d\}}}}td ddd||}t|jtst
tt ddd||}t|jtst
tt ddd||}t|jts,t
d S )Nr   )r!   r   )r   r!   )r   r   r-   r.   r/   r
   r1   
isinstanceZbase_estimator_r   rX   r   rc   r   r   r   )r3   r4   r5   r6   r7   rq   r;   r;   r<   test_base_estimator'  s*    r   c              C   sL   t ttddt dd} | tjtj t| d j	d d j
tsHtd S )Nr"   )kr)   )r%   r   r   )r
   r   r   r   r1   r-   r.   r/   r   stepsr!   intrX   )rr   r;   r;   r<   test_bagging_with_pipelineS  s    r   c               @   s   e Zd Zdd Zdd ZdS )DummyZeroEstimatorc             C   s   t || _| S )N)ru   rv   classes_)rD   rE   rF   r;   r;   r<   r1   \  s    zDummyZeroEstimator.fitc             C   s   | j tj|jd td S )Nr   )dtype)r   ru   Zzerosrh   r   )rD   rE   r;   r;   r<   r2   `  s    zDummyZeroEstimator.predictN)rH   rI   rJ   r1   r2   r;   r;   r;   r<   r   [  s   r   c           	   C   sj   t t } td}| tjtjtj t	t
, | jtjtj|jdtjjd dd W d Q R X d S )Nr   
   )size)Zsample_weight)r
   r   r   r1   r-   r.   r/   r2   r   r   r   randintrh   )rr   r3   r;   r;   r<   1test_bagging_sample_weight_unsupported_but_passedd  s    
r   *   c             C   s   t ddd\}}d }xLdD ]D}|d kr6t|| dd}n|j|d ||| t||kstqW td| d	d}||| td
d |D tdd |D kstd S )Nr(   r"   )	n_samplesr!   )r|   r   T)r,   r!   
warm_start)r,   r   Fc             S   s   g | ]
}|j qS r;   )r!   )rP   treer;   r;   r<   rR     s    z#test_warm_start.<locals>.<listcomp>c             S   s   g | ]
}|j qS r;   )r!   )rP   r   r;   r;   r<   rR     s    )r   r
   r   r1   ro   rX   rp   )r!   rE   rF   clf_wsr,   Z	clf_no_wsr;   r;   r<   test_warm_startq  s    
r   c           	   C   sZ   t ddd\} }tddd}|| | |jdd tt || | W d Q R X d S )	Nr(   r"   )r   r!   r|   T)r,   r   r#   )r,   )r   r
   r1   r   r   r   r   )rE   rF   r   r;   r;   r<   $test_warm_start_smaller_n_estimators  s    r   c        	   	   C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W d Q R X t||| d S )Nr(   r"   )r   r!   +   )r!   r|   TS   )r,   r   r!   g      ?z;Warm-start fitting without increasing n_estimators does not)r   )	r   r   r
   r1   r2   r   r   r   r   )	rE   rF   r4   r5   r6   r7   r   Zy_predr   r;   r;   r<   "test_warm_start_equal_n_estimators  s    
r   c        
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr(   r"   )r   r!   r   )r!   r|   TiE  )r,   r   r!   r   )r,   F)r   r   r
   r1   r   r2   r   )
rE   rF   r4   r5   r6   r7   r   r   r   r   r;   r;   r<   test_warm_start_equivalence  s    

r   c           	   C   sD   t ddd\} }tdddd}tt || | W d Q R X d S )Nr(   r"   )r   r!   r|   T)r,   r   r   )r   r
   r   r   r   r1   )rE   rF   r   r;   r;   r<   $test_warm_start_with_oob_score_fails  s    r   c           	   C   sh   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W d Q R X d S )Nr   r"   )r   r!   r|   T)r,   r   Fr   )r   r   r,   r   )r   r
   r1   r   r   r   AttributeErrorrU   )rE   rF   r   r;   r;   r<   $test_oob_score_removed_on_warm_start  s    r   c              C   sH   t ddd\} }tt ddddd}|| |j|| |jksDtd S )N   r"   )r   r!   g      ?T)r$   r%   r   r!   )r   r
   r   r1   r   rX   )rE   rF   baggingr;   r;   r<   test_oob_score_consistency  s    r   c              C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ksVtt|d t| d ksrt|d j	j
d	kstd}|| }|| }|| }	| | d d |f }
|| }|	j}|	|
| |	j}t|| d S )
Nr   r"   )r   r!   g      ?F)r$   r%   r!   r&   r   r)   rQ   )r   r
   r   r1   estimators_samples_rt   rV   ro   rX   r   kindcoef_r   )rE   rF   r   Zestimators_samplesZestimators_featuresr9   Zestimator_indexZestimator_samplesZestimator_featuresrr   r4   r6   Z
orig_coefsZ	new_coefsr;   r;   r<   test_estimators_samples  s0    r   c              C   s   t  } | j| j }}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr)   )Zn_componentsg      ?r   )r+   r$   r!   r   r"   )r   r.   r/   r   r   r   r
   r1   rV   r   r   copyr   rt   r   )r-   rE   rF   Zbase_pipeliner   Zpipeline_estimator_coefrr   Zestimator_sampleZestimator_featurer4   r6   r;   r;   r<   %test_estimators_samples_deterministic  s    


r   c              C   sH   d} t d|  dd\}}tt | ddd}||| |j| ksDtd S )Nr   r)   r"   )r   r!   g      ?)r$   r%   r!   )r   r
   r   r1   Z_max_samplesrX   )r$   rE   rF   r   r;   r;   r<   test_max_samples_consistency"  s    r   c              C   s   d} dgdgdggd }dddgd }dddgd }dddgd }t d	| d
||j}t d	| d
||j}t d	| d
||j}||g||gkstd S )Nr|   r   r   r"   ABCr)   T)r   r!   )r
   r1   r   rX   )r!   rE   ZY1ZY2ZY3x1Zx2Zx3r;   r;   r<   !test_set_oob_score_label_encoding1  s    


r   c             C   s"   | j ddd} d| t|  < | S )NfloatT)r   r   )Zastyperu   isfinite)rE   r;   r;   r<   replaceK  s    r   c           
   C   s:  t dddgdd dgdt jdgdt jdgdt jdgg} t dddddgt dddgdddgdddgdddgdddggg}x|D ]}t }ttt|}|	| |
|  t|}|	| |
| }|j|jkstt }t|}tt |	| | W d Q R X t|}tt |	| | W d Q R X qW d S )Nr"   r   r|   r)      	      )ru   arraynaninfNINFr   r   r   r   r1   r2   r   rh   rX   r   r   r   )rE   Zy_valuesrF   Z	regressorpipelineZbagging_regressory_hatr;   r;   r<   *test_bagging_regressor_with_missing_inputsQ  s6    


r   c           	   C   s  t dddgdd dgdt jdgdt jdgdt jdgg} t dddddg}t }ttt|}|	| |
|  t|}|	| | |
| }|j|jkst||  ||  t }t|}tt |	| | W d Q R X t|}tt |	| | W d Q R X d S )Nr"   r   r|   r)   r   )ru   r   r   r   r   r   r   r   r   r1   r2   r
   rh   rX   r?   r>   r   r   r   )rE   rF   
classifierr   Zbagging_classifierr   r;   r;   r<   +test_bagging_classifier_with_missing_inputsz  s.    




r   c              C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr"   r)   r   r#   r   g333333?)r%   r!   )ru   r   r
   r   r1   )rE   rF   r   r;   r;   r<   test_bagging_small_max_features  s    r   c              C   sj   t jd} | dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )Nr      r#   c               @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c             S   s
   || _ d S )N)_sample_indices)rD   rE   rF   r;   r;   r<   r1     s    z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)rH   rI   rJ   rK   r1   r;   r;   r;   r<   MyEstimator  s   r   r"   )r+   r,   r!   )ru   randomZRandomStateZrandnZaranger   r   r1   r   rV   r   r   )r3   rE   rF   r   r   r;   r;   r<   #test_bagging_get_estimators_indices  s    
r   	Estimatorc          	   C   sV   t ddgddgg}t ddg}|  ||}tjtdd |j W d Q R X d S )Nr"   r)   r   r#   r   z`n_features_` was deprecated)r   )ru   r   r1   r   r   FutureWarningZn_features_)r   rE   rF   Zestr;   r;   r<   test_n_features_deprecation  s
    r   )r   )crK   	itertoolsr   numpyru   rj   r   Zsklearn.baser   Zsklearn.utils._testingr   r   Zsklearn.dummyr   r   Zsklearn.model_selectionr   r	   Zsklearn.ensembler
   r   Zsklearn.linear_modelr   r   Zsklearn.neighborsr   r   Zsklearn.treer   r   Zsklearn.svmr   r   Zsklearn.random_projectionr   Zsklearn.pipeliner   Zsklearn.feature_selectionr   r   Zsklearn.datasetsr   r   r   Zsklearn.utilsr   Zsklearn.preprocessingr   r   r   Zscipy.sparser   r    r3   r-   Zpermutationr/   r   Zpermr.   rc   r=   markZparametrizera   rd   rf   rg   rs   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;   r;   r;   r<   <module>   s   !
,:*#%$!),	

() 
