B
    0dP                 @   s"  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZ dd	l
mZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ej*+dZ,ddgddgddgddgddgddggZ-ddddddgZ.ddddddgZ/ddgddgddggZ0dddgZ1dddgZ2e)3 Z4e,5e4j6j7Z8e%e4j9e4j6e,d\e4_9e4_6e): Z;e%e;j9e;j6e,d\e;_9e;_6dd  Z<d!d" Z=ej>?d#d$d%gd&d' Z@d(d) ZAd*d+ ZBej>?d,d-d.d/gd0d1 ZCej>?d#d$d%gd2d3 ZDd4d5 ZEd6d7 ZFd8d9 ZGd:d; ZHd<d= ZId>d? ZJd@dA ZKdBdC ZLdDdE ZMdFdG ZNej>?d#d$d%gdHdI ZOdJdK ZPej>?d#d$d%gdLdM ZQej>?dNe e4j9e4j6fe e;j9e;j6fgdOdP ZRdS )Qz6Testing for the boost module (sklearn.ensemble.boost).    N)
csc_matrix)
csr_matrix)
coo_matrix)
dok_matrix)
lil_matrix)assert_array_equalassert_array_less)assert_array_almost_equal)BaseEstimator)clone)DummyClassifierDummyRegressor)LinearRegression)train_test_split)GridSearchCV)AdaBoostClassifier)AdaBoostRegressor)_samme_proba)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)datasets      foo   )random_statec                 s   t dddgdddgddd	gddd
gg  t  jddd d t jf   G  fddd} |  }t|dt  }t|j j t 	|
 sttt j|ddddddg tt j|ddddddg d S )Nr   gư>r   gRQ?g333333?g?igRQ?g      ?g&.>)axisc                   s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                s   t |j j  S )N)r   shape)selfX)probs ]/var/www/html/venv/lib/python3.7/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaB   s    z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r)   r'   )r&   r'   r(   MockEstimatorA   s   r-   r    r   )nparrayabssumnewaxisr   	ones_liker   r#   isfiniteallAssertionErrorZargminargmax)r-   ZmockZsamme_probar'   )r&   r(   test_samme_proba6   s    &$r8   c              C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r.   Zoneslenr%   r   fitr	   r)   )Zy_tclfr'   r'   r(   test_oneclass_adaboost_probaS   s    r<   	algorithmSAMMEzSAMME.Rc             C   sz   t | dd}|tt t|tt tt	t
t|j |tjttdfks\t|tjttfksvtd S )Nr   )r=   r!   r   )r   r:   r%   y_classr   predictT	y_t_classr.   uniqueasarrayclasses_r)   r#   r9   r6   decision_function)r=   r;   r'   r'   r(   test_classification_toy\   s    rG   c              C   s*   t dd} | tt t| tt d S )Nr   )r!   )r   r:   r%   y_regrr   r@   rA   y_t_regr)r;   r'   r'   r(   test_regression_toyg   s    
rJ   c              C   s  t tj} d  }}xdD ]}t|d}|tjtj t| |j |	tj}|dkr`|}|}|j
d t| ksvt|tjj
d t| kst|tjtj}|dkstd||f t|jdkstttdd |jD t|jkstqW d	|_td
t |	tj|  d S )N)r>   zSAMME.R)r=   r>   r   g?z'Failed with algorithm %s and score = %fc             s   s   | ]}|j V  qd S )N)r!   ).0estr'   r'   r(   	<genexpr>   s    ztest_iris.<locals>.<genexpr>zSAMME.Rr   )r.   rC   iristargetr   r:   datar   rE   r)   r#   r9   r6   rF   scoreestimators_setr=   r   r0   )classesZ	clf_sammeZ
prob_sammealgr;   probarQ   r'   r'   r(   	test_irisn   s&    

rW   lossZlinearZsquareZexponentialc             C   st   t | dd}|tjtj |tjtj}|dks8tt|jdksJttt	dd |jD t|jksptd S )Nr   )rX   r!   g333333?r   c             s   s   | ]}|j V  qd S )N)r!   )rK   rL   r'   r'   r(   rM      s    z test_diabetes.<locals>.<genexpr>)
r   r:   diabetesrP   rO   rQ   r6   r9   rR   rS   )rX   regrQ   r'   r'   r(   test_diabetes   s    r[   c             C   s  t jd}|jdtjjd}|jdtjjd}t| dd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}	d	d |jtj
tj|dD }
t|dkstt||d
  t|dkstt||d
  t|
dkstt|	|
d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}	dd |jtj
tj|dD }
t|dkstt||d
  t|
dkstt|	|
d
  d S )Nr   
   )size)r=   n_estimators)sample_weightc             S   s   g | ]}|qS r'   r'   )rK   pr'   r'   r(   
<listcomp>   s    z'test_staged_predict.<locals>.<listcomp>c             S   s   g | ]}|qS r'   r'   )rK   r`   r'   r'   r(   ra      s    c             S   s   g | ]}|qS r'   r'   )rK   sr'   r'   r(   ra      s    r   )r^   r!   c             S   s   g | ]}|qS r'   r'   )rK   r`   r'   r'   r(   ra      s    c             S   s   g | ]}|qS r'   r'   )rK   rb   r'   r'   r(   ra      s   )r.   randomRandomStaterandintrN   rO   r#   rY   r   r:   rP   r@   staged_predictr)   staged_predict_probarQ   staged_scorer9   r6   r	   r   )r=   rngZiris_weightsZdiabetes_weightsr;   ZpredictionsZstaged_predictionsrV   Zstaged_probasrQ   Zstaged_scoresr'   r'   r(   test_staged_predict   s:    rj   c              C   sh   t t d} dddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N)base_estimator)r   r   )r>   zSAMME.R)r^   base_estimator__max_depthr=   r   )rk   r!   )r^   rl   )
r   r   r   r:   rN   rP   rO   r   r   rY   )boost
parametersr;   r'   r'   r(   test_gridsearch   s    


ro   c              C   s   dd l } xxdD ]p}t|d}|tjtj |tjtj}| |}| |}t	||j
ksbt|tjtj}||kstqW tdd}|tjtj |tjtj}| |}| |}t	||j
kst|tjtj}||kstd S )Nr   )r>   zSAMME.R)r=   )r!   )pickler   r:   rN   rP   rO   rQ   dumpsloadstype	__class__r6   r   rY   )rp   rU   objrQ   rb   obj2Zscore2r'   r'   r(   test_pickle   s$    






rw   c           	   C   s   t jdddddddd\} }x`dD ]X}t|d	}|| | |j}|jd dksTt|d dtjf |dd  k	 s"tq"W d S )
Ni  r\   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r!   )r>   zSAMME.R)r=   )
r   Zmake_classificationr   r:   Zfeature_importances_r#   r6   r.   r2   r5   )r%   yrU   r;   Zimportancesr'   r'   r(   test_importances   s    

r{   c            	   C   s   t t tddtt W d Q R X t t tddtt W d Q R X t t  t jtttdgd W d Q R X d S )Nr   )learning_rater   )r=   )r_   )	pytestraises
ValueErrorr   r:   r%   r?   r.   rD   r'   r'   r'   r(   
test_error  s    r   c           	   C   s   ddl m}  t|  }|tt tt dd}|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}dd	dd
g}tt dd}tjtdd ||| W d Q R X d S )Nr   )RandomForestClassifierr>   )r=   )RandomForestRegressor)r!   r   r   barr   zworse than random)match)sklearn.ensembler   r   r:   r%   rH   r   r?   r   r   r   r}   r~   r   )r   r;   r   ZX_failZy_failr'   r'   r(   test_base_estimator  s    
r   c           	   C   s@   d} t dddd}tjt| d |tjtj W d Q R X d S )Nz+Sample weights have reached infinite values   g      @r>   )r^   r|   r=   )r   )r   r}   ZwarnsUserWarningr:   rN   rP   rO   )msgr;   r'   r'   r(   test_sample_weights_infinite:  s    r   c              C   sR  G dd dt } tjddddd\}}t|}t||dd	\}}}}xtttt	t
gD ]}||}||}	t| d
dddd||}
t| d
dddd||}|
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	|}|||}t|| |
|	}||}x"t||D ]\}}t|| q`W |
|	}||}x"t||D ]\}}t|| qW |
|	}||}x"t||D ]\}}t|| qW |
|	|}|||}x"t||D ]\}}t|| qW dd |
jD }tdd |D sXtqXW d S )Nc                   s"   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                s    t  j|||d t|| _| S )z<Modification on fit caries data type for later verification.)r_   )superr:   rs   
data_type_)r$   r%   rz   r_   )rt   r'   r(   r:   G  s    
z1test_sparse_classification.<locals>.CustomSVC.fit)N)r*   r+   r,   __doc__r:   __classcell__r'   r'   )rt   r(   	CustomSVCD  s   r   r         *   )Z	n_classesrx   ry   r!   r   )r!   T)Zprobabilityr>   )rk   r!   r=   c             S   s   g | ]
}|j qS r'   )r   )rK   ir'   r'   r(   ra     s    z.test_sparse_classification.<locals>.<listcomp>c             S   s   g | ]}|t kp|tkqS r'   )r   r   )rK   tr'   r'   r(   ra     s    )r   r   Zmake_multilabel_classificationr.   Zravelr   r   r   r   r   r   r   r:   r@   r   rF   r	   Zpredict_log_probar)   rQ   Zstaged_decision_functionziprf   rg   rh   rR   r5   r6   )r   r%   rz   X_trainX_testy_trainy_testsparse_formatX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_resultsdense_results
sprase_res	dense_restypesr'   r'   r(   test_sparse_classificationA  sf    	



















r   c              C   s  G dd dt } tjddddd\}}t||dd	\}}}}xtttttgD ]}||}||}	t	|  dd

||}
t	|  dd

|| }}|
|	}||}t|| |
|	}||}x t||D ]\}}t|| qW dd |
jD }tdd |D sLtqLW d S )Nc                   s"   e Zd ZdZd fdd	Z  ZS )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                s    t  j|||d t|| _| S )z<Modification on fit caries data type for later verification.)r_   )r   r:   rs   r   )r$   r%   rz   r_   )rt   r'   r(   r:     s    
z-test_sparse_regression.<locals>.CustomSVR.fit)N)r*   r+   r,   r   r:   r   r'   r'   )rt   r(   	CustomSVR  s   r   r   2   r   r   )rx   ry   	n_targetsr!   r   )r!   )rk   r!   c             S   s   g | ]
}|j qS r'   )r   )rK   r   r'   r'   r(   ra     s    z*test_sparse_regression.<locals>.<listcomp>c             S   s   g | ]}|t kp|tkqS r'   )r   r   )rK   r   r'   r'   r(   ra     s    )r   r   Zmake_regressionr   r   r   r   r   r   r   r:   r@   r	   rf   r   rR   r5   r6   )r   r%   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r(   test_sparse_regression  s,    	




r   c              C   sF   G dd dt } t|  dd}|tt t|jt|jksBtdS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c               @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc             S   s   d S )Nr'   )r$   r%   rz   r'   r'   r(   r:     s    zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc             S   s   t |jd S )Nr   )r.   Zzerosr#   )r$   r%   r'   r'   r(   r@     s    zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r*   r+   r,   r:   r@   r'   r'   r'   r(   DummyEstimator  s   r   r    )r^   N)	r
   r   r:   r%   rH   r9   Zestimator_weights_Zestimator_errors_r6   )r   rm   r'   r'   r(   %test_sample_weight_adaboost_regressor  s    r   c              C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   r   r    r   Zmost_frequent)ZstrategyN)r.   rc   rd   Zrandnchoicer   r   r:   r@   r)   r   r   )ri   r%   Zycyrrm   r'   r'   r(   test_multidimensional_X  s    



r   c          	   C   s\   t jt j }}tt }t|| d}d|jj}t	j
t|d ||| W d Q R X d S )N)rk   r=   z {} doesn't support sample_weight)r   )rN   rP   rO   r   r   r   formatrt   r*   r}   r~   r   r:   )r=   r%   rz   rk   r;   err_msgr'   r'   r(   -test_adaboostclassifier_without_sample_weight  s    

r   c        
      C   sR  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k s,t||	k s:t|t|	ksNtd S )Nr   r   d   i  )numg?g?g-C6?r   r   r\   i'  )rk   r^   r!   )r_   )r.   rc   rd   ZlinspaceZrandr#   Zreshaper   r   r   r:   r3   rQ   r6   r}   Zapprox)
ri   r%   rz   Zregr_no_outlierZregr_with_weightZregr_with_outlierr_   Zscore_with_outlierZscore_no_outlierZscore_with_weightr'   r'   r(   $test_adaboostregressor_sample_weight  s*     
r   c             C   sZ   t tjddddi\}}}}t| dd}||| ttj||dd|	| d S )NT)Z
return_X_yr!   r   )r=   r!   r   )r"   )
r   r   Zload_digitsr   r:   r   r.   r7   r)   r@   )r=   r   r   r   r   modelr'   r'   r(    test_adaboost_consistent_predict(  s    r   zmodel, X, yc          	   C   sD   t |}d|d< d}tjt|d | j|||d W d Q R X d S )Nir   z,sample_weight cannot contain negative weight)r   )r_   )r.   r3   r}   r~   r   r:   )r   r%   rz   r_   r   r'   r'   r(   #test_adaboost_negative_weight_error8  s
    
r   )Sr   numpyr.   r}   Zscipy.sparser   r   r   r   r   Zsklearn.utils._testingr   r   r	   Zsklearn.baser
   r   Zsklearn.dummyr   r   Zsklearn.linear_modelr   Zsklearn.model_selectionr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearnr   rc   rd   ri   r%   r?   rH   rA   rB   rI   Z	load_irisrN   ZpermutationrO   r]   ZpermrP   Zload_diabetesrY   r8   r<   markZparametrizerG   rJ   rW   r[   rj   ro   rw   r{   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r'   r(   <module>   sx   (

	".]0&