B
    0dV                 @   s  d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dl"m#Z# dd Z$dd Z%ej&'dg e(dfd d!d"ge(dfe)g e(dfe)d d!d"ge(dfej**d#e(dfgd$d% Z+ej&'d&i d#d#d'gd(d) Z,d*d+ Z-d,d- Z.d.d/ Z/ej&'d0d1d id2fd3d id4fgd5d6 Z0d7d8 Z1d9d: Z2d;d< Z3d=d> Z4d?d@ Z5dAdB Z6ej&'dCdDd#d ge)dDd#d ge7dDd#d ggdEdF Z8dGdH Z9dIdJ Z:dKdL Z;dMdN Z<dOdP Z=dQdR Z>dSdT Z?dUdV Z@dWdX ZAdYdZ ZBd[d\ ZCd]d^ ZDd_d` ZEdadb ZFdcdd ZGdedf ZHdgdh ZIdS )i    N)defaultdict)partial)assert_array_equal)assert_almost_equal)assert_array_almost_equal)make_classification)make_multilabel_classification)make_hastie_10_2)make_regression)
make_blobs)make_friedman1)make_friedman2)make_friedman3)make_low_rank_matrix)
make_moons)make_circles)make_sparse_coded_signal)make_sparse_uncorrelated)make_spd_matrix)make_swiss_roll)make_s_curve)make_biclusters)make_checkerboard)assert_all_finitec              C   sR  ddg} t ddddddddd d | d	d
\}}| ddgks<t|jdksNtd|jdks`tdt|jdksxtdt|d	kdkstdt|dkdkstdt|dkdkstdt dddd	d	ddd	d\}}|jdkstd|jdkstdt|d|jfg|jd  |jd |jd jd	 dksNtd!d S )"Ng?g      ?d               Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeated	n_classesn_clusters_per_class	hypercubeshiftscaleweightsrandom_state)r   r   zX shape mismatch)r   zy shape mismatch)r   zUnexpected number of classes
   z(Unexpected number of samples in class #0   z(Unexpected number of samples in class #1   A   z(Unexpected number of samples in class #2i     Tg      ?)r   r    r!   r"   r#   r&   r(   r*   )i  r/   )i   z Unexpected number of unique rows)	r   AssertionErrorshapenpuniquesumviewdtypeZreshape)r)   Xy r;   _/var/www/html/venv/lib/python3.7/site-packages/sklearn/datasets/tests/test_samples_generator.pytest_make_classification#   sL    "
r=   c              C   s  d} t t| ddddddd}xBddgdfddgd dfdd	gd
 dfddgd dfddd	gdfddgd dftddgdfgD ]\}}}t|}|| }|d }xdD ]}||||||||dd\}	}
|	j||fkst|
j|fkstt|	}|jd	|j
d d}tj|dd\}}t||ks:tdtt}x&t||
D ]\}}|| | qNW x(| D ]}t||ksttdqtW t||kstdtt|
t|
 | dg| dd xtt|D ]x}|	||k jdd}|rtt||  t|ddd n6tt& tt||  t|ddd W d Q R X qW qW qW tt |ddddd! W d Q R X tt |ddddd! W d Q R X d S )"zTest the construction of informative features in make_classification

    Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and
    fully-specified `weights`.
    g    .Ar   r   F)	class_sepr"   r#   Zflip_yr'   r(   shuffler-   gUUUUUU?r   g      ?   g      ?g      ?r+   @   2   )FT)r   r$   r)   r    r!   r%   r&   r*   z|S{0})r8   T)Zreturn_inversez6Wrong number of clusters, or not in distinct quadrantsz"Wrong number of clusters per classzWrong number of classesz!Wrong number of samples per class)err_msg)axisr   z/Clusters are not centered on hypercube vertices)decimalrC   z5Clusters should not be centered on hypercube verticesN)r    r!   r$   r%   )r   r   intlenr3   r2   r4   signr7   formatstridesr5   r   setzipaddvaluesr   bincountrangeZmeanabsonespytestraises
ValueError)r>   maker!   r)   r%   r$   
n_clustersr   r&   r9   r:   ZsignsZunique_signsZcluster_indexZclusters_by_classZclusterclsZclustersZcentroidr;   r;   r<   -test_make_classification_informative_featuresS   s    


 rY   zweights, err_type, err_msgz:Weights specified but incompatible with number of classes.g      ?g      ?g?r   c          	   C   s(   t j||d t| d W d Q R X d S )N)match)r)   )rS   rT   r   )r)   Zerr_typerC   r;   r;   r<   %test_make_classification_weights_type   s    r[   kwargs)r$   r!   c             C   sV   t f ddgdd| \}}t f tddgdd| \}}t|| t|| d S )Ng?g?r   )r)   r*   )r   r4   arrayr   )r\   X1y1X2y2r;   r;   r<   1test_make_classification_weights_array_or_list_ok   s    "
rb   c           	   C   s   xt ddD ]\} }tddddd| d\}}|jd	ks>td
| s\tdd |D dks\ttdd |D |ksvttdd |D dkstqW d S )N)TF)r   r   r   r   r   r   F)r   r    r$   r*   return_indicatorallow_unlabeled)r   r   zX shape mismatchc             S   s   g | ]}t |qS r;   )max).0r:   r;   r;   r<   
<listcomp>   s    zHtest_make_multilabel_classification_return_sequences.<locals>.<listcomp>r-   c             S   s   g | ]}t |qS r;   )rG   )rf   r:   r;   r;   r<   rg      s    c             S   s   g | ]}t |qS r;   )rG   )rf   r:   r;   r;   r<   rg      s    )rL   r   r3   r2   re   min)rd   
min_lengthr9   Yr;   r;   r<   4test_make_multilabel_classification_return_sequences   s    rk   c              C   s   xlt ddD ]^\} }tdddd| d\}}|jdks<td	|jd
ksNtdttj|dd|kstqW tdddd| dd\}}}}t|| t|| |jdkstt	| d |jdkstt	|jdddgd  d S )N)TF)r   r   r,   r   r   r   )r   r    r$   r*   rd   )r,   r   zX shape mismatch)r,   r   zY shape mismatch)rD   T)r   r    r$   r*   rd   Zreturn_distributions)r   r   )r   r   )
rL   r   r3   r2   r4   allr6   r   r   r   )rd   ri   r9   rj   r`   ZY2Zp_cZp_w_cr;   r;   r<   4test_make_multilabel_classification_return_indicator   s.     

rm   c           	   C   sf   x`t ddD ]R\} }tddddd| d\}}|jd	ks>td
|jdksPtdt|stqW d S )N)TF)r   r   r,   r   r   r   sparse)r   r    r$   r*   rc   rd   )r,   r   zX shape mismatch)r,   r   zY shape mismatch)rL   r   r3   r2   spissparse)rd   ri   r9   rj   r;   r;   r<   ;test_make_multilabel_classification_return_indicator_sparse  s    rq   zparams, err_msgr$   z 'n_classes' should be an integerlengthz'length' should be an integerc          	   C   s(   t jt|d tf |  W d Q R X d S )N)rZ   )rS   rT   rU   r   )paramsrC   r;   r;   r<   3test_make_multilabel_classification_valid_arguments  s    rt   c              C   sP   t ddd\} }| jdks"td|jdks4tdt|jdksLtd	d S )
Nr   r   )r   r*   )r   r+   zX shape mismatch)r   zy shape mismatch)r-   zUnexpected number of classes)r	   r3   r2   r4   r5   )r9   r:   r;   r;   r<   test_make_hastie_10_2(  s    ru   c           
   C   s   t ddddddddd	\} }}| jd
ks0td|jdksBtd|jdksTtdt|dkdksltdtt|t| | ddd t ddd\} }| jdkstd S )Nr   r+   r   r   Tg        g      ?r   )r   r    r!   effective_rankcoefZbiasnoiser*   )r   r+   zX shape mismatch)r   zy shape mismatch)r+   zcoef shape mismatchz)Unexpected number of informative featuresr   )rE   )r   r    )r   r   )r
   r3   r2   r6   r   r4   stddot)r9   r:   cr;   r;   r<   test_make_regression/  s      r|   c           	   C   s   t dddddddd\} }}| jdks.td	|jd
ks@td|jdksRtdtt|dkdd tt|t| | ddd d S )Nr   r+   r   Tg      ?r   )r   r    r!   	n_targetsrw   rx   r*   )r   r+   zX shape mismatch)r   r   zy shape mismatch)r+   r   zcoef shape mismatchg        z)Unexpected number of informative featuresr   )rE   )	r
   r3   r2   r   r6   r   r4   ry   rz   )r9   r:   r{   r;   r;   r<    test_make_regression_multitargetH  s    r~   c              C   s   t dddg} t ddgddgddgg}tddd|| d	\}}|jd
ksTtd|jdksftdt |jdks~tdx<tt|| D ]*\}\}}t|||k | 	 |dd qW d S )Ng?g?g?g        g      ?r   rB   r-   )r*   r   r    centerscluster_std)rB   r-   zX shape mismatch)rB   zy shape mismatch)r   zUnexpected number of blobsr   zUnexpected std)
r4   r]   r   r3   r2   r5   	enumeraterL   r   ry   )cluster_stdsZcluster_centersr9   r:   ictrry   r;   r;   r<   test_make_blobs\  s    r   c              C   s\   dddg} t | ddd\}}|jt| dfks6tdttj|t| d| ksXtd	d S )
NrB      r   r-   r   )r   r    r*   zX shape mismatch)	minlengthz$Incorrect number of samples per blob)r   r3   r6   r2   rl   r4   rO   rG   )r   r9   r:   r;   r;   r<   test_make_blobs_n_samples_listn  s    
r   c              C   s   dddg} t ddgddgddgg}t dddg}t| ||dd\}}|jt| d	fksdtd
tt j|t| d| kstdx<t	t
||D ]*\}\}}t|||k |  |dd qW d S )Nr   g        g      ?g?g?g?r   )r   r   r   r*   r-   zX shape mismatch)r   z$Incorrect number of samples per blobr   zUnexpected std)r4   r]   r   r3   r6   r2   rl   rO   rG   r   rL   r   ry   )r   r   r   r9   r:   r   r   ry   r;   r;   r<   +test_make_blobs_n_samples_list_with_centersx  s    
r   r   r   c             C   sV   d }t | |dd\}}|jt| dfks0tdttj|t| d| ksRtdd S )Nr   )r   r   r*   r-   zX shape mismatch)r   z$Incorrect number of samples per blob)r   r3   r6   r2   rl   r4   rO   rG   )r   r   r9   r:   r;   r;   r<   &test_make_blobs_n_samples_centers_none  s    r   c              C   s<   ddg} d}t | |ddd\}}}|jt| |fks8td S )Nr+   r   r   Tr   )r   r    Zreturn_centersr*   )r   r3   rG   r2   )r   r    r9   r:   r   r;   r;   r<   test_make_blobs_return_centers  s
    r   c           	   C   s  dddg} t ddgddgddgg}t dddg}td|  d|d d	  }tjt|d
 t| |d d	 d W d Q R X td| d|d d	  }tjt|d
 t| ||d d	 d W d Q R X dd}tjt|d
 t| dd W d Q R X d S )Nr   g        g      ?g?g?g?zMLength of `n_samples` not consistent with number of centers. Got n_samples = z and centers = r1   )rZ   )r   zNLength of `clusters_std` not consistent with number of centers. Got centers = z and cluster_std = )r   r   z8Parameter `centers` must be array-like. Got {!r} insteadr   )	r4   r]   reescaperS   rT   rU   r   rI   )r   r   r   Zwrong_centers_msgZwrong_std_msgZwrong_type_msgr;   r;   r<   test_make_blobs_error  s    
 r   c           	   C   s   t ddddd\} }| jdks&td|jdks8td	t|dttj| d d df  | d d d
f   d| d d df d d   d| d d df   d| d d df    d S )Nr   r+   g        r   )r   r    rx   r*   )r   r+   zX shape mismatch)r   zy shape mismatchr   r   r-   g      ?r   r@   )r   r3   r2   r   r4   sinpi)r9   r:   r;   r;   r<   test_make_friedman1  s    r   c           	   C   s   t dddd\} }| jdks$td|jdks6tdt|| d d df d	 | d d d
f | d d d	f  d
| d d d
f | d d df    d	  d  d S )Nr   g        r   )r   rx   r*   )r   r@   zX shape mismatch)r   zy shape mismatchr-   r   r   g      ?)r   r3   r2   r   )r9   r:   r;   r;   r<   test_make_friedman2  s
    r   c           
   C   s   t dddd\} }| jdks$td|jdks6tdt|t| d d d	f | d d d
f  d	| d d d	f | d d df    | d d df   d S )Nr   g        r   )r   rx   r*   )r   r@   zX shape mismatch)r   zy shape mismatchr   r-   r   )r   r3   r2   r   r4   Zarctan)r9   r:   r;   r;   r<   test_make_friedman3  s
    r   c              C   sZ   t dddddd} | jdks$tddd	lm} || \}}}t|d d
k sVtdd S )NrB   r,   r   g{Gz?r   )r   r    rv   Ztail_strengthr*   )rB   r,   zX shape mismatch)svdg?zX rank is not approximately 5)r   r3   r2   numpy.linalgr   r6   )r9   r   usvr;   r;   r<   test_make_low_rank_matrix  s    r   c              C   s   t dddddd\} }}| jdks*td|jd	ks<td
|jdksNtdx(|jD ]}tt|dksVtdqVW tt|||  tt	|d j
ddt|jd  d S )Nr      r+   r   r   )r   Zn_componentsr    Zn_nonzero_coefsr*   )r+   r   zY shape mismatch)r+   r   zD shape mismatch)r   r   zX shape mismatchzNon-zero coefs mismatchr-   )rD   r   )r   r3   r2   TrG   r4   Zflatnonzeror   rz   sqrtr6   rR   )rj   Dr9   colr;   r;   r<   test_make_sparse_coded_signal  s    r   c              C   s:   t dddd\} }| jdks$td|jdks6tdd S )	Nr   r+   r   )r   r    r*   )r   r+   zX shape mismatch)r   zy shape mismatch)r   r3   r2   )r9   r:   r;   r;   r<   test_make_sparse_uncorrelated  s    r   c              C   sb   t ddd} | jdkstdt| | j ddlm} || \}}t|dkt	dgd d d S )	Nr   r   )Zn_dimr*   )r   r   zX shape mismatch)eigTzX is not positive-definite)
r   r3   r2   r   r   r   r   r   r4   r]   )r9   r   Zeigenvalues_r;   r;   r<   test_make_spd_matrix   s    r   c              C   sz   t dddd\} }| jdks$td|jdks6tdt| d d df |t|  t| d d d	f |t|  d S )
Nr   g        r   )r   rx   r*   )r   r   zX shape mismatch)r   zt shape mismatchr-   )r   r3   r2   r   r4   cosr   )r9   tr;   r;   r<   test_make_swiss_roll  s
     r   c              C   s   t dddd\} }| jdks$td|jdks6tdt| d d df t| t| d d d	f t|t|d
   d S )Nr   g        r   )r   rx   r*   )r   r   zX shape mismatch)r   zt shape mismatchr-   r   )r   r3   r2   r   r4   r   rH   r   )r9   r   r;   r;   r<   test_make_s_curve  s
    r   c              C   s   t ddddd\} }}| jdks(td|jdks:td|jdksLtd	t|  t| t| t ddddd\}}}t| | d S )
N)r   r   r@   Tr   )r3   rW   r?   r*   zX shape mismatch)r@   r   zrows shape mismatchzcolumns shape mismatch)r   r3   r2   r   r   )r9   rowscolsr`   r   r;   r;   r<   test_make_biclusters   s    r   c              C   s   t ddddd\} }}| jdks(td|jdks:td|jdksLtdt dd	ddd\} }}t|  t| t| t dd	ddd\}}}t dd	ddd\}}}t|| d S )
N)r   r   )r   r   Tr   )r3   rW   r?   r*   zX shape mismatchzrows shape mismatchzcolumns shape mismatchr-   )r   r3   r2   r   r   )r9   r   r   r^   r   r`   r;   r;   r<   test_make_checkerboard4  s"    r   c              C   sb   t ddd\} }xLt| |D ]>\}}|dkr4ddgnddg}|| d  }t|dd	d
 qW d S )Nr   F)r?   r   g        g      ?g      ?r-   z$Point is not on expected unit circle)rC   )r   rL   r6   r   )r9   r:   xlabelcenterdist_sqrr;   r;   r<   test_make_moonsO  s    r   c           	   C   s   t dd\} }t|dkdkr2t|dkdks:td| jdksLtd	|jd
ks^tdtjtdd t dddgd W d Q R X tjtdd t dd W d Q R X d S )N)   r   )r   r   r   r   r   z$Number of samples in a moon is wrong)   r-   zX shape mismatch)r   zy shape mismatchz8`n_samples` can be either an int or a two-element tuple.)rZ   r-   r   )r+   )r   r4   r6   r2   r3   rS   rT   rU   )r9   r:   r;   r;   r<   test_make_moons_unbalancedY  s    &

r   c           	   C   s<  d} xdD ]\}}}t |dd | d\}}|j|dfks>td|j|fksRtdddg}x\t||D ]N\}}|| d  }	|d	krd
n| d }
|d	krd
n| d }
t|	|
dd qfW ||d	k j|dfkstd||dk j|dfks
tdq
W tt t dd W d Q R X tt t d
d W d Q R X d S )Ng333333?))r   r   r@   )r   r@   r@   F)r?   rx   factorr-   zX shape mismatchzy shape mismatchg        r   g      ?zPoint is not on expected circle)rC   z1Samples not correctly distributed across circles.r   g{Gz)r   )	r   r3   r2   rL   r6   r   rS   rT   rU   )r   r   Zn_outerZn_innerr9   r:   r   r   r   r   Zdist_expr;   r;   r<   test_make_circlesn  s0    


r   c           	   C   s   t dd\} }t|dkdks(tdt|dkdksBtd| jd	ksTtd
|jdksftdtjtdd t dddgd W d Q R X tjtdd t dd W d Q R X d S )N)r-   r   )r   r   r-   z*Number of samples in inner circle is wrongr   r   z*Number of samples in outer circle is wrong)r+   r-   zX shape mismatch)r+   zy shape mismatchz8`n_samples` can be either an int or a two-element tuple.)rZ   r   )r   r4   r6   r2   r3   rS   rT   rU   )r9   r:   r;   r;   r<   test_make_circles_unbalanced  s    

r   )Jr   collectionsr   	functoolsr   numpyr4   rS   Zscipy.sparsern   ro   Zsklearn.utils._testingr   r   r   Zsklearn.datasetsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.utils.validationr   r=   rY   markZparametrizerU   r]   randomr[   rb   rk   rm   rq   rt   ru   r|   r~   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;   r;   r;   r<   <module>   s   0c
	 

2
		
!