B
    0d'G                 @   s  d dl mZ d dlZd dlmZ d dlZd dlmZ	 d dl
Z
d dlZd dlmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlZd d	lmZmZmZ d
ZdZdZdZdZeZdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+ej,-d'd(d) Z.d*d+ Z/d,d- Z0d.d/ Z1d0d1 Z2d2d3 Z3d4d5 Z4d6d7 Z5d8d9 Z6d:d; Z7d<d= Z8d>d? Z9ej,:d@d dAdBdCdDgej,:dEdFdGgej,:dHdIdJdKgdLdM Z;dNdO Z<dPdQ Z=dS )R    )BZ2FileN)BytesIO)	resources)NamedTemporaryFile)assert_array_equal)assert_array_almost_equal)fails_if_pypy)load_svmlight_fileload_svmlight_filesdump_svmlight_filezsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc          	   K   s$   t t| }t|f|S Q R X dS )zG
    Helper to load resource `filename` with `importlib.resources`
    N)r   open_binaryTEST_DATA_MODULEr	   )filenamekwargsf r   ]/var/www/html/venv/lib/python3.7/site-packages/sklearn/datasets/tests/test_svmlight_format.py_load_svmlight_local_test_file   s    r   c              C   s  t t\} }| jjd dks t| jd dks2t| jd dksDt|jd dksVtx&dD ]\}}}| ||f |ks\tq\W | d dkst| d dkst| d	 dkst| d
 dkst| d dkst| d  d9  < | d dkstt|ddddddg d S )Nr               ))r      g      @)r   
   g)r      g      ?)r      g      ?)r      )r         )r      )r   r   )r      )r      )r      )r   r   r   r   r       )r   datafileindptrshapeAssertionErrorr   )Xyijvalr   r   r   test_load_svmlight_file&   s     r.   c           
   C   st   t tt^} t| } t| \}}t| tj}z(t|\}}t	|j
|j
 t	|| W d t| X W d Q R X d S )N)r   pathr   r%   strr	   osopenO_RDONLYr   dataclose)	data_pathX1y1fdX2y2r   r   r   test_load_svmlight_file_fdJ   s    r<   c              C   s(   t tdd\} }|ddddgks$td S )NT)
multilabel)r   r   )r   r   )r   r   )r   	multifiler(   )r)   r*   r   r   r   "test_load_svmlight_file_multilabel\   s    r?   c           	   C   s   t tt&} tt| gd tjd\}}}}W d Q R X t|	 |	  t
|| |jtjksdt|jtjksttt tt*} tt| gd tjd\}}}}}	}
W d Q R X |j|jkst|j|	jkst|	jtjkstd S )Nr   )dtyper    )r   r/   r   r%   r
   r0   npfloat32r   toarrayr   r@   r(   float64)r6   ZX_trainZy_trainZX_testZy_testr7   r8   r:   r;   ZX3Zy3r   r   r   test_load_svmlight_filesa   s    (
,rE   c           	   C   s   t tdd\} }| jjd dks$t| jd dks6t| jd dksHtx&dD ]\}}}| ||f |ksNtqNW tt t tdd W d Q R X d S )	N   )
n_featuresr   r   r   r   ))r   r   g      @)r   r   g)r   r   g      ?)r   r   r   r   )r   r%   r&   r'   r(   pytestraises
ValueError)r)   r*   r+   r,   r-   r   r   r   "test_load_svmlight_file_n_featurest   s    rK   c        	      C   s6  t t\} }tdddf}|  ttt,}t|j	d}t
|| W d Q R X W d Q R X t|j	\}}t|j	 W d Q R X t|  |  t|| tdddd}|  ttt*}t|j	d}t
|| W d Q R X W d Q R X t|j	\}}t|j	 W d Q R X t|  |  t|| d S )Nzsklearn-testz.gz)prefixsuffixwbz.bz2)r   r%   r   r5   r   r   r   gzipr2   nameshutilcopyfileobjr	   r1   remover   rC   r   )	r)   r*   tmpr   Zfh_outZXgzZygzZXbzZybzr   r   r   test_load_compressed   s&     
 rU   c            	   C   s"   t t tt W d Q R X d S )N)rH   rI   rJ   r   invalidfiler   r   r   r   test_load_invalid_file   s    rW   c            	   C   s"   t t tt W d Q R X d S )N)rH   rI   rJ   r   invalidfile2r   r   r   r   test_load_invalid_order_file   s    rY   c           	   C   s.   t d} tt t| dd W d Q R X d S )Ns   -1 4:1.
1 0:1
F)
zero_based)r   rH   rI   rJ   r	   )r   r   r   r   test_load_zero_based   s    r[   c        
      C   sv   d} d}t | }t|dd\}}|jdks.tt | }t |}t||gdd\}}}}	|jdksdt|jdksrtd S )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
auto)rZ   )r   r    )r   r$   )r   r	   r'   r(   r
   )
Zdata1Zdata2f1r)   r*   f2r7   r8   r:   r;   r   r   r   test_load_zero_based_auto   s    r_   c              C   s   d} t t| dd\}}t|dddg t| ddgd	d
gddgg tt| gdd}t t| dd}xV||fD ]J\}}}t|dddg t|dddg t| ddgd	d
gddgg qtW d S )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12F)query_idr    r   r   g(\?gQ?gp=
ף?g?gףp=
?Tr   )r	   r   r   rC   r
   )r4   r)   r*   Zres1Zres2qidr   r   r   test_load_with_qid   s     rb   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc              C   sf   d dd tddD } tt| dd\}}}t|dd	 d
dd
dg tt|tdd d	S )zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c             s   s   | ]}d  | V  qdS )z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r+   r   r   r   	<genexpr>   s   z&test_load_large_qid.<locals>.<genexpr>r   i ZbT)r`   Nr    r   )joinranger	   r   r   rA   uniquearange)r4   r)   r*   ra   r   r   r   test_load_large_qid   s    rm   c              C   sd   t tP ttt8} ttt }tt	| t	|t	| g W d Q R X W d Q R X W d Q R X d S )N)
rH   rI   rJ   r   r/   r   r%   rV   r
   r0   )r6   Zinvalid_pathr   r   r   test_load_invalid_file2   s
    rn   c            	   C   s"   t t td W d Q R X d S )NgzG?)rH   rI   	TypeErrorr	   r   r   r   r   test_not_a_filename   s    rp   c            	   C   s"   t t td W d Q R X d S )Nztrou pic nic douille)rH   rI   IOErrorr	   r   r   r   r   test_invalid_filename   s    rr   c              C   s  t t\} }|  }t|}| t| jd  }|t|jd  }x| ||fD ]}x|||fD ]|}xtdD ]j}xbtjtj	tj
tjgD ]H}	t }
t|r|jd dkr|j}||	}t|||
d|d |
d |
 }t|d}dtj |ks
t|
 }t|d}dd	g| d
 |ks6tt|
|	|d\}}|j|	ksXtt| j|j | }t|r| }n|}|	tjkrt||d t|j|	dd|d qt||d t|j|	dd|d qW qvW qhW qTW d S )Nr   )TFr   test)commentrZ   zutf-8zscikit-learn %sonezeroz-based)r@   rZ   r$   F)copyr   )r   r%   rC   sp
csr_matrixrA   rl   r'   rB   rD   Zint32Zint64r   issparseTZastyper   seekreadliner0   sklearn__version__r(   r	   r@   r   Zsorted_indicesindicesr   )ZX_sparsey_denseZX_densey_sparseZX_slicedZy_slicedr)   r*   rZ   r@   r   ZX_inputrt   r:   r;   ZX2_denseZX_input_denser   r   r   	test_dump   sH    





r   c              C   s   dddddgdddddgdddddgg} dddgdddgdddgg}t |}x`||gD ]T}t }t| ||dd |d | dkst| dkst| d	ksXtqXW d S )
Nr   r   r    r   T)r=   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)rx   ry   r   r   r|   r}   r(   )r)   r   r   r*   r   r   r   r   test_dump_multilabel=  s    (

r   c        
   	   C   s   d} d}d}d}d}| ||||gdddd	d	gd	d	d	d	d	gd	d	d	d	d	gd	d	d	d	d	gg}| ||||g}t  }t||| |d	 | d
kst| dkst| dkst| dkst| dkst|d	 t|\}}	t||  t||	 d S )Nr   g @gGz@g     ?g      ?g    eAg NgmCgkcEr   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r|   r}   r(   r	   r   rC   )
ru   twothreeexactZalmostr)   r*   r   r:   r;   r   r   r   test_dump_conciseK  s.    

r   c           	   C   s  t t\} }|  } t }d}t| |||dd |d t|dd\}}t| |  t|| d}t }t	t
 t| |||d W d Q R X |d}t }t| |||dd |d t|dd\}}t| |  t|| t }t	t t| ||d	d W d Q R X d S )
Nz*This is a comment
spanning multiple lines.F)rt   rZ   r   )rZ   s   It is true that
½² = ¼)rt   zutf-8zI've got a  .)r   r%   rC   r   r   r|   r	   r   rH   rI   UnicodeDecodeErrordecoderJ   )r)   r*   r   Zascii_commentr:   r;   Zutf8_commentZunicode_commentr   r   r   test_dump_commentj  s.    




r   c           	   C   sn   t t\} }t }|g}tt t| || W d Q R X t }tt t| |d d | W d Q R X d S )N)r   r%   r   rH   rI   rJ   r   )r)   r*   r   Zy2dr   r   r   test_dump_invalid  s    r   c              C   s   t t\} }|  } t| jd d }t }t| |||dd |d t	|ddd\}}}t
| |  t
|| t
|| d S )Nr   r   T)r`   rZ   )r   r%   rC   rA   rl   r'   r   r   r|   r	   r   )r)   r*   r`   r   r7   r8   Z	query_id1r   r   r   test_dump_query_id  s    

r   c              C   s  d} t t| dd\}}}dddgddd	gddd	gddd	gg}dd
d
dg}d
dddg}t|| t| | t|| t }t||||dd |d
 t |ddd\}}}t|| t| | t|| |d
 t |ddd\}}t|| t| | d S )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985T)r`   r   r   r    ixUl   \.,N^iYr   l l    )r`   rZ   F)r	   r   r   rC   r   r|   )r4   r)   r*   ra   true_Xtrue_yZtrueQIDr   r   r   r   test_load_with_long_qid  s,    






r   c              C   s~   t  } ttjdd}tdddg}t|||  xDdD ]<}| d t| d|d\}}t	|| t	|
 |
  q:W d S )N)r    r$   )r'   r   r   )r\   TFr$   )rG   rZ   )r   rx   ry   rA   Zzerosarrayr   r|   r	   r   rC   )r   r   r   rZ   r)   r*   r   r   r   test_load_zeros  s    


r   sparsityg?g      ?gGz?r   	n_samples   e   rG   r   r   )   c             C   s  t jd}|jdd||fd}| r0d||| k < t|}|jdd|d}t }t||| |	d t
| }d}|d }	|	| }
d| d }||	 }t||||
d	\}}t|||	|d	\}}t|||d
\}}t |||g}t|||g}t|| t| |  d S )Nr   g        g      ?)lowhighsizer   r    r$   r   )rG   offsetlength)rG   r   )rA   randomRandomStateuniformrx   ry   randintr   r   r|   lengetvaluer	   concatenatevstackr   rC   )r   r   rG   rngr)   r*   r   r   Zmark_0Zmark_1Zlength_0Zmark_2Zlength_1X_0y_0X_1y_1ZX_2Zy_2y_concatX_concatr   r   r   test_load_with_offsets  s0    


r   c              C   sv  t jd} t ddddddgddddddgddddddgddddddgddddddgddddddgddddddgg}t|}|j\}}| jdd|d}t |d }t	 }t
||||d |d t| }xt|D ]}|d t||d	d|d
\}	}
}t||d	|dd
\}}}t ||g}t |
|g}t|	|g}t|| t|| t| |  qW d S )Nr   r   r   r    r$   r   )r   r   r   )r`   T)rG   r`   r   r   r   )rA   r   r   r   rx   ry   r'   r   rl   r   r   r|   r   r   rj   r	   r   r   r   r   rC   )r   r)   r   rG   r*   r`   r   r   markr   r   Zq_0r   r   Zq_1Zq_concatr   r   r   r   r   "test_load_offset_exhaustive_splits  s:    





r   c            	   C   s,   t jtdd ttddd W d Q R X d S )Nzn_features is required)matchr    )r   r   )rH   rI   rJ   r   r%   r   r   r   r   test_load_with_offsets_error)  s    r   )>bz2r   rO   ior   numpyrA   Zscipy.sparsesparserx   r1   rQ   	importlibr   tempfiler   rH   Zsklearn.utils._testingr   r   r   r~   Zsklearn.datasetsr	   r
   r   r   r%   r>   rV   rX   Z
pytestmarkr   r.   r<   r?   rE   rK   rU   rW   rY   r[   r_   rb   r   skiprm   rn   rp   rr   r   r   r   r   r   r   r   r   Zparametrizer   r   r   r   r   r   r   <module>   sd   $E!$ %*