""" Non-negative matrix factorization.
"""
import numbers
import time
import warnings
from math import sqrt

import numpy as np
import scipy.sparse as sp

from ._cdnmf_fast import _update_cdnmf_fast
from .._config import config_context
from ..base import BaseEstimator, TransformerMixin
from ..exceptions import ConvergenceWarning
from ..utils import check_random_state, check_array
from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
from ..utils.validation import check_is_fitted, check_non_negative

EPSILON = np.finfo(np.float32).eps


def norm(x):
    """Dot product-based Euclidean norm implementation.

    See: http://fseoane.net/blog/2011/computing-the-vector-norm/

    Parameters
    ----------
    x : array-like
        Vector for which to compute the norm.
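
    Examples
    --------
    Equivalent to ``sqrt(sum(x ** 2))``, computed via a dot product:

    >>> import numpy as np
    >>> norm(np.array([3.0, 4.0]))
    5.0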
    """
    return sqrt(squared_norm(x))


def trace_dot(X, Y):
    """Trace of np.dot(X, Y.T).

    Parameters
    ----------
    X : array-like
        First matrix.
    Y : array-like
        Second matrix.
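
    Examples
    --------
    Equivalent to ``np.trace(np.dot(X, Y.T))``, computed without forming
    the matrix product:

    >>> import numpy as np
    >>> trace_dot(np.eye(2), np.eye(2))
    2.0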
    )npdotravel)XYr   r   r   	trace_dot(   s    
r   c             C   sV   t | } t| |kr.td||t| f t| | t| dkrRtd| d S )Nz=Array with wrong shape passed to %s. Expected %s, but got %s r   z$Array passed to %s is full of zeros.)r   r   shape
ValueErrorr   max)Ar   Zwhomr   r   r   _check_init5   s    
r   Fc          
   C   st  t |}t| st| } t|}t|}|dkrt| rt| j| j}ttj	|j
||g|}t| |j
 |}|| d|  d }nt| t|| d }|rt|d S |S t| rt||| j}	| j}
nt||}| }	|  }
|
tk}|	| }	|
| }
t|	|	dk< |dkrvttj|ddtj|dd}|
|	 }t|
t|}|||
  7 }n|dkr|
|	 }t|t| j tt| }nt| rd}xNt| jd D ],}|tt||dd|f | 7 }qW nt|| }t|
|	|d  }|
|  ||  }|||d  7 }|||d   }|rltd| S |S dS )ak  Compute the beta-divergence of X and dot(W, H).

    Parameters
    ----------
    X : float or array-like of shape (n_samples, n_features)

    W : float or array-like of shape (n_samples, n_components)

    H : float or array-like of shape (n_components, n_features)

    beta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}
        Parameter of the beta-divergence.
        If beta == 2, this is half the Frobenius *squared* norm.
        If beta == 1, this is the generalized Kullback-Leibler divergence.
        If beta == 0, this is the Itakura-Saito divergence.
        Else, this is the general beta-divergence.

    square_root : bool, default=False
        If True, return np.sqrt(2 * res)
        For beta == 2, it corresponds to the Frobenius norm.

    Returns
    -------
        res : float
            Beta divergence of X and np.dot(W, H).
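
    Examples
    --------
    With ``beta=2`` and ``square_root=True`` this is the Frobenius norm of
    ``X - np.dot(W, H)`` (the array values below are illustrative only, so
    the output is omitted):

    >>> import numpy as np
    >>> X = np.arange(1.0, 7.0).reshape(2, 3)
    >>> W = np.ones((2, 2))
    >>> H = np.ones((2, 3))
    >>> err = _beta_divergence(X, W, H, beta=2, square_root=True)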
    r   g       @r   r   )axisN)_beta_loss_to_floatspissparser   Z
atleast_2dr   datar   linalg	multi_dotTr   r   _special_sparse_dotr   EPSILONsumlogproductr   range)r   WHbetasquare_rootZnorm_XZnorm_WHZ
cross_prodresZWH_dataX_dataWHindicesZsum_WHdivZsum_WH_betaiZsum_X_WHr   r   r   _beta_divergenceA   sZ    






 
(.r8   c             C   s   t |r| \}}|jd }t|}| jd }t||| }x\td||D ]L}	t|	|	| }
t	| ||
 ddf |j
||
 ddf jdd||
< qPW t j|||ff|jd}| S t| |S dS )z0Computes np.dot(W, H), only where X is non zero.r   r   N)r    )r   )r"   r#   Znonzeror   r   emptyr   r-   slicemultiplyr'   r*   Z
coo_matrixZtocsrr   )r.   r/   r   iiZjjZn_valsZdot_valsn_componentsZ
batch_sizestartbatchr4   r   r   r   r(      s    



.r(   c       	      C   s   |dks|dkrJ|dkr|n|}|| }|| }|d|  }|d|  }nDd\}}}}|dkrr| | }| d|  }|dkr| | }| d|  }||||fS )z:Compute L1 and L2 regularization coefficients for W and H.r   sameg      ?)g        g        g        g        )bothtransformation)rA   
componentsr   )	alphaalpha_Walpha_Hl1_ratioregularizationl1_reg_Wl1_reg_Hl2_reg_Wl2_reg_Hr   r   r   _compute_regularization   s    rM   c             C   sJ   dddd}t | tr&| |kr&||  } t | tjsFtd| | f | S )z"Convert string beta_loss to float.r   r   r   )	frobeniuszkullback-leiblerzitakura-saitozEInvalid beta_loss parameter: got %r instead of one of %r, or a float.)
isinstancestrnumbersNumberr   keys)	beta_lossZallowed_beta_lossr   r   r   r!      s    r!   warnư>c             C   sv  |dkrt dt d}t| d | j\}}|dk	rX|dkrX|t||krXtd||dkrx|t||krtd}nd}|dkrt	| 
 | }t|}||||j| jdd	 }	||||j| jdd	 }
tj|	|	d
 tj|
|
d
 |
|	fS t| ||d\}}}t|}
t|}	t	|d t|dddf  |
dddf< t	|d t|dddf  |	dddf< xtd|D ]}|dd|f ||ddf  }}t|dt|d }}tt|dtt|d }}t|t| }}t|t| }}|| ||  }}||krJ|| }|| }|}n|| }|| }|}t	|| | }|| |
dd|f< || |	|ddf< qW d|
|
|k < d|	|	|k < |dkrn|dkr| 
 }||
|
dk< ||	|	dk< n|dkr^t|}| 
 }t||t|
|
dk  d |
|
dk< t||t|	|	dk  d |	|	dk< ntd|df |
|	fS )a  Algorithms for NMF initialization.

    Computes an initial guess for the non-negative
    rank k matrix approximation for X: X = WH.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        The data matrix to be decomposed.

    n_components : int
        The number of components desired in the approximation.

    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar'}, default=None
        Method used to initialize the procedure.
        Default: None.
        Valid options:

        - None: 'nndsvd' if n_components <= min(n_samples, n_features),
            otherwise 'random'.

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    eps : float, default=1e-6
        Truncate all values less than this in output to zero.

    random_state : int, RandomState instance or None, default=None
        Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for
        reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    W : array-like of shape (n_samples, n_components)
        Initial guesses for solving X ~= WH.

    H : array-like of shape (n_components, n_features)
        Initial guesses for solving X ~= WH.

    References
    ----------
    C. Boutsidis, E. Gallopoulos: SVD based initialization: A head start for
    nonnegative matrix factorization - Pattern Recognition, 2008
    http://tinyurl.com/nndsvd
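
    Examples
    --------
    Only the shapes are deterministic here, so the factor values are not
    shown:

    >>> import numpy as np
    >>> X = np.abs(np.random.RandomState(0).randn(6, 5))
    >>> W, H = _initialize_nmf(X, n_components=3, init='nndsvd')
    >>> W.shape, H.shape
    ((6, 3), (3, 5))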
    rU   zThe 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).NzNMF initializationrandomzLinit = '{}' can only be used when n_components <= min(n_samples, n_features)nndsvdF)copy)out)random_stater   r   nndsvdanndsvdard   z3Invalid init parameter: got %r instead of one of %r)NrW   rX   r\   r]   )warningsrU   FutureWarningr   r   minr   formatr   r   meanr
   ZrandnZastypedtypeabsr   Z
zeros_liker-   maximumminimumr   len)r   r=   initepsr[   	n_samples
n_featuresavgrngr/   r.   USVjr   yZx_pZy_pZx_nZy_nZx_p_nrmZy_p_nrmZx_n_nrmZy_n_nrmZm_pZm_nuvsigmaZlbdr   r   r   _initialize_nmf   s    ;



00"&



*,rw   c             C   s   |j d }t|j|}t| |}	|dkrF|jdd|d   |7  < |dkrV|	|8 }	|rf||}
n
t|}
tj|
tj	d}
t
|||	|
S )zHelper function for _fit_coordinate_descent.

    Update W to minimize the objective function, iterating once over all
    coordinates. By symmetry, to update H, one can call
    _update_coordinate_descent(X.T, Ht, W, ...).

    """
    n_components = Ht.shape[1]

    HHt = np.dot(Ht.T, Ht)
    XHt = safe_sparse_dot(X, Ht)

    # L2 regularization corresponds to increase of the diagonal of HHt
    if l2_reg != 0.0:
        # adds l2_reg only on the diagonal
        HHt.flat[:: n_components + 1] += l2_reg
    # L1 regularization corresponds to decrease of each element of XHt
    if l1_reg != 0.0:
        XHt -= l1_reg

    if shuffle:
        permutation = random_state.permutation(n_components)
    else:
        permutation = np.arange(n_components)
    # the Cython routine expects platform integer indices
    permutation = np.asarray(permutation, dtype=np.intp)
    return _update_cdnmf_fast(W, HHt, XHt, permutation)


def _fit_coordinate_descent(
    X,
    W,
    H,
    tol=1e-4,
    max_iter=200,
    l1_reg_W=0,
    l1_reg_H=0,
    l2_reg_W=0,
    l2_reg_H=0,
    update_H=True,
    verbose=0,
    shuffle=False,
    random_state=None,
):
    """Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent

    The objective function is minimized with an alternating minimization of W
    and H. Each minimization is done with a cyclic (up to a permutation of the
    features) Coordinate Descent.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Constant matrix.

    W : array-like of shape (n_samples, n_components)
        Initial guess for the solution.

    H : array-like of shape (n_components, n_features)
        Initial guess for the solution.

    tol : float, default=1e-4
        Tolerance of the stopping condition.

    max_iter : int, default=200
        Maximum number of iterations before timing out.

    l1_reg_W : float, default=0.
        L1 regularization parameter for W.

    l1_reg_H : float, default=0.
        L1 regularization parameter for H.

    l2_reg_W : float, default=0.
        L2 regularization parameter for W.

    l2_reg_H : float, default=0.
        L2 regularization parameter for H.

    update_H : bool, default=True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    verbose : int, default=0
        The verbosity level.

    shuffle : bool, default=False
        If true, randomize the order of coordinates in the CD solver.

    random_state : int, RandomState instance or None, default=None
        Used to randomize the coordinates in the CD solver, when
        ``shuffle`` is set to ``True``. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    W : ndarray of shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : ndarray of shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        The number of iterations done by the algorithm.
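
    Examples
    --------
    A minimal run on a small random problem (the factors depend on the
    random initialization, so outputs are omitted):

    >>> import numpy as np
    >>> X = np.abs(np.random.RandomState(0).randn(4, 3))
    >>> W, H = _initialize_nmf(X, n_components=2, init='random',
    ...                        random_state=0)
    >>> W, H, n_iter = _fit_coordinate_descent(X, W, H, random_state=0)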

    References
    ----------
    Cichocki, Andrzej, and Phan, Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.
    """
    # so W and Ht are both in C order in memory
    Ht = check_array(H.T, order="C")
    X = check_array(X, accept_sparse="csr")

    rng = check_random_state(random_state)

    for n_iter in range(1, max_iter + 1):
        violation = 0.0

        # Update W
        violation += _update_coordinate_descent(
            X, W, Ht, l1_reg_W, l2_reg_W, shuffle, rng
        )
        # Update H
        if update_H:
            violation += _update_coordinate_descent(
                X.T, Ht, W, l1_reg_H, l2_reg_H, shuffle, rng
            )

        if n_iter == 1:
            violation_init = violation

        if violation_init == 0:
            break

        if verbose:
            print("violation:", violation / violation_init)

        if violation / violation_init <= tol:
            if verbose:
                print("Converged at iteration", n_iter + 1)
            break

    return W, Ht.T, n_iter


def _multiplicative_update_w(
    X,
    W,
    H,
    beta_loss,
    l1_reg_W,
    l2_reg_W,
    gamma,
    H_sum=None,
    HHt=None,
    XHt=None,
    update_H=True,
):
    """Update W in Multiplicative Update NMF."""
    if beta_loss == 2:
        # Numerator
        if XHt is None:
            XHt = safe_sparse_dot(X, H.T)
        if update_H:
            # avoid a copy of XHt, which will be re-computed (update_H=True)
            numerator = XHt
        else:
            # preserve the XHt, which is not re-computed (update_H=False)
            numerator = XHt.copy()

        # Denominator
        if HHt is None:
            HHt = np.dot(H, H.T)
        denominator = np.dot(W, HHt)

    else:
        # Numerator
        # if X is sparse, compute WH only where X is non zero
        WH_safe_X = _special_sparse_dot(W, H, X)
        if sp.issparse(X):
            WH_safe_X_data = WH_safe_X.data
            X_data = X.data
        else:
            WH_safe_X_data = WH_safe_X
            X_data = X
            # copy used in the Denominator
            WH = WH_safe_X.copy()
            if beta_loss - 1.0 < 0:
                WH[WH == 0] = EPSILON

        # to avoid taking a negative power of zero
        if beta_loss - 2.0 < 0:
            WH_safe_X_data[WH_safe_X_data == 0] = EPSILON

        if beta_loss == 1:
            np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)
        elif beta_loss == 0:
            # speeds up computation time
            WH_safe_X_data **= -1
            WH_safe_X_data **= 2
            # element-wise multiplication
            WH_safe_X_data *= X_data
        else:
            WH_safe_X_data **= beta_loss - 2
            # element-wise multiplication
            WH_safe_X_data *= X_data

        # here numerator = dot(X * (dot(W, H) ** (beta_loss - 2)), H.T)
        numerator = safe_sparse_dot(WH_safe_X, H.T)

        # Denominator
        if beta_loss == 1:
            if H_sum is None:
                H_sum = np.sum(H, axis=1)  # shape(n_components, )
            denominator = H_sum[np.newaxis, :]

        else:
            # computation of WHHt = dot(dot(W, H) ** (beta_loss - 1), H.T)
            if sp.issparse(X):
                # memory efficient computation
                # (compute row by row, avoiding the dense matrix WH)
                WHHt = np.empty(W.shape)
                for i in range(X.shape[0]):
                    WHi = np.dot(W[i, :], H)
                    if beta_loss - 1 < 0:
                        WHi[WHi == 0] = EPSILON
                    WHi **= beta_loss - 1
                    WHHt[i, :] = np.dot(WHi, H.T)
            else:
                WH **= beta_loss - 1
                WHHt = np.dot(WH, H.T)
            denominator = WHHt

    # Add L1 and L2 regularization
    if l1_reg_W > 0:
        denominator += l1_reg_W
    if l2_reg_W > 0:
        denominator = denominator + l2_reg_W * W
    denominator[denominator == 0] = EPSILON

    numerator /= denominator
    delta_W = numerator

    # gamma is in ]0, 1]
    if gamma != 1:
        delta_W **= gamma

    return delta_W, H_sum, HHt, XHt


def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
    """Update H in Multiplicative Update NMF."""
    if beta_loss == 2:
        numerator = safe_sparse_dot(W.T, X)
        denominator = np.linalg.multi_dot([W.T, W, H])

    else:
        # Numerator
        WH_safe_X = _special_sparse_dot(W, H, X)
        if sp.issparse(X):
            WH_safe_X_data = WH_safe_X.data
            X_data = X.data
        else:
            WH_safe_X_data = WH_safe_X
            X_data = X
            # copy used in the Denominator
            WH = WH_safe_X.copy()
            if beta_loss - 1.0 < 0:
                WH[WH == 0] = EPSILON

        # to avoid taking a negative power of zero
        if beta_loss - 2.0 < 0:
            WH_safe_X_data[WH_safe_X_data == 0] = EPSILON

        if beta_loss == 1:
            np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)
        elif beta_loss == 0:
            # speeds up computation time
            WH_safe_X_data **= -1
            WH_safe_X_data **= 2
            # element-wise multiplication
            WH_safe_X_data *= X_data
        else:
            WH_safe_X_data **= beta_loss - 2
            # element-wise multiplication
            WH_safe_X_data *= X_data

        # here numerator = dot(W.T, (dot(W, H) ** (beta_loss - 2)) * X)
        numerator = safe_sparse_dot(W.T, WH_safe_X)

        # Denominator
        if beta_loss == 1:
            W_sum = np.sum(W, axis=0)  # shape(n_components, )
            W_sum[W_sum == 0] = 1.0
            denominator = W_sum[:, np.newaxis]

        # beta_loss not in (1, 2)
        else:
            # computation of WtWH = dot(W.T, dot(W, H) ** (beta_loss - 1))
            if sp.issparse(X):
                # memory efficient computation
                # (compute column by column, avoiding the dense matrix WH)
                WtWH = np.empty(H.shape)
                for i in range(X.shape[1]):
                    WHi = np.dot(W, H[:, i])
                    if beta_loss - 1 < 0:
                        WHi[WHi == 0] = EPSILON
                    WHi **= beta_loss - 1
                    WtWH[:, i] = np.dot(W.T, WHi)
            else:
                WH **= beta_loss - 1
                WtWH = np.dot(W.T, WH)
            denominator = WtWH

    # Add L1 and L2 regularization
    if l1_reg_H > 0:
        denominator += l1_reg_H
    if l2_reg_H > 0:
        denominator = denominator + l2_reg_H * H
    denominator[denominator == 0] = EPSILON

    numerator /= denominator
    delta_H = numerator

    # gamma is in ]0, 1]
    if gamma != 1:
        delta_H **= gamma

    return delta_H


def _fit_multiplicative_update(
    X,
    W,
    H,
    beta_loss="frobenius",
    max_iter=200,
    tol=1e-4,
    l1_reg_W=0,
    l1_reg_H=0,
    l2_reg_W=0,
    l2_reg_H=0,
    update_H=True,
    verbose=0,
):
    """Compute Non-negative Matrix Factorization with Multiplicative Update.

    The objective function is _beta_divergence(X, WH) and is minimized with an
    alternating minimization of W and H. Each minimization is done with a
    Multiplicative Update.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Constant input matrix.

    W : array-like of shape (n_samples, n_components)
        Initial guess for the solution.

    H : array-like of shape (n_components, n_features)
        Initial guess for the solution.

    beta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'
        String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros.

    max_iter : int, default=200
        Number of iterations.

    tol : float, default=1e-4
        Tolerance of the stopping condition.

    l1_reg_W : float, default=0.
        L1 regularization parameter for W.

    l1_reg_H : float, default=0.
        L1 regularization parameter for H.

    l2_reg_W : float, default=0.
        L2 regularization parameter for W.

    l2_reg_H : float, default=0.
        L2 regularization parameter for H.

    update_H : bool, default=True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    verbose : int, default=0
        The verbosity level.

    Returns
    -------
    W : ndarray of shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : ndarray of shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        The number of iterations done by the algorithm.
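
    Examples
    --------
    A minimal run with the generalized Kullback-Leibler loss (outputs
    omitted; they depend on the random initialization):

    >>> import numpy as np
    >>> X = np.abs(np.random.RandomState(0).randn(4, 3))
    >>> W, H = _initialize_nmf(X, n_components=2, init='random',
    ...                        random_state=0)
    >>> W, H, n_iter = _fit_multiplicative_update(X, W, H, beta_loss=1)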

    References
    ----------
    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    """
    start_time = time.time()

    beta_loss = _beta_loss_to_float(beta_loss)

    # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]
    if beta_loss < 1:
        gamma = 1.0 / (2.0 - beta_loss)
    elif beta_loss > 2:
        gamma = 1.0 / (beta_loss - 1.0)
    else:
        gamma = 1.0

    # used for the convergence criterion
    error_at_init = _beta_divergence(X, W, H, beta_loss, square_root=True)
    previous_error = error_at_init

    H_sum, HHt, XHt = None, None, None
    for n_iter in range(1, max_iter + 1):
        # update W
        # H_sum, HHt and XHt are saved and reused if not update_H
        delta_W, H_sum, HHt, XHt = _multiplicative_update_w(
            X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma, H_sum, HHt, XHt, update_H
        )
        W *= delta_W

        # necessary for stability with beta_loss < 1
        if beta_loss < 1:
            W[W < np.finfo(np.float64).eps] = 0.0

        # update H
        if update_H:
            delta_H = _multiplicative_update_h(
                X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma
            )
            H *= delta_H

            # These values will be recomputed since H changed
            H_sum, HHt, XHt = None, None, None

            # necessary for stability with beta_loss < 1
            if beta_loss <= 1:
                H[H < np.finfo(np.float64).eps] = 0.0

        # test convergence criterion every 10 iterations
        if tol > 0 and n_iter % 10 == 0:
            error = _beta_divergence(X, W, H, beta_loss, square_root=True)

            if verbose:
                iter_time = time.time()
                print(
                    "Epoch %02d reached after %.3f seconds, error: %f"
                    % (n_iter, iter_time - start_time, error)
                )

            if (previous_error - error) / error_at_init < tol:
                break
            previous_error = error

    # do not print if we have already printed in the convergence test
    if verbose and (tol == 0 or n_iter % 10 != 0):
        end_time = time.time()
        print(
            "Epoch %02d reached after %.3f seconds." % (n_iter, end_time - start_time)
        )

    return W, H, n_iter


def non_negative_factorization(
    X,
    W=None,
    H=None,
    n_components=None,
    *,
    init="warn",
    update_H=True,
    solver="cd",
    beta_loss="frobenius",
    tol=1e-4,
    max_iter=200,
    alpha="deprecated",
    alpha_W=0.0,
    alpha_H="same",
    l1_ratio=0.0,
    regularization="deprecated",
    random_state=None,
    verbose=0,
    shuffle=False,
):
    """Compute Non-negative Matrix Factorization (NMF).
    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is:

        .. math::

            0.5 * ||X - WH||_{loss}^2

            + alpha\_W * l1_{ratio} * n\_features * ||vec(W)||_1

            + alpha\_H * l1_{ratio} * n\_samples * ||vec(H)||_1

            + 0.5 * alpha\_W * (1 - l1_{ratio}) * n\_features * ||W||_{Fro}^2

            + 0.5 * alpha\_H * (1 - l1_{ratio}) * n\_samples * ||H||_{Fro}^2

    Where:

    :math:`||A||_{Fro}^2 = \sum_{i,j} A_{ij}^2` (Frobenius norm)

    :math:`||vec(A)||_1 = \sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)

    The generic norm :math:`||X - WH||_{loss}^2` may represent
    the Frobenius norm or another supported beta-divergence loss.
    The choice between options is controlled by the `beta_loss` parameter.

    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for
    `H` to keep their impact balanced with respect to one another and to the data fit
    term as independent as possible of the size `n_samples` of the training set.

    The objective function is minimized with an alternating minimization of W
    and H. If H is given and update_H=False, it solves for W only.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Constant matrix.

    W : array-like of shape (n_samples, n_components), default=None
        If init='custom', it is used as initial guess for the solution.

    H : array-like of shape (n_components, n_features), default=None
        If init='custom', it is used as initial guess for the solution.
        If update_H=False, it is used as a constant, to solve for W only.

    n_components : int, default=None
        Number of components, if n_components is not set all features
        are kept.

    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None
        Method used to initialize the procedure.

        Valid options:

        - None: 'nndsvd' if n_components < n_features, otherwise 'random'.

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H if `update_H=True`. If
          `update_H=False`, then only custom matrix H is used.

        .. versionchanged:: 0.23
            The default value of `init` changed from 'random' to None in 0.23.

    update_H : bool, default=True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    solver : {'cd', 'mu'}, default='cd'
        Numerical solver to use:

        - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical
            Alternating Least Squares (Fast HALS).

        - 'mu' is a Multiplicative Update solver.

        .. versionadded:: 0.17
           Coordinate Descent solver.

        .. versionadded:: 0.19
           Multiplicative Update solver.

    beta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros. Used only in 'mu' solver.

        .. versionadded:: 0.19

    tol : float, default=1e-4
        Tolerance of the stopping condition.

    max_iter : int, default=200
        Maximum number of iterations before timing out.

    alpha : float, default=0.0
        Constant that multiplies the regularization terms. Set it to zero to have no
        regularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the
        regularization terms are not scaled by the `n_features` (resp. `n_samples`)
        factors for `W` (resp. `H`).

        .. deprecated:: 1.0
            The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.
            Use `alpha_W` and `alpha_H` instead.

    alpha_W : float, default=0.0
        Constant that multiplies the regularization terms of `W`. Set it to zero
        (default) to have no regularization on `W`.

        .. versionadded:: 1.0

    alpha_H : float or "same", default="same"
        Constant that multiplies the regularization terms of `H`. Set it to zero to
        have no regularization on `H`. If "same" (default), it takes the same value as
        `alpha_W`.

        .. versionadded:: 1.0

    l1_ratio : float, default=0.0
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

    regularization : {'both', 'components', 'transformation'}, default=None
        Select whether the regularization affects the components (H), the
        transformation (W), both or none of them.

        .. deprecated:: 1.0
            The `regularization` parameter is deprecated in 1.0 and will be removed in
            1.2. Use `alpha_W` and `alpha_H` instead.

    random_state : int, RandomState instance or None, default=None
        Used for NMF initialisation (when ``init`` == 'nndsvdar' or
        'random'), and in Coordinate Descent. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    verbose : int, default=0
        The verbosity level.

    shuffle : bool, default=False
        If true, randomize the order of coordinates in the CD solver.

    Returns
    -------
    W : ndarray of shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : ndarray of shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        Actual number of iterations.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> from sklearn.decomposition import non_negative_factorization
    >>> W, H, n_iter = non_negative_factorization(X, n_components=2,
    ... init='random', random_state=0)
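
    If ``H`` is kept fixed, the same function solves for ``W`` alone; a
    follow-up call reusing the factors from the example above:

    >>> W2, _, _ = non_negative_factorization(X, n_components=2, H=H,
    ...     update_H=False, init='custom', random_state=0)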

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    """
    X = check_array(X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32])

    est = NMF(
        n_components=n_components,
        init=init,
        solver=solver,
        beta_loss=beta_loss,
        tol=tol,
        max_iter=max_iter,
        random_state=random_state,
        alpha=alpha,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        l1_ratio=l1_ratio,
        verbose=verbose,
        shuffle=shuffle,
        regularization=regularization,
    )

    with config_context(assume_finite=True):
        W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)

    return W, H, n_iter


class NMF(TransformerMixin, BaseEstimator):
    """Non-Negative Matrix Factorization (NMF).
    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is:

        .. math::

            0.5 * ||X - WH||_{loss}^2

            + alpha\_W * l1_{ratio} * n\_features * ||vec(W)||_1

            + alpha\_H * l1_{ratio} * n\_samples * ||vec(H)||_1

            + 0.5 * alpha\_W * (1 - l1_{ratio}) * n\_features * ||W||_{Fro}^2

            + 0.5 * alpha\_H * (1 - l1_{ratio}) * n\_samples * ||H||_{Fro}^2

    Where:

    :math:`||A||_{Fro}^2 = \sum_{i,j} A_{ij}^2` (Frobenius norm)

    :math:`||vec(A)||_1 = \sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)

    The generic norm :math:`||X - WH||_{loss}` may represent
    the Frobenius norm or another supported beta-divergence loss.
    The choice between options is controlled by the `beta_loss` parameter.

    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for
    `H` to keep their impact balanced with respect to one another and to the data fit
    term as independent as possible of the size `n_samples` of the training set.

    The objective function is minimized with an alternating minimization of W
    and H.

    Read more in the :ref:`User Guide <NMF>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of components, if n_components is not set all features
        are kept.

    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None
        Method used to initialize the procedure.
        Default: None.
        Valid options:

        - `None`: 'nndsvd' if n_components <= min(n_samples, n_features),
          otherwise random.

        - `'random'`: non-negative random matrices, scaled with:
          sqrt(X.mean() / n_components)

        - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)
          initialization (better for sparseness)

        - `'nndsvda'`: NNDSVD with zeros filled with the average of X
          (better when sparsity is not desired)

        - `'nndsvdar'`: NNDSVD with zeros filled with small random values
          (generally faster, less accurate alternative to NNDSVDa
          for when sparsity is not desired)

        - `'custom'`: use custom matrices W and H

    solver : {'cd', 'mu'}, default='cd'
        Numerical solver to use:
        'cd' is a Coordinate Descent solver.
        'mu' is a Multiplicative Update solver.

        .. versionadded:: 0.17
           Coordinate Descent solver.

        .. versionadded:: 0.19
           Multiplicative Update solver.

    beta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros. Used only in 'mu' solver.

        .. versionadded:: 0.19

    tol : float, default=1e-4
        Tolerance of the stopping condition.

    max_iter : int, default=200
        Maximum number of iterations before timing out.

    random_state : int, RandomState instance or None, default=None
        Used for initialisation (when ``init`` == 'nndsvdar' or
        'random'), and in Coordinate Descent. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    alpha : float, default=0.0
        Constant that multiplies the regularization terms. Set it to zero to
        have no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,
        the regularization terms are not scaled by the `n_features` (resp. `n_samples`)
        factors for `W` (resp. `H`).

        .. versionadded:: 0.17
           *alpha* used in the Coordinate Descent solver.

        .. deprecated:: 1.0
            The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.
            Use `alpha_W` and `alpha_H` instead.

    alpha_W : float, default=0.0
        Constant that multiplies the regularization terms of `W`. Set it to zero
        (default) to have no regularization on `W`.

        .. versionadded:: 1.0

    alpha_H : float or "same", default="same"
        Constant that multiplies the regularization terms of `H`. Set it to zero to
        have no regularization on `H`. If "same" (default), it takes the same value as
        `alpha_W`.

        .. versionadded:: 1.0

    l1_ratio : float, default=0.0
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

        .. versionadded:: 0.17
           Regularization parameter *l1_ratio* used in the Coordinate Descent
           solver.

    verbose : int, default=0
        Whether to be verbose.

    shuffle : bool, default=False
        If true, randomize the order of coordinates in the CD solver.

        .. versionadded:: 0.17
           *shuffle* parameter used in the Coordinate Descent solver.

    regularization : {'both', 'components', 'transformation', None}, default='both'
        Select whether the regularization affects the components (H), the
        transformation (W), both or none of them.

        .. versionadded:: 0.24

        .. deprecated:: 1.0
            The `regularization` parameter is deprecated in 1.0 and will be removed in
            1.2. Use `alpha_W` and `alpha_H` instead.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Factorization matrix, sometimes called 'dictionary'.

    n_components_ : int
        The number of components. It is same as the `n_components` parameter
        if it was given. Otherwise, it will be same as the number of
        features.

    reconstruction_err_ : float
        Frobenius norm of the matrix difference, or beta-divergence, between
        the training data ``X`` and the reconstructed data ``WH`` from
        the fitted model.

    n_iter_ : int
        Actual number of iterations.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    PCA : Principal component analysis.
    SparseCoder : Find a sparse representation of data from a fixed,
        precomputed dictionary.
    SparsePCA : Sparse Principal Components Analysis.
    TruncatedSVD : Dimensionality reduction using truncated SVD.

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> from sklearn.decomposition import NMF
    >>> model = NMF(n_components=2, init='random', random_state=0)
    >>> W = model.fit_transform(X)
    >>> H = model.components_
    """

    def __init__(
        self,
        n_components=None,
        *,
        init="warn",
        solver="cd",
        beta_loss="frobenius",
        tol=1e-4,
        max_iter=200,
        random_state=None,
        alpha="deprecated",
        alpha_W=0.0,
        alpha_H="same",
        l1_ratio=0.0,
        verbose=0,
        shuffle=False,
        regularization="deprecated",
    ):
        self.n_components = n_components
        self.init = init
        self.solver = solver
        self.beta_loss = beta_loss
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.alpha = alpha
        self.alpha_W = alpha_W
        self.alpha_H = alpha_H
        self.l1_ratio = l1_ratio
        self.verbose = verbose
        self.shuffle = shuffle
        self.regularization = regularization

    def _more_tags(self):
        return {"requires_positive_X": True}

    def _check_params(self, X):
        # n_components
        self._n_components = self.n_components
        if self._n_components is None:
            self._n_components = X.shape[1]
        if (
            not isinstance(self._n_components, numbers.Integral)
            or self._n_components <= 0
        ):
            raise ValueError(
                "Number of components must be a positive integer; got "
                "(n_components=%r)" % self._n_components
            )

        # max_iter
        if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0:
            raise ValueError(
                "Maximum number of iterations must be a positive integer; "
                "got (max_iter=%r)" % self.max_iter
            )

        # tol
        if not isinstance(self.tol, numbers.Number) or self.tol < 0:
            raise ValueError(
                "Tolerance for stopping criteria must be positive; "
                "got (tol=%r)" % self.tol
            )

        # beta_loss
        self._beta_loss = _beta_loss_to_float(self.beta_loss)

        # solver
        allowed_solver = ("cd", "mu")
        if self.solver not in allowed_solver:
            raise ValueError(
                "Invalid solver parameter: got %r instead of one of %r"
                % (self.solver, allowed_solver)
            )
        if self.solver != "mu" and self.beta_loss not in (2, "frobenius"):
            # 'mu' is the only solver that handles other beta losses
            # than 'frobenius'
            raise ValueError(
                "Invalid beta_loss parameter: solver %r does not handle "
                "beta_loss = %r" % (self.solver, self.beta_loss)
            )
        if self.solver == "mu" and self.init == "nndsvd":
            warnings.warn(
                "The multiplicative update ('mu') solver cannot update "
                "zeros present in the initialization, and so leads to "
                "poorer results when used jointly with init='nndsvd'. "
                "You may try init='nndsvda' or init='nndsvdar' instead.",
                UserWarning,
            )

        # alpha and regularization are deprecated in favor of alpha_W/alpha_H
        if self.alpha != "deprecated":
            warnings.warn(
                "`alpha` was deprecated in version 1.0 and will be removed "
                "in 1.2. Use `alpha_W` and `alpha_H` instead",
                FutureWarning,
            )
            alpha = self.alpha
        else:
            alpha = 0.0

        if self.regularization != "deprecated":
            warnings.warn(
                "`regularization` was deprecated in version 1.0 and will be "
                "removed in 1.2. Use `alpha_W` and `alpha_H` instead",
                FutureWarning,
            )
            allowed_regularization = ("both", "components", "transformation", None)
            if self.regularization not in allowed_regularization:
                raise ValueError(
                    "Invalid regularization parameter: got %r instead of "
                    "one of %r" % (self.regularization, allowed_regularization)
                )
            regularization = self.regularization
        else:
            regularization = "both"

        (
            self._l1_reg_W,
            self._l1_reg_H,
            self._l2_reg_W,
            self._l2_reg_H,
        ) = _compute_regularization(
            alpha, self.alpha_W, self.alpha_H, self.l1_ratio, regularization
        )

        return self

    def _check_w_h(self, X, W, H, update_H):
        # check W and H, or initialize them
        n_samples, n_features = X.shape
        if self.init == "custom" and update_H:
            _check_init(H, (self._n_components, n_features), "NMF (input H)")
            _check_init(W, (n_samples, self._n_components), "NMF (input W)")
            if H.dtype != X.dtype or W.dtype != X.dtype:
                raise TypeError(
                    "H and W should have the same dtype as X. Got "
                    "H.dtype = {} and W.dtype = {}.".format(H.dtype, W.dtype)
                )
        elif not update_H:
            _check_init(H, (self._n_components, n_features), "NMF (input H)")
            if H.dtype != X.dtype:
                raise TypeError(
                    "H should have the same dtype as X. Got "
                    "H.dtype = {}.".format(H.dtype)
                )
            # 'mu' solver should not be initialized by zeros
            if self.solver == "mu":
                avg = np.sqrt(X.mean() / self._n_components)
                W = np.full((n_samples, self._n_components), avg, dtype=X.dtype)
            else:
                W = np.zeros((n_samples, self._n_components), dtype=X.dtype)
        else:
            W, H = _initialize_nmf(
                X, self._n_components, init=self.init, random_state=self.random_state
            )
        return W, H

    def _scale_regularization(self, X):
        n_samples, n_features = X.shape
        if self.alpha_W != 0 or self.alpha_H != "same":
            # if alpha_W or alpha_H is not left to its default value, we
            # ignore alpha and regularization, and the regularization terms
            # are scaled by n_features (resp. n_samples)
            l1_reg_W = n_features * self._l1_reg_W
            l1_reg_H = n_samples * self._l1_reg_H
            l2_reg_W = n_features * self._l2_reg_W
            l2_reg_H = n_samples * self._l2_reg_H
        else:
            # TODO remove in 1.2
            l1_reg_W = self._l1_reg_W
            l1_reg_H = self._l1_reg_H
            l2_reg_W = self._l2_reg_W
            l2_reg_H = self._l2_reg_H
        return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H

    def fit_transform(self, X, y=None, W=None, H=None):
        """Learn a NMF model for the data X and returns the transformed data.

        This is more efficient than calling fit followed by transform.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        W : array-like of shape (n_samples, n_components)
            If init='custom', it is used as initial guess for the solution.

        H : array-like of shape (n_components, n_features)
            If init='custom', it is used as initial guess for the solution.

        Returns
        -------
        W : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        X = self._validate_data(
            X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32]
        )

        with config_context(assume_finite=True):
            W, H, n_iter = self._fit_transform(X, W=W, H=H)

        self.reconstruction_err_ = _beta_divergence(
            X, W, H, self._beta_loss, square_root=True
        )

        self.n_components_ = H.shape[0]
        self.components_ = H
        self.n_iter_ = n_iter

        return W

    def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
        """Learn a NMF model for the data X and returns the transformed data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data matrix to be decomposed

        y : Ignored

        W : array-like of shape (n_samples, n_components)
            If init='custom', it is used as initial guess for the solution.

        H : array-like of shape (n_components, n_features)
            If init='custom', it is used as initial guess for the solution.
            If update_H=False, it is used as a constant, to solve for W only.

        update_H : bool, default=True
            If True, both W and H will be estimated from initial guesses,
            this corresponds to a call to the 'fit_transform' method.
            If False, only W will be estimated, this corresponds to a call
            to the 'transform' method.

        Returns
        -------
        W : ndarray of shape (n_samples, n_components)
            Transformed data.

        H : ndarray of shape (n_components, n_features)
            Factorization matrix, sometimes called 'dictionary'.

        n_iter_ : int
            Actual number of iterations.
        zNMF (input X)r   z|When beta_loss <= 0 and X contains zeros, the solver may diverge. Please add small values to X, or use a positive beta_loss.r   )r   r   rz   r[   r   )r   r   zInvalid solver parameter '%s'.zLMaximum number of iterations %d reached. Increase it to improve convergence.)r   r   ra   r   r   r   r   r   r   r   r   r   rz   r[   r   r_   rU   r	   )r   r   rs   r.   r/   r   rI   rJ   rK   rL   r   r   r   r   r     sV    "



zNMF._fit_transformc             K   s   | j |f| | S )aS  Learn a NMF model for the data X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        **params : kwargs
            Parameters (keyword arguments) and values passed to
            the fit_transform instance.

        Returns
        -------
        self : object
            Returns the instance itself.
        )r   )r   r   rs   paramsr   r   r   fitn  s    zNMF.fitc          	   C   sR   t |  | j|dtjtjgdd}tdd | j|| jdd^}}W dQ R X |S )a  Transform the data X according to the fitted NMF model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        W : ndarray of shape (n_samples, n_components)
            Transformed data.
        )r   r   F)r   rd   resetT)r   )r/   r   N)r   r   r   r   r   r   r   r   )r   r   r.   _r   r   r   	transform  s     zNMF.transformc             C   s   t |  t|| jS )a  Transform data back to its original space.

        .. versionadded:: 0.18

        Parameters
        ----------
        W : {ndarray, sparse matrix} of shape (n_samples, n_components)
            Transformed data matrix.

        Returns
        -------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Returns a data matrix of the original shape.
        )r   r   r   r   )r   r.   r   r   r   inverse_transform  s    zNMF.inverse_transform)N)NNN)NNNT)N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   X  s2    Y\
*
`
r   )F)rU   rV   N)
r~   r   r   r   r   r   Tr   FN)NNNT)	rN   r   r~   r   r   r   r   Tr   )NNN)1r   rQ   numpyr   Zscipy.sparsesparser"   r   r_   mathr   Z_cdnmf_fastr   _configr   baser   r   
exceptionsr	   utilsr
   r   Zutils.extmathr   r   r   Zutils.validationr   r   r   r   rj   r)   r   r   r   r8   r(   rM   r!   rw   r}   r   r   r   r   r   r   r   r   r   r   <module>   s   
j
 "         
s   
_V        
    \