import numpy as np

from scipy import optimize

from ..base import BaseEstimator, RegressorMixin
from ._base import LinearModel
from ..utils import axis0_safe_slice
from ..utils.validation import _check_sample_weight
from ..utils.extmath import safe_sparse_dot
from ..utils.optimize import _check_optimize_result


def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
    """Returns the Huber loss and the gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features + 1,) or (n_features + 2,)
        Feature vector.
        w[:n_features] gives the coefficients
        w[-1] gives the scale factor and if the intercept is fit w[-2]
        gives the intercept factor.

    X : ndarray of shape (n_samples, n_features)
        Input data.

    y : ndarray of shape (n_samples,)
        Target vector.

    epsilon : float
        Robustness of the Huber estimator.

    alpha : float
        Regularization parameter.

    sample_weight : ndarray of shape (n_samples,), default=None
        Weight assigned to each sample.

    Returns
    -------
    loss : float
        Huber loss.

    gradient : ndarray, shape (len(w))
        Returns the derivative of the Huber loss with respect to each
        coefficient, intercept and the scale as a vector.
    r   r   Ng       @r   g      g       )shapenpsumr
   absZcount_nonzerodotTzerosr   Z	ones_like) wXyepsilonalphasample_weight_Z
n_featuresfit_interceptZ	interceptsigmaZ	n_samplesZlinear_lossZabs_linear_lossZoutliers_maskZoutliersZnum_outliersZn_non_outliersZoutliers_swZn_sw_outliersZoutlier_lossZnon_outliersZweighted_non_outliersZweighted_lossZsquared_lossZgradZX_non_outliersZsigned_outliersZsigned_outliers_maskZ
X_outliersZsw_outliersZloss r   M/var/www/html/venv/lib/python3.7/site-packages/sklearn/linear_model/_huber.py_huber_loss_and_gradient   sX    #






"r    c               @   s2   e Zd ZdZdddddddd	d
ZdddZdS )HuberRegressora  Linear regression model that is robust to outliers.

    The Huber Regressor optimizes the squared loss for the samples where
    ``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples
    where ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters
    to be optimized. The parameter sigma makes sure that if y is scaled up
    or down by a certain factor, one does not need to rescale epsilon to
    achieve the same robustness. Note that this does not take into account
    the fact that the different features of X may be of different scales.

    This makes sure that the loss function is not heavily influenced by the
    outliers while not completely ignoring their effect.

    Read more in the :ref:`User Guide <huber_regression>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    epsilon : float, greater than 1.0, default=1.35
        The parameter epsilon controls the number of samples that should be
        classified as outliers. The smaller the epsilon, the more robust it is
        to outliers.

    max_iter : int, default=100
        Maximum number of iterations that
        ``scipy.optimize.minimize(method="L-BFGS-B")`` should run for.

    alpha : float, default=0.0001
        Regularization parameter.

    warm_start : bool, default=False
        This is useful if the stored attributes of a previously used model
        have to be reused. If set to False, then the coefficients will
        be rewritten for every call to fit.
        See :term:`the Glossary <warm_start>`.

    fit_intercept : bool, default=True
        Whether or not to fit the intercept. This can be set to False
        if the data is already centered around the origin.

    tol : float, default=1e-05
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``
        where pg_i is the i-th component of the projected gradient.

    Attributes
    ----------
    coef_ : array, shape (n_features,)
        Fitted coefficients, obtained by optimizing the Huber loss.

    intercept_ : float
        Bias.

    scale_ : float
        The value by which ``|y - X'w - c|`` is scaled down.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations that
        ``scipy.optimize.minimize(method="L-BFGS-B")`` has run for.

        .. versionchanged:: 0.20

            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed
            ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.

    outliers_ : array, shape (n_samples,)
        A boolean mask which is set to True where the samples are identified
        as outliers.

    See Also
    --------
    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.
    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics,
           Concomitant scale estimates, p. 172.
    .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.
           https://statweb.stanford.edu/~owen/reports/hhu.pdf

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import HuberRegressor, LinearRegression
    >>> from sklearn.datasets import make_regression
    >>> rng = np.random.RandomState(0)
    >>> X, y, coef = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
    >>> X[:4] = rng.uniform(10, 20, (4, 2))
    >>> y[:4] = rng.uniform(10, 20, 4)
    >>> huber = HuberRegressor().fit(X, y)
    >>> huber.score(X, y)
    -7.284...
    >>> huber.predict(X[:1,])
    array([806.7200...])
    >>> linear = LinearRegression().fit(X, y)
    >>> print("True coefficients:", coef)
    True coefficients: [20.4923...  34.1698...]
    >>> print("Huber coefficients:", huber.coef_)
    Huber coefficients: [17.7906... 31.0106...]
    >>> print("Linear Regression coefficients:", linear.coef_)
    Linear Regression coefficients: [-1.9221...  7.0226...]
    """

    def __init__(
        self,
        *,
        epsilon=1.35,
        max_iter=100,
        alpha=0.0001,
        warm_start=False,
        fit_intercept=True,
        tol=1e-05,
    ):
        self.epsilon = epsilon
        self.max_iter = max_iter
        self.alpha = alpha
        self.warm_start = warm_start
        self.fit_intercept = fit_intercept
        self.tol = tol

    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        sample_weight : array-like, shape (n_samples,), default=None
            Weight given to each sample.

        Returns
        -------
        self : object
            Fitted `HuberRegressor` estimator.
        FZcsrT)copyZaccept_sparseZ	y_numericZdtypeg      ?z6epsilon should be greater than or equal to 1.0, got %fcoef_r   r   r   r   
   zL-BFGS-B)maxiterZgtolZiprint)methodZjacargsoptionsboundszEHuberRegressor convergence failed: l-BFGS-b solver terminated with %sZlbfgsr   g        N)"Z_validate_datar   Zfloat64Zfloat32r	   r   
ValueErrorr$   hasattrZconcatenater)   Z
intercept_Zscale_r   r   r   ZtileinfZfinfoZepsr   Zminimizer    r   r#   r%   xstatusmessager   Zn_iter_r   r
   Z	outliers_)r&   r   r   r   
parametersr/   Zopt_resZresidualr   r   r   fit  sR    

 

zHuberRegressor.fit)N)__name__
__module____qualname____doc__r'   r7   r   r   r   r   r!   ~   s   u	r!   )N)numpyr   Zscipyr   baser   r   _baser   utilsr   Zutils.validationr	   Zutils.extmathr
   Zutils.optimizer   r    r!   r   r   r   r   <module>   s   
n
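
# A short usage sketch (illustrative, not part of the library): fit on data
# with a few corrupted targets and read back the ``outliers_`` mask that
# ``fit`` derives from ``|residual| > scale_ * epsilon``. The data shapes,
# seed, and corruption below are arbitrary assumptions:
#
#     import numpy as np
#     from sklearn.linear_model import HuberRegressor
#
#     rng = np.random.RandomState(42)
#     X = rng.randn(100, 1)
#     y = 3.0 * X.ravel() + 0.5 * rng.randn(100)
#     y[:5] += 30.0                           # corrupt the first five targets
#
#     huber = HuberRegressor().fit(X, y)
#     print(huber.coef_, huber.intercept_)    # roughly [3.0] and 0.0 here
#     print(np.flatnonzero(huber.outliers_))  # should include indices 0..4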