# We cannot use pytest here, because we run
# build_tools/azure/test_pytest_soft_dependency.sh on these
# tests to make sure estimator_checks works without pytest.

import unittest
import sys
import warnings

import numpy as np
import scipy.sparse as sp
import joblib

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import make_multilabel_classification
from sklearn.utils import deprecated
from sklearn.utils._testing import (
    raises,
    ignore_warnings,
    MinimalClassifier,
    MinimalRegressor,
    MinimalTransformer,
    SkipTest,
)
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.fixes import np_version, parse_version
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.mixture import GaussianMixture
from sklearn.cluster import MiniBatchKMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression
from sklearn.svm import SVC, NuSVC
from sklearn.neighbors import KNeighborsRegressor
from sklearn.utils.validation import check_array
from sklearn.utils import all_estimators
from sklearn.exceptions import SkipTestWarning
from sklearn.utils.metaestimators import available_if

from sklearn.utils.estimator_checks import (
    _NotAnArray,
    _set_checking_parameters,
    check_class_weight_balanced_linear_classifier,
    check_classifier_data_not_an_array,
    check_classifiers_multilabel_output_format_decision_function,
    check_classifiers_multilabel_output_format_predict,
    check_classifiers_multilabel_output_format_predict_proba,
    check_dataframe_column_names_consistency,
    check_estimator,
    check_estimator_get_tags_default_keys,
    check_estimators_unfitted,
    check_fit_score_takes_y,
    check_no_attributes_set_in_init,
    check_regressor_data_not_an_array,
    check_outlier_corruption,
    set_random_state,
    check_fit_check_is_fitted,
)


class CorrectNotFittedError(ValueError):
    """Exception class to raise if estimator is used before fitting.

    Like NotFittedError, it inherits from ValueError, but not from
    AttributeError. Used for testing only.
    """


class BaseBadClassifier(ClassifierMixin, BaseEstimator):
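    # Classifier whose fit does no validation and sets no state; base class
    # for several bad toy estimators below.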
    def fit(self, X, y):
        return self

    def predict(self, X):
        return np.ones(X.shape[0])


class ChangesDict(BaseEstimator):
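    # Estimator that changes its __dict__ (sets key=1000) during predict.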
    def __init__(self, key=0):
        self.key = key

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self

    def predict(self, X):
        X = check_array(X)
        self.key = 1000
        return np.ones(X.shape[0])


class SetsWrongAttribute(BaseEstimator):
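    # Estimator that adds a public attribute (no trailing underscore) during fit.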
    def __init__(self, acceptable_key=0):
        self.acceptable_key = acceptable_key

    def fit(self, X, y=None):
        self.wrong_attribute = 0
        X, y = self._validate_data(X, y)
        return self


class ChangesWrongAttribute(BaseEstimator):
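    # Estimator that mutates one of its constructor parameters during fit.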
    def __init__(self, wrong_attribute=0):
        self.wrong_attribute = wrong_attribute

    def fit(self, X, y=None):
        self.wrong_attribute = 1
        X, y = self._validate_data(X, y)
        return self


class ChangesUnderscoreAttribute(BaseEstimator):
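    # Estimator that only sets a private attribute during fit, which is allowed.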
    def fit(self, X, y=None):
        self._good_attribute = 1
        X, y = self._validate_data(X, y)
        return self


class RaisesErrorInSetParams(BaseEstimator):
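    # Estimator whose set_params raises an error for invalid parameter values.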
    def __init__(self, p=0):
        self.p = p

    def set_params(self, **kwargs):
        if "p" in kwargs:
            p = kwargs.pop("p")
            if p < 0:
                raise ValueError("p can't be less than 0")
            self.p = p
        return super().set_params(**kwargs)

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self


class HasMutableParameters(BaseEstimator):
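    # Estimator with a mutable object instance as default parameter value.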
    def __init__(self, p=object()):
        self.p = p

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self


class HasImmutableParameters(BaseEstimator):
    # Note that `object` is the class itself, not an instance, thus immutable.
    def __init__(self, p=42, q=np.int32(42), r=object):
        self.p = p
        self.q = q
        self.r = r

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self


class ModifiesValueInsteadOfRaisingError(BaseEstimator):
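    # Estimator whose set_params silently clips invalid values instead of raising.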
    def __init__(self, p=0):
        self.p = p

    def set_params(self, **kwargs):
        if "p" in kwargs:
            p = kwargs.pop("p")
            if p < 0:
                p = 0
            self.p = p
        return super().set_params(**kwargs)

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self


class ModifiesAnotherValue(BaseEstimator):
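    # Estimator whose set_params changes "b" as a side effect of setting "a".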
    def __init__(self, a=0, b="method1"):
        self.a = a
        self.b = b

    def set_params(self, **kwargs):
        if "a" in kwargs:
            a = kwargs.pop("a")
            self.a = a
            if a is None:
                kwargs.pop("b")
                self.b = "method2"
        return super().set_params(**kwargs)

    def fit(self, X, y=None):
        X, y = self._validate_data(X, y)
        return self


class NoCheckinPredict(BaseBadClassifier):
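    # Classifier whose predict does no input validation (no NaN/inf checks).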
    def fit(self, X, y):
        X, y = self._validate_data(X, y)
        return self


class NoSparseClassifier(BaseBadClassifier):
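    # Classifier that fails on sparse input with an uninformative error.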
    def fit(self, X, y):
        X, y = self._validate_data(X, y, accept_sparse=["csr", "csc"])
        if sp.issparse(X):
            raise ValueError("Nonsensical Error")
        return self

    def predict(self, X):
        X = check_array(X)
        return np.ones(X.shape[0])


class CorrectNotFittedErrorClassifier(BaseBadClassifier):
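    # Classifier whose predict correctly raises (via check_is_fitted) when unfitted.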
    def fit(self, X, y):
        X, y = self._validate_data(X, y)
        self.coef_ = np.ones(X.shape[1])
        return self

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return np.ones(X.shape[0])


class NoSampleWeightPandasSeriesType(BaseEstimator):
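    # Estimator that rejects a sample_weight passed as a pandas.Series.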
    def fit(self, X, y, sample_weight=None):
        # Convert data
        X, y = self._validate_data(
            X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
        )
        # Function is only called after we verify that pandas is installed
        from pandas import Series

        if isinstance(sample_weight, Series):
            raise ValueError(
                "Estimator does not accept 'sample_weight'of type pandas.Series"
            )
        return self

    def predict(self, X):
        X = check_array(X)
        return np.ones(X.shape[0])


class BadBalancedWeightsClassifier(BaseBadClassifier):
    def __init__(self, class_weight=None):
        self.class_weight = class_weight

    def fit(self, X, y):
        from sklearn.preprocessing import LabelEncoder
        from sklearn.utils import compute_class_weight

        label_encoder = LabelEncoder().fit(y)
        classes = label_encoder.classes_
        class_weight = compute_class_weight(self.class_weight, classes=classes, y=y)

        # Intentionally modify the balanced class_weight
        # to simulate a bug and raise an exception
        if self.class_weight == "balanced":
            class_weight += 1.0

        # Simply assign the class_weight to coef_
        self.coef_ = class_weight
        return self


class BadTransformerWithoutMixin(BaseEstimator):
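    # Transformer without TransformerMixin, so it lacks fit_transform.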
    def fit(self, X, y=None):
        X = self._validate_data(X)
        return self

    def transform(self, X):
        X = check_array(X)
        return X


class NotInvariantPredict(BaseEstimator):
    def fit(self, X, y):
        # Convert data
        X, y = self._validate_data(
            X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
        )
        return self

    def predict(self, X):
        # return ones if X has more than one sample, else zeros
        X = check_array(X)
        if X.shape[0] > 1:
            return np.ones(X.shape[0])
        return np.zeros(X.shape[0])


class NotInvariantSampleOrder(BaseEstimator):
    def fit(self, X, y):
        X, y = self._validate_data(
            X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
        )
        # store the original X to check for sample order later
        self._X = X
        return self

    def predict(self, X):
        X = check_array(X)
        # if the input contains the same elements but different sample order,
        # then just return zeros.
        if (
            np.array_equiv(np.sort(X, axis=0), np.sort(self._X, axis=0))
            and (X != self._X).any()
        ):
            return np.zeros(X.shape[0])
        return X[:, 0]


class LargeSparseNotSupportedClassifier(BaseEstimator):
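    # Classifier that accepts large sparse input but rejects 64-bit indices.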
    def fit(self, X, y):
        X, y = self._validate_data(
            X,
            y,
            accept_sparse=("csr", "csc", "coo"),
            accept_large_sparse=True,
            multi_output=True,
            y_numeric=True,
        )
        if sp.issparse(X):
            if X.getformat() == "coo":
                if X.row.dtype == "int64" or X.col.dtype == "int64":
                    raise ValueError("Estimator doesn't support 64-bit indices")
            elif X.getformat() in ["csc", "csr"]:
                assert "int64" not in (
                    X.indices.dtype,
                    X.indptr.dtype,
                ), "Estimator doesn't support 64-bit indices"

        return self


class SparseTransformer(BaseEstimator):
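    # Transformer whose transform returns a sparse CSR matrix.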
    def fit(self, X, y=None):
        self.X_shape_ = self._validate_data(X).shape
        return self

    def fit_transform(self, X, y=None):
        return self.fit(X, y).transform(X)

    def transform(self, X):
        X = check_array(X)
        if X.shape[1] != self.X_shape_[1]:
            raise ValueError("Bad number of features")
        return sp.csr_matrix(X)


class EstimatorInconsistentForPandas(BaseEstimator):
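    # Estimator that fits a different value for DataFrame vs. array inputs.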
    def fit(self, X, y):
        try:
            from pandas import DataFrame

            if isinstance(X, DataFrame):
                self.value_ = X.iloc[0, 0]
            else:
                X = check_array(X)
                self.value_ = X[1, 0]
            return self

        except ImportError:
            X = check_array(X)
            self.value_ = X[1, 0]
            return self

    def predict(self, X):
        X = check_array(X)
        return np.array([self.value_] * X.shape[0])


class UntaggedBinaryClassifier(SGDClassifier):
    # Toy classifier that only supports binary classification, will fail tests.
    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):
        super().fit(X, y, coef_init, intercept_init, sample_weight)
        if len(self.classes_) > 2:
            raise ValueError("Only 2 classes are supported")
        return self

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        super().partial_fit(X=X, y=y, classes=classes, sample_weight=sample_weight)
        if len(self.classes_) > 2:
            raise ValueError("Only 2 classes are supported")
        return self


class TaggedBinaryClassifier(UntaggedBinaryClassifier):
    # Toy classifier that only supports binary classification.
    def _more_tags(self):
        return {"binary_only": True}


class EstimatorMissingDefaultTags(BaseEstimator):
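    # Estimator whose _get_tags drops one of the default tag keys ("allow_nan").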
    def _get_tags(self):
        tags = super()._get_tags().copy()
        del tags["allow_nan"]
        return tags


class RequiresPositiveYRegressor(LinearRegression):
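    # Regressor that rejects non-positive targets (requires_positive_y tag).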
    def fit(self, X, y):
        X, y = self._validate_data(X, y, multi_output=True)
        if (y <= 0).any():
            raise ValueError("negative y values not supported!")
        return super().fit(X, y)

    def _more_tags(self):
        return {"requires_positive_y": True}


class PoorScoreLogisticRegression(LogisticRegression):
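    # Classifier with a shifted decision_function, flagged via the poor_score tag.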
    def decision_function(self, X):
        return super().decision_function(X) + 1

    def _more_tags(self):
        return {"poor_score": True}


class PartialFitChecksName(BaseEstimator):
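    # Estimator whose partial_fit resets input validation only on the first call.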
    def fit(self, X, y):
        self._validate_data(X, y)
        return self

    def partial_fit(self, X, y):
        reset = not hasattr(self, "_fitted")
        self._validate_data(X, y, reset=reset)
        self._fitted = True
        return self


def test_not_an_array_array_function():
    if np_version < parse_version("1.17"):
        raise SkipTest("array_function protocol not supported in numpy <1.17")
    not_array = _NotAnArray(np.ones(10))
    msg = "Don't want to call array_function sum!"
    with raises(TypeError, match=msg):
        np.sum(not_array)
    # always returns True
    assert np.may_share_memory(not_array, None)


def test_check_fit_score_takes_y_works_on_deprecated_fit():
    # Tests that check_fit_score_takes_y works on a class with
    # a deprecated fit method

    class TestEstimatorWithDeprecatedFitMethod(BaseEstimator):
        @deprecated("Deprecated for the purpose of testing check_fit_score_takes_y")
        def fit(self, X, y):
            return self

    check_fit_score_takes_y("test", TestEstimatorWithDeprecatedFitMethod())


def test_check_estimator():
    # Tests that check_estimator fails on "bad" estimators.
    # This is not a complete test of all checks, which are very extensive.

    # check that we have a set_params and can clone
    msg = "Passing a class was deprecated"
    with raises(TypeError, match=msg):
        check_estimator(object)
    msg = (
        "Parameter 'p' of estimator 'HasMutableParameters' is of type "
        "object which is not allowed"
    )
    # check that the "default_constructible" test checks for mutable parameters
    check_estimator(HasImmutableParameters())  # should pass
    with raises(AssertionError, match=msg):
        check_estimator(HasMutableParameters())
    # check that values returned by get_params match set_params
    msg = "get_params result does not match what was passed to set_params"
    with raises(AssertionError, match=msg):
        check_estimator(ModifiesValueInsteadOfRaisingError())
    with warnings.catch_warnings(record=True) as records:
        check_estimator(RaisesErrorInSetParams())
    assert UserWarning in [rec.category for rec in records]

    with raises(AssertionError, match=msg):
        check_estimator(ModifiesAnotherValue())
    # check that we have a fit method
    msg = "object has no attribute 'fit'"
    with raises(AttributeError, match=msg):
        check_estimator(BaseEstimator())
    # check that fit does input validation
    msg = "Did not raise"
    with raises(AssertionError, match=msg):
        check_estimator(BaseBadClassifier())
    # check that sample_weight in fit accepts pandas.Series type
    try:
        from pandas import Series  # noqa

        msg = (
            "Estimator NoSampleWeightPandasSeriesType raises error if "
            "'sample_weight' parameter is of type pandas.Series"
        )
        with raises(ValueError, match=msg):
            check_estimator(NoSampleWeightPandasSeriesType())
    except ImportError:
        pass
    # check that predict does input validation (checks for NaN and inf)
    msg = "Estimator doesn't check for NaN and inf in predict"
    with raises(AssertionError, match=msg):
        check_estimator(NoCheckinPredict())
    # check that estimator state does not change
    # at transform/predict/predict_proba time
    msg = "Estimator changes __dict__ during predict"
    with raises(AssertionError, match=msg):
        check_estimator(ChangesDict())
    # check that `fit` only changes attributes that
    # are private (start with an _ or end with a _).
    msg = (
        "Estimator ChangesWrongAttribute should not change or mutate  "
        "the parameter wrong_attribute from 0 to 1 during fit."
    )
    with raises(AssertionError, match=msg):
        check_estimator(ChangesWrongAttribute())
    check_estimator(ChangesUnderscoreAttribute())
    # check that `fit` doesn't add any public attribute
    msg = (
        r"Estimator adds public attribute\(s\) during the fit method."
        " Estimators are only allowed to add private attributes"
        " either started with _ or ended"
        " with _ but wrong_attribute added"
    )
    with raises(AssertionError, match=msg):
        check_estimator(SetsWrongAttribute())
    # check for sample order invariance
    name = NotInvariantSampleOrder.__name__
    method = "predict"
    msg = (
        "{method} of {name} is not invariant when applied to a dataset"
        "with different sample order."
    ).format(method=method, name=name)
    with raises(AssertionError, match=msg):
        check_estimator(NotInvariantSampleOrder())
    # check for invariant method
    name = NotInvariantPredict.__name__
    method = "predict"
    msg = ("{method} of {name} is not invariant when applied to a subset.").format(
        method=method, name=name
    )
    with raises(AssertionError, match=msg):
        check_estimator(NotInvariantPredict())
    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name
    with raises(AssertionError, match=msg):
        check_estimator(NoSparseClassifier())

    # Large indices test on bad estimator
    msg = (
        "Estimator LargeSparseNotSupportedClassifier doesn't seem to "
        r"support \S{3}_64 matrix, and is not failing gracefully.*"
    )
    with raises(AssertionError, match=msg):
        check_estimator(LargeSparseNotSupportedClassifier())

    # does error on binary_only untagged estimator
    msg = "Only 2 classes are supported"
    with raises(ValueError, match=msg):
        check_estimator(UntaggedBinaryClassifier())

    # non-regression test for estimators transforming to sparse data
    check_estimator(SparseTransformer())

    # doesn't error on actual estimator
    check_estimator(LogisticRegression())
    check_estimator(LogisticRegression(C=0.01))
    check_estimator(MultiTaskElasticNet())

    # doesn't error on binary_only tagged estimator
    check_estimator(TaggedBinaryClassifier())

    # Check regressor with requires_positive_y estimator tag
    msg = "negative y values not supported!"
    with raises(ValueError, match=msg):
        check_estimator(RequiresPositiveYRegressor())

    # Does not raise error on classifier with poor_score tag
    check_estimator(PoorScoreLogisticRegression())


def test_check_outlier_corruption():
    # should raise AssertionError
    decision = np.array([0.0, 1.0, 1.5, 2.0])
    with raises(AssertionError):
        check_outlier_corruption(1, 2, decision)
    # should pass
    decision = np.array([0.0, 1.0, 1.0, 2.0])
    check_outlier_corruption(1, 2, decision)


def test_check_estimator_transformer_no_mixin():
    # check that TransformerMixin is not required for transformer tests to run
    with raises(AttributeError, ".*fit_transform.*"):
        check_estimator(BadTransformerWithoutMixin())


def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris

    iris = load_iris()

    for Estimator in [
        GaussianMixture,
        LinearRegression,
        SGDClassifier,
        PCA,
        ExtraTreesClassifier,
        MiniBatchKMeans,
    ]:
        with ignore_warnings(category=FutureWarning):
            # ignore FutureWarnings raised e.g. when 'est = SGDClassifier()'
            est = Estimator()
            _set_checking_parameters(est)
            set_random_state(est)
            # without fitting
            old_hash = joblib.hash(est)
            check_estimator(est)
        assert old_hash == joblib.hash(est)

        with ignore_warnings(category=FutureWarning):
            # ignore FutureWarnings raised e.g. when 'est = SGDClassifier()'
            est = Estimator()
            _set_checking_parameters(est)
            set_random_state(est)
            # with fitting
            est.fit(iris.data + 10, iris.target)
            old_hash = joblib.hash(est)
            check_estimator(est)
        assert old_hash == joblib.hash(est)


def test_check_estimators_unfitted():
    # check that a ValueError/AttributeError is raised when calling predict
    # on an unfitted estimator
    msg = "Did not raise"
    with raises(AssertionError, match=msg):
        check_estimators_unfitted("estimator", NoSparseClassifier())

    # check that CorrectNotFittedError inherits from either ValueError
    # or AttributeError
    check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())


def test_check_no_attributes_set_in_init():
    class NonConformantEstimatorPrivateSet(BaseEstimator):
        def __init__(self):
            self.you_should_not_set_this_ = None

    class NonConformantEstimatorNoParamSet(BaseEstimator):
        def __init__(self, you_should_set_this_=None):
            pass

    msg = (
        "Estimator estimator_name should not set any"
        " attribute apart from parameters during init."
        r" Found attributes \['you_should_not_set_this_'\]."
    )
    with raises(AssertionError, match=msg):
        check_no_attributes_set_in_init(
            "estimator_name", NonConformantEstimatorPrivateSet()
        )

    msg = (
        "Estimator estimator_name should store all parameters as an attribute"
        " during init"
    )
    with raises(AttributeError, match=msg):
        check_no_attributes_set_in_init(
            "estimator_name", NonConformantEstimatorNoParamSet()
        )


def test_check_estimator_pairwise():
    # check that check_estimator() works on estimators with a _pairwise
    # kernel or metric

    # test precomputed kernel
    est = SVC(kernel="precomputed")
    check_estimator(est)

    # test precomputed metric
    est = KNeighborsRegressor(metric="precomputed")
    check_estimator(est)


def test_check_classifier_data_not_an_array():
    with raises(AssertionError, match="Not equal to tolerance"):
        check_classifier_data_not_an_array(
            "estimator_name", EstimatorInconsistentForPandas()
        )


def test_check_regressor_data_not_an_array():
    with raises(AssertionError, match="Not equal to tolerance"):
        check_regressor_data_not_an_array(
            "estimator_name", EstimatorInconsistentForPandas()
        )


def test_check_estimator_get_tags_default_keys():
    estimator = EstimatorMissingDefaultTags()
    err_msg = (
        r"EstimatorMissingDefaultTags._get_tags\(\) is missing entries"
        r" for the following default tags: {'allow_nan'}"
    )
    with raises(AssertionError, match=err_msg):
        check_estimator_get_tags_default_keys(estimator.__class__.__name__, estimator)

    # noop check when _get_tags is not available
    estimator = MinimalTransformer()
    check_estimator_get_tags_default_keys(estimator.__class__.__name__, estimator)


def test_check_dataframe_column_names_consistency():
    err_msg = "Estimator does not have a feature_names_in_"
    with raises(ValueError, match=err_msg):
        check_dataframe_column_names_consistency("estimator_name", BaseBadClassifier())
    check_dataframe_column_names_consistency("estimator_name", PartialFitChecksName())

    lr = LogisticRegression()
    check_dataframe_column_names_consistency(lr.__class__.__name__, lr)
    lr.__doc__ = "Docstring that does not document the estimator's attributes"
    err_msg = (
        "Estimator LogisticRegression does not document its feature_names_in_ attribute"
    )
    with raises(ValueError, match=err_msg):
        check_dataframe_column_names_consistency(lr.__class__.__name__, lr)


class _BaseMultiLabelClassifierMock(ClassifierMixin, BaseEstimator):
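    # Mock multilabel classifier whose subclasses return a canned response_output.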
    def __init__(self, response_output):
        self.response_output = response_output

    def fit(self, X, y):
        return self

    def _more_tags(self):
        return {"multilabel": True}


def test_check_classifiers_multilabel_output_format_predict():
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierPredict(_BaseMultiLabelClassifierMock):
        def predict(self, X):
            return self.response_output

    # 1. inconsistent array type
    clf = MultiLabelClassifierPredict(response_output=y_test.tolist())
    err_msg = (
        r"MultiLabelClassifierPredict.predict is expected to output a "
        r"NumPy array. Got <class 'list'> instead."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)
    # 2. inconsistent shape
    clf = MultiLabelClassifierPredict(response_output=y_test[:, :-1])
    err_msg = (
        r"MultiLabelClassifierPredict.predict outputs a NumPy array of "
        r"shape \(25, 4\) instead of \(25, 5\)."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)
    # 3. inconsistent dtype
    clf = MultiLabelClassifierPredict(response_output=y_test.astype(np.float64))
    err_msg = (
        r"MultiLabelClassifierPredict.predict does not output the same "
        r"dtype than the targets."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)


def test_check_classifiers_multilabel_output_format_predict_proba():
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierPredictProba(_BaseMultiLabelClassifierMock):
        def predict_proba(self, X):
            return self.response_output

    # 1. unknown output type
    clf = MultiLabelClassifierPredictProba(response_output=sp.csr_matrix(y_test))
    err_msg = (
        "Unknown returned type .*csr_matrix.* by "
        r"MultiLabelClassifierPredictProba.predict_proba. A list or a Numpy "
        r"array is expected."
    )
    with raises(ValueError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 2. for list output
    # 2.1. inconsistent length
    clf = MultiLabelClassifierPredictProba(response_output=y_test.tolist())
    err_msg = (
        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        "the list should be of length n_outputs and contain NumPy arrays. Got "
        f"length of {test_size} instead of {n_outputs}."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 2.2. array of inconsistent shape
    response_output = [np.ones_like(y_test) for _ in range(n_outputs)]
    clf = MultiLabelClassifierPredictProba(response_output=response_output)
    err_msg = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        r"this list should contain NumPy arrays of shape \(n_samples, 2\). Got "
        r"NumPy arrays of shape \(25, 5\) instead of \(25, 2\)."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 2.3. array of inconsistent dtype
    response_output = [
        np.ones(shape=(y_test.shape[0], 2), dtype=np.int64) for _ in range(n_outputs)
    ]
    clf = MultiLabelClassifierPredictProba(response_output=response_output)
    err_msg = (
        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        "it should contain NumPy arrays with floating dtype."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 2.4. array does not contain probability (each row should sum to 1)
    response_output = [
        np.ones(shape=(y_test.shape[0], 2), dtype=np.float64) for _ in range(n_outputs)
    ]
    clf = MultiLabelClassifierPredictProba(response_output=response_output)
    err_msg = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        r"each NumPy array should contain probabilities for each class and "
        r"thus each row should sum to 1"
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 3. for array output
    # 3.1. array of inconsistent shape
    clf = MultiLabelClassifierPredictProba(response_output=y_test[:, :-1])
    err_msg = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, the expected shape is \(n_samples, n_outputs\). Got \(25, 4\)"
        r" instead of \(25, 5\)."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 3.2. array of inconsistent dtype
    response_output = np.zeros_like(y_test, dtype=np.int64)
    clf = MultiLabelClassifierPredictProba(response_output=response_output)
    err_msg = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, the expected data type is floating."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )
    # 4. array does not contain probabilities
    clf = MultiLabelClassifierPredictProba(response_output=y_test * 2.0)
    err_msg = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, this array is expected to provide probabilities of the "
        r"positive class and should therefore contain values between 0 and 1."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_predict_proba(
            clf.__class__.__name__,
            clf,
        )


def test_check_classifiers_multilabel_output_format_decision_function():
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierDecisionFunction(_BaseMultiLabelClassifierMock):
        def decision_function(self, X):
            return self.response_output

    # 1. inconsistent array type
    clf = MultiLabelClassifierDecisionFunction(response_output=y_test.tolist())
    err_msg = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to output a NumPy array. Got <class 'list'> instead."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_decision_function(
            clf.__class__.__name__,
            clf,
        )
    # 2. inconsistent shape
    clf = MultiLabelClassifierDecisionFunction(response_output=y_test[:, :-1])
    err_msg = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to provide a NumPy array of shape \(n_samples, n_outputs\). Got "
        r"\(25, 4\) instead of \(25, 5\)"
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_decision_function(
            clf.__class__.__name__,
            clf,
        )
    # 3. inconsistent dtype
    clf = MultiLabelClassifierDecisionFunction(response_output=y_test)
    err_msg = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to output a floating dtype."
    )
    with raises(AssertionError, match=err_msg):
        check_classifiers_multilabel_output_format_decision_function(
            clf.__class__.__name__,
            clf,
        )


def run_tests_without_pytest():
    """Runs the tests in this file without using pytest."""
    main_module = sys.modules["__main__"]
    test_functions = [
        getattr(main_module, name)
        for name in dir(main_module)
        if name.startswith("test_")
    ]
    test_cases = [unittest.FunctionTestCase(fn) for fn in test_functions]
    suite = unittest.TestSuite()
    suite.addTests(test_cases)
    runner = unittest.TextTestRunner()
    runner.run(suite)


def test_check_class_weight_balanced_linear_classifier():
    # check that ill-computed balanced weights raise an exception
    msg = "Classifier estimator_name is not computing class_weight=balanced properly"
    with raises(AssertionError, match=msg):
        check_class_weight_balanced_linear_classifier(
            "estimator_name", BadBalancedWeightsClassifier
        )


def test_all_estimators_all_public():
    # all_estimators should not fail when pytest is not installed and should
    # return only public estimators
    with warnings.catch_warnings(record=True) as record:
        estimators = all_estimators()
    # no warnings are raised
    assert not record
    for name, _ in estimators:
        assert not name.startswith("_")


def test_xfail_ignored_in_check_estimator():
    # Make sure checks marked as xfail are just ignored and not run by
    # check_estimator(), but still raise a warning.
    with warnings.catch_warnings(record=True) as records:
        check_estimator(NuSVC())
    assert SkipTestWarning in [rec.category for rec in records]


# FIXME: this test should be enabled when the checks become granular
# enough. In 0.24, these tests fail due to low estimator performance.
def test_minimal_class_implementation_checks():
    # Check that third-party library can run tests without inheriting from
    # BaseEstimator.
    # FIXME
    raise SkipTest
    minimal_estimators = [MinimalTransformer(), MinimalRegressor(), MinimalClassifier()]
    for estimator in minimal_estimators:
        check_estimator(estimator)


def test_check_fit_check_is_fitted():
    class Estimator(BaseEstimator):
        def __init__(self, behavior="attribute"):
            self.behavior = behavior

        def fit(self, X, y, **kwargs):
            if self.behavior == "attribute":
                self.is_fitted_ = True
            elif self.behavior == "method":
                self._is_fitted = True
            return self

        @available_if(lambda self: self.behavior in {"method", "always-true"})
        def __sklearn_is_fitted__(self):
            if self.behavior == "always-true":
                return True
            return hasattr(self, "_is_fitted")

    with raises(Exception, match="passes check_is_fitted before being fit"):
        check_fit_check_is_fitted("estimator", Estimator(behavior="always-true"))

    check_fit_check_is_fitted("estimator", Estimator(behavior="method"))
    check_fit_check_is_fitted("estimator", Estimator(behavior="attribute"))
