B
    W0d                 @  s@  d Z ddlmZ ddlmZmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZ ddlZddlZddlmZmZ dd	lmZmZmZmZ dd
lmZm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:  m;Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZD ddlEmFZFmGZGmHZHmIZImJZJ ddlKmLZLmMZMmNZN ddlOmPZP ddlQmRZR ddlSmTZT edddgZUeeVedef f ZWedZXdd d!d"d#ZYd d$d%d&d'ZZeZePeDj[G d(d) d)eFeP Z\eZe@eDj]G d*d+ d+eFe@ Z^dS ),z
Define the SeriesGroupBy and DataFrameGroupBy
classes that hold the groupby interfaces (and some implementations).

These are user facing as the result of the ``df.groupby(...)`` operations,
which here returns a DataFrameGroupBy object.
    )annotations)abc
namedtuple)partial)dedent)AnyCallableHashableIterableMappingTypeVarUnionN)lib	reduction)	ArrayLikeFrameOrSeriesFrameOrSeriesUnion	Manager2D)AppenderSubstitutiondoc)ensure_int64is_boolis_categorical_dtypeis_dict_likeis_integer_dtypeis_interval_dtypeis_numeric_dtype	is_scalar)isnanotna)
algorithmsnanops)maybe_mangle_lambdasreconstruct_funcvalidate_func_kwargs)GroupByApply)SpecificationError)!create_series_with_explicit_dtype)	DataFrame)NDFrame)base)GroupBy_agg_template_apply_docs_transform_templategroup_selection_context)Index
MultiIndexall_indexes_same)Series)maybe_use_numba)boxplot_frame_groupbyNamedAggcolumnZaggfunc.ScalarResultstrztype[FrameOrSeries])nameklassc               s0    fdd}t | }|jpd|_ |_t|S )z
    Create a property for a GroupBy subclass to dispatch to DataFrame/Series.

    Parameters
    ----------
    name : str
    klass : {DataFrame, Series}

    Returns
    -------
    property
    c               s
   |   S )N)Z_make_wrapper)self)r;    M/var/www/html/venv/lib/python3.7/site-packages/pandas/core/groupby/generic.pypropt   s    zgenerate_property.<locals>.prop )getattr__doc____name__property)r;   r<   r@   Zparent_methodr>   )r;   r?   generate_propertyf   s
    
rF   zfrozenset[str])r<   	allowlistc               s    fdd}|S )a  
    Create GroupBy member defs for DataFrame/Series names in a allowlist.

    Parameters
    ----------
    klass : DataFrame or Series class
        class where members are defined.
    allowlist : frozenset[str]
        Set of names of klass methods to be constructed

    Returns
    -------
    class decorator

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    c               s4   x. D ]&}t | |rqt|}t| || qW | S )N)hasattrrF   setattr)clsr;   r@   )rG   r<   r>   r?   pinner   s    


z*pin_allowlisted_properties.<locals>.pinnerr>   )r<   rG   rK   r>   )rG   r<   r?   pin_allowlisted_properties}   s    rL   c                  s  e Zd ZejZddddZedZe	e
d jde
d d	 fd
dZeeedddMdddddZeZddddZdNdddddddZddddd Zdddd!d"ZdOdd$d%dd&d'd(d)Zd*d+ Zedd,e	edddd-d.ZdPdddd1d2d3Zddd4d5d6Zddd7d8Zddd9d:d;ZdQdd<d=d>ZdRddd?d@dAZee j!dBdC Z!dSdddddDdEdFZ"dddGdHZ#dTdKdLZ$  Z%S )USeriesGroupByzIterable[Series])returnc             c  s   | j V  d S )N)_selected_obj)r=   r>   r>   r?   _iterate_slices   s    zSeriesGroupBy._iterate_slicesa  
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])

    >>> s
    0    1
    1    2
    2    3
    3    4
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).min()
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg('min')
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max'])
       min  max
    1    1    2
    2    3    4

    The output column names can be controlled by passing
    the desired column names and aggregations as keyword arguments.

    >>> s.groupby([1, 1, 2, 2]).agg(
    ...     minimum='min',
    ...     maximum='max',
    ... )
       minimum  maximum
    1        1        2
    2        3        4

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min())
    1    1.0
    2    3.0
    dtype: float64templateZseriesZseries_examples)inputexamplesc               s   t  j|f||S )N)superapply)r=   funcargskwargs)	__class__r>   r?   rU      s    zSeriesGroupBy.applyr4   )rS   r<   N)engineengine_kwargsc         	   O  sh  t |r^t|  | j}W d Q R X | j| |f|d|i|\}}| jj| ||jdS |d k}	d }
|	r~t	|\}
}i }t
|trt| |||S t
|tjrt|}| |}|	r|
|_|S t|}|r|s|st| | S | jjdkr| j|f||S y| j|f||S  tk
rb   | j|f||}tt|| jjd d}t||tdS X d S )Nr[   )indexr;      r   )r;   )r\   dtype_if_empty)r5   r0   rO   _aggregate_with_numbaZto_frameobj_constructorZravelr;   r%   
isinstancer:   rB   r   r
   r#   _aggregate_multiple_funcscolumnscomZget_cython_funcgroupernkeys_python_agg_generalKeyError_aggregate_namedr1   sortednamesr(   object)r=   rV   rZ   r[   rW   rX   dataresultr\   
relabelingrd   retZcyfuncr>   r>   r?   	aggregate   s>    
 


zSeriesGroupBy.aggregater)   c             C  s6  t |trtdn^tdd |D rBdd |D }tt| }n0g }x |D ]}|t|p`| qLW t||}i }x4t	|D ](\}\}}t
j||d}| |||< qW tdd | D rdd	lm}	 |	| d
dd | D d}
|
S dd | D }| jj|d d}tdd |D |_| |}|S )Nznested renamer is not supportedc             s  s   | ]}t |ttfV  qd S )N)rb   tuplelist).0xr>   r>   r?   	<genexpr>  s    z:SeriesGroupBy._aggregate_multiple_funcs.<locals>.<genexpr>c             S  s&   g | ]}t |ttfs||fn|qS r>   )rb   rs   rt   )ru   rv   r>   r>   r?   
<listcomp>  s    z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<listcomp>)labelpositionc             s  s   | ]}t |tV  qd S )N)rb   r)   )ru   rv   r>   r>   r?   rw   1  s    r   )concatr]   c             S  s   g | ]
}|j qS r>   )ry   )ru   keyr>   r>   r?   rx   5  s    )axiskeysc             S  s   i | ]\}}||j qS r>   )rz   )ru   r|   valr>   r>   r?   
<dictcomp>;  s    z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<dictcomp>)r\   c             s  s   | ]}|j V  qd S )N)ry   )ru   r|   r>   r>   r?   rw   =  s    )rb   dictr'   anynextzipappendre   Zget_callable_name	enumerater+   Z	OutputKeyrr   valuesZpandasr{   r~   itemsr`   _constructor_expanddimr1   rd   _reindex_output)r=   argrd   fresultsidxr;   rV   r|   r{   res_dfindexed_outputoutputr>   r>   r?   rc     s.    




z'SeriesGroupBy._aggregate_multiple_funcsr:   r   boolint)howaltnumeric_only	min_countc       
        s   j }|j}|j|r:t|js:ttj d dddd fdd}||}jj	|j
j|jd}	|	S )N.z! does not implement numeric_only.r   )r   rN   c               sL   y j jd| jd d}W n& tk
rF   j| j d}Y nX |S )Nrr   r]   )r}   r   )ndimr   )rf   _cython_operationr   NotImplementedError_agg_py_fallback)r   ro   )r   rn   r   r   r=   r>   r?   
array_funcR  s    z5SeriesGroupBy._cython_agg_general.<locals>.array_func)r\   r;   )rO   _values_mgrr   dtyper   typerD   r`   ra   rf   result_indexr;   r   )
r=   r   r   r   r   r`   Zobjvalsr   ro   serr>   )r   rn   r   r   r=   r?   _cython_agg_generalB  s    z!SeriesGroupBy._cython_agg_generalz+Mapping[base.OutputKey, Series | ArrayLike])r   rN   c             C  sL   t |dkst| jj}| jj}tt| }| jj	|||d}| 
|S )a  
        Wraps the output of a SeriesGroupBy aggregation into the expected result.

        Parameters
        ----------
        output : Mapping[base.OutputKey, Union[Series, ArrayLike]]
            Data to wrap.

        Returns
        -------
        Series

        Notes
        -----
        In the vast majority of cases output will only contain one element.
        The exception is operations that expand dimensions, like ohlc.
        r]   )r\   r;   )lenAssertionErrorr`   r;   rf   r   r   iterr   ra   r   )r=   r   r;   r\   r   ro   r>   r>   r?   _wrap_aggregated_outputg  s    z%SeriesGroupBy._wrap_aggregated_outputc             C  sP   t |dkst| jj}tt| }| jj|| jj|d}t	|t
sLt|S )a   
        Wraps the output of a SeriesGroupBy aggregation into the expected result.

        Parameters
        ----------
        output : dict[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
            Dict with a sole key of 0 and a value of the result values.

        Returns
        -------
        Series

        Notes
        -----
        output should always contain one element. It is specified as a dict
        for consistency with DataFrame methods and _wrap_aggregated_output.
        r]   )r\   r;   )r   r   r`   r;   r   r   r   ra   r\   rb   r4   )r=   r   r;   r   ro   r>   r>   r?   _wrap_transformed_output  s    z&SeriesGroupBy._wrap_transformed_outputFr1   zlist[Any] | Noner   )rn   r~   r   not_indexed_samerN   c       
        s   t  dkr*jjg jjjj|jdS |dk	s6tdd fdd}t|d t	r| }jj
||d}|}|jjd	}jj|_|S t|d ttfrj ||d
S jj|| jjd}	|	S dS )a  
        Wrap the output of SeriesGroupBy.apply into the expected result.

        Parameters
        ----------
        data : Series
            Input data for groupby operation.
        keys : Index
            Keys of groups that Series was grouped by.
        values : Optional[List[Any]]
            Applied output for each group.
        not_indexed_same : bool, default False
            Whether the applied outputs are not indexed the same as the group axes.

        Returns
        -------
        DataFrame or Series
        r   )r;   r\   r   Nr1   )rN   c                s8   j jdkr tj j jd} nt j jd d} | S )Nr]   )rl   r   )r;   )rf   rg   r2   from_tuplesrl   r1   )r\   )r~   r=   r>   r?   
_get_index  s    z6SeriesGroupBy._wrap_applied_output.<locals>._get_index)r\   )dropna)r   )rn   r\   r;   )r   r`   ra   r;   rf   r   r   r   rb   r   r   r   stackobservedr4   r)   _concat_objects)
r=   rn   r~   r   r   r   r\   r   Zres_serro   r>   )r~   r=   r?   _wrap_applied_output  s(    


z"SeriesGroupBy._wrap_applied_outputc       	      O  sd   i }d}xV| D ]N\}}t |d| ||f||}t|}|sTt||j d}|||< qW |S )NFr;   T)rm   __setattr__libreductionZextract_resultZcheck_result_arrayr   )	r=   rV   rW   rX   ro   Zinitializedr;   groupr   r>   r>   r?   rj     s    
zSeriesGroupBy._aggregate_named)r<   c            O  s   | j |f|||d|S )N)rZ   r[   )
_transform)r=   rV   rZ   r[   rW   rX   r>   r>   r?   	transform  s    zSeriesGroupBy.transformTr   )r   r   r}   c          
   K  s   |dkst | j}y| jjd|j||f|}W n: tk
rh } zt| d|j d|W d d }~X Y nX |j|| j	j
|jdS )Nr   r   z is not supported for z dtype)r\   r;   )r   rO   rf   r   r   r   	TypeErrorr   ra   r`   r\   r;   )r=   r   r   r}   rX   r`   ro   errr>   r>   r?   _cython_transform  s    *zSeriesGroupBy._cython_transform)rV   rN   c             O  s   t |stt| j}g }xB| D ]:\}}t|d| ||f||}||||jd q W |rddlm	}	 |	|}
| 
|
}n| jjtjd}| jj|_|S )z2
        Transform with a callable func`.
        r;   )r\   r   )r{   )r   )callabler   r   r`   rm   r   r   r\   pandas.core.reshape.concatr{   _set_result_index_orderedra   npZfloat64r;   )r=   rV   rW   rX   r<   r   r;   r   resr{   concatenatedro   r>   r>   r?   _transform_general  s    

z SeriesGroupBy._transform_generalc             C  s   dS )NTr>   )r=   ro   r>   r>   r?   _can_use_transform_fast*  s    z%SeriesGroupBy._can_use_transform_fast)ro   rN   c             C  sH   | j j\}}}|j| j jdd}t|j|}| jj|| jj	| jj
dS )zf
        fast version of transform, only applicable to
        builtin/cythonizable functions
        F)copy)r\   r;   )rf   
group_inforeindexr   r!   Ztake_ndr   r`   ra   r\   r;   )r=   ro   ids_outr>   r>   r?   _wrap_transform_fast_result-  s    z)SeriesGroupBy._wrap_transform_fast_result)r   c          
     s   t tr fddn fddddfddyfdd	D }W n0 ttfk
r } ztd
|W dd}~X Y nX ||}|S )ao  
        Return a copy of a Series excluding elements from groups that
        do not satisfy the boolean criterion specified by func.

        Parameters
        ----------
        func : function
            To apply to each group. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            if False, groups that evaluate False are filled with NaNs.

        Notes
        -----
        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.)
        1    2
        3    4
        5    6
        Name: B, dtype: int64

        Returns
        -------
        filtered : Series
        c               s   t |  S )N)rB   )rv   )rW   rV   rX   r>   r?   <lambda>[      z&SeriesGroupBy.filter.<locals>.<lambda>c               s   | f S )Nr>   )rv   )rW   rV   rX   r>   r?   r   ]  r   r   )rN   c               s    | }|ot |S )N)r    )rv   b)wrapperr>   r?   true_and_notna`  s    z,SeriesGroupBy.filter.<locals>.true_and_notnac               s"   g | ]\}}|r  |qS r>   )r   )ru   r;   r   )r=   r   r>   r?   rx   f  s    z(SeriesGroupBy.filter.<locals>.<listcomp>z'the filter must return a boolean resultN)rb   r:   
ValueErrorr   _apply_filter)r=   rV   r   rW   rX   indicesr   filteredr>   )rW   rV   rX   r=   r   r   r?   filter7  s    #
zSeriesGroupBy.filter)r   rN   c          	   C  s  | j j\}}}| jj}tj|dd\}}t||f}|| }|| }tjddt	|dd |dd kd  f }tjd|dd |dd kf }|dk}	|rd||< d||	< n&d||	tjd|	dd f @ < d||< tj
||jddd}
t|r.|d dkr(|
dd }|t| }n|
}n|
dd }| j j}t|t|krztjt||
jd	| }}
|
||| < | jj||| jjd
}| j|ddS )z
        Return number of unique elements in the group.

        Returns
        -------
        Series
            Number of unique values within each group.
        F)sortr   r]   Nr   int64)r   )r   )r\   r;   )
fill_value)rf   r   r`   r   r!   	factorizer   lexsortr_nonzeroaddreduceatastyper   Zflatnonzeror   zerosr   ra   r;   r   )r=   r   r   r   r   codessorterr   incmaskr   r   riro   r>   r>   r?   nuniquen  s6    	0"

zSeriesGroupBy.nuniquec               s*   |   fdd}| jdkr"|jS | S )Nc               s   | j f  S )N)describe)rv   )rX   r>   r?   r     r   z(SeriesGroupBy.describe.<locals>.<lambda>r]   )rU   r}   TZunstack)r=   rX   ro   r>   )rX   r?   r     s    
zSeriesGroupBy.describe)	normalizer   	ascendingr   c       !        s  ddl m} ddlm} jj\}}	}	jj}
 	fdd}d k	r\t	sl| S nt
|
jrl| S |dk| |
  }}
d krtj|
dd\}}d	d
 }n8|t|
dd}|jj}|j|jjd|jd}dd
 }t|jrt|j|j|f}nt||f}|| ||  }}dt|dd  |d d kd  }tjd|f }t|sf|}||tdd ||td dk}tjd|f }t|
s|}d||< tttj|df d }ttjtj ||djj!}fdd|D |||g }dd jj"D |g }jj#jj$g }|rn|d dk% rRd}n| fdd|D  }}r|&d}ttj|t|f }|r||dk }tj'||d | }n|}|| }	r4d kr4|r||  n|| }t r|n| |f}|| |d |  }|d< d k	rtj(t|ddx<|d d D ],}tjd|dd  |d d kf O q^W ) t|d  }tt*|t+t*|g}, d |d g}|||ddd\}	}t-|dk|| d}	rDt r|n| |d f}|| |d |  }|d< dddfddfdd|d d D }|.|d  t/|||dd} t0|jrt1|}jj2|| jj$d S )!Nr   )get_join_indexers)cutc                 s   j tj dS )N)r   r   r   bins)rU   r4   value_countsr>   )r   r   r   r=   r   r>   r?   apply_series_value_counts  s    z=SeriesGroupBy.value_counts.<locals>.apply_series_value_countsr   T)r   c             S  s   | | S )Nr>   )labr   r>   r>   r?   r     r   z,SeriesGroupBy.value_counts.<locals>.<lambda>)Zinclude_lowest)Z
allow_fillr   c             S  s   | | j jd S )Nr   )Z_multiindexr   )r   r   r>   r>   r?   r     r   r]   )Zrepeatsc               s   g | ]} |qS r>   r>   )ru   level_codes)repr>   r?   rx     s    z.SeriesGroupBy.value_counts.<locals>.<listcomp>c             S  s   g | ]
}|j qS r>   )Zgroup_index)ru   Zpingr>   r>   r?   rx     s    Fc               s   g | ]}|  qS r>   r>   )ru   r   )r   r>   r?   rx     s    floatr   )r   left)r   r   z
np.ndarray)	lev_codesrN   c               s   t |   S )N)r   repeat)r   )diffnbinr>   r?   build_codes4  s    z/SeriesGroupBy.value_counts.<locals>.build_codesc               s   g | ]} |qS r>   r>   )ru   r   )r   r>   r?   rx   7  s    )levelsr   rl   verify_integrity)r\   r;   )3Zpandas.core.reshape.merger   Zpandas.core.reshape.tiler   rf   r   r`   r   r   iterabler   r   r!   r   r4   cat
categoriestaker   Z	_na_valuer   r   r   rightr   r   r   slicer   r   r   r   r   Zreconstructed_codes	groupingsrl   r;   allr   atr   sumZarangeZtileZcumsumwherer   r2   r   r   ra   )!r=   r   r   r   r   r   r   r   r   r   r   r   r   levZllabr   Z	idchangesr   Zlchangesr   r   r   r   rl   dmaccr   r   Zncatr   r   mir>   )
r   r   r   r   r   r   r   r   r=   r   r?   r     s    		



&
 



,$zSeriesGroupBy.value_countsc       	      C  sl   | j j\}}}| jj}|dkt| @ }|p.d}tj|| |d}| jj|| j j| jj	dd}| j
|ddS )z
        Compute count of group, excluding missing values.

        Returns
        -------
        Series
            Count of values within each group.
        r   r   )	minlengthr   )r\   r;   r   )r   )rf   r   r`   r   r   r   Zbincountra   r   r;   r   )	r=   r   r   ngroupsr   r   r
  r   ro   r>   r>   r?   count@  s    	zSeriesGroupBy.countr]   padc               sd   r|   fddS  dkr,d dt|  d}|| jj}|jd}|| d S )	z=Calculate pct_change of each value to previous entry in groupc               s   | j  dS )N)periodsfill_methodlimitfreq)
pct_change)rv   )r  r  r  r  r>   r?   r   ]  s   z*SeriesGroupBy.pct_change.<locals>.<lambda>Nr  r   )r  )r  r  r]   )rU   rB   groupbyrf   r   shift)r=   r  r  r  r  ZfilledZfill_grpZshiftedr>   )r  r  r  r  r?   r  X  s    zSeriesGroupBy.pct_change)N)r   )F)Tr   )T)T)FTFNT)r]   r  NN)&rD   
__module____qualname__r+   series_apply_allowlist_apply_allowlistrP   r   _agg_examples_docr   r.   formatrU   r   r-   rr   aggrc   r   r   r   r   rj   r   r/   r   r   r   r   r   r   r   r4   r   r   r  r  __classcell__r>   r>   )rY   r?   rM      sD   .2-$#8
75	     rM   c                  s  e Zd ZejZedZee	eddd^dddddZ
e
Zdd	d
dZd_ddddddddZdd	ddZdd	ddZd`ddZddddddZdaddddd"d#d$Zd%d& Zedd'eedddd(d)Zdd	d*d+Zddd,d-d.Zd/d0 Zdddd1d2d3Zddd4d5d6Zdbd7d8Zd9d	 fd:d;Zdcdd<d=d>Zd?d	d@dAZ ddBd,dCdDZ!dEddFdGdHZ"dEddFdIdJZ#d?ddKdLdMZ$dNdOdPdQZ%dNdd4dRdSZ&dd	dTdUZ'dddddVdWdXZ(ee)j*j+deddYdZd[Z*ee)j,j+dfddYd\d]Z,e-Z.  Z/S )gDataFrameGroupBya  
    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "A": [1, 1, 2, 2],
    ...         "B": [1, 2, 3, 4],
    ...         "C": [0.362838, 0.227877, 1.267767, -0.562860],
    ...     }
    ... )

    >>> df
       A  B         C
    0  1  1  0.362838
    1  1  2  0.227877
    2  2  3  1.267767
    3  2  4 -0.562860

    The aggregation is for each column.

    >>> df.groupby('A').agg('min')
       B         C
    A
    1  1  0.227877
    2  3 -0.562860

    Multiple aggregations

    >>> df.groupby('A').agg(['min', 'max'])
        B             C
      min max       min       max
    A
    1   1   2  0.227877  0.362838
    2   3   4 -0.562860  1.267767

    Select a column for aggregation

    >>> df.groupby('A').B.agg(['min', 'max'])
       min  max
    A
    1    1    2
    2    3    4

    Different aggregations per column

    >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'})
        B             C
      min max       sum
    A
    1   1   2  0.590715
    2   3   4  0.704907

    To control the output names with different aggregations per column,
    pandas supports "named aggregation"

    >>> df.groupby("A").agg(
    ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
    ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
       b_min     c_sum
    A
    1      1  0.590715
    2      3  0.704907

    - The keywords are the *output* column names
    - The values are tuples whose first element is the column to select
      and the second element is the aggregation to apply to that column.
      Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
      ``['column', 'aggfunc']`` to make it clearer what the arguments are.
      As usual, the aggregation can be a callable or a string alias.

    See :ref:`groupby.aggregate.named` for more.

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
          B
    A
    1   1.0
    2   3.0r)   )rS   r<   N)rZ   r[   c         
   O  s  t |rVt|  | j}W d Q R X | j||f|d|i|\}}| jj|||jdS t|f|\}	}}
}t|}t	| |||}|
 }t|s|d k	r|S |	r|d k	r|jd d |f }|
|_|d kr| jjdkr| j|f||S |s|r| j|f||}n| jdkr | |}|S t	| |gdi d}y|
 }W n> tk
r| } zdt|krb | |}W d d }~X Y nRX | j}t|tr| jj |_n.|jj|jjtt|jjd |jd|_| js| | t tt!||_"|j#d	d
S )Nr[   )r\   rd   r]   r>   )rW   rX   z
no results)levelr   T)datetime)$r5   r0   rO   r_   r`   ra   rd   r$   r#   r&   r  r   ilocrf   rg   rh   _aggregate_framer}   r   r:   rb   r4   _obj_with_exclusionsr   
_set_namesrl   rt   rangenlevelsZ	droplevelas_index_insert_inaxis_grouper_inplacer1   r   r\   _convert)r=   rV   rZ   r[   rW   rX   rn   ro   r\   rp   rd   orderopZgbar   Zsobjr>   r>   r?   rr     sP    




zDataFrameGroupBy.aggregatezIterable[Series])rN   c             c  s`   | j }| jdkr|j}t|tr4|j| jkr4|V  n(x&| D ]\}}|| jkrRq>|V  q>W d S )Nr]   )rO   r}   r   rb   r4   r;   
exclusionsr   )r=   r`   ry   r   r>   r>   r?   rP     s    

z DataFrameGroupBy._iterate_slicesr   r:   r   r   r   )r   r   r   r   rN   c               s     |rjddddd fdd}j|dd}t|tk r|tjd	tj d
 d dtdd 	|S )NF)r   r   )r   rN   c               sL   y j jd| jd d}W n& tk
rF   j| j d}Y nX |S )Nrr   r]   )r}   r   )r   r   )rf   r   r   r   r   )r   ro   )r   rn   r   r   r=   r>   r?   r   *  s    z8DataFrameGroupBy._cython_agg_general.<locals>.array_funcT)ignore_failureszDropping invalid columns in r   zQ is deprecated. In a future version, a TypeError will be raised. Before calling .z=, select only columns which should be valid for the function.   )
stacklevel)
_get_data_to_aggregateget_numeric_datagrouped_reducer   warningswarnr   rD   FutureWarning_wrap_agged_manager)r=   r   r   r   r   r   new_mgrr>   )r   rn   r   r   r=   r?   r      s    z$DataFrameGroupBy._cython_agg_generalc             O  s   | j jdkrtd| j}i }| jdkrTx`| D ] \}}||f||}|||< q.W n6x4| jD ]*}| j||d}	||	f||}|||< q\W | j j}
|jd| j  }| j	j
|||
d}| jdkr|j}|S )Nr]   zNumber of keys must be 1r   )r`   )r\   rd   )rf   rg   r   r"  r}   r   Z	get_groupr   axesr`   ra   r   )r=   rV   rW   rX   r`   ro   r;   rn   ZfresZgrp_dfr   Zother_axr   r>   r>   r?   r!  H  s$    

z!DataFrameGroupBy._aggregate_framec             O  st   | j }i }xPt|D ]D\}}|jd d |f }t||| j| jd}	|	j|f||||< qW | j|}
|j	|
_	|
S )N)	selectionrf   r+  )
r"  r   r   rM   rf   r+  rr   r`   ra   rd   )r=   rV   rW   rX   r`   ro   iitemr   Zcolgr   r>   r>   r?   _aggregate_item_by_itemc  s    z(DataFrameGroupBy._aggregate_item_by_itemFc             C  s  t |dkr:| jj| jj|jd}|j|j dd}|S t	t
j| d }|d kr\| j S t|trv| j|||dS | jr| jjnd }t|tjtfr| jj||| jdS t|ts| jr| jj||dS | jj||| jgd}| | |S n| |||||S d S )Nr   )r\   rd   F)r   )r   )r\   r;   )r\   )r   r`   ra   rf   r   rd   r   Zdtypesto_dictr   re   Znot_nonerb   r)   r   r&  r   Zndarrayr1   _constructor_slicedZ
_selectionr4   r'  _wrap_applied_output_series)r=   rn   r~   r   r   ro   first_not_none	key_indexr>   r>   r?   r   w  s.    



z%DataFrameGroupBy._wrap_applied_outputzlist[Series]r   )r   r   rN   c               sn  |  }tf dti|  fdd|D }tdd |D }| jr| j| j}t|dkof|j	dk}	|	r|d |d _
| j|||dS |rdd	lm}
 |
|S |s| j||d
dS tdd |D }| jdkr|}|j }|j
d kr,dd |D }t|dkr,t|d |_
n|j}|}|j}|jtkr@| }| jj|||d}| jsd| | | |S )Nr^   c               s   g | ]}|d k	r|n qS )Nr>   )ru   rv   )backupr>   r?   rx     s    z@DataFrameGroupBy._wrap_applied_output_series.<locals>.<listcomp>c             s  s   | ]}|j V  qd S )N)r\   )ru   rv   r>   r>   r?   rw     s    z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<genexpr>r]   r   )r   )r{   Tc             S  s   g | ]}t |qS r>   )r   Zasarray)ru   vr>   r>   r?   rx     s    c             S  s   h | ]
}|j qS r>   )r;   )ru   rB  r>   r>   r?   	<setcomp>  s    z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<setcomp>)r\   rd   )Z_construct_axes_dictr(   rm   r3   squeezerO   	_get_axisr}   r   r%  r;   r   r   r{   r   Zvstackr\   r   rt   r   r   tolistr`   ra   r&  r'  r   )r=   r~   r   r   r?  r@  rX   Zall_indexed_sameZapplied_indexZsingular_seriesr{   Zstacked_valuesr\   rd   rl   ro   r>   )rA  r?   r>    sB    


z,DataFrameGroupBy._wrap_applied_output_seriesTr   )r   r   r}   rN   c       	        s   |dkst  }|r$|jdd}ddd fdd}|j|dd	}|d
|jd
  t|t|k rtjdt	j
 d  d  dtdd j|}jd
kr|j}|S )Nr   F)r   r   )bvaluesrN   c               s   j jd|  dfS )Nr   r]   )rf   r   )rG  )r   rX   r=   r>   r?   arr_func  s    z4DataFrameGroupBy._cython_transform.<locals>.arr_funcT)r,  r]   zDropping invalid columns in r   zQ is deprecated. In a future version, a TypeError will be raised. Before calling .zJ, select only columns which should be valid for the transforming function.r-  )r.  )r   r/  r0  r1  set_axisr7  r   r2  r3  r   rD   r4  r`   ra   r}   r   )	r=   r   r   r}   rX   mgrrH  Zres_mgrr   r>   )r   rX   r=   r?   r     s     
z"DataFrameGroupBy._cython_transformc             O  s  ddl m} g }| j}| jj|| jd}| j|f||\}}	x |D ]\}
}t|d|
 y| 	||	|\}}W nJ t
k
r   | ||S  tk
r } zd}t||W d d }~X Y nX t|trTt|jsqFnl|j|jr||gt|j dd}|j|_|j|_n2| jjt|jgt|j |j|j|jd}|| qF|| qFW | jdkrt|jn|j}| jdkrdnd}||| jdd	}|j||dd
}| |S )Nr   )r{   )r}   r;   z3transform must return a scalar value for each groupr]   )rd   r\   F)r}   r   )r}   r   )r   r{   r"  rf   get_iteratorr}   _define_pathsrm   r   _choose_pathr   _transform_item_by_itemr   rb   r4   r   prodshaper\   is_r   rd   r`   ra   Zconcatenater   reshaper   r   r   )r=   rV   rW   rX   r{   Zappliedr`   gen	fast_path	slow_pathr;   r   pathr   r   msgrZconcat_indexZ
other_axisr   r>   r>   r?   r     sB    

z#DataFrameGroupBy._transform_general)r<   c            O  s   | j |f|||d|S )N)rZ   r[   )r   )r=   rV   rZ   r[   rW   rX   r>   r>   r?   r   J  s    zDataFrameGroupBy.transformc             C  s   t |to|j| jjS )N)rb   r)   rd   equalsr"  )r=   ro   r>   r>   r?   r   Q  s    z(DataFrameGroupBy._can_use_transform_fast)ro   rN   c             C  s@   | j }| jj\}}}|j| jjdd}|j|dd}|j|_|S )z6
        Fast transform path for aggregations
        F)r   r   )r}   )r"  rf   r   r   r   r   r\   )r=   ro   r`   r   r   r   r>   r>   r?   r   V  s    z,DataFrameGroupBy._wrap_transform_fast_resultc               sX   t tr. fdd} fdd}n" fdd} fdd}||fS )Nc               s   t |  S )N)rB   )r   )rW   rV   rX   r>   r?   r   e  r   z0DataFrameGroupBy._define_paths.<locals>.<lambda>c               s   | j  fddjdS )Nc               s   t |  S )N)rB   )rv   )rW   rV   rX   r>   r?   r   g  r   zBDataFrameGroupBy._define_paths.<locals>.<lambda>.<locals>.<lambda>)r}   )rU   r}   )r   )rW   rV   rX   r=   r>   r?   r   f  s   c               s   | f S )Nr>   )r   )rW   rV   rX   r>   r?   r   j  r   c               s   | j  fddjdS )Nc               s   | f S )Nr>   )rv   )rW   rV   rX   r>   r?   r   l  r   zBDataFrameGroupBy._define_paths.<locals>.<lambda>.<locals>.<lambda>)r}   )rU   r}   )r   )rW   rV   rX   r=   r>   r?   r   k  s   )rb   r:   )r=   rV   rW   rX   rT  rU  r>   )rW   rV   rX   r=   r?   rL  c  s    
zDataFrameGroupBy._define_paths)rT  rU  r   c             C  s   |}||}y||}W n, t k
r.    Y n tk
rD   ||fS X t|tsX||fS |j|jsn||fS ||r||}||fS )N)r   	Exceptionrb   r)   rd   rY  )r=   rT  rU  r   rV  r   Zres_fastr>   r>   r?   rM  p  s    


zDataFrameGroupBy._choose_path)r`   rN   c          	   C  s   i }g }xt |D ]~\}}|jd d |f }t||| j| jd}y||||< W n2 tk
r   tjdt	| j
 dtdd Y qX || qW |std|j|}	| jj||jd}
|	|
_|
S )N)r8  rf   r+  zDropping invalid columns in z.transform is deprecated. In a future version, a TypeError will be raised. Before calling .transform, select only columns which should be valid for the transforming function.   )r.  z)Transform function invalid for data types)r\   )r   r   rM   rf   r+  r   r   r2  r3  r   rD   r4  r   rd   r   r`   ra   r\   )r=   r`   r   r   Zindsr9  colsubsetsgbrd   ro   r>   r>   r?   rN    s.    
z(DataFrameGroupBy._transform_item_by_itemc          	   O  s   g }| j }| jj|| jd}x|D ]\}}	t|	d| ||	f||}
y|
 }
W n tk
rh   Y nX t|
st	|
rt
|
r|
rt|
r|| | q"tdt|
j dq"W | ||S )a1  
        Return a copy of a DataFrame excluding filtered elements.

        Elements from groups are filtered if they do not satisfy the
        boolean criterion specified by func.

        Parameters
        ----------
        func : function
            Function to apply to each subframe. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            If False, groups that evaluate False are filled with NaNs.

        Returns
        -------
        filtered : DataFrame

        Notes
        -----
        Each subframe is endowed the attribute 'name' in case you need to know
        which group you are working on.

        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> grouped.filter(lambda x: x['B'].mean() > 3.)
             A  B    C
        1  bar  2  5.0
        3  bar  4  1.0
        5  bar  6  9.0
        )r}   r;   zfilter function returned a z, but expected a scalar bool)rO   rf   rK  r}   rm   r   rD  AttributeErrorr   r   r   r    r   r   r   r   rD   r   )r=   rV   r   rW   rX   r   r`   rS  r;   r   r   r>   r>   r?   r     s     (zDataFrameGroupBy.filterz DataFrameGroupBy | SeriesGroupByc               sD   | j dkrtdt|tr8t|dkr8tjdtdd t 	|S )Nr]   z'Cannot subset columns when using axis=1zmIndexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.   )r.  )
r}   r   rb   rs   r   r2  r3  r4  rT   __getitem__)r=   r|   )rY   r>   r?   ra    s    
zDataFrameGroupBy.__getitem__)r   c             C  s   |dkrR|dkr| j }t|| j| j| j| j| j|| j| j| j| j	| j
| j| jdS |dkr|dkrl| j | }t|| j| j|| j| j| j	| j
| jd	S tddS )a  
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        r`  N)r}   r  rf   r+  r8  r&  r   
group_keysrD  r   mutatedr   r]   )r  rf   r8  r   rb  rD  r   r   zinvalid ndim for _gotitem)r`   r  rf   r}   r  r+  r&  r   rb  rD  r   rc  r   rM   r   )r=   r|   r   r]  r>   r>   r?   _gotitem  s@    


zDataFrameGroupBy._gotitemr   c             C  s"   | j }| jdkr|jjS |jS d S )Nr]   )r"  r}   r   r   )r=   r`   r>   r>   r?   r/  5  s    
z'DataFrameGroupBy._get_data_to_aggregateNonec             C  sd   |j }xXtt| jjt| j tdd | jjD D ]$\}}}|r8||kr8|d|| q8W d S )Nc             S  s   g | ]
}|j qS r>   )in_axis)ru   grpr>   r>   r?   rx   B  s    zCDataFrameGroupBy._insert_inaxis_grouper_inplace.<locals>.<listcomp>r   )rd   r   reversedrf   rl   Zget_group_levelsr   insert)r=   ro   rd   r;   r  rf  r>   r>   r?   r'  <  s    
"z/DataFrameGroupBy._insert_inaxis_grouper_inplacez+Mapping[base.OutputKey, Series | ArrayLike])r   rN   c             C  s   dd |  D }tdd |D }|| jd| j j | j|}||_	| j
sj| | | }n
| jj|_| jdkr|j}|j| jjr| jj |_| |S )a  
        Wraps the output of DataFrameGroupBy aggregations into the expected result.

        Parameters
        ----------
        output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
           Data to wrap.

        Returns
        -------
        DataFrame
        c             S  s   i | ]\}}||j qS r>   )rz   )ru   r|   r   r>   r>   r?   r   Y  s    z<DataFrameGroupBy._wrap_aggregated_output.<locals>.<dictcomp>c             S  s   g | ]
}|j qS r>   )ry   )ru   r|   r>   r>   r?   rx   Z  s    z<DataFrameGroupBy._wrap_aggregated_output.<locals>.<listcomp>r]   )r   r1   r#  r"  rE  r}   rl   r`   ra   rd   r&  r'  _consolidaterf   r   r\   r   rY  r   r   )r=   r   r   rd   ro   r>   r>   r?   r   I  s    



z(DataFrameGroupBy._wrap_aggregated_outputc             C  sz   dd |  D }| j|}| jdkr:|j}| jj|_n2tdd |D }|| jd| j j	 ||_| jj
|_
|S )a(  
        Wraps the output of DataFrameGroupBy transformations into the expected result.

        Parameters
        ----------
        output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
            Data to wrap.

        Returns
        -------
        DataFrame
        c             S  s   i | ]\}}||j qS r>   )rz   )ru   r|   r   r>   r>   r?   r   ~  s    z=DataFrameGroupBy._wrap_transformed_output.<locals>.<dictcomp>r]   c             s  s   | ]}|j V  qd S )N)ry   )ru   r|   r>   r>   r?   rw     s    z<DataFrameGroupBy._wrap_transformed_output.<locals>.<genexpr>)r   r`   ra   r}   r   rd   r1   r#  rE  rl   r\   )r=   r   r   ro   rd   r>   r>   r?   r   o  s    

z)DataFrameGroupBy._wrap_transformed_output)rJ  rN   c             C  s   | j sZ|jd dkr|jd nd}tt|}|d| | j|}| | | }n | j	j
}|d| | j|}| jdkr|j}| |jddS )Nr   r]   T)r  )r&  rP  r1   r$  rI  r`   ra   r'  rj  rf   r   r}   r   r   r(  )r=   rJ  rowsr\   ro   r>   r>   r?   r5    s    


z$DataFrameGroupBy._wrap_agged_managerr   )r`   c             c  sD   x>t |jD ]0\}}|t|jd d |f || j| jdfV  qW d S )N)r8  rf   r+  )r   rd   rM   r   rf   r+  )r=   r`   r9  Zcolnamer>   r>   r?   _iterate_column_groupbys  s    z)DataFrameGroupBy._iterate_column_groupbysc               sV   ddl m} |j} fdd| |D }t|sDtg || jjdS |||ddS d S )Nr   )r{   c               s   g | ]\}} |qS r>   r>   )ru   r   Zcol_groupby)rV   r>   r?   rx     s    z>DataFrameGroupBy._apply_to_column_groupbys.<locals>.<listcomp>)rd   r\   r]   )r~   r}   )r   r{   rd   rl  r   r)   rf   r   )r=   rV   r`   r{   rd   r   r>   )rV   r?   _apply_to_column_groupbys  s    z*DataFrameGroupBy._apply_to_column_groupbysc          	     sr   |   }| jj\ } dkddd fdd}||}t| dd | |}W dQ R X | j|d	d
S )z
        Compute count of group, excluding missing values.

        Returns
        -------
        DataFrame
            Count of values within each group.
        r   r   )rG  rN   c               sF   | j dkr"t| dd @ }nt|  @ }tj| dd}|S )Nr]   r   )labelsZmax_binr}   )r   r   rR  r   Zcount_level_2d)rG  ZmaskedZcounted)r   r   r  r>   r?   hfunc  s
    
z%DataFrameGroupBy.count.<locals>.hfuncr   TNr   )r   )r/  rf   r   r1  re   Ztemp_setattrr5  r   )r=   rn   r   ro  r6  ro   r>   )r   r   r  r?   r    s    	
zDataFrameGroupBy.count)r   rN   c               s^   | j dkr|  fddS | j}| j fdd|d}| jsZttt||_| 	| |S )a  
        Return DataFrame with counts of unique elements in each position.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the counts.

        Returns
        -------
        nunique: DataFrame

        Examples
        --------
        >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam',
        ...                           'ham', 'ham'],
        ...                    'value1': [1, 5, 5, 2, 5, 5],
        ...                    'value2': list('abbaxy')})
        >>> df
             id  value1 value2
        0  spam       1      a
        1   egg       5      b
        2   egg       5      b
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y

        >>> df.groupby('id').nunique()
              value1  value2
        id
        egg        1       1
        ham        1       2
        spam       2       1

        Check for rows with the same id but conflicting values:

        >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any())
             id  value1 value2
        0  spam       1      a
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y
        r   c               s
   |   S )N)r   )r^  )r   r>   r?   r   	  r   z*DataFrameGroupBy.nunique.<locals>.<lambda>c               s
   |   S )N)r   )r^  )r   r>   r?   r     r   )r`   )
r}   rh   r"  rm  r&  r1   r$  r   r\   r'  )r=   r   r`   r   r>   )r   r?   r     s    -

zDataFrameGroupBy.nunique)skipnac               s8   t    dkrd nd fdd}| || jS )Nr   Fc               sH   | j tjdd}|j}|   fdd|D }| j||jdS )NZargmax)r}   rp  r   c               s"   g | ]}|d kr | nt jqS )r   )r   nan)ru   r9  )r\   r>   r?   rx   &  s    z9DataFrameGroupBy.idxmax.<locals>.func.<locals>.<listcomp>)r\   )_reducer"   Z	nanargmaxr   rE  r=  r\   )dfr   r   ro   )r}   r   rp  )r\   r?   rV     s    
z%DataFrameGroupBy.idxmax.<locals>.func)r)   _get_axis_number_python_apply_generalr"  )r=   r}   rp  rV   r>   )r}   r   rp  r?   idxmax  s    
zDataFrameGroupBy.idxmaxc               s8   t    dkrd nd fdd}| || jS )Nr   Fc               sH   | j tjdd}|j}|   fdd|D }| j||jdS )NZargmin)r}   rp  r   c               s"   g | ]}|d kr | nt jqS )r   )r   rq  )ru   r9  )r\   r>   r?   rx   ;  s    z9DataFrameGroupBy.idxmin.<locals>.func.<locals>.<listcomp>)r\   )rr  r"   Z	nanargminr   rE  r=  r\   )rs  r   r   ro   )r}   r   rp  )r\   r?   rV   0  s    
z%DataFrameGroupBy.idxmin.<locals>.func)r)   rt  ru  r"  )r=   r}   rp  rV   r>   )r}   r   rp  r?   idxmin+  s    
zDataFrameGroupBy.idxmin)N)r   )F)Tr   )T)N)T)r   T)r   T)0rD   r  r  r+   dataframe_apply_allowlistr  r   r  r   r-   rr   r  rP   r   r!  r;  r   r>  r   r   r   r   r/   r   r   r   rL  rM  rN  r   ra  rd  r/  r'  r   r   r5  rl  rm  r  r   r)   rv  rC   rw  r6   Zboxplotr  r>   r>   )rY   r?   r  k  sN   QJ'
.L'2%
D1&	"<

r  )_rC   
__future__r   collectionsr   r   	functoolsr   textwrapr   typingr   r   r	   r
   r   r   r   r2  numpyr   Zpandas._libsr   r   r   Zpandas._typingr   r   r   r   Zpandas.util._decoratorsr   r   r   Zpandas.core.dtypes.commonr   r   r   r   r   r   r   r   Zpandas.core.dtypes.missingr   r    Zpandas.corer!   r"   Zpandas.core.aggregationr#   r$   r%   Zpandas.core.applyr&   Zpandas.core.baser'   Zpandas.core.commoncorecommonre   Zpandas.core.constructionr(   Zpandas.core.framer)   Zpandas.core.genericr*   Zpandas.core.groupbyr+   Zpandas.core.groupby.groupbyr,   r-   r.   r/   r0   Zpandas.core.indexes.apir1   r2   r3   Zpandas.core.seriesr4   Zpandas.core.util.numba_r5   Zpandas.plottingr6   r7   r:   Z	AggScalarr9   rF   rL   r  rM   rx  r  r>   r>   r>   r?   <module>   sP   $	(
$     N