
"""
Rank based methods for inferential statistics

Created on Sat Aug 15 10:18:53 2020

Author: Josef Perktold
License: BSD-3

    N)stats)rankdata)HolderTuple)_tconfint_generic_tstat_generic_zconfint_generic_zstat_genericc                 *   [         R                  " U 5      n [         R                  " U5      n[        U 5      n[        U5      nUS:X  d  US:X  a  [        S5      e[         R                  " X45      nUR
                  S:  a  [         R                  " [        SU5      nO[        U5      nUSU nXRS nUR
                  S:  a9  [         R                  " [        SU 5      n[         R                  " [        SU5      n	O[        U 5      n[        U5      n	XgX4$ )a  Compute midranks for two samples

Parameters
----------
x1, x2 : array_like
    Original data for two samples that will be converted to midranks.

Returns
-------
rank1 : ndarray
    Midranks of the first sample in the pooled sample.
rank2 : ndarray
    Midranks of the second sample in the pooled sample.
ranki1 : ndarray
    Internal midranks of the first sample.
ranki2 : ndarray
    Internal midranks of the second sample.
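
Examples
--------
A small sketch; the data values here are invented for illustration:

>>> from statsmodels.stats.nonparametric import rankdata_2samp
>>> rank1, rank2, ranki1, ranki2 = rankdata_2samp([1, 2, 2], [2, 4])
>>> # pooled midranks: rank1 is [1., 3., 3.], rank2 is [3., 5.]
>>> # internal midranks: ranki1 is [1., 2.5, 2.5], ranki2 is [1., 2.]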

    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)

    nobs1 = len(x1)
    nobs2 = len(x2)
    if nobs1 == 0 or nobs2 == 0:
        raise ValueError("one sample has zero length")

    x_combined = np.concatenate((x1, x2))
    if x_combined.ndim > 1:
        rank = np.apply_along_axis(rankdata, 0, x_combined)
    else:
        rank = rankdata(x_combined)  # no axis in older scipy
    rank1 = rank[:nobs1]
    rank2 = rank[nobs1:]
    if x_combined.ndim > 1:
        ranki1 = np.apply_along_axis(rankdata, 0, x1)
        ranki2 = np.apply_along_axis(rankdata, 0, x2)
    else:
        ranki1 = rankdata(x1)
        ranki2 = rankdata(x2)
    return rank1, rank2, ranki1, ranki2


class RankCompareResult(HolderTuple):
    """Results for rank comparison

This is a subclass of HolderTuple that includes results from intermediate
computations, as well as methods for hypothesis tests, confidence intervals
and summary.
    """

    def conf_int(self, value=None, alpha=0.05, alternative="two-sided"):
        """
Confidence interval for probability that sample 1 has larger values

Confidence interval is for the shifted probability

    P(x1 > x2) + 0.5 * P(x1 = x2) - value

Parameters
----------
value : float
    Value, default 0, shifts the confidence interval,
    e.g. ``value=0.5`` centers the confidence interval at zero.
alpha : float
    Significance level for the confidence interval, coverage is
    ``1-alpha``
alternative : str
    The alternative hypothesis, H1, has to be one of the following

       * 'two-sided' : H1: ``prob - value`` not equal to 0.
       * 'larger' :   H1: ``prob - value > 0``
       * 'smaller' :  H1: ``prob - value < 0``

Returns
-------
lower : float or ndarray
    Lower confidence limit. This is -inf for the one-sided alternative
    "smaller".
upper : float or ndarray
    Upper confidence limit. This is inf for the one-sided alternative
    "larger".

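Examples
--------
A minimal usage sketch; the data values are invented for illustration:

>>> from statsmodels.stats.nonparametric import rank_compare_2indep
>>> res = rank_compare_2indep([2, 3, 5, 7, 11], [1, 2, 4, 4, 6])
>>> low, upp = res.conf_int(alpha=0.05)
>>> low_c, upp_c = res.conf_int(value=0.5)  # interval for prob1 - 0.5
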
        """
        p0 = value
        if p0 is None:
            p0 = 0
        diff = self.prob1 - p0
        std_diff = np.sqrt(self.var / self.nobs)

        if self.use_t is False:
            return _zconfint_generic(diff, std_diff, alpha, alternative)
        else:
            return _tconfint_generic(diff, std_diff, self.df, alpha,
                                     alternative)

    def test_prob_superior(self, value=0.5, alternative="two-sided"):
        """test for superiority probability

H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value

The alternative is that the probability is either not equal, larger
or smaller than the null-value depending on the chosen alternative.

Parameters
----------
value : float
    Value of the probability under the Null hypothesis.
alternative : str
    The alternative hypothesis, H1, has to be one of the following

       * 'two-sided' : H1: ``prob - value`` not equal to 0.
       * 'larger' :   H1: ``prob - value > 0``
       * 'smaller' :  H1: ``prob - value < 0``

Returns
-------
res : HolderTuple
    HolderTuple instance with the following main attributes

    statistic : float
        Test statistic for z- or t-test
    pvalue : float
        Pvalue of the test based on either normal or t distribution.
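
Examples
--------
A short sketch with invented data:

>>> from statsmodels.stats.nonparametric import rank_compare_2indep
>>> res = rank_compare_2indep([2, 3, 5, 7, 11], [1, 2, 4, 4, 6])
>>> t = res.test_prob_superior(value=0.5, alternative="two-sided")
>>> stat, pval = t.statistic, t.pvalue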

        """
        p0 = value  # null value of the superiority probability
        std_diff = np.sqrt(self.var / self.nobs)

        # one-sample test of prob1 against the null value p0
        if not self.use_t:
            stat, pv = _zstat_generic(self.prob1, p0, std_diff, alternative,
                                      diff=0)
            distr = "normal"
        else:
            stat, pv = _tstat_generic(self.prob1, p0, std_diff, self.df,
                                      alternative, diff=0)
            distr = "t"

        res = HolderTuple(statistic=stat,
                          pvalue=pv,
                          df=self.df,
                          distribution=distr,
                          )
        return res

    def tost_prob_superior(self, low, upp):
        """test of stochastic (non-)equivalence of p = P(x1 > x2)

Null hypothesis:  p < low or p > upp
Alternative hypothesis:  low < p < upp

where p is the probability that a random draw from the population of
the first sample has a larger value than a random draw from the
population of the second sample, specifically

    p = P(x1 > x2) + 0.5 * P(x1 = x2)

If the pvalue is smaller than a threshold, say 0.05, then we reject the
hypothesis that the probability p that distribution 1 is stochastically
superior to distribution 2 is outside of the interval given by
thresholds low and upp.

Parameters
----------
low, upp : float
    equivalence interval low < mean < upp

Returns
-------
res : HolderTuple
    HolderTuple instance with the following main attributes

    pvalue : float
        Pvalue of the equivalence test given by the larger pvalue of
        the two one-sided tests.
    statistic : float
        Test statistic of the one-sided test that has the larger
        pvalue.
    results_larger : HolderTuple
        Results instance with test statistic, pvalue and degrees of
        freedom for lower threshold test.
    results_smaller : HolderTuple
        Results instance with test statistic, pvalue and degrees of
        freedom for upper threshold test.
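
Examples
--------
A brief sketch; data and equivalence margins are invented for illustration:

>>> from statsmodels.stats.nonparametric import rank_compare_2indep
>>> res = rank_compare_2indep([2, 3, 5, 7, 11], [1, 2, 4, 4, 6])
>>> tost = res.tost_prob_superior(low=0.4, upp=0.6)
>>> pval = tost.pvalue  # larger of the two one-sided pvalues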

        """
        t1 = self.test_prob_superior(low, alternative='larger')
        t2 = self.test_prob_superior(upp, alternative='smaller')

        # report the one-sided test that has the larger pvalue
        idx_max = np.asarray(t1.pvalue < t2.pvalue, int)
        title = "Equivalence test for Prob(x1 > x2) + 0.5 Prob(x1 = x2) "
        res = HolderTuple(statistic=np.choose(idx_max,
                                              [t1.statistic, t2.statistic]),
                          pvalue=np.choose(idx_max, [t1.pvalue, t2.pvalue]),
                          results_larger=t1,
                          results_smaller=t2,
                          title=title,
                          )

        return res

    def confint_lintransf(self, const=-1, slope=2, alpha=0.05,
                          alternative="two-sided"):
        """confidence interval of a linear transformation of prob1
This computes the confidence interval for

    d = const + slope * prob1

Default values correspond to Somers' d.

Parameters
----------
const, slope : float
    Constant and slope for linear (affine) transformation.
alpha : float
    Significance level for the confidence interval, coverage is
    ``1-alpha``
alternative : str
    The alternative hypothesis, H1, has to be one of the following

       * 'two-sided' : H1: ``prob - value`` not equal to 0.
       * 'larger' :   H1: ``prob - value > 0``
       * 'smaller' :  H1: ``prob - value < 0``

Returns
-------
lower : float or ndarray
    Lower confidence limit. This is -inf for the one-sided alternative
    "smaller".
upper : float or ndarray
    Upper confidence limit. This is inf for the one-sided alternative
    "larger".

)r-   r.   r   )r2   )	r+   constsloper-   r.   low_pupp_prH   rI   s	            r   confint_lintransf#RankCompareResult.confint_lintransf   sB    F }}5}Jem#em#19xr    c                     Uc  U R                   n[        R                  R                  U5      [        R
                  " S5      -  $ )a  
Cohen's d, standardized mean difference under normality assumption.

This computes the standardized mean difference, Cohen's d, effect size
that is equivalent to the rank based probability ``p`` of being
stochastically larger if we assume that the data is normally
distributed, given by

    :math:`d = F^{-1}(p) * \sqrt{2}`

where :math:`F^{-1}` is the inverse of the cdf of the normal
distribution.

Parameters
----------
prob : float in (0, 1)
    Probability to be converted to Cohen's d effect size.
    If prob is None, then the ``prob1`` attribute is used.

Returns
-------
equivalent Cohen's d effect size under normality assumption.
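
Examples
--------
A brief sketch of the underlying transformation; the probability 0.75 is an
arbitrary illustrative value:

>>> import numpy as np
>>> from scipy import stats
>>> d = stats.norm.ppf(0.75) * np.sqrt(2)  # roughly 0.95
>>> # res.effectsize_normal() applies the same mapping to ``prob1``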

   )r%   r   normppfr   r&   )r+   probs     r   effectsize_normal#RankCompareResult.effectsize_normal  s2    2 <::Dzz~~d#bggaj00r    c           	         Sn[         R                  " U R                  5      nU R                  c  U R	                  5       u  pVOU R                  nU R
                  n[         R                  " U5      n[         R                  " U R                  US95      nUR                  S   S:  a  UR                  nU R                  n	[         R                  " [         R                  " U R                  5      5      n
[         R                  " U5      nUc'  [        [        U5      5       Vs/ s H  nSU-  PM
     nnU Vs/ s H  nSU-  PM
     nnSnSSKJn  U" XXXx4X<U	XS	9nU$ s  snf s  snf )
a  summary table for probability that random draw x1 is larger than x2

Parameters
----------
alpha : float
    Significance level for confidence intervals. Coverage is 1 - alpha
xname : None or list of str
    If None, then each row has a name column with generic names.
    If xname is a list of strings, then it will be included as part
    of those names.

Returns
-------
SimpleTable instance with methods to convert to different output
formats.
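
Examples
--------
A short sketch with invented data:

>>> from statsmodels.stats.nonparametric import rank_compare_2indep
>>> res = rank_compare_2indep([2, 3, 5, 7, 11], [1, 2, 4, 4, 6])
>>> table = res.summary(alpha=0.05)  # SimpleTable; use print(table) to show
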
        """
        yname = "None"  # endog name, not used but required by summary_params
        effect = np.atleast_1d(self.prob1)
        if self.pvalue is None:
            statistic, pvalue = self.test_prob_superior()
        else:
            pvalue = self.pvalue
            statistic = self.statistic
        pvalues = np.atleast_1d(pvalue)
        ci = np.atleast_2d(self.conf_int(alpha=alpha))
        if ci.shape[0] > 1:
            ci = ci.T
        use_t = self.use_t
        sd = np.atleast_1d(np.sqrt(self.var_prob))
        statistic = np.atleast_1d(statistic)
        if xname is None:
            xname = ['c%d' % ii for ii in range(len(effect))]

        xname2 = ['prob(x1>x2) %s' % ii for ii in xname]

        title = "Probability sample 1 is stochastically larger"
        from statsmodels.iolib.summary import summary_params

        summ = summary_params((self, effect, sd, statistic, pvalues, ci),
                              yname=yname, xname=xname2, use_t=use_t,
                              title=title, alpha=alpha)
        return summ


def rank_compare_2indep(x1, x2, use_t=True):
    """
Statistics and tests for the probability that x1 has larger values than x2.

p is the probability that a random draw from the population of
the first sample has a larger value than a random draw from the
population of the second sample, specifically

        p = P(x1 > x2) + 0.5 * P(x1 = x2)

This is a measure underlying Wilcoxon-Mann-Whitney's U test, the
Fligner-Policello test and the Brunner-Munzel test. Inference is based on
the asymptotic distribution of the Brunner-Munzel test. The half probability
for ties corresponds to the use of midranks and makes the measure valid for
discrete variables.

The Null hypothesis for stochastic equality is p = 0.5, which corresponds
to the Brunner-Munzel test.

Parameters
----------
x1, x2 : array_like
    Array of samples, should be one-dimensional.
use_t : boolean
    If use_t is true, the t distribution with Welch-Satterthwaite type
    degrees of freedom is used for p-value and confidence interval.
    If use_t is false, then the normal distribution is used.

Returns
-------
res : RankCompareResult
    The results instance contains the results for the Brunner-Munzel test
    and has methods for hypothesis tests, confidence intervals and summary.

    statistic : float
        The Brunner-Munzel W statistic.
    pvalue : float
        p-value assuming a t distribution. One-sided or
        two-sided, depending on the choice of `alternative` and `use_t`.

See Also
--------
RankCompareResult
scipy.stats.brunnermunzel : Brunner-Munzel test for stochastic equality
scipy.stats.mannwhitneyu : Mann-Whitney rank test on two samples.

Notes
-----
Wilcoxon-Mann-Whitney assumes equal variance or equal distribution under
the Null hypothesis. Fligner-Policello test allows for unequal variances
but assumes continuous distribution, i.e. no ties.
Brunner-Munzel extends the test to allow for unequal variance and discrete
or ordered categorical random variables.

Brunner and Munzel recommend estimating the p-value from the t-distribution
when the sample size is 50 or less. If the sample size is smaller than 10, the
permuted Brunner-Munzel test (see [2]_) is preferable for the test
of stochastic equality.

This measure has been introduced in the literature under many different
names relying on a variety of assumptions.
In psychology, McGraw and Wong (1992) introduced it as Common Language
effect size for the continuous, normal distribution case,
Vargha and Delaney (2000) [3]_ extended it to the nonparametric
continuous distribution case as in Fligner-Policello.

WMW and related tests can only be interpreted as tests of medians or tests
of central location under very restrictive additional assumptions
such as both distributions are identical under the equality null hypothesis
(assumed by Mann-Whitney) or both distributions are symmetric (shown by
Fligner-Policello). If the distribution of the two samples can differ in
an arbitrary way, then the equality Null hypothesis corresponds to p=0.5
against an alternative p != 0.5. See, for example, Conroy (2012) [4]_ and
Divine et al (2018) [5]_ .

Note: Brunner-Munzel and related literature define the probability that x1
is stochastically smaller than x2, while here we use stochastically larger.
This is equivalent to switching x1 and x2 in the two-sample case.

References
----------
.. [1] Brunner, E. and Munzel, U. "The nonparametric Behrens-Fisher
       problem: Asymptotic theory and a small-sample approximation".
       Biometrical Journal. Vol. 42(2000): 17-25.
.. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the
       non-parametric Behrens-Fisher problem". Computational Statistics and
       Data Analysis. Vol. 51(2007): 5192-5204.
.. [3] Vargha, András, and Harold D. Delaney. 2000. “A Critique and
       Improvement of the CL Common Language Effect Size Statistics of
       McGraw and Wong.” Journal of Educational and Behavioral Statistics
       25 (2): 101–32. https://doi.org/10.3102/10769986025002101.
.. [4] Conroy, Ronán M. 2012. “What Hypotheses Do ‘Nonparametric’ Two-Group
       Tests Actually Test?” The Stata Journal: Promoting Communications on
       Statistics and Stata 12 (2): 182–90.
       https://doi.org/10.1177/1536867X1201200202.
.. [5] Divine, George W., H. James Norton, Anna E. Barón, and Elizabeth
       Juarez-Colunga. 2018. “The Wilcoxon–Mann–Whitney Procedure Fails as
       a Test of Medians.” The American Statistician 72 (3): 278–86.
       https://doi.org/10.1080/00031305.2017.1305291.
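
Examples
--------
A small illustration; the data values are invented for the example:

>>> x1 = [1, 2, 2, 3, 5, 7, 8]
>>> x2 = [2, 2, 4, 4, 6, 6]
>>> res = rank_compare_2indep(x1, x2)
>>> p_hat = res.prob1                 # estimate of P(x1 > x2) + 0.5 P(x1 = x2)
>>> test = res.test_prob_superior(value=0.5)   # Brunner-Munzel test
>>> ci = res.conf_int(alpha=0.05)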

r   r   axisg       @r   rW   Nr7   r8   s1s2var1var2r'   rf   r   r   r(   mean1mean2r%   prob2somersd1somersd2r*   r)   rr   )r   r   r   r   r   meansumpowerr&   r   r6   sfabsrX   r"   )r   r   r)   r   r   r(   r   r   r   r   meanr1meanr2meanri1meanri2S1S2wbfndf_numerdf_denomr*   r8   r   r   rf   r'   r%   r   s                              r   rank_compare_2indepr   a  s   J 
BB	BBGEGE=DzUaZ566#1"#9 E&WWU#FWWU#Fggf1%Gggf1%G	&07:C@q	IB!)OB	&07:C@q	IB!)OB=FO,DU]bggej5:&=>>>D 88EJ3S988EJ,	:BHHUZ-;; UWWZZtb11UZZ]]266$<00 !!D!!Dute|+H
$,-
.CqyAo%.EqyAo%.E t F r b "&-17:&. $) 16 =A $*	 28	
 $)
 16 ',ai!m ?Dai!m !# +0 r    c                 p   [         R                  " U 5      n [         R                  " U5      nU R                  5       UR                  5       pTX-  nX-  n[         R                  " S/U45      R	                  SS9n[         R                  " S/U45      R	                  SS9n	USS USS -   S-  n
U	SS U	SS -   S-  nX-  R                  5       nX-  R                  5       nUS-  U-  R                  5       US-  -
  nU
S-  U-  R                  5       US-  -
  nXU-
  -  XU-
  -  -   nXE-   nUU-  nX-  U-  XB-
  -  nX-  U-  XR-
  -  nUU-   S-  US-  US-
  -  US-  US-
  -  -   -  n[        S0 SS_SS_S	S_S
S_SU_SU_SU_SU_SU_SU_SU_SS_SS_SU_SU_SUS-  S-
  _SUS-  S-
  _SU_SU_6nU$ )a  
Stochastically larger probability for 2 independent ordinal samples.

This is a special case of `rank_compare_2indep` when the data are given as
counts of two independent ordinal, i.e. ordered multinomial, samples.

The statistic of interest is the probability that a random draw from the
population of the first sample has a larger value than a random draw from
the population of the second sample, specifically

    p = P(x1 > x2) + 0.5 * P(x1 = x2)

Parameters
----------
count1 : array_like
    Counts of the first sample, categories are assumed to be ordered.
count2 : array_like
    Counts of the second sample, number of categories and ordering needs
    to be the same as for sample 1.
ddof : scalar
    Degrees of freedom correction for variance estimation. The default
    ddof=1 corresponds to `rank_compare_2indep`.
use_t : bool
    If use_t is true, the t distribution with Welch-Satterthwaite type
    degrees of freedom is used for p-value and confidence interval.
    If use_t is false, then the normal distribution is used.

Returns
-------
res : RankCompareResult
    This includes methods for hypothesis tests and confidence intervals
    for the probability that sample 1 is stochastically larger than
    sample 2.

See Also
--------
rank_compare_2indep
RankCompareResult

Notes
-----
The implementation is based on the appendix of Munzel and Hauschke (2003)
with the addition of ``ddof`` so that the results match the general
function `rank_compare_2indep`.
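
Examples
--------
A minimal usage sketch; the ordinal category counts are invented for
illustration:

>>> count1 = [10, 14, 11, 5]   # ordered category counts, sample 1
>>> count2 = [6, 9, 13, 12]    # ordered category counts, sample 2
>>> res = rank_compare_2ordinal(count1, count2)
>>> p_hat = res.prob1
>>> ci = res.conf_int(alpha=0.05)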

r   r}   r   Nru   rW   r7   r8   r   r   r   r   r'   rf   r   r   r(   r   r   r%   r   r   r   r*   r)   rr   )r   r   r   r   cumsumr"   )count1count2ddofr)   r   r   freq1freq2cdf1cdf2cdfm1cdfm2r%   r   r   r   rf   r(   r'   vn1vn2r*   r=   s                          r   rank_compare_2ordinalr     sj   ` ZZFZZF::<5NENE>>A3,'..A.6D>>A3,'..A.6D !"XSb	!Q&E!"XSb	!Q&E]!E]!E1Hu!!#eQh.D1Hu!!#eQh.D%(==H=D
/C
,
%,
/C
,
%,
/C
)a36UQY/#q&EAI2FF	GB
 d 4 D T !%,069%- #( 05 <@ #'	 /3	
 #(
 05 &+QY] >CQY]  " */C Jr    c                 8    U R                  UR                  5      $ )a  
Probability indicating that distr1 is stochastically larger than distr2.

This computes

    p = P(x1 > x2)

for two continuous distributions, where `distr1` and `distr2` are the
distributions of random variables x1 and x2 respectively.

Parameters
----------
distr1, distr2 : distributions
    Two instances of scipy.stats.distributions. The required methods are
    cdf of the second distribution and expect of the first distribution.

Returns
-------
p : probability x1 is larger than x2


Notes
-----
This is a one-liner that is added mainly as reference.

Examples
--------
>>> from scipy import stats
>>> prob_larger_continuous(stats.norm, stats.t(5))
0.4999999999999999

# which is the same as
>>> stats.norm.expect(stats.t(5).cdf)
0.4999999999999999

# distribution 1 with smaller mean (loc) than distribution 2
>>> prob_larger_continuous(stats.norm, stats.norm(loc=1))
0.23975006109347669

)expectcdf)distr1distr2s     r   prob_larger_continuousr   P  s    T ==$$r    c                 n    [         R                  R                  U [        R                  " S5      -  5      $ )a  
Convert Cohen's d effect size to stochastically-larger-probability.

This assumes observations are normally distributed.

Computed as

    p = Prob(x1 > x2) = F(d / sqrt(2))

where `F` is cdf of normal distribution. Cohen's d is defined as

    d = (mean1 - mean2) / std

where ``std`` is the pooled within standard deviation.

Parameters
----------
d : float or array_like
    Cohen's d effect size for difference mean1 - mean2.

Returns
-------
prob : float or ndarray
    Prob(x1 > x2)
rW   )r   rX   r   r   r&   )ds    r   cohensd2problargerr   }  s#    6 ::>>!bggaj.))r    )T)r   T)rz   numpyr   scipyr   scipy.statsr   statsmodels.stats.baser   statsmodels.stats.weightstatsr   r   r   r	   r   r"   r   r   r   r   rr   r    r   <module>r      sP        . )(XY YxXvQh*%Z*r    