
    >h8\                     t   S SK Jr  S SKrS SKrS SKJrJr  S SKrS SK	J
r
  S SKJrJrJr  S SKJr  S SKJr  S rS	 rS
 rS rS rS rS rS r " S S5      r " S S5      r\S:X  aP  S SKrS SKJr  \R@                  " SS/ SQS9r!\" S\!S9RE                  5       r#\" S\!S9RE                  5       r$\" \#SS9r%gg)    )lrangeN)	DataFrameIndex)stats)_has_intercept_intercept_idx_remove_intercept_patsy)summary2)OLSc                     Uc  U R                  5       $ US:X  a  U R                  $ US:X  a  U R                  $ US:X  a  U R                  $ US:X  a  U R                  $ [        SU-  5      e)Nhc0hc1hc2hc3z robust options %s not understood)
cov_paramscov_HC0cov_HC1cov_HC2cov_HC3
ValueError)modelrobusts     jC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/stats/anova.py_get_covariancer      sk    ~!!	5}}	5}}	5}}	5}};fDEE    c                 6   UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nU(       a  UR                  5       nU R                  R                  nU R                  R                  nUR
                  S   nU R                  R                  n	U R                  R                  R                  n
U R                  R                  n[        U
R                  5      [        U
5      -
  S-   nS	U-  nS
SSX-/n[        [        R                  " US45      US9nUS;   a  [!        XXxXXX5
      $ US;   a  [#        X
XUU5      $ US;   a  [%        X
XUU5      $ US;   a  ['        S5      e[)        S[+        U5      -  5      e)a  
Anova table for one fitted linear model.

Parameters
----------
model : fitted linear model results instance
    A fitted linear model
typ : int or str {1,2,3} or {"I","II","III"}
    Type of sum of squares to use.

**kwargs**

scale : float
    Estimate of variance, If None, will be estimated from the largest
model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
    Test statistics to provide. Default is "F".

Notes
-----
Use of this function is discouraged. Use anova_lm instead.
testFscaleNtyp   r   r   zPR(>%s)dfsum_sqmean_sq   columnsr!   I)   II)   III)   IVzType IV not yet implementedzType %s not understood)getlowerr   endogexogshapeendog_namesdatadesign_info
exog_nameslentermsr   r   npzerosanova1_lm_singleanova2_lm_singleanova3_lm_singleNotImplementedErrorr   str)r   kwargsr   r   r    r   r2   r3   nobsresponse_namer7   r8   n_rowspr_testnamestables                   r   anova_singlerI   #   s   . ::fc"DJJw%E
**UA
CZZ$'FKKE;;D::a=DKK++M++""..K''J+##$~k'BBQFF$G8Y6Ebhh{+U;E
hd+ &g? 	?			F' &( 	(	
	F' &( 	(			!"?@@1CH<==r   c
                    [        U SS5      n
U
cB  [        R                  R                  U5      u  p[        R                  " UR
                  U5      n
[        R                  " [        UR                  5      [        UR                  5      45      nUR                   Vs/ s H  oR                  U5      PM     nn[        U5       H  u  nnSUUU4'   M     [        R                  " XS-  5      n[        U5      nUU)    n[        R                  " UR                  5      nUU)    nUR                  5       n[!        US/-   5      Ul        [        R$                  UU)    R'                  S5      U4   UR(                  USS/4'   U R*                  U R,                  4UR(                  SSS/4'   US:X  a  US   US   -  U R*                  U R,                  -  -  XW'   [.        R0                  R3                  US   US   U R,                  5      XX'   [        R4                  [        R4                  4UR(                  SXx/4'   US   US   -  US	'   U$ s  snf )
a  
Anova table for one fitted linear model.

Parameters
----------
model : fitted linear model results instance
    A fitted linear model

**kwargs**

scale : float
    Estimate of variance, If None, will be estimated from the largest
model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
    Test statistics to provide. Default is "F".

Notes
-----
Use of this function is discouraged. Use anova_lm instead.
effectsNr!   r*   Residualr"   r#   r   r$   )getattrr;   linalgqrdotTr<   r9   r:   column_names
term_namesslice	enumerater   arraytolistr   indexc_sumlocssrdf_residr   fsfnan)r   r2   r3   rC   r7   rH   rE   r   rF   r   rK   qrarrnameslicesislice_r#   idxrS   rX   s                         r   r=   r=   _   s
   . eY-Giill4 &&e$
((C))*C0H0H,IJ
KC2=2H2HI2H$%2HFIf%&AvI & VVC!$F

%CSD\F+001JSD!JE,-EK)+sC4y}}Q/?/G)HEIIedH%%&-2YY-FEIIj8D/)*s{h%+5		ENN24E#Jd$)NN413		*to-.Xt4E)L/ Js   Ic                 P   UR                   SS n[        U5      nSSX4/n[        [        R                  " US45      US9n[        U S5      n	[        X5      n
/ n/ n[        U5       GH  u  pUR                  U5      n[        UR                  UR                  5      n/ n[        UR                  5      nU H  n[        UR                  5      nUR                  U5      (       d  M0  UU:X  a  M8  UR                  U5      nUR                  [        UR                  UR                  5      5        UR                  [        UR                  UR                  5      5        M     [        R                  " U R                   R"                  R$                  S   5      U   n[        R                  " U R                   R"                  R$                  S   5      U   nUR&                  (       a  [        R(                  " [        R(                  " UU
5      UR*                  5      nSSKJn  UR1                  U5      u  nnUR$                  S   UR$                  S   -
  n[        R(                  " USS2U* S24   R*                  U5      nOUnUR$                  S   nUS	:X  ab  U R3                  UU
S
9nUR4                  =UR6                  UR8                  U   U4'   nUR:                  UR6                  UR8                  U   U4'   UUR6                  UR8                  U   S4'   UR=                  UR                  5        UR=                  UR?                  5       5        GM     [A        US/-   5      Ul        URB                  [        RD                  " XR                   R"                  R$                  S   S-   /-   5         nX   US   -  U RF                  -  U RH                  -  nUUS'   U RF                  U RH                  [        RJ                  [        RJ                  4UR6                  SSSX4/4'   U$ )aA  
Anova type II table for one fitted linear model.

Parameters
----------
model : fitted linear model results instance
    A fitted linear model

**kwargs**

scale : float
    Estimate of variance, If None, will be estimated from the largest
model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
    Test statistics to provide. Default is "F".

Notes
-----
Use of this function is discouraged. Use anova_lm instead.

Type II
Sum of Squares compares marginal contribution of terms. Thus, it is
not particularly useful for models with significant interaction terms.
Nr#   r"   r.   r&   r!   r   )rN   r   cov_prL   )&r:   r	   r   r;   r<   r   rU   rT   r   startstopsetfactorsissubsetextendeyer   r3   r4   sizerP   rQ   scipyrN   rO   f_testfvaluer[   rX   pvalueappendrd   r   ilocargsortr\   r]   r`   )r   r7   rE   r   rF   r   
terms_inforG   rH   cov
robust_cov	col_orderrX   rf   termcolsL1L2term_sett	other_setcolLVLrN   
orth_compl_rb   L12r^   
test_valuer\   s                                  r   r>   r>      sT   2 ""1%J(4JtT+Ebhh{+u=E
%
&C /JIEZ(   &DJJ		*t||$AAIII  ++H	4I!''*		&CHH56		&CHH56  VVEKK$$**1-.r2VVEKK$$**1-.r277&&:.rtt4C$!99S>LJqbhhqk)A &&AqbcE*,,b1CCA3;S
3A;<88CEIIekk!nd*+j12EIIekk!ng-. +,		%++a.$&'$TYY[!O )R ,-EKJJrzz){{/?/?/E/Ea/H/J.K"KLME
+d
#eii
/
>CE(O=BYY<ANN<>FFBFF=LEIIj8D$889 Lr   c                    U[        U5      -  nUR                  nSSX4/n[        [        R                  " US45      US9n[        X5      n	/ n
/ n[        U5       GH  u  pUR                  U5      n[        R                  " U R                  R                  R                  S   5      U   nUnUR                  S   nUS:X  ab  U R                  UU	S9nUR                  =UR                  UR                  U   U4'   nUR                   UR                  UR                  U   U4'   UUR                  UR                  U   S4'   UR#                  UR%                  5       5        GM
     ['        US	/-   5      Ul        X   US   -  U R(                  -  U R*                  -  nUUS'   U R(                  U R*                  [        R,                  [        R,                  4UR                  S	SSX4/4'   U$ )
Nr#   r"   r.   r&   r!   r   r   rj   rL   )r   r:   r   r;   r<   r   rU   rT   rr   r   r3   r4   ru   rv   r[   rX   rw   rx   rd   r   r\   r]   r`   )r   r7   rE   r   rF   r   r{   rG   rH   r|   r~   rX   rf   r   r   r   r   rb   r^   r   r\   s                        r   r?   r?      s   
n[))F""JtT+Ebhh{+u=E
%
(CIEZ(  &VVEKK$$**1-.t4HHQK3;S,A;<88CEIIekk!nd*+j12EIIekk!ng-. +,		%++a.$&'TYY[! )" ,-EK +d
#eii
/
>CE(O=BYY<ANN<>FFBFF=LEIIj8D$889 Lr   c                     UR                  SS5      n[        U 5      S:X  a  U S   n[        U40 UD6$ US;  a  [        S[	        U5      -  5      eUR                  SS5      nUR                  SS	5      n[        U 5      nS
U-  nSSSSXG/n[        [        R                  " US45      US9n	U(       d  U S   R                  nU  V
s/ s H  oR                  PM     sn
U	S'   U  V
s/ s H  oR                  PM     sn
U	S'   [        R                  " U	S   R                  5      * U	R                  U	R                  SS	 S4'   U	S   R                  5       * U	S'   US:X  am  U	S   U	S   -  U-  U	S'   [        R                   R#                  U	S   U	S   U	S   5      X'   [        R$                  U	R                  U	S   R'                  5       U4'   U	$ s  sn
f s  sn
f )a  
Anova table for one or more fitted linear models.

Parameters
----------
args : fitted linear model results instance
    One or more fitted linear models
scale : float
    Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
test : str {"F", "Chisq", "Cp"} or None
    Test statistics to provide. Default is "F".
typ : str or int {"I","II","III"} or {1,2,3}
    The type of Anova test to perform. See notes.
robust : {None, "hc0", "hc1", "hc2", "hc3"}
    Use heteroscedasticity-corrected coefficient covariance matrix.
    If robust covariance is desired, it is recommended to use `hc3`.

Returns
-------
anova : DataFrame
    When args is a single model, return is DataFrame with columns:

    sum_sq : float64
        Sum of squares for model terms.
    df : float64
        Degrees of freedom for model terms.
    F : float64
        F statistic value for significance of adding model terms.
    PR(>F) : float64
        P-value for significance of adding model terms.

    When args is multiple models, return is DataFrame with columns:

    df_resid : float64
        Degrees of freedom of residuals in models.
    ssr : float64
        Sum of squares of residuals in models.
    df_diff : float64
        Degrees of freedom difference from previous model in args
    ss_dff : float64
        Difference in ssr from previous model in args
    F : float64
        F statistic comparing to previous model in args
    PR(>F): float64
        P-value for significance comparing to previous model in args

Notes
-----
Model statistics are given in the order of args. Models must have been fit
using the formula api.

See Also
--------
model_results.compare_f_test, model_results.compare_lm_test

Examples
--------
>>> import statsmodels.api as sm
>>> from statsmodels.formula.api import ols
>>> moore = sm.datasets.get_rdataset("Moore", "carData", cache=True) # load
>>> data = moore.data
>>> data = data.rename(columns={"partner.status" :
...                             "partner_status"}) # make name pythonic
>>> moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
...                 data=data).fit()
>>> table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 Anova DataFrame
>>> print(table)
r    r!   r   r(   z6Multiple models only supported for type I. Got type %sr   r   r   NzPr(>%s)r]   r\   df_diffss_diff   r&   )r0   r9   rI   r   rA   r   r;   r<   r   r\   r]   diffvaluesr[   rX   r   r^   r_   r`   isnull)argsrB   r    r   r   r   n_modelsrF   rG   rH   mdls              r   anova_lmr     s   L **UA
C 4yA~QE,V,,
( '),S2 3 	3 ::fc"DJJw%E4yH$G	9dDEbhh!}-u=ER'+,tGGt,E%L156#6E*-/WWU:5F5M5M-N,NEIIekk!"oy()e))++E)s{9%i(885@c
E#Ji0@$)*$57 35&&		%*##%w./L -6s   G$Gc                 \    [         R                  " S/U-  5      nU H  nX   nSX5'   M     U$ )NTF)r;   rV   )re   slices_to_excludenindr   ss         r   
_not_slicer     s5    
((D6!8
C!L " Jr   c                     [        X$UR                  S   5      nX5   n[        R                  " XSS2U4   R	                  U5      5      nUR
                  R	                  U5      n[        U 5      [        U5      -
  nXx4$ )a  
Residual sum of squares of OLS model excluding factors in `keys`
Assumes x matrix is orthogonal

Parameters
----------
y : array_like
    dependent variable
x : array_like
    independent variables
term_slices : a dict of slices
    term_slices[key] is a boolean array specifies the parameters
    associated with the factor `key`
params : ndarray
    OLS solution of y = x * params
keys : keys for term_slices
    factors to be excluded

Returns
-------
rss : float
    residual sum of squares
df : int
    degrees of freedom
r!   N)r   r4   r;   subtractrP   rQ   r9   )	yxterm_slicesparamskeysr   params1r\   r]   s	            r   _ssr_reduced_modelr     si    4 [

3CkG
++a1c6w/
0C
%%))C.C1vG$H=r   c                   8    \ rS rSrSr  S	S jrS rS rS rSr	g)
AnovaRMi  a:  
Repeated measures Anova using least squares regression

The full model regression residual sum of squares is
used to compare with the reduced model for calculating the
within-subject effect sum of squares [1].

Currently, only fully balanced within-subject designs are supported.
Calculation of between-subject effects and corrections for violation of
sphericity are not yet implemented.

Parameters
----------
data : DataFrame
depvar : str
    The dependent variable in `data`
subject : str
    Specify the subject id
within : list[str]
    The within-subject factors
between : list[str]
    The between-subject factors, this is not yet implemented
aggregate_func : {None, 'mean', callable}
    If the data set contains more than a single observation per subject
    and cell of the specified model, this function will be used to
    aggregate the data before running the Anova. `None` (the default) will
    not perform any aggregation; 'mean' is s shortcut to `numpy.mean`.
    An exception will be raised if aggregation is required, but no
    aggregation function was specified.

Returns
-------
results : AnovaResults instance

Raises
------
ValueError
    If the data need to be aggregated, but `aggregate_func` was not
    specified.

Notes
-----
This implementation currently only supports fully balanced designs. If the
data contain more than one observation per subject and cell of the design,
these observations need to be aggregated into a single observation
before the Anova is calculated, either manually or by passing an aggregation
function via the `aggregate_func` keyword argument.
Note that if the input data set was not balanced before performing the
aggregation, the implied heteroscedasticity of the data is ignored.

References
----------
.. [*] Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons, 2011.
Nc                    Xl         X l        X@l        SU;   a  [        S5      eXPl        Ub  [        S5      eX0l        US:X  a   [        R                  R                  U l
        OX`l
        UR                  UR                  U/U-   S95      (       d+  U R                  b  U R                  5         OSn[        U5      eU R                  5         g )NCzSFactor name cannot be 'C'! This is in conflict with patsy's contrast function name.z)Between subject effect not yet supported!mean)subsetzThe data set contains more than one observation per subject and cell. Either aggregate the data manually, or pass the `aggregate_func` parameter.)r6   depvarwithinr   betweenr@   subjectpdSeriesr   aggregate_funcequalsdrop_duplicates
_aggregate_check_data_balanced)selfr6   r   r   r   r   r   msgs           r   __init__AnovaRM.__init__  s    	&= D E E% '7 8 8V#"$))..D"0{{4//y67I/JKK"".!A !o%!!#r   c                     U R                   R                  U R                  /U R                  -   SS9U R                     R                  U R                  5      U l         g )NF)as_index)r6   groupbyr   r   r   aggr   r   s    r   r   AnovaRM._aggregate  sQ    YYgt||nt{{:',  ..2kk; c$--. 		r   c                 b   SnU R                    H,  nU[        U R                  U   R                  5       5      -  nM.     0 n[	        U R                  R
                  S   5       Hb  n/ nU R                    H.  nUR                  U R                  U   R                  U   5        M0     [        U5      nXS;   a  X5   S-   X5'   M^  SX5'   Md     Sn[        U5      U:w  a  [        U5      eUW   nU H  nXU   :w  d  M  [        U5      e   U R                  R
                  S   X-  :  a  [        S5      eg)zraise if data is not balanced

This raises a ValueError if the data is not balanced, and
returns None if it is balance

Return might change
r!   r   zData is unbalanced.z9There are more than 1 element in a cell! Missing factors?N)
r   r9   r6   uniqueranger4   rx   ry   tupler   )	r   factor_levelswi
cell_countrX   keyr   error_messagecounts	            r   r   AnovaRM._check_data_balanced  s'    ++BS2!5!5!788M  
499??1-.EC{{

499S>..u56 #*C ",/A"5
"#
 / .z?m+]++3C3' //  99??1 55 ) * * 6r   c           	         U R                   U R                     R                  nU R                   Vs/ s H  nSU-  PM
     nnSU R                  -  nX4/-   n[
        R                  " SR                  U5      U R                   S9nUR                  R                  nU HI  n[        R                  " S/UR                  S   -  5      n	SXU   '   [        R                  " U	5      Xx'   MK     SR                  U5      /n
[        XzUR                  S   5      n	USS2U	4   n[        X5      nUR                  5       nUR                   UR                  S   :  a  [#        S	5      eU
 H  nUR%                  U5        M     U H  nXx   U	   Xx'   M     UR&                  nUR(                  nUR*                  n/ S
Qn[,        R.                  " [        R0                  " S5      US9nU GH  nU R                  U;  d  M  US:w  d  M  [3        XX}U/5      u  nnUU-
  nUU-
  U-  nUSR                  USS 5      :X  d  US-   U-   U;  a  X-  nUnO$[3        XX}US-   U-   /5      u  nnUU-
  nUU-
  U-  nUU-  n[4        R6                  R9                  UUU5      nUR;                  SS5      R;                  SS5      nUUR<                  US4'   UUR<                  US4'   UUR<                  US4'   UUR<                  US4'   GM     [?        U5      $ s  snf )zVestimate the model and compute the Anova table

Returns
-------
AnovaResults instance
z
C(%s, Sum)*r6   Fr!   T:Nz$Independent variables are collinear.)F ValueNum DFDen DFPr > F)r   r.   r&   	Interceptr   zC( z, Sum)r   r   r   r   ) r6   r   r   r   r   patsydmatrixjoinr7   term_name_slicesr;   rV   r4   r   r   fitrankr   popr   r]   r\   r   r   r<   r   r   r^   r_   replacer[   AnovaResults)r   r   rf   r   r   ro   r   r   r   r   term_excluder   resultsr   r]   r\   r'   anova_tablessr1	df_resid1df1msmmsedf2r   pr   s                              r   r   AnovaRM.fit&  s    IIdkk")) -1KK8Kq,"K8-9$MM#((7+$))<mm44C((E71771:-.C$(CC !!xx}K  )*AGGAJ?afI A	))+::
"CDDAOOA C*/4K ##kk;ll288F#3WEC||3&3++="4+u#6i(*czS(388GCRL11sW,K?.C"C&8ksW,-'/OD) $h.C#:,C#IGGJJq#s+{{4,44XrB34i025h/25h/23h// 2 K((m 9s   L)r   r   r6   r   r   r   )NNN)
__name__
__module____qualname____firstlineno____doc__r   r   r   r   __static_attributes__ r   r   r   r     s(    5n DH $$<0*B@)r   r   c                   *    \ rS rSrSrS rS rS rSrg)r   ii  zD
Anova results class

Attributes
----------
anova_table : DataFrame
c                     Xl         g Nr   )r   r   s     r   r   AnovaResults.__init__q  s    &r   c                 >    U R                  5       R                  5       $ r   )summary__str__r   s    r   r   AnovaResults.__str__t  s    ||~%%''r   c                     [         R                  " 5       nUR                  S5        UR                  U R                  5        U$ )zLcreate summary results

Returns
-------
summary : summary2.Summary instance
Anova)r
   Summary	add_titleadd_dfr   )r   summs     r   r   AnovaResults.summaryw  s5     !wD$$%r   r   N)	r   r   r   r   r   r   r   r   r   r   r   r   r   r   i  s    '(r   r   __main__)olsz	moore.csvr!   )partner_status
conformity	fcategoryfscore)skiprowsrG   z5conformity ~ C(fcategory, Sum)*C(partner_status, Sum)r   z#conformity ~ C(partner_status, Sum)r*   )r    )&statsmodels.compat.pythonr   numpyr;   pandasr   r   r   r   rt   r    statsmodels.formula.formulatoolsr   r   r	   statsmodels.iolibr
   #statsmodels.regression.linear_modelr   r   rI   r=   r>   r?   r   r   r   r   r   r   statsmodels.formula.apir  read_csvmoorer   moore_lmmooreBrH   r   r   r   <module>r     s    ,   #   
 ' 3F"9>x4nVp%NgTD}) })@ 8 z+
 OOK!#9:E J  #  6UCGGIF X1%E/ r   