
    >h                     <   S r SSKJr  SSKJr  SSKrSSKrSSK	J
r
  SSKJrJr  SSKJrJr  SSKJrJrJrJr  SS	KJrJrJrJrJrJr  SS
KJr  / SQrSr S r!S r" " S S5      r#S/S jr$S/S jr%  S0S jr&  S1S jr'\" SS5      S2SSSSS.S jj5       r(\" SS5      S3S j5       r)\" SS5      S2S j5       r*S\RV                  S\,S S4S! jr-S4S" jr.S# r/  S5S$ jr0\" S%S5        S6S& j5       r1S7S' jr2  S8S( jr3S9S) jr4S* r5\" S+S5        S:S, j5       r6S- r7S;S. jr8g)<a  
Various Statistical Tests

Author: josef-pktd
License: BSD-3

Notes
-----
Almost fully verified against R or Gretl, not all options are the same.
In many cases of Lagrange multiplier tests both the LM test and the F test is
returned. In some but not all cases, R has the option to choose the test
statistic. Some alternative test statistic results have not been verified.

TODO
* refactor to store intermediate results

missing:

* pvalues for breaks_hansen
* additional options, compare with R, check where ddof is appropriate
* new tests:
  - breaks_ap, more recent breaks tests
  - specification tests against nonparametric alternatives
    )deprecate_kwarg)IterableN)stats)OLSRegressionResultsWrapper)anderson_statistic	normal_ad)kstest_exponential
kstest_fitkstest_normal
lilliefors)
array_like	bool_like	dict_like
float_likeint_likestring_like)lagmat)r   r   r   r
   r	   compare_cox	compare_jacorr_breusch_godfreyacorr_ljungboxacorr_lmhet_archhet_breuschpaganhet_goldfeldquandt	het_white
spec_white	linear_lmlinear_rainbowlinear_harvey_collierr   z_The exog in results_x and in results_z are nested. {test} requires that models are non-nested.
c                    U R                   S   UR                   S   :  a  g[        R                  R                  XSS9S   nXU-  -
  n[        R                  R	                  [        R
                  X4   5      UR                   S   :H  $ )z
Check if a larger exog nests a smaller exog

Parameters
----------
small : ndarray
    exog from smaller model
large : ndarray
    exog from larger model

Returns
-------
bool
    True if small is nested by large
   FNrcondr   )shapenplinalglstsqmatrix_rankc_)smalllargecoeferrs       oC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/stats/diagnostic.py_check_nested_exogr1   @   ss    " {{1~A&99??5t?4Q7D
$,
C99  uz!23u{{1~EE    c                    [        U [        5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        R                  " U R
                  R                  UR
                  R                  5      (       d  [        S5      eU R
                  R                  nUR
                  R                  nSnUR                  S   UR                  S   ::  a  U=(       d    [        X#5      nU$ U=(       d    [        X25      nU$ )Nz2results_x must come from a linear regression modelz2results_z must come from a linear regression modelz/endogenous variables in models are not the sameFr#   )
isinstancer   	TypeErrorr'   allclosemodelendog
ValueErrorexogr&   r1   )	results_x	results_zxznesteds        r0   _check_nested_resultsr@   X   s    i!9::LMMi!9::LMM;;y,,ioo.C.CDDJKKAAFwwqzQWWQZ3-a3 M 3-a3Mr2   c                       \ rS rSrS rSrg)ResultsStorek   c                 D    [        U SU R                  R                  5      $ )N_str)getattr	__class____name__)selfs    r0   __str__ResultsStore.__str__l   s    tVT^^%<%<==r2    N)rH   
__module____qualname____firstlineno__rJ   __static_attributes__rL   r2   r0   rB   rB   k   s    >r2   rB   c                 8   [        X5      (       a  [        [        R                  SS95      eU R                  R
                  nUR                  R
                  nU R                  R                  R                  S   nU R                  U-  nUR                  U-  nU R                  n[        X5      R                  5       n	U	R                  n
[        X5      R                  5       nUR                  nU[        R                  " U
R                  U
5      U-  -   nUS-  [        R                   " U5      [        R                   " U5      -
  -  nU[        R                  " UR                  U5      -  US-  -  nU[        R"                  " U5      -  nS[$        R&                  R)                  [        R*                  " U5      5      -  nU(       aN  [-        5       nU	Ul        UUl        UUl        UUl        UUl        UUl        [$        R&                  Ul        UUU4$ UU4$ )a  
Compute the Cox test for non-nested models

Parameters
----------
results_x : Result instance
    result instance of first model
results_z : Result instance
    result instance of second model
store : bool, default False
    If true, then the intermediate results are returned.

Returns
-------
tstat : float
    t statistic for the test that including the fitted values of the
    first model in the second model has no effect.
pvalue : float
    two-sided pvalue for the t statistic
res_store : ResultsStore, optional
    Intermediate results. Returned if store is True.

Notes
-----
Tests of non-nested hypothesis might not provide unambiguous answers.
The test should be performed in both directions and it is possible
that both or neither test rejects. see [1]_ for more information.

Formulas from [1]_, section 8.3.4 translated to code

Matches results for Example 8.3 in Greene

References
----------
.. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
   5th edition. (2002).
zCox comparisontestr          @   )r@   r9   NESTED_ERRORformatr7   r:   r8   r&   ssrfittedvaluesr   fitresidr'   dotTlogsqrtr   normsfabsrB   res_dxres_xzxc01v01qpvaluedist)r;   r<   storer=   r>   nobssigma2_xsigma2_zyhat_xrc   err_zxrd   err_xzx	sigma2_zxre   rf   rg   pvalress                      r0   r   r   p   s   L Y22,,2B,CDDAA??  &&q)D}}t#H}}t#H##F^!F\\F&n  "GmmG266&((F3d::I
)rvvh'"&&*;;
<C
RVVGIIw/
/)q.
@CbggclAuzz}}RVVAY''Dn

::$|d7Nr2   c                    [        X5      (       a  [        [        R                  SS95      eU R                  R
                  nUR                  R                  nU R                  n[        U[        R                  " XT45      5      R                  5       nUR                  S   nUR                  S   nU(       aE  [        5       n	Xil        [         R"                  " UR$                  5      U	l        Xyl        Xl        XxU	4$ Xx4$ )a  
Compute the J-test for non-nested models

Parameters
----------
results_x : RegressionResults
    The result instance of first model.
results_z : RegressionResults
    The result instance of second model.
store : bool, default False
    If true, then the intermediate results are returned.

Returns
-------
tstat : float
    t statistic for the test that including the fitted values of the
    first model in the second model has no effect.
pvalue : float
    two-sided pvalue for the t statistic
res_store : ResultsStore, optional
    Intermediate results. Returned if store is True.

Notes
-----
From description in Greene, section 8.3.3. Matches results for Example
8.3, Greene.

Tests of non-nested hypothesis might not provide unambiguous answers.
The test should be performed in both directions and it is possible
that both or neither test rejects. see Greene for more information.

References
----------
.. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
   5th edition. (2002).
zJ comparisonrR   r   )r@   r9   rV   rW   r7   r8   r:   rY   r   r'   column_stackrZ   tvaluespvaluesrB   res_zxr   tdf_residri   teststatrh   )
r;   r<   rj   yr>   rn   rx   tstatrr   rs   s
             r0   r   r      s    L Y22,,.,ABBAA##FBOOVK01557FNN1E>>!Dn
776??+
C;r2   	nonrobustc                 B   [        X5      (       a  [        [        R                  SS95      eU R                  R
                  nU R                  R                  nUR                  R                  nS nU" XEXbU5      nU" XFXRU5      n	[        R                  " X/SS// SQS9$ )a  
Davidson-MacKinnon encompassing test for comparing non-nested models

Parameters
----------
results_x : Result instance
    result instance of first model
results_z : Result instance
    result instance of second model
cov_type : str, default "nonrobust
    Covariance type. The default is "nonrobust` which uses the classic
    OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
    to use White's covariance estimator. All covariance types supported
    by ``OLS.fit`` are accepted.
cov_kwargs : dict, default None
    Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit
    for more details.

Returns
-------
DataFrame
    A DataFrame with two rows and four columns. The row labeled x
    contains results for the null that the model contained in
    results_x is equivalent to the encompassing model. The results in
    the row labeled z correspond to the test that the model contained
    in results_z are equivalent to the encompassing model. The columns
    are the test statistic, its p-value, and the numerator and
    denominator degrees of freedom. The test statistic has an F
    distribution. The numerator degree of freedom is the number of
    variables in the encompassing model that are not in the x or z model.
    The denominator degree of freedom is the number of observations minus
    the number of variables in the nesting model.

Notes
-----
The null is that the fit produced using x is the same as the fit
produced using both x and z. When testing whether x is encompassed,
the model estimated is

.. math::

    Y = X\beta + Z_1\gamma + \epsilon

where :math:`Z_1` are the columns of :math:`Z` that are not spanned by
:math:`X`. The null is :math:`H_0:\gamma=0`. When testing whether z is
encompassed, the roles of :math:`X` and :math:`Z` are reversed.

Implementation of  Davidson and MacKinnon (1993)'s encompassing test.
Performs two Wald tests where models x and z are compared to a model
that nests the two. The Wald tests are performed by using an OLS
regression.
zTesting encompassingrR   c                 B   X![         R                  R                  XS S9S   -  -
  n[         R                  R                  U5      u  pgn[         R                  " [         R
                  5      R                  n	UR                  SSS9[        UR                  5      -  U	-  n
[         R                  " U5      U
:  nXXS S 2U4   -  n[         R                  " X/5      nUR                  S   nUR                  S   n[        X5      R                  X4S9n[         R                  " X45      n[         R                  " U5      US S 2U* S 24'   UR                  USSS9nUR                   UR"                  nn[%        UR&                  5      [%        UR(                  5      nnUUUU4$ )	Nr$   r   T)axiskeepdimsr#   )cov_typecov_kwdsuse_fscalar)r'   r(   r)   svdfinfodoubleepsmaxr&   rb   hstackr   rZ   zeroseye	wald_test	statisticrh   intdf_numdf_denom)r8   abcov_estr   r/   usvr   tolnon_zeroaugaug_regk_akrs   r_matrixrS   statrh   r   r   s                          r0   _test_nested*compare_encompassing.<locals>._test_nested-  sT   biiooa$o7:::))--$ahhryy!%%eede+c#))n<sB66!9s?akN"))QH%iilMM!%!%%w%J88SH%VVC[SDE}}XT$}?~~t{{ft{{+S-?VVX--r2   r=   r>   )r   rh   r   r   )indexcolumns)	r@   r9   rV   rW   r7   r8   r:   pd	DataFrame)
r;   r<   r   
cov_kwargsr|   r=   r>   r   x_nestedz_nesteds
             r0   compare_encompassingr      s    l Y22,,2H,IJJAAA.& A!z:HA!z:H<<,"Cj HJ Jr2   c           
         SSK Jn  [        U S5      n [        USSS9n[        USSS9nUb  US	::  a  [	        S
5      eUS:  a  [	        S5      eU R
                  S   nU(       Ga  US	-
  n	U" X	SS9n
U(       dF  XS-   -  [        R                  " U
S	U	S	-    S-  U[        R                  " S	U	S	-   5      -
  -  5      -  nO"U[        R                  " U
S	U	S	-    S-  5      -  nSn[        R                  " U[        R                  " U5      -  5      n[        R                  " U
5      R                  5       [        R                  " U5      -  nX::  a2  U[        R                  " S	U5      [        R                  " U5      -  -
  nOUS[        R                  " S	U5      -  -
  n[        R                  " U5      n[        S	U5      n[        US5      n[        R                  " S	US	-   5      nOUb.  [        R                  " S	[        US-  SU-  5      S	-   [        S9nOiUc+  [        R                  " S	[        US-  S5      S	-   [        S9nO;[!        U["        5      (       d&  [        US5      n[        R                  " S	US	-   5      n[        USSS9nUR                  5       n	U" X	SS9n
U
S	U	S	-    S-  U[        R                  " S	U	S	-   5      -
  -  nXS-   -  [        R                  " U5      US	-
     -  nX-
  n[        R$                  " U[        R&                  5      nUS:  n[(        R*                  R-                  UU   UU   5      UU'   U(       d  [.        R0                  " UUS.US9$ U[        R                  " U
S	U	S	-    S-  5      US	-
     -  n[        R$                  " U[        R&                  5      n[(        R*                  R-                  UU   UU   5      UU'   [.        R0                  " UUUUS.US9$ )aj  
Ljung-Box test of autocorrelation in residuals.

Parameters
----------
x : array_like
    The data series. The data is demeaned before the test statistic is
    computed.
lags : {int, array_like}, default None
    If lags is an integer then this is taken to be the largest lag
    that is included, the test result is reported for all smaller lag
    length. If lags is a list or array, then all lags are included up to
    the largest lag in the list, however only the tests for the lags in
    the list are reported. If lags is None, then the default maxlag is
    min(10, nobs // 5). The default number of lags changes if period
    is set.
boxpierce : bool, default False
    If true, then additional to the results of the Ljung-Box test also the
    Box-Pierce test results are returned.
model_df : int, default 0
    Number of degrees of freedom consumed by the model. In an ARMA model,
    this value is usually p+q where p is the AR order and q is the MA
    order. This value is subtracted from the degrees-of-freedom used in
    the test so that the adjusted dof for the statistics are
    lags - model_df. If lags - model_df <= 0, then NaN is returned.
period : int, default None
    The period of a Seasonal time series.  Used to compute the max lag
    for seasonal data which uses min(2*period, nobs // 5) if set. If None,
    then the default rule is used to set the number of lags. When set, must
    be >= 2.
auto_lag : bool, default False
    Flag indicating whether to automatically determine the optimal lag
    length based on threshold of maximum correlation value.

Returns
-------
DataFrame
    Frame with columns:

    * lb_stat - The Ljung-Box test statistic.
    * lb_pvalue - The p-value based on chi-square distribution. The
      p-value is computed as 1 - chi2.cdf(lb_stat, dof) where dof is
      lag - model_df. If lag - model_df <= 0, then NaN is returned for
      the pvalue.
    * bp_stat - The Box-Pierce test statistic.
    * bp_pvalue - The p-value based for Box-Pierce test on chi-square
      distribution. The p-value is computed as 1 - chi2.cdf(bp_stat, dof)
      where dof is lag - model_df. If lag - model_df <= 0, then NaN is
      returned for the pvalue.

See Also
--------
statsmodels.regression.linear_model.OLS.fit
    Regression model fitting.
statsmodels.regression.linear_model.RegressionResults
    Results from linear regression models.
statsmodels.stats.stattools.q_stat
    Ljung-Box test statistic computed from estimated
    autocorrelations.

Notes
-----
Ljung-Box and Box-Pierce statistic differ in their scaling of the
autocorrelation function. Ljung-Box test is has better finite-sample
properties.

References
----------
.. [*] Green, W. "Econometric Analysis," 5th ed., Pearson, 2003.
.. [*] J. Carlos Escanciano, Ignacio N. Lobato
      "An automatic Portmanteau test for serial correlation".,
      Volume 151, 2009.

Examples
--------
>>> import statsmodels.api as sm
>>> data = sm.datasets.sunspots.load_pandas().data
>>> res = sm.tsa.ARMA(data["SUNACTIVITY"], (1,1)).fit(disp=-1)
>>> sm.stats.acorr_ljungbox(res.resid, lags=[10], return_df=True)
       lb_stat     lb_pvalue
10  214.106992  1.827374e-40
r   )acfr=   periodToptionalmodel_dfFr#   zperiod must be >= 2zmodel_df must be >= 0)nlagsfftrU   g333333@lags   dtype
   r   )lb_stat	lb_pvalue)r   )r   r   bp_stat	bp_pvalue)statsmodels.tsa.stattoolsr   r   r   r9   r&   r'   cumsumaranger_   r^   rb   r   argmaxminr   r4   r   	full_likenanr   chi2ra   r   r   )r=   r   	boxpiercer   r   	return_dfauto_lagr   rk   maxlagsacfq_sacfrg   	thresholdthreshold_metricsacf2	qljungboxadj_lagsrr   loc
qboxpiercepvalbps                         r0   r   r   G  s   j .1cAfh6F*u=Hfk.//!|011771:D 1.Qh'iiQvz 2a 7#'"))Avz*B#B!D EEF BIId1VaZ&8A&=>>F GGAt,-	66$<++-= )ryyD1BFF4L@AFq299Q#556F yy 1d|f%yyD1H%		yyC	1v:6:#F	yyC	2.2#>h''f%yyD1H%dF%0DXXZF qE*D6A:!#tbii6A:.F'FGEq!BIIe$4TAX$>>IH<<	266*D
Q,C

inhsm<DI||	E"&( 	( 		$q!"4"9:4!8DDJ\\)RVV,F**--
3#?F3K<<ID$.VE"$ $r2   r   r   )r   ddofr   r   c                   [        U SSS9n [        US5      nUc  0 OUn[        US5      nU R                  S   nUb  Uc  [	        US-  S	U-  5      nOUc  [	        S
US-  5      nOUn[        U SS2S4   USS9n	U	R                  S   n[        R                  [        R                  " US45      U	4   n	X* S n
[        5       nUn[        XSS2SUS-   24   5      R                  UUS9n[        UR                  5      n[        UR                  5      nUS:X  a2  Xt-
  UR                  -  n[         R"                  R%                  UU5      nO}[        R&                  " [        R(                  " US45      [        R*                  " U5      45      nUR-                  USSS9n[        UR.                  5      n[        UR0                  5      nU(       a  Xl        Xl        UUXU4$ UUX4$ )a  
Lagrange Multiplier tests for autocorrelation.

This is a generic Lagrange Multiplier test for autocorrelation. Returns
Engle's ARCH test if resid is the squared residual array. Breusch-Godfrey
is a variation on this test with additional exogenous variables.

Parameters
----------
resid : array_like
    Time series to test.
nlags : int, default None
    Highest lag to use.
store : bool, default False
    If true then the intermediate results are also returned.
period : int, default none
    The period of a Seasonal time series.  Used to compute the max lag
    for seasonal data which uses min(2*period, nobs // 5) if set. If None,
    then the default rule is used to set the number of lags. When set, must
    be >= 2.
ddof : int, default 0
    The number of degrees of freedom consumed by the model used to
    produce resid. The default value is 0.
cov_type : str, default "nonrobust"
    Covariance type. The default is "nonrobust` which uses the classic
    OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
    to use White's covariance estimator. All covariance types supported
    by ``OLS.fit`` are accepted.
cov_kwargs : dict, default None
    Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit for
    more details.

Returns
-------
lm : float
    Lagrange multiplier test statistic.
lmpval : float
    The p-value for Lagrange multiplier test.
fval : float
    The f statistic of the F test, alternative version of the same
    test based on F test for the parameter restriction.
fpval : float
    The pvalue of the F test.
res_store : ResultsStore, optional
    Intermediate results. Only returned if store=True.

See Also
--------
het_arch
    Conditional heteroskedasticity testing.
acorr_breusch_godfrey
    Breusch-Godfrey test for serial correlation.
acorr_ljung_box
    Ljung-Box test for serial correlation.

Notes
-----
The test statistic is computed as (nobs - ddof) * r2 where r2 is the
R-squared from a regression on the residual on nlags lags of the
residual.
r[   r#   ndimr   Nr   r   r   rU   r   bothtrimr   r   r~   FTr   )r   r   r   r&   r   r   r'   r+   onesrB   r   rZ   floatfvaluef_pvaluersquaredr   r   ra   r   r   r   r   r   rh   resolsusedlag)r[   r   rj   r   r   r   r   rk   r   xdallxshort	res_storer   r   fvalfpvallmlmpvalr   	test_stats                       r0   r   r     s   @ ugA.E8Z0H!)zJ:|4J;;q>DemTQYF
+	R#5D>67E;;q>DEE"''4)$e+,E56]FIGq,7Q;,/044h@J 5 LFD&//"E;kV__,r7+ 99bhh|4bffWoFG$$XU4$H	9&&'y''(!#64	1164&&r2   c                     [        U S-  XUS9$ )a  
Engle's Test for Autoregressive Conditional Heteroscedasticity (ARCH).

Parameters
----------
resid : ndarray
    residuals from an estimation, or time series
nlags : int, default None
    Highest lag to use.
store : bool, default False
    If true then the intermediate results are also returned
ddof : int, default 0
    If the residuals are from a regression, or ARMA estimation, then there
    are recommendations to correct the degrees of freedom by the number
    of parameters that have been estimated, for example ddof=p+q for an
    ARMA(p,q).

Returns
-------
lm : float
    Lagrange multiplier test statistic
lmpval : float
    p-value for Lagrange multiplier test
fval : float
    fstatistic for F test, alternative version of the same test based on
    F test for the parameter restriction
fpval : float
    pvalue for F test
res_store : ResultsStore, optional
    Intermediate results. Returned if store is True.

Notes
-----
verified against R:FinTS::ArchTest
rU   )r   rj   r   )r   )r[   r   rj   r   s       r0   r   r   K  s    J EQJetDDr2   resultsrs   c                 ,   [         R                  " U R                  5      R                  5       nUR                  S:w  a  [        S5      eU R                  R                  nUR                  S   nUc  [        SUS-  5      n[         R                  " [         R                  " U5      U45      n[        USS2S4   USS9nUR                  S   n[         R                  [         R                  " US45      U4   nX5* S nUc  UnO[         R                  " XF45      nUR                  S   n	[!        Xx5      R#                  5       n
U
R%                  [         R&                  " XX-
  5      5      nUR(                  nUR*                  n[-        [         R                  " U5      5      n[-        [         R                  " U5      5      nXZR.                  -  n[0        R2                  R5                  X5      nU(       a  [7        5       nU
Ul        UUl        XXU4$ XX4$ )	av  
Breusch-Godfrey Lagrange Multiplier tests for residual autocorrelation.

Parameters
----------
res : RegressionResults
    Estimation results for which the residuals are tested for serial
    correlation.
nlags : int, optional
    Number of lags to include in the auxiliary regression. (nlags is
    highest lag).
store : bool, default False
    If store is true, then an additional class instance that contains
    intermediate results is returned.

Returns
-------
lm : float
    Lagrange multiplier test statistic.
lmpval : float
    The p-value for Lagrange multiplier test.
fval : float
    The value of the f statistic for F test, alternative version of the
    same test based on F test for the parameter restriction.
fpval : float
    The pvalue for F test.
res_store : ResultsStore
    A class instance that holds intermediate results. Only returned if
    store=True.

Notes
-----
BG adds lags of residual to exog in the design matrix for the auxiliary
regression with residuals as endog. See [1]_, section 12.7.1.

References
----------
.. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
  5th edition. (2002).
r#   zFModel resid must be a 1d array. Cannot be used on multivariate models.r   Nr   r   r   r   )r'   asarrayr[   squeezer   r9   r7   r:   r&   r   concatenater   r   r+   r   ru   r   rZ   f_testr   r   rh   r   r   r   r   ra   rB   r   r   )rs   r   rj   r=   exog_oldrk   r   r   r:   k_varsr   ftr   r   r   r   r   s                    r0   r   r   s  s   V 	

399%%'Avv{ 1 2 	2yy~~H771:D}B	"
+,A1QW:u62E;;q>DEE"''4)$e+,EuvYF01ZZ]F""$F	rvveV^<	=B99DIIED!"D"**U#$E		BZZ]]2%F  N	!	!	4	114&&r2   r=   	test_namereturnc                     U R                  SS9n[        R                  " X R                  SS9-
  S:H  US:g  -  5      (       a  U R                  S   S:  a  [        U S35      eg)z
Check validity of the exogenous regressors in a heteroskedasticity test

Parameters
----------
x : ndarray
    The exogenous regressor array
test_name : str
    The test name for the exception
r   r   r#   rU   zI test requires exog to have at least two columns where one is a constant.N)r   r'   anyr   r&   r9   )r=   r   x_maxs      r0   _check_het_testr     sm     EEqEMEFFUUUU]*q0UaZ@AA771:>k 3 3
 	
 r2   c                    [        USSS9n[        US5        [        U SSS9S-  nU(       d  U[        R                  " U5      -  nUR                  u  pV[        XC5      R                  5       nUR                  nUR                  n	U(       a  XWR                  -  OUR                  S-  n
U
[        R                  R                  XS-
  5      X4$ )u	  
Breusch-Pagan Lagrange Multiplier test for heteroscedasticity

The tests the hypothesis that the residual variance does not depend on
the variables in x in the form

.. :math: \sigma_i = \sigma * f(\alpha_0 + \alpha z_i)

Homoscedasticity implies that :math:`\alpha=0`.

Parameters
----------
resid : array_like
    For the Breusch-Pagan test, this should be the residual of a
    regression. If an array is given in exog, then the residuals are
    calculated by the an OLS regression or resid on exog. In this case
    resid should contain the dependent variable. Exog can be the same as x.
exog_het : array_like
    This contains variables suspected of being related to
    heteroscedasticity in resid.
robust : bool, default True
    Flag indicating whether to use the Koenker version of the
    test (default) which assumes independent and identically distributed
    error terms, or the original Breusch-Pagan version which assumes
    residuals are normally distributed.

Returns
-------
lm : float
    lagrange multiplier statistic
lm_pvalue : float
    p-value of lagrange multiplier test
fvalue : float
    f-statistic of the hypothesis that the error variance does not depend
    on x
f_pvalue : float
    p-value for the f-statistic

Notes
-----
Assumes x contains constant (for counting dof and calculation of R^2).
In the general description of LM test, Greene mentions that this test
exaggerates the significance of results in small or moderately large
samples. In this case the F-statistic is preferable.

**Verification**

Chisquare test statistic is exactly (<1e-13) the same result as bptest
in R-stats with defaults (studentize=True).

**Implementation**

This is calculated using the generic formula for LM test using $R^2$
(Greene, section 17.6) and not with the explicit formula
(Greene, section 11.4.3), unless `robust` is set to False.
The degrees of freedom for the p-value assume x is full rank.

References
----------
.. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
   5th edition. (2002).
.. [2]  Breusch, T. S.; Pagan, A. R. (1979). "A Simple Test for
   Heteroskedasticity and Random Coefficient Variation". Econometrica.
   47 (5): 1287–1294.
.. [3] Koenker, R. (1981). "A note on studentizing a test for
   heteroskedasticity". Journal of Econometrics 17 (1): 107–112.
exog_hetrU   r   zThe Breusch-Paganr[   r#   )r   r   r'   meanr&   r   rZ   r   r   r   essr   r   ra   )r[   r   robustr=   r|   rk   nvarsr   r   r   r   s              r0   r   r     s    H 	8Za0AA*+5'*a/A
N''KDY]]_F==DOOE#)	vzzA~Buzz}}R+T88r2   c                 ^   [        USSS9n[        U SSUR                  S   S4S9n[        US5        UR                  u  pE[        R                  " U5      u  pgUS	S	2U4   US	S	2U4   -  nUR                  u  pHXUS-
  -  S
-  U-   :X  d   e[        US-  U5      R                  5       n	U	R                  n
U	R                  nXIR                  -  nU	R                  [        R                  R                  U5      S-
  :X  d   e[        R                  R                  XR                  5      nXX4$ )a  
White's Lagrange Multiplier Test for Heteroscedasticity.

Parameters
----------
resid : array_like
    The residuals. The squared residuals are used as the endogenous
    variable.
exog : array_like
    The explanatory variables for the variance. Squares and interaction
    terms are automatically included in the auxiliary regression.

Returns
-------
lm : float
    The lagrange multiplier statistic.
lm_pvalue :float
    The p-value of lagrange multiplier test.
fvalue : float
    The f-statistic of the hypothesis that the error variance does not
    depend on x. This is an alternative test variant not the original
    LM test.
f_pvalue : float
    The p-value for the f-statistic.

Notes
-----
Assumes x contains constant (for counting dof).

question: does f-statistic make sense? constant ?

References
----------
Greene section 11.4.1 5th edition p. 222. Test statistic reproduces
Greene 5th, example 11.3.
r:   rU   r   r[   r   r#   )r   r&   zWhite's heteroskedasticityNrT   )r   r&   r   r'   triu_indicesr   rZ   r   r   r   df_modelr(   r*   r   r   ra   )r[   r:   r=   r|   rk   nvars0i0i1r  r   r   r   r   r   s                 r0   r   r   .  s   J 	4a(A5'!''!*aAAA3477LD__V$FBQU8a2hD**KDfqj)B.7777a""$F==DOOE		B
 ??bii33D9A====ZZ]]2/Ft""r2   c                 r   [         R                  " U5      n[         R                  " U 5      n UR                  u  pxUc  US-  nOSU:  a  US:  a  [        Xs-  5      nUc  Un	O!SU:  a  US:  a  U[        Xt-  5      -   n	OX4-   n	Ub)  [         R                  " USS2U4   5      n
X
   n XSS24   n[        U SU USU 5      R                  5       n[        X	S XS 5      R                  5       nUR                  UR                  -  nUR                  5       S;   a7  [        R                  R                  XR                  UR                  5      nSnOUR                  5       S;   a;  [        R                  R                  SU-  UR                  UR                  5      nS	nOUR                  5       S
;   az  [        R                  R                  XR                  UR                  5      n[        R                  R                  XR                  UR                  5      nS[        UU5      -  nSnO[        S5      eU(       at  [!        5       nSUl        UUl        UUl        UR                  UR                  4Ul        UUl        UUl        UUl        UUl        SR3                  XU5      Ul        XUU4$ XU4$ )a  
Goldfeld-Quandt homoskedasticity test.

This test examines whether the residual variance is the same in 2
subsamples.

Parameters
----------
y : array_like
    endogenous variable
x : array_like
    exogenous variable, regressors
idx : int, default None
    column index of variable according to which observations are
    sorted for the split
split : {int, float}, default None
    If an integer, this is the index at which sample is split.
    If a float in 0<split<1 then split is interpreted as fraction
    of the observations in the first sample. If None, uses nobs//2.
drop : {int, float}, default None
    If this is not None, then observation are dropped from the middle
    part of the sorted series. If 0<split<1 then split is interpreted
    as fraction of the number of observations to be dropped.
    Note: Currently, observations are dropped between split and
    split+drop, where split and drop are the indices (given by rounding
    if specified as fraction). The first sample is [0:split], the
    second sample is [split+drop:]
alternative : {"increasing", "decreasing", "two-sided"}
    The default is increasing. This specifies the alternative for the
    p-value calculation.
store : bool, default False
    Flag indicating to return the regression results

Returns
-------
fval : float
    value of the F-statistic
pval : float
    p-value of the hypothesis that the variance in one subsample is
    larger than in the other subsample
ordering : str
    The ordering used in the alternative.
res_store : ResultsStore, optional
    Storage for the intermediate and final results that are calculated

Notes
-----
The Null hypothesis is that the variance in the two sub-samples are the
same. The alternative hypothesis, can be increasing, i.e. the variance
in the second sample is larger than in the first, or decreasing or
two-sided.

Results are identical R, but the drop option is defined differently.
(sorting by idx not tested yet)
NrU   r   r#   )iinc
increasingr  )ddec
decreasing      ?r  )2z2-sided	two-sidedr  zinvalid alternativez5Test Results for Goldfeld-Quandt test ofheterogeneityzThe Goldfeld-Quandt test for null hypothesis that the variance in the second
subsample is {} than in the first subsample:
F-statistic ={:8.4f} and p-value ={:8.4f})r'   r   r&   r   argsortr   rZ   	mse_residlowerr   fra   rz   cdfr   r9   rB   __doc__r   r   df_fvalresols1resols2orderingsplitrW   rE   )r|   r=   idxr  dropalternativerj   rk   r  start2xsortindr  r  r   r   r  fpval_smfpval_lars   s                      r0   r   r   h  sr   r 	

1A


1A''KD}	
e)%!)DL!|
d(T[))
::a3i(KkN!FU)QvY'++-G!G*aj)--/Gw000D88

4!1!173C3CD				 :	:

29g&6&68H8HI				 =	=77;;t%5%5w7G7GH77::d$4$4g6F6FGC(++.//n&	'')9)9:	- .4VHE-J 	
 Hc))  r2   resultc                    [        U [        5      (       d  [        S5      e[        U R                  R
                  5      (       a2  U R                  R                  R                  S   S:X  a  [        S5      e[        USSS9n[        USSS	9n[        US
5      n[        U[        5      (       a/  US:  a  [        S5      e[        R                  " SUS-   [        S9nOr [        R                  " U[        S9nUR"                  S:w  d=  [%        ['        U5      5      UR                  S   :w  d  US:  R)                  5       (       a  [        S5      eU R                  R                  nUS:X  a)  [        R*                  " U R,                  5      SS2S4   nGOIUS:X  as  U R                  R                  nXfR/                  SS9:H  XfR1                  SS9:H  -  nUR3                  SS9nUR3                  5       (       a  [        S5      eUSS2U) 4   nOSSKJn	  UnU R
                  (       ar  [        R                  " UR                  S   5      R9                  5       n
U
R;                  [        U R                  R<                  R>                  5      5        USS2U
4   nU	" US[        U R
                  5      [        U R
                  5      SS9nUR@                  SS2SS24   n[        RB                  " U/U Vs/ s H  oU-  PM	     sn-   5      nU R                  RD                  nU" U R                  R<                  RF                  U5      nUc  0 OUnURI                  XES9n UR                  S   UR                  S   -
  nUR                  S   n[        RJ                  " UUUU-
  S9nU RM                  UUSS9$ ! [          a    [        S5      ef = fs  snf )a1  
Ramsey's RESET test for neglected nonlinearity

Parameters
----------
res : RegressionResults
    A results instance from a linear regression.
power : {int, List[int]}, default 3
    The maximum power to include in the model, if an integer. Includes
    powers 2, 3, ..., power. If an list of integers, includes all powers
    in the list.
test_type : str, default "fitted"
    The type of augmentation to use:

    * "fitted" : (default) Augment regressors with powers of fitted values.
    * "exog" : Augment exog with powers of exog. Excludes binary
      regressors.
    * "princomp": Augment exog with powers of first principal component of
      exog.
use_f : bool, default False
    Flag indicating whether an F-test should be used (True) or a
    chi-square test (False).
cov_type : str, default "nonrobust
    Covariance type. The default is "nonrobust` which uses the classic
    OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
    to use White's covariance estimator. All covariance types supported
    by ``OLS.fit`` are accepted.
cov_kwargs : dict, default None
    Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit
    for more details.

Returns
-------
ContrastResults
    Test results for Ramsey's Reset test. See notes for implementation
    details.

Notes
-----
The RESET test uses an augmented regression of the form

.. math::

   Y = X\beta + Z\gamma + \epsilon

where :math:`Z` are a set of regressors that are one of:

* Powers of :math:`X\hat{\beta}` from the original regression.
* Powers of :math:`X`, excluding the constant and binary regressors.
* Powers of the first principal component of :math:`X`. If the
  model includes a constant, this column is dropped before computing
  the principal component. In either case, the principal component
  is extracted from the correlation matrix of remaining columns.

The test is a Wald test of the null :math:`H_0:\gamma=0`. If use_f
is True, then the quadratic-form test statistic is divided by the
number of restrictions and the F distribution is used to compute
the critical value.
z/result must come from a linear regression modelr#   zjexog contains only a constant column. The RESET test requires exog to have at least 1 non-constant column.	test_type)fittedr:   princomp)optionsr   Tr   r   rU   zpower must be >= 2r   z,power must be an integer or list of integersr   z.power must contains distinct integers all >= 2r(  Nr:   r   z+Model contains only constant or binary data)PCAnipals)ncompstandardizedemeanmethodr   )r   r   )'r4   r   r5   boolr7   
k_constantr:   r&   r9   r   r   r   r   r'   r   array	Exceptionr   lensetr   r   rY   r   r   allstatsmodels.multivariate.pcar+  tolistpopdata	const_idxfactorsr   rG   r8   rZ   r   r   )rs   powerr'  r   r   r   r:   r   binaryr+  retainpcapaug_exog	mod_classmodnrestrnparamsr_mats                      r0   linear_resetrI    s9   | c344IJJCII  !!ciinn&:&:1&=&B 0 1 	1 I{$BDI:|dCJeW%E%19122		!UQYc2	MHHU#.E ::?c#e*oQ?!!MNN99>>DHjj))*1d73	f	iinn888++a8H0HI#::<<JKK!fW*o4>>YYsyy|,335FJJs399>>3345ai.C#QD,@cnn-h@kk!RaR% yy$U";U!8U";;<H		##I
CIINN(((
3C!)zJ
''8'
;C^^AA.FnnQGFF67gfn5E==eD=99E  	MKLL	M4 #<s   N5 3O
5Oc                 R    [        XSUS9n[        R                  " US   SS S5      $ )a  
Harvey Collier test for linearity

The Null hypothesis is that the regression is correctly modeled as linear.

Parameters
----------
res : RegressionResults
    A results instance from a linear regression.
order_by : array_like, default None
    Integer array specifying the order of the residuals. If not provided,
    the order of the residuals is not changed. If provided, must have
    the same number of observations as the endogenous variable.
skip : int, default None
    The number of observations to use for initial OLS, if None then skip is
    set equal to the number of regressors (columns in exog).

Returns
-------
tvalue : float
    The test statistic, based on ttest_1sample.
pvalue : float
    The pvalue of the test.

See Also
--------
statsmodels.stats.diadnostic.recursive_olsresiduals
    Recursive OLS residual calculation used in the test.

Notes
-----
This test is a t-test that the mean of the recursive ols residuals is zero.
Calculating the recursive residuals might take some time for large samples.
ffffff?)skipalphaorder_by   Nr   )recursive_olsresidualsr   ttest_1samp)rs   rN  rL  rrs       r0   r!   r!   P  s0    L 
 dX	NBRU12Y**r2   c                 6   [        U [        5      (       d  [        S5      e[        US5      n[	        US5      nU R
                  nU R                  R                  nU R                  R                  nUb  U(       a  [        S5      eUb  [        U[        R                  5      (       a  [        USSSS9nO[        U[        5      (       a  U/n U R                  R                  R                  U   R!                  5       nS
n	X;   a  U	S-  n	X;   a  M  [        R&                  " UR(                  S   5      X'   UR+                  U5      n[        R,                  " X   5      nXb   nXr   nU(       a  Uc  [/        U5      S-  OUn[        U[0        5      (       a+  SUs=::  a  S::  d  O  [        S5      e[/        XES-
  -  5      nO)[3        US5      nSUs=:  a	  US-
  :  d  O  [        S5      eXtUS-    n
SSKJn   Xz-
  n[        R8                  R;                  UR<                  U-  U-  5      nU" XzSUS9n[        RB                  " URE                  5       5      nXo   nX   n[        RF                  " SSU-
  -  U-  5      RI                  [.        5      n[        RJ                  " UX-  -   5      RI                  [.        5      nUU-
  UR(                  S   :  a  [        S5      e[M        UU5      n[O        UU   UU   5      RQ                  5       nUR                  R                  R(                  S   nURR                  nU RR                  nUU-
  UU-
  -  U-  URT                  -  n[V        RX                  R[                  UUU-
  URT                  5      nUU4$ ! ["        [$        4 a    [        S	5      ef = f! [        R8                  R>                   aF    XwRA                  S5      -
  n[        R8                  R;                  UR<                  U-  U-  5      n GNf = f)a@  
Rainbow test for linearity

The null hypothesis is the fit of the model using full sample is the same
as using a central subset. The alternative is that the fits are difference.
The rainbow test has power against many different forms of nonlinearity.

Parameters
----------
res : RegressionResults
    A results instance from a linear regression.
frac : float, default 0.5
    The fraction of the data to include in the center model.
order_by : {ndarray, str, List[str]}, default None
    If an ndarray, the values in the array are used to sort the
    observations. If a string or a list of strings, these are interpreted
    as column name(s) which are then used to lexicographically sort the
    data.
use_distance : bool, default False
    Flag indicating whether data should be ordered by the Mahalanobis
    distance to the center.
center : {float, int}, default None
    If a float, the value must be in [0, 1] and the center is center *
    nobs of the ordered data.  If an integer, must be in [0, nobs) and
    is interpreted as the observation of the ordered data to use.

Returns
-------
fstat : float
    The test statistic based on the F test.
pvalue : float
    The pvalue of the test.

Notes
-----
This test assumes residuals are homoskedastic and may reject a correct
linear specification if the residuals are heteroskedastic.
z3res must be a results instance from a linear model.fracuse_distancez7order_by and use_distance cannot be simultaneouslyused.rN  r#   r   )r   r   zvorder_by must contain valid column names from the exog data used to construct res,and exog must be a pandas DataFrame.	__index___r   rU           r  z&center must be in (0, 1) when a float.centerz(center must be in [0, nobs) when an int.)cdistmahalanobis)metricVI      ?zqfrac is too small to perform test. frac * nobsmust be greater than the number of exogenousvariables in the model.).r4   r   r5   r   r   rk   r7   r8   r:   r9   r'   ndarrayr   strr;  	orig_exogcopy
IndexErrorKeyErrorr   r&   sort_valuesr   r   r   r   scipy.spatial.distancerZ  r(   invr]   LinAlgErrorr   r  ravelceilastypefloorslicer   rZ   rX   rz   r   r  ra   )rs   rT  rN  rU  rY  rk   r8   r:   colsname
center_obsrZ  r/   viri   r  lowidxuppidxmi_slres_minobs_miss_missfstatrr   s                            r0   r    r    {  s   P c344MNNdF#D\>:L88DIIOOE99>>D ! " 	"h

++!(JQeLH(C(($:Hyy~~//9>>@
 D, ,4::a=1DJ##H-Dzz$*-H~#)>Tavfe$$&'C' !IJJ6*+Ffh/Fv(q( !KLL*
0	3#Csuus{T12B TmCjj&
yWWSAH%,-44S9FXXft{*+2237FA& 3 4 	4 &&!EutE{+//1Fll  &&q)GJJE	B%ZD7N+e3fooEE77::eTG^V__=D$;a ) H !G H HH6 yy$$ 	31%Csuus{T12B	3s   1N 93N4 N14A PPc           	         Uc  S n[         R                  " U5      n[         R                  " X" USS2SS24   5      45      nUR                  u  pE[	        X5      R                  5       nUR                  [         R                  " US-
  US-  S-
  U5      5      nXFR                  -  n[        R                  R                  XS-
  5      n	XU4$ )a  
Lagrange multiplier test for linearity against functional alternative

# TODO: Remove the restriction
limitations: Assumes currently that the first column is integer.
Currently it does not check whether the transformed variables contain NaNs,
for example log of negative number.

Parameters
----------
resid : ndarray
    residuals of a regression
exog : ndarray
    exogenous variables for which linearity is tested
func : callable, default None
    If func is None, then squares are used. func needs to take an array
    of exog and return an array of transformed variables.

Returns
-------
lm : float
   Lagrange multiplier test statistic
lm_pval : float
   p-value of Lagrange multiplier tes
ftest : ContrastResult instance
   the results from the F test variant of this test

Notes
-----
Written to match Gretl's linearity test. The test runs an auxiliary
regression of the residuals on the combined original and transformed
regressors. The Null hypothesis is that the linear specification is
correct.
Nc                 0    [         R                  " U S5      $ )NrU   )r'   r>  )r=   s    r0   funclinear_lm.<locals>.func  s    88Aq>!r2   r#   rU   )r'   r   ru   r&   r   rZ   r   r   r   r   r   ra   )
r[   r:   r|  exog_auxrk   r   lsftestr   lm_pvals
             r0   r   r     s    F |	"::dDd412;&789H::LD	U		!	!	#BIIbffVaZ!a@AE		BjjmmB
+Gr2   c                 P   [        USSS9n[        U SSS9nUR                  S   S:  d3  [        R                  " [        R                  " US5      S:H  5      (       d  [        S5      e[        R                  " UR                  S   5      u  pE[        R                  " US	S	2U4   US	S	2U4   -  SS5      nS
nSnXgUR                  S5      -  -   n[        R                  R                  USS9n	[        R                  " U	R                  5       5      [        R                  " U5      :  n
US	S	2[        R                  " U
) 5      S   4   nX3-  nU[        R                  " U5      -
  n[        R                   " UR"                  U5      nU[        R                  " USS9-
  nXS	S	2S	4   -  nUR"                  R!                  U5      nUR!                  [        R                  R%                  X5      5      nUR                  S   n[&        R(                  R+                  UU5      nUUU4$ )a  
White's Two-Moment Specification Test

Parameters
----------
resid : array_like
    OLS residuals.
exog : array_like
    OLS design matrix.

Returns
-------
stat : float
    The test statistic.
pval : float
    A chi-square p-value for test statistic.
dof : int
    The degrees of freedom.

See Also
--------
het_white
    White's test for heteroskedasticity.

Notes
-----
Implements the two-moment specification test described by White's
Theorem 2 (1980, p. 823) which compares the standard OLS covariance
estimator with White's heteroscedasticity-consistent estimator. The
test statistic is shown to be chi-square distributed.

Null hypothesis is homoscedastic and correctly specified.

Assumes the OLS design matrix contains an intercept term and at least
one variable. The intercept is removed to calculate the test statistic.

Interaction terms (squares and crosses of OLS regressors) are added to
the design matrix to calculate the test statistic.

Degrees-of-freedom (full rank) = nvar + nvar * (nvar + 1) / 2

Linearly dependent columns are removed to avoid singular matrix error.

References
----------
.. [*] White, H. (1980). A heteroskedasticity-consistent covariance matrix
   estimator and a direct test for heteroscedasticity. Econometrica, 48:
   817-838.
r:   rU   r   r[   r#   r   rX  zPWhite's specification test requires at least twocolumns where one is a constant.Ng+=gvIh%<=r)moder   )r   r&   r'   r   ptpr9   r  deletevarr(   qrrb   diagonalr_   wherer   r\   r]   solver   r   ra   )r[   r:   r=   er  r  atolrtolr   r  masksqesqmndevsr  devxr   r   dofrr   s                      r0   r   r     s   d 	4a(A5'*AwwqzA~RVVBFF1aLC$788 < = 	= __QWWQZ(FB99Qq"uX!R%(!Q/D DD
#
#C
		T$A66!**,"''#,.D288TE?1%%&D %CRWWS\!H
tvvx A"''$Q''DQWD

4A55&'D **Q-C::==s#Ds?r2   
olsresultsc           
         [        U [        5      (       d  [        S5      eU R                  R                  nU R                  R
                  n[        USSSSUR                  S   4S9nUb  Xd   nXT   nUR                  u  pxUc  Un[        R                  [        R                  " Xx45      -  n	[        R                  [        R                  " U5      -  n
[        R                  [        R                  " U5      -  n[        R                  [        R                  " U5      -  nUSU n[        R                  R                  U5      UR                  S   :  a  S	n[        U5      eUSU n[        R                  R                  [        R                  " UR                   U5      U[        R"                  " U5      -  -   5      n[        R                  " UR                   U5      n[        R                  " UU5      nUXS-
  '   [        R                  " XaS-
     U5      nUXS-
  '   XQS-
     U-
  XS-
  '   S[        R                  " XaS-
     [        R                  " UXaS-
     5      5      -   XS-
  '   [%        X5       H  nUUUS-   2SS24   nUU   n[        R                  " UU5      n[        R&                  " U5      UU'   UU-
  n[        R&                  " U5      U
U'   [        R                  " UUR                   5      nS[        R                  " UU5      -   nU[        R                  " UUR                   5      U-  -
  nUUU-  U-  R)                  5       -   nUU	U'   [        R&                  " U5      UU'   GM      U
[        R*                  " U5      -  nXq-
  nUUS R-                  SS
9nU[        R*                  " U5      -  nUUS-
  S R/                  5       nUS:X  a  SnOUS:X  a  SnOUS:X  a  SnO[        S5      eU[        R*                  " U5      -  SU-  [        R0                  " SXq-
  5      -  [        R*                  " U5      -  -   [        R2                  " S/S//5      -  n XUUUUU 4$ )u  
Calculate recursive ols with residuals and Cusum test statistic

Parameters
----------
res : RegressionResults
    Results from estimation of a regression model.
skip : int, default None
    The number of observations to use for initial OLS, if None then skip is
    set equal to the number of regressors (columns in exog).
lamda : float, default 0.0
    The weight for Ridge correction to initial (X'X)^{-1}.
alpha : {0.90, 0.95, 0.99}, default 0.95
    Confidence level of test, currently only two values supported,
    used for confidence interval in cusum graph.
order_by : array_like, default None
    Integer array specifying the order of the residuals. If not provided,
    the order of the residuals is not changed. If provided, must have
    the same number of observations as the endogenous variable.

Returns
-------
rresid : ndarray
    The recursive ols residuals.
rparams : ndarray
    The recursive ols parameter estimates.
rypred : ndarray
    The recursive prediction of endogenous variable.
rresid_standardized : ndarray
    The recursive residuals standardized so that N(0,sigma2) distributed,
    where sigma2 is the error variance.
rresid_scaled : ndarray
    The recursive residuals normalize so that N(0,1) distributed.
rcusum : ndarray
    The cumulative residuals for cusum test.
rcusumci : ndarray
    The confidence interval for cusum test using a size of alpha.

Notes
-----
It produces same recursive residuals as other version. This version updates
the inverse of the X'X matrix and does not require matrix inversion during
updating. looks efficient but no timing

Confidence interval in Greene and Brown, Durbin and Evans is the same as
in Ploberger after a little bit of algebra.

References
----------
jplv to check formulas, follows Harvey
BigJudge 5.5.2b for formula for inverse(X'X) updating
Greene section 7.5.2

Brown, R. L., J. Durbin, and J. M. Evans. “Techniques for Testing the
Constancy of Regression Relationships over Time.”
Journal of the Royal Statistical Society. Series B (Methodological) 37,
no. 2 (1975): 149-192.
z!res a regression results instancerN  r   Tr#   r   )r   r   r   r&   Nz"The initial regressor matrix, x[:skip], issingular. You must use a value of
skip large enough to ensure that the first OLS estimator is well-defined.
)r   g?g333333?rK  gtV?gGz?g}?5^I?z#alpha can only be 0.9, 0.95 or 0.99rU   g      r  )r4   r   r5   r7   r8   r:   r   r&   r'   r   r   r(   r*   r9   rg  r\   r]   r   ranger   ri  r_   r  r   r   r3  )!rs   rL  lamdarM  rN  r|   r=   rk   r  rparamsrresidrypredrvarrawx0err_msgy0xtxixtybetayipredr
  xiyiresiditmpr   rresid_scalednrrsigma2rresid_standardizedrcusumr   rcusumcis!                                    r0   rP  rP  m  s   z c344;<<		A		A(Jed 7H KK''KD|ffrxx..GVVbhhtn$FVVbhhtn$Fffrxx~%G	
5DB	yyR 288A;. !!	
5DB99==b)EBFF5M,AABD
&&r
C66$DG1HVVAQhK&FF!8{V+F!8BFF1AX;tQax[0IJJG1H4qQwz]qT D!JJv&q	fJJv&q	 ffT244 C bffS#%%(2--sV|b(//11
ZZ^
% ( RWWW--M
+C 45!%%1%-F'"''&/9 +224F }	$	$>?? BGGCL 1q5299Q+D#DrwwH $ 3%#()HV%8-H r2   c                    U R                   R                  n[        U R                  SUR                  S   S4S9nUR                  u  p4US-  n[
        R                  XSS2S4   -  XUR                  5       -
  4   nUR                  S5      nX6SS2SS2S4   USS2SSS24   -  R                  S5      -  nUSS2SS2S4   USS2SSS24   -  R                  S5      n	[
        R                  " [
        R                  " [
        R                  R                  U5      U	5      5      n
[
        R                  " / SQS[        4S	[         4/S
9nX4$ )aa  
Test for model stability, breaks in parameters for ols, Hansen 1992

Parameters
----------
olsresults : RegressionResults
    Results from estimation of a regression model.

Returns
-------
teststat : float
    Hansen's test statistic.
crit : ndarray
    The critical values at alpha=0.95 for different nvars.

Notes
-----
looks good in example, maybe not very powerful for small changes in
parameters

According to Greene, distribution of test statistics depends on nvar but
not on nobs.

Test statistic is verified against R:strucchange

References
----------
Greene section 7.5.1, notation follows Greene
r[   r   r#   )r&   rU   N))rU   g)\(?)   gffffff?)   g      @)   gGz@rk   critr   )r7   r:   r   r[   r&   r'   r+   r   r   sumtracer\   r(   rg  r3  r   r   )r  r=   r[   rk   r  resid2r   scorer  r   hcrit95s               r0   breaks_hansenr    s%   < 	Az''QHE''KDaZF	qD>!F[[]$:;	<BIIaLE1a:AtQJ/44Q77A	q!Tz	U1dA:.	.33A6A
		a(!,-AXXC$c]VUO<>F 9r2   c                    [         R                  " U 5      R                  5       n [        U 5      nU S-  R	                  5       nUS:  a
  X2U-
  -  U-  nU R                  5       [         R                  " U5      -  n[         R                  " U5      R                  5       n/ SQn[        R                  R                  U5      nXWU4$ )u  
Cusum test for parameter stability based on ols residuals.

Parameters
----------
resid : ndarray
    An array of residuals from an OLS estimation.
ddof : int
    The number of parameters in the OLS estimation, used as degrees
    of freedom correction for error variance.

Returns
-------
sup_b : float
    The test statistic, maximum of absolute value of scaled cumulative OLS
    residuals.
pval : float
    Probability of observing the data under the null hypothesis of no
    structural change, based on asymptotic distribution which is a Brownian
    Bridge
crit: list
    The tabulated critical values, for alpha = 1%, 5% and 10%.

Notes
-----
Tested against R:structchange.

Not clear: Assumption 2 in Ploberger, Kramer assumes that exog x have
asymptotically zero mean, x.mean(0) = [1, 0, 0, ..., 0]
Is this really necessary? I do not see how it can affect the test statistic
under the null. It does make a difference under the alternative.
Also, the asymptotic distribution of test statistic depends on this.

From examples it looks like there is little power for standard cusum if
exog (other than constant) have mean zero.

References
----------
Ploberger, Werner, and Walter Kramer. “The Cusum Test with OLS Residuals.”
Econometrica 60, no. 2 (March 1992): 271-285.
rU   r   ))r#   gGz?)r   g(\?)r   gQ?)r'   r   ri  r5  r  r   r_   rb   r   r   	kstwobignra   )r[   r   rk   
nobssigma2r   sup_br  rr   s           r0   breaks_cusumolsresidr  +  s    T JJu##%Eu:D1*!!#Jax$;/$6
,,AFF1IMMOE-D
 ??e$Dr2   )F)r~   N)NFr   NTF)NF)NFr   )T)NNNr  F)rO  r(  Fr~   N)NN)r^  NFN)N)NrX  rK  N)r   )9r  statsmodels.compat.pandasr   collections.abcr   numpyr'   pandasr   scipyr   #statsmodels.regression.linear_modelr   r   statsmodels.stats._adnormr   r	   statsmodels.stats._lillieforsr
   r   r   r   statsmodels.tools.validationr   r   r   r   r   r   statsmodels.tsa.tsatoolsr   __all__rV   r1   r@   rB   r   r   r   r   r   r   r   r_  r`  r   r   r   r   rI  r!   r    r   r   rP  r  r  rL   r2   r0   <module>r     s  0 6 $    M C   ,LF0&> >
DN6r 9D$(TJn FJ,1X$v 7#e'tkde' $e'P 7#$E $$EN E"O' #O'd
rzz 
c 
d 
,O9d7#t 9=7<q!h 5!9>26p: "p:f(+V ?Dk\.bPf u%<@$(M &M`*Z9r2   