
    >h_>                         S r SSKrSSKrSSKrSSKJr  SSK	J
r
  S rSS jrSS jrSS jrSS	 jrS
 rSS jrS rS rS rSS jrS rS rS r " S S\5      rSS jrSS jrg)z
Utility functions models code
    N)_is_using_pandas)
array_likec                     [        U [        5      (       a  U $ [        U [        5      (       a  U R                  S5      $ [        U 5      $ )Nlatin1)
isinstancestrbytesdecode)ss    jC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/tools/tools.pyasstr2r      s9    !S	Au		xx!!1v    c                 >    0 n[        U 5       H  u  p4XBX1-   '   M     U$ )zX
Helper function to create a dictionary mapping a column number
to the name in tmp_arr.
)	enumerate)tmp_arroffsetcol_mapicol_names        r   _make_dictnamesr      s)    
 G )&
 *Nr   c                    [         R                  " U 5      n U R                  S:X  a	  U SS2S4   n Ub  [         R                  " U5      nUR                  S:X  a	  USS2S4   n[         R                  " [         R
                  " U 5      R                  U5      ) [         R
                  " U5      R                  U5      ) 5      nX   X   4$ [         R
                  " U 5      R                  U5      ) nX   $ )aa  
Returns views on the arrays Y and X where missing observations are dropped.

Y : array_like
X : array_like, optional
axis : int
    Axis along which to look for missing observations.  Default is 1, ie.,
    observations in rows.

Returns
-------
Y : ndarray
    All Y where the
X : ndarray

Notes
-----
If either Y or X is 1d, it is reshaped to be 2d.
   N)npasarrayndimarraylogical_andisnanany)YXaxiskeepidxs       r   drop_missingr$       s    ( 	

1Avv{agJ}HHQK66Q;!T'
A.."((1+//$"7!7"$((1+//$"7!79z1:%%88A;??4((zr   c                     [        S5      e)ax
  
Construct a dummy matrix from categorical variables

.. deprecated:: 0.12

   Use pandas.get_dummies instead.

Parameters
----------
data : array_like
    An array, Series or DataFrame.  This can be either a 1d vector of
    the categorical variable or a 2d array with the column specifying
    the categorical variable specified by the col argument.
col : {str, int, None}
    If data is a DataFrame col must in a column of data. If data is a
    Series, col must be either the name of the Series or None. For arrays,
    `col` can be an int that is the (zero-based) column index
    number.  `col` can only be None for a 1d array.  The default is None.
dictnames : bool, optional
    If True, a dictionary mapping the column number to the categorical
    name is returned.  Used to have information about plain arrays.
drop : bool
    Whether or not keep the categorical variable in the returned matrix.

Returns
-------
dummy_matrix : array_like
    A matrix of dummy (indicator/binary) float variables for the
    categorical data.
dictnames :  dict[int, str], optional
    Mapping between column numbers and categorical names.

Notes
-----
This returns a dummy variable for *each* distinct variable.  If a
a DaataFrame is provided, the names for the new variable is the
old variable name - underscore - category name.  So if the a variable
'vote' had answers as 'yes' or 'no' then the returned array would have to
new variables-- 'vote_yes' and 'vote_no'.  There is currently
no name checking.

Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm

Univariate examples

>>> import string
>>> string_var = [string.ascii_lowercase[0:5],
...               string.ascii_lowercase[5:10],
...               string.ascii_lowercase[10:15],
...               string.ascii_lowercase[15:20],
...               string.ascii_lowercase[20:25]]
>>> string_var *= 5
>>> string_var = np.asarray(sorted(string_var))
>>> design = sm.tools.categorical(string_var, drop=True)

Or for a numerical categorical variable

>>> instr = np.floor(np.arange(10,60, step=2)/10)
>>> design = sm.tools.categorical(instr, drop=True)

With a structured array

>>> num = np.random.randn(25,2)
>>> struct_ar = np.zeros((25,1),
...                      dtype=[('var1', 'f4'),('var2', 'f4'),
...                             ('instrument','f4'),('str_instr','a5')])
>>> struct_ar['var1'] = num[:,0][:,None]
>>> struct_ar['var2'] = num[:,1][:,None]
>>> struct_ar['instrument'] = instr[:,None]
>>> struct_ar['str_instr'] = string_var[:,None]
>>> design = sm.tools.categorical(struct_ar, col='instrument', drop=True)

Or

>>> design2 = sm.tools.categorical(struct_ar, col='str_instr', drop=True)
zcategorical has been removed)NotImplementedError)datacol	dictnamesdrops       r   categoricalr+   G   s    ` <
==r   c                    [        U S5      (       a  SSKJn  U" U SXS9$ [        R                  " U 5      nUR
                  nUS:X  a
  USS2S4   nOUR
                  S:  a  [        S5      e[        R                  " USS	9S:H  nU[        R                  " US
:g  SS	9-  nUR                  5       (       a~  US:X  a  U$ US:X  ap  US:X  a  [        S5      e[        R                  " UR                  S   5      nSR                  Xv    Vs/ s H  n[        U5      PM     sn5      n	[        SU	 S35      e[        R                  " UR                  S   5      U/nU(       a  UOUSSS2   n[        R                  " U5      $ s  snf )a  
Add a column of ones to an array.

Parameters
----------
data : array_like
    A column-ordered design matrix.
prepend : bool
    If true, the constant is in the first column.  Else the constant is
    appended (last column).
has_constant : str {'raise', 'add', 'skip'}
    Behavior if ``data`` already has a constant. The default will return
    data without adding another constant. If 'raise', will raise an
    error if any column has a constant value. Using 'add' will add a
    column of 1s if a constant column is present.

Returns
-------
array_like
    The original values with a constant (column of ones) as the first or
    last column. Returned value type depends on input type.

Notes
-----
When the input is a pandas Series or DataFrame, the added column's name
is 'const'.
Nr   )	add_trendc)trendprependhas_constantr      z)Only implemented for 2-dimensional arraysr"           skipraisezdata is constant.,z
Column(s) z are constant.)r   statsmodels.tsa.tsatoolsr-   r   r   r   
ValueErrorptpallr   arangeshapejoinr   onescolumn_stack)
r'   r0   r1   r-   xr   is_nonzero_constcolumnsr.   colss
             r   add_constantrF      s_   8 d##6S'UU 	

4A66DqyagJ	
!DEEvvaa(A-qCxa006!HW$qy !455))AGGAJ/xx1J K1JAQ1J KL :dV>!BCC		a A!DbD'A??1 !Ls   E?c                    [        U SSS9n [        USSS9nU R                  S:X  a	  U SSS24   OU n U R                  S   UR                  S   :w  a  [        SUR                  S   -  5      e[        R
                  " X/5      n[        R                  R                  U5      [        R                  R                  U5      :w  a  g	g
)a  
True if (Q, P) contrast `c` is estimable for (N, P) design `d`.

From an Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
the contrast `C` is estimable by looking at the rank of ``vstack([C,D])``
and verifying it is the same as the rank of `D`.

Parameters
----------
c : array_like
    A contrast matrix with shape (Q, P). If 1 dimensional assume shape is
    (1, P).
d : array_like
    The design matrix, (N, P).

Returns
-------
bool
    True if the contrast `c` is estimable on design `d`.

Examples
--------
>>> d = np.array([[1, 1, 1, 0, 0, 0],
...               [0, 0, 0, 1, 1, 1],
...               [1, 1, 1, 1, 1, 1]]).T
>>> isestimable([1, 0, 0], d)
False
>>> isestimable([1, -1, 0], d)
True
r.   r2   )maxdimdr   r   NzContrast should have %d columnsFT)r   r   r>   r:   r   vstacklinalgmatrix_rank)r.   rI   news      r   isestimablerO      s    > 	1c!$A1c"Affk$'
qAwwqzQWWQZ:QWWQZGHH
))QF
C	yyS!RYY%:%:1%==r   c           	      |   [         R                  " U 5      n U R                  5       n [         R                  R	                  U S5      u  p#n[         R
                  " U5      nUR                  S   nUR                  S   nU[         R                  R                  U5      -  n[        [        Xv5      5       H  n	X9   U:  a  SX9   -  X9'   M  SX9'   M     [         R                  " [         R                  " U5      [         R                  " USS2[         R                  4   [         R                  " U5      5      5      n
X4$ )zm
Return the pinv of an array X as well as the singular values
used in computation.

Code adapted from numpy.
Fr   r         ?r4   N)r   r   	conjugaterL   svdcopyr>   maximumreducerangemindot	transposemultiplynewaxis)rB   rcondur   vts_origmncutoffr   ress              r   pinv_extendedre      s     	

1A	Ayy}}Q&HA"WWQZF	
A
ARZZ&&q))F3q94&=ad7ADAD	 
 &&b!2;;qBJJ/?/1||A$@ AC;r   c                 h   [         R                  " U 5      n [         R                  " U [         R                  S9n[         R                  " U R
                  5      nU) nX3   U R
                  U   S:  -  X3'   SU R
                  U   -  UR
                  U'   [         R                  UR
                  U'   U$ )z
Reciprocal of an array with entries less than or equal to 0 set to 0.

Parameters
----------
x : array_like
    The input array.

Returns
-------
ndarray
    The array with 0-filled reciprocals.
dtyper   rQ   r   r   
zeros_likefloat64r   flatnan)rB   outnansposs       r   reciprrq     s     	

1A
--
,C88AFFD%Cx166#;?+CH!&&+%CHHSMVVCHHTNJr   c                 h   [         R                  " U 5      n [         R                  " U [         R                  S9n[         R                  " U R
                  5      nU) nX3   U R
                  U   S:g  -  X3'   SU R
                  U   -  UR
                  U'   [         R                  UR
                  U'   U$ )z
Reciprocal of an array with entries less than 0 set to 0.

Parameters
----------
x : array_like
    The input array.

Returns
-------
ndarray
    The array with 0-filled reciprocals.
rg   r   rQ   ri   )rB   rn   ro   non_zeros       r   recipr0rt   /  s     	

1A
--
,C88AFFDuH!+qvvh/?1/DEHqvvh//CHHXVVCHHTNJr   c                     [         R                  R                  U S-  S5      n[         R                  " U5       Vs/ s H  o SS2U4   PM     nn[         R                  " [         R
                  " U5      5      $ s  snf )z
Erase columns of zeros: can save some time in pseudoinverse.

Parameters
----------
matrix : ndarray
    The array to clean.

Returns
-------
ndarray
    The cleaned array.
r2   r   N)r   addrV   flatnonzeror   rZ   )matrixcolsumr   vals       r   clean0r{   G  s^     VV]]619a(F!#!7
8!7A!Q$<!7C
888BLL%&& 9s   A:c                    Uc  [         R                  R                  U 5      n[         R                  R                  U SS9u  p#n[         R                  " U5      nUSSS2   n/ n[        U5       H  nUR                  USS2XW   4   5        M     [         R                  " [         R                  " U5      5      R                  [         R                  5      $ )a  
Return an array whose column span is the same as x.

Parameters
----------
x : ndarray
    The array to adjust, 2d.
r : int, optional
    The rank of x. If not provided, determined by `np.linalg.matrix_rank`.

Returns
-------
ndarray
    The array adjusted to have full rank.

Notes
-----
If the rank of x is known it can be specified as r -- no check
is made to ensure that this really is the rank of x.
NF)full_matricesr8   )r   rL   rM   rS   argsortrW   appendr   rZ   astyperk   )rB   rvrI   r^   ordervaluer   s           r   fullrankr   Z  s    * 	yII!!!$iimmAUm3GA!JJqME$B$KEE1XQq%({^$ ::bll5)*11"**==r   c                 B    [        U5      nSX1'   U R                  U5      $ )a  
Unsqueeze a collapsed array.

Parameters
----------
data : ndarray
    The data to unsqueeze.
axis : int
    The axis to unsqueeze.
oldshape : tuple[int]
    The original shape before the squeeze or reduce operation.

Returns
-------
ndarray
    The unsqueezed array.

Examples
--------
>>> from numpy import mean
>>> from numpy.random import standard_normal
>>> x = standard_normal((3,4,5))
>>> m = mean(x, axis=1)
>>> m.shape
(3, 5)
>>> m = unsqueeze(m, 1, x.shape)
>>> m.shape
(3, 1, 5)
>>>
r   )listreshape)r'   r"   oldshapenewshapes       r   	unsqueezer   {  s#    > H~HHN<<!!r   c                 h   [         R                  " [         R                  " U 5      US:g  5      n[         R                  " U S:g  [         R                  " U5      5      nX#-   n[         R                  " [         R                  " U 5      [         R                  " U5      5      n[         R                  XT'   U$ )z
Returns np.dot(left_matrix, right_matrix) with the convention that
nan * 0 = 0 and nan * x = nan if x != 0.

Parameters
----------
A, B : ndarray
r   )r   rY   r   
nan_to_numrm   )ABshould_be_nan_1should_be_nan_2should_be_nanCs         r   nan_dotr     sy     ffRXXa[163Offa1frxx{3O#5M 	r}}Qq!12AvvAHr   c                     [        U SU 5      $ )zx
Gets raw results back from wrapped results.

Can be used in plotting functions or other post-estimation type
routines.
_results)getattr)resultss    r   maybe_unwrap_resultsr     s     7J00r   c                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ )Bunchi  z
Returns a dict-like object with keys accessible via attribute lookup.

Parameters
----------
*args
    Arguments passed to dict constructor, tuples (key, value).
**kwargs
    Keyword agument passed to dict constructor, key=value.
c                 2   > [         TU ]  " U0 UD6  X l        g N)super__init____dict__)selfargskwargs	__class__s      r   r   Bunch.__init__  s    $)&)r   )r   )__name__
__module____qualname____firstlineno____doc__r   __static_attributes____classcell__)r   s   @r   r   r     s    	 r   r   c                 P   U c  U $ [        U S5      nU R                  S:X  a  U(       a  X R                  4$ U S4$ U R                  S:  a  [        S5      eU(       a  U R                  OSnU(       a  [
        R                  " U 5      SS2S4   U4$ [        R                  " U 5      U4$ )a  

Parameters
----------
x : ndarray, Series, DataFrame or None
    Input to verify dimensions, and to transform as necesary
ndarray : bool
    Flag indicating whether to always return a NumPy array. Setting False
    will return an pandas DataFrame when the input is a Series or a
    DataFrame.

Returns
-------
out : ndarray, DataFrame or None
    array or DataFrame with 2 dimensiona.  One dimensional arrays are
    returned as nobs by 1. None is returned if x is None.
names : list of str or None
    list containing variables names when the input is a pandas datatype.
    Returns None if the input is an ndarray.

Notes
-----
Accepts None for simplicity
Nr2   zx mst be 1 or 2-dimensional.)	r   r   rD   r:   namer   r   pd	DataFrame)rB   ndarray	is_pandasr   s       r   
_ensure_2dr     s    2 	y D)Ivv{ii<d7N	
!788166DDzz!}QW%t++||A$$r   c                    [        U SSS9n US:X  aw  U SS2[        R                  " U S:g  SS94   n U [        R                  " U S-  R	                  S5      5      -  n U R
                  U -  n [        R                  R                  XSS	9$ US
:X  a  [        R                  R                  U SS9u  n[        R                  " [        R                  " U5      5      nUc:  US   U R                  S   -  [        R                  " [        5      R                  -  n[!        XA:  R	                  5       5      $ [        R                  R                  XS9$ )a  
Matrix rank calculation using QR or SVD

Parameters
----------
m : array_like
    A 2-d array-like object to test
tol : float, optional
    The tolerance to use when testing the matrix rank. If not provided
    an appropriate value is selected.
method : {"ip", "qr", "svd"}
    The method used. "ip" uses the inner-product of a normalized version
    of m and then computes the rank using NumPy's matrix_rank.
    "qr" uses a QR decomposition and is the default. "svd" defers to
    NumPy's matrix_rank.

Returns
-------
int
    The rank of m.

Notes
-----
When using a QR factorization, the rank is determined by the number of
elements on the leading diagonal of the R matrix that are above tol
in absolute value.
ra   r2   rJ   ipNr   r3   T)tol	hermitianqrr   )moder   )r   )r   r   r   sqrtsumTrL   rM   scipyr   absdiagr>   finfofloatepsint)ra   r   methodr   abs_diags        r   rM   rM     s   8 	1c"A~aQQ''(aQ((CC!Gyy$$Q4$@@	4\\__QS_)66"''!*%;1+
*RXXe_-@-@@CHN'')**yy$$Q$00r   )r   )Nr   )NFF)Tr5   )gV瞯<r   )F)Nr   )r   numpyr   pandasr   scipy.linalgr   statsmodels.tools.datar   statsmodels.tools.validationr   r   r   r$   r+   rF   rO   re   rq   rt   r{   r   r   r   r   dictr   r   rM    r   r   <module>r      s       3 3 NP>h7t'T000'&>B!"H.1D  (%V)1r   