
    =h)                     $   S SK Jr  S SKJr  S SKJrJr  S SKJrJ	r	J
r
JrJr  S SKrS SKJrJr  S SKJr  S SKJr  S SKrS S	KJrJrJr  SS
 jr " S S\5      rSS jrS rS r S r!S r"S r#SS jr$S r%SS jr&SS jr'SS jr(SS jr)S r*S S jr+g)!    )lrange)StringIO)environmakedirs)abspathdirnameexists
expanduserjoinN)	HTTPErrorURLError)urljoin)urlopen)Indexread_csv
read_statac                 4    [        XS-   5      n[        U5      $ )a  
Download and return an example dataset from Stata.

Parameters
----------
data : str
    Name of dataset to fetch.
baseurl : str
    The base URL to the stata datasets.
as_df : bool
    Deprecated. Always returns a DataFrame

Returns
-------
dta : DataFrame
    A DataFrame containing the Stata dataset.

Examples
--------
>>> dta = webuse('auto')

Notes
-----
Make sure baseurl has trailing forward slash. Does not do any
error checking in response URLs.
z.dta)r   r   )databaseurlas_dfurls       mC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/datasets/utils.pywebuser      s    6 ';
'Cc?    c                        \ rS rSrS rS rSrg)Dataset.   c                     S U l         S U l        S U l        S U l        [        R                  X5        X l         U R                  R                  [        5      U l	        g !    g = fN)
endogexogr   namesdict__init____dict__astypefloatraw_data)selfkws     r   r$   Dataset.__init__/   sT    
		
d	 II,,U3DM	s   $A A"c                 ,    [        U R                  5      $ r   )str	__class__)r)   s    r   __repr__Dataset.__repr__?   s    4>>""r   )r%   r   r    r!   r"   r(   N)__name__
__module____qualname____firstlineno__r$   r/   __static_attributes__ r   r   r   r   .   s     #r   r   c           	         U R                   n[        U[        5      (       aA  XA   nX   R                  5       nUc  U R	                  U/SS9nOXU      R                  5       nOU R
                  S S 2U4   R                  5       n[        UR                   5      nUc  U R	                  USS9nO@[        U[        5      (       a  XU      R                  5       nOXU      R                  5       nUbJ  [        U R                  S S 2U4   5      nXl	        UR                  5       Ul	        U R                  XC   5      n [        UR                   5      n	[        U [        U5      UXuU	S9n
U
$ )N   )axis)r   r"   r    r!   
endog_name	exog_name)columns
isinstanceintcopydroploclistr   ilocindex	set_indexr   )r   	endog_idxexog_idx	index_idxr"   r:   r    r!   rD   r;   datasets              r   process_pandasrJ   C   sE   LLE)S!!%
 %%'99j\92Dh(--/DI&++-%--(
99Za90D#&&h(--/Dh(--/Ddii9-.ZZ\
~~e./T\\"I4tE{%)MGNr   c           
          U R                   R                  [        [        S[	        U 5      S-   5      5      5      (       a  U R                  SS9n U $ )zs
All the Rdatasets have the integer row.labels from R if there is no
real index. Strip this for a zero-based index
r8   T)r@   )rD   equalsr   r   lenreset_index)r   s    r   _maybe_reset_indexrO   c   sE    
 zzvaTQ7899T*Kr   c                 R    U SL a  S n U $ U SL a  [        S 5      n U $ [        U 5      n U $ )NFT)get_data_home)caches    r   
_get_cacherS   m   s@    ~
 L	 
$d# L e$Lr   c                     SS K n[        US5       nUR                  UR                  U 5      5        S S S 5        g ! , (       d  f       g = f)Nr   wb)zlibopenwritecompress)r   
cache_pathrV   zfs       r   	_cache_itr\   x   s2    	j$	2
t$% 
 		s	   !;
A	c                     SS K n[        U S5       nUR                  UR                  5       5      sS S S 5        $ ! , (       d  f       g = f)Nr   rb)rV   rW   
decompressread)rZ   rV   r[   s      r   _open_cachera   ~   s/    	j$	2rwwy) 
 		s	   :
Ac                    SnUb  U R                  S5      S   R                  SS5      nUR                  S5      n[        U5      S:  a  US==   S	-  ss'   OUS
==   S	-  ss'   SR                  U5      S-   n[        X5      n [	        U5      nSnU(       d'  [        U SS9R                  5       nUb  [        UW5        WU4$ !    N6= f)z
Tries to load data from cache location otherwise downloads it. If it
downloads the data and cache is not None then it will put the downloaded
data in the cache path.
Fz:///,.r8   z-v2r   z.zipT   )timeout)splitreplacerM   r   ra   r   r`   r\   )r   rR   
from_cache	file_namerZ   r   s         r   _urlopen_cachedrn      s     JIIe$R(00c:	OOC(	y>AbMU"MaLE!LHHY'&0	%+
	z*DJ
 sA&++-dJ'	s   C Cc                     XS-   U-  -   n [        XB5      u  pVUR	                  SS5      n[        U5      U4$ ! [         a$  nS[        U5      ;   a  [        SU-  5      eUeS nAff = f)Nz.%s404zDataset %s was not found.utf-8strict)rn   r   r-   
ValueErrordecoder   )base_urldatanamerR   	extensionr   r   rl   errs           r   	_get_datary      s{    
&)3
3C*36 ;;w)DD>:%%  CH88CDDI	s   8 
A&A!!A&c           	      d   Sn[        X25      u  pEUR                  SS5      n[        [        U5      5      n[        R
                  " UR                  U :H  UR                  U:H  5      nUR                  5       (       d  [        SU  SU SU S35      eUR                  U   nUS   R                  S	   $ )
NzRhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/datasets.csvrq   rr   zItem z from Package z& was not found. Check the CSV file at z  to verify the Item and Package.Titler   )rn   rt   r   r   nplogical_andItemPackageanyrs   rA   rC   )	rv   packagerR   	index_urlr   _rD   idxdataset_metas	            r   _get_dataset_metar      s    1Ii/GD;;w)DXd^$E
..x/'1I
JC7799H:^G9 5(k)IK
 	
 99S>L %%a((r   c                     SU-   S-   nSU-   S-   n[        U5      n[        X0U5      u  pV[        USS9n[        U5      n[	        XU5      n[        X@US5      u  p[        XXR                  5       XUS9$ )	a  download and return R dataset

Parameters
----------
dataname : str
    The name of the dataset you want to download
package : str
    The package in which the dataset is found. The default is the core
    'datasets' package.
cache : bool or str
    If True, will download this data into the STATSMODELS_DATA folder.
    The default location is a folder called statsmodels_data in the
    user home folder. Otherwise, you can specify a path to a folder to
    use for caching the data. If False, the data will not be cached.

Returns
-------
dataset : Dataset
    A `statsmodels.data.utils.Dataset` instance. This objects has
    attributes:

    * data - A pandas DataFrame containing the data
    * title - The dataset title
    * package - The package from which the data came
    * from_cache - Whether not cached data was retrieved
    * __doc__ - The verbatim R documentation.

Notes
-----
If the R dataset has an integer index. This is reset to be zero-based.
Otherwise the index is preserved. The caching facilities are dumb. That
is, no download dates, e-tags, or otherwise identifying information
is checked to see if the data should be downloaded again or not. If the
dataset is in the cache, it's used.
zJhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/rd   zJhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/doc/z/rst/r   )	index_colrst)r   __doc__r   titlerl   )rS   ry   r   rO   r   r   r`   )
rv   r   rR   data_base_urldocs_base_urlr   rl   r   docr   s
             r   get_rdatasetr      s    J##*++./M##*++23MuE %@DDA&Dd#Dh7E}u=FChhj'(* *r   c                     U c!  [         R                  " S[        SS5      5      n [        U 5      n [	        U 5      (       d  [        U 5        U $ )a  Return the path of the statsmodels data dir.

This folder is used by some large dataset loaders to avoid
downloading the data several times.

By default the data dir is set to a folder named 'statsmodels_data'
in the user home folder.

Alternatively, it can be set by the 'STATSMODELS_DATA' environment
variable or programatically by giving an explicit folder path. The
'~' symbol is expanded to the user home folder.

If the folder does not already exist, it is automatically created.
STATSMODELS_DATA~statsmodels_data)r   getr   r
   r	   r   	data_homes    r   rQ   rQ      sI     KK 2 $S*< =?	9%I)r   c                 F    [        U 5      n [        R                  " U 5        g)z.Delete all the content of the data home cache.N)rQ   shutilrmtreer   s    r   clear_data_homer     s    i(I
MM)r   c                 X    U c  SOU n  [        U 5        g! [         a
  n SnAgSnAff = f)zCheck if internet is availableNzhttps://github.comFT)r   r   )r   rx   s     r   check_internetr     s4    "%+
3C   s    
))c                    / nU  H}  nUR                  S5      (       a  UR                  S5      (       a  USS nO7UR                  S5      (       a  USS nOUR                  S5      (       a  USS nUR                  U5        M     Xl        U $ )z
Remove leading and trailing single quotes

Parameters
----------
df : DataFrame
    DataFrame to process

Returns
-------
df : DataFrame
    DataFrame with stripped column names

Notes
-----
In-place modification
'r8   rc   N)
startswithendswithappendr<   )dfr<   cs      r   strip_column_namesr   "  s    $ G<<!**T"2"2!BA\\$!"AZZ#2Aq  JIr   c                     [        [        U 5      5      n[        XA5      nUS:w  a  SOSn0 nUS:X  a  SS0n[        U4X&S.UD6nU(       a  UR	                  [
        5      nU$ )zStandard simple csv loaderre   pythonr   float_precisionhigh)sepengine)r   r   r   r   r&   r'   )		base_filecsv_namer   convert_floatfilepathfilenamer   r   r   s	            r   load_csvr   A  sh    wy)*HH&HXFO},f5HH#HHD{{5!Kr   )z%https://www.stata-press.com/data/r11/T)r   NN)csv)datasetsFr   )re   F),statsmodels.compat.pythonr   ior   osr   r   os.pathr   r   r	   r
   r   r   urllib.errorr   r   urllib.parser   urllib.requestr   numpyr|   pandasr   r   r   r   r#   r   rJ   rO   rS   r\   ra   rn   ry   r   r   rQ   r   r   r   r   r6   r   r   <module>r      s    ,    > >  ,   "  . .>#d #*@&*<&)$2*n0>r   