
    >hjU                     \    S r SSKJr  SSKrSSKJr  SSKJr   " S S5      r	 " S S	5      r
g)
aV  
Author: Austin Adams

This class implements Oaxaca-Blinder Decomposition. It returns
a OaxacaResults Class:

OaxacaBlinder:
Two-Fold (two_fold)
Three-Fold (three_fold)

OaxacaResults:
Table Summary (summary)

Oaxaca-Blinder is a statistical method that is used to explain
the differences between two mean values. The idea is to show
from two mean values what can be explained by the data and
what cannot by using OLS regression frameworks.

"The original use by Oaxaca's was to explain the wage
differential between two different groups of workers,
but the method has since been applied to numerous other
topics." (Wikipedia)

The model is designed to accept two endogenous response variables
and two exogenous explanitory variables. They are then fit using
the specific type of decomposition that you want.

The method was famously used in Card and Krueger's paper
"School Quality and Black-White Relative Earnings: A Direct Assessment" (1992)

General reference for Oaxaca-Blinder:

B. Jann "The Blinder-Oaxaca decomposition for linear
regression models," The Stata Journal, 2008.

Econometrics references for regression models:

E. M. Kitagawa  "Components of a Difference Between Two Rates"
Journal of the American Statistical Association, 1955.

A. S. Blinder "Wage Discrimination: Reduced Form and Structural
Estimates," The Journal of Human Resources, 1973.
    )dedentN)OLS)add_constantc                   R    \ rS rSrSr    S	S jrS
S jrSS jr     SS jrSr	g)OaxacaBlinder7   a  
Class to perform Oaxaca-Blinder Decomposition.

Parameters
----------
endog : array_like
    The endogenous variable or the dependent variable that you are trying
    to explain.
exog : array_like
    The exogenous variable(s) or the independent variable(s) that you are
    using to explain the endogenous variable.
bifurcate : {int, str}
    The column of the exogenous variable(s) on which to split. This would
    generally be the group that you wish to explain the two means for.
    Int of the column for a NumPy array or int/string for the name of
    the column in Pandas.
hasconst : bool, optional
    Indicates whether the two exogenous variables include a user-supplied
    constant. If True, a constant is assumed. If False, a constant is added
    at the start. If nothing is supplied, then True is assumed.
swap : bool, optional
    Imitates the STATA Oaxaca command by allowing users to choose to swap
    groups. Unlike STATA, this is assumed to be True instead of False
cov_type : str, optional
    See regression.linear_model.RegressionResults for a description of the
    available covariance estimators
cov_kwds : dict, optional
    See linear_model.RegressionResults.get_robustcov_results for a
    description required keywords for alternative covariance estimators

Notes
-----
Please check if your data includes at constant. This will still run, but
will return incorrect values if set incorrectly.

You can access the models by using their code as an attribute, e.g.,
_t_model for the total model, _f_model for the first model, _s_model for
the second model.

Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> data = sm.datasets.ccards.load()

'3' is the column of which we want to explain or which indicates
the two groups. In this case, it is if you rent.

>>> model = sm.OaxacaBlinder(df.endog, df.exog, 3, hasconst = False)
>>> model.two_fold().summary()
Oaxaca-Blinder Two-fold Effects
Unexplained Effect: 27.94091
Explained Effect: 130.80954
Gap: 158.75044

>>> model.three_fold().summary()
Oaxaca-Blinder Three-fold Effects
Endowments Effect: 321.74824
Coefficient Effect: 75.45371
Interaction Effect: -238.45151
Gap: 158.75044
Nc                 H   [        [        U5      5      R                  S5      S:w  aF  UR                  R	                  U5      n[
        R                  " U5      [
        R                  " U5      p!S U l        X0l        X`l	        Xpl
        [
        R                  " X#SS9U l        X l        X@l        US S 2U4   n[
        R                  " X45      n[
        R                   " U5      n	Xl        U[
        R$                  " US S 2U4   U	S   :H  5         n
U[
        R$                  " US S 2U4   U	S   :H  5         nU[
        R$                  " US S 2S4   U	S   :H  5         nU[
        R$                  " US S 2S4   U	S   :H  5         n[
        R                  " XSS9n
[
        R                  " XSS9nUS S 2S4   nUS S 2S4   nUS S 2S4   U l        [)        U5      [)        U5      sU l        U l        UR/                  5       UR/                  5       -
  U l        U(       aK  U R0                  S:  a;  XpXpUR/                  5       UR/                  5       -
  U l        U	S   U	S   sU	S'   U	S'   Xl        USL aF  [5        U
SS9n
[5        USS9n[5        U R                  SS9U l        [5        U R                  SS9U l        [
        R.                  " U
SS9U l        [
        R.                  " USS9U l        [;        X5      R=                  XgS9U l        [;        X5      R=                  XgS9U l         g )	Npandas   axisr   Fprependcov_typecov_kwds)!strtypefindcolumnsget_locnparraytwo_fold_type	bifurcater   r   deleteneumarkexoghasconstcolumn_stackuniquebi_colwhereendoglenlen_flen_smeangapbir   exog_f_meanexog_s_meanr   fit_f_model_s_model)selfr%   r   r   r    swapr   r   r#   r+   exog_fexog_sendog_fendog_ss                 kC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/stats/oaxaca.py__init__OaxacaBlinder.__init__w   s    tDz?)R/,,Y7I((5/288D>4!"  yyq9	 al#0YYv bhhtAyL1RU:;<bhhtAyL1RU:;<q!t1!567q!t1!567615615!Q$-!Q$-1a4[
!$Ws7|
DJ<<>GLLN2DHHqL&W#F||~6DHa5"Q%LBqE2a5u!&%8F!&%8F$TYY>DI'eDDL77627762G,00 1 
 G,00 1 
    c                    U R                   b  U R                   nU R                  b  U R                  nU R                  b  U R                  SU R                  -
  /nU R                  nU R                  n/ n/ n/ n	/ n
/ n[        SU5       GH3  n[        R                  " U R                  U R                  45      nU R                  n[        U5      n[        R                  R                  SXS9nUU   nUU   n[        R                  " XSS9nU[        R                  " USS2U4   US   :H  5         nU[        R                  " USS2U4   US   :H  5         nU[        R                  " USS2S4   US   :H  5         nU[        R                  " USS2S4   US   :H  5         n[        R                  " UUSS9n[        R                  " UUSS9nUSS2S4   nUSS2S4   nUSS2S4   nU R                   nU R"                  SL a(  [%        USS9n[%        USS9n[%        USS9n[%        USS9n['        UU5      R)                  U R*                  U R,                  S9n['        UU5      R)                  U R*                  U R,                  S9n[        R.                  " USS9n[        R.                  " USS9nUS	:X  a  UU-
  UR0                  -  nUUR0                  UR0                  -
  -  nUU-
  UR0                  UR0                  -
  -  nUR3                  U5        UR3                  U5        U	R3                  U5        GM  US
:X  d  GM  [        U5      n[        U5      nUS:X  a,  UUU-   -  UR0                  -  UUU-   -  UR0                  -  -   n OUS:X  a  SUR0                  UR0                  -   -  n OUS:X  a&  WS   UR0                  -  US   UR0                  -  -   n OUS:X  a;  ['        UU5      R)                  U R*                  U R,                  S9n!U!R0                  n ON['        X5      R)                  U R*                  U R,                  S9n![        R                  " U!R0                  U5      n UUR0                  U -
  -  UU UR0                  -
  -  -   n"UU-
  U -  n#UR3                  U"5        U
R3                  U#5        GM6     [5        X#-  5      [5        USU-
  -  5      n%n$US	:X  a  [        R6                  " [        R8                  " U5      U%U$ 5      [        R6                  " [        R8                  " U5      U%U$ 5      [        R6                  " [        R8                  " U	5      U%U$ 5      /$ US
:X  aZ  [        R6                  " [        R8                  " U5      U%U$ 5      [        R6                  " [        R8                  " U
5      U%U$ 5      /$ g)zc
A helper function to calculate the variance/std. Used to keep
the decomposition functions cleaner
Nr   r   )highsizer   Fr   r         cottonreimers      ?self_submittednuemark)submitted_nsubmitted_confsubmitted_weightr+   r   ranger   r!   r#   r%   r   r&   randomrandintr   r$   r   r    r   r   r.   r   r   r)   paramsappendintstdsort)&r1   decomp_typenconfrG   r+   r   endow_eff_listcoef_eff_listint_eff_listexp_eff_listunexp_eff_list_r%   r   amountsamplesr   r3   r4   r5   r6   r   r/   r0   r,   r-   	endow_effcoef_effint_effr'   r(   t_params_t_modelunexplained	explainedr<   lows&                                         r7   varianceOaxacaBlinder.variance   sm   
 '  A*&&D  ,%%D)))  WWNN	q!AOOT[[$**$=>E99DZFii'''DG'NE=Diia8G"((49#5A#>?@F"((49#5A#>?@FBHHU1a4[BqE%9:;GBHHU1a4[BqE%9:;GYYvyq9FYYvyq9FadmGadmG!Q$KE ..M}}%%fe<%fe<#D%8&w>7F+// 0 H 7F+// 0 H ''&q1K''&q1Ka(;6(//I	&(//HOO*KL&4OOhoo5 %%i0$$X.##G,!FF H, % 7(// I/(//A H #i/"hoo&GHH"&66(+hoo=*1-?@ 
 #i/"5'266!%  7  H  (H  #5/33!%  4  H  "yy)DH*hoo.HI8hoo#=> );6(B	%%k2##I.k n M3qAH~#6c!rww~.s489rww}-c$78rww|,S67 
 Arww~.s489rww|,S67  r:   c                 F   X l         X0l        SU l        SnU R                  U R                  -
  U R
                  R                  -  U l        U R                  U R                  R                  U R
                  R                  -
  -  U l	        U R                  U R                  -
  U R                  R                  U R
                  R                  -
  -  U l
        USL a  U R                  S5      n[        U R                  U R                  U R                  U R                  4SUS9$ )an  
Calculates the three-fold Oaxaca Blinder Decompositions

Parameters
----------
std: boolean, optional
    If true, bootstrapped standard errors will be calculated.
n: int, optional
    A amount of iterations to calculate the bootstrapped
    standard errors. This defaults to 5000.
conf: float, optional
    This is the confidence required for the standard error
    calculation. Defaults to .99, but could be anything less
    than or equal to one. One is heavy discouraged, due to the
    extreme outliers inflating the variance.

Returns
-------
OaxacaResults
    A results container for the three-fold decomposition.
NTr>   std_val)rE   rF   rG   r,   r-   r0   rK   r[   r/   r\   r]   rc   OaxacaResultsr*   )r1   rN   rQ   rR   rg   s        r7   
three_foldOaxacaBlinder.three_fold1  s    , " $t///MM  ! ((MM  4==#7#77
 ((4+;+;;MM  4==#7#77
 $;mmA&G^^T]]DLL$((C
 	
r:   c                    X@l         XPl        SnX l        X0l        US:X  a  U R                  U R                  U R
                  -   -  U R                  R                  -  U R
                  U R                  U R
                  -   -  U R                  R                  -  -   U l	        GOUS:X  a7  SU R                  R                  U R                  R                  -   -  U l	        GODUS:X  aT  Uc  [        S5      eUSU-
  /nUS   U R                  R                  -  US   U R                  R                  -  -   U l	        OUS	:X  ac  [        U R                  U R                  5      R                  U R                  U R                   S
9U l        U R"                  R                  U l	        O[        U R                  U R$                  5      R                  U R                  U R                   S
9U l        [&        R(                  " U R"                  R                  U R*                  5      U l	        U R,                  U R                  R                  U R                  -
  -  U R.                  U R                  U R                  R                  -
  -  -   U l        U R,                  U R.                  -
  U R                  -  U l        USL a  U R5                  S5      n[7        U R0                  U R2                  U R8                  4SUS9$ )as  
Calculates the two-fold or pooled Oaxaca Blinder Decompositions

Methods
-------
std: boolean, optional
    If true, bootstrapped standard errors will be calculated.

two_fold_type: string, optional
    This method allows for the specific calculation of the
    non-discriminatory model. There are four different types
    available at this time. pooled, cotton, reimers, self_submitted.
    Pooled is assumed and if a non-viable parameter is given,
    pooled will be ran.

    pooled - This type assumes that the pooled model's parameters
    (a normal regression) is the non-discriminatory model.
    This includes the indicator variable. This is generally
    the best idea. If you have economic justification for
    using others, then use others.

    nuemark - This is similar to the pooled type, but the regression
    is not done including the indicator variable.

    cotton - This type uses the adjusted in Cotton (1988), which
    accounts for the undervaluation of one group causing the
    overevalution of another. It uses the sample size weights for
    a linear combination of the two model parameters

    reimers - This type uses a linear combination of the two
    models with both parameters being 50% of the
    non-discriminatory model.

    self_submitted - This allows the user to submit their
    own weights. Please be sure to put the weight of the larger mean
    group only. This should be submitted in the
    submitted_weights variable.

submitted_weight: int/float, required only for self_submitted,
    This is the submitted weight for the larger mean. If the
    weight for the larger mean is p, then the weight for the
    other mean is 1-p. Only submit the first value.

n: int, optional
    A amount of iterations to calculate the bootstrapped
    standard errors. This defaults to 5000.
conf: float, optional
    This is the confidence required for the standard error
    calculation. Defaults to .99, but could be anything less
    than or equal to one. One is heavy discouraged, due to the
    extreme outliers inflating the variance.

Returns
-------
OaxacaResults
    A results container for the two-fold decomposition.
Nr@   rA   rB   rC   zPlease submit weightsr   r   rD   r   Tr?   rf   )rE   rF   r   rG   r'   r(   r/   rK   r0   r^   
ValueErrorr   r%   r   r.   r   r   r_   r   r   r   r   r,   r-   r`   ra   rc   rh   r*   )r1   rN   r   rG   rQ   rR   rg   s          r7   two_foldOaxacaBlinder.two_fold^  sp   B "* 0H$

djj4::569M9MMtzzDJJ67$--:N:NNPDM i'4==#7#7$--:N:N#NODM..' !899 0!6F2FG #dmm&:&::"1%(<(<<= M
 i'

DLL9== > DM !MM00DM  

DII6:: ; DM IIdmm&:&:DNNKDM  4 4t}} DE1E1E!EFH **T-=-==N$;mmA&Gt~~txx8!W
 	
r:   )r/   r0   r_   r+   r#   r   r\   r   r   r%   r[   r   r,   r-   ra   r*   r    r]   r'   r(   r   rF   rE   rG   r^   r   r`   )TT	nonrobustN)i  gGz?)FNN)FpooledNNN)
__name__
__module____qualname____firstlineno____doc__r8   rc   ri   rm   __static_attributes__ r:   r7   r   r   7   s@    =H ?
Bwr+
^ 
n
r:   r   c                   (    \ rS rSrSrSS jrS rSrg)rh   i  a  
This class summarizes the fit of the OaxacaBlinder model.

Use .summary() to get a table of the fitted values or
use .params to receive a list of the values
use .std to receive a list of the standard errors

If a two-fold model was fitted, this will return
unexplained effect, explained effect, and the
mean gap. The list will always be of the following order
and type. If standard error was asked for, then standard error
calculations will also be included for each variable after each
calculated effect.

unexplained : float
    This is the effect that cannot be explained by the data at hand.
    This does not mean it cannot be explained with more.
explained : float
    This is the effect that can be explained using the data.
gap : float
    This is the gap in the mean differences of the two groups.

If a three-fold model was fitted, this will
return characteristic effect, coefficient effect
interaction effect, and the mean gap. The list will
be of the following order and type. If standard error was asked
for, then standard error calculations will also be included for
each variable after each calculated effect.

endowment effect : float
    This is the effect due to the group differences in
    predictors
coefficient effect : float
    This is the effect due to differences of the coefficients
    of the two groups
interaction effect : float
    This is the effect due to differences in both effects
    existing at the same time between the two groups.
gap : float
    This is the gap in the mean differences of the two groups.

Attributes
----------
params
    A list of all values for the fitted models.
std
    A list of standard error calculations.
Nc                 (    Xl         X0l        X l        g N)rK   rN   
model_type)r1   resultsr{   rg   s       r7   r8   OaxacaResults.__init__  s    $r:   c                    U R                   S:X  a  U R                  cH  [        [        SU R                  S   S SU R                  S   S SU R                  S   S 35      5        Oh[        [        S	R                  U R                  S   U R                  S   U R                  S   U R                  S   U R                  S   5      5      5        U R                   S
:X  a  U R                  cY  [        [        SU R                  S   S SU R                  S   S SU R                  S   S SU R                  S
   S 35      5        g[        [        SU R                  S   S SU R                  S   S SU R                  S   S SU R                  S   S SU R                  S   S SU R                  S   S SU R                  S
   S 35      5        gg)z7
Print a summary table with the Oaxaca-Blinder effects
r?   NzT                Oaxaca-Blinder Two-fold Effects
                Unexplained Effect: r   z.5fz#
                Explained Effect: r   z
                Gap: a                  Oaxaca-Blinder Two-fold Effects
                Unexplained Effect: {:.5f}
                Unexplained Standard Error: {:.5f}
                Explained Effect: {:.5f}
                Explained Standard Error: {:.5f}
                Gap: {:.5f}r>   zT                Oaxaca-Blinder Three-fold Effects
                Endowment Effect: z%
                Coefficient Effect: z%
                Interaction Effect: z+
                Endowment Standard Error: z-
                Coefficient Standard Error: z-
                Interaction Standard Error: )r{   rN   printr   rK   format)r1   s    r7   summaryOaxacaResults.summary  s    ??axx%%)[[^C$8 9##';;q>#"6 7kk!nS)	-   &v KKN HHQK KKN HHQK KKN " ??axx##';;q>#"6 7%%)[[^C$8 9%%)[[^C$8 9kk!nS)-	 ##';;q>#"6 7++/88A;s*; <%%)[[^C$8 9--1XXa[,= >%%)[[^C$8 9--1XXa[,= >kk!nS)-
  r:   )r{   rK   rN   rz   )rq   rr   rs   rt   ru   r8   r   rv   rw   r:   r7   rh   rh     s    /b%
:r:   rh   )ru   textwrapr   numpyr   #statsmodels.regression.linear_modelr   statsmodels.tools.toolsr   r   rh   rw   r:   r7   <module>r      s2   *V   3 0U
 U
pq qr:   