
"""
Information Theoretic and Entropy Measures

References
----------
Golan, A. 2008. "Information and Entropy Econometrics -- A Review and
    Synthesis." Foundations and Trends in Econometrics 2(1-2), 1-145.

Golan, A., Judge, G., and Miller, D.  1996.  Maximum Entropy Econometrics.
    Wiley & Sons, Chichester.
    )lziplmap)statsN)pyplot)	logsumexpc                 6   Uc  [        U 5      $ [        R                  " U 5      n [        U R                  5      nSX!'   U R                  US9n[        R                  " [        R                  " XR                  U5      -
  5      R                  US95      nX4-   nU$ )a  
Compute the log of the sum of exponentials log(e^{a_1}+...e^{a_n}) of a

Avoids numerical overflow.

Parameters
----------
a : array_like
    The vector to exponentiate and sum
axis : int, optional
    The axis along which to apply the operation.  Defaults is None.

Returns
-------
sum(log(exp(a)))

Notes
-----
This function was taken from the mailing list
http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

This should be superceded by the ufunc when it is finished.
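
# A doctest-style sketch of the overflow problem this avoids: the naive
# np.log(np.sum(np.exp(a))) overflows for entries this large, while the
# shifted computation returns 1000 + log(2).
# >>> bool(np.allclose(logsumexp(np.array([1000., 1000.])),
# ...                  1000 + np.log(2)))
# True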
   [         R                  " U 5      n [         R                  " [         R                  " U 5      S5      (       a<  [         R                  " U S:  5      (       a  [         R                  " U S:*  5      (       d  gg)z;
Checks to see if `X` is a proper probability distribution
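
# For example, a proper distribution lies in [0, 1] and sums to one:
# >>> _isproperdist([.25, .25, .5])
# True
# >>> _isproperdist([.5, .9])
# False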
1A;;rvvay!$$BFF1a4LLq!tr   c                 X   [        U 5      nUc*  [        R                  " [        R                  " U5      5      nUS:X  a0  [        R                  " U[
        R                  " U 5      -  U-  5      nUS:X  a  [        R                  " U 5      [        R                  " U 5      -
  n[        R                  " XR-  5      n[
        R                  " U 5      u  pg[        R                  " U5      nSnUS   n	XUS   '   [        SU5       H$  n
Xj   X-   :  a  XXz   '   M  Xj   n	US-  nXXz   '   M&     W$ )z
Discretize `X`

Parameters
----------
bins : int, optional
    Number of bins.  Default is floor(sqrt(N))
method : str
    "ef" is equal-frequency binning
    "ew" is equal-width binning

Examples
--------
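
# Equal-frequency sketch: nine observations split into floor(sqrt(9)) = 3
# bins holding three observations each.
# >>> discretize(np.arange(9.))
# array([1., 1., 1., 2., 2., 2., 3., 3., 3.])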
efewr	   r   )lenr   floorsqrtceilr   rankdatar   minfastsortzerosrange)r    methodnbinsnobsdiscretewidthsvecivecbinnumbaseis              r   
discretizer8   [   s     q6D}'~7755>>!#44T9:~q	BFF1I%%^^A&
88D>Aw"aqAw%$*!w!$*!  Or   c                 \    [         R                  " U5      [         R                  " U 5      -  $ )z


def logbasechange(a, b):
    """
    There is a one-to-one transformation of the entropy value from
    a log base b to a log base a :

    H_{b}(X) = log_{b}(a)[H_{a}(X)]

    Returns
    -------
    log_{b}(a)
    """
    # log_b(a) = log(a)/log(b)
    return np.log(a) / np.log(b)
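
# Converting an entropy from nats (base e) to bits (base 2) multiplies by
# log_2(e) ~ 1.4427, and the two helpers below are inverses of one another:
# >>> bool(np.allclose(logbasechange(np.e, 2), 1 / np.log(2)))
# True
# >>> bool(np.allclose(bitstonats(natstobits(2.5)), 2.5))
# True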


def natstobits(X):
    """
    Converts from nats to bits
    """
    return logbasechange(np.e, 2) * X
natstobitsr@      s     q!A%%r   c                 <    [        S[        R                  5      U -  $ )z
Converts from bits to nats
r=   r>   r   s    r   
bitstonatsrB      s     BDD!A%%r   r=   c                 n   [         R                  " U 5      n [         R                  " U S:*  5      (       a  [         R                  " U S:  5      (       d  [        S5      e[         R                  " [         R
                  " U [         R                  " U 5      -  5      5      * nUS:w  a  [        SU5      U-  $ U$ )a  


def shannonentropy(px, logbase=2):
    """
    This is Shannon's entropy

    Parameters
    ----------
    px : 1d or 2d array_like
        Can be a discrete probability distribution, a 2d joint distribution,
        or a sequence of probabilities.
    logbase : int or np.e
        The base of the log

    Returns
    -------
    For log base 2 (bits) given a discrete distribution
        H(p) = sum(px * log2(1/px)) = -sum(px*log2(px)) = E[log2(1/p(X))]

    For log base 2 (bits) given a joint distribution
        H(px,py) = -sum_{k,j} w_{kj}*log2(w_{kj})

    Notes
    -----
    shannonentropy(0) is defined as 0
    """
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    # nan_to_num maps the 0*log2(0) = nan terms to 0
    entropy = -np.sum(np.nan_to_num(px * np.log2(px)))
    if logbase != 2:
        return logbasechange(2, logbase) * entropy
    else:
        return entropy
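
# The uniform five-point distribution of table 3.1 has entropy log2(5) bits:
# >>> bool(np.allclose(shannonentropy([.2, .2, .2, .2, .2]), np.log2(5)))
# True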


def shannoninfo(px, logbase=2):
    """
    Shannon's information

    Parameters
    ----------
    px : float or array_like
        `px` is a discrete probability distribution

    Returns
    -------
    For logbase = 2
    -np.log2(px)
    """
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    if logbase != 2:
        return -logbasechange(2, logbase) * np.log2(px)
    else:
        return -np.log2(px)
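
# An event of probability one half carries exactly one bit of information:
# >>> float(shannoninfo(.5))
# 1.0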


def condentropy(px, py, pxpy=None, logbase=2):
    """
    Return the conditional entropy of X given Y.

    Parameters
    ----------
    px : array_like
    py : array_like
    pxpy : array_like, optional
        If pxpy is None, the distributions are assumed to be independent
        and condentropy(px,py) = shannonentropy(px)
    logbase : int or np.e

    Returns
    -------
    sum_{kj}log(q_{j}/w_{kj})

    where q_{j} = Y[j]
    and w_kj = X[k,j]
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    condent = np.sum(pxpy * np.nan_to_num(np.log2(py / pxpy)))
    if logbase == 2:
        return condent
    else:
        return logbasechange(2, logbase) * condent
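
# If X and Y are independent (pxpy=None), conditioning on Y leaves H(X)
# unchanged, so for a fair coin H(X|Y) is one bit:
# >>> float(condentropy([.5, .5], [.5, .5]))
# 1.0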


def mutualinfo(px, py, pxpy, logbase=2):
    """
    Returns the mutual information between X and Y.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like
        The joint probability distribution of random variables X and Y.
        Note that if X and Y are independent then the mutual information
        is zero.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    shannonentropy(px) - condentropy(px,py,pxpy)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (shannonentropy(px, logbase=logbase)
            - condentropy(px, py, pxpy, logbase=logbase))
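
# Independence means zero mutual information; pxpy=None assumes an
# independent joint distribution:
# >>> float(mutualinfo([.5, .5], [.5, .5], None))
# 0.0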


def corrent(px, py, pxpy, logbase=2):
    """
    An information theoretic correlation measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    mutualinfo(px,py,pxpy,logbase=logbase)/shannonentropy(py,logbase=logbase)

    Notes
    -----
    This is also equivalent to

    corrent(px,py,pxpy) = 1 - condentropy(px,py,pxpy)/shannonentropy(py)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (mutualinfo(px, py, pxpy, logbase=logbase)
            / shannonentropy(py, logbase=logbase))
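
# The informational correlation of independent variables is zero:
# >>> float(corrent([.5, .5], [.5, .5], None))
# 0.0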


def covent(px, py, pxpy, logbase=2):
    """
    An information theoretic covariance measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    condentropy(px,py,pxpy,logbase=logbase) + condentropy(py,px,pxpy,
        logbase=logbase)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    # the sum of the conditional entropies H(X|Y) and H(Y|X)
    return (condentropy(px, py, pxpy, logbase=logbase)
            + condentropy(py, px, pxpy, logbase=logbase))
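
# For two independent fair coins the measure is H(X) + H(Y) = 2 bits:
# >>> float(covent([.5, .5], [.5, .5], None))
# 2.0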


def renyientropy(px, alpha=1, logbase=2, measure='R'):
    """
    Renyi's generalized entropy

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X.  Note that
        px is assumed to be a proper probability distribution.
    alpha : float or inf
        The order of the entropy.  The default is 1, which in the limit
        is just Shannon's entropy.  2 is Renyi (Collision) entropy.  If
        the string "inf" or numpy.inf is specified the min-entropy is
        returned.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The type of entropy measure desired.  'R' returns Renyi entropy
        measure.  'T' returns the Tsallis entropy measure.

    Returns
    -------
    1/(1-alpha)*log(sum(px**alpha))

    In the limit as alpha -> 1, Shannon's entropy is returned.

    In the limit as alpha -> inf, min-entropy is returned.
    """
    if not _isproperdist(px):
        raise ValueError("px is not a proper probability distribution")
    px = np.asarray(px)
    alpha = float(alpha)
    if alpha == 1:
        genent = shannonentropy(px)
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent
    elif 'inf' in str(alpha).lower() or alpha == np.inf:
        # min-entropy, in bits to agree with the other branches
        return -np.log2(np.max(px))
    # gets here if alpha is neither 1 nor inf
    px = px ** alpha
    genent = np.log2(px.sum())
    if logbase == 2:
        return 1 / (1 - alpha) * genent
    else:
        return 1 / (1 - alpha) * logbasechange(2, logbase) * genent
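
# The collision entropy (alpha=2) of a fair coin equals its Shannon entropy
# of one bit, as expected for a uniform distribution:
# >>> float(renyientropy([.5, .5], alpha=2))
# 1.0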


def gencrossentropy(px, py, pxpy, alpha=1, logbase=2, measure='T'):
    """
    Generalized cross-entropy measures.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    alpha : float or inf
        The order of the measure.  Default is 1.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The measure is the type of generalized cross-entropy desired. 'T' is
        the cross-entropy version of the Tsallis measure.  'CR' is
        Cressie-Read measure.
    """
    # TODO: not implemented yet


if __name__ == "__main__":
    print("From Golan (2008) \"Information and Entropy Econometrics -- "
          "A Review and Synthesis\"")
    print("Table 3.1")
    # Examples from Golan (2008)
    X = [.2, .2, .2, .2, .2]
    Y = [.322, .072, .511, .091, .004]

    for i in X:
        print(shannoninfo(i))
    for i in Y:
        print(shannoninfo(i))
    print(shannonentropy(X))
    print(shannonentropy(Y))

    p = [1e-5, 1e-4, .001, .01, .1, .15, .2, .25, .3, .35, .4, .45, .5]

    plt.subplot(111)
    plt.ylabel("Information")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 100001)
    plt.plot(x, shannoninfo(x))

    plt.subplot(111)
    plt.ylabel("Entropy")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 101)
    plt.plot(x, lmap(shannonentropy, lzip(x, 1 - x)))

    # define a joint probability distribution from Golan (2008) table 3.3
    w = np.array([[0, 0, 1. / 3], [1 / 9., 1 / 9., 1 / 9.],
                  [1 / 18., 1 / 9., 1 / 6.]])
    # table 3.3
    px = w.sum(0)
    py = w.sum(1)
    H_X = shannonentropy(px)
    H_Y = shannonentropy(py)
    H_XY = shannonentropy(w)
    H_XgivenY = condentropy(px, py, w)
    H_YgivenX = condentropy(py, px, w)
    # note that cross-entropy is not a distance measure as the following shows
    D_YX = logbasechange(2, np.e) * stats.entropy(px, py)
    D_XY = logbasechange(2, np.e) * stats.entropy(py, px)
    I_XY = mutualinfo(px, py, w)
    print("Table 3.3")
    print(H_X, H_Y, H_XY, H_XgivenY, H_YgivenX, D_YX, D_XY, I_XY)

    print("discretize functions")
    X = np.array([21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9,
                  25.8, 20.2, 14.2, 24.0, 21.0, 11.3, 18.0, 16.3, 22.2,
                  7.8, 27.8, 16.3, 35.1, 14.9, 17.1, 28.2, 16.4, 16.5,
                  46.0, 9.5, 18.8, 32.1, 26.1, 16.1, 7.3, 21.4, 20.0,
                  29.3, 14.9, 8.3, 22.5, 12.8, 26.9, 25.5, 22.9, 11.2,
                  20.7, 26.2, 9.3, 10.8, 15.6])
    discX = discretize(X)

    print("Example in section 3.6 of Golan, using table 3.3")
    print("Bounding errors using Fano's inequality")
    print("H(P_{e}) + P_{e}log(K-1) >= H(X|Y)")
    print("or, a weaker inequality")
    print("P_{e} >= [H(X|Y) - 1]/log(K)")
    print("P(x) = %s" % px)
    print("X = 3 has the highest probability, so this is the estimate Xhat")
    pe = 1 - px[2]
    print("The probability of error Pe is 1 - p(X=3) = %0.4g" % pe)
    H_pe = shannonentropy([pe, 1 - pe])
    print("H(Pe) = %0.4g and K=3" % H_pe)
    print("H(Pe) + Pe*log(K-1) = %0.4g >= H(X|Y) = %0.4g"
          % (H_pe + pe * np.log2(2), H_XgivenY))
    print("or using the weaker inequality")
    print(f"Pe = {pe:0.4g} >= [H(X) - 1]/log(K) = "
          f"{(H_X - 1) / np.log2(3):0.4g}")
    print("Consider now, table 3.5, where there is additional information")
    print("The conditional probabilities of P(X|Y=y) are ")
    w2 = np.array([[0., 0., 1.], [1 / 3., 1 / 3., 1 / 3.],
                   [1 / 6., 1 / 3., 1 / 2.]])
    print(w2)
    print("The probability of error given this information is")
    print("Pe = [H(X|Y) -1]/log(K) = %0.4g"
          % ((np.mean([0, shannonentropy(w2[1]), shannonentropy(w2[2])]) - 1)
             / np.log2(3)))
    print("such that more information lowers the error")

    # stochastic processes: a Markov chain transition matrix (rows sum to 1)
    markovchain = np.array([[.553, .284, .163], [.465, .312, .223],
                            [.420, .322, .258]])