
    >hk              
           S r SSKJrJr  SSKJr  SSKrSSKJrJ	r	J
r
Jr  SSKJr  SSKJr  S/rS	 rSS jrS rS rS rS rSS jrS rS rS rS rS rS rS rSSS
SS SSSS
S4
S jrg)zCreate a mosaic plot from a contingency table.

It allows to visualize multivariate categorical data in a rigorous
and informative way.

see the docstring of the mosaic function for more informations.
    )lrangelzip)productN)arraycumsumiterabler_)	DataFrame)utilsmosaicc                 X   [        U 5      (       dX  U S:X  a  [        SS/5      n ODU S:  a  [        SS/5      n O0U S:  a  [        SR                  U 5      5      e[        U SU -
  /5      n [        R
                  " U [        S9n [        R                  " U S:  5      (       a  [        SR                  U 5      5      e[        R                  " U S5      (       a  [        SR                  U 5      5      e[        U 5      S:  a  [        SS/5      $ [        S[        U 5      4   nXS	   S-  -  nU$ )
z
return a list of proportions of the available space given the division
if only a number is given, it will assume a split in two pieces
r                 ?   z.proportions should be positive,given value: {})dtypezBat least one proportion should be greater than zerogiven value: {}   )r   r   
ValueErrorformatnpasarrayfloatanyallcloselenr	   r   )
proportionlefts     rC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\statsmodels/graphics/mosaicplot.py_normalize_splitr      s(   
 J?Sz*J1_Sz*J!^ 006z0BD D 
C*,<=>JJe4J	vvj1n ,,2F:,>@ 	@	{{:q!!$fZ0
 	

 :c3Z  a
##$DHsNDK    Tc                    [        U 5      [        U5      [        U5      [        U5      4u  ppxUS:  d  US:  a  [        SR                  Xx5      5      e[        U5      n	U	SS n
U	SS U
-
  nX[        R
                  " [        U	5      S-
  5      -  -  n
U
S   US   -   U
S   -
  nX-  n
X-  nU(       a  U OUX(       a  UOU-  -   n
X(       a  UOU-  n[        X5       VVs/ s H  u  pU(       a  XX4OXX~4PM     nnnU$ s  snnf )aQ  
Split the given rectangle in n segments whose proportion is specified
along the given axis if a gap is inserted, they will be separated by a
certain amount of space, retaining the relative proportion between them
a gap of 1 correspond to a plot that is half void and the remaining half
space is proportionally divided among the pieces.
r   z/dimension of the square less thanzero w={} h={}Nr   r   )r   r   r   r   r   aranger   zip)xywidthheightr   
horizontalgapwhproportionsstarting	amplitude	extensionsaresultss                   r   _split_rectr3   7   s0    q58U5\5=@JA!	A1q5 ++16!<9 	9":.K 3BHAB(*I biiK 01 4555H y},x{:I HI  Q(:a1*MMH*Q!4I  464DA  *a|a|;4  6N6s    D c                 h   ^^ [        T5      m[        UU4S jU R                  5        5       5      nU$ )zn
Make partial sum on a counter dict.
Given a match for the beginning of the category, it will sum each value.
c              3   B   >#    U  H  u  pUS T T:X  d  M  Uv   M     g 7fN ).0kvLpartial_keys      r   	<genexpr>_reduce_dict.<locals>.<genexpr>d   s$     I0daAbqE[4H0s   	)r   sumitems)
count_dictr<   countr;   s    ` @r   _reduce_dictrC   ^   s-    
 	KAIj..0IIELr    c           
          0 n[        U5      nU R                  5        HB  u  nu  ppX8SU :X  a+  [        XXX$U5      n[        X5       H  u  pXX4-   '   M     M<  XX4Xh'   MD     U$ )z
Given a dictionary where each entry  is a rectangle, a list of key and
value (count of elements in each category) it split each rect accordingly,
as long as the key start with the tuple key_subset.  The other keys are
returned without modification.
N)r   r@   r3   r#   )	rect_dictkeysvalues
key_subsetr(   r)   resultr;   namer$   r%   r*   r+   	divisionskeyrects                   r   _key_splittingrN   h   sy     FJA'oo/lqQbq!#A!CHI 1	(,tf}% 2 !<FL 0 Mr    c                     [         R                  " U 5      (       a)  [        U [        5      (       d  [	        S U  5       5      nU$ [        U 5      4nU$ )zconvert an object in a tuple of strings (even if it is not iterable,
like a single integer number, but keep the string healthy)
c              3   8   #    U  H  n[        U5      v   M     g 7fr6   )str)r8   os     r   r=   _tuplify.<locals>.<genexpr>   s     (CqCFFCs   )r   r   
isinstancerQ   tuple)objress     r   _tuplifyrX   |   sH     
{{3
3 4 4(C(( J 3xkJr    c           
          / n[        U 6  H9  n[        U5      nUR                  [        U Vs0 s H  oDS_M     sn5      5        M;     U$ s  snf )zuse the Ordered dict to implement a simple ordered set
return each level of each category
[[key_1_level_1,key_2_level_1],[key_1_level_2,key_2_level_2]]
N)r#   rX   appendlist)rF   rW   i	tuplefiedjs        r   _categories_levelr_      sN    
 C4\QK	

4)4)QD)456  J 5s   A
c           
         [        [        5       S4/5      n[        [        U R	                  5       5      5      n[        U5      n[        R                  " U5      (       d   [        U5       Vs/ s H
  obSU-  -  PM     nn[        U5      U:  a/  US   n[        U6 [        U5       Vs/ s H
  ogSU-  -  PM     sn-   nUSU n[        [        U6 5       Vs0 s H  nXU   _M
     n	n[        U5       HZ  u  p[        [        USU
 6 5      nU H4  nU Vs/ s H  n[        XU4-   5      PM     nnX*   n[        X;XUU5      nM6     U(       + nM\     U$ s  snf s  snf s  snf s  snf )a  
Split a square in a hierarchical way given a contingency table.

Hierarchically split the unit square in alternate directions
in proportion to the subdivision contained in the contingency table
count_dict.  This is the function that actually perform the tiling
for the creation of the mosaic plot.  If the gap array has been specified
it will insert a corresponding amount of space (proportional to the
unit length), while retaining the proportionality of the tiles.

Parameters
----------
count_dict : dict
    Dictionary containing the contingency table.
    Each category should contain a non-negative number
    with a tuple as index.  It expects that all the combination
    of keys to be represents; if that is not true, will
    automatically consider the missing values as 0
horizontal : bool
    The starting direction of the split (by default along
    the horizontal axis)
gap : float or array of floats
    The list of gaps to be applied on each subdivision.
    If the length of the given array is less of the number
    of subcategories (or if it's a single number) it will extend
    it with exponentially decreasing gaps

Returns
-------
base_rect : dict
    A dictionary containing the result of the split.
    To each key is associated a 4-tuple of coordinates
    that are required to create the corresponding rectangle:

        0 - x position of the lower left corner
        1 - y position of the lower left corner
        2 - width of the rectangle
        3 - height of the rectangle
)r   r   r   r   g      ?r   N)dictrU   r_   r[   rF   r   r   r   ranger   	enumeraterC   rN   )rA   r(   r)   	base_rectcategories_levelsr;   idxlastr9   count_orderedcat_idxcat_enum	base_keysrL   partial
part_countnew_gaps                    r   _hierarchical_splitro      s   R uw-./I)$z/@*ABA ;;s+0848CSCZ84
3x!|2wCjqB#3#:-BB
bq'C "&g/@&A!BD!BA 1%!B  D&'89"3HW"=>?	C ,45+3 '}WJ6FG+3  5 lG&yJ'17<I  $^
 : 3 5 C
D5s   ,E'EEE c                 p    SSK Jn  U" [        U 5      R                  SSS5      5      R                  S5      $ )z0Transform a color from the hsv space to the rgb.r   )
hsv_to_rgbr      )matplotlib.colorsrq   r   reshape)hsvrq   s     r   _single_hsv_to_rgbrv      s.    ,eCj((Aq12::1==r    c                    [        [        U R                  5       5      5      n[        U5      n[        US   5      n[        R
                  " SSUS-   5      SS nUS:  a  [        US   5      OSn[        R
                  " SSUS-   5      SS	 nUS:  a  [        US   5      OSn[        R
                  " SSUS-   5      SS	 nUS
:  a  [        US
   5      OSn/ SQSUS-    n[        [        U5      US   5      n[        [        U5      US:  a  US   OS/5      n[        [        U5      US:  a  US   OS/5      n[        [        U5      US
:  a  US
   OS/5      n0 n[        XEXg5       H  u  ppU	u  pU
u  nnUu  nnUu  nnU4U(       a  U4O	[        5       -   nUU(       a  U4O	[        5       -   nUU(       a  U4O	[        5       -   n[        XU/5      n[        U5      USS.nUUU'   M     U$ )aD  "Create the default properties of the mosaic given the data
first it will varies the color hue (first category) then the color
saturation (second category) and then the color value
(third category).  If a fourth category is found, it will put
decoration on the rectangle.  Does not manage more than four
level of categories
r   r   r   r   Nr   g      ?r   rr   ) /-|+ry   )colorhatchlw)r_   r[   rF   r   r   linspacer   r   rU   r   rv   )datare   Nlevelsr;   hue
saturationvaluer   
propertiesr+   r0   r:   thvhnsvsnvvvntvtnlevelru   props                           r   _create_default_propertiesr      s    *$tyy{*;<#$Ga !A
++c3A
&s
+C%,q[a !aAS#q1u-cr2J%,q[a !aAKKS!a%("-E%,q[a !aA$Va!e,E
tCy+A.
/Cd:&-4q[&q)rdDJe-4q[&q)rdDEe-4q[&q)rdDE Jcu<
aBBB""%'2""%'2""%'2RRL!+C02QG 
5 = r    c                 2  ^ [        U S5      (       a  [        U S5      (       a  [        X5      n Sn [        U R                  5       5      nU VVs0 s H  u  pg[        U5      U_M     n nn[        [        U R                  5       5      5      n[        U6 n	U	 Vs0 s H  ofU R                  US5      _M     n
nU
n Uc  [        [!        U5      5      OUn0 n
U R                  5        H  u  mn[        U4S jU 5       5      nXU'   M!     U
n U $ ! [         av    [
        R                  " U 5      n 0 n[
        R                  " U R                  5       H  n[        S U 5       5      nX   X5'   M     Un [        U R                  5       5      n GN?f = fs  snnf s  snf )aN  normalize the data to a dict with tuples of strings as keys
right now it works with:

    0 - dictionary (or equivalent mappable)
    1 - pandas.Series with simple or hierarchical indexes
    2 - numpy.ndarrays
    3 - everything that can be converted to a numpy array
    4 - pandas.DataFrame (via the _normalize_dataframe function)
pivotgroupbyNc              3   $   #    U  H  ov   M     g 7fr6   r7   )r8   r\   s     r   r=   "_normalize_data.<locals>.<genexpr>-  s     (CqC   r   c              3   .   >#    U  H
  nTU   v   M     g 7fr6   r7   )r8   r\   rL   s     r   r=   r   >  s     .1As   )hasattr_normalize_dataframer[   r@   AttributeErrorr   r   ndindexshaperU   rX   r_   rF   r   getr   r   )r   indexr@   temprf   rJ   r9   r:   re   indexescontingencyr   new_keyrL   s                @r   _normalize_datar     st    tW'$	":":#D0#TZZ\" (--utqHQKNuD-)$tyy{*;<()G.56gdhhq!n$gK6D /4mF3()*EKjjl
U...$G # DK5  	# zz$::djj)C(C((DDJ * TZZ\"	# . 7s   D FFA<F
Fc                     X   R                  5       nUR                  USSS9nX1   R                  5       nUR                  SS9nUR	                  S5      nU$ )zyTake a pandas DataFrame and count the element present in the
given columns, return a hierarchical index on those columns
F)sortobservedr   )axisr   )dropnar   rB   meanfillna)	dataframer   r   groupedcountedaverageds         r   r   r   D  s\     ""$Dll5uul=Gn""$G|||#Hs#HOr    c                    [        U S5      n [        [        U R                  5       5      5      n[	        U5      nS[        S U R                  5        5       5      -  n/ n[        U5       H`  n0 nX    HB  nSXg'   U R                  5        H  u  pXxU   :X  d  M  Xg==   U	-  ss'   M     Xg==   U-  ss'   MD     UR                  U5        Mb     0 n
U R                  5        HI  u  pSn[        U5       H  u  pXU   U   -  nM     X-  [        R                  " X;-  SU-
  -  5      4X'   MK     U
R                  5        VVVs0 s H  u  nu  pXU   U-
  U-  _M     nnnn0 nUR                  5        HN  u  nnUS:  a  SOUSU-   -  nUS:  a  SOUSU-   -  nSU-
  U-
  S-  nUS	:  a  S
O	US:  a  SOSnUUU/US.UU'   MP     U$ s  snnnf )zwevaluate colors from the indipendence properties of the matrix
It will encounter problem if one category has all zeros
Nr   c              3   $   #    U  H  ov   M     g 7fr6   r7   )r8   r:   s     r   r=   (_statistical_coloring.<locals>.<genexpr>Z  s     /Aar   r   r   r   r          @r   r$   rx   rR   ry   )r~   r   )r   r_   r[   rF   r   r?   rG   rb   r@   rZ   rc   r   sqrt)r   re   r   totallevels_count	level_idxr   r   rL   r   expectedbaser\   r9   mr0   sigmaspropsdevredbluegreenr   s                          r   _statistical_coloringr   S  s    4&D)$tyy{*;<#$G#////E L7^	
&1E #J"jjl
	N*%.% + & 2 	J' $ Hjjl
cNDAOA&&D #bggelcDj.I&JJ	 # 5=NN4DE4Dyq&1aq'A+""4DFEELLNS1Wc3!c'?AgsC28$4sT!S(Qw38C #UD1EBc
 # L Fs   Gc                 2    US:X  a  U $ XS-  -   U-  U-  U-  $ )Nr   r   r7   )r$   r*   r+   Ws       r   _get_positionr   ~  s)    AvCK1q 1$$r    c                   ^^^ [        [        U R                  5       5      5      m[        T5      S:  a  Sn[	        U5      e0 n[        U R                  5       5      nU(       + nUR                  5       nUR                  5       n	UR                  UR                  U	R                  UR                  /n
UR                  UR                  U	R                  UR                  /nU(       a  U
SS U
SS -   n
USS USS -   n[        X5       H  u  pU" / 5        U" / 5        M     [        T5       GH:  u  p[        5       nU H  nU(       a  US:X  a  / SQmO/ SQmOUS:X  a  / SQmO/ SQm[        UU4S	 j[!        U5       5       5      nUU4-   nU VVs0 s H  u  nnUUSUS-    :X  d  M  UU_M     nnn[        UR#                  5       5      n[%        S
 U 5       5      m[%        U4S jU 5       5      n[%        U4S jU 5       5      nX-   S-  nUS-  (       a  UOUUU'   M     X   " [        UR#                  5       5      5        X   " [        UR                  5       5      X>   S9  GM=     U$ s  snnf )zfind the position of the label for each value of each category

right now it supports only up to the four categories

ax: the axis on which the label should be applied
rotation: the rotation list for each side
   zrmaximum of 4 level supported for axes labeling... and 4is already a lot of levels, are you sure you need them all?r   Nrr   )r   r   r   )r   r   r   )r   r   r   c              3   :   >#    U  H  nTU   TU      v   M     g 7fr6   r7   )r8   r\   
categoriesindex_selects     r   r=   !_create_labels.<locals>.<genexpr>  s%      7%5 'qM,q/:%5s   c              3   0   #    U  H  u  pp4X4-  v   M     g 7fr6   r7   )r8   r$   r%   r*   r+   s        r   r=   r     s     3dlqQAEds   c              3   D   >#    U  H  u  pp4[        XUT5      v   M     g 7fr6   r   r8   r$   r%   r*   r+   r   s        r   r=   r     !     KdlqQaAq11d    c              3   D   >#    U  H  u  pp4[        X$UT5      v   M     g 7fr6   r   r   s        r   r=   r     r   r   r   )rotation)r_   r[   rF   r   r   r@   twinxtwiny
set_xticks
set_yticksset_xticklabelsset_yticklabelsr#   rc   ra   rU   rb   rG   r?   )rectsr(   axr   msglabelsr@   verticalax2ax3	ticks_pos	ticks_labposlabr   r   level_ticksr   basekeyr9   r:   subsetvalsx_laby_labsider   r   r   s                             @@@r   _create_labelsr     s`    #4

#56J
:MoFE~H ((*C
((*Cs~~s~~NI##R%7%7$$c&9&9;I abMIbqM1	abMIbqM1		-BB .
 &j1	fE
 >#/L#/L>#/L#/L  7%*9%57 7G(G', ;utq!$.9q=(99 aduF ; (D3d33AKdKKEKdKKE (A-D*.(KI N 	T+"4"4"678T+"2"2"45&.&9	;W 2Z M+;s   I-
(I-
g{Gzt?c                     g r6   r7   )rL   s    r   <lambda>r     s    $r    ry   Fr   c           
      P  ^ [        U [        5      (       a  Uc  [        S5      eSSKJn  [
        R                  " U5      u  p[        X5      n [        XUS9nUc  S nU(       a  [        U 5      nO[        U 5      n[        U[        5      (       a  UmU4S jnUR                  5        Hj  u  nnUu  nnnnU" U5      nU(       a  UOX   nU" U5      nU" UU4UU4SU0UD6nUR                  U5        UR                  UUS-  -   UUS-  -   US	S	S
S9  Ml     U	(       a2  [        R                   " U
5      (       a  U
nOU
/S-  n[#        XUU5      nODUR%                  / 5        UR'                  / 5        UR)                  / 5        UR+                  / 5        UR-                  U5        X4$ )a  Create a mosaic plot from a contingency table.

It allows to visualize multivariate categorical data in a rigorous
and informative way.

Parameters
----------
data : {dict, Series, ndarray, DataFrame}
    The contingency table that contains the data.
    Each category should contain a non-negative number
    with a tuple as index.  It expects that all the combination
    of keys to be represents; if that is not true, will
    automatically consider the missing values as 0.  The order
    of the keys will be the same as the one of insertion.
    If a dict of a Series (or any other dict like object)
    is used, it will take the keys as labels.  If a
    np.ndarray is provided, it will generate a simple
    numerical labels.
index : list, optional
    Gives the preferred order for the category ordering. If not specified
    will default to the given order.  It does not support named indexes
    for hierarchical Series.  If a DataFrame is provided, it expects
    a list with the name of the columns.
ax : Axes, optional
    The graph where display the mosaic. If not given, will
    create a new figure
horizontal : bool, optional
    The starting direction of the split (by default along
    the horizontal axis)
gap : {float, sequence[float]}
    The list of gaps to be applied on each subdivision.
    If the length of the given array is less of the number
    of subcategories (or if it's a single number) it will extend
    it with exponentially decreasing gaps
properties : dict[str, callable], optional
    A function that for each tile in the mosaic take the key
    of the tile and returns the dictionary of properties
    of the generated Rectangle, like color, hatch or similar.
    A default properties set will be provided fot the keys whose
    color has not been defined, and will use color variation to help
    visually separates the various categories. It should return None
    to indicate that it should use the default property for the tile.
    A dictionary of the properties for each key can be passed,
    and it will be internally converted to the correct function
labelizer : dict[str, callable], optional
    A function that generate the text to display at the center of
    each tile base on the key of that tile
title : str, optional
    The title of the axis
statistic : bool, optional
    If true will use a crude statistical model to give colors to the plot.
    If the tile has a constraint that is more than 2 standard deviation
    from the expected value under independence hypothesis, it will
    go from green to red (for positive deviations, blue otherwise) and
    will acquire an hatching when crosses the 3 sigma.
axes_label : bool, optional
    Show the name of each value of each category
    on the axis (default) or hide them.
label_rotation : {float, list[float]}
    The rotation of the axis label (if present). If a list is given
    each axis can have a different rotation

Returns
-------
fig : Figure
    The figure containing the plot.
rects : dict
    A dictionary that has the same keys of the original
    dataset, that holds a reference to the coordinates of the
    tile and the Rectangle that represent it.

References
----------
A Brief History of the Mosaic Display
    Michael Friendly, York University, Psychology Department
    Journal of Computational and Graphical Statistics, 2001

Mosaic Displays for Loglinear Models.
    Michael Friendly, York University, Psychology Department
    Proceedings of the Statistical Graphics Section, 1992, 61-68.

Mosaic displays for multi-way contingency tables.
    Michael Friendly, York University, Psychology Department
    Journal of the american statistical association
    March 1994, Vol. 89, No. 425, Theory and Methods

Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> import matplotlib.pyplot as plt
>>> from statsmodels.graphics.mosaicplot import mosaic

The most simple use case is to take a dictionary and plot the result

>>> data = {'a': 10, 'b': 15, 'c': 16}
>>> mosaic(data, title='basic dictionary')
>>> plt.show()

A more useful example is given by a dictionary with multiple indices.
In this case we use a wider gap to a better visual separation of the
resulting plot

>>> data = {('a', 'b'): 1, ('a', 'c'): 2, ('d', 'b'): 3, ('d', 'c'): 4}
>>> mosaic(data, gap=0.05, title='complete dictionary')
>>> plt.show()

The same data can be given as a simple or hierarchical indexed Series

>>> rand = np.random.random
>>> from itertools import product
>>> tuples = list(product(['bar', 'baz', 'foo', 'qux'], ['one', 'two']))
>>> index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
>>> data = pd.Series(rand(8), index=index)
>>> mosaic(data, title='hierarchical index series')
>>> plt.show()

The third accepted data structure is the np array, for which a
very simple index will be created.

>>> rand = np.random.random
>>> data = 1+rand((2,2))
>>> mosaic(data, title='random non-labeled array')
>>> plt.show()

If you need to modify the labeling and the coloring you can give
a function tocreate the labels and one with the graphical properties
starting from the key tuple

>>> data = {'a': 10, 'b': 15, 'c': 16}
>>> props = lambda key: {'color': 'r' if 'a' in key else 'gray'}
>>> labelizer = lambda k: {('a',): 'first', ('b',): 'second',
...                        ('c',): 'third'}[k]
>>> mosaic(data, title='colored dictionary', properties=props,
...        labelizer=labelizer)
>>> plt.show()

Using a DataFrame as source, specifying the name of the columns of interest

>>> gender = ['male', 'male', 'male', 'female', 'female', 'female']
>>> pet = ['cat', 'dog', 'dog', 'cat', 'dog', 'cat']
>>> data = pd.DataFrame({'gender': gender, 'pet': pet})
>>> mosaic(data, ['pet', 'gender'], title='DataFrame as Source')
>>> plt.show()

.. plot :: plots/graphics_mosaicplot_mosaic.py
z<You must pass an index if data is a DataFrame. See examples.r   )	Rectangle)r(   r)   c                 $    SR                  U 5      $ )N
)join)r9   s    r   r   mosaic.<locals>.<lambda>}  s    diilr    c                 (   > TR                  U S 5      $ r6   )r   )rL   
color_dicts    r   r   r     s    T!:r    labelr   centersmaller)havasizer   )rT   r
   r   matplotlib.patchesr   r   create_mpl_axr   ro   r   r   ra   r@   	add_patchtextr   r   r   r   r   r   r   	set_title)r   r   r   r(   r)   r   	labelizertitle	statistic
axes_labellabel_rotationr   figr   default_propsr9   r:   r$   r%   r*   r+   confr   r  Rectr   r   r   s                              @r   r   r     s   n $	""u} * + 	+ - !!"%GC4'DEE *	-d3248*d##
:
1
1a!}-"2|!QA;T;U;
T
AE	1q1u9dx9 	 	.  ;;~&&%H&'!+H2x@
b
2
b
2LL:r    )Tg?) __doc__statsmodels.compat.pythonr   r   	itertoolsr   numpyr   r   r   r   r	   pandasr
   statsmodels.graphicsr   __all__r   r3   rC   rN   rX   r_   ro   rv   r   r   r   r   r   r   r   r7   r    r   <module>r     s    3   - -  &*@$N(	IX>,^-`(V%Pf 5&$uFr    