
    ăhw%                        S SK Jr  S SKrS SKr SSKJr  SSK
Jr  \R                  \R                  \R                  S	 5       5       5       r\R                  \R                  \R                  S
 5       5       5       rSS jr\R                  SS j5       rSS jrSS jr\R                  SS j5       rSS jrSS jrSS jrSS jrSS jrSS jrg! \ a0    \R                  " 5       S:w  a  \R                  " S5        S SK	Jr   Nf = f)    )unicode_literalsN   )StringMatcherPyPyzYUsing slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning)SequenceMatcher)utilsc                     [         R                  " X5      u  p[        S X5      n[         R                  " SUR	                  5       -  5      $ )Nd   )r   make_type_consistentr   intrratio)s1s2ms      gC:\Users\julio\OneDrive\Documentos\Trabajo\IdeasFrscas\Cabanna\env\Lib\site-packages\fuzzywuzzy/fuzz.pyr   r      s;     ''/FBb%A::cAGGIo&&    c                    [         R                  " X5      u  p[        U 5      [        U5      ::  a  U nUnOUnU n[        SX#5      nUR	                  5       n/ nU Hf  nUS   US   -
  S:  a  US   US   -
  OSnU[        U5      -   n	X8U	 n
[        SX*5      nUR                  5       nUS:  a    gUR                  U5        Mh     [         R                  " S[        U5      -  5      $ )zN"Return the ratio of the most similar substring
as a number between 0 and 100.Nr   r   gףp=
?r
   )	r   r   lenr   get_matching_blocksr   appendr   max)r   r   shorterlongerr   blocksscoresblock
long_startlong_endlong_substrm2rs                r   partial_ratior"      s     ''/FB
2w#b'g.A""$F F-21Xa-@A,EU1Xa(1
G,1T78HHJt8MM!  ::cCK'((r   c                     U(       a  [         R                  " XS9OU nUR                  5       nSR                  [	        U5      5      nUR                  5       $ )z*Return a cleaned string with token sorted.force_ascii )r   full_processsplitjoinsortedstrip)sr%   r'   tstokenssorted_strings         r   _process_and_sortr0   K   sG     <H		A	7QBXXZF IIfVn-M  r   c                 d    [        XUS9n[        XUS9nU(       a  [        XV5      $ [        XV5      $ )Nr'   )r0   r"   r   )r   r   partialr%   r'   sorted1sorted2s          r   _token_sortr6   Z   s3    lKGlKGW..W&&r   c                     [        XSX#S9$ )zhReturn a measure of the sequences' similarity between 0 and 100
but sorting the token before comparing.
Fr3   r%   r'   r6   r   r   r%   r'   s       r   token_sort_ratior;   e   s     ru+aar   c                     [        XSX#S9$ )zuReturn the ratio of the most similar substring as a number between
0 and 100 but sorting the token before comparing.
Tr8   r9   r:   s       r   partial_token_sort_ratior=   l   s     rt``r   c                 V   U(       d  X:X  a  gU(       a  [         R                  " XS9OU nU(       a  [         R                  " XS9OUn[         R                  " U5      (       d  g[         R                  " U5      (       d  g[        UR	                  5       5      n[        UR	                  5       5      nUR                  U5      n	UR                  U5      n
UR                  U5      nSR                  [        U	5      5      nSR                  [        U
5      5      nSR                  [        U5      5      nUS-   U-   nUS-   U-   nUR                  5       nUR                  5       nUR                  5       nU(       a  [        nO[        nU" X5      U" UU5      U" UU5      /n[        U5      $ )zFind all alphanumeric tokens in each string...
- treat them as a set
- construct two strings of the form:
    <sorted_intersection><sorted_remainder>
- take ratios of those two strings
- controls for unordered partial matchesr
   r$   r   r&   )r   r'   validate_stringsetr(   intersection
differencer)   r*   r+   r"   r   r   )r   r   r3   r%   r'   p1p2tokens1tokens2rA   diff1to2diff2to1sorted_sectsorted_1to2sorted_2to1combined_1to2combined_2to1
ratio_funcpairwises                      r   
_token_setrP   s   so    BH<H		B	8bB<H		B	8bB  $$  $$ "((*oG"((*oG''0L!!'*H!!'*H((6,/0K((6(+,K((6(+,K#%3M#%3M ##%K!'')M!'')M"

 	;.;.=-0H
 x=r   c                     [        XSX#S9$ )NFr8   rP   r:   s       r   token_set_ratiorS      s    be``r   c                     [        XSX#S9$ )NTr8   rR   r:   s       r   partial_token_set_ratiorU      s    bd__r   c                     U(       a)  [         R                  " XS9n[         R                  " XS9nOU nUn[         R                  " U5      (       d  g[         R                  " U5      (       d  g[        XE5      $ )ax  
Quick ratio comparison between two strings.

Runs full_process from utils on both strings
Short circuits if either of the strings is empty after processing.

:param s1:
:param s2:
:param force_ascii: Allow only ASCII characters (Default: True)
:full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
:return: similarity ratio
r$   r   )r   r'   r?   r   )r   r   r%   r'   rC   rD   s         r   QRatiorW      sc     <<  $$  $$=r   c                     [        XSUS9$ )zr
Unicode quick ratio

Calls QRatio with force_ascii set to False

:param s1:
:param s2:
:return: similarity ratio
Fr%   r'   )rW   r   r   r'   s      r   UQRatior[      s     "e,GGr   c                    U(       a)  [         R                  " XS9n[         R                  " XS9nOU nUn[         R                  " U5      (       d  g[         R                  " U5      (       d  gSnSnSn[        XE5      n	[	        [        [        U5      [        U5      5      5      [        [        U5      [        U5      5      -  n
U
S:  a  SnU
S:  a  S	nU(       aN  [        XE5      U-  n[        XESS
9U-  U-  n[        XESS
9U-  U-  n[         R                  " [        XX5      5      $ [        XESS
9U-  n[        XESS
9U-  n[         R                  " [        XU5      5      $ )a  
Return a measure of the sequences' similarity between 0 and 100, using different algorithms.

**Steps in the order they occur**

#. Run full_process from utils on both strings
#. Short circuit if this makes either string empty
#. Take the ratio of the two processed strings (fuzz.ratio)
#. Run checks to compare the length of the strings
    * If one of the strings is more than 1.5 times as long as the other
      use partial_ratio comparisons - scale partial results by 0.9
      (this makes sure only full results can return 100)
    * If one of the strings is over 8 times as long as the other
      instead scale by 0.6

#. Run the other ratio functions
    * if using partial ratio functions call partial_ratio,
      partial_token_sort_ratio and partial_token_set_ratio
      scale all of these by the ratio based on length
    * otherwise call token_sort_ratio and token_set_ratio
    * all token based comparisons are scaled by 0.95
      (on top of any partial scalars)

#. Take the highest value from these results
   round it and return it as an integer.

:param s1:
:param s2:
:param force_ascii: Allow only ascii characters
:type force_ascii: bool
:full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
:return:
r$   r   Tgffffff?g?g      ?F   g333333?r2   )r   r'   r?   r   floatr   r   minr"   r=   rU   r   r;   rS   )r   r   r%   r'   rC   rD   try_partialunbase_scalepartial_scalebase	len_ratior3   ptsorptsertsortsers                   r   WRatiori      sW   F <<  $$  $$ KLM=Dc#b'3r7+,s3r7CG/DDI 3 1}'-7(eD*+'UC*+ zz#dU:;;U;lJrE:\Izz#d$/00r   c                     [        XSUS9$ )zReturn a measure of the sequences' similarity between 0 and 100,
using different algorithms. Same as WRatio but preserving unicode.
FrY   )ri   rZ   s      r   UWRatiork   .  s     "e,GGr   )T)TTT)TT)
__future__r   platformwarningsr   r   ImportErrorpython_implementationwarndifflib r   check_for_nonecheck_for_equivalencecheck_empty_stringr   r"   r0   r6   r;   r=   rP   rS   rU   rW   r[   ri   rk    r   r   <module>rx      s'   (  (?  '   ' ")   ")R! ' 'ba 1 1ha`:
HK1\HK	  (%%'61qr'(s   C 3DD