
    $9ii%                        S SK Jr  S SKJr  S SKJrJr  S SKJr  S SKr	S SK
Jr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  \ " S S5      5       r " S S5      rg)    )annotations)	dataclass)datetimedate)TupleN)	webdriver)By)Service)WebDriverWait)expected_conditions)ChromeDriverManagerc                  *    \ rS rSr% S\S'   S\S'   Srg)ScrapeResult   pd.DataFramesalidasllegadas N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r       aC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\Proyectos\Vuelos_Mazatlan\src\scraper.pyr   r      s    r   r   c                      \ rS rSrSrSSS jjrSS jrSSS jjrSS jrSS jr	SSS jjr
\SS	 j5       rSS
 jrSS jrSrg)OMAFlightScraper   u   
Scraper dinámico para OMA Mazatlán (estatus de vuelos).
HTML real: filas son divs .table-row.departure / .table-row.arrival,
celdas son .item__1 ... .item__6
c                   Xl         X0l        [        R                  " 5       nU(       a  UR	                  S5        UR	                  S5        UR	                  S5        UR	                  S5        UR	                  S5        [        [        5       R                  5       5      n[        R                  " XTS9U l	        [        U R                  U R                  5      U l        g )Nz--headless=newz--no-sandboxz--disable-dev-shm-usagez--window-size=1400,900z--lang=es-MX)serviceoptions)urltimeoutr   ChromeOptionsadd_argumentChromeServicer   installChromedriverr   wait)selfr#   headlesstimeout_secondsr"   r!   s         r   __init__OMAFlightScraper.__init__   s    &))+  !12^,6756^, 3 5 = = ?@&&wH!$++t||<	r   c                Z     U R                   R                  5         g ! [         a     g f = fN)r*   quit	Exception)r,   s    r   closeOMAFlightScraper.close.   s)    	KK 		s    
**c                   [        U5       Hk  n U R                  R                  [        R                  " U5      5      nU R
                  R                  SU5        U R
                  R                  SU5          g   g! [         a     M}  f = f)N.arguments[0].scrollIntoView({block:'center'});arguments[0].click();TF)ranger+   untilECelement_to_be_clickabler*   execute_scriptr4   )r,   locatortries_els        r   _safe_clickOMAFlightScraper._safe_click7   sy    uAYY__R%?%?%HI**+[]_`**+BBG    s   A'A<<
B
	B
c                &   [         R                  S4[         R                  S4[         R                  S4/nU HE  n U R                  R                  " U6 nU(       a!  U R                  R                  SUS   5          g MG     g ! [         a     MW  f = f)Nz//button[contains(.,'Aceptar')]z//button[contains(.,'ACEPTAR')]z"button#onetrust-accept-btn-handlerr9   r   )r	   XPATHCSS_SELECTORr*   find_elementsr>   r4   )r,   
candidateslocbtnss       r   _dismiss_cookie_banner_if_any.OMAFlightScraper._dismiss_cookie_banner_if_anyB   s    XX89XX89__BC


 C{{00#6KK../FQP    s   ?B
BBc                   UR                  5       R                  S5      (       a  [        R                  S4nO[        R                  S4nU R	                  USS9  UR                  5       R                  S5      (       a@  U R
                  R                  [        R                  " [        R                  S45      5        gU R
                  R                  [        R                  " [        R                  S45      5        g)	zH
tab: "SALIDAS" o "LLEGADAS"
En HTML: #departureChange y #arrivalChange
SALz#departureChangez#arrivalChange   )r@   z.table-row.departurez.table-row.arrivalN)	upper
startswithr	   rG   rC   r+   r;   r<   presence_of_element_located)r,   tabrJ   s      r   
_go_to_tabOMAFlightScraper._go_to_tabT   s    
 99;!!%((??$67C??$45CA& 99;!!%((IIOOB::BOOMc;defIIOOB::BOOMa;bcdr   c                  ^ ^^ S	U 4S jjmT" 5       m[        U5       H  nT R                  R                  [        R                  S5      nU(       d    g T R                  R                  SUS   5        T R                  R                  SUS   5         [        T R                  S5      R                  UU4S j5        T" 5       mM     g! [         a       gf = f! [         a       gf = f)
u   
En tu HTML: <a id="allFlights" class="table-bottom-link">VER MÁS VUELOS</a>
A veces carga más filas vía AJAX o cambia el DOM.
Estrategia:
- medir conteo de filas totales (arrival+departure)
- click al link mientras crezca el conteo
c                   > TR                   R                  [        R                  S5      n U  Vs/ s HE  nSUR	                  S5      =(       d    S;  d  M#  SUR	                  S5      =(       d    S;  d  MC  UPMG     n n[        U 5      $ s  snf )NzF.table-wrapper .table-row.departure, .table-wrapper .table-row.arrivaltable-headerclass table-title-row)r*   rH   r	   rG   get_attributelen)rowsrr,   s     r   count_all_rows<OMAFlightScraper._expand_all_flights.<locals>.count_all_rowsr   s    ;;,,R__  ?G  HD#  ]t!~aoog>V>\Z\']Abs|}  }L  }L  MT  }U  }[  Y[  c\AtD  ]t9 ]s   "B	B	6B	za#allFlights.table-bottom-linkr8   r   r9      c                   > T" 5       T:  $ r2   r   )dra   prevs    r   <lambda>6OMAFlightScraper._expand_all_flights.<locals>.<lambda>   s    n>NQU>Ur   N)returnint)	r:   r*   rH   r	   rG   r>   r4   r   r;   )r,   
max_clicksrA   linkra   rf   s   `   @@r   _expand_all_flights$OMAFlightScraper._expand_all_flightsj   s    	 z"A;;,,R__>^_D**+[]abc]de**+BDGL
dkk1-334UV%' #    s$   >C
1C

CC
C)(C)c                    U R                  [        R                  S5      nUR                  =(       d    SR                  5       nU(       a  U$  UR                  [        R                  S5      nUR                  S5      =(       d    SR                  5       nU(       a  U$ UR                  S5      =(       d    SR                  5       nU(       a  UR                  S5      S   R                  S5      S	   nUR                  S
S5      R                  SS5      R                  SS5      R                  5       n[        U5      S::  a  UR                  5       $ UR                  5       $  g! [         a     gf = f! [         a     gf = f)u   
La aerolínea puede venir como:
- texto directo
- img src=/img/logo_xxx.png (sin alt)
En ese caso tomamos el filename del src como fallback.
z.item__2r[   imgaltsrc/.r   logo_- rA      )find_elementr	   rG   r4   textstripr]   splitreplacer^   rQ   title)row_elcelltxtrp   rq   rr   names          r   _get_airline_name"OMAFlightScraper._get_airline_name   sG   	&&r
CD yyB%%'J	##BOOU;C$$U+1r88:C
$$U+1r88:Cyy~b)//4Q7||GR088cBJJ3PSTZZ\'*4yA~tzz|G4::<G	  -  		&  		s1    E& AE6  B3E6 E6 &
E32E36
FFc                  ^ US:X  a  SOSnU R                   R                  [        R                  " [        R
                  U45      5        U R                  R                  [        R
                  U5      n/ nU HD  mTR                  S5      =(       d    SnSU;   a  M%  SU;   d  SU;   a  M3  UR                  T5        MF     / nU H  mSU4S	 jjnU" S
5      nU R                  T5      n	U" S5      n
U" S5      nU" S5      nU" S5      nU(       d  U
(       d	  U(       d  M[  UU	U
UUS.nUS:X  a  XS'   OXS'   UR                  U5        M     [        R                  " U5      n[        R                  " 5       R                  5       n[         R"                  " 5       R%                  S5      nUR&                  (       d&  UR)                  SSU5        UR)                  SSU5        U$ )u{   
mode: "salidas" o "llegadas"
Usa:
  - .table-row.departure
  - .table-row.arrival
Excluye nullfs (No hay información...)
r   z#.table-wrapper .table-row.departurez!.table-wrapper .table-row.arrivalrZ   r[   nullfsrY   r\   c                   >  TR                  [        R                  U 5      R                  =(       d    SR	                  5       $ ! [
         a     gf = f)Nr[   )rz   r	   rG   r{   r|   r4   )cssr`   s    r   	cell_text1OMAFlightScraper._extract_rows.<locals>.cell_text   sC    NN2??C@EEKRRTT  s   A A 
AAz.item__1z.item__3z.item__4z.item__5z.item__6)Hora	AerolineaNoVueloTerminalEstatusDestinoOrigenz%Y-%m-%d %H:%M:%Sr   Fecha   	ScrapedAt)r   strri   r   )r+   r;   r<   rS   r	   rG   r*   rH   r]   appendr   pd	DataFramer   today	isoformatr   nowstrftimeemptyinsert)r,   modeselr_   cleanedclsdatar   hora	aerolineano_vuelodest_origenterminalestatusrowdfr   
scraped_atr`   s                     @r   _extract_rowsOMAFlightScraper._extract_rows   s    8<y7H3Nq 			667MNO{{((#> A??7+1rC3$(9S(@NN1  A Z(D..q1I ,H#J/K ,H
+G H &#$"C y !,I +HKK? B \\$

&&(\\^,,-@A
xxIIa%(IIaj1	r   c                   U R                   R                  U R                  5        U R                  5         U R                  R                  [        R                  " [        R                  S45      5        U R                  5         U R                  S5        U R                  S5      nU R                  S5        U R                  S5      n[        XS9$ )Nz.table-wrapperSALIDASr   LLEGADASr   )r   r   )r*   getr#   rL   r+   r;   r<   rS   r	   rG   rm   rU   r   r   )r,   r   r   s      r   
scrape_allOMAFlightScraper.scrape_all   s    !**, 			66IY7Z[\  " 		"$$Y/ 	
#%%j1G??r   )r*   r$   r#   r+   N)T   )r#   r   r-   boolr.   rj   ri   None)ri   r   )   )r?   zTuple[By, str]r@   rj   ri   r   )rT   r   ri   r   )   )rk   rj   ri   r   )ri   r   )r   r   ri   r   )ri   r   )r   r   r   r   __doc__r/   r5   rC   rL   rU   rm   staticmethodr   r   r   r   r   r   r   r   r      sI    = 	$e,!L  BCP@r   r   )
__future__r   dataclassesr   r   r   typingr   pandasr   seleniumr   selenium.webdriver.common.byr	   !selenium.webdriver.chrome.servicer
   r'   selenium.webdriver.support.uir   selenium.webdriver.supportr   r<   webdriver_manager.chromer   r   r   r   r   r   <module>r      sM    " ! #    + F 7 @ 8   
s@ s@r   