from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, date
from typing import Tuple

import pandas as pd
from selenium import webdriver # type: ignore
from selenium.webdriver.common.by import By # type: ignore
from selenium.webdriver.chrome.service import Service as ChromeService # type: ignore
from selenium.webdriver.support.ui import WebDriverWait # type: ignore
from selenium.webdriver.support import expected_conditions as EC # type: ignore
from webdriver_manager.chrome import ChromeDriverManager # type: ignore


@dataclass
class ScrapeResult:
    """Pair of flight tables produced by ``OMAFlightScraper.scrape_all``."""

    # Departures table (rows scraped in "salidas" mode).
    salidas: pd.DataFrame
    # Arrivals table (rows scraped in "llegadas" mode).
    llegadas: pd.DataFrame


class OMAFlightScraper:
    """Selenium scraper for the OMA Mazatlán flight-status page.

    The page renders its "table" with ``div`` elements: rows are
    ``.table-row.departure`` / ``.table-row.arrival`` and the cells inside a
    row are ``.item__1`` ... ``.item__6``.

    The instance owns a Chrome session from construction until
    :meth:`close` is called. It can be used as a context manager so the
    browser is released even when scraping raises::

        with OMAFlightScraper(url) as scraper:
            result = scraper.scrape_all()
    """

    # Row selectors. Filtering is done in CSS (substring match on the class
    # attribute) instead of calling ``get_attribute("class")`` per element:
    # one WebDriver round trip instead of one per row, and no exposure to
    # stale-element errors while the page is re-rendering.
    _DEPARTURE_ROWS = ".table-wrapper .table-row.departure"
    _ARRIVAL_ROWS = ".table-wrapper .table-row.arrival"
    _NOT_HEADER = ":not([class*='table-header']):not([class*='table-title-row'])"
    _NOT_PLACEHOLDER = ":not([class*='nullfs'])"
    _ALL_FLIGHT_ROWS = (
        _DEPARTURE_ROWS + _NOT_HEADER + ", " + _ARRIVAL_ROWS + _NOT_HEADER
    )

    def __init__(self, url: str, headless: bool = True, timeout_seconds: int = 25) -> None:
        """Start a Chrome session configured for scraping.

        Args:
            url: Address of the flight-status page to load in ``scrape_all``.
            headless: Run Chrome without a visible window when true.
            timeout_seconds: Timeout used by the shared explicit wait.
        """
        self.url = url
        self.timeout = timeout_seconds

        options = webdriver.ChromeOptions()
        if headless:
            options.add_argument("--headless=new")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--window-size=1400,900")
        options.add_argument("--lang=es-MX")

        service = ChromeService(ChromeDriverManager().install())
        self.driver = webdriver.Chrome(service=service, options=options)
        self.wait = WebDriverWait(self.driver, self.timeout)

    def __enter__(self) -> OMAFlightScraper:
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the browser on exit; exceptions from the body propagate."""
        self.close()

    def close(self) -> None:
        """Quit the browser, ignoring any error raised while shutting down."""
        try:
            self.driver.quit()
        except Exception:
            # Best-effort teardown: a failure here must not mask the
            # caller's own result or exception.
            pass

    # --------------------------
    # Helpers
    # --------------------------
    def _safe_click(self, locator: Tuple[str, str], tries: int = 3) -> bool:
        """Click the element at ``locator`` through JavaScript, with retries.

        Each attempt waits (up to the shared timeout) for the element to be
        clickable, scrolls it to the centre of the viewport and clicks it.

        Args:
            locator: ``(By.<strategy>, value)`` pair; ``By`` members are strings.
            tries: Maximum number of attempts.

        Returns:
            True as soon as one attempt succeeds, False if all of them fail.
        """
        for _ in range(tries):
            try:
                target = self.wait.until(EC.element_to_be_clickable(locator))
                self.driver.execute_script(
                    "arguments[0].scrollIntoView({block:'center'});", target
                )
                self.driver.execute_script("arguments[0].click();", target)
            except Exception:
                # Deliberately broad: any failure simply triggers a retry.
                continue
            return True
        return False

    def _dismiss_cookie_banner_if_any(self) -> None:
        """Click the first cookie-consent button found, if there is one.

        Best effort: the known button locators are tried in order without
        waiting, and lookup/click errors are ignored.
        """
        candidates = [
            (By.XPATH, "//button[contains(.,'Aceptar')]"),
            (By.XPATH, "//button[contains(.,'ACEPTAR')]"),
            (By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"),
        ]
        for locator in candidates:
            try:
                buttons = self.driver.find_elements(*locator)
                if buttons:
                    self.driver.execute_script("arguments[0].click();", buttons[0])
                    return
            except Exception:
                continue

    # --------------------------
    # Tabs: departures / arrivals
    # --------------------------
    def _go_to_tab(self, tab: str) -> None:
        """Switch to the departures or arrivals tab.

        Args:
            tab: ``"SALIDAS"`` or ``"LLEGADAS"`` (case-insensitive). Anything
                starting with ``"SAL"`` selects departures; everything else
                selects arrivals.

        Raises:
            Whatever the shared wait raises if no row of the requested kind
            appears in the DOM within the timeout.
        """
        wants_departures = tab.upper().startswith("SAL")
        if wants_departures:
            toggle = (By.CSS_SELECTOR, "#departureChange")
            row_selector = ".table-row.departure"
        else:
            toggle = (By.CSS_SELECTOR, "#arrivalChange")
            row_selector = ".table-row.arrival"

        # NOTE(review): the click result is ignored, as in the original flow;
        # a failed click is only noticeable through the rows read afterwards.
        self._safe_click(toggle, tries=5)

        # Wait until rows of that kind exist in the DOM (even if hidden).
        self.wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, row_selector))
        )

    # --------------------------
    # Expand: "VER MÁS VUELOS"
    # --------------------------
    def _count_flight_rows(self) -> int:
        """Return how many departure + arrival rows are currently in the DOM."""
        return len(self.driver.find_elements(By.CSS_SELECTOR, self._ALL_FLIGHT_ROWS))

    def _expand_all_flights(self, max_clicks: int = 30, growth_timeout: float = 8) -> None:
        """Click the "VER MÁS VUELOS" link until no more rows are added.

        The link is ``<a id="allFlights" class="table-bottom-link">``. After
        each click the total row count (arrivals + departures) must grow
        within ``growth_timeout`` seconds; otherwise expansion stops.

        Args:
            max_clicks: Upper bound on the number of clicks.
            growth_timeout: Seconds to wait for the row count to increase
                after each click.
        """
        previous = self._count_flight_rows()

        def grown_count(_driver):
            # Returning the count itself (truthy, since it exceeds a
            # non-negative number) lets ``until`` hand it back, so no second
            # count is needed after the wait.
            current = self._count_flight_rows()
            return current if current > previous else False

        for _ in range(max_clicks):
            links = self.driver.find_elements(
                By.CSS_SELECTOR, "a#allFlights.table-bottom-link"
            )
            if not links:
                break

            try:
                self.driver.execute_script(
                    "arguments[0].scrollIntoView({block:'center'});", links[0]
                )
                self.driver.execute_script("arguments[0].click();", links[0])
            except Exception:
                break

            try:
                previous = WebDriverWait(self.driver, growth_timeout).until(grown_count)
            except Exception:
                # The count did not grow: nothing more to load.
                break

    # --------------------------
    # Extraction of the div-based "table"
    # --------------------------
    @staticmethod
    def _airline_from_logo_src(src: str) -> str:
        """Derive an airline label from a logo URL.

        ``/img/logo_aeromexico.png`` becomes ``"Aeromexico"``. Names of up to
        four characters are upper-cased (treated as codes); longer names are
        title-cased.
        """
        stem = src.split("/")[-1].split(".")[0]
        name = stem.replace("logo_", "").replace("-", " ").replace("_", " ").strip()
        return name.upper() if len(name) <= 4 else name.title()

    @staticmethod
    def _get_airline_name(row_el) -> str:
        """Return the airline shown in a row's ``.item__2`` cell.

        The airline may be plain text or an ``<img>`` logo. Resolution order:
        cell text, then the image ``alt``, then a name derived from the image
        ``src`` file name. Returns ``""`` when none of them is available.
        """
        try:
            cell = row_el.find_element(By.CSS_SELECTOR, ".item__2")
        except Exception:
            return ""

        text = (cell.text or "").strip()
        if text:
            return text

        # No text: fall back to the logo image.
        try:
            img = cell.find_element(By.CSS_SELECTOR, "img")
            alt = (img.get_attribute("alt") or "").strip()
            if alt:
                return alt
            src = (img.get_attribute("src") or "").strip()
            if src:
                return OMAFlightScraper._airline_from_logo_src(src)
        except Exception:
            return ""

        return ""

    @staticmethod
    def _cell_text(row_el, css: str) -> str:
        """Return the stripped text of the first ``css`` match in a row, or ``""``."""
        try:
            return (row_el.find_element(By.CSS_SELECTOR, css).text or "").strip()
        except Exception:
            return ""

    @staticmethod
    def _build_frame(records: list[dict[str, str]], mode: str) -> pd.DataFrame:
        """Build the result table from already-extracted row dictionaries.

        The returned frame always has the same columns, even when ``records``
        is empty: ``Fecha``, ``ScrapedAt``, ``Hora``, ``Aerolinea``,
        ``NoVuelo``, ``Terminal``, ``Estatus`` and then ``Destino`` (mode
        ``"salidas"``) or ``Origen`` (any other mode).
        """
        place_column = "Destino" if mode == "salidas" else "Origen"
        columns = ["Hora", "Aerolinea", "NoVuelo", "Terminal", "Estatus", place_column]
        frame = pd.DataFrame(records, columns=columns)

        # Read the clock once so both stamps agree even around midnight.
        now = datetime.now()
        frame.insert(0, "Fecha", now.date().isoformat())
        frame.insert(1, "ScrapedAt", now.strftime("%Y-%m-%d %H:%M:%S"))
        return frame

    def _extract_rows(self, mode: str) -> pd.DataFrame:
        """Read the flight rows of one listing into a DataFrame.

        Args:
            mode: ``"salidas"`` reads ``.table-row.departure`` rows; any other
                value (normally ``"llegadas"``) reads ``.table-row.arrival``.

        Returns:
            A DataFrame with the columns described in :meth:`_build_frame`.
            Placeholder rows (class containing ``nullfs``), header rows, and
            rows whose time, flight number and destination/origin are all
            empty are excluded.

        Raises:
            Whatever the shared wait raises if no row of the requested kind
            is present in the DOM within the timeout.
        """
        is_departures = mode == "salidas"
        base_selector = self._DEPARTURE_ROWS if is_departures else self._ARRIVAL_ROWS
        place_column = "Destino" if is_departures else "Origen"

        # Require at least one row of this kind in the DOM. The unfiltered
        # selector is used on purpose so a lone placeholder row satisfies it.
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, base_selector)))

        row_elements = self.driver.find_elements(
            By.CSS_SELECTOR, base_selector + self._NOT_PLACEHOLDER + self._NOT_HEADER
        )

        records = []
        for row_el in row_elements:
            hora = self._cell_text(row_el, ".item__1")
            aerolinea = self._get_airline_name(row_el)
            no_vuelo = self._cell_text(row_el, ".item__3")
            # The markup labels this cell "Destino"; on the arrivals listing
            # the UI presents it as the origin.
            place = self._cell_text(row_el, ".item__4")
            terminal = self._cell_text(row_el, ".item__5")
            estatus = self._cell_text(row_el, ".item__6")

            # Skip junk rows that carry none of the identifying fields.
            if not (hora or no_vuelo or place):
                continue

            records.append(
                {
                    "Hora": hora,
                    "Aerolinea": aerolinea,
                    "NoVuelo": no_vuelo,
                    "Terminal": terminal,
                    "Estatus": estatus,
                    place_column: place,
                }
            )

        return self._build_frame(records, mode)

    # --------------------------
    # Main flow
    # --------------------------
    def scrape_all(self) -> ScrapeResult:
        """Load the page and scrape both listings.

        Returns:
            A ``ScrapeResult`` holding the departures and arrivals tables.
        """
        self.driver.get(self.url)
        self._dismiss_cookie_banner_if_any()

        # Expand once up front: both listings live in the same wrapper.
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".table-wrapper")))
        self._expand_all_flights()

        self._go_to_tab("SALIDAS")
        salidas = self._extract_rows("salidas")

        self._go_to_tab("LLEGADAS")
        llegadas = self._extract_rows("llegadas")

        return ScrapeResult(salidas=salidas, llegadas=llegadas)
