import pandas as pd
from app.dwh.paths import LINEAS_DIR

def _months_between(start: pd.Timestamp, end: pd.Timestamp):
    """Yield the first day of each month from ``start``'s month up to ``end``.

    The first value is ``start`` truncated to midnight on day 1 of its month.
    Later values advance one month at a time and are yielded only while they
    are strictly earlier than ``end``, so ``end`` is an exclusive bound.

    Args:
        start: Timestamp whose month is the first candidate.
        end: Exclusive upper bound; passed through ``pd.to_datetime``.

    Yields:
        One month-start timestamp per month in the range.
    """
    month_start = start.normalize().replace(day=1)
    limit = pd.to_datetime(end)
    one_month = pd.offsets.MonthBegin(1)

    while True:
        # Stop as soon as the candidate month start reaches the limit.
        if not month_start < limit:
            return
        yield month_start
        month_start = (month_start + one_month).normalize()

def read_lineas(
    start: str,
    end: str,
    sucursal: str | None = None,
    columns: list[str] | None = None,   # lets callers request only what they need
) -> pd.DataFrame:
    """Load rows from the monthly ``lineas_YYYY_MM.parquet`` files for a date range.

    One file per month is looked up under ``LINEAS_DIR`` for every month
    produced by ``_months_between(start, end)``; files that do not exist are
    skipped. Row filters are handed to ``pd.read_parquet`` (pyarrow engine)
    rather than being applied in memory after loading.

    Args:
        start: Inclusive lower bound for ``Fecha``; parsed with ``pd.to_datetime``.
        end: Exclusive upper bound for ``Fecha``; parsed with ``pd.to_datetime``.
        sucursal: Optional branch name. When truthy, it is stripped and rows
            are kept only where ``NombreSucursal`` equals it exactly.
        columns: Optional list of columns to read from each file.

    Returns:
        A single DataFrame with a fresh 0..n-1 index holding the matching rows
        of all months. When nothing matches, the result is empty but keeps its
        column layout whenever it is known: the schema of the first file read,
        or ``columns`` if no file existed. With neither, a bare empty
        DataFrame is returned.
    """
    start_dt = pd.to_datetime(start)
    end_dt = pd.to_datetime(end)

    # Filters that we try to push down to parquet (pyarrow).
    # NOTE: the date filter only works if the "Fecha" column is stored in the
    # parquet files as datetime/date, not as string.
    filters = [
        ("Fecha", ">=", start_dt),
        ("Fecha", "<", end_dt),
    ]
    if sucursal:
        filters.append(("NombreSucursal", "==", str(sucursal).strip()))

    frames = []
    empty_template = None  # first empty-but-typed frame, kept to preserve the schema
    for month in _months_between(start_dt, end_dt):
        fp = LINEAS_DIR / f"lineas_{month.strftime('%Y_%m')}.parquet"
        if not fp.exists():
            continue

        # Read only the requested columns, with the filters applied.
        df_m = pd.read_parquet(
            fp,
            columns=columns,
            filters=filters,
            engine="pyarrow",
        )

        if df_m.empty:
            if empty_template is None:
                empty_template = df_m
            continue
        frames.append(df_m)

    if frames:
        # Plain concat, but over frames already reduced in rows/columns;
        # copy=False asks pandas not to copy the data unnecessarily.
        return pd.concat(frames, ignore_index=True, copy=False)

    # Nothing matched: keep the columns so callers can still index by name
    # instead of hitting KeyError on a column-less frame.
    if empty_template is not None:
        return empty_template.reset_index(drop=True)
    return pd.DataFrame(columns=columns)




# import pandas as pd
# from app.dwh.paths import LINEAS_DIR

# def _months_between(start: pd.Timestamp, end: pd.Timestamp):
#     cur = start.normalize().replace(day=1)
#     end = pd.to_datetime(end)
#     while cur < end:
#         yield cur
#         cur = (cur + pd.offsets.MonthBegin(1)).normalize()

# def read_lineas(start: str, end: str, sucursal: str | None = None) -> pd.DataFrame:
#     start_dt = pd.to_datetime(start)
#     end_dt = pd.to_datetime(end)

#     dfs = []
#     for m in _months_between(start_dt, end_dt):
#         fp = LINEAS_DIR / f"lineas_{m.strftime('%Y_%m')}.parquet"
#         if fp.exists():
#             dfs.append(pd.read_parquet(fp))

#     if not dfs:
#         return pd.DataFrame()

#     df = pd.concat(dfs, ignore_index=True)

#     if "Fecha" in df.columns:
#         df["Fecha"] = pd.to_datetime(df["Fecha"], errors="coerce")
#         df = df[(df["Fecha"] >= start_dt) & (df["Fecha"] < end_dt)]

#     if sucursal:
#         s = str(sucursal).strip()
#         if "NombreSucursal" in df.columns:
#             df["NombreSucursal"] = df["NombreSucursal"].astype(str).str.strip()
#             df = df[df["NombreSucursal"] == s]

#     return df
