import pandas as pd
from app.dwh.paths import FOLIOS_DIR

def _months_between(start: pd.Timestamp, end: pd.Timestamp):
    """Yield the first day of every month touched by ``[start, end)``.

    ``start`` is snapped back to midnight on the first day of its month, so
    the month that contains ``start`` is always the first candidate. Months
    are produced in ascending order for as long as their first day is
    strictly earlier than ``end``.

    Args:
        start: Lower bound of the range. Anything ``pd.to_datetime`` accepts
            as a scalar (``Timestamp``, ``datetime``, ISO string) works.
        end: Exclusive upper bound, coerced with ``pd.to_datetime``.

    Yields:
        ``pd.Timestamp`` values at midnight on the first day of each month.
    """
    # Coerce ``start`` exactly like ``end`` so both bounds accept the same
    # input kinds; previously a string ``start`` failed on ``.normalize()``.
    cur = pd.to_datetime(start).normalize().replace(day=1)
    end = pd.to_datetime(end)
    while cur < end:
        yield cur
        # ``cur`` already sits on a month start, so MonthBegin(1) moves to
        # the first day of the following month.
        cur = (cur + pd.offsets.MonthBegin(1)).normalize()

def read_folios(start: str, end: str) -> pd.DataFrame:
    """Load folio rows from the monthly parquet files covering a date range.

    One file per month is looked up under ``FOLIOS_DIR`` using the name
    ``folios_YYYY_MM.parquet``; months whose file does not exist are skipped.
    The files found are concatenated with a fresh integer index.

    When the combined frame has a ``Fecha`` column, it is converted to
    datetimes (unparseable values become ``NaT``) and only rows with
    ``start <= Fecha < end`` are kept. Without that column the concatenated
    data is returned unfiltered.

    Args:
        start: Inclusive lower bound, parsed with ``pd.to_datetime``.
        end: Exclusive upper bound, parsed with ``pd.to_datetime``.

    Returns:
        The matching rows, or an empty ``DataFrame`` when no monthly file
        exists for the requested range.
    """
    lower = pd.to_datetime(start)
    upper = pd.to_datetime(end)

    # Lazy generator: each path is built, checked and read before the next
    # month is considered.
    candidate_paths = (
        FOLIOS_DIR / f"folios_{month.strftime('%Y_%m')}.parquet"
        for month in _months_between(lower, upper)
    )
    frames = [pd.read_parquet(path) for path in candidate_paths if path.exists()]

    if len(frames) == 0:
        return pd.DataFrame()

    combined = pd.concat(frames, ignore_index=True)

    # Without a date column there is nothing to filter on.
    if "Fecha" not in combined.columns:
        return combined

    combined["Fecha"] = pd.to_datetime(combined["Fecha"], errors="coerce")
    on_or_after_start = combined["Fecha"] >= lower
    before_end = combined["Fecha"] < upper
    return combined[on_or_after_start & before_end]
