from playwright.sync_api import sync_playwright
import pandas as pd

def main():
    """Scrape Cabo San Lucas hotel listings from PriceTravel into a CSV file.

    Visits result pages 1 through 4 of the hotel search for a fixed stay
    (check-in 2024-09-07, check-out 2024-09-10, one adult, one room). For
    every hotel card found it records the hotel name, the per-night price
    text and the star rating, then writes all rows to
    ``San_Lucasstars.csv`` in the current working directory.

    Errors raised by Playwright during navigation or element lookups are
    not caught here and propagate to the caller; the page and the browser
    are still closed in that case.
    """
    checkin_date = '2024-09-07'
    checkout_date = '2024-09-10'
    base_url = 'https://www.pricetravel.com/hoteles/cabo-san-lucas'

    hotels_list = []

    with sync_playwright() as p:
        # One browser serves every results page; starting a new Chromium
        # process per page only adds startup cost.
        browser = p.chromium.launch(headless=True)
        try:
            # Adjust the range to the number of result pages to scan.
            for page_num in range(1, 5):
                page_url = (
                    f'{base_url}?placeId=62709&placeType=6&adults=1'
                    f'&checkin={checkin_date}&checkout={checkout_date}'
                    '&room1.adults=1&rooms=1'
                    '&ctInternal=sbUWLRjjXY0LgVWj6MXrvQkTVsBM8u0GvwqwwM4bzNM'
                    f'&page={page_num}'
                )

                page = browser.new_page()
                try:
                    page.goto(page_url, timeout=90000)

                    for hotel in page.locator('//a[@class="HotelCard"]').all():
                        name = hotel.locator('//h2[@class="ng-binding"]').inner_text()
                        price = hotel.locator('//div[@class="footer__pricePerNight"]').inner_text()
                        icon_class = hotel.locator('//div[@class="flex-grow-1"]/i').get_attribute("class")

                        # The rating is encoded in the icon's CSS class; strip
                        # the fixed prefix/suffix and map "-half" to ".5".
                        # NOTE(review): presumably classes look like
                        # "d-flex mb-1 icon-4-half-star" -> "4.5"; confirm
                        # against the live markup.
                        stars = (
                            str(icon_class)
                            .replace("d-flex mb-1 icon-", "")
                            .replace("-star", "")
                            .replace("-half", ".5")
                        )

                        hotels_list.append({
                            'hoteles': name,
                            'precios': price,
                            'stars': stars,
                        })
                finally:
                    # Release the page even if navigation or a lookup failed.
                    page.close()
        finally:
            browser.close()

    df = pd.DataFrame(hotels_list)
    # The file name previously ended with a stray space ('...csv '), which
    # produced a file that does not carry a clean .csv extension.
    df.to_csv('San_Lucasstars.csv', index=False)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()