# import pandas as pd
# import json
# from datetime import datetime
# import re
# from collections import defaultdict
# import ast
#
#
# def get_bin(days):
#     if days == 1:
#         return '1_day_stays'
#     elif days == 2:
#         return '2_day_stays'
#     elif 3 <= days <= 4:
#         return '3_4_day_stays'
#     elif 5 <= days <= 6:
#         return '5_6_day_stays'
#     elif 7 <= days <= 14:
#         return '7_14_day_stays'
#     elif 15 <= days <= 28:
#         return '15_28_day_stays'
#     else:
#         return '28+_day_stays'
#
#
# def generate_recommendations(df, test_df):
#     # Handle NaN values and convert to list of datetime objects
#     df['available_dates'] = df['available_dates'].fillna('[]')
#     df['available_dates'] = df['available_dates'].apply(ast.literal_eval)
#     df['available_dates'] = df['available_dates'].apply(
#         lambda x: [pd.to_datetime(d) for d in x]  # Convert each date individually
#     )
#
#     # Calculate global date range
#     all_dates = [d for sublist in df['available_dates'] for d in sublist]
#     if not all_dates:
#         return json.dumps({}, indent=2)
#
#     min_date, max_date = min(all_dates), max(all_dates)
#
#     # Initialize structure to hold per-month stay counts
#     month_bins = defaultdict(lambda: defaultdict(int))
#
#     # Process each listing's available dates
#     for dates in df['available_dates']:
#         if not dates:  # Now properly checks if list is empty
#             continue
#
#         dates = sorted(dates)
#         gaps = []
#
#         # Pre gap (before first available date)
#         first_date = dates[0]
#         if first_date > min_date:
#             pre_start = min_date
#             pre_end = first_date - pd.Timedelta(days=1)
#             gaps.append((pre_start, pre_end))
#
#         # Between gaps
#         for i in range(1, len(dates)):
#             prev_date = dates[i - 1]
#             current_date = dates[i]
#             if prev_date + pd.Timedelta(days=1) < current_date:
#                 gap_start = prev_date + pd.Timedelta(days=1)
#                 gap_end = current_date - pd.Timedelta(days=1)
#                 gaps.append((gap_start, gap_end))
#
#         # Post gap (after last available date)
#         last_date = dates[-1]
#         if last_date < max_date:
#             post_start = last_date + pd.Timedelta(days=1)
#             post_end = max_date
#             gaps.append((post_start, post_end))
#
#         # Process gaps by month
#         for gap_start, gap_end in gaps:
#             current_start = gap_start
#             while current_start <= gap_end:
#                 next_month = current_start + pd.offsets.MonthBegin(1)
#                 month_end = min(gap_end, next_month - pd.Timedelta(days=1))
#                 days_in_gap = (month_end - current_start).days + 1
#
#                 bin_name = get_bin(days_in_gap)
#                 month_year = current_start.strftime('%b_%Y')
#                 month_bins[month_year][bin_name] += 1
#
#                 current_start = month_end + pd.Timedelta(days=1)
#
#     # Create booking trends DataFrame
#     booking_trends = pd.DataFrame.from_dict(month_bins, orient='index').fillna(0).T
#     sorted_months = sorted(booking_trends.columns,
#                            key=lambda x: datetime.strptime(x, "%b_%Y"))
#     booking_trends = booking_trends[sorted_months]
#
#     # Generate recommendations
#     recommendations = {}
#     if sorted_months:
#         current_occ = test_df.get(sorted_months[0], pd.Series([100])).iloc[0]
#         next_occ = test_df.get(sorted_months[1], pd.Series([100])).iloc[0] if len(sorted_months) > 1 else 100
#         low_occupancy = current_occ < 40 or next_occ < 40
#     else:
#         low_occupancy = False
#
#     for idx, month in enumerate(sorted_months):
#         if low_occupancy and idx < 2:
#             rec = {'weekday_min_stay': 1, 'weekend_min_stay': 1}
#         else:
#             if month in booking_trends:
#                 top_bins = booking_trends[month].nlargest(3)
#                 min_stay = 28
#                 stays = [int(re.findall(r'\d+', bin_name.split('_')[0])[0])
#                          for bin_name in top_bins.index if re.findall(r'\d+', bin_name.split('_')[0])]
#                 if stays:
#                     min_stay = min(stays)
#                 rec = {'weekday_min_stay': min_stay, 'weekend_min_stay': min_stay}
#             else:
#                 rec = {'weekday_min_stay': 1, 'weekend_min_stay': 1}
#         recommendations[month] = rec
#
#     return json.dumps(recommendations, indent=2)




############# ---- imp above

import ast
import json
import re
from collections import defaultdict
from datetime import datetime

import pandas as pd


def get_bin(days):
    """Map a stay length in days to the label of its length bucket.

    Buckets are inclusive on both ends: 1, 2, 3-4, 5-6, 7-14 and 15-28 days.
    Any value that falls inside none of those ranges (including values
    above 28) is reported as ``'28+_day_stays'``.
    """
    # (lowest day count, highest day count, label) for every bounded bucket.
    bounded_buckets = (
        (1, 1, '1_day_stays'),
        (2, 2, '2_day_stays'),
        (3, 4, '3_4_day_stays'),
        (5, 6, '5_6_day_stays'),
        (7, 14, '7_14_day_stays'),
        (15, 28, '15_28_day_stays'),
    )
    for lowest, highest, label in bounded_buckets:
        if lowest <= days <= highest:
            return label
    # Catch-all bucket for everything not matched above.
    return '28+_day_stays'


# Matches the quoted ISO dates inside a "DatetimeIndex(['YYYY-MM-DD', ...])" repr.
_ISO_DATE_RE = re.compile(r"'(\d{4}-\d{2}-\d{2})'")


def _parse_available_dates(value):
    """Convert one raw ``available_dates`` cell into a list of timestamps.

    Accepted inputs:
      * a string holding the repr of a ``DatetimeIndex``;
      * a string holding a Python list literal of date-like values;
      * an actual ``list``, ``tuple`` or pandas ``Index`` of date-like values.

    Anything else (NaN, ``None``, ``'[]'``, ``'ns'``, other scalars, text
    that cannot be parsed) yields an empty list. Null/NaT entries are
    dropped so the result can be sorted and compared safely.
    """
    try:
        if isinstance(value, str):
            if value in ('[]', 'ns'):
                return []
            if value.startswith('DatetimeIndex(['):
                raw_dates = _ISO_DATE_RE.findall(value)
            else:
                # SECURITY: this text comes from the input data, so it is
                # parsed with ast.literal_eval (plain literals only) rather
                # than eval(), which would execute arbitrary expressions.
                try:
                    raw_dates = ast.literal_eval(value)
                except (ValueError, SyntaxError, TypeError):
                    return []
                if not isinstance(raw_dates, list):
                    return []
        elif isinstance(value, (list, tuple, pd.Index)):
            # Type is checked before any pd.isna() call: pd.isna() on a
            # sequence returns an array whose truth value is ambiguous.
            raw_dates = list(value)
        else:
            # NaN / None / unsupported scalar.
            return []

        converted = [pd.to_datetime(d) for d in raw_dates if d != 'ns']
        return [d for d in converted if not pd.isna(d)]
    except Exception as exc:
        # Best effort per cell: one malformed value must not abort the run.
        print(f"Error processing: {str(exc)}")
        return []


def _unavailable_gaps(dates, range_start, range_end):
    """Return inclusive ``(start, end)`` spans of the global range missing from ``dates``.

    ``dates`` must be a non-empty, sorted list of timestamps. Gaps are
    reported before the first date, between non-consecutive dates, and
    after the last date, bounded by ``range_start`` / ``range_end``.
    """
    one_day = pd.Timedelta(days=1)
    gaps = []

    if dates[0] > range_start:
        gaps.append((range_start, dates[0] - one_day))

    for earlier, later in zip(dates, dates[1:]):
        if earlier + one_day < later:
            gaps.append((earlier + one_day, later - one_day))

    if dates[-1] < range_end:
        gaps.append((dates[-1] + one_day, range_end))

    return gaps


def _lookup_occupancy(test_df, month, label):
    """Return the first value stored under ``month`` in ``test_df``, or 100.

    100 is used when the month is absent, the column is empty, or
    ``test_df`` does not support the lookup. ``label`` only affects the
    progress message printed on a successful lookup.
    """
    try:
        occupancy = test_df.get(month, pd.Series([100])).iloc[0]
    except (KeyError, IndexError, AttributeError):
        return 100
    print(f"{label} month occupancy ({month}): {occupancy}")
    return occupancy


def generate_recommendations(df, test_df):
    """Build per-month minimum-stay recommendations as a JSON string.

    For every listing, the days that are *not* in its ``available_dates``
    (within the global min/max date seen across all listings) are grouped
    into gaps, each gap is split at month boundaries, and every piece is
    counted under its length bucket (see ``get_bin``) for that month. For
    each month, the recommended minimum stay is the smallest lower bound
    among the (up to) three most frequent buckets that actually occurred.
    If the occupancy looked up for either of the first two months is
    below 40, those two months are forced to a minimum stay of 1.

    Args:
        df: DataFrame with an ``available_dates`` column. Each cell may be
            a ``DatetimeIndex`` repr string, a list-literal string, an
            actual list/tuple/Index of dates, or a null. The DataFrame is
            not modified.
        test_df: Object supporting ``.get(month_label, default)`` where the
            result supports ``.iloc[0]``; presumably a DataFrame whose
            columns are named like ``'Jan_2024'`` and hold occupancy
            percentages (TODO confirm units with the caller).

    Returns:
        A JSON object string (``indent=2``) mapping ``'%b_%Y'`` month labels,
        in chronological order, to
        ``{'weekday_min_stay': int, 'weekend_min_stay': int}``. ``'{}'`` is
        returned when no valid dates or no gaps are found.

    Side effects:
        Prints progress and summary statistics to stdout.
    """
    print("Converting date strings to datetime objects...")
    # Parsed into a separate Series so the caller's column keeps its raw
    # values and the function can be called again on the same DataFrame.
    listing_dates = df['available_dates'].apply(_parse_available_dates)

    # Print some statistics about the parsed dates
    dates_count = listing_dates.apply(len)
    print(f"Date counts statistics: Min={dates_count.min()}, Max={dates_count.max()}, Mean={dates_count.mean():.1f}")
    print(f"Number of listings with dates: {(dates_count > 0).sum()} out of {len(df)}")

    # Calculate global date range
    all_dates = [d for dates in listing_dates for d in dates]
    if not all_dates:
        print("No valid dates found in the dataset!")
        return json.dumps({}, indent=2)

    min_date, max_date = min(all_dates), max(all_dates)
    print(f"Date range: {min_date.date()} to {max_date.date()}")

    # month label -> bucket label -> number of gap pieces of that length
    month_bins = defaultdict(lambda: defaultdict(int))
    one_day = pd.Timedelta(days=1)

    processed_count = 0
    for dates in listing_dates:
        if not dates:
            continue

        for gap_start, gap_end in _unavailable_gaps(sorted(dates), min_date, max_date):
            # Split the gap at month boundaries so each piece is attributed
            # to the month it falls in.
            piece_start = gap_start
            while piece_start <= gap_end:
                next_month = piece_start + pd.offsets.MonthBegin(1)
                piece_end = min(gap_end, next_month - one_day)
                piece_days = (piece_end - piece_start).days + 1

                month_label = piece_start.strftime('%b_%Y')
                month_bins[month_label][get_bin(piece_days)] += 1

                piece_start = piece_end + one_day

        processed_count += 1

    print(f"Processed {processed_count} listings with available dates")

    # Rows are bucket labels, columns are month labels; missing counts -> 0.
    booking_trends = pd.DataFrame.from_dict(month_bins, orient='index').fillna(0).T
    sorted_months = []
    if not booking_trends.empty:
        sorted_months = sorted(booking_trends.columns,
                               key=lambda label: datetime.strptime(label, "%b_%Y"))
        booking_trends = booking_trends[sorted_months]

    print(f"Generated trends for {len(sorted_months)} months")

    # Decide whether the first two months should be opened up to 1-night stays.
    low_occupancy = False
    if sorted_months:
        current_occ = _lookup_occupancy(test_df, sorted_months[0], "Current")
        if len(sorted_months) > 1:
            next_occ = _lookup_occupancy(test_df, sorted_months[1], "Next")
        else:
            next_occ = 100

        low_occupancy = current_occ < 40 or next_occ < 40
        print(f"Low occupancy detected: {low_occupancy}")

    recommendations = {}
    for idx, month in enumerate(sorted_months):
        if low_occupancy and idx < 2:
            min_stay = 1
        else:
            counts = booking_trends[month]
            # Only buckets that actually occurred may compete for the top
            # three; zero-filled buckets would otherwise drag min_stay down.
            top_bins = counts[counts > 0].nlargest(3)
            # The leading number of a bucket label is its lower bound
            # ('3_4_day_stays' -> 3, '28+_day_stays' -> 28).
            lower_bounds = []
            for bin_name in top_bins.index:
                digits = re.findall(r'\d+', bin_name.split('_')[0])
                if digits:
                    lower_bounds.append(int(digits[0]))
            min_stay = min(lower_bounds) if lower_bounds else 28
        recommendations[month] = {'weekday_min_stay': min_stay,
                                  'weekend_min_stay': min_stay}

    print(f"Generated recommendations for {len(recommendations)} months")
    return json.dumps(recommendations, indent=2)


# Usage example:
# df = pd.read_csv("general.csv")
# test = pd.read_csv("processed_test.csv")
# print(generate_recommendations(df, test))

##########################################################################################################

#
# import pandas as pd
# import json
# from datetime import datetime
# import re
# from collections import defaultdict
# import ast
#
#
# def get_bin(days):
#     if days == 1:
#         return '1_day_stays'
#     elif days == 2:
#         return '2_day_stays'
#     elif 3 <= days <= 4:
#         return '3_4_day_stays'
#     elif 5 <= days <= 6:
#         return '5_6_day_stays'
#     elif 7 <= days <= 14:
#         return '7_14_day_stays'
#     elif 15 <= days <= 28:
#         return '15_28_day_stays'
#     else:
#         return '28+_day_stays'
#
#
# def generate_recommendations(df, test_df):
#     # Handle NaN values and convert to list of datetime objects
#     df['available_dates'] = df['available_dates'].fillna('[]')
#     df['available_dates'] = df['available_dates'].apply(ast.literal_eval)
#     df['available_dates'] = df['available_dates'].apply(
#         lambda x: [pd.to_datetime(d) for d in x]  # Convert each date individually
#     )
#
#     # Calculate global date range
#     all_dates = [d for sublist in df['available_dates'] for d in sublist]
#     if not all_dates:
#         return json.dumps({}, indent=2)
#
#     min_date, max_date = min(all_dates), max(all_dates)
#
#     # Initialize structure to hold per-month stay counts
#     month_bins = defaultdict(lambda: defaultdict(int))
#
#     # Process each listing's available dates
#     for dates in df['available_dates']:
#         if not dates:  # Now properly checks if list is empty
#             continue
#
#         dates = sorted(dates)
#         gaps = []
#
#         # Pre gap (before first available date)
#         first_date = dates[0]
#         if first_date > min_date:
#             pre_start = min_date
#             pre_end = first_date - pd.Timedelta(days=1)
#             gaps.append((pre_start, pre_end))
#
#         # Between gaps
#         for i in range(1, len(dates)):
#             prev_date = dates[i - 1]
#             current_date = dates[i]
#             if prev_date + pd.Timedelta(days=1) < current_date:
#                 gap_start = prev_date + pd.Timedelta(days=1)
#                 gap_end = current_date - pd.Timedelta(days=1)
#                 gaps.append((gap_start, gap_end))
#
#         # Post gap (after last available date)
#         last_date = dates[-1]
#         if last_date < max_date:
#             post_start = last_date + pd.Timedelta(days=1)
#             post_end = max_date
#             gaps.append((post_start, post_end))
#
#         # Process gaps by month
#         for gap_start, gap_end in gaps:
#             current_start = gap_start
#             while current_start <= gap_end:
#                 next_month = current_start + pd.offsets.MonthBegin(1)
#                 month_end = min(gap_end, next_month - pd.Timedelta(days=1))
#                 days_in_gap = (month_end - current_start).days + 1
#
#                 bin_name = get_bin(days_in_gap)
#                 month_year = current_start.strftime('%b_%Y')
#                 month_bins[month_year][bin_name] += 1
#
#                 current_start = month_end + pd.Timedelta(days=1)
#
#     # Create booking trends DataFrame
#     booking_trends = pd.DataFrame.from_dict(month_bins, orient='index').fillna(0).T
#     sorted_months = sorted(booking_trends.columns,
#                            key=lambda x: datetime.strptime(x, "%b_%Y"))
#     booking_trends = booking_trends[sorted_months]
#
#     # Generate recommendations
#     recommendations = {}
#     if sorted_months:
#         current_occ = test_df.get(sorted_months[0], pd.Series([100])).iloc[0]
#         next_occ = test_df.get(sorted_months[1], pd.Series([100])).iloc[0] if len(sorted_months) > 1 else 100
#         low_occupancy = current_occ < 40 or next_occ < 40
#     else:
#         low_occupancy = False
#
#     for idx, month in enumerate(sorted_months):
#         if low_occupancy and idx < 2:
#             rec = {'weekday_min_stay': 1, 'weekend_min_stay': 1}
#         else:
#             if month in booking_trends:
#                 top_bins = booking_trends[month].nlargest(3)
#                 min_stay = 28
#                 stays = [int(re.findall(r'\d+', bin_name.split('_')[0])[0])
#                          for bin_name in top_bins.index if re.findall(r'\d+', bin_name.split('_')[0])]
#                 if stays:
#                     min_stay = min(stays)
#                 rec = {'weekday_min_stay': min_stay, 'weekend_min_stay': min_stay}
#             else:
#                 rec = {'weekday_min_stay': 1, 'weekend_min_stay': 1}
#         recommendations[month] = rec
#
#     return json.dumps(recommendations, indent=2)
#
# # Usage example:
# df = pd.read_csv("general.csv")
# test = pd.read_csv("processed_test.csv")
# print(generate_recommendations(df, test))