import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import matplotlib.colors as mcolors
import matplotlib.cm as cm
from scipy.optimize import curve_fit

def mse_model(data, A, B, alpha):
    """Evaluate the power-law model ``A + B / data**alpha``.

    Args:
        data: Independent variable (scalar or array-like supporting ``**``).
        A: Constant offset added to the decaying term.
        B: Scale of the decaying term.
        alpha: Exponent applied to ``data`` in the denominator.

    Returns:
        The model value, with the same shape as ``data``.
    """
    decay_term = B / data**alpha
    return A + decay_term

def parse_text(text, *, min_perc=10, excluded_percs=(50,), min_inter=32, max_inter=336):
    """Extract ``(interto, percentage, mse)`` records from a results log.

    A record is any span of ``text`` matching ``interto<int>`` ... ``_p<int>``
    ... ``mse:<decimal>``. The gaps are matched lazily and may span newlines
    (``re.DOTALL``), so the three fields do not have to share a line.

    Args:
        text: Raw log contents.
        min_perc: Keep only records whose percentage is at least this value.
        excluded_percs: Percentages to drop even when they pass ``min_perc``.
        min_inter: Smallest ``interto`` value to keep (inclusive).
        max_inter: Largest ``interto`` value to keep (inclusive).

    Returns:
        A list of ``(int, int, float)`` tuples in the order they appear in
        ``text``. With the default arguments the filtering is
        ``perc >= 10 and perc != 50 and 32 <= inter <= 336``.
    """
    pattern = r"interto(\d+).*?_p(\d+).*?mse:(\d+\.\d+)"
    excluded = frozenset(excluded_percs)

    records = []
    for inter_str, perc_str, mse_str in re.findall(pattern, text, re.DOTALL):
        inter = int(inter_str)
        perc = int(perc_str)
        if perc < min_perc or perc in excluded:
            continue
        if not min_inter <= inter <= max_inter:
            continue
        records.append((inter, perc, float(mse_str)))
    return records

def read_data(file_path):
    """Load a results log and return its parsed records as a DataFrame.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``pandas.DataFrame`` with columns ``Interto``, ``Percentage`` and
        ``MSE``, one row per record returned by ``parse_text``.
    """
    with open(file_path, 'r') as handle:
        records = parse_text(handle.read())
    column_names = ['Interto', 'Percentage', 'MSE']
    return pd.DataFrame(records, columns=column_names)

def plot_data(df, output_path="042401_trafficInterto_with_fit_interto.png"):
    """Scatter MSE against Interto per percentage, overlay fits, and save a PNG.

    For every distinct ``Percentage`` the points are drawn as a scatter and
    ``mse_model`` is fitted to them with ``scipy.optimize.curve_fit``; the
    fitted curve is drawn in the same colour and labelled with the fitted
    exponent and its standard deviation. Colours come from the plasma
    colormap on a logarithmic percentage scale, shown as a colorbar.

    Args:
        df: DataFrame with ``Interto``, ``Percentage`` and ``MSE`` columns.
        output_path: Destination of the saved figure. Defaults to the file
            name this script has always written.

    Groups that cannot be fitted (fewer than three points, or the optimiser
    does not converge) keep their scatter points but get no fitted curve; a
    warning is emitted instead of aborting the whole plot.
    """
    import warnings  # local import keeps this block self-contained

    percentages = df['Percentage']
    norm = mcolors.LogNorm(vmin=percentages.min(), vmax=percentages.max())
    scalar_map = cm.ScalarMappable(norm=norm, cmap=cm.plasma)

    fig, ax = plt.subplots(figsize=(12, 8))

    for perc, group in df.groupby('Percentage'):
        # Sort into a new frame: sorting a groupby sub-frame in place mutates
        # a slice of ``df`` and can trigger SettingWithCopyWarning.
        group = group.sort_values('Interto')
        x = group['Interto'].to_numpy()
        y = group['MSE'].to_numpy()

        color = scalar_map.to_rgba(perc)
        ax.scatter(x, y, label=f'Percentage {perc}%', color=color, alpha=0.6)

        # mse_model has three free parameters (A, B, alpha), so at least
        # three observations are needed for a fit.
        if len(group) < 3:
            warnings.warn(
                f"Percentage {perc}%: only {len(group)} point(s), skipping fit"
            )
            continue

        try:
            popt, pcov = curve_fit(mse_model, x, y, maxfev=10000)
        except RuntimeError as exc:
            # curve_fit raises RuntimeError when the minimisation fails.
            warnings.warn(f"Percentage {perc}%: fit failed ({exc}), skipping fit")
            continue

        alpha = popt[2]
        std_alpha = np.sqrt(np.diag(pcov))[2]  # Standard deviation of alpha

        # Evaluate the fitted model on a dense grid for a smooth curve.
        smooth_data = np.linspace(x.min(), x.max(), 500)
        smooth_mse = mse_model(smooth_data, *popt)
        ax.plot(smooth_data, smooth_mse, color=color,
                label=f'Fit: α={alpha:.2f}±{std_alpha:.2f}')

    ax.set_title('MSE by Percentage and Interto with Nonlinear Fit')
    ax.set_xlabel('Interto')
    ax.set_ylabel('MSE')

    # The mappable is not attached to any artist, so the target axes must be
    # given explicitly; newer Matplotlib versions refuse to guess it.
    cbar = fig.colorbar(scalar_map, ax=ax, label='Percentage of Data Used')
    tick_locs = np.unique(percentages)
    cbar.set_ticks(tick_locs)
    cbar.set_ticklabels(tick_locs)

    ax.legend()
    ax.grid(True)
    fig.savefig(output_path)

if __name__ == "__main__":
    # Script entry point: parse the results log, then render and save the plot.
    plot_data(read_data('newresult_traffic.txt'))
