I am trying to plot the following sample data, where a plane is worked on for c_num days (x) and MTC_Daily_Lbr_percent (y) is how much someone works on that plane per day. I wrote the code below, in which I tried to do a curve fit, but I am very new to curve fitting and Matplotlib. Is there any way to take this data and plot a histogram? I know that it's supposed to be a left-skewed bell (with maybe another slight upward motion towards the end date). Also, is there some way to have an equation produced, so that I can forecast?
The data was too large to include here, so I am putting it in the comments below.
import pandas as pd
import numpy as np
import datetime as dt
from datetime import timedelta
from datetime import datetime
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from numpy import arange
# Placeholder input: `data` must be populated with the columns referenced
# below (Tot_Lbr_Hrs, Daily_Tot_Lbr_Hrs, Day_Counter, MTC_Daily_Lbr_percent,
# Start_Date, End_Date) before this script can run.
data = {}
df = pd.DataFrame(data)

# Work on a copy so the frame built from the raw input stays untouched.
df1 = df.copy()

# Coerce the numeric columns to explicit dtypes.
df1 = df1.astype(
    {
        "Tot_Lbr_Hrs": float,
        "Daily_Tot_Lbr_Hrs": float,
        "Day_Counter": int,
        "MTC_Daily_Lbr_percent": float,
    }
)

# Keep only the calendar date of each start/end timestamp.
df1["Start_Date"] = pd.to_datetime(df1["Start_Date"]).dt.date
df1["End_Date"] = pd.to_datetime(df1["End_Date"]).dt.date

# c_days: inclusive length of the Start_Date..End_Date span (the +1 day counts
# both endpoints). It is kept as a timedelta so the whole-day count can be read
# with `.dt.days` instead of parsing the timedelta's string form, which breaks
# on missing dates.
df1["c_days"] = (
    pd.to_datetime(df1["End_Date"])
    - pd.to_datetime(df1["Start_Date"])
    + pd.Timedelta(days=1)
)
df1["c_num"] = df1["c_days"].dt.days

# Position of each labor day within its span.
df1["day_normalized"] = df1["Day_Counter"] / df1["c_num"]

# In a few rows the start/end span does not match the labor days, so
# Day_Counter runs past c_num. Those rows are excluded from further analysis.
# Rows with a missing date yield NaN here and are dropped by the comparison.
optimal = df1.loc[df1["day_normalized"] <= 1]
# Raw scatter of the retained rows.
xpoints = optimal["day_normalized"]
ypoints = optimal["MTC_Daily_Lbr_percent"]
plt.scatter(xpoints, ypoints)
plt.xlabel("day_normalized")
plt.ylabel("MTC_Daily_Lbr_percent")
plt.show()

x = xpoints.values
y = ypoints.values

# Least-squares cubic fit. `z` holds the coefficients (highest power first)
# and `f` is the callable polynomial, i.e. f(x) evaluates the fitted equation.
z = np.polyfit(x, y, 3)
f = np.poly1d(z)

# Evaluate the fit across the full observed x range. The rows are not sorted
# by x, so the first/last elements are not the extremes; use min/max instead.
x_lo = x.min()
x_hi = x.max()
x_new = np.linspace(x_lo, x_hi, 50)
y_new = f(x_new)

plt.plot(x, y, "o", x_new, y_new)
# Pad the axis by a fraction of the data range so edge points are not clipped.
x_pad = 0.05 * (x_hi - x_lo)
plt.xlim([x_lo - x_pad, x_hi + x_pad])
plt.xlabel("day_normalized")
plt.ylabel("MTC_Daily_Lbr_percent")
plt.show()