I have a pandas dataframe which I am trying to sort on the basis of values in a column, but the sorting is not alphabetical. The sorting is based on a “sorter” list (i.e. a list which gives the order in which values should be sorted).
However, I am getting error when I do this.
executable code below:
import pandas as pd
import numpy as np
df = pd.DataFrame({
'JDate':["2022-01-31","2022-12-05","2023-11-10","2023-12-03","2024-01-16","2024-01-06","2011-01-04"],
# 'Month':[1,12,11,12,1,1],
'Code':[None,'John Johnson',np.nan,'John Smith','Mary Williams','ted bundy','George Lucas'],
'Unit Price':[np.nan,200,None,56,75,65,60],
'Quantity':[1500, 140000, 1400000, 455, 648, 759,1000],
'Amount':[100, 10000, 100000, 5, 48, 59,449],
'Invoice':['soccer','basketball','baseball','football','baseball','ice hockey','football'],
'energy':[100.,100,100,54,98,3,45],
'Category':['alpha','bravo','kappa','alpha','bravo','bravo','kappa']
})
df["JDate"] = pd.to_datetime(df["JDate"])
df["JYearMonth"] = df['JDate'].dt.to_period('M')
index_to_use = ['Category','Code','Invoice','Unit Price']
values_to_use = ['Amount']
columns_to_use = ['JYearMonth']
df2 = df.pivot_table(index=index_to_use,
values=values_to_use,
columns=columns_to_use)
df4 = df2['Amount'].reset_index()
# setting up the sorter
sorter=['football','ice hockey','basketball','baseball']
#trying the categorical method
df4['Invoice'] = df['Invoice'].astype('Category').cat.set_categories(sorter)
df4.sort_values(['Invoice'],inplace=True)
df3 = df2.xs('alpha',level='Category')
df3 = df3.reset_index() #this prevents merging of rows
writer= pd.ExcelWriter(
"t2test11.xlsx",
engine='xlsxwriter'
)
df.to_excel(writer,sheet_name="t2",index=True)
df2.to_excel(writer,sheet_name="t2test",index=True)
df4.to_excel(writer,sheet_name="t2testFixHeader",index=True)
df3.to_excel(writer,sheet_name="t2filter",index=True)
writer.close()