MY REQUIREMENT IS TO HAVE THE OUTPUT of the Userdefined Functions open_duration(open_time): OR res_duration(res_time): in the new COLUMN [‘Open Resolve Duration’] in the selected_columns_df
import pandas as pdpd.options.mode.copy_on_write = True
def calculate_totduration(sr_creation, sr_resolved):
start_date = pd.to_datetime(sr_creation) end_date = pd.to_datetime(sr_resolved)
total_minutes = difference.total_seconds()/60 return total_minutes
def open_duration(open_time):
print(type(open_time)) #sr_open_time = open_time.astype(int)
sr_open_time = pd.to_numeric(open_time)
if sr_open_time <=240:
return('Open < 4 hours')
elif sr_open_time <=480:
return('Open from 4 hrs to 8 hrs')
elif sr_open_time <=720:
return('Open from 8 hrs to 12 hrs')
elif sr_open_time <=1440:
return('Open 12 hrs to 24 hrs')
elif sr_open_time <=2880:
return('Open 24 to 48 hrs')
else: return('OPEN > 48 hrs')
def res_duration(res_time):
sr_res_time = pd.to_numeric(res_time)
if sr_res_time <= 240:
return('Within 4 hours')
elif sr_res_time <=480:
return('Bet 4 hrs to 8 hrs')
elif sr_res_time <=720:
return('8 hrs to 12 hrs')
elif sr_res_time <=1440:
return('12 hrs to 24 hrs')
else:
return('> 24 hrs')
df = pd.read_csv('C:\Users\XXXXXX\Desktop\PythonCodes\Testing10.csv',sep=',',skiprows=0,low_memory=False,encoding='utf-8')
filtered_df = df[(df['Region/Circle']=='IND')]
selected_columns_df = filtered_df[['Product','Customer Name','SI Number','SI Name','SR Number','SR Status','Region/Circle','Case Type','Source','SR Creation Time','Resolved Time','Total Duration SUM','Case Type','Ser Segment']]
selected_columns_df['Total Minutes'] = selected_columns_df.apply(lambda row:calculate_totduration(row['SR Creation Time'],row['Resolved Time']),axis=1)
if selected_columns_df['SR Status'].str.lower == 'open' or selected_columns_df['SR Status'].str.lower == 're-open':
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row:open_duration(calculate_totduration(row['SR Creation Time'],pd.to_datetime('today'))),axis=1)
else:
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))
#selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(lambda row :calculate_totduration(row['SR Creation Time'],row['Resolved Time'])))
selected_columns_df.to_csv('MYoutfile4.csv')
print('FILE WRITTEN')
print(pd.to_datetime('today'))
But I am getting the following error:
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreindexesbase.py”, line 3805, in get_loc
return self._engine.get_loc(casted_key)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “index.pyx”, line 167, in pandas._libs.index.IndexEngine.get_loc
File “index.pyx”, line 175, in pandas._libs.index.IndexEngine.get_loc
File “pandas_libsindex_class_helper.pxi”, line 70, in pandas._libs.index.Int64Engine._check_type
KeyError: ‘Total Minutes’
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File “c:UsersxXXXXXxDesktopPythonCodesDATASETS.py”, line 71, in
selected_columns_df[‘Open Resolve Duration’] = selected_columns_df.apply(lambda row :res_duration(row[‘Total Minutes’]))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreframe.py”, line 10374, in apply
return op.apply().finalize(self, method=”apply”)
^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-your text
packagespandascoreapply.py”, line 916, in apply
return self.apply_standard()
^^^^^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreapply.py”, line 1063, in apply_standard
results, res_index = self.apply_series_generator()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreapply.py”, line 1081, in apply_series_generator
results[i] = self.func(v, *self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “c:UsersxXXXXXxDesktopPythonCodesDATASETS.py”, line 71, in
selected_columns_df[‘Open Resolve Duration’] = selected_columns_df.apply(lambda row :res_duration(row[‘Total Minutes’]))
~~~^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreseries.py”, line 1121, in getitem
return self._get_value(key)
^^^^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreseries.py”, line 1237, in _get_value
loc = self.index.get_loc(label)
^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:UsersxXXXXXxAppDataLocalProgramsPythonPython312Libsite-packagespandascoreindexesbase.py”, line 3812, in get_loc
raise KeyError(key) from err
KeyError: ‘Total Minutes’
Jaffer is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.