My goal is to create a DataFrame with unique name as Col A and a sumif of the original DataFrame from type, and the current iloc of the new DataFrame.
OLD_DF
| Type | nameDest | Column A | Column B |
| Payment | A | —— | ———|
| Debt | A | —— | ———|
| Payment | A | —— | ———|
| Payment | A | —— | ———|
| Payment | B | —— | ———|
| Debt | B | —— | ———|
| Debt | B | —— | ———|
| Payment | B | —— | ———|
New DF
| NameDest | Payment | Debt |
| A | 3 | 1 |
| B | 2 | 2 |
I tried to stay away from loops, but kept getting an error (error below the code)
def exercise_custom(df):
cust_df = pd.DataFrame(df.nameDest.dropna().unique(), columns=['DestName'])
cust_df['Payment_Count'] = len(df[(df['type']=='PAYMENT') & (df['nameDest']==cust_df.DestName)])
display(cust_df.head(5))
return cust_df
pass
The Traceback gives me
ValueError Traceback (most recent call last)
Cell In[72], line 56
52 return "TODO"
54 pass
---> 56 visual_custom(df)
Cell In[72], line 40, in visual_custom(df)
38 def visual_custom(df):
---> 40 exercise_custom(df)
43 fig, axs = plt.subplots(1, figsize=(6,10))
44 #updated to show bottom labels correctly & chg color
Cell In[72], line 6, in exercise_custom(df)
1 def exercise_custom(df):
2 #X
3 #for index, row in df:
4 #cust_df = df['nameDest'].unique()
5 cust_df = pd.DataFrame(df.nameDest.dropna().unique(), columns=['DestName'])
----> 6 cust_df['Payment_Count'] = len(df[(df['type']=='PAYMENT') & (df['nameDest']==cust_df.DestName)])
7 display(cust_df.head(5))
13 #X= pd.DataFrame(df['nameDest'].unique())
14 #df[df_payment_count] = len(df[(df['type']=='PAYMENT') & (df['isFraud']==1)])
15
(...)
32 # 'Cash In':len(df[(df['type']=='CASH_IN') & (df['isFraud']==1)])}
33 # }
File ~/anaconda3/envs/forageenv/lib/python3.9/site-packages/pandas/core/ops/common.py:76, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
72 return NotImplemented
74 other = item_from_zerodim(other)
---> 76 return method(self, other)
File ~/anaconda3/envs/forageenv/lib/python3.9/site-packages/pandas/core/arraylike.py:40, in OpsMixin.__eq__(self, other)
38 @unpack_zerodim_and_defer("__eq__")
39 def __eq__(self, other):
---> 40 return self._cmp_method(other, operator.eq)
File ~/anaconda3/envs/forageenv/lib/python3.9/site-packages/pandas/core/series.py:6105, in Series._cmp_method(self, other, op)
6102 res_name = ops.get_op_result_name(self, other)
6104 if isinstance(other, Series) and not self._indexed_same(other):
-> 6105 raise ValueError("Can only compare identically-labeled Series objects")
6107 lvalues = self._values
6108 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
ValueError: Can only compare identically-labeled Series objects
One way I thought about fixing this is by looping, but its so slow and this needs to be quicker. (this was my last option cause all I was able to find was ‘Loops are bad’).
def exercise_custom(df):
payment_counts=[]
cust_df = pd.DataFrame(df.nameDest.dropna().unique(), columns=['DestName'])
for name in cust_df['DestName']:
filtered_df = df[(df['type'] == 'PAYMENT') & (df['nameDest'] == name)].dropna().reset_index(drop=True)
if len(filtered_df) == 0:
payment_count =0
else:
payment_count=len(filtered_df)
payment_count=len(filtered_df)
payment_counts.append(payment_count)
cust_df['Payment_Count'] = payment_counts
display(cust_df.head(5))
pass