The following script works when data4 is used to create dataframe, hours_date_match_barc. It breaks when I want to use data3 and I believe it is because it is not able to add more rows to ‘out‘.
ValueError: Length of values () does not match length of index ()
I really need to be able to use data3 because I need my compress_and_process_group function to be able to loop over all the rows in hours_date_match_barc.
I am looking for help to edit my function to loop over each row in hours_date_match_barc and compare it to laborcompl_ref. I tried to add a index += 1 and a continue statement but that didn’t work. I’m not sure where to tell my code to move on to the next index, row in hours_date_match_barc. It is also important to note that my output should at least have more rows since there is multiple instance of BARC in data4 but it doesn’t do that as well.
references to learn how to do this is welcome as well.
Here is my code:
import pandas as pd
import numpy as np
import math
pd.set_option('display.max_rows', None)
data2={
"ID": [ "BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR"],
"PHASENAME": [ "C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C",],
"DAY_COUNTER": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100],
"DAILY_LABOR_PERCENT": [0.002,0.002,0.002,0.005,0.006,0.009,0.01,0.01,0.01,0.011,0.012,0.012,0.012,0.012,0.012,0.012,0.012,0.012,0.012,0.012,0.013,0.013,0.013,0.013,0.013,0.014,0.014,0.014,0.013,0.013,0.015,0.015,0.014,0.014,0.014,0.012,0.011,0.011,0.011,0.012,0.012,0.012,0.013,0.013,0.013,0.012,0.012,0.012,0.011,0.011,0.011,0.011,0.01,0.01,0.01,0.01,0.009,0.009,0.009,0.009,0.008,0.008,0.008,0.008,0.009,0.009,0.009,0.009,0.009,0.009,0.008,0.008,0.009,0.009,0.009,0.009,0.009,0.009,0.009,0.009,0.008,0.007,0.007,0.007,0.005,0.004,0.004,0.005,0.005,0.005,0.005,0.004,0.005,0.005,0.005,0.006,0.008,0.008,0.009,0.009],
"compl_ref_index" : ["BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC",],
}
data3={
"ID": ["FOO","FOO","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR"],
"PHASENAME": ["L","L","C","C","C","C","C","C","C","C","C","C","C","C","C","C","C"],
"C_DAYS": [17, 17, 11, 11, 11, 11, 13, 13, 13, 13, 12, 12, 12, 12, 14, 14, 14,],
"Multi_Factor": [5.882353,5.882353,9.090909,9.090909,9.090909,9.090909,7.692308,7.692308,7.692308,7.692308,8.333333,8.333333,8.333333,8.333333,7.142857,7.142857,7.142857],
"compl_ref_index" : ["FOOL","FOOL","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC", "BARC",]
}
data4={
"ID": ["BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR","BAR"],
"PHASENAME": ["C","C","C","C","C","C","C","C","C","C","C","C","C","C","C"],
"C_DAYS": [11, 11, 11, 11, 13, 13, 13, 13, 12, 12, 12, 12, 14, 14, 14,],
"Multi_Factor": [9.090909,9.090909,9.090909,9.090909,7.692308,7.692308,7.692308,7.692308,8.333333,8.333333,8.333333,8.333333,7.142857,7.142857,7.142857],
"compl_ref_index" : ["BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC","BARC", "BARC",]
}
laborcompl_ref=pd.DataFrame(data2)
hours_date_match_barc=pd.DataFrame(data3)
def compress_and_process_group(group):
# check if hours_date_match["compl_ref_index"] is in row["compl_ref_index"]
for index, row in hours_date_match_barc.iterrows():
if row["compl_ref_index"] in group["compl_ref_index"].values:
# perform action below
n = hours_date_match_barc["C_DAYS"].iloc[0]
mf = (
hours_date_match_barc["Multi_Factor"].mean().astype(int)
)
first_day = group["DAY_COUNTER"].iloc[0]
rep = np.tile(group.values, (n // len(group) + 1, 1))
out = pd.DataFrame(rep, columns=group.columns).iloc[:n]
out["DAY_COUNTER"] = range(first_day, n + first_day)
out["C_DAYS"] = n
labor_values = group["DAILY_LABOR_PERCENT"].values
print(f"found id {row['compl_ref_index']} in second dataset")
for i in range(0, len(out)):
sum_of_rows = np.add.reduceat(labor_values, range(0, len(labor_values), mf))
selected_data = sum_of_rows[:n]
out["NEW_LBR_COMPL"] = np.nan
return out
index += 1
print(f"done")
# Process each group separately and concatenate the results
result = pd.concat([compress_and_process_group(group) for _, group in laborcompl_ref.groupby(["ID", "PHASENAME"])])