I am looking for a way to reduce the computation time of multiple function calculations on a pandas DataFrame, i.e. for what I am trying to do below.
It takes more than 3 minutes on my computer. Is there an equivalent to pandarallel that works with the aggregate function, or some other way to speed up the calculation? pandarallel does not seem to work with aggregate.
This is the code I use:
import pandas as pd
import numpy as np
from empyrical import (cagr,
annual_volatility,
max_drawdown,
)
# Silence ALL warnings process-wide.
# NOTE(review): this also hides pandas deprecation/performance warnings
# (e.g. the DataFrame-fragmentation warning raised by the column-insert
# loop below) — consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')
# Example: simulate N random-walk "price" series on a business-day calendar.
N = 10000                    # number of simulated series
mu = 0.1 / 252               # per-step drift (10% annual over 252 days)
sigma = 0.15 / np.sqrt(250)  # per-step vol
# NOTE(review): 250 here vs 252 in `mu` — confirm which day count is intended.

# Business-day index covering 1990-2020.
date_range = pd.date_range(start='1990-01-01', end='2020-01-01', freq='B')

# Draw all N series in one vectorized call and cumsum along the time axis,
# instead of inserting columns one by one in a Python loop. The loop form
# fragments the DataFrame (pandas re-allocates on every insert and emits a
# PerformanceWarning) and dominates the setup time for N=10000.
walks = 100 + np.random.normal(mu, sigma, size=(len(date_range), N)).cumsum(axis=0)
df = pd.DataFrame(
    walks,
    index=date_range,
    columns=[f"Série {i+1}" for i in range(N)],
)
def _column_stats(x):
    """Return date-range, performance, volatility and max-drawdown stats
    for one price series as a labelled Series.

    Performance notes vs. the original lambda list:
    - each look-back window's ``pct_change()`` is computed ONCE and reused
      by all three metrics (the lambdas recomputed it per metric, roughly
      tripling the work per column);
    - positional slicing uses ``.iloc[-52:]`` — ``x[-52:]`` on a
      label-indexed Series is deprecated in modern pandas.
    """
    # Trailing windows of weekly observations: 1, 3 and 5 "years" (52 obs/yr).
    w1 = x.iloc[-52:]
    r1 = w1.pct_change()
    r3 = x.iloc[-3 * 52:].pct_change()
    r5 = x.iloc[-5 * 52:].pct_change()
    return pd.Series({
        # First/last valuation dates
        'Date de début': x.first_valid_index().date(),
        'Date de fin': x.last_valid_index().date(),
        # Performance: annualized return (%); the 1-year figure is NaN
        # when the window has more than one missing value (original guard).
        'Perf 1 an': 100 * cagr(r1, period='weekly') if w1.isnull().sum() <= 1 else np.nan,
        'Perf 3 ans': 100 * cagr(r3, period='weekly'),
        'Perf 5 ans': 100 * cagr(r5, period='weekly'),
        # Risk: annualized volatility (%)
        'Volat 1 an': 100 * annual_volatility(r1, period='weekly'),
        'Volat 3 ans': 100 * annual_volatility(r3, period='weekly'),
        'Volat 5 ans': 100 * annual_volatility(r5, period='weekly'),
        # Risk: maximum drawdown (%)
        'Max DD 1 an': 100 * max_drawdown(r1),
        'Max DD 3 ans': 100 * max_drawdown(r3),
        'Max DD 5 ans': 100 * max_drawdown(r5),
    })

# One pass per column instead of one pass per (column, metric) pair.
# apply() with a Series-returning function yields stats-by-column; the
# transpose and final dropna() reproduce the original output exactly.
tab = df.sort_index().apply(_column_stats).T.dropna()
tab