I have this below code for marketing mix modelling application with thousands records and hundreds of media/control variables
functions {
// the Hill function
real Hill(real t, real ec, real slope) {
return 1 / (1 + (t / ec)^(-slope));
}
// the adstock transformation with a vector of weights
real Adstock(row_vector t, row_vector weights) {
return dot_product(t, weights) / sum(weights);
}
}
data {
// the total number of observations
int<lower=1> N;
// the total number of training observations
int<lower=1> T;
// the total number of holdout observations
int<lower=0> H;
int<lower=0> n_interactions;
int interaction_left[n_interactions];
int interaction_right[n_interactions];
int<lower=0> tau_dist_type;
int<lower=0> noise_var_dist_type;
real tau_dist_mean;
real<lower=0> tau_dist_sd;
real noise_var_dist_mean;
real<lower=0> noise_var_dist_sd;
// training data indexes
int training_index[T];
// holdout data indexes
int holdout_index[H];
real Y_train[T];
real Y_holdout[H];
// the maximum duration of lag effect, in weeks
int<lower=1> max_lag;
// the number of media channels
int<lower=1> num_media;
row_vector[num_media] media_prior_dist_type;
row_vector[num_media] media_prior_mean;
row_vector[num_media] media_prior_sd;
row_vector[num_media] retain_rate_dist_type;
row_vector[num_media] retain_rate_dist_mean;
row_vector[num_media] retain_rate_dist_sd;
row_vector[num_media] delay_dist_type;
row_vector[num_media] delay_dist_mean;
row_vector[num_media] delay_dist_sd;
row_vector[num_media] slope_dist_type;
row_vector[num_media] slope_dist_mean;
row_vector[num_media] slope_dist_sd;
row_vector[num_media] ec_dist_type;
row_vector[num_media] ec_dist_mean;
row_vector[num_media] ec_dist_sd;
// a vector of 0 to max_lag - 1
//row_vector[max_lag] lag_vec;
// 3D array of media variables
row_vector[max_lag] X_media[N, num_media];
// the number of other control variables
int<lower=1> num_ctrl;
row_vector[num_ctrl] ctrl_prior_dist_type;
row_vector[num_ctrl] ctrl_prior_mean;
row_vector[num_ctrl] ctrl_prior_sd;
// a matrix of control variables
row_vector[num_ctrl] X_ctrl[N];
row_vector<lower=0>[num_media] slope;
}
parameters {
// residual variance
real<lower=0> noise_var;
// the intercept
real tau;
// the coefficients for media variables
vector<lower=0>[num_media] beta_medias;
// coefficients for other control variables
vector[num_ctrl] gamma_ctrl;
// the retention rate and delay parameter for the adstock transformation of
// each media
vector<lower=0,upper=1>[num_media] retain_rate;
//vector<lower=0,upper=max_lag-1>[num_media] delay;
// ec50 and slope for Hill function of each media
vector<lower=0,upper=1>[num_media] ec;
vector<lower=0>[n_interactions] beta_interactions;
// vector<lower=0>[num_media] slope;
}
transformed parameters {
// a vector of the mean response
real mu[T];
// the cumulative media effect after adstock
real cum_effect;
// the cumulative media effect after adstock, and then Hill transformation
row_vector[num_media] cum_effects_hill[T];
row_vector[max_lag] lag_weights;
row_vector[n_interactions] cum_effects_hill_interaction[T];
for (nn in 1:T) {
for (media in 1 : num_media) {
for (lag in 1 : max_lag) {
lag_weights[lag] <- pow(retain_rate[media], (lag - 1) );
}
cum_effect <- Adstock(X_media[training_index[nn], media], lag_weights);
cum_effects_hill[nn, media] <- Hill(cum_effect, ec[media], slope[media]);
}
if(n_interactions > 0)
for (inter in 1:n_interactions){
cum_effects_hill_interaction[nn,inter] = cum_effects_hill[nn,interaction_left[inter]]*cum_effects_hill[nn,interaction_right[inter]];
}
if(n_interactions > 0)
mu[nn] <- tau +
dot_product(cum_effects_hill[nn], beta_medias) +
dot_product(X_ctrl[training_index[nn]], gamma_ctrl) +
dot_product(cum_effects_hill_interaction[nn],beta_interactions);
else
mu[nn] <- tau +
dot_product(cum_effects_hill[nn], beta_medias) +
dot_product(X_ctrl[training_index[nn]], gamma_ctrl);
}
}
model {
if (tau_dist_type == 0)
tau ~ uniform(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 1)
tau ~ normal(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 2)
tau ~ beta(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 3)
tau ~ gamma(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 4)
tau ~ inv_gamma(tau_dist_mean,tau_dist_sd);
for (media_index in 1 : num_media) {
if (media_prior_dist_type[media_index] == 0)
beta_medias[media_index] ~ uniform(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 1)
beta_medias[media_index] ~ normal(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 2)
beta_medias[media_index] ~ beta(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 3)
beta_medias[media_index] ~ gamma(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 4)
beta_medias[media_index] ~ inv_gamma(media_prior_mean[media_index],media_prior_sd[media_index]);
if (retain_rate_dist_type[media_index] == 0)
retain_rate[media_index] ~ uniform(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 1)
retain_rate[media_index] ~ normal(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 2)
retain_rate[media_index] ~ beta(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 3)
retain_rate[media_index] ~ gamma(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 4)
retain_rate[media_index] ~ inv_gamma(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
if (slope_dist_type[media_index] == 0)
slope[media_index] ~ uniform(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 1)
slope[media_index] ~ normal(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 2)
slope[media_index] ~ beta(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 3)
slope[media_index] ~ gamma(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 4)
slope[media_index] ~ inv_gamma(slope_dist_mean[media_index],slope_dist_sd[media_index]);
if (ec_dist_type[media_index] == 0)
ec[media_index] ~ uniform(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 1)
ec[media_index] ~ normal(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 2)
ec[media_index] ~ beta(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 3)
ec[media_index] ~ gamma(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 4)
ec[media_index] ~ inv_gamma(ec_dist_mean[media_index],ec_dist_sd[media_index]);
}
for (ctrl_index in 1 : num_ctrl) {
if (ctrl_prior_dist_type[ctrl_index] == 0)
gamma_ctrl[ctrl_index] ~ uniform(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 1)
gamma_ctrl[ctrl_index] ~ normal(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 2)
gamma_ctrl[ctrl_index] ~ beta(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 3)
gamma_ctrl[ctrl_index] ~ gamma(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 4)
gamma_ctrl[ctrl_index] ~ inv_gamma(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
}
if (noise_var_dist_type == 0)
noise_var ~ uniform(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 1)
noise_var ~ normal(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 2)
noise_var ~ beta(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 3)
noise_var ~ gamma(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 4)
noise_var ~ inv_gamma(noise_var_dist_mean,noise_var_dist_sd);
Y_train ~ normal(mu, sqrt(noise_var));
}
I tried to vectorized this by below code and with new version of STAN but this code getting several error, can you help me to vectorize this stan so I can reduce the run time.
Or if there is another way to reduce run time.
functions {
// The Hill function (vectorized for each media channel)
vector Hill(vector t, vector ec, vector slope) {
return 1 ./ (1 + pow((t ./ ec),(-slope)));
}
// The Adstock transformation (vectorized)
vector Adstock(vector t, vector weights) {
return (t.*weights)/ rowwise_sum(weights);
}
}
data {
// the total number of observations
int<lower=1> N;
// the total number of training observations
int<lower=1> T;
// the total number of holdout observations
int<lower=0> H;
int<lower=0> n_interactions;
array[n_interactions] int interaction_left;
array[n_interactions] int interaction_right;
int<lower=0> tau_dist_type;
int<lower=0> noise_var_dist_type;
real tau_dist_mean;
real<lower=0> tau_dist_sd;
real noise_var_dist_mean;
real<lower=0> noise_var_dist_sd;
// training data indexes
array[T] int training_index;
// holdout data indexes
array[H] int holdout_index;
array[T] real Y_train;
array[H] real Y_holdout;
// the maximum duration of lag effect, in weeks
int<lower=1> max_lag;
// the number of media channels
int<lower=1> num_media;
row_vector[num_media] media_prior_dist_type;
row_vector[num_media] media_prior_mean;
row_vector[num_media] media_prior_sd;
row_vector[num_media] retain_rate_dist_type;
row_vector[num_media] retain_rate_dist_mean;
row_vector[num_media] retain_rate_dist_sd;
row_vector[num_media] delay_dist_type;
row_vector[num_media] delay_dist_mean;
row_vector[num_media] delay_dist_sd;
row_vector[num_media] slope_dist_type;
row_vector[num_media] slope_dist_mean;
row_vector[num_media] slope_dist_sd;
row_vector[num_media] ec_dist_type;
row_vector[num_media] ec_dist_mean;
row_vector[num_media] ec_dist_sd;
// a vector of 0 to max_lag - 1
//row_vector[max_lag] lag_vec;
// 3D array of media variables
array[N, num_media] row_vector[max_lag] X_media;
// the number of other control variables
int<lower=1> num_ctrl;
row_vector[num_ctrl] ctrl_prior_dist_type;
row_vector[num_ctrl] ctrl_prior_mean;
row_vector[num_ctrl] ctrl_prior_sd;
// a matrix of control variables
array[N] row_vector[num_ctrl] X_ctrl;
row_vector<lower=0>[num_media] slope;
}
parameters {
// residual variance
real<lower=0> noise_var;
// the intercept
real tau;
// the coefficients for media variables
vector<lower=0>[num_media] beta_medias;
// coefficients for other control variables
vector[num_ctrl] gamma_ctrl;
// the retention rate and delay parameter for the adstock transformation of
// each media
vector<lower=0,upper=1>[num_media] retain_rate;
//vector<lower=0,upper=max_lag-1>[num_media] delay;
// ec50 and slope for Hill function of each media
vector<lower=0,upper=1>[num_media] ec;
vector<lower=0>[n_interactions] beta_interactions;
// vector<lower=0>[num_media] slope;
}
transformed parameters {
matrix[T, num_media] cum_effects; // Cumulative effects after Adstock
matrix[T, num_media] cum_effects_hill; // After Hill transformation
matrix[T, n_interactions] cum_effects_hill_interaction; // Interaction effects
vector[T] mu;
array[num_media] row_vector[max_lag] lag_weights; // Mean response
for (media in 1 : num_media) {
for (lag in 1 : max_lag) {
lag_weights[media,lag] = pow(retain_rate[media], (lag - 1) );
}
}
// Apply Adstock transformation
cum_effects = Adstock(X_media[training_index, ], lag_weights);
// Apply Hill transformation using vectorized `Hill` function
cum_effects_hill = Hill(cum_effects, ec, slope);
// Compute interaction effects if applicable
if (n_interactions > 0) {
for (inter in 1:n_interactions) {
cum_effects_hill_interaction[, inter] =
cum_effects_hill[, interaction_left[inter]] .* cum_effects_hill[, interaction_right[inter]];
}
}
// Compute mu using dot products for medias, controls, and interactions
if (n_interactions > 0) {
mu = tau +
cum_effects_hill * beta_medias +
X_ctrl[training_index, ] * gamma_ctrl +
cum_effects_hill_interaction * beta_interactions;
} else {
mu = tau +
cum_effects_hill * beta_medias +
X_ctrl[training_index, ] * gamma_ctrl;
}
}
model {
if (tau_dist_type == 0)
tau ~ uniform(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 1)
tau ~ normal(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 2)
tau ~ beta(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 3)
tau ~ gamma(tau_dist_mean,tau_dist_sd);
else if (tau_dist_type == 4)
tau ~ inv_gamma(tau_dist_mean,tau_dist_sd);
for (media_index in 1 : num_media) {
if (media_prior_dist_type[media_index] == 0)
beta_medias[media_index] ~ uniform(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 1)
beta_medias[media_index] ~ normal(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 2)
beta_medias[media_index] ~ beta(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 3)
beta_medias[media_index] ~ gamma(media_prior_mean[media_index],media_prior_sd[media_index]);
else if (media_prior_dist_type[media_index] == 4)
beta_medias[media_index] ~ inv_gamma(media_prior_mean[media_index],media_prior_sd[media_index]);
if (retain_rate_dist_type[media_index] == 0)
retain_rate[media_index] ~ uniform(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 1)
retain_rate[media_index] ~ normal(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 2)
retain_rate[media_index] ~ beta(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 3)
retain_rate[media_index] ~ gamma(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
else if (retain_rate_dist_type[media_index] == 4)
retain_rate[media_index] ~ inv_gamma(retain_rate_dist_mean[media_index],retain_rate_dist_sd[media_index]);
if (slope_dist_type[media_index] == 0)
slope[media_index] ~ uniform(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 1)
slope[media_index] ~ normal(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 2)
slope[media_index] ~ beta(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 3)
slope[media_index] ~ gamma(slope_dist_mean[media_index],slope_dist_sd[media_index]);
else if (slope_dist_type[media_index] == 4)
slope[media_index] ~ inv_gamma(slope_dist_mean[media_index],slope_dist_sd[media_index]);
if (ec_dist_type[media_index] == 0)
ec[media_index] ~ uniform(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 1)
ec[media_index] ~ normal(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 2)
ec[media_index] ~ beta(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 3)
ec[media_index] ~ gamma(ec_dist_mean[media_index],ec_dist_sd[media_index]);
else if (ec_dist_type[media_index] == 4)
ec[media_index] ~ inv_gamma(ec_dist_mean[media_index],ec_dist_sd[media_index]);
}
for (ctrl_index in 1 : num_ctrl) {
if (ctrl_prior_dist_type[ctrl_index] == 0)
gamma_ctrl[ctrl_index] ~ uniform(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 1)
gamma_ctrl[ctrl_index] ~ normal(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 2)
gamma_ctrl[ctrl_index] ~ beta(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 3)
gamma_ctrl[ctrl_index] ~ gamma(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
else if (ctrl_prior_dist_type[ctrl_index] == 4)
gamma_ctrl[ctrl_index] ~ inv_gamma(ctrl_prior_mean[ctrl_index],ctrl_prior_sd[ctrl_index]);
}
if (noise_var_dist_type == 0)
noise_var ~ uniform(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 1)
noise_var ~ normal(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 2)
noise_var ~ beta(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 3)
noise_var ~ gamma(noise_var_dist_mean,noise_var_dist_sd);
else if (noise_var_dist_type == 4)
noise_var ~ inv_gamma(noise_var_dist_mean,noise_var_dist_sd);
Y_train ~ normal(mu, sqrt(noise_var));
}
Thanks in advance
1