I am an R user trying to learn Python.
In R, I use the source() function to execute one script from inside another, and I would like to do the same in Python. Thank you in advance for your help.
Please see an example below.
Script "common step.R": the script I want to execute from within the script "analysis1.R".
# Shared preprocessing step, meant to be run with source() from an analysis
# script that has already attached dplyr and defined `file_ID`, `data` and
# `variable` (the name of the column to analyse, given as a string).
# Creates `file_ID_2` and `data_2`.

# Number of rows per ID in file_ID; ungroup so the grouping does not leak out.
file_ID_2 <- file_ID %>%
  group_by(ID) %>%
  mutate(n_val = n()) %>%
  ungroup()

data_2 <- data %>%
  left_join(file_ID_2, by = "ID") %>%
  # IDs missing from file_ID come back as NA after the join: count them as 0,
  # then flag whether the ID was present at all.
  mutate(
    n_val = ifelse(!is.na(n_val), n_val, 0),
    cdk = ifelse(n_val > 0, "Yes", "No")
  ) %>%
  # The join repeats IDs that occur several times in file_ID; keep one row each.
  distinct(ID, .keep_all = TRUE) %>%
  # Drop rows where the analysed column is missing.
  filter(!is.na(.data[[variable]])) %>%
  # Expose the analysed column under the fixed name `variable`.
  rename(variable := !!variable)
Script "analysis1.R": the script where I load my files and packages and where I perform my first analysis, using the script "common step.R", which performs the steps common to several analyses of the same raw data.
library(dplyr)
library(tidyverse)
# Input data ----
file_ID <- data.frame(
  ID = c("I1", "I1", "I2", "I3", "I6", "I9"),
  cl1 = c(7, 4, 5, 8, 7, 1)
)

data <- data.frame(
  ID = c("I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9"),
  Sex = c("F", "F", "F", "M", "F", "M", "M", "F", "F"),
  groupe = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
  var1 = c(8, 9, 6, 6, NA, 5, 6, 2, 5),
  var2 = c(7, 12, 5, 9, 9, 11, 8, 2, 7),
  var3 = c(NA, 4, 7, 9, 5, 10, 6, NA, 4)
)

# Name of the column to analyse ----
variable <- "var1"

# Run the shared preprocessing script; it uses the objects defined above ----
source("./common step.R")

# Analysis on data_2, the output of the script executed above ----
# Mean of the analysed column within each Sex x groupe combination.
data_3 <- group_by(data_2, Sex, groupe) %>%
  mutate(mean_value = mean(variable))
......
Script “common_step.ipynb”
# Common preprocessing shared by several analyses.
# Expects `data`, `file_ID`, `variable`, `pd` and `np` to already exist in the
# namespace this script is executed in; produces `file_ID_2` and `data_2`.

# Number of rows per ID in file_ID, stored on a copy so the raw table is untouched.
file_ID_2 = file_ID.assign(count=file_ID.groupby("ID")["ID"].transform("count"))

# Left join keeps every row of `data`; IDs absent from file_ID get NaN in "count".
data_2 = pd.merge(data, file_ID_2, how="left", on="ID")
data_2["count"] = data_2["count"].fillna(0)
data_2["cdk"] = np.where(data_2["count"] > 0, "Yes", "No")

# IDs repeated in file_ID are repeated by the join; keep the first row per ID.
data_2 = data_2.drop_duplicates(subset=["ID"])

# Expose the analysed column under the fixed name "variable" and drop the rows
# where it is missing.
data_2 = data_2.rename(columns={variable: "variable"})
data_2 = data_2.dropna(subset=["variable"])
Script "analysis1" in Python:
import pandas as pd
import numpy as np
# data
#####################
# Named `data` (not `df2`) because the common step reads the table under that name.
data = pd.DataFrame(
    {
        "ID": ["I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9"],
        "Sex": ["F", "F", "F", "M", "F", "M", "M", "F", "F"],
        "groupe": ["A", "A", "A", "B", "B", "B", "C", "C", "C"],
        "var1": [8, 9, 6, 6, np.nan, 5, 6, 2, 5],
        "var2": [7, 12, 5, 9, 9, 11, 8, 2, 7],
        "var3": [np.nan, 4, 7, 9, 5, 10, 6, np.nan, 4],
    }
)
file_ID = pd.DataFrame(
    {
        "ID": ["I1", "I1", "I2", "I3", "I6", "I9"],
        "cl1": [7, 4, 5, 8, 7, 1],
    }
)

# variable that I want to analyse
####################################
variable = "var1"

# Execution of the script "common_step.py" that will use the data loaded in this script
#########################################################################################
# Python counterpart of R's source(): run the file's code in this module's
# namespace so that it can read `data`, `file_ID`, `variable`, `pd` and `np`
# and leave `data_2` behind.
# The file must be plain Python (.py). A notebook (.ipynb) is a JSON document,
# which is why executing it as code fails with
# "NameError: name 'null' is not defined".
# Inside IPython/Jupyter, `%run -i common_step.py` achieves the same thing.
# NOTE: exec() runs arbitrary code; only use it on scripts you trust.
with open("common_step.py", encoding="utf-8") as script:
    exec(compile(script.read(), "common_step.py", "exec"), globals())
# Previously tried ->
# -> execfile("common_step.ipynb") -> NameError: name 'null' is not defined
# -> os.system("common_step.ipynb") -> error "permission denied"

# Analysis using the output of the script previously executed
#########################################################################################
# Mean of the analysed column within each (Sex, groupe) group. assign() returns
# a new frame, so data_2 is left as produced by the common step.
data_3 = data_2.assign(
    mean_value=data_2.groupby(["Sex", "groupe"])["variable"].transform("mean")
)
......
2