I am using python R library to implement the ERGM model to a data set. I am following the below approach
def ergm_process(nodes, edges):
try:
logger.debug(f"Passed Node: {nodes}")
logger.debug(f"Edges: {edges}")
# Convert node and edge data for R
nodes_df = pd.DataFrame(nodes)
edges_df = pd.DataFrame(edges)
nodes_df['Attendance'] = pd.to_numeric(nodes_df['Attendance'], errors='coerce')
if 'id' not in nodes_df.columns:
logger.error("Node data must include 'id' keys")
return {"error": "Node data incomplete"}
# R DataFrame Conversion
nodes_r = pandas2ri.py2rpy(nodes_df)
# Prepare node and edge data for R
node_ids = nodes_df['id'].tolist()
edges_tuples = [(edge['from'], edge['to']) for edge in edges]
# Log and check edge tuples
if not all('from' in edge and 'to' in edge for edge in edges):
logger.error("One or more edges are missing 'from' or 'to' keys")
return {"error": "Edge data incomplete"}
flat_edges = [item for tup in edges_tuples for item in tup]
# Setup R environment
from rpy2.robjects.packages import importr
base = importr('base')
network = importr('network', on_conflict="warn")
ergm = importr('ergm', on_conflict="warn")
# Pass data to R
ro.globalenv['nodes'] = nodes_r
ro.globalenv['node_ids'] = ro.StrVector(node_ids)
ro.globalenv['edges'] = ro.IntVector(flat_edges) # Flatten list of tuples
# Run R code
ro.r('''
print("Node IDs:")
print(node_ids)
print("Edges:")
print(edges)
net <- network::network(matrix(edges, byrow=TRUE, ncol=2), directed=TRUE, vertices=node_ids)
# Ensure each attribute is set
for (attr_name in colnames(nodes)) {
set.vertex.attribute(net, attr_name, nodes[[attr_name]])
}
print("Network created with attributes:")
print(list.vertex.attributes(net))
fit <- ergm(net ~ edges + triangle + mutual + nodecov('Attendance'),
estimate = "MPLE")
print("Model fitted")
print(summary(fit))
''')
results = {}
summary = base.summary(ro.r('fit'))
# Getting the summary of the fit
fit_summary = r['summary'](ro.r('fit'), output='summary') # Ensure to get summary as a dataframe-like structure
# Convert R summary object to pandas DataFrame if possible
summary_df = pandas2ri.rpy2py_dataframe(fit_summary)
logger.debug(summary_df)
# Prepare results for JSON serialization
results = {
"Estimates": summary_df["Estimate"].tolist(),
"Std. Errors": summary_df["Std. Error"].tolist(),
"MCMC %": summary_df["MCMC %"].tolist(),
"Z-values": summary_df["z value"].tolist(),
"P-values": summary_df["Pr(>|z|)"].tolist(),
}
return results
except Exception as e:
logger.exception("Error during ERGM process: %s", str(e))
return {"error": str(e)}
When I implement that code I get the Conversion ‘rpy2py’ not defined for objects of type error
I tried to implement the following method
But when i approach it it gives the error related to fit_summary$se saying that the arguments are 4,0
Maximum Pseudolikelihood Results:
Estimate Std. Error MCMC % z value Pr(>|z|)
edges -11.728186 1.263969 0 -9.279 <1e-04 ***
triangle 2.088686 0.108218 0 19.301 <1e-04 ***
mutual 8.638105 0.817793 0 10.563 <1e-04 ***
nodecov.Attendance -0.034097 0.007857 0 -4.339 <1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Warning: The standard errors are based on naive pseudolikelihood and are suspect. Set control.ergm$MPLE.covariance.method='Godambe' for a simulation-based approximation of the standard errors.
Null Pseudo-deviance: 1.469e+09 on 1.059e+09 degrees of freedom
Residual Pseudo-deviance: 1.051e+03 on 1.059e+09 degrees of freedom
AIC: 1059 BIC: 1134 (Smaller is better. MC Std. Err. = 0)
# Adjust the R script for better compatibility and error handling
ro.r('''
library(ergm)
net <- network::network(matrix(edges, byrow=TRUE, ncol=2), directed=TRUE, vertices=node_ids)
for (attr_name in colnames(nodes)) {
set.vertex.attribute(net, attr_name, nodes[[attr_name]])
}
fit <- ergm(net ~ edges + triangle + mutual + nodecov('Attendance'), estimate = "MPLE")
# Check if fit is valid and prepare summary
if (!is.null(fit)) {
fit_summary <- summary(fit) # This should be the raw summary output
if (!is.null(fit_summary)) {
fit_df <- data.frame(
Estimate = fit_summary$coefficients,
Std.Error = fit_summary$se,
Z.value = fit_summary$coefficients / fit_summary$se,
P.value = fit_summary$p.values
)
print(fit_df)
.GlobalEnv$fit_df <- fit_df # Save DataFrame to global environment
} else {
print("Fit summary is NULL or not available.")
}
} else {
print("ERGM model did not fit properly.")
}
''')
if 'fit_df' in ro.globalenv and not r['is.null'](ro.globalenv['fit_df']):
summary_df = pandas2ri.rpy2py(ro.globalenv['fit_df'])
results = {
"Estimates": summary_df["Estimate"].tolist(),
"Std. Errors": summary_df["Std.Error"].tolist(),
"Z-values": summary_df["Z.value"].tolist(),
"P-values": summary_df["P.value"].tolist(),
}
return JsonResponse(results)
else:
logger.error("Model summary is NULL or the DataFrame could not be created.")
return JsonResponse({"error": "Model did not converge or insufficient data for summary"}, status=500)
except Exception as e:
logger.exception("Error during ERGM process: %s", str(e))
return JsonResponse({"error": str(e)}, status=500)