I had to build a fairly complex FT-based process in Aspen Plus. The task is to calculate plant efficiency for varying boundary conditions. Because the calculations take a very long time, I am trying to use random forest regression to estimate plant efficiency. The approach is to use a Python script that accesses Aspen Plus via the COM interface to generate a large data set. To limit machine time, I would like to have multiple instances of Aspen Plus running at the same time, so I use the multiprocessing module to create a pool of asynchronous workers. While the script works flawlessly for a single instance, and while multiprocessed calculations limited to several hundred cases also succeed, it starts to drop workers for larger data sets. The number of cases returned is random, but always a multiple of the number of jobs assigned to a worker.
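To illustrate the pool setup in isolation, here is a minimal sketch of the same pattern without any COM calls. `fake_job`, `collect`, and `on_error` are placeholders I made up for this sketch; the `error_callback` argument is not in my real script, but I include it here because `apply_async` otherwise swallows worker exceptions silently:

```python
import multiprocessing as mp

collected = []

def fake_job(n_jobs):
    # Stand-in for the real simulation worker: one result row per job
    return [[i, 2 * i] for i in range(n_jobs)]

def collect(result):
    # Runs in the parent process when a worker returns successfully
    collected.extend(result)

def on_error(exc):
    # apply_async never raises in the parent; without an error_callback,
    # a failed worker simply produces no datapoints
    print("worker failed:", exc)

if __name__ == "__main__":
    pool = mp.Pool(4)
    for _ in range(4):
        pool.apply_async(fake_job, (10,), callback=collect, error_callback=on_error)
    pool.close()
    pool.join()
    print("datapoints:", len(collected))  # 40 when no worker is dropped
```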
import os
import numpy as np
import matplotlib.pyplot as plt
import win32com.client as win32
import multiprocessing as mp
import time
import random
import pandas as pd # for working with DataFrames
from sklearn.model_selection import train_test_split # for splitting the data
from sklearn.metrics import mean_squared_error # for calculating the cost function
from sklearn.ensemble import RandomForestRegressor # for building the model
import pylab
start_time = time.time()
simulation_result = []
###############################################################################
#Calculation options
###############################################################################
JobsPerProcessor = 500
NumberOfProcessors = 10
###############################################################################
# Boundary conditions
###############################################################################
T_min = 600
T_max = 1000
p_min = 1
p_max = 20
boundaries = [JobsPerProcessor, T_min, T_max, p_min, p_max]
###############################################################################
# Function to access Aspen Simulation
###############################################################################
def RunAspenSimulation(boundaries):
    process = mp.current_process()
    pid = process.name
    results = []
    # Create Aspen Plus object
    aspen = win32.Dispatch('Apwn.Document')
    # Open the file
    aspen.InitFromArchive2(os.path.abspath('Reformerbsp/240717_Reformer_v1.bkp'))
    for point in range(boundaries[0]):
        # Random boundary conditions
        temperature = random.uniform(boundaries[1], boundaries[2])
        pressure = random.uniform(boundaries[3], boundaries[4])
        # Set variables
        aspen.Tree.FindNode('/Data/Blocks/REFORMER/Input/TEMP').Value = temperature
        aspen.Tree.FindNode('/Data/Blocks/REFORMER/Input/PRES').Value = pressure
        # Run simulation
        aspen.Engine.Run2()
        # Get results
        x_CH4 = aspen.Tree.FindNode('/Data/Streams/SYNGAS/Output/MOLEFRAC/MIXED/CH4').Value
        x_CO = aspen.Tree.FindNode('/Data/Streams/SYNGAS/Output/MOLEFRAC/MIXED/CO').Value
        x_CO2 = aspen.Tree.FindNode('/Data/Streams/SYNGAS/Output/MOLEFRAC/MIXED/CO2').Value
        x_H2 = aspen.Tree.FindNode('/Data/Streams/SYNGAS/Output/MOLEFRAC/MIXED/H2').Value
        x_H2O = aspen.Tree.FindNode('/Data/Streams/SYNGAS/Output/MOLEFRAC/MIXED/H2O').Value
        results.append([temperature, pressure, x_CH4, x_CO, x_CO2, x_H2, x_H2O])
        print("Worker", pid, "finished job", point)
    aspen.Close()
    print("Worker", pid, "calculations finished")
    return results
###############################################################################
# Single threaded calculations
###############################################################################
def SingleprocessAspen(boundaries):
    simulation_result = RunAspenSimulation(boundaries)
    print("elapsed time %s" % (time.time() - start_time))
    return simulation_result
###############################################################################
# Fetching results
###############################################################################
def collect_results(result):
    simulation_result.extend(result)
###############################################################################
# Multiprocessing
###############################################################################
def MultiprocessAspen(boundaries, NumberOfProcessors):
    # Create a pool of workers; each worker gets its own Aspen Plus instance
    pool = mp.Pool(NumberOfProcessors)
    for i in range(NumberOfProcessors):
        pool.apply_async(RunAspenSimulation, (boundaries,), callback=collect_results)
    # Close the pool to new jobs
    pool.close()
    # Wait for the asynchronous calculations to finish
    pool.join()
    print("elapsed time %s" % (time.time() - start_time))
    return simulation_result
###############################################################################
# Run Regression
###############################################################################
if __name__ == '__main__':
    simulation_result = SingleprocessAspen(boundaries)
    # simulation_result = MultiprocessAspen(boundaries, NumberOfProcessors)
    ###########################################################################
    # Formatting of data
    ###########################################################################
    Sim_out = np.array(simulation_result)
    print("datapoints:", len(simulation_result))
    dataset = pd.DataFrame({'temperature': Sim_out[:, 0], 'pressure': Sim_out[:, 1],
                            'x_CH4': Sim_out[:, 2], 'x_CO': Sim_out[:, 3],
                            'x_CO2': Sim_out[:, 4], 'x_H2': Sim_out[:, 5],
                            'x_H2O': Sim_out[:, 6]})
    ###########################################################################
    # Random forest regression
    ###########################################################################
    # Splitting the data into input (x) and output (y)
    x = dataset[['temperature', 'pressure']]  # Input
    y = dataset[['x_CH4', 'x_CO', 'x_CO2', 'x_H2', 'x_H2O']]  # Output
    # Splitting the dataset into training and testing sets (80/20)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=28)
    # Initializing the random forest regression model with 100 decision trees
    model = RandomForestRegressor(n_estimators=100, random_state=0)
    # Fitting the random forest regression model to the data
    model.fit(x_train, y_train)
    # Predicting the target values of the test set
    y_pred = model.predict(x_test)
    # Calculating root mean square error and mean error for evaluation
    rmse = float(format(np.sqrt(mean_squared_error(y_test, y_pred)), '.3f'))
    mean_error = rmse ** 0.5
    print("mean error:", mean_error)
    # Test prediction
    pred_array = pd.DataFrame({'temperature': [800], 'pressure': [8]})
    y_data = model.predict(pred_array)
    print('results:', y_data)
    plt.scatter(Sim_out[:, 0], Sim_out[:, 1])
    plt.show()
    print("elapsed time %s" % (time.time() - start_time))
I am expecting to receive JobsPerProcessor * NumberOfProcessors datapoints. For the single-threaded calculation, and for multiprocessed runs with fewer than 100 datapoints, this works. Beyond that it seems like workers are dropped or results are not fetched correctly.
I have tried a minimal case without the connection to Aspen Plus and that does generate the data set without issues.
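For reference, such an Aspen-free minimal case can also be written so that the `AsyncResult` handles from `apply_async` are kept and queried with `.get()`, which re-raises any exception a worker hit instead of silently skipping the callback. `dummy_worker` and the job counts here are placeholders, not part of the real script:

```python
import multiprocessing as mp

def dummy_worker(n_jobs):
    # Placeholder for RunAspenSimulation without any COM calls
    return [[i] for i in range(n_jobs)]

if __name__ == "__main__":
    results = []
    pool = mp.Pool(3)
    handles = [pool.apply_async(dummy_worker, (5,)) for _ in range(3)]
    pool.close()
    pool.join()
    for h in handles:
        # .get() re-raises any exception the worker hit, so a failed
        # job surfaces as a traceback instead of missing datapoints
        results.extend(h.get())
    print("datapoints:", len(results))  # 15
```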