I made a script that performs tri-linear interpolation on a set of points using pandas for data handling and Numba for computational efficiency. Currently, it requires $mathcal{O}(1) text{ s}$ if considering $10^{5}$ points.
This is the code, assuming some test tabulated data:
import numpy as np
import pandas as pd
from numba import jit
# Define the symbolic function
def custom_function(x, y, z):
return np.sin(y) * np.cos(3 * y)**(1 + 5 * x) * np.exp(-np.sqrt(z**2 + x**2) * np.cos(3 * y) / 20) / z
# Define the grid ranges
x_range = np.arange(0.5, 5.5, 0.5)
y_range = np.logspace(np.log10(0.0001), np.log10(0.1), int((np.log10(0.1) - np.log10(0.0001)) / 0.1) + 1)
z_range = np.arange(0.5, 101, 5)
# Generate the DataFrame
data = {'x': [], 'y': [], 'z': [], 'f': []}
for x in x_range:
for y in y_range:
for z in z_range:
data['x'].append(x)
data['y'].append(y)
data['z'].append(z)
data['f'].append(custom_function(x, y, z))
df = pd.DataFrame(data)
# Define the tri-linear interpolation function using Numba
@jit(nopython=True, parallel=True)
def trilinear_interpolation(rand_points, grid_x, grid_y, grid_z, distr):
results = np.empty(len(rand_points))
len_y, len_z = grid_y.shape[0], grid_z.shape[0]
for i in range(len(rand_points)):
x, y, z = rand_points[i]
idx_x1 = np.searchsorted(grid_x, x) - 1
idx_x2 = idx_x1 + 1
idx_y1 = np.searchsorted(grid_y, y) - 1
idx_y2 = idx_y1 + 1
idx_z1 = np.searchsorted(grid_z, z) - 1
idx_z2 = idx_z1 + 1
idx_x1 = max(0, min(idx_x1, len(grid_x) - 2))
idx_x2 = max(1, min(idx_x2, len(grid_x) - 1))
idx_y1 = max(0, min(idx_y1, len_y - 2))
idx_y2 = max(1, min(idx_y2, len_y - 1))
idx_z1 = max(0, min(idx_z1, len_z - 2))
idx_z2 = max(1, min(idx_z2, len_z - 1))
x1, x2 = grid_x[idx_x1], grid_x[idx_x2]
y1, y2 = grid_y[idx_y1], grid_y[idx_y2]
z1, z2 = grid_z[idx_z1], grid_z[idx_z2]
z111 = distr[idx_x1, idx_y1, idx_z1]
z211 = distr[idx_x2, idx_y1, idx_z1]
z121 = distr[idx_x1, idx_y2, idx_z1]
z221 = distr[idx_x2, idx_y2, idx_z1]
z112 = distr[idx_x1, idx_y1, idx_z2]
z212 = distr[idx_x2, idx_y1, idx_z2]
z122 = distr[idx_x1, idx_y2, idx_z2]
z222 = distr[idx_x2, idx_y2, idx_z2]
xd = (x - x1) / (x2 - x1)
yd = (y - y1) / (y2 - y1)
zd = (z - z1) / (z2 - z1)
c00 = z111 * (1 - xd) + z211 * xd
c01 = z112 * (1 - xd) + z212 * xd
c10 = z121 * (1 - xd) + z221 * xd
c11 = z122 * (1 - xd) + z222 * xd
c0 = c00 * (1 - yd) + c10 * yd
c1 = c01 * (1 - yd) + c11 * yd
result = c0 * (1 - zd) + c1 * zd
results[i] = np.exp(result)
return results
# Provided x value
fixed_x = 2.5 # example provided x value
# Random points for which we need to perform tri-linear interpolation
num_rand_points = 100000 # Large number of random points
rand_points = np.column_stack((
np.full(num_rand_points, fixed_x),
np.random.uniform(0.0001, 0.1, num_rand_points),
np.random.uniform(0.5, 101, num_rand_points)
))
# Prepare the grid and distribution values
grid_x = np.unique(df['x'])
grid_y = np.unique(df['y'])
grid_z = np.unique(df['z'])
distr = np.zeros((len(grid_x), len(grid_y), len(grid_z)))
for i in range(len(df)):
ix = np.searchsorted(grid_x, df['x'].values[i])
iy = np.searchsorted(grid_y, df['y'].values[i])
iz = np.searchsorted(grid_z, df['z'].values[i])
distr[ix, iy, iz] = df['f'].values[i]
# Perform tri-linear interpolation
interpolated_values = trilinear_interpolation(rand_points, grid_x, grid_y, grid_z, distr)
# Display the results
for point, value in zip(rand_points[:10], interpolated_values[:10]):
print(f"Point {point}: Interpolated value: {value}")
I am wondering if there are any optimization techniques or best practices that I can apply to further speed up this code, especially given that all x-values are fixed. Any suggestions or advice would be greatly appreciated!