I have two files, points_small.txt and points_large.txt, each containing a list of points in 3D space. Each point is defined by its coordinates and an associated number, and is given in the following format: (x, y, z, n), where x, y, and z are floats, and n is an integer ranging from 0 to 100.
I need to identify the indices of four points that form a ‘valid’ tetrahedron with the smallest possible volume.
A tetrahedron is valid if the sum of the n values of its four points equals 100.
Example:
Suppose we have the following points:
(3.00, 4.00, 5.00, 22)
(2.00, 3.00, 3.00, 3)
(1.00, 2.00, 2.00, 4)
(3.50, 4.50, 5.50, 14)
(2.50, 3.50, 3.50, 24)
(6.70, 32.20, 93.0, 5)
(2.50, 3.00, 7.00, 40)
The tetrahedron formed by the points with indices 0, 3, 4, 6 is the only valid tetrahedron in this example, because 22 + 14 + 24 + 40 = 100.
The output should list the zero-based indices of these four points in ascending order.
Hence the output is [0, 3, 4, 6].
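For reference, the volume of that tetrahedron can be checked directly as one sixth of the absolute determinant of the edge-vector matrix (the coordinates below are taken from the example above):

import numpy as np

# Vertices of the valid tetrahedron (points 0, 3, 4, 6), coordinates only
a, b, c, d = (np.array(p) for p in
              [(3.0, 4.0, 5.0), (3.5, 4.5, 5.5), (2.5, 3.5, 3.5), (2.5, 3.0, 7.0)])
print(abs(np.linalg.det(np.vstack([a - d, b - d, c - d]))) / 6)  # 0.25 / 6 ≈ 0.0417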
I have the following code, which works for smaller data sets:
import numpy as np
from itertools import combinations, combinations_with_replacement, product
from collections import defaultdict, Counter
from concurrent.futures import ProcessPoolExecutor
# Volume of a tetrahedron from its four vertices: |det([a-d, b-d, c-d])| / 6
def tetrahedron_volume(a, b, c, d):
    mat = np.array([
        [a[0] - d[0], a[1] - d[1], a[2] - d[2]],
        [b[0] - d[0], b[1] - d[1], b[2] - d[2]],
        [c[0] - d[0], c[1] - d[1], c[2] - d[2]]
    ])
    return np.abs(np.linalg.det(mat)) / 6
# Read the points from the file
def read_points(file_path):
    points = []
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip().replace('(', '').replace(')', '')
            if not line:
                continue  # skip blank lines
            point = tuple(map(float, line.split(',')))
            points.append(point)
    return points
# Group points by weight, then enumerate only 4-point combinations whose
# weights sum to exactly 100 (repeated weight values are allowed)
def filter_points_by_weight(points):
    weight_groups = defaultdict(list)
    for idx, point in enumerate(points):
        weight_groups[int(point[3])].append((idx, point))
    valid_combinations = []
    # Weight multisets (with repeats) that sum to 100
    for comb in combinations_with_replacement(sorted(weight_groups), 4):
        if sum(comb) != 100:
            continue
        counts = Counter(comb)
        # For each weight in the multiset, choose that many points from its group
        pools = [combinations(weight_groups[w], k) for w, k in counts.items()]
        for choice in product(*pools):
            valid_combinations.append(tuple(p for grp in choice for p in grp))
    return valid_combinations
# Find the smallest tetrahedron from a subset of candidate combinations
def find_smallest_tetrahedron_subset(tetrahedron_candidates):
    min_volume = float('inf')
    best_tetrahedron = None
    for tetra in tetrahedron_candidates:
        indices, tetra_points = zip(*tetra)
        volume = tetrahedron_volume(*tetra_points)
        if volume < min_volume:
            min_volume = volume
            best_tetrahedron = indices
    return best_tetrahedron, min_volume
# Parallelized search for the smallest tetrahedron
def find_smallest_tetrahedron(points, num_workers=8):
    tetrahedron_candidates = filter_points_by_weight(points)
    if not tetrahedron_candidates:
        return None, float('inf')  # avoid division by zero below
    min_volume = float('inf')
    best_tetrahedron = None
    num_chunks = min(num_workers, len(tetrahedron_candidates))  # no more chunks than candidates
    chunk_size = max(1, len(tetrahedron_candidates) // num_chunks)
    chunks = [tetrahedron_candidates[i:i + chunk_size]
              for i in range(0, len(tetrahedron_candidates), chunk_size)]
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(find_smallest_tetrahedron_subset, chunk) for chunk in chunks]
        for future in futures:
            result_tetrahedron, result_volume = future.result()
            if result_volume < min_volume:
                min_volume = result_volume
                best_tetrahedron = result_tetrahedron
    return best_tetrahedron, min_volume
# Main processing function
def process_file(file_path, num_workers=8):
    points = read_points(file_path)
    best_tetrahedron, min_volume = find_smallest_tetrahedron(points, num_workers)
    if best_tetrahedron:
        indices = sorted(best_tetrahedron)
        print(f"Smallest Tetrahedron Points Indices ({file_path}): {indices}")
        print(f"Volume: {min_volume}")
    else:
        print(f"No tetrahedron found with weight sum 100 in {file_path}")
# Test with the provided example
test_points = [
    (3.00, 4.00, 5.00, 22),
    (2.00, 3.00, 3.00, 3),
    (1.00, 2.00, 2.00, 4),
    (3.50, 4.50, 5.50, 14),
    (2.50, 3.50, 3.50, 24),
    (6.70, 32.20, 93.0, 5),
    (2.50, 3.00, 7.00, 40)
]
def process_points(points, num_workers=8):
    best_tetrahedron, min_volume = find_smallest_tetrahedron(points, num_workers)
    if best_tetrahedron:
        indices = sorted(best_tetrahedron)
        print(f"Smallest Tetrahedron Points Indices: {indices}")
        print(f"Volume: {min_volume}")
    else:
        print("No tetrahedron found with weight sum 100")
# Guard so ProcessPoolExecutor workers don't re-execute this module's top level
if __name__ == '__main__':
    # Test with provided test points
    process_points(test_points)
    # Process the points_small.txt file
    process_file('/work/points_small.txt')
    # Process the points_large.txt file
    process_file('/work/points_large.txt')
However, it is still not efficient for large input files with over 1500 data points. How can I make my code more optimised for the large input size?
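For context on the scale of the brute force: even before any weight filtering, ~1500 points yield on the order of 2 × 10^11 possible 4-point subsets, so enumerating and materializing candidate tetrahedra up front is the bottleneck:

from math import comb
print(comb(1500, 4))  # 210,094,780,875 four-point subsets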