I have a python script which iterates through all of the videos in a directory and performs inference using a neural network to classify segments of the videos, and saves the inference results to a database which is saved as a pickle file. The idea each loop is to:
-
Open the existing pickle file
-
Assign its contents to a dictionary (ShotDB)
-
Perform inference
-
Append the results to the dictionary
-
Save the updated dictionary as a pickle (overwrite the original)
Somehow, I’ve messed up my pickle file such that when I try to load it subsequent times, I get an end of file error, and I’m not sure why.
As for my code, this part runs at the start of the script to load ShotDB:
# Check if ShotDB already exists in the output directory
existing_shotdb_path = os.path.join(output_directory, "shotdb.pkl")
if os.path.exists(existing_shotdb_path):
print("Loading existing ShotDB...")
with open(existing_shotdb_path, "rb") as f:
ShotDB = pickle.load(f)
else:
ShotDB = {}
Then, for my main loop, I have:
# Main loop to process each video in the input directories
for video_path in video_files:
# Skip files that have already been processed
if video_path in ShotDB:
print(f"Skipping already processed video: {video_path}")
progress_bar_main.update(1)
continue
# Process the video and update ShotDB
process_video(video_path, ShotDB)
# Save ShotDB after processing each video
with open(existing_shotdb_path, "wb") as f:
pickle.dump(ShotDB, f)
# Update the progress bar
progress_bar_main.update(1)
If it helps, here’s the process_video() function:
# Function to process a video file
def process_video(video_path, ShotDB, debug = False):
try:
# Check video height
video_capture = cv2.VideoCapture(video_path)
video_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
video_capture.release()
if video_height < minimum_video_height:
print(f"Skipping video {video_path} due to insufficient height ({video_height} pixels).")
return
print(f"nProcessing video: {video_path}")
# Initialize variables
inference_results = {}
no_improvement_counter = 0
reset_counter = False # Initialize reset_counter here
# Initialize variables to keep track of the best maximum probabilities for each category
best_max_probabilities = [0] * num_categories
# Loop until patience is reached
while no_improvement_counter < patience:
if debug:
print(f"nNo improvement counter: {no_improvement_counter} / {patience}")
# Randomly sample a start frame
start_frame = np.random.randint(0, total_frames - num_frames*2)
if debug:
print(f"Start frame: {start_frame}, (0, {total_frames - num_frames*2})")
# Initialize list to store inference results for sampled frame and surrounding frames
all_predictions = []
# Iterate over frame offsets
for offset in [-num_frames, 0, num_frames]:
#for offset in [0]:
if debug:
print(f"Start Frame + Offset: {start_frame+offset}")
# Get frames from the video
frames = get_frames(video_path, start_frame=start_frame+offset, num_frames=num_frames, target_height=target_height, background_subtraction=background_subtraction, debug=False)
# Check if frames is None
if frames is None:
print(f"Error reading frames for video: {video_path}")
return
# Convert frames to a numpy array
frames_array = np.array(frames)
# Reshape frames to match the input shape expected by the model
frames_array = np.expand_dims(frames_array, axis=0) # Add batch dimension
# Perform inference with verbose=0
predictions = model.predict(frames_array, verbose=0)
if debug:
print(f"Predictions: {predictions}")
# Store inference results
all_predictions.append(predictions[0])
# Average the predictions
averaged_predictions = np.mean(all_predictions, axis=0)
if debug:
print(f"Averaged predictions: {averaged_predictions}")
print(f"Best max probabilities: {best_max_probabilities}")
# Update the best maximum probabilities for each category
for category_index in range(num_categories):
if averaged_predictions[category_index] > best_max_probabilities[category_index]:
best_max_probabilities[category_index] = averaged_predictions[category_index]
reset_counter = True
if debug:
print(f"Updated best max probabilities for category {category_index}: {best_max_probabilities[category_index]}")
# Store inference results for the current start frame
inference_results[start_frame] = averaged_predictions
# Increment no_improvement_counter if needed
if reset_counter:
no_improvement_counter = 0
reset_counter = False
else:
no_improvement_counter += 1
# Update ShotDB with inference results for this video
print(f"Adding {len(inference_results)} entries.")
ShotDB[video_path] = inference_results
except Exception as e:
print(f"Error processing video: {video_path}. {e}")
Here’s the error that happened:
Processing video: F:FootageoJnM30dO.mp4
[ WARN:[email protected]] global cap_ffmpeg.cpp:441 cv_capture_retrieve FFmpeg: Exception is raised: OpenCV(4.9.0-pre) /build/opencv/modules/core/src/alloc.cpp:73: error: (-4:Insufficient memory) Failed to allocate 2764800 bytes in function 'OutOfMemoryError'
Error processing video: F:FootageoJnM30dO.mp4. Graph execution error:
Detected at node 'sequential/time_distributed/Reshape_1' defined at (most recent call last):
File "c:PythonShotDB_Creator.py", line 171, in <module>
process_video(video_path, ShotDB)
File "c:PythonShotDB_Creator.py", line 91, in process_video
predictions = model.predict(frames_array, verbose=0)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 2253, in predict
tmp_batch_outputs = self.predict_function(iterator)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 2041, in predict_function
return step_function(self, iterator)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 2027, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 2015, in run_step
outputs = model.predict_step(data)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 1983, in predict_step
return self(x, training=False)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginetraining.py", line 557, in __call__
return super().__call__(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginebase_layer.py", line 1097, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginesequential.py", line 410, in call
return super().call(inputs, training=training, mask=mask)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginefunctional.py", line 510, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginefunctional.py", line 667, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasenginebase_layer.py", line 1097, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskerasutilstraceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "C:Usersthuhgminiconda3envstflibsite-packageskeraslayersrnntime_distributed.py", line 264, in call
y = tf.__internal__.nest.map_structure_up_to(
Node: 'sequential/time_distributed/Reshape_1'
Input to reshape is a tensor with 107520 values, but the requested shape requires a multiple of 268800
Traceback (most recent call last):
File "c:PythonShotDB_Creator.py", line 175, in <module>
pickle.dump(ShotDB, f)
MemoryError
Processing Videos: 16%|███████████████████████████▉ | 1130/7214 [24:11:11<130:13:17, 77.05s/it]
So, presumably, I had an out of memory error, and now somehow I’ve messed up my pickle file. Now, when I try to run the script subsequent (after removing the offending file), I get this EOFError and cannot open the pickle:
PS C:Python> & C:/Users/thuhg/miniconda3/envs/tf/python.exe c:/Python/ShotDB_Creator.py
Number of categories: 9
Loading existing ShotDB...
Traceback (most recent call last):
File "c:PythonShotDB_Creator.py", line 134, in <module>
ShotDB = pickle.load(f)
EOFError: Ran out of input
From what I can gather, it sounds like there’s been some type of issue saving the pickle file. The file is not empty–it’s 8,195 KB, so presumably the data from the previous ShotDB dictionary is still there, but I’m not sure what happened to the file such that now I am getting an end of file error when trying to read it. (I thought that if there was an error updating the ShotDB dictionary, we would break out of the loop and not try to save the pickle).
Any ideas?
Robert is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.