I want to create a fruit ripeness detector.
I could use Roboflow's hosted API for that, but I want to train the model locally.
I downloaded the dataset from this page: https://universe.roboflow.com/mixed-fruit-annotation/fruit-ripness-detector/dataset/2/download/tfrecord
This is how I'm parsing the TFRecord file and training the Keras model:
import tensorflow as tf
import os

# Parsing and preprocessing function
def parse_tfrecord_fn(example):
    feature_description = {
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
        'image/object/class/text': tf.io.VarLenFeature(tf.string),
    }
    example = tf.io.parse_single_example(example, feature_description)

    # Decode and preprocess image
    image = tf.io.decode_jpeg(example['image/encoded'], channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]

    # Extract labels (assuming single label per image for simplicity)
    label = tf.sparse.to_dense(example['image/object/class/label'])[0]
    return image, label

# Input function for creating a dataset
def input_fn(file_path, batch_size=32):
    dataset = tf.data.TFRecordDataset(file_path)
    dataset = dataset.map(parse_tfrecord_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset

# Model building function
def build_model(input_shape, num_classes):
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes)
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model

def main():
    train_file_path = '<PATH TO Fruit_Ripness_Detector.v2i.tfrecord/train/Good-and-Bad-Fruits.tfrecord>'
    valid_file_path = '<PATH TO Fruit_Ripness_Detector.v2i.tfrecord/valid/Good-and-Bad-Fruits.tfrecord>'

    # Determine the number of classes in your dataset
    num_classes = 6

    # Define input shape based on your data
    input_shape = (224, 224, 3)

    # Define model
    model = build_model(input_shape, num_classes)

    # Train model
    train_dataset = input_fn(train_file_path)
    valid_dataset = input_fn(valid_file_path)
    model.fit(train_dataset,
              epochs=10,
              validation_data=valid_dataset)

    # Save model
    model.save('fruit_ripeness_detector_model.keras')

if __name__ == "__main__":
    main()
Currently I can pass an image to that model and it predicts the class.
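For reference, inference currently looks roughly like this (a minimal sketch; the class_names mapping is hypothetical and inferred from the class ids in the desired output below, and test.jpg is a placeholder path):

import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('fruit_ripeness_detector_model.keras')

# Hypothetical id-to-name mapping, taken from the desired output below
class_names = {0: 'Bad Bananarotation', 1: 'Bad ButterFruitrotation',
               2: 'Bad Orangerotation', 3: 'Good Bananarotation',
               4: 'Good ButterFruitrotation', 5: 'Good Orangerotation'}

# Preprocess exactly like parse_tfrecord_fn: decode, resize, scale to [0, 1]
image = tf.io.decode_jpeg(tf.io.read_file('test.jpg'), channels=3)
image = tf.image.resize(image, [224, 224]) / 255.0

logits = model.predict(tf.expand_dims(image, 0))   # model outputs logits
probs = tf.nn.softmax(logits, axis=-1).numpy()[0]  # convert to probabilities
class_id = int(np.argmax(probs))
print(class_names[class_id], float(probs[class_id]))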
I want to expand my code so that it also detects where each object is in the image.
So the output I need for an image should look like the following (not necessarily as JSON):
{
  "predictions": [
    {
      "x": 3910,
      "y": 2126.5,
      "width": 710,
      "height": 543,
      "confidence": 0.962,
      "class": "Good ButterFruitrotation",
      "class_id": 4
    },
    {
      "x": 2755.5,
      "y": 1673,
      "width": 755,
      "height": 1576,
      "confidence": 0.955,
      "class": "Good Bananarotation",
      "class_id": 3
    },
    {
      "x": 765,
      "y": 1091,
      "width": 500,
      "height": 698,
      "confidence": 0.947,
      "class": "Bad ButterFruitrotation",
      "class_id": 1
    },
    {
      "x": 3882.5,
      "y": 1017,
      "width": 461,
      "height": 428,
      "confidence": 0.946,
      "class": "Good Orangerotation",
      "class_id": 5
    },
    {
      "x": 1658.5,
      "y": 1643,
      "width": 781,
      "height": 1672,
      "confidence": 0.944,
      "class": "Bad Bananarotation",
      "class_id": 0
    },
    {
      "x": 866,
      "y": 2193.5,
      "width": 488,
      "height": 411,
      "confidence": 0.927,
      "class": "Bad Orangerotation",
      "class_id": 2
    }
  ]
}
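To be clear about the format: I believe x and y are the box center in pixels of the original image (that is how I understand the Roboflow API to report boxes). So turning a relative (ymin, xmin, ymax, xmax) box into one of these entries should just be arithmetic, roughly like this (a sketch; box_to_prediction is a hypothetical helper, and the relative-box convention is my assumption):

def box_to_prediction(box, img_w, img_h, class_id, class_name, confidence):
    # box holds (ymin, xmin, ymax, xmax) in [0, 1], relative to the image
    ymin, xmin, ymax, xmax = box
    return {
        'x': (xmin + xmax) / 2 * img_w,   # box center in pixels
        'y': (ymin + ymax) / 2 * img_h,
        'width': (xmax - xmin) * img_w,
        'height': (ymax - ymin) * img_h,
        'confidence': confidence,
        'class': class_name,
        'class_id': class_id,
    }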
How can I implement that?
I tried different things, e.g. returning the bounding box from the parsing function and feeding it into the model-building step, but it throws a bunch of errors.
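For example, one of those attempts replaced build_model with a two-headed model: the same backbone plus a regression head that predicts a single box as (ymin, xmin, ymax, xmax) in relative coordinates. A sketch of that attempt (build_detector is my hypothetical replacement for build_model):

def build_detector(input_shape, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    # Classification head (logits, as before)
    class_out = tf.keras.layers.Dense(num_classes, name='class_output')(x)
    # Box head: 4 values in [0, 1] for (ymin, xmin, ymax, xmax)
    bbox_out = tf.keras.layers.Dense(4, activation='sigmoid', name='bbox_output')(x)
    model = tf.keras.Model(inputs=inputs, outputs=[class_out, bbox_out])
    model.compile(optimizer='adam',
                  loss={'class_output': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                        'bbox_output': 'mse'})
    return model

For this to train, parse_tfrecord_fn would also have to return the box as a second target, e.g. (image, {'class_output': label, 'bbox_output': bbox}), and that is the part that keeps failing for me.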
Thank you in advance. 🙂