I want to create a fruit ripeness detector.
I could use Roboflow's hosted API for that, but I want to train the model locally.
I downloaded the dataset from this page: https://universe.roboflow.com/mixed-fruit-annotation/fruit-ripness-detector/dataset/2/download/tfrecord
This is how I'm parsing the TFRecord file and training the Keras model:
import tensorflow as tf
import os

# Parsing and preprocessing function
def parse_tfrecord_fn(example):
    feature_description = {
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
        'image/object/class/text': tf.io.VarLenFeature(tf.string),
    }
    example = tf.io.parse_single_example(example, feature_description)

    # Decode and preprocess image
    image = tf.io.decode_jpeg(example['image/encoded'], channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]

    # Extract labels (assuming single label per image for simplicity)
    label = tf.sparse.to_dense(example['image/object/class/label'])[0]
    return image, label

# Input function for creating a dataset
def input_fn(file_path, batch_size=32):
    dataset = tf.data.TFRecordDataset(file_path)
    dataset = dataset.map(parse_tfrecord_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset

# Model building function
def build_model(input_shape, num_classes):
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes)
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model

def main():
    train_file_path = '<PATH TO Fruit_Ripness_Detector.v2i.tfrecord/train/Good-and-Bad-Fruits.tfrecord>'
    valid_file_path = '<PATH TO Fruit_Ripness_Detector.v2i.tfrecord/valid/Good-and-Bad-Fruits.tfrecord>'

    # Determine the number of classes in your dataset
    num_classes = 6

    # Define input shape based on your data
    input_shape = (224, 224, 3)

    # Define model
    model = build_model(input_shape, num_classes)

    # Train model
    train_dataset = input_fn(train_file_path)
    valid_dataset = input_fn(valid_file_path)
    model.fit(train_dataset,
              epochs=10,
              validation_data=valid_dataset)

    # Save model
    model.save('fruit_ripeness_detector_model.keras')

if __name__ == "__main__":
    main()
Currently I can pass an image to that model and it predicts the class.
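For reference, inference currently looks roughly like this (a minimal sketch; the class_names mapping is hypothetical and inferred from the class ids in the desired output below, and test.jpg is a placeholder path):

import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('fruit_ripeness_detector_model.keras')

# Hypothetical id-to-name mapping, taken from the desired output below
class_names = {0: 'Bad Bananarotation', 1: 'Bad ButterFruitrotation',
               2: 'Bad Orangerotation', 3: 'Good Bananarotation',
               4: 'Good ButterFruitrotation', 5: 'Good Orangerotation'}

# Preprocess exactly like parse_tfrecord_fn: decode, resize, scale to [0, 1]
image = tf.io.decode_jpeg(tf.io.read_file('test.jpg'), channels=3)
image = tf.image.resize(image, [224, 224]) / 255.0

logits = model.predict(tf.expand_dims(image, 0))   # model outputs logits
probs = tf.nn.softmax(logits, axis=-1).numpy()[0]  # convert to probabilities
class_id = int(np.argmax(probs))
print(class_names[class_id], float(probs[class_id]))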
I want to expand my code so that it also detects where each object is in the image.
So the output I need for an image should look like the following (not necessarily as JSON):
{
  "predictions": [
    {
      "x": 3910,
      "y": 2126.5,
      "width": 710,
      "height": 543,
      "confidence": 0.962,
      "class": "Good ButterFruitrotation",
      "class_id": 4
    },
    {
      "x": 2755.5,
      "y": 1673,
      "width": 755,
      "height": 1576,
      "confidence": 0.955,
      "class": "Good Bananarotation",
      "class_id": 3
    },
    {
      "x": 765,
      "y": 1091,
      "width": 500,
      "height": 698,
      "confidence": 0.947,
      "class": "Bad ButterFruitrotation",
      "class_id": 1
    },
    {
      "x": 3882.5,
      "y": 1017,
      "width": 461,
      "height": 428,
      "confidence": 0.946,
      "class": "Good Orangerotation",
      "class_id": 5
    },
    {
      "x": 1658.5,
      "y": 1643,
      "width": 781,
      "height": 1672,
      "confidence": 0.944,
      "class": "Bad Bananarotation",
      "class_id": 0
    },
    {
      "x": 866,
      "y": 2193.5,
      "width": 488,
      "height": 411,
      "confidence": 0.927,
      "class": "Bad Orangerotation",
      "class_id": 2
    }
  ]
}
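To be clear about the format: I believe x and y are the box center in pixels of the original image (that is how I understand the Roboflow API to report boxes). So turning a relative (ymin, xmin, ymax, xmax) box into one of these entries should just be arithmetic, roughly like this (a sketch; box_to_prediction is a hypothetical helper, and the relative-box convention is my assumption):

def box_to_prediction(box, img_w, img_h, class_id, class_name, confidence):
    # box holds (ymin, xmin, ymax, xmax) in [0, 1], relative to the image
    ymin, xmin, ymax, xmax = box
    return {
        'x': (xmin + xmax) / 2 * img_w,   # box center in pixels
        'y': (ymin + ymax) / 2 * img_h,
        'width': (xmax - xmin) * img_w,
        'height': (ymax - ymin) * img_h,
        'confidence': confidence,
        'class': class_name,
        'class_id': class_id,
    }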
How can I implement that?
I tried different things, e.g. returning the bounding box from the parsing function and feeding it into the model-building step, but it throws a bunch of errors.
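For example, one of those attempts replaced build_model with a two-headed model: the same backbone plus a regression head that predicts a single box as (ymin, xmin, ymax, xmax) in relative coordinates. A sketch of that attempt (build_detector is my hypothetical replacement for build_model):

def build_detector(input_shape, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    # Classification head (logits, as before)
    class_out = tf.keras.layers.Dense(num_classes, name='class_output')(x)
    # Box head: 4 values in [0, 1] for (ymin, xmin, ymax, xmax)
    bbox_out = tf.keras.layers.Dense(4, activation='sigmoid', name='bbox_output')(x)
    model = tf.keras.Model(inputs=inputs, outputs=[class_out, bbox_out])
    model.compile(optimizer='adam',
                  loss={'class_output': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                        'bbox_output': 'mse'})
    return model

For this to train, parse_tfrecord_fn would also have to return the box as a second target, e.g. (image, {'class_output': label, 'bbox_output': bbox}), and that is the part that keeps failing for me.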
Thank you in advance. 🙂