I’m trying to create and train an image classifier (with tensorflow 2.16.1, in pycharm).
I processed the data as follows:
def data_processing(train_dir, validation_dir, test_dir ):
# Data augmentation for training
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1. / 255,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest',
)
# For validation and testing, just rescale
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
# Load training images
train_generator, train_labels = train_datagen.flow_from_directory(
train_dir,
target_size=(150, 150),
batch_size=32,
class_mode='binary',
)
# Load validation images
validation_generator = validation_datagen.flow_from_directory(
validation_dir,
target_size=(150, 150),
batch_size=32,
class_mode='binary',
shuffle=False
)
test_generator = validation_datagen.flow_from_directory(
test_dir,
target_size=(150, 150),
batch_size=32,
class_mode='binary',
shuffle= False
)
return train_generator, validation_generator, test_generator```
And then created and trained the model like this:
def create_model():
#building the model (classic CNN, 3 layers of Conv2D + MaxPooling + fully connected (with dropout)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5)) #dropt common rule of thumb
# Output Layer
model.add(tf.keras.layers.Dense(1, activation='sigmoid')) # For binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) #adam optimizer for adaptive learning rate
return model
def train_model(model, train_generator, validation_generator, epochs):
# Train the model
# Set the number of steps (batches) per epoch
steps_per_epoch = math.ceil(train_generator.samples / train_generator.batch_size)
validation_steps = math.ceil(validation_generator.samples / validation_generator.batch_size)
print(steps_per_epoch, validation_steps)
history = model.fit(
train_generator,
steps_per_epoch=steps_per_epoch,
epochs=epochs,
validation_data=validation_generator,
validation_steps=validation_steps,
)
I’m getting the following warning: UserWarning: Your PyDataset
class should call super().__init__(**kwargs)
in its constructor. **kwargs
can include workers
, use_multiprocessing
, max_queue_size
. Do not pass these arguments to fit()
, as they will be ignored.
self._warn_if_super_not_called() which I do not succeed to trace back th issue
and also encounter UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least steps_per_epoch * epochs
batches. You may need to use the .repeat()
function when building your dataset.
self.gen.throw(value) which does not make sense to me:
train data is of 4704 images and validation data is of 496 images.
step size are calculated according to the batch size.
what am I missing?
tried to change the batch size to 1 and the stps to the dataset size
tried to move to tensorflow 2.17
1