I am working on a university project. My professor wanted me to work on this “[https://www.kaggle.com/code/paultimothymooney/medical-symptoms-text-and-audio-classification]” project.
He wants me to add ensemble models to this project. I am currently using XGBoost, GBM and VGG16.
XGBoost and GBM works fine but when it comes to VGG16 or CNN or PCENAS I always get low accuracy like this below:
Epoch 1/20 167/167 41s 238ms/step - accuracy: 0.0000e+00 - loss: 11.0473 - val_accuracy: 0.0000e+00 - val_loss: 8.8739
Epoch 2/20 167/167 40s 238ms/step - accuracy: 0.0000e+00 - loss: 8.7834 - val_accuracy: 0.0000e+00 - val_loss: 8.9999
I am currently using the code below:
import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import cv2
import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import xgboost as xgb
import lightgbm as lgb
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import VGG16
# Define constants
OVERVIEW_PATH = r'G:svkpyoverview-of-recordings.csv'
BASE_PATH = r'G:svkpyrecordings'
SPECTROGRAMS_DIR = 'spectrograms'
SUBDIRS = ['test', 'train', 'validate']
# Load CSV file
overview = pd.read_csv(OVERVIEW_PATH)
# Create spectrograms directory
os.makedirs(SPECTROGRAMS_DIR, exist_ok=True)
# Text Data Processing
texts = overview['phrase']
labels = overview['prompt']
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(texts).toarray()
# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)
num_classes = len(np.unique(y))
# Normalize the data
scaler = StandardScaler()
X = scaler.fit_transform(X)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train XGBoost model
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)
# Evaluate XGBoost model
y_pred_xgb = xgb_model.predict(X_test)
print('XGBoost Accuracy:', accuracy_score(y_test, y_pred_xgb))
print('XGBoost Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_xgb))
# Train GBM model
gbm_model = lgb.LGBMClassifier()
gbm_model.fit(X_train, y_train)
# Evaluate GBM model
y_pred_gbm = gbm_model.predict(X_test)
print('GBM Accuracy:', accuracy_score(y_test, y_pred_gbm))
print('GBM Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_gbm))
# Audio Data Processing
def find_audio_file(filename):
for subdir in SUBDIRS:
audio_path = os.path.join(BASE_PATH, subdir, filename)
if os.path.exists(audio_path):
return audio_path
return None
# Convert audio files to spectrograms and save
audio_files = overview['file_name']
for file in tqdm(audio_files):
audio_path = find_audio_file(file)
if audio_path:
try:
y, sr = librosa.load(audio_path, sr=None)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
pcen = librosa.pcen(S, sr=sr)
plt.figure(figsize=(10, 4))
librosa.display.specshow(pcen, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
plt.title('PCEN spectrogram')
plt.tight_layout()
plt.savefig(f'{SPECTROGRAMS_DIR}/{file}.png')
plt.close()
except Exception as e:
print(f"Error processing {audio_path}: {e}")
else:
print(f"File not found: {file}")
# Load spectrogram files
spectrogram_files = glob.glob(f'{SPECTROGRAMS_DIR}/*.png')
images = []
labels = []
for file in tqdm(spectrogram_files):
image = cv2.imread(file)
image = cv2.resize(image, (224, 224))
images.append(image)
label = os.path.basename(file).split('_')[2]
labels.append(label)
images = np.array(images)
labels = label_encoder.fit_transform(labels)
num_classes_audio = len(np.unique(labels))
labels = to_categorical(labels, num_classes=num_classes_audio)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)
# Create VGG16 model with transfer learning
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(num_classes_audio, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=output)
# Freeze the layers in the base model
for layer in base_model.layers:
layer.trainable = False
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, callbacks=[early_stopping, reduce_lr])
# Evaluate the VGG16 model
_, accuracy = model.evaluate(X_test, y_test)
print('VGG16 Transfer Learning Model Accuracy:', accuracy)
# Save the model
model.save('audio_model_vgg16.keras')
# Load saved model
model = load_model('audio_model_vgg16.keras')
# Evaluate saved model
_, accuracy = model.evaluate(X_test, y_test)
print('Saved VGG16 Model Accuracy:', accuracy)
Can someone help me with my problem please? I don't really know about coding this much. Thanks.
I tried using other models which are: VGG16,CNN,RNN,PCENAS,VGG16-LSTM. Results were the same.
New contributor
Giray is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.