GTZAN Music Genre Classification: Tensor Mismatch and Unrecognized .wav Files

I'm using the GTZAN dataset to build a CNN music genre classifier. My .wav files are stored in a subdirectory called 'genres_original' inside the root directory, 'Data'. I'm having two issues: my .wav files aren't being recognized, even though I'm certain they are in a format PyTorch supports (.wav), and I'm hitting a tensor mismatch that I suspect is related to the .wav file problem.
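
For reference, here's a quick check that should list the files whether they sit directly in genres_original or in per-genre subfolders (the standard GTZAN layout is one subfolder per genre, e.g. Data/genres_original/blues/blues.00067.wav; I'm assuming that nesting below):

import glob
import os

data_dir = 'Data/genres_original'
# '**' with recursive=True matches zero or more directory levels, so this
# finds .wav files both directly in data_dir and inside genre subfolders.
wav_files = glob.glob(os.path.join(data_dir, '**', '*.wav'), recursive=True)
print(len(wav_files), wav_files[:3])

Here is my full script: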

import os
import glob
import random
import torch
import torchaudio
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Path to the directory containing .wav files
data_dir = 'Data/genres_original'

# List all .wav files
wav_files = glob.glob(os.path.join(data_dir, '*.wav'))
# print(wav_files)

# Extract labels and file paths
data = []
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.endswith('.wav'):
            wav_file = os.path.join(root, file)
            genre, _ = os.path.splitext(file)
            genre = genre.split('.')[0]
            data.append((file, genre))

print(data)

# Convert to DataFrame
df = pd.DataFrame(data, columns=['file_path', 'label'])

# Mapping of labels to indices
label_to_index = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Convert labels to indices
df['label'] = df['label'].map(label_to_index)

# print("Genre to int mapping:")
# print(label_to_index)

class AudioUtil():
    @staticmethod
    def open(audio_file):
        sig, sr = torchaudio.load(str(audio_file))
        return (sig, sr)

    @staticmethod
    def rechannel(aud, new_channel):
        sig, sr = aud
        if sig.shape[0] == new_channel:
            return aud
        if new_channel == 1:
            sig = sig.mean(dim=0, keepdim=True)
        else:
            sig = sig.expand(new_channel, -1)
        return (sig, sr)

    @staticmethod
    def resample(aud, new_sr):
        sig, sr = aud
        if sr == new_sr:
            return aud
        num_channels = sig.shape[0]
        resig = torchaudio.transforms.Resample(sr, new_sr)(sig[:1, :])
        if num_channels > 1:
            retwo = torchaudio.transforms.Resample(sr, new_sr)(sig[1:, :])
            resig = torch.cat([resig, retwo])
        return (resig, new_sr)

    @staticmethod
    def pad_trunc(aud, max_ms):
        sig, sr = aud
        num_rows, sig_len = sig.shape
        max_len = sr // 1000 * max_ms
        if sig_len > max_len:
            sig = sig[:, :max_len]
        elif sig_len < max_len:
            pad_begin_len = random.randint(0, max_len - sig_len)
            pad_end_len = max_len - sig_len - pad_begin_len
            pad_begin = torch.zeros((num_rows, pad_begin_len))
            pad_end = torch.zeros((num_rows, pad_end_len))
            sig = torch.cat((pad_begin, sig, pad_end), 1)
        return (sig, sr)

    @staticmethod
    def spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None):
        sig, sr = aud
        top_db = 80
        sgram = torchaudio.transforms.MelSpectrogram(
            sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
        sgram = torchaudio.transforms.AmplitudeToDB(top_db=top_db)(sgram)
        return sgram

class GenreDataset(Dataset):
    def __init__(self, df, duration=5000, sr=22050, transform=None):
        self.df = df
        self.duration = duration
        self.sr = sr
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = self.df.iloc[idx, 0]
        label = self.df.iloc[idx, 1]
        # Convert the label to an integer if it's a string
        if isinstance(label, str):
            label = genre_to_int[label]
        # Split the file path to get the file name and extension separately
        file_dir, file_name = os.path.split(file_path)
        file_name_parts = file_name.split('.')
        # Assume the last part is the extension
        file_ext = file_name_parts[-1]
        # Reconstruct the file path with the correct extension
        corrected_file_path = os.path.join(file_dir, '.'.join(file_name_parts[:-1]) + '.' + file_ext)
        try:
            aud = AudioUtil.open(corrected_file_path)
        except Exception as e:
            print(f"Error opening file {corrected_file_path}: {e}")
            return None, None
        if aud is None:
            return None, None
        aud = AudioUtil.resample(aud, self.sr)
        aud = AudioUtil.rechannel(aud, 1)
        aud = AudioUtil.pad_trunc(aud, self.duration)
        sgram = AudioUtil.spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None)
        if self.transform:
            sgram = self.transform(sgram)
        return sgram, torch.tensor(label, dtype=torch.long)

# Ensure reproducibility
random.seed(42)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Split into train and validation sets (80% train, 20% validation)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_dataset = GenreDataset(train_df)
val_dataset = GenreDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, drop_last=True)

class AudioClassifier(nn.Module):
    def __init__(self):
        super(AudioClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AudioClassifier().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            if outputs.size(0) != labels.size(0):
                print(f"Mismatch in batch sizes: outputs={outputs.size(0)}, labels={labels.size(0)}")
                print(f"Outputs shape: {outputs.shape}")
                print(f"Labels shape: {labels.shape}")
                continue
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')
        
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels.data)
                total += labels.size(0)
        val_acc = correct.double() / total
        print(f'Validation Accuracy: {val_acc:.4f}')

train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20)

When I print the file names, I get

Data/genres_original/blues_00067.wav 

and so on instead of just

blues_00067.wav,

which I think is what's stopping torchaudio from recognizing the format, even though they are .wav files. I tried the following to solve this:

# Extract labels and file paths
data = []
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.endswith('.wav'):
            wav_file = os.path.join(root, file)
            genre, _ = os.path.splitext(file)
            genre = genre.split('.')[0]
            data.append((file, genre))

print(data)

This prints [('blues.00067.wav', 'blues'), ('country.00054.wav', 'country'), ...], which is what I am expecting.
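
To try to rule out torchaudio itself, a direct load with an explicit path should succeed as long as the file can actually be found (the path below is hypothetical; adjust it to wherever one file really sits on disk):

import os
import torchaudio

test_path = 'Data/genres_original/blues/blues.00067.wav'  # hypothetical path
print(os.path.exists(test_path))  # False would point at the path, not the format
sig, sr = torchaudio.load(test_path)
print(sig.shape, sr)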

Correct filenames alone still did not solve the unrecognized-file-format issue, so I created the corrected_file_path variable in __getitem__:

    def __getitem__(self, idx):
        file_path = self.df.iloc[idx, 0]
        label = self.df.iloc[idx, 1]
        # Convert the label to an integer if it's a string
        if isinstance(label, str):
            label = genre_to_int[label]
        # Split the file path to get the file name and extension separately
        file_dir, file_name = os.path.split(file_path)
        file_name_parts = file_name.split('.')
        # Assume the last part is the extension
        file_ext = file_name_parts[-1]
        # Reconstruct the file path with the correct extension
        corrected_file_path = os.path.join(file_dir, '.'.join(file_name_parts[:-1]) + '.' + file_ext)
        try:
            aud = AudioUtil.open(corrected_file_path)
        except Exception as e:
            print(f"Error opening file {corrected_file_path}: {e}")
            return None, None
        if aud is None:
            return None, None
        aud = AudioUtil.resample(aud, self.sr)
        aud = AudioUtil.rechannel(aud, 1)
        aud = AudioUtil.pad_trunc(aud, self.duration)
        sgram = AudioUtil.spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None)
        if self.transform:
            sgram = self.transform(sgram)
        return sgram, torch.tensor(label, dtype=torch.long)
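
(Looking at this again, splitting file_name on '.' and rejoining with '.' reconstructs exactly the string it started with, so corrected_file_path always equals file_path; for example, '.'.join('blues.00067.wav'.split('.')[:-1]) + '.' + 'wav' just gives back 'blues.00067.wav'.)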

But that did not fix it either. I reinstalled soundfile too. Here is the exact error:

Error opening file disco.00069.wav: Error opening 'disco.00069.wav': System error.
Error opening file country.00053.wav: Error opening 'country.00053.wav': System error.
Error opening file reggae.00050.wav: Error opening 'reggae.00050.wav': System error.
Error opening file reggae.00095.wav: Error opening 'reggae.00095.wav': System error.
Error opening file metal.00057.wav: Error opening 'metal.00057.wav': System error.
(and so on for every file)
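
One detail I notice: the paths in these errors are bare filenames with no directory in front, so the loader is presumably resolving them against the current working directory. A quick check over the DataFrame built above (a sketch) would show whether the stored paths resolve at all:

# If these print False, the dataset is being handed bare filenames
# rather than paths that resolve from the working directory.
for p in df['file_path'].head():
    print(p, os.path.exists(p))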

This leads to another error:

Traceback (most recent call last):
  File "HIDDEN_PATH/music.py", line 212, in <module>
    train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20)
  File "HIDDEN_PATH/music.py", line 182, in train_model
    for inputs, labels in train_loader:
  File "/HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 316, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 173, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 173, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "HIDDEN_PATH/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 191, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>
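
As far as I can tell, this second error is fallout from the first: my __getitem__ returns (None, None) whenever a file fails to load, and default_collate can't batch NoneType. A custom collate_fn along these lines (an untested sketch, reusing train_dataset from above) would skip the failed samples, though it would only mask the underlying load failures:

from torch.utils.data.dataloader import default_collate

def skip_none_collate(batch):
    # Drop samples whose audio failed to load (__getitem__ returned (None, None))
    batch = [item for item in batch if item[0] is not None]
    if not batch:
        return None  # the training loop would then need to skip empty batches
    return default_collate(batch)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                          drop_last=True, collate_fn=skip_none_collate)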

Any help is much appreciated!
