# sounds.py

from torch.utils.data import Dataset
import pandas as pd
import torchaudio
import torch
from audiomentations import Compose, AddGaussianNoise, PitchShift, HighPassFilter
import warnings
from augment import augmentSignal

class sounds(Dataset):
    """Audio dataset that serves (mel spectrogram, label) pairs from a CSV index.

    Each CSV row describes one clip: column 3 (zero-based) is read as the
    audio file path and column 2 as the label. Every clip is resampled to
    16000 Hz, mixed down to one channel, zero-padded or truncated to
    ``sample_len`` samples, and converted to a 64-band mel spectrogram.

    Augmentation through ``augmentSignal`` is enabled by default; call
    ``test()`` to switch it off.
    """

    # Sample rate (Hz) every clip is converted to before the mel transform.
    _TARGET_SAMPLE_RATE = 16000

    def __init__(self, csv_path, device):
        """Read the CSV index and build the mel-spectrogram transform.

        Args:
            csv_path: Path of the CSV file listing the clips.
            device: Torch device (e.g. ``"cuda"`` or ``"cpu"``) on which the
                signals are processed.
        """
        super().__init__()
        self.data = pd.read_csv(csv_path)
        self.device = device
        # The transform must live on the same device as the signals it is
        # applied to, so follow ``device`` instead of assuming CUDA.
        self.mel_spec = torchaudio.transforms.MelSpectrogram(
            sample_rate=self._TARGET_SAMPLE_RATE,
            n_mels=64,
            hop_length=512,
            n_fft=1024,
        ).to(device)
        self.sample_len = 22050  # fixed clip length, in samples
        self.augment = True
        # Resample transforms built so far, keyed by source sample rate, so
        # a new one is not constructed for every item.
        self._resamplers = {}

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, index):
        """Load, preprocess and return the clip at ``index``.

        Args:
            index: Zero-based row position in the CSV index.

        Returns:
            A ``(mel_spectrogram, label)`` tuple.
        """
        audiopath = self.get_path(index)
        label = self.get_label(index)
        signal, sample_rate = torchaudio.load(audiopath)
        if self.augment:
            # NOTE(review): augmentSignal comes from the project's `augment`
            # module; its result is passed to torch.from_numpy, so it is
            # presumably a NumPy array - confirm against that module.
            signal = augmentSignal(signal, sample_rate)
            signal = torch.from_numpy(signal)
        signal = signal.to(self.device)  # process on the configured device

        # Audio processing
        signal = self.resample(signal, sample_rate)  # convert to 16000 Hz
        signal = self.make_mono(signal)  # average the channels into one
        signal = self.add_length_end(signal)  # zero-pad clips that are too short
        signal = self.cut_start(signal)  # truncate clips that are too long

        # Create mel spectrogram
        signal = self.mel_spec(signal)
        return signal, label

    def cut_start(self, signal):
        """Keep only the first ``sample_len`` samples of ``signal``.

        Despite its name, this does not trim the beginning of the clip: it
        drops everything after ``sample_len`` samples along dimension 1.
        Shorter signals are returned unchanged.
        """
        if signal.shape[1] > self.sample_len:
            signal = signal[:, :self.sample_len]
        return signal

    def add_length_end(self, signal):
        """Zero-pad the end of ``signal`` up to ``sample_len`` samples.

        Signals that already have at least ``sample_len`` samples along
        dimension 1 are returned unchanged.
        """
        if signal.shape[1] < self.sample_len:
            missing = self.sample_len - signal.shape[1]
            # (left, right) padding of the last dimension: append only.
            signal = torch.nn.functional.pad(signal, (0, missing))
        return signal

    def resample(self, signal, sample_rate):
        """Resample ``signal`` from ``sample_rate`` to 16000 Hz.

        The transform for each source rate is created once and reused.
        """
        resampler = self._resamplers.get(sample_rate)
        if resampler is None:
            resampler = torchaudio.transforms.Resample(
                sample_rate, self._TARGET_SAMPLE_RATE
            ).to(self.device)
            self._resamplers[sample_rate] = resampler
        return resampler(signal)

    def make_mono(self, signal):
        """Average over dimension 0 (channels), keeping that dimension."""
        return torch.mean(signal, dim=0, keepdim=True)

    def get_path(self, index):
        """Return the audio path stored in column 3 of row ``index``."""
        return self.data.iloc[index, 3]

    def get_label(self, index):
        """Return the label stored in column 2 of row ``index``."""
        return self.data.iloc[index, 2]

    def test(self):
        """Disable augmentation for subsequent ``__getitem__`` calls."""
        self.augment = False

if __name__ == "__main__":
    # Manual smoke test: build the dataset and load its first item.
    warnings.filterwarnings("ignore")
    # Bind the instance to its own name so the `sounds` class is not shadowed
    # and stays usable after this line.
    dataset = sounds(r"C:\Users\suraj\Desktop\Machine Learning\JupyterNotebooks\ML_Module\MIR\drums.csv", "cuda")

    x = dataset[0]
	from torch.utils.data import Dataset
	import pandas as pd
	import torchaudio
	import torch
	from audiomentations import Compose, AddGaussianNoise, PitchShift, HighPassFilter
	import warnings
	from augment import augmentSignal

	class sounds(Dataset):
	    """Audio dataset that serves (mel spectrogram, label) pairs from a CSV index.

	    Each CSV row describes one clip: column 3 (zero-based) is read as the
	    audio file path and column 2 as the label. Every clip is resampled to
	    16000 Hz, mixed down to one channel, zero-padded or truncated to
	    ``sample_len`` samples, and converted to a 64-band mel spectrogram.

	    Augmentation through ``augmentSignal`` is enabled by default; call
	    ``test()`` to switch it off.
	    """

	    # Sample rate (Hz) every clip is converted to before the mel transform.
	    _TARGET_SAMPLE_RATE = 16000

	    def __init__(self, csv_path, device):
	        """Read the CSV index and build the mel-spectrogram transform.

	        Args:
	            csv_path: Path of the CSV file listing the clips.
	            device: Torch device (e.g. ``"cuda"`` or ``"cpu"``) on which the
	                signals are processed.
	        """
	        super().__init__()
	        self.data = pd.read_csv(csv_path)
	        self.device = device
	        # The transform must live on the same device as the signals it is
	        # applied to, so follow ``device`` instead of assuming CUDA.
	        self.mel_spec = torchaudio.transforms.MelSpectrogram(
	            sample_rate=self._TARGET_SAMPLE_RATE,
	            n_mels=64,
	            hop_length=512,
	            n_fft=1024,
	        ).to(device)
	        self.sample_len = 22050  # fixed clip length, in samples
	        self.augment = True
	        # Resample transforms built so far, keyed by source sample rate, so
	        # a new one is not constructed for every item.
	        self._resamplers = {}

	    def __len__(self):
	        """Return the number of rows in the CSV index."""
	        return len(self.data)

	    def __getitem__(self, index):
	        """Load, preprocess and return the clip at ``index``.

	        Args:
	            index: Zero-based row position in the CSV index.

	        Returns:
	            A ``(mel_spectrogram, label)`` tuple.
	        """
	        audiopath = self.get_path(index)
	        label = self.get_label(index)
	        signal, sample_rate = torchaudio.load(audiopath)
	        if self.augment:
	            # NOTE(review): augmentSignal comes from the project's `augment`
	            # module; its result is passed to torch.from_numpy, so it is
	            # presumably a NumPy array - confirm against that module.
	            signal = augmentSignal(signal, sample_rate)
	            signal = torch.from_numpy(signal)
	        signal = signal.to(self.device)  # process on the configured device

	        # Audio processing
	        signal = self.resample(signal, sample_rate)  # convert to 16000 Hz
	        signal = self.make_mono(signal)  # average the channels into one
	        signal = self.add_length_end(signal)  # zero-pad clips that are too short
	        signal = self.cut_start(signal)  # truncate clips that are too long

	        # Create mel spectrogram
	        signal = self.mel_spec(signal)
	        return signal, label

	    def cut_start(self, signal):
	        """Keep only the first ``sample_len`` samples of ``signal``.

	        Despite its name, this does not trim the beginning of the clip: it
	        drops everything after ``sample_len`` samples along dimension 1.
	        Shorter signals are returned unchanged.
	        """
	        if signal.shape[1] > self.sample_len:
	            signal = signal[:, :self.sample_len]
	        return signal

	    def add_length_end(self, signal):
	        """Zero-pad the end of ``signal`` up to ``sample_len`` samples.

	        Signals that already have at least ``sample_len`` samples along
	        dimension 1 are returned unchanged.
	        """
	        if signal.shape[1] < self.sample_len:
	            missing = self.sample_len - signal.shape[1]
	            # (left, right) padding of the last dimension: append only.
	            signal = torch.nn.functional.pad(signal, (0, missing))
	        return signal

	    def resample(self, signal, sample_rate):
	        """Resample ``signal`` from ``sample_rate`` to 16000 Hz.

	        The transform for each source rate is created once and reused.
	        """
	        resampler = self._resamplers.get(sample_rate)
	        if resampler is None:
	            resampler = torchaudio.transforms.Resample(
	                sample_rate, self._TARGET_SAMPLE_RATE
	            ).to(self.device)
	            self._resamplers[sample_rate] = resampler
	        return resampler(signal)

	    def make_mono(self, signal):
	        """Average over dimension 0 (channels), keeping that dimension."""
	        return torch.mean(signal, dim=0, keepdim=True)

	    def get_path(self, index):
	        """Return the audio path stored in column 3 of row ``index``."""
	        return self.data.iloc[index, 3]

	    def get_label(self, index):
	        """Return the label stored in column 2 of row ``index``."""
	        return self.data.iloc[index, 2]

	    def test(self):
	        """Disable augmentation for subsequent ``__getitem__`` calls."""
	        self.augment = False

	if __name__ == "__main__":
	    # Manual smoke test: build the dataset and load its first item.
	    warnings.filterwarnings("ignore")
	    # Bind the instance to its own name so the `sounds` class is not shadowed
	    # and stays usable after this line.
	    dataset = sounds(r"C:\Users\suraj\Desktop\Machine Learning\JupyterNotebooks\ML_Module\MIR\drums.csv", "cuda")

	    x = dataset[0]