Commit 299606f3 authored by Quentin Aristote
Browse files

added cutting and option to change the number of mels

parent aad60329
# -*- coding: utf-8 -*-
import math
import os.path
import torch
import torchaudio
import torchaudio.transforms as transforms
def soundToSpectrogram(path, sample_rate = 16000, duration = 30) :
r"""Return the Mel Spectrogram of an audio file.
def soundToSpectrogram(path, sample_rate = 16000, duration = 30, nb_mels = 128) :
r"""Return the Mel-spectrograms of consecutive parts of an audio file.
Args :
path (str): the path to the file.
sample_rate (int, optional) : the sample rate in Hz to use when getting the spectrogram. Default : 16000 Hz
duration (int, optional) : the number of seconds the recording should last. Default : 30s
duration (int, optional) : the number of seconds each part should last. Default : 30s
nb_mels (int, optional) : the number of frequencies in the resulting spectrograms. Default : 128
Returns :
list of torch.Tensor : the Mel-spectrograms of the consecutive parts, in their order of appearance in the recording"""
......@@ -25,23 +27,30 @@ def soundToSpectrogram(path, sample_rate = 16000, duration = 30) :
resample = transforms.Resample(orig_freq = orig_freq, new_freq = sample_rate)
waveform = resample(waveform)
# Extract a certain duration
# Cut the recording into consecutive parts of nb_frames samples and compute the Mel-spectrogram of each part
nb_frames = duration * sample_rate
waveform = waveform[:, :nb_frames]
nb_frames_missing = nb_frames - waveform.size()[1]
waveform = torch.cat([waveform, torch.zeros((1, nb_frames_missing))], dim = 1)
# Compute the Mel-spectrogram
melspectrogram = transforms.MelSpectrogram(sample_rate = sample_rate)
spectrogram = melspectrogram(waveform)
return spectrogram
spectrograms = []
melspectrogram_of = transforms.MelSpectrogram(sample_rate = sample_rate)
while waveform.size()[1] > 0 :
sample = waveform[:nb_frames]
waveform = waveform[nb_frames:]
spectrogram = melspectrogram_of(sample)
spectorgrams.append(samples)
nb_frames_missing = nb_frames - sample.size()[0]
if nb_frames_missing > 0 :
spectrograms.pop()
return spectrograms
def getSpectrograms(dir_source, dir_target,
                    sample_rate = 16000,
                    overwrite = False,
                    duration = 30,
                    nb_mels = 128,
                    to_ignore = frozenset()) :
    r"""Compute the Mel-spectrograms of all the sound files and write them to disk.

    Every file of dir_source is passed to soundToSpectrogram, and the i-th
    spectrogram it returns (counting from 1) is saved with torch.save as
    '<title>-<i>.pt' in dir_target, where <title> is the file name without
    its extension.

    Args :
        dir_source (str) : the directory whose files are processed.
        dir_target (str) : the directory the '.pt' files are written to.
        sample_rate (int, optional) : the sample rate in Hz to use when getting the spectrogram. Default : 16000 Hz
        overwrite (bool, optional) : whether to compute all spectrograms again. Default : False
        duration (int, optional) : the number of seconds each part should last. Default : 30s
        nb_mels (int, optional) : the number of frequencies in the resulting spectrograms. Default : 128
        to_ignore (set, optional) : the set of titles of recordings that should be skipped. Default : empty

    Failures on a single file are printed and do not stop the processing of
    the remaining files."""
    for filename in os.listdir(dir_source) :
        title, _ = os.path.splitext(filename)
        path_source = os.path.join(dir_source, filename)
        # The title is concatenated instead of being embedded in a format
        # template, so that braces in a file name cannot break the path.
        path_prefix = os.path.join(dir_target, title + '-')
        print('Computing the Mel-spectrogram of {filename} ...'.format(filename = filename))
        if title in to_ignore :
            continue
        # A recording counts as already processed when its first part exists.
        # Testing the unformatted '-{part}.pt' template would never match a
        # real file, which made overwrite = False ineffective.
        already_done = os.path.isfile(path_prefix + '1.pt')
        if already_done and not overwrite :
            continue
        try :
            spectrograms = soundToSpectrogram(path_source,
                                              sample_rate = sample_rate,
                                              duration = duration,
                                              nb_mels = nb_mels)
            for part, spectrogram in enumerate(spectrograms, start = 1) :
                torch.save(spectrogram, path_prefix + '{part}.pt'.format(part = part))
            print('Success.')
        except Exception as exception :
            # Best effort : report the failure and move on to the next file.
            print('Failure : {exception}.'.format(exception = exception))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment