Source code for dcase_models.data.feature_extractor
import os
import numpy as np
import librosa
import soundfile as sf
import json
from ..util.files import load_json, mkdir_if_not_exists
from ..util.files import duplicate_folder_structure
from ..util.files import list_wav_files
from ..util.ui import progressbar
class FeatureExtractor():
    """ Abstract base class for feature extraction.

    Includes methods to load audio files, calculate features and
    prepare sequences.

    Inherit this class to define custom features
    (e.g. features.MelSpectrogram, features.Openl3).

    Parameters
    ----------
    sequence_time : float, default=1.0
        Length (in seconds) of the feature representation analysis
        windows (model's input).
    sequence_hop_time : float, default=0.5
        Hop time (in seconds) of the feature representation analysis windows.
    audio_win : int, default=1024
        Window length (in samples) for the short-time audio processing
        (e.g short-time Fourier Transform (STFT))
    audio_hop : int, default=680
        Hop length (in samples) for the short-time audio processing
        (e.g short-time Fourier Transform (STFT))
    sr : int, default=22050
        Sampling rate of the audio signals.
        If the original audio is not sampled at this rate, it is re-sampled
        before feature extraction.
    **kwargs
        Only ``features_folder`` (str, default='features') is read: the
        name of the folder, inside the dataset path, where the features
        are stored.

    Attributes
    ----------
    sequence_frames : int
        Number of frames equivalent to the sequence_time.
    sequence_hop : int
        Number of frames equivalent to the sequence_hop_time.
    features_folder : str
        Name of the folder (inside the dataset path) that holds the
        features.

    Examples
    --------
    To create a new feature representation, it is necessary to define a class
    that inherits from FeatureExtractor. It is required to define the
    calculate() method.::

        import librosa
        import numpy as np
        from dcase_models.data.feature_extractor import FeatureExtractor


        class Chroma(FeatureExtractor):

            def __init__(self, sequence_time=1.0, sequence_hop_time=0.5,
                         audio_win=1024, audio_hop=512, sr=44100,
                         # Add here your custom parameters
                         n_fft=1024, n_chroma=12):
                # Don't forget this line
                super().__init__(sequence_time=sequence_time,
                                 sequence_hop_time=sequence_hop_time,
                                 audio_win=audio_win,
                                 audio_hop=audio_hop, sr=sr)
                # Store the custom parameters before using them
                self.n_fft = n_fft
                self.n_chroma = n_chroma
                self.sequence_samples = int(librosa.core.frames_to_samples(
                    self.sequence_frames,
                    hop_length=self.audio_hop,
                    n_fft=self.n_fft
                ))

            def calculate(self, file_name):
                # Here define your function to calculate the chroma features
                # Load the audio signal
                audio = self.load_audio(file_name)
                # Pad audio signal
                audio = librosa.util.fix_length(
                    audio,
                    size=audio.shape[0] + self.sequence_samples,
                    axis=0, mode='constant'
                )
                # Get the chroma features
                chroma = librosa.feature.chroma_stft(
                    y=audio,
                    sr=self.sr,
                    n_fft=self.n_fft,
                    n_chroma=self.n_chroma,
                    hop_length=self.audio_hop,
                    win_length=self.audio_win
                )
                # Convert to sequences
                chroma = np.ascontiguousarray(chroma)
                chroma = librosa.util.frame(
                    chroma,
                    frame_length=self.sequence_frames,
                    hop_length=self.sequence_hop,
                    axis=0
                )
                return chroma

    """

    def __init__(self, sequence_time=1.0, sequence_hop_time=0.5,
                 audio_win=1024, audio_hop=680, sr=22050, **kwargs):
        """ Initialize the FeatureExtractor.

        Stores the parameters and converts the sequence length and hop
        from seconds to a number of frames.
        """
        self.sequence_time = sequence_time
        self.sequence_hop_time = sequence_hop_time
        self.audio_hop = audio_hop
        self.audio_win = audio_win
        self.sr = sr

        self.sequence_frames = int(librosa.core.time_to_frames(
            sequence_time, sr=sr, hop_length=audio_hop))
        self.sequence_hop = int(librosa.core.time_to_frames(
            sequence_hop_time, sr=sr, hop_length=audio_hop))

        self.features_folder = kwargs.get('features_folder', 'features')

    def load_audio(self, file_name, mono=True, change_sampling_rate=True):
        """ Loads an audio signal and converts it to mono if needed

        Parameters
        ----------
        file_name : str
            Path to the audio file
        mono : bool
            if True, only returns left channel
        change_sampling_rate : bool
            if True, the audio signal is re-sampled to self.sr

        Returns
        -------
        array
            audio signal

        """
        audio, sr_old = sf.read(file_name)

        # convert to mono (keep only the first channel)
        if len(audio.shape) > 1 and mono:
            audio = audio[:, 0]

        # Fortran-contiguous array (for some librosa functions)
        audio = np.asfortranarray(audio)

        if self.sr != sr_old and change_sampling_rate:
            print('Changing sampling rate from %d to %d' % (sr_old, self.sr))
            # Keyword arguments: recent librosa versions no longer accept
            # the sampling rates positionally.
            # NOTE(review): with mono=False the array is indexed as
            # (samples, channels) above; confirm that librosa resamples
            # along the intended axis in that case.
            audio = librosa.resample(
                audio, orig_sr=sr_old, target_sr=self.sr
            )

        return audio

    def calculate(self, file_name):
        """ Loads an audio file and calculates features

        Abstract method: every subclass has to override it.

        Parameters
        ----------
        file_name : str
            Path to the audio file

        Returns
        -------
        ndarray
            feature representation of the audio signal

        Raises
        ------
        NotImplementedError
            Always, when called on a class that does not override it.

        """
        # Fail loudly instead of returning None, which extract() would
        # otherwise save to disk as if it were a valid feature array.
        raise NotImplementedError(
            '%s must implement calculate()' % self.__class__.__name__
        )

    @staticmethod
    def _features_file_path(path_audio, audio_path, features_path):
        """ Maps the path of an audio file to the path of its features file.

        The location of the file relative to audio_path is reproduced
        under features_path and only the file extension is replaced
        by '.npy'.
        """
        # Work on the relative location and on the extension only, so a
        # 'wav' substring in a folder or file name is never rewritten.
        relative_path = os.path.relpath(path_audio, audio_path)
        base, _ = os.path.splitext(
            os.path.join(features_path, relative_path)
        )
        return base + '.npy'

    def extract(self, dataset):
        """ Extracts features for each file in dataset.

        Call calculate() for each file in dataset and save the
        result into the features path.

        Parameters
        ----------
        dataset : Dataset
            Instance of the dataset.

        """
        features_path = self.get_features_path(dataset)
        mkdir_if_not_exists(features_path, parents=True)

        if not dataset.check_sampling_rate(self.sr):
            print('Changing sampling rate ...')
            dataset.change_sampling_rate(self.sr)
            print('Done!')

        # Define path to audio and features folders
        audio_path, subfolders = dataset.get_audio_paths(
            self.sr
        )

        # Duplicate folder structure of audio in features folder
        duplicate_folder_structure(audio_path, features_path)

        for audio_folder in subfolders:
            subfolder_name = os.path.basename(audio_folder)
            features_path_sub = os.path.join(features_path, subfolder_name)
            # Skip folders already extracted with the same parameters
            if self.check_if_extracted_path(features_path_sub):
                continue

            # Navigate in the structure of audio folder and extract
            # features of the each wav file
            for path_audio in progressbar(list_wav_files(audio_folder)):
                features_array = self.calculate(path_audio)
                path_to_features_file = self._features_file_path(
                    path_audio, audio_path, features_path
                )
                np.save(path_to_features_file, features_array)

            # Save parameters.json for future checking
            self.set_as_extracted(features_path_sub)

    def set_as_extracted(self, path):
        """ Saves a json file with self.__dict__.

        Useful for checking if the features files were calculated
        with same parameters. Only the attributes whose type is exactly
        int, str or float are saved.

        Parameters
        ----------
        path : str
            Path to the folder where parameters.json is written

        """
        # Exact type match on purpose: bools and other subclasses are
        # left out, so the content of the file stays the same as before.
        params = {
            key: value for key, value in self.__dict__.items()
            if type(value) in (int, str, float)
        }

        json_path = os.path.join(path, "parameters.json")
        with open(json_path, 'w') as fp:
            json.dump(params, fp)

    def check_if_extracted_path(self, path):
        """ Checks if the features saved in path were calculated.

        Compare if the features were calculated with the same parameters
        of self.__dict__.

        Parameters
        ----------
        path : str
            Path to the features folder

        Returns
        -------
        bool
            True if the features were already extracted.

        """
        json_features_folder = os.path.join(path, "parameters.json")
        if not os.path.exists(json_features_folder):
            return False

        parameters_features_folder = load_json(json_features_folder)
        current_parameters = self.__dict__
        # Every saved parameter has to exist in this instance with the
        # same value; attributes that were not saved are ignored.
        for key, saved_value in parameters_features_folder.items():
            if key not in current_parameters:
                return False
            if saved_value != current_parameters[key]:
                return False

        return True

    def check_if_extracted(self, dataset):
        """ Checks if the features of each file in dataset was calculated.

        Calls check_if_extracted_path for each path in the dataset.

        Parameters
        ----------
        dataset : Dataset
            Instance of the dataset.

        Returns
        -------
        bool
            True if the features were already extracted.

        """
        features_path = self.get_features_path(dataset)
        audio_path, subfolders = dataset.get_audio_paths(self.sr)

        for audio_folder in subfolders:
            subfolder_name = os.path.basename(audio_folder)
            features_path_sub = os.path.join(features_path, subfolder_name)
            if not self.check_if_extracted_path(features_path_sub):
                return False

        return True

    def get_shape(self, length_sec=10.0):
        """
        Calls calculate() with a dummy signal of length length_sec
        and returns the shape of the feature representation.

        Parameters
        ----------
        length_sec : float
            Duration in seconds of the test signal

        Returns
        -------
        tuple
            Shape of the feature representation

        """
        # Function-scope import: tempfile is not imported at module level
        import tempfile

        audio_sample = np.zeros(int(length_sec * self.sr))

        # Use a unique temporary file instead of a fixed 'zeros.wav' in
        # the working directory, which could overwrite a user file.
        file_descriptor, audio_file = tempfile.mkstemp(suffix='.wav')
        os.close(file_descriptor)
        try:
            sf.write(audio_file, audio_sample, self.sr)
            features_sample = self.calculate(audio_file)
        finally:
            # Remove the dummy file even if calculate() fails
            os.remove(audio_file)

        return features_sample.shape

    def get_features_path(self, dataset):
        """ Returns the path to the features folder.

        The path is built as
        <dataset.dataset_path>/<features_folder>/<name of this class>.

        Parameters
        ----------
        dataset : Dataset
            Instance of the dataset.

        Returns
        -------
        features_path : str
            Path to the features folder.

        """
        feature_name = self.__class__.__name__
        features_path = os.path.join(
            dataset.dataset_path, self.features_folder, feature_name
        )
        return features_path