Source code for dcase_models.data.feature_extractor

import os
import numpy as np
import librosa
import soundfile as sf
import json

from ..util.files import load_json, mkdir_if_not_exists
from ..util.files import duplicate_folder_structure
from ..util.files import list_wav_files
from ..util.ui import progressbar


[docs]class FeatureExtractor(): """ Abstract base class for feature extraction. Includes methods to load audio files, calculate features and prepare sequences. Inherit this class to define custom features (e.g. features.MelSpectrogram, features.Openl3). Parameters ---------- sequence_time : float, default=1.0 Length (in seconds) of the feature representation analysis windows (model's input). sequence_hop_time : float, default=0.5 Hop time (in seconds) of the feature representation analysis windows. audio_win : int, default=1024 Window length (in samples) for the short-time audio processing (e.g short-time Fourier Transform (STFT)) audio_hop : int, default=680 Hop length (in samples) for the short-time audio processing (e.g short-time Fourier Transform (STFT)) sr : int, default=22050 Sampling rate of the audio signals. If the original audio is not sampled at this rate, it is re-sampled before feature extraction. Attributes ---------- sequence_frames : int Number of frames equivalent to the sequence_time. sequence_hop : int Number of frames equivalent to the sequence_hop_time. Examples -------- To create a new feature representation, it is necessary to define a class that inherits from FeatureExtractor. It is required to define the calculate() method.:: from dcase_models.data.feature_extractor import FeatureExtractor class Chroma(FeatureExtractor): def __init__(self, sequence_time=1.0, sequence_hop_time=0.5,           audio_win=1024, audio_hop=512, sr=44100,           # Add here your custom parameters           n_fft=1024, n_chroma=12): # Don't forget this line    super().__init__(sequence_time=sequence_time,                      sequence_hop_time=sequence_hop_time,                      audio_win=audio_win, audio_hop=audio_hop, sr=sr)    self.sequence_samples = int(librosa.core.frames_to_samples( self.sequence_frames, self.audio_hop, n_fft=self.n_fft )) def calculate(self, file_name): # Here define your function to calculate the chroma features # Load the audio signal    audio = self.load_audio(file_name) # Pad audio signal    audio = librosa.util.fix_length( audio, audio.shape[0] + self.sequence_samples, axis=0, mode='constant' ) # Get the chroma features    chroma = librosa.feature.chroma_stft(y=audio, sr=self.sr, n_fft=self.n_fft, hop_length=audio_hop, win_length=audio_win ) # Convert to sequences    chroma = np.ascontiguousarray(chroma)    chroma = librosa.util.frame(chroma, self.sequence_frames, self.sequence_hop, axis=0 )    return chroma """
[docs] def __init__(self, sequence_time=1.0, sequence_hop_time=0.5, audio_win=1024, audio_hop=680, sr=22050, **kwargs): """ Initialize the FeatureExtractor """ self.sequence_time = sequence_time self.sequence_hop_time = sequence_hop_time self.audio_hop = audio_hop self.audio_win = audio_win self.sr = sr self.sequence_frames = int(librosa.core.time_to_frames( sequence_time, sr=sr, hop_length=audio_hop)) self.sequence_hop = int(librosa.core.time_to_frames( sequence_hop_time, sr=sr, hop_length=audio_hop)) self.features_folder = kwargs.get('features_folder', 'features')
[docs] def load_audio(self, file_name, mono=True, change_sampling_rate=True): """ Loads an audio signal and converts it to mono if needed Parameters ---------- file_name : str Path to the audio file mono : bool if True, only returns left channel change_sampling_rate : bool if True, the audio signal is re-sampled to self.sr Returns ------- array audio signal """ audio, sr_old = sf.read(file_name) # convert to mono if (len(audio.shape) > 1) & (mono): audio = audio[:, 0] # continuous array (for some librosa functions) audio = np.asfortranarray(audio) if (self.sr != sr_old) & (change_sampling_rate): print('Changing sampling rate from %d to %d' % (sr_old, self.sr)) audio = librosa.resample(audio, sr_old, self.sr) return audio
[docs] def calculate(self, file_name): """ Loads an audio file and calculates features Parameters ---------- file_name : str Path to the audio file Returns ------- ndarray feature representation of the audio signal """ pass
[docs] def extract(self, dataset): """ Extracts features for each file in dataset. Call calculate() for each file in dataset and save the result into the features path. Parameters ---------- dataset : Dataset Instance of the dataset. """ features_path = self.get_features_path(dataset) mkdir_if_not_exists(features_path, parents=True) if not dataset.check_sampling_rate(self.sr): print('Changing sampling rate ...') dataset.change_sampling_rate(self.sr) print('Done!') # Define path to audio and features folders audio_path, subfolders = dataset.get_audio_paths( self.sr ) # Duplicate folder structure of audio in features folder duplicate_folder_structure(audio_path, features_path) for audio_folder in subfolders: subfolder_name = os.path.basename(audio_folder) features_path_sub = os.path.join(features_path, subfolder_name) if not self.check_if_extracted_path(features_path_sub): # Navigate in the structure of audio folder and extract # features of the each wav file for path_audio in progressbar(list_wav_files(audio_folder)): features_array = self.calculate( path_audio ) path_to_features_file = path_audio.replace( audio_path, features_path ) path_to_features_file = path_to_features_file.replace( 'wav', 'npy' ) np.save(path_to_features_file, features_array) # Save parameters.json for future checking self.set_as_extracted(features_path_sub)
[docs] def set_as_extracted(self, path): """ Saves a json file with self.__dict__. Useful for checking if the features files were calculated with same parameters. Parameters ---------- path : str Path to the JSON file """ params = self.__dict__.copy() remove = [ key for key in params.keys() if type(params[key]) not in [ int, str, float] ] for key in remove: del params[key] json_path = os.path.join(path, "parameters.json") with open(json_path, 'w') as fp: json.dump(params, fp)
[docs] def check_if_extracted_path(self, path): """ Checks if the features saved in path were calculated. Compare if the features were calculated with the same parameters of self.__dict__. Parameters ---------- path : str Path to the features folder Returns ------- bool True if the features were already extracted. """ json_features_folder = os.path.join(path, "parameters.json") if not os.path.exists(json_features_folder): return False parameters_features_folder = load_json(json_features_folder) for key in parameters_features_folder.keys(): if key not in self.__dict__: return False if parameters_features_folder[key] != self.__dict__[key]: return False return True
[docs] def check_if_extracted(self, dataset): """ Checks if the features of each file in dataset was calculated. Calls check_if_extracted_path for each path in the dataset. Parameters ---------- path : str Path to the features folder Returns ------- bool True if the features were already extracted. """ features_path = self.get_features_path(dataset) audio_path, subfolders = dataset.get_audio_paths(self.sr) for audio_folder in subfolders: subfolder_name = os.path.basename(audio_folder) features_path_sub = os.path.join(features_path, subfolder_name) feat_extracted = self.check_if_extracted_path(features_path_sub) if not feat_extracted: return False return True
[docs] def get_shape(self, length_sec=10.0): """ Calls calculate() with a dummy signal of length length_sec and returns the shape of the feature representation. Parameters ---------- length_sec : float Duration in seconds of the test signal Returns ------- tuple Shape of the feature representation """ audio_sample = np.zeros(int(length_sec*self.sr)) audio_file = 'zeros.wav' sf.write('zeros.wav', audio_sample, self.sr) features_sample = self.calculate(audio_file) os.remove(audio_file) return features_sample.shape
[docs] def get_features_path(self, dataset): """ Returns the path to the features folder. Parameters ---------- dataset : Dataset Instance of the dataset. Returns ------- features_path : str Path to the features folder. """ feature_name = self.__class__.__name__ features_path = os.path.join( dataset.dataset_path, self.features_folder, feature_name ) return features_path