import os
import sox
import soundfile as sf
import numpy as np
from librosa.core import db_to_power, power_to_db
from dcase_models.data.dataset_base import Dataset
from dcase_models.util.files import duplicate_folder_structure
from dcase_models.util.files import list_wav_files
from dcase_models.util.ui import progressbar
class WhiteNoise():
    """Implements white noise augmentation.

    The structure is similar to sox.Transformer in order to keep
    compatibility with sox: the only method callers need is
    ``build(file_origin, file_destination)``.

    Parameters
    ----------
    snr : float
        Signal to noise ratio in dB. The noise level is the mean signal
        level (in dB) minus this value.

    """

    def __init__(self, snr):
        """Initialize the white noise.

        Parameters
        ----------
        snr : float
            Signal to noise ratio in dB.

        """
        self.snr = snr

    @staticmethod
    def _rescale_if_clipped(audio):
        """Scale ``audio`` so that its peak magnitude does not exceed 1.0.

        Parameters
        ----------
        audio : ndarray
            Audio samples.

        Returns
        -------
        ndarray
            ``audio`` unchanged when every sample lies in [-1.0, 1.0]
            (or the array is empty); otherwise ``audio`` divided by its
            peak absolute value.

        """
        magnitude = np.abs(audio)
        # Look at the absolute value so that negative excursions below
        # -1.0 are detected and fully brought back into range as well.
        if np.any(magnitude > 1.0):
            return audio / np.amax(magnitude)
        return audio

    def build(self, file_origin, file_destination):
        """Add noise to the file_origin and save the result in file_destination.

        Parameters
        ----------
        file_origin : str
            Path to the source file.
        file_destination : str
            Path to the destination file.

        """
        audio, sr = sf.read(file_origin)

        # Mean power of the signal, expressed in dB
        s_power = np.mean(audio**2)
        s_db = power_to_db(s_power)

        # Noise power that yields the requested signal to noise ratio
        n_db = s_db - self.snr
        n_power = db_to_power(n_db)

        # Zero-mean Gaussian noise whose variance equals the noise power
        noise = np.random.normal(
            loc=0.0, scale=np.sqrt(n_power), size=audio.shape
        )

        # Add the noise and make sure the result does not clip
        aug_audio = self._rescale_if_clipped(audio + noise)

        # Save result to file, keeping the original sampling rate
        sf.write(file_destination, aug_audio, sr)
class AugmentedDataset(Dataset):
    """Class that manages data augmentation.

    Basically, it takes an instance of Dataset and generates an augmented one.
    Includes methods to generate data augmented versions of the audio files
    in an existing Dataset.

    Parameters
    ----------
    dataset : Dataset
        Instance of Dataset to be augmented.
    sr : int
        Sampling rate.
    augmentations_list : list
        List of augmentation types and their parameters.
        List of dicts of the form:
        [{'type' : aug_type, 'param1': param1 ...} ...].
        e.g.::

            [
                {'type': 'pitch_shift', 'n_semitones': -1},
                {'type': 'time_stretching', 'factor': 1.05}
            ]

        Each dict is modified in place: a 'transformer' entry holding the
        object that performs the augmentation is added to it.

    Examples
    --------
    Define an instance of UrbanSound8k and convert it into an augmented
    instance of the dataset. Note that the actual augmentation is performed
    when process() method is called.

    >>> from dcase_models.data.datasets import UrbanSound8k
    >>> from dcase_models.data.data_augmentation import AugmentedDataset
    >>> dataset = UrbanSound8k('../datasets/UrbanSound8K')
    >>> augmentations = [
            {"type": "pitch_shift", "n_semitones": -1},
            {"type": "time_stretching", "factor": 1.05},
            {"type": "white_noise", "snr": 60}
        ]
    >>> aug_dataset = AugmentedDataset(dataset, 22050, augmentations)
    >>> aug_dataset.process()

    """

    def __init__(self, dataset, sr,
                 augmentations_list):
        """Initialize the AugmentedDataset.

        Copy the attributes of ``dataset`` and create one transformer
        (sox.Transformer or WhiteNoise) for each augmentation.
        """
        # Copy attributes of dataset first, so that the arguments given
        # explicitly below are never overwritten by an attribute of
        # ``dataset`` that happens to have the same name.
        self.__dict__.update(dataset.__dict__)

        self.dataset = dataset
        self.augmentations_list = augmentations_list
        self.sr = sr

        # Init the transformers and store each one in its augmentation
        # dict as a new property.
        for augmentation in augmentations_list:
            aug_type = augmentation['type']
            if aug_type == 'white_noise':
                tfm = WhiteNoise(augmentation['snr'])
            else:
                # Any other type gets a sox.Transformer; it is left
                # without effects when the type is not a known one.
                tfm = sox.Transformer()
                if aug_type == 'pitch_shift':
                    tfm.pitch(augmentation['n_semitones'])
                elif aug_type == 'time_stretching':
                    tfm.tempo(augmentation['factor'])
            augmentation['transformer'] = tfm

    def get_annotations(self, file_path, features, time_resolution):
        """Return the annotations given by the original dataset.

        The arguments are passed unchanged to dataset.get_annotations()
        and its result is returned as is.
        """
        return self.dataset.get_annotations(
            file_path, features, time_resolution
        )

    def generate_file_lists(self):
        """Create self.file_lists, a dict that includes a list of files per fold.

        Just call dataset.generate_file_lists() and copy the attribute.
        """
        self.dataset.generate_file_lists()
        self.file_lists = self.dataset.file_lists.copy()

    def process(self):
        """Generate augmented data for each file in dataset.

        Replicate the folder structure of the folder with the original
        audio files into the folder of each augmentation. Files that
        already exist in the destination folder are not processed again.
        """
        if not self.dataset.check_sampling_rate(self.sr):
            print("Changing sampling rate ...")
            self.dataset.change_sampling_rate(self.sr)
            print('Done!')

        # Get path to the original audio files and list of
        # folders with augmented files (one per augmentation,
        # in the same order as self.augmentations_list).
        _, sub_folders = self.get_audio_paths(self.sr)
        path_original = sub_folders[0]
        paths_augments = sub_folders[1:]

        for augmentation, path_augmented in zip(self.augmentations_list,
                                                paths_augments):
            # Replicate folder structure of the original files into
            # the augmented folder.
            duplicate_folder_structure(path_original, path_augmented)

            # Process each file in path_original
            for path_to_file in progressbar(list_wav_files(path_original)):
                path_to_destination = path_to_file.replace(
                    path_original, path_augmented
                )
                if os.path.exists(path_to_destination):
                    continue
                augmentation['transformer'].build(
                    path_to_file, path_to_destination
                )

    @staticmethod
    def _augmentation_folder(augmentation):
        """Return the folder name that identifies one augmentation dict."""
        aug_type = augmentation['type']
        if aug_type == 'pitch_shift':
            n_semitones = augmentation['n_semitones']
            if n_semitones == int(n_semitones):
                return 'pitch_shift_%d' % n_semitones
            # '%d' would truncate a fractional shift, so that e.g. 1 and
            # 1.5 semitones would share the same folder.
            return 'pitch_shift_%2.2f' % n_semitones
        if aug_type == 'time_stretching':
            return 'time_stretching_%2.2f' % augmentation['factor']
        if aug_type == 'white_noise':
            return 'white_noise_%2.2f' % augmentation['snr']
        raise ValueError('Unknown augmentation type: %s' % aug_type)

    def get_audio_paths(self, sr=None):
        """Returns a list of paths to the folders that include the dataset
        augmented files.

        The folder of each augmentation is defined using its name and
        parameter values.
        e.g. {AUDIO_PATH}/pitch_shift_1 where 1 is the 'n_semitones'
        parameter.

        Parameters
        ----------
        sr : int or None, optional
            Sampling rate (optional). When given, it is appended to
            self.audio_path to form the root audio folder. We keep this
            parameter to keep compatibility with
            Dataset.get_audio_paths() method.

        Returns
        -------
        audio_path : str
            Path to the root audio folder.
            e.g. DATASET_PATH/audio
        subfolders : list of str
            List of subfolders include in audio folder. The first one is
            the folder of the original files, followed by one folder per
            augmentation.
            e.g.::

                [
                    '{DATASET_PATH}/audio/original',
                    '{DATASET_PATH}/audio/pitch_shift_1',
                    '{DATASET_PATH}/audio/time_stretching_1.10',
                ]

        Raises
        ------
        ValueError
            If the type of an augmentation is not 'pitch_shift',
            'time_stretching' or 'white_noise'.

        """
        if sr is not None:
            audio_path = self.audio_path + str(sr)
        else:
            audio_path = self.audio_path

        subfolders = [os.path.join(audio_path, 'original')]
        for augmentation in self.augmentations_list:
            aug_folder = self._augmentation_folder(augmentation)
            subfolders.append(os.path.join(audio_path, aug_folder))

        return audio_path, subfolders