import os
import numpy as np
import sys
import csv
from pandas import read_csv
import yaml
from sed_eval.util.event_roll import event_list_to_event_roll
from librosa.util import fix_length
from dcase_models.data.dataset_base import Dataset
from dcase_models.util.files import move_all_files_to_parent, move_all_files_to
from dcase_models.util.files import mkdir_if_not_exists, list_wav_files
import inspect
# Public names of this module: the dataset classes exported by a
# star-import.
__all__ = ['UrbanSound8k', 'ESC50', 'ESC10', 'URBAN_SED',
'SONYC_UST', 'TAUUrbanAcousticScenes2019',
'TAUUrbanAcousticScenes2020Mobile',
'TUTSoundEvents2017', 'FSDKaggle2018', 'MAVD']
class UrbanSound8k(Dataset):
    """UrbanSound8k dataset.

    Subclass of Dataset that adds the attributes and methods that are
    specific to UrbanSound8k.

    Url: https://urbansounddataset.weebly.com/urbansound8k.html

    J. Salamon, C. Jacoby, and J. P. Bello
    "A dataset and taxonomy for urban sound research,"
    22nd ACM International Conference on Multimedia (ACM-MM'14)
    Orlando, FL, USA, November 2014

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/UrbanSound8k).

    Examples
    --------
    To work with UrbanSound8k dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import UrbanSound8k
    >>> dataset = UrbanSound8k('../datasets/UrbanSound8K')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define the audio folder, the ten fold names and the labels."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ['fold%d' % fold_ix for fold_ix in range(1, 11)]
        self.label_list = [
            "air_conditioner", "car_horn", "children_playing",
            "dog_bark", "drilling", "engine_idling", "gun_shot",
            "jackhammer", "siren", "street_music",
        ]

    def generate_file_lists(self):
        """Store, for every fold, the wav files found in its sub-folder."""
        for fold_name in self.fold_list:
            fold_dir = os.path.join(self.audio_path, fold_name)
            self.file_lists[fold_name] = list_wav_files(fold_dir)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the one-hot annotation matrix of a file.

        Parameters
        ----------
        file_name : str
            Path to the file. The class index is read from the second
            dash-separated field of its base name.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Not used by this dataset.

        Returns
        -------
        ndarray
            Array of shape (len(features), len(self.label_list)) whose
            column of the file's class is set to one.

        """
        y = np.zeros((len(features), len(self.label_list)))
        name_fields = os.path.basename(file_name).split('-')
        class_ix = int(name_fields[1])
        y[:, class_ix] = 1
        return y

    def download(self, force_download=False):
        """Download the dataset through the parent class.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        files_url = "https://zenodo.org/record/1203745/files"
        archive_names = ["UrbanSound8K.tar.gz"]
        was_downloaded = super().download(
            files_url, archive_names, force_download
        )
        # Post-processing only runs when the parent reports a download.
        if not was_downloaded:
            return
        move_all_files_to_parent(self.dataset_path, "UrbanSound8K")
        self.set_as_downloaded()
class ESC50(Dataset):
    """ESC-50 dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for ESC-50.

    Url: https://github.com/karolpiczak/ESC-50

    K. J. Piczak
    "Esc: Dataset for environmental sound classification,"
    Proceedings of the 23rd ACM international conference on Multimedia
    Brisbane, Australia, October, 2015.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/ESC50).

    Examples
    --------
    To work with ESC50 dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import ESC50
    >>> dataset = ESC50('../datasets/ESC50')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Load the metadata file and derive the label list from it.

        ``self.metadata`` maps each file name of ``meta/esc50.csv`` to a
        dict with the keys 'fold', 'class_ix', 'class_name' and 'esc10'.
        The csv is only read when ``check_if_downloaded()`` is true;
        otherwise ``self.metadata`` stays empty and ``self.label_list``
        is not touched.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        meta_file = os.path.join(self.dataset_path, 'meta/esc50.csv')

        self.metadata = {}
        if self.check_if_downloaded():
            n_classes = 50
            self.label_list = [''] * n_classes
            # newline='' is the mode recommended by the csv module.
            with open(meta_file, newline='') as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',')
                next(csv_reader, None)  # skip the header row
                for row in csv_reader:
                    filename = row[0]
                    class_ix = int(row[2])
                    class_name = row[3]
                    self.metadata[filename] = {
                        'fold': 'fold' + row[1],
                        'class_ix': class_ix,
                        'class_name': class_name,
                        'esc10': row[4] == 'True',
                    }
                    if class_name not in self.label_list:
                        self.label_list[class_ix] = class_name

        self.fold_list = ["fold1", "fold2", "fold3", "fold4", "fold5"]
        self.evaluation_mode = 'cross-validation'

    def generate_file_lists(self):
        """Assign every wav file of the audio folder to its fold.

        The audio folder is listed once and each file is dispatched to
        the fold stored in its metadata entry. Files without metadata,
        or whose fold is not in ``self.fold_list``, are left out.
        """
        self.file_lists = {fold: [] for fold in self.fold_list}
        for wav_path in list_wav_files(self.audio_path):
            entry = self.metadata.get(self.get_basename_wav(wav_path))
            if entry is None:
                continue
            fold_files = self.file_lists.get(entry['fold'])
            if fold_files is not None:
                fold_files.append(wav_path)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the one-hot annotation matrix of a file.

        Parameters
        ----------
        file_name : str
            Path to the file; its metadata entry gives the class index.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Not used by this dataset.

        Returns
        -------
        ndarray
            Array of shape (len(features), len(self.label_list)) whose
            column of the file's class is set to one.

        Raises
        ------
        KeyError
            If the file has no entry in ``self.metadata``.

        """
        basename = self.get_basename_wav(file_name)
        class_ix = self.metadata[basename]['class_ix']
        y = np.zeros((len(features), len(self.label_list)))
        y[:, class_ix] = 1
        return y

    def get_basename_wav(self, filename):
        """Convert a path such as ``.../xxxx.npy`` into ``xxxx.wav``.

        Everything after the first dot of the base name is replaced by
        ``.wav``.
        """
        return os.path.basename(filename).split('.')[0] + '.wav'

    def download(self, force_download=False):
        """Download the dataset through the parent class.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        github_url = "https://github.com/karoldvl/ESC-50/archive/"
        github_files = ["master.zip"]
        downloaded = super().download(
            github_url, github_files, force_download
        )
        # Post-processing only runs when the parent reports a download.
        if downloaded:
            move_all_files_to_parent(self.dataset_path, "ESC-50-master")
            self.set_as_downloaded()
class ESC10(ESC50):
    """ESC-10 dataset.

    Subclass of ESC50 that keeps only the ESC-10 part of the data.
    ESC-10 is a subsampled version of ESC-50.

    Url: https://github.com/karolpiczak/ESC-50

    K. J. Piczak
    "Esc: Dataset for environmental sound classification,"
    Proceedings of the 23rd ACM international conference on Multimedia
    Brisbane, Australia, October, 2015.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/ESC50).

    Examples
    --------
    To work with ESC10 dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import ESC10
    >>> dataset = ESC10('../datasets/ESC50')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the ESC50 constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Build as ESC50, then restrict metadata and labels to ESC-10.

        Entries whose 'esc10' flag is false are dropped, the label list
        is reduced to the classes that remain (ordered by their original
        class index), every kept entry gets its 'class_ix' remapped to
        the position in the reduced label list, and the file lists are
        regenerated.
        """
        super().build()

        esc10_metadata = {}
        kept_class_ids = []
        for file_key, entry in self.metadata.items():
            if not entry['esc10']:
                continue
            esc10_metadata[file_key] = entry.copy()
            original_ix = entry['class_ix']
            if original_ix not in kept_class_ids:
                kept_class_ids.append(original_ix)
        kept_class_ids.sort()

        reduced_labels = [self.label_list[ix] for ix in kept_class_ids]
        self.metadata = esc10_metadata
        self.label_list = reduced_labels

        # Re-index the classes against the reduced label list.
        for entry in self.metadata.values():
            assert entry['esc10']
            entry['class_ix'] = self.label_list.index(entry['class_name'])

        # regenerate self.file_lists
        self.generate_file_lists()
class URBAN_SED(Dataset):
    """URBAN-SED dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for URBAN-SED.

    Url: http://urbansed.weebly.com/

    J. Salamon, D. MacConnell, M. Cartwright, P. Li, and J. P. Bello.
    "Scaper: A library for soundscape synthesis and augmentation".
    IEEE Workshop on Applications of Signal Processing to Audio and
    Acoustics
    New York, USA, October 2017.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/URBAN_SED).

    Examples
    --------
    To work with URBAN_SED dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import URBAN_SED
    >>> dataset = URBAN_SED('../datasets/URBAN_SED')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define the audio/annotation folders, the folds and the labels."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.annotations_folder = os.path.join(
            self.dataset_path, 'annotations')
        self.fold_list = ["train", "validate", "test"]
        self.label_list = ["air_conditioner", "car_horn", "children_playing",
                           "dog_bark", "drilling", "engine_idling", "gun_shot",
                           "jackhammer", "siren", "street_music"]
        self.evaluation_mode = 'train-validate-test'

    def generate_file_lists(self):
        """List the wav files of every fold and map them to label files.

        ``self.wav_to_labels`` maps each listed wav path to the ``.txt``
        file of the same base name inside the fold's annotation folder.
        """
        for fold in self.fold_list:
            audio_folder = os.path.join(self.audio_path, fold)
            self.file_lists[fold] = list_wav_files(audio_folder)

        self.wav_to_labels = {}
        for fold in self.fold_list:
            for fil in self.file_lists[fold]:
                label_file = os.path.basename(fil).split('.')[0] + '.txt'
                self.wav_to_labels[fil] = os.path.join(
                    self.annotations_folder, fold, label_file)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of a file, aligned with its features.

        Parameters
        ----------
        file_name : str
            Key of ``self.wav_to_labels`` (a path registered by
            ``generate_file_lists``).
        features : ndarray
            Features of the file; only ``features.shape[0]`` is used.
        time_resolution : float
            Time resolution handed to ``event_list_to_event_roll``.

        Returns
        -------
        ndarray
            Event roll trimmed or padded along axis 0 so that it has
            ``features.shape[0]`` rows.

        """
        label_file = self.wav_to_labels[file_name]
        labels = read_csv(label_file, delimiter='\t', header=None)
        labels.columns = ['event_onset', 'event_offset', 'event_label']
        event_roll = event_list_to_event_roll(
            labels.to_dict('records'),
            self.label_list,
            time_resolution
        )
        # fix_length both trims and pads. ``size`` is passed by keyword
        # because it is keyword-only in recent librosa releases.
        event_roll = fix_length(event_roll, size=features.shape[0], axis=0)
        assert event_roll.shape[0] == features.shape[0]
        return event_roll

    def download(self, force_download=False):
        """Download the dataset through the parent class.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/1324404/files"
        zenodo_files = ["URBAN-SED_v2.0.0.tar.gz"]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        # Post-processing only runs when the parent reports a download.
        if downloaded:
            move_all_files_to_parent(self.dataset_path, "URBAN-SED_v2.0.0")
            self.set_as_downloaded()
class SONYC_UST(Dataset):
    """SONYC-UST dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for SONYC-UST.

    Version: 2.1.0

    Url: https://zenodo.org/record/3693077

    M. Cartwright, et al.
    "SONYC Urban Sound Tagging (SONYC-UST): A Multilabel Dataset
    from an Urban Acoustic Sensor Network".
    Proceedings of the Workshop on Detection and Classification
    of Acoustic Scenes and Events (DCASE), 2019.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/SONYC_UST).

    Examples
    --------
    To work with SONYC_UST dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import SONYC_UST
    >>> dataset = SONYC_UST('../datasets/SONYC_UST')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define paths and folds; load annotations and taxonomy.

        ``self.metadata`` and ``self.label_list`` start empty and are
        only filled from ``annotations.csv`` and
        ``dcase-ust-taxonomy.yaml`` when ``check_if_downloaded()`` is
        true.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["train", "validate"]
        self.evaluation_mode = 'train-validate-test'
        self.meta_file = os.path.join(self.dataset_path, 'annotations.csv')
        self.taxonomy_file = os.path.join(
            self.dataset_path, 'dcase-ust-taxonomy.yaml')

        self.metadata = {}
        # Historical misspelling of ``metadata``; kept so that code
        # reading the old attribute name keeps working.
        self.metada = {}
        self.label_list = []
        if self.check_if_downloaded():
            self.metadata = read_csv(self.meta_file).sort_values(
                'audio_filename')
            with open(self.taxonomy_file, 'r') as f:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects and this file comes from a download. If the
                # taxonomy only holds plain mappings, yaml.safe_load
                # would be the safer call - confirm before switching.
                self.label_list = yaml.load(f, Loader=yaml.Loader)

    def generate_file_lists(self):
        """Assign the wav files of the audio folder to each fold.

        The 'train' fold takes every file whose 'split' is 'train'.
        Any other fold only takes the files of that split that have a
        row with 'annotator_id' equal to 0.
        """
        self.file_lists = {}
        all_files = list_wav_files(self.audio_path)
        assert len(all_files) != 0
        for fold in self.fold_list:
            in_fold = self.metadata['split'] == fold
            if fold != 'train':
                in_fold = in_fold & (self.metadata['annotator_id'] == 0)
            # A set keeps the per-file membership test constant-time.
            fold_filenames = set(self.metadata[in_fold]['audio_filename'])
            self.file_lists[fold] = [
                fil for fil in all_files
                if os.path.basename(fil) in fold_filenames
            ]

    def get_annotations(self, file_name, features, time_resolution):
        """Return the coarse-level multi-label annotations of a file.

        Parameters
        ----------
        file_name : str
            Path to the file; its base name (with a ``.wav`` extension)
            selects the rows of ``self.metadata``.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Not used by this dataset.

        Returns
        -------
        ndarray
            Array of shape (len(features), n_coarse_classes); every row
            holds the same presence vector.

        Notes
        -----
        For files of the 'train' split a class is present when the sum
        of its presence column over all rows is at least one. For other
        splits only the rows of annotator 0 are considered; when there
        is no such row the vector stays at zero.

        """
        # only coarse level
        # TODO add fine level
        coarse_labels = self.label_list['coarse']
        y = np.zeros(len(coarse_labels))

        basename = os.path.basename(file_name).split('.')[0] + '.wav'
        file_rows = self.metadata[
            self.metadata['audio_filename'] == basename]
        is_train = file_rows['split'].values[0] == 'train'
        from_annotator_zero = None
        if not is_train:
            from_annotator_zero = file_rows['annotator_id'].values == 0

        for class_ix, class_name in coarse_labels.items():
            class_column = str(class_ix) + '_' + class_name + '_presence'
            presence = file_rows[class_column].values
            # The taxonomy keys are used as 1-based class indices.
            if is_train:
                y[class_ix - 1] = np.sum(presence) >= 1
            elif from_annotator_zero.any():
                # np.any also covers several rows from annotator 0.
                y[class_ix - 1] = np.any(
                    presence[from_annotator_zero] >= 1)

        y = np.expand_dims(y, 0)
        y = np.repeat(y, len(features), 0)
        return y

    def download(self, force_download=False):
        """Download the dataset files through the parent class.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/3693077/files"
        zenodo_files = ["annotations.csv", "audio.tar.gz",
                        "dcase-ust-taxonomy.yaml", "README.md"]
        super().download(
            zenodo_url, zenodo_files, force_download
        )
        self.set_as_downloaded()
class _TAUUrbanAcousticScenes(Dataset):
    """Common base of the TAU Urban Acoustic Scenes dataset classes."""

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define folders, folds, scene labels and setup-file paths."""
        root = self.dataset_path
        self.audio_path = os.path.join(root, 'audio')
        self.fold_list = ["train", "test"]
        self.meta_file = os.path.join(root, 'meta.csv')
        self.label_list = [
            'airport', 'shopping_mall', 'metro_station',
            'street_pedestrian', 'public_square',
            'street_traffic', 'tram', 'bus', 'metro', 'park',
        ]
        self.evaluation_setup_train = os.path.join(
            root, 'evaluation_setup', 'fold1_train.csv')
        self.evaluation_setup_test = os.path.join(
            root, 'evaluation_setup', 'fold1_test.csv')
        self.annotations_folder = os.path.join(root, 'annotations')

    def generate_file_lists(self):
        """Read the train and test file lists from the setup csv files.

        The first row of each tab-separated file is skipped; for every
        other row the last '/'-separated part of the first column is
        joined to ``self.audio_path``.
        """
        self.file_lists = {}
        setup_files = [self.evaluation_setup_train,
                       self.evaluation_setup_test]
        for fold, setup_path in zip(['train', 'test'], setup_files):
            fold_files = []
            self.file_lists[fold] = fold_files
            with open(setup_path) as csv_file:
                rows = csv.reader(csv_file, delimiter='\t')
                for row_ix, row in enumerate(rows):
                    if row_ix == 0:
                        # header row
                        continue
                    wav_name = row[0].split('/')[-1]
                    fold_files.append(
                        os.path.join(self.audio_path, wav_name))

    def get_annotations(self, file_name, features, time_resolution):
        """Return the one-hot annotation matrix of a file.

        Parameters
        ----------
        file_name : str
            Path to the file. Its base name, up to the first dot, must
            consist of exactly five dash-separated fields, the first
            being the scene label.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Not used by this dataset.

        Returns
        -------
        ndarray
            Array of shape (len(features), len(self.label_list)) whose
            column of the file's scene is set to one.

        """
        y = np.zeros((len(features), len(self.label_list)))
        # Base name without anything after the first dot.
        stem = os.path.basename(file_name).split('.')[0]
        # Unpacking enforces the five-field naming scheme.
        scene_label, _city, _location_id, _segment_id, _device_id = \
            stem.split('-')
        y[:, self.label_list.index(scene_label)] = 1
        return y

    def download(self, zenodo_url, zenodo_files, force_download=False):
        """Delegate to the parent ``download`` and return its result."""
        return super().download(
            zenodo_url, zenodo_files, force_download=force_download)
class TAUUrbanAcousticScenes2019(_TAUUrbanAcousticScenes):
    """TAU Urban Acoustic Scenes 2019 dataset.

    Subclass of the common TAU Urban Acoustic Scenes base class that
    adds the download step of the 2019 development set.

    Url: https://zenodo.org/record/2589280

    A. Mesaros, T. Heittola, and T. Virtanen.
    "A multi-device dataset for urban acoustic scene classification".
    Proceedings of the Detection and Classification of Acoustic
    Scenes and Events 2018 Workshop (DCASE 2018).
    November 2018.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TAUUrbanAcousticScenes2019).

    Examples
    --------
    To work with TAUUrbanAcousticScenes2019 dataset, just initialize this
    class with the path to the dataset.

    >>> from dcase_models.data.datasets import TAUUrbanAcousticScenes2019
    >>> dataset = TAUUrbanAcousticScenes2019(
    ...     '../datasets/TAUUrbanAcousticScenes2019')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the base class constructor."""
        super().__init__(dataset_path)

    def download(self, force_download=False):
        """Download the 21 audio archives plus the doc and meta archives.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        package = "TAU-urban-acoustic-scenes-2019-development"
        files_url = "https://zenodo.org/record/2589280/files"

        archive_names = []
        for part in range(1, 22):
            archive_names.append("%s.audio.%d.zip" % (package, part))
        archive_names.append(package + '.doc.zip')
        archive_names.append(package + '.meta.zip')

        was_downloaded = super().download(
            files_url, archive_names, force_download
        )
        # Post-processing only runs when the parent reports a download.
        if not was_downloaded:
            return
        move_all_files_to_parent(self.dataset_path, package)
        self.set_as_downloaded()
class TAUUrbanAcousticScenes2020Mobile(_TAUUrbanAcousticScenes):
    """TAU Urban Acoustic Scenes 2020 Mobile dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for TAU Urban
    Acoustic Scenes 2020 Mobile.

    Url: https://zenodo.org/record/3819968

    T. Heittola, A. Mesaros, and T. Virtanen.
    "Acoustic scene classification in DCASE 2020 challenge:
    generalization across devices and low complexity solutions".
    Proceedings of the Detection and Classification of Acoustic
    Scenes and Events 2020 Workshop (DCASE 2020).
    2020

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TAUUrbanAcousticScenes2020Mobile).

    Examples
    --------
    To work with TAUUrbanAcousticScenes2020Mobile dataset, just initialize
    this class with the path to the dataset.

    >>> from dcase_models.data.datasets import (
    ...     TAUUrbanAcousticScenes2020Mobile)
    >>> dataset = TAUUrbanAcousticScenes2020Mobile(
    ...     '../datasets/TAUUrbanAcousticScenes2020Mobile')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the base class constructor."""
        super().__init__(dataset_path)

    def download(self, force_download=False):
        """Download the 16 audio archives plus the doc and meta archives.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/3819968/files"
        zenodo_files = [
            "TAU-urban-acoustic-scenes-2020-mobile-development.audio.%d.zip" %
            j for j in range(1, 17)]
        zenodo_files.append(
            'TAU-urban-acoustic-scenes-2020-mobile-development.doc.zip')
        zenodo_files.append(
            'TAU-urban-acoustic-scenes-2020-mobile-development.meta.zip')
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        # Post-processing only runs when the parent reports a download.
        if downloaded:
            move_all_files_to_parent(
                self.dataset_path,
                "TAU-urban-acoustic-scenes-2020-mobile-development")
            self.set_as_downloaded()
class TUTSoundEvents2017(Dataset):
    """TUT Sound Events 2017 dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for TUT Sound
    Events 2017.

    Url: https://zenodo.org/record/814831

    A. Mesaros et al.
    DCASE 2017 challenge setup: tasks, datasets and baseline system.
    Detection and Classification of Acoustic Scenes and Events 2017
    Workshop (DCASE2017), 85-92.
    November 2017.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TUTSoundEvents2017).

    Examples
    --------
    To work with TUTSoundEvents2017 dataset, just initialize this
    class with the path to the dataset.

    >>> from dcase_models.data.datasets import TUTSoundEvents2017
    >>> dataset = TUTSoundEvents2017('../datasets/TUTSoundEvents2017')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define folders, the four folds and the event labels."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["fold1", "fold2", "fold3", "fold4"]
        self.meta_path = os.path.join(self.dataset_path, 'meta')
        self.label_list = ['brakes squeaking', 'car', 'children',
                           'large vehicle', 'people speaking',
                           'people walking']
        self.evaluation_setup_path = os.path.join(
            self.dataset_path, 'evaluation_setup'
        )

    def _load_setup_file(self, fold, setup_file_name):
        """Fill ``file_lists[fold]`` and ``wav_to_labels`` from one file.

        Every non-empty row of the tab-separated setup file contributes
        the audio path ``<audio_path>/street/<name>`` and its annotation
        path, obtained by swapping the audio folder for the meta folder
        and ``.wav`` for ``.ann``.
        """
        self.file_lists[fold] = []
        setup_path = os.path.join(
            self.evaluation_setup_path, setup_file_name
        )
        with open(setup_path) as csv_file:
            for row in csv.reader(csv_file, delimiter='\t'):
                if not row:
                    # blank line in the setup file
                    continue
                file_name = row[0].split('/')[-1]
                file_path = os.path.join(
                    self.audio_path, 'street', file_name
                )
                self.file_lists[fold].append(file_path)
                file_ann = file_path.replace(
                    self.audio_path, self.meta_path
                ).replace('.wav', '.ann')
                self.wav_to_labels[file_path] = file_ann

    def generate_file_lists(self):
        """Build the file lists of the folds and of the test set.

        Each fold of ``self.fold_list`` is read from
        ``street_<fold>_test.txt``. An extra 'test' entry is read from
        ``street_test.txt``.
        """
        self.file_lists = {}
        self.wav_to_labels = {}
        for fold in self.fold_list:
            self._load_setup_file(fold, 'street_%s_test.txt' % fold)
        # test folder
        self._load_setup_file('test', 'street_test.txt')

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of a file, aligned with its features.

        Parameters
        ----------
        file_name : str
            Key of ``self.wav_to_labels`` (a path registered by
            ``generate_file_lists``).
        features : ndarray
            Features of the file; only ``features.shape[0]`` is used.
        time_resolution : float
            Time resolution handed to ``event_list_to_event_roll``.

        Returns
        -------
        ndarray
            Event roll trimmed or padded along axis 0 so that it has
            ``features.shape[0]`` rows.

        """
        label_file = self.wav_to_labels[file_name]
        labels = read_csv(label_file, delimiter='\t', header=None)
        # Annotation files come with either three or seven columns.
        if labels.shape[1] == 3:
            labels.columns = ['event_onset', 'event_offset', 'event_label']
        else:
            labels.columns = ['file_path', 'scene', 'event_onset',
                              'event_offset', 'event_label',
                              'mixture', 'file_name']
        event_roll = event_list_to_event_roll(
            labels.to_dict('records'), self.label_list, time_resolution
        )
        # fix_length both trims and pads. ``size`` is passed by keyword
        # because it is keyword-only in recent librosa releases.
        event_roll = fix_length(event_roll, size=features.shape[0], axis=0)
        assert event_roll.shape[0] == features.shape[0]
        return event_roll

    def download(self, force_download=False):
        """Download the development and evaluation sets.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/814831/files"
        zenodo_files = [
            'TUT-sound-events-2017-development.audio.1.zip',
            'TUT-sound-events-2017-development.audio.2.zip',
            'TUT-sound-events-2017-development.doc.zip',
            'TUT-sound-events-2017-development.meta.zip'
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            move_all_files_to_parent(
                self.dataset_path,
                "TUT-sound-events-2017-development")

        zenodo_url = "https://zenodo.org/record/1040179/files"
        zenodo_files = [
            'TUT-sound-events-2017-evaluation.audio.zip',
            'TUT-sound-events-2017-evaluation.meta.zip',
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            # Move the evaluation material next to the development set.
            evaluation_moves = [
                ("TUT-sound-events-2017-evaluation/audio/street",
                 "audio/street"),
                ("TUT-sound-events-2017-evaluation/meta/street",
                 "meta/street"),
                ("TUT-sound-events-2017-evaluation/evaluation_setup",
                 "evaluation_setup"),
            ]
            for source, destination in evaluation_moves:
                move_all_files_to(
                    os.path.join(self.dataset_path, source),
                    os.path.join(self.dataset_path, destination)
                )
            self.set_as_downloaded()
class FSDKaggle2018(Dataset):
    """FSDKaggle2018 dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for FSDKaggle2018.

    Url: https://zenodo.org/record/2552860

    Eduardo Fonseca et al.
    "General-purpose Tagging of Freesound Audio with AudioSet Labels:
    Task Description, Dataset, and Baseline".
    Proceedings of the DCASE 2018 Workshop.
    2018.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/FSDKaggle2018).

    Examples
    --------
    To work with FSDKaggle2018 dataset, just initialize this
    class with the path to the dataset.

    >>> from dcase_models.data.datasets import FSDKaggle2018
    >>> dataset = FSDKaggle2018('../datasets/FSDKaggle2018')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define folders and folds; load metadata and the label list.

        ``self.metadata`` maps each file name to a dict with the keys
        'label', 'usage', 'freesound_id', 'license' and 'fold'. Files of
        the train csv go to the 'train' fold; files of the test csv go
        to 'validate' when their usage is 'Public' and to 'test'
        otherwise. The csv files are only read when
        ``check_if_downloaded()`` is true.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["train", "validate", "test"]
        self.meta_path = os.path.join(self.dataset_path, 'meta')
        self.label_list = []

        meta_file_train = os.path.join(
            self.meta_path, 'train_post_competition.csv'
        )
        meta_file_test = os.path.join(
            self.meta_path, 'test_post_competition_scoring_clips.csv'
        )

        self.metadata = {}
        if self.check_if_downloaded():
            for meta_file in [meta_file_train, meta_file_test]:
                is_train_file = meta_file == meta_file_train
                # newline='' is the mode recommended by the csv module.
                with open(meta_file, newline='') as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=',')
                    next(csv_reader, None)  # skip the header row
                    for row in csv_reader:
                        filename = row[0]
                        label = row[1]
                        usage = row[2]
                        freesound_id = row[3]
                        license_name = row[4]
                        if is_train_file:
                            fold = 'train'
                        elif usage == 'Public':
                            fold = 'validate'
                        else:
                            fold = 'test'
                        self.metadata[filename] = {
                            'label': label, 'usage': usage,
                            'freesound_id': freesound_id,
                            'license': license_name,
                            'fold': fold}
                        if label not in self.label_list:
                            self.label_list.append(label)
            self.label_list.sort()

    def generate_file_lists(self):
        """Build the file list of every fold from the metadata.

        Files of the 'validate' fold are stored in the 'test' audio
        folder; the other folds use a folder named after the fold.
        """
        self.file_lists = {fold: [] for fold in self.fold_list}
        for filename, entry in self.metadata.items():
            fold = entry['fold']
            fold_folder = 'test' if fold == 'validate' else fold
            file_path = os.path.join(
                self.audio_path, fold_folder, filename
            )
            self.file_lists[fold].append(file_path)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the one-hot annotation matrix of a file.

        Parameters
        ----------
        file_name : str
            Path to the file; its base name is the metadata key.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Not used by this dataset.

        Returns
        -------
        ndarray
            Array of shape (len(features), len(self.label_list)) whose
            column of the file's label is set to one.

        """
        y = np.zeros((len(features), len(self.label_list)))
        label_name = self.metadata[os.path.basename(file_name)]['label']
        label_index = self.label_list.index(label_name)
        y[:, label_index] = 1
        return y

    def download(self, force_download=False):
        """Download the dataset and arrange its folders.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/2552860/files"
        zenodo_files = [
            'FSDKaggle2018.audio_test.zip',
            'FSDKaggle2018.audio_train.zip',
            'FSDKaggle2018.doc.zip',
            'FSDKaggle2018.meta.zip'
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        # The folders are only renamed after an actual download: once
        # they have been moved, a second call would not find the source
        # folders any more.
        if downloaded:
            mkdir_if_not_exists(self.audio_path)
            renames = [
                ('FSDKaggle2018.audio_train',
                 os.path.join(self.audio_path, 'train')),
                ('FSDKaggle2018.audio_test',
                 os.path.join(self.audio_path, 'test')),
                ('FSDKaggle2018.meta',
                 os.path.join(self.dataset_path, 'meta')),
                ('FSDKaggle2018.doc',
                 os.path.join(self.dataset_path, 'doc')),
            ]
            for source_name, destination in renames:
                os.rename(
                    os.path.join(self.dataset_path, source_name),
                    destination,
                )
            self.set_as_downloaded()
class MAVD(Dataset):
    """MAVD-traffic dataset.

    This class inherits all functionality from Dataset and
    defines specific attributes and methods for MAVD-traffic.

    Url: https://zenodo.org/record/3338727

    P. Zinemanas, P. Cancela, and M. Rocamora.
    "MAVD: a dataset for sound event detection in urban environments"
    Proceedings of the Detection and Classification of Acoustic
    Scenes and Events 2019 Workshop (DCASE 2019).
    October, 2019.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/MAVD).

    Examples
    --------
    To work with MAVD dataset, just initialize this
    class with the path to the dataset.

    >>> from dcase_models.data.datasets import MAVD
    >>> dataset = MAVD('../datasets/MAVD')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Forward ``dataset_path`` to the Dataset constructor."""
        super().__init__(dataset_path)

    def build(self):
        """Define folders, folds and the label list.

        The label list is the vehicle list followed by the component
        list.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.annotations_path = os.path.join(self.dataset_path, 'annotations')
        self.fold_list = ["train", "validate", "test"]

        # TODO: Add other levels
        self.vehicle_list = ['car', 'bus', 'truck', 'motorcycle']
        self.component_list = ['engine_idling', 'engine_accelerating',
                               'brakes', 'wheel_rolling', 'compressor']
        self.label_list = self.vehicle_list + self.component_list

    def generate_file_lists(self):
        """Store, for every fold, the wav files found in its sub-folder."""
        for fold in self.fold_list:
            audio_folder = os.path.join(self.audio_path, fold)
            self.file_lists[fold] = list_wav_files(audio_folder)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of a file.

        Parameters
        ----------
        file_name : str
            Path to the audio file. The annotation path is derived by
            replacing the audio folder returned by
            ``get_audio_paths()`` with the annotations folder and
            ``.wav`` with ``.txt``.
        features : sequence
            Features of the file; only its length is used.
        time_resolution : float
            Duration of one frame, in the unit of the onset/offset
            columns of the annotation file.

        Returns
        -------
        ndarray
            Array of shape (len(features), len(self.label_list)). An
            event label is split on '/' and every part found in the
            label list is activated from floor(onset / resolution) up
            to, but excluding, ceil(offset / resolution).

        """
        audio_path, _ = self.get_audio_paths()
        label_file = file_name.replace(
            audio_path,
            self.annotations_path
        ).replace('.wav', '.txt')

        labels = read_csv(label_file, delimiter='\t', header=None)
        labels.columns = ['event_onset', 'event_offset', 'event_label']

        event_roll = np.zeros((len(features), len(self.label_list)))
        for event in labels.to_dict('records'):
            for sub_label in event['event_label'].split('/'):
                if sub_label not in self.label_list:
                    continue
                label_ix = self.label_list.index(sub_label)
                onset = int(np.floor(
                    event['event_onset'] / float(time_resolution))
                )
                offset = int(np.ceil(
                    event['event_offset'] / float(time_resolution))
                )
                event_roll[onset:offset, label_ix] = 1
        return event_roll

    def download(self, force_download=False):
        """Download the dataset, arrange its folders and convert to wav.

        Parameters
        ----------
        force_download : bool
            Passed on unchanged to the parent ``download``.

        """
        zenodo_url = "https://zenodo.org/record/3338727/files/"
        zenodo_files = ['audio_train.zip', 'audio_validate.zip',
                        'audio_test.zip', 'annotations_train.zip',
                        'annotations_validate.zip',
                        'annotations_test.zip', 'README']
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        # The folders are only renamed after an actual download: once
        # they have been moved, a second call would not find the source
        # folders any more.
        if downloaded:
            mkdir_if_not_exists(self.audio_path)
            mkdir_if_not_exists(self.annotations_path)
            for fold in self.fold_list:
                os.rename(
                    os.path.join(self.dataset_path, 'audio_%s' % fold),
                    os.path.join(self.audio_path, fold)
                )
                os.rename(
                    os.path.join(self.dataset_path, 'annotations_%s' % fold),
                    os.path.join(self.annotations_path, fold)
                )
            # Convert .flac to .wav
            self.convert_to_wav()
            self.set_as_downloaded()
def get_available_datasets():
    """Return the public classes defined in this module.

    Returns
    -------
    dict
        Maps the class name to the class object, for every class that
        is a member of this module, whose ``__module__`` is this
        module's name and whose name does not start with an underscore.

    """
    this_module = sys.modules[__name__]
    datasets = {}
    for class_name, class_obj in inspect.getmembers(
            this_module, inspect.isclass):
        if class_obj.__module__ != __name__:
            # imported from another module
            continue
        if class_name.startswith('_'):
            # private helper class
            continue
        datasets[class_name] = class_obj
    return datasets