Source code for dcase_models.data.datasets

import os
import numpy as np
import sys
import csv
from pandas import read_csv
import yaml
from sed_eval.util.event_roll import event_list_to_event_roll
from librosa.util import fix_length

from dcase_models.data.dataset_base import Dataset
from dcase_models.util.files import move_all_files_to_parent, move_all_files_to
from dcase_models.util.files import mkdir_if_not_exists, list_wav_files

import inspect


# Names exported by ``from dcase_models.data.datasets import *``.
# The private base class ``_TAUUrbanAcousticScenes`` is deliberately absent.
__all__ = [
    'UrbanSound8k',
    'ESC50',
    'ESC10',
    'URBAN_SED',
    'SONYC_UST',
    'TAUUrbanAcousticScenes2019',
    'TAUUrbanAcousticScenes2020Mobile',
    'TUTSoundEvents2017',
    'FSDKaggle2018',
    'MAVD',
]


class UrbanSound8k(Dataset):
    """UrbanSound8k dataset.

    Subclass of Dataset that defines the folder layout, the fold names,
    the class labels and the download location of UrbanSound8k.

    Url: https://urbansounddataset.weebly.com/urbansound8k.html

    J. Salamon, C. Jacoby, and J. P. Bello
    "A dataset and taxonomy for urban sound research,"
    22nd ACM International Conference on Multimedia (ACM-MM'14)
    Orlando, FL, USA, November 2014

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/UrbanSound8k).

    Examples
    --------
    To work with UrbanSound8k dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import UrbanSound8k
    >>> dataset = UrbanSound8k('../datasets/UrbanSound8K')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define the audio folder, the ten folds and the ten labels."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ['fold%d' % number for number in range(1, 11)]
        self.label_list = [
            "air_conditioner", "car_horn", "children_playing",
            "dog_bark", "drilling", "engine_idling", "gun_shot",
            "jackhammer", "siren", "street_music",
        ]

    def generate_file_lists(self):
        """Fill ``self.file_lists`` with the wav files of each fold folder."""
        for fold_name in self.fold_list:
            fold_folder = os.path.join(self.audio_path, fold_name)
            self.file_lists[fold_name] = list_wav_files(fold_folder)

    def get_annotations(self, file_name, features, time_resolution):
        """Return a one-hot annotation matrix for ``file_name``.

        The class index is read from the second dash-separated field of
        the file's base name. The result has one row per element of
        ``features`` and one column per label; ``time_resolution`` is
        not used.
        """
        annotations = np.zeros((len(features), len(self.label_list)))
        name_fields = os.path.basename(file_name).split('-')
        annotations[:, int(name_fields[1])] = 1
        return annotations

    def download(self, force_download=False):
        """Download UrbanSound8K.tar.gz from Zenodo via the base class.

        When the base download returns a truthy value, the content of the
        ``UrbanSound8K`` folder is moved up to ``dataset_path`` and the
        dataset is flagged as downloaded.
        """
        fetched = super().download(
            "https://zenodo.org/record/1203745/files",
            ["UrbanSound8K.tar.gz"],
            force_download
        )
        if not fetched:
            return
        move_all_files_to_parent(self.dataset_path, "UrbanSound8K")
        self.set_as_downloaded()
class ESC50(Dataset):
    """ESC-50 dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for ESC-50.

    Url: https://github.com/karolpiczak/ESC-50

    K. J. Piczak
    "Esc: Dataset for environmental sound classification,"
    Proceedings of the 23rd ACM international conference on Multimedia
    Brisbane, Australia, October, 2015.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/ESC50).

    Examples
    --------
    To work with ESC50 dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import ESC50
    >>> dataset = ESC50('../datasets/ESC50')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Load ``meta/esc50.csv`` and define folds and labels.

        ``self.metadata`` maps each file name to its fold, class index,
        class name and ESC-10 flag. The metadata file is only read when
        ``check_if_downloaded()`` is true; otherwise ``self.metadata``
        stays empty and ``self.label_list`` is not assigned here.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        meta_file = os.path.join(self.dataset_path, 'meta/esc50.csv')

        self.metadata = {}
        if self.check_if_downloaded():
            n_classes = 50
            self.label_list = [''] * n_classes
            with open(meta_file) as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',')
                next(csv_reader, None)  # first row is the column header
                for row in csv_reader:
                    filename = row[0]
                    class_ix = int(row[2])
                    class_name = row[3]
                    self.metadata[filename] = {
                        'fold': 'fold' + row[1],
                        'class_ix': class_ix,
                        'class_name': class_name,
                        'esc10': row[4] == 'True'}
                    if class_name not in self.label_list:
                        self.label_list[class_ix] = class_name

        self.fold_list = ["fold1", "fold2", "fold3", "fold4", "fold5"]
        self.evaluation_mode = 'cross-validation'

    def generate_file_lists(self):
        """Group the audio files by the fold stored in the metadata.

        The audio folder is listed once and every file is appended to
        the list of its fold. Files that are missing from
        ``self.metadata`` or whose fold is not in ``self.fold_list`` are
        ignored.
        """
        self.file_lists = {fold: [] for fold in self.fold_list}
        for wav_path in list_wav_files(self.audio_path):
            entry = self.metadata.get(self.get_basename_wav(wav_path))
            if entry is None:
                continue
            fold_files = self.file_lists.get(entry['fold'])
            if fold_files is not None:
                fold_files.append(wav_path)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the one-hot label of ``file_name`` repeated per frame.

        The result has one row per element of ``features`` and one
        column per label; ``time_resolution`` is not used.
        """
        one_hot = np.zeros(len(self.label_list))
        basename = self.get_basename_wav(file_name)
        one_hot[self.metadata[basename]['class_ix']] = 1
        return np.repeat(np.expand_dims(one_hot, 0), len(features), 0)

    def get_basename_wav(self, filename):
        """Map any path such as ``.../xxxx.npy`` to ``xxxx.wav``.

        Everything after the first dot of the base name is dropped.
        """
        return os.path.basename(filename).split('.')[0] + '.wav'

    def download(self, force_download=False):
        """Download the ESC-50 master archive from GitHub.

        When the base download returns a truthy value, the content of
        ``ESC-50-master`` is moved up to ``dataset_path`` and the dataset
        is flagged as downloaded.
        """
        github_url = "https://github.com/karoldvl/ESC-50/archive/"
        github_files = ["master.zip"]
        downloaded = super().download(
            github_url, github_files, force_download
        )
        if downloaded:
            move_all_files_to_parent(self.dataset_path, "ESC-50-master")
            self.set_as_downloaded()
class ESC10(ESC50):
    """ESC-10 dataset.

    This class inherits all functionality from ESC50 (and therefore from
    Dataset) and restricts it to ESC-10, the subset of ESC-50 clips that
    the ESC-50 metadata flags as belonging to ESC-10.

    Url: https://github.com/karolpiczak/ESC-50

    K. J. Piczak
    "Esc: Dataset for environmental sound classification,"
    Proceedings of the 23rd ACM international conference on Multimedia
    Brisbane, Australia, October, 2015.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/ESC50).

    Examples
    --------
    To work with ESC10 dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import ESC10
    >>> dataset = ESC10('../datasets/ESC50')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Build ESC-50, then keep only the ESC-10 clips and labels.

        After the ESC-50 metadata is loaded, entries without the
        ``esc10`` flag are dropped, ``self.label_list`` is reduced to the
        labels that remain (ordered by their ESC-50 class index) and each
        ``class_ix`` is rewritten as a position in the reduced list.
        The file lists are regenerated at the end.
        """
        super().build()

        # Copy the kept entries so that class_ix can be rewritten below
        # without touching the dictionaries built by ESC50.
        esc10_metadata = {
            name: info.copy()
            for name, info in self.metadata.items()
            if info['esc10']
        }
        kept_class_ids = sorted(
            {info['class_ix'] for info in esc10_metadata.values()}
        )

        self.metadata = esc10_metadata
        # With no ESC-10 entries (e.g. dataset not downloaded) this yields
        # an empty list without reading the ESC-50 label list.
        self.label_list = [self.label_list[ix] for ix in kept_class_ids]

        for info in self.metadata.values():
            info['class_ix'] = self.label_list.index(info['class_name'])

        # regenerate self.file_lists
        self.generate_file_lists()
class URBAN_SED(Dataset):
    """URBAN-SED dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for URBAN-SED.

    Url: http://urbansed.weebly.com/

    J. Salamon, D. MacConnell, M. Cartwright, P. Li, and J. P. Bello.
    "Scaper: A library for soundscape synthesis and augmentation".
    IEEE Workshop on Applications of Signal Processing to Audio and
    Acoustics
    New York, USA, October 2017.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/URBAN_SED).

    Examples
    --------
    To work with URBAN_SED dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import URBAN_SED
    >>> dataset = URBAN_SED('../datasets/URBAN_SED')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define audio/annotation folders, folds, labels and mode."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.annotations_folder = os.path.join(
            self.dataset_path, 'annotations')
        self.fold_list = ["train", "validate", "test"]
        self.label_list = ["air_conditioner", "car_horn",
                           "children_playing", "dog_bark", "drilling",
                           "engine_idling", "gun_shot", "jackhammer",
                           "siren", "street_music"]
        self.evaluation_mode = 'train-validate-test'

    def generate_file_lists(self):
        """List the wav files per fold and locate their label files.

        ``self.wav_to_labels`` maps every listed audio path to
        ``<annotations_folder>/<fold>/<name>.txt``, where ``<name>`` is
        the audio base name up to its first dot.
        """
        for fold in self.fold_list:
            audio_folder = os.path.join(self.audio_path, fold)
            self.file_lists[fold] = list_wav_files(audio_folder)

        self.wav_to_labels = {}
        for fold in self.fold_list:
            for fil in self.file_lists[fold]:
                label_file = os.path.basename(fil).split('.')[0] + '.txt'
                self.wav_to_labels[fil] = os.path.join(
                    self.annotations_folder, fold, label_file)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of ``file_name`` aligned to ``features``.

        The tab-separated label file (onset, offset, label; no header)
        is converted to an event roll with ``time_resolution`` and then
        truncated or padded along the first axis so that it has
        exactly ``features.shape[0]`` rows.
        """
        label_file = self.wav_to_labels[file_name]
        labels = read_csv(label_file, delimiter='\t', header=None)
        labels.columns = ['event_onset', 'event_offset', 'event_label']
        event_roll = event_list_to_event_roll(
            labels.to_dict('records'),
            self.label_list,
            time_resolution
        )
        # fix_length both truncates and pads. ``size`` is passed by keyword
        # because it is keyword-only in recent librosa releases.
        event_roll = fix_length(event_roll, size=features.shape[0], axis=0)
        assert event_roll.shape[0] == features.shape[0]
        return event_roll

    def download(self, force_download=False):
        """Download URBAN-SED_v2.0.0.tar.gz from Zenodo via the base class.

        When the base download returns a truthy value, the content of
        ``URBAN-SED_v2.0.0`` is moved up to ``dataset_path`` and the
        dataset is flagged as downloaded.
        """
        zenodo_url = "https://zenodo.org/record/1324404/files"
        zenodo_files = ["URBAN-SED_v2.0.0.tar.gz"]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            move_all_files_to_parent(self.dataset_path, "URBAN-SED_v2.0.0")
            self.set_as_downloaded()
class SONYC_UST(Dataset):
    """SONYC-UST dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for SONYC-UST.

    Version: 2.1.0

    Url: https://zenodo.org/record/3693077

    M. Cartwright, et al.
    "SONYC Urban Sound Tagging (SONYC-UST): A Multilabel Dataset from an
    Urban Acoustic Sensor Network".
    Proceedings of the Workshop on Detection and Classification of
    Acoustic Scenes and Events (DCASE), 2019.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/SONYC_UST).

    Examples
    --------
    To work with SONYC_UST dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import SONYC_UST
    >>> dataset = SONYC_UST('../datasets/SONYC_UST')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define paths and folds; load annotations and taxonomy.

        ``self.metadata`` and ``self.label_list`` always exist after this
        call (empty when the dataset is not downloaded). When
        ``check_if_downloaded()`` is true, ``self.metadata`` is the
        annotations table sorted by ``audio_filename`` and
        ``self.label_list`` is the parsed taxonomy YAML.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["train", "validate"]
        self.evaluation_mode = 'train-validate-test'
        self.meta_file = os.path.join(self.dataset_path, 'annotations.csv')
        self.taxonomy_file = os.path.join(
            self.dataset_path, 'dcase-ust-taxonomy.yaml')

        self.metadata = {}
        self.label_list = []
        if self.check_if_downloaded():
            self.metadata = read_csv(self.meta_file).sort_values(
                'audio_filename')
            with open(self.taxonomy_file, 'r') as f:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects and this file comes from a download; consider
                # yaml.safe_load if the taxonomy only holds plain mappings.
                self.label_list = yaml.load(f, Loader=yaml.Loader)

    def generate_file_lists(self):
        """Assign the audio files to the folds named in the annotations.

        The 'train' fold takes every file whose ``split`` is 'train'.
        Any other fold only takes files that have a row from annotator
        id 0 in that split.
        """
        self.file_lists = {}
        all_files = list_wav_files(self.audio_path)
        assert len(all_files) != 0

        for fold in self.fold_list:
            in_fold = self.metadata['split'] == fold
            if fold != 'train':
                in_fold = in_fold & (self.metadata['annotator_id'] == 0)
            # A set gives constant-time membership tests in the filter below.
            fold_filenames = set(self.metadata.loc[in_fold, 'audio_filename'])
            self.file_lists[fold] = [
                fil for fil in all_files
                if os.path.basename(fil) in fold_filenames
            ]

    def get_annotations(self, file_name, features, time_resolution):
        """Return coarse-level tags of ``file_name`` repeated per frame.

        For files in the 'train' split a class is present when the sum
        of its presence column over all rows of the file is at least 1.
        For other splits a class is present when any row from annotator
        id 0 marks it with a value of at least 1; without such a row the
        class stays absent. ``time_resolution`` is not used.
        """
        # only coarse level
        # TODO add fine level
        coarse_labels = self.label_list['coarse']
        y = np.zeros(len(coarse_labels))

        basename = os.path.basename(file_name).split('.')[0] + '.wav'
        file_rows = self.metadata[
            self.metadata['audio_filename'] == basename]
        is_train = file_rows['split'].values[0] == 'train'
        if not is_train:
            # A boolean mask keeps this well defined for zero, one or
            # several annotator-0 rows.
            file_rows = file_rows[file_rows['annotator_id'].values == 0]

        for class_ix, class_name in coarse_labels.items():
            class_column = str(class_ix) + '_' + class_name + '_presence'
            presence = file_rows[class_column].values
            if is_train:
                y[class_ix-1] = np.sum(presence) >= 1
            else:
                y[class_ix-1] = np.any(presence >= 1)

        y = np.expand_dims(y, 0)
        y = np.repeat(y, len(features), 0)
        return y

    def download(self, force_download=False):
        """Download the SONYC-UST files from Zenodo via the base class.

        The dataset is flagged as downloaded after the base download
        returns, regardless of its return value.
        """
        zenodo_url = "https://zenodo.org/record/3693077/files"
        zenodo_files = ["annotations.csv", "audio.tar.gz",
                        "dcase-ust-taxonomy.yaml", "README.md"]
        super().download(
            zenodo_url, zenodo_files, force_download
        )
        self.set_as_downloaded()
class _TAUUrbanAcousticScenes(Dataset):
    """Base class for TAU Urban Acoustic Scenes datasets.

    It holds the folder layout, labels, file-list generation and
    annotation logic shared by the concrete TAU dataset classes, which
    only supply their own download locations.
    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define the paths, the folds and the scene labels."""
        root = self.dataset_path
        setup_folder = os.path.join(root, 'evaluation_setup')

        self.audio_path = os.path.join(root, 'audio')
        self.fold_list = ["train", "test"]
        self.meta_file = os.path.join(root, 'meta.csv')
        self.label_list = [
            'airport', 'shopping_mall', 'metro_station',
            'street_pedestrian', 'public_square', 'street_traffic',
            'tram', 'bus', 'metro', 'park',
        ]
        self.evaluation_setup_train = os.path.join(
            setup_folder, 'fold1_train.csv')
        self.evaluation_setup_test = os.path.join(
            setup_folder, 'fold1_test.csv')
        self.annotations_folder = os.path.join(root, 'annotations')

    def generate_file_lists(self):
        """Read the train and test evaluation-setup files.

        Each setup file is tab separated and its first row is skipped.
        Only the last path component of the first column is kept and
        joined to ``self.audio_path``.
        """
        self.file_lists = {}
        setup_by_fold = (
            ('train', self.evaluation_setup_train),
            ('test', self.evaluation_setup_test),
        )
        for fold_name, setup_path in setup_by_fold:
            fold_files = []
            self.file_lists[fold_name] = fold_files
            with open(setup_path) as setup_file:
                rows = csv.reader(setup_file, delimiter='\t')
                next(rows, None)  # header row
                for row in rows:
                    audio_name = row[0].split('/')[-1]
                    fold_files.append(
                        os.path.join(self.audio_path, audio_name)
                    )

    def get_annotations(self, file_name, features, time_resolution):
        """Return a one-hot annotation matrix for ``file_name``.

        The base name (up to its first dot) must consist of exactly
        five dash-separated fields; the first one is the scene label.
        The result has one row per element of ``features`` and one
        column per label; ``time_resolution`` is not used.
        """
        annotations = np.zeros((len(features), len(self.label_list)))
        # drop the file extension before splitting into fields
        stem = os.path.basename(file_name).split('.')[0]
        # Unpacking into five names rejects names with another field count.
        scene_label, _city, _location, _segment, _device = stem.split('-')
        annotations[:, self.label_list.index(scene_label)] = 1
        return annotations

    def download(self, zenodo_url, zenodo_files, force_download=False):
        """Forward to the base class download and return its result."""
        return super().download(
            zenodo_url, zenodo_files, force_download=force_download
        )
class TAUUrbanAcousticScenes2019(_TAUUrbanAcousticScenes):
    """TAU Urban Acoustic Scenes 2019 dataset.

    This class inherits all functionality from Dataset (through the
    shared TAU base class) and defines the download specific to
    TAU Urban Acoustic Scenes 2019.

    Url: https://zenodo.org/record/2589280

    A. Mesaros, T. Heittola, and T. Virtanen.
    "A multi-device dataset for urban acoustic scene classification".
    Proceedings of the Detection and Classification of Acoustic Scenes
    and Events 2018 Workshop (DCASE 2018).
    November 2018.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TAUUrbanAcousticScenes2019).

    Examples
    --------
    To work with TAUUrbanAcousticScenes2019 dataset, just initialize this
    class with the path to the dataset.

    >>> from dcase_models.data.datasets import TAUUrbanAcousticScenes2019
    >>> dataset = TAUUrbanAcousticScenes2019(
    ...     '../datasets/TAUUrbanAcousticScenes2019')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def download(self, force_download=False):
        """Download the development archives from Zenodo.

        The file list holds the audio archives numbered 1 to 21 followed
        by the doc and meta archives. When the download returns a truthy
        value, the content of the extracted development folder is moved
        up to ``dataset_path`` and the dataset is flagged as downloaded.
        """
        prefix = 'TAU-urban-acoustic-scenes-2019-development'
        archives = [
            "TAU-urban-acoustic-scenes-2019-development.audio.%d.zip" % part
            for part in range(1, 22)
        ]
        archives.extend([
            'TAU-urban-acoustic-scenes-2019-development.doc.zip',
            'TAU-urban-acoustic-scenes-2019-development.meta.zip',
        ])
        fetched = super().download(
            "https://zenodo.org/record/2589280/files",
            archives,
            force_download
        )
        if not fetched:
            return
        move_all_files_to_parent(self.dataset_path, prefix)
        self.set_as_downloaded()
class TAUUrbanAcousticScenes2020Mobile(_TAUUrbanAcousticScenes):
    """TAU Urban Acoustic Scenes 2020 Mobile dataset.

    This class inherits all functionality from Dataset (through the
    shared TAU base class) and defines the download specific to
    TAU Urban Acoustic Scenes 2020 Mobile.

    Url: https://zenodo.org/record/3819968

    T. Heittola, A. Mesaros, and T. Virtanen.
    "Acoustic scene classification in DCASE 2020 challenge:
    generalization across devices and low complexity solutions".
    Proceedings of the Detection and Classification of Acoustic Scenes
    and Events 2020 Workshop (DCASE 2020).
    2020

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TAUUrbanAcousticScenes2020Mobile).

    Examples
    --------
    To work with TAUUrbanAcousticScenes2020Mobile dataset, just initialize
    this class with the path to the dataset.

    >>> from dcase_models.data.datasets import (
    ...     TAUUrbanAcousticScenes2020Mobile)
    >>> dataset = TAUUrbanAcousticScenes2020Mobile(
    ...     '../datasets/TAUUrbanAcousticScenes2020Mobile')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def download(self, force_download=False):
        """Download the development archives from Zenodo.

        The file list holds the audio archives numbered 1 to 16 followed
        by the doc and meta archives. When the download returns a truthy
        value, the content of the extracted development folder is moved
        up to ``dataset_path`` and the dataset is flagged as downloaded.
        """
        zenodo_url = "https://zenodo.org/record/3819968/files"

        zenodo_files = [
            "TAU-urban-acoustic-scenes-2020-mobile-development.audio.%d.zip"
            % j for j in range(1, 17)]
        zenodo_files.append(
            'TAU-urban-acoustic-scenes-2020-mobile-development.doc.zip')
        zenodo_files.append(
            'TAU-urban-acoustic-scenes-2020-mobile-development.meta.zip')

        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            move_all_files_to_parent(
                self.dataset_path,
                "TAU-urban-acoustic-scenes-2020-mobile-development")
            self.set_as_downloaded()
class TUTSoundEvents2017(Dataset):
    """TUT Sound Events 2017 dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for TUT Sound Events 2017.

    Url: https://zenodo.org/record/814831

    A. Mesaros et al.
    DCASE 2017 challenge setup: tasks, datasets and baseline system.
    Detection and Classification of Acoustic Scenes and Events 2017
    Workshop (DCASE2017), 85-92.
    November 2017.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/TUTSoundEvents2017).

    Examples
    --------
    To work with TUTSoundEvents2017 dataset, just initialize this class
    with the path to the dataset.

    >>> from dcase_models.data.datasets import TUTSoundEvents2017
    >>> dataset = TUTSoundEvents2017('../datasets/TUTSoundEvents2017')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define the paths, the four folds and the event labels."""
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["fold1", "fold2", "fold3", "fold4"]
        self.meta_path = os.path.join(self.dataset_path, 'meta')
        self.label_list = ['brakes squeaking', 'car', 'children',
                           'large vehicle', 'people speaking',
                           'people walking']
        self.evaluation_setup_path = os.path.join(
            self.dataset_path, 'evaluation_setup'
        )

    def _load_setup_file(self, fold, setup_file_name):
        """Read one evaluation-setup file into ``fold``'s file list.

        For every tab-separated row, the last path component of the
        first column is placed under ``<audio_path>/street``. The
        matching annotation path is the same path with the audio folder
        replaced by the meta folder and '.wav' replaced by '.ann'.
        """
        self.file_lists[fold] = []
        setup_file = os.path.join(self.evaluation_setup_path, setup_file_name)
        with open(setup_file) as csv_file:
            for row in csv.reader(csv_file, delimiter='\t'):
                file_name = row[0].split('/')[-1]
                file_path = os.path.join(self.audio_path, 'street', file_name)
                self.file_lists[fold].append(file_path)
                file_ann = file_path.replace(self.audio_path, self.meta_path)
                self.wav_to_labels[file_path] = file_ann.replace(
                    '.wav', '.ann')

    def generate_file_lists(self):
        """Build the file lists of every fold plus the 'test' list.

        Fold lists come from ``street_<fold>_test.txt`` and the 'test'
        list from ``street_test.txt``. ``self.wav_to_labels`` maps every
        listed audio path to its annotation file.
        """
        self.file_lists = {}
        self.wav_to_labels = {}
        for fold in self.fold_list:
            self._load_setup_file(fold, 'street_%s_test.txt' % fold)
        # test folder
        self._load_setup_file('test', 'street_test.txt')

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of ``file_name`` aligned to ``features``.

        The tab-separated annotation file (no header) is read either in
        its 3-column form (onset, offset, label) or in its 7-column
        form. The event roll built with ``time_resolution`` is then
        truncated or padded along the first axis to
        ``features.shape[0]`` rows.
        """
        label_file = self.wav_to_labels[file_name]
        labels = read_csv(label_file, delimiter='\t', header=None)
        if labels.shape[1] == 3:
            labels.columns = ['event_onset', 'event_offset', 'event_label']
        else:
            labels.columns = ['file_path', 'scene', 'event_onset',
                              'event_offset', 'event_label',
                              'mixture', 'file_name']
        event_roll = event_list_to_event_roll(
            labels.to_dict('records'),
            self.label_list,
            time_resolution
        )
        # fix_length both truncates and pads. ``size`` is passed by keyword
        # because it is keyword-only in recent librosa releases.
        event_roll = fix_length(event_roll, size=features.shape[0], axis=0)
        assert event_roll.shape[0] == features.shape[0]
        return event_roll

    def download(self, force_download=False):
        """Download the development and evaluation sets from Zenodo.

        After each download that returns a truthy value the extracted
        files are moved into ``dataset_path``: the development folder is
        flattened into it, and the evaluation audio, meta and
        evaluation_setup folders are merged into the matching folders.
        The dataset is flagged as downloaded after the evaluation files
        have been moved.
        """
        zenodo_url = "https://zenodo.org/record/814831/files"
        zenodo_files = [
            'TUT-sound-events-2017-development.audio.1.zip',
            'TUT-sound-events-2017-development.audio.2.zip',
            'TUT-sound-events-2017-development.doc.zip',
            'TUT-sound-events-2017-development.meta.zip'
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            move_all_files_to_parent(
                self.dataset_path, "TUT-sound-events-2017-development")

        zenodo_url = "https://zenodo.org/record/1040179/files"
        zenodo_files = [
            'TUT-sound-events-2017-evaluation.audio.zip',
            'TUT-sound-events-2017-evaluation.meta.zip',
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            # (source inside dataset_path, destination inside dataset_path)
            moves = [
                ("TUT-sound-events-2017-evaluation/audio/street",
                 "audio/street"),
                ("TUT-sound-events-2017-evaluation/meta/street",
                 "meta/street"),
                ("TUT-sound-events-2017-evaluation/evaluation_setup",
                 "evaluation_setup"),
            ]
            for source, destination in moves:
                move_all_files_to(
                    os.path.join(self.dataset_path, source),
                    os.path.join(self.dataset_path, destination)
                )
            self.set_as_downloaded()
class FSDKaggle2018(Dataset):
    """FSDKaggle2018 dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for FSDKaggle2018.

    Url: https://zenodo.org/record/2552860

    Eduardo Fonseca et al.
    "General-purpose Tagging of Freesound Audio with AudioSet Labels:
    Task Description, Dataset, and Baseline".
    Proceedings of the DCASE 2018 Workshop.
    2018.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/FSDKaggle2018).

    Examples
    --------
    To work with FSDKaggle2018 dataset, just initialize this class with
    the path to the dataset.

    >>> from dcase_models.data.datasets import FSDKaggle2018
    >>> dataset = FSDKaggle2018('../datasets/FSDKaggle2018')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define paths and folds; load the train and test metadata.

        ``self.metadata`` maps each file name to its label, usage,
        freesound id, license and fold. Rows of the train file go to
        'train'; rows of the test file go to 'validate' when their
        usage is 'Public' and to 'test' otherwise. ``self.label_list``
        holds the sorted distinct labels. Both stay empty when
        ``check_if_downloaded()`` is false.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.fold_list = ["train", "validate", "test"]
        self.meta_path = os.path.join(self.dataset_path, 'meta')
        self.label_list = []

        meta_file_train = os.path.join(
            self.meta_path, 'train_post_competition.csv'
        )
        meta_file_test = os.path.join(
            self.meta_path, 'test_post_competition_scoring_clips.csv'
        )

        self.metadata = {}
        if self.check_if_downloaded():
            for meta_file in [meta_file_train, meta_file_test]:
                with open(meta_file) as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=',')
                    next(csv_reader, None)  # first row is the column header
                    for row in csv_reader:
                        filename = row[0]
                        label = row[1]
                        usage = row[2]
                        if meta_file == meta_file_train:
                            fold = 'train'
                        elif usage == 'Public':
                            fold = 'validate'
                        else:
                            fold = 'test'
                        self.metadata[filename] = {
                            'label': label, 'usage': usage,
                            'freesound_id': row[3], 'license': row[4],
                            'fold': fold}
                        if label not in self.label_list:
                            self.label_list.append(label)
            self.label_list.sort()

    def generate_file_lists(self):
        """Build the per-fold file lists from the metadata.

        Files of the 'validate' fold are located in the 'test' audio
        folder; the other folds use a folder named after the fold.
        """
        self.file_lists = {fold: [] for fold in self.fold_list}
        for filename, info in self.metadata.items():
            fold = info['fold']
            fold_folder = 'test' if fold == 'validate' else fold
            file_path = os.path.join(
                self.audio_path, fold_folder, filename
            )
            self.file_lists[fold].append(file_path)

    def get_annotations(self, file_name, features, time_resolution):
        """Return a one-hot annotation matrix for ``file_name``.

        The label is looked up in the metadata by the file's base name.
        The result has one row per element of ``features`` and one
        column per label; ``time_resolution`` is not used.
        """
        y = np.zeros((len(features), len(self.label_list)))
        label_name = self.metadata[os.path.basename(file_name)]['label']
        label_index = self.label_list.index(label_name)
        y[:, label_index] = 1
        return y

    def download(self, force_download=False):
        """Download FSDKaggle2018 from Zenodo and arrange its folders.

        The extracted folders are renamed to ``audio/train``,
        ``audio/test``, ``meta`` and ``doc`` and the dataset is flagged
        as downloaded. This only happens when the base download returns
        a truthy value, as in the sibling dataset classes, so that a
        repeated call does not try to rename folders that were already
        moved.
        """
        zenodo_url = "https://zenodo.org/record/2552860/files"
        zenodo_files = [
            'FSDKaggle2018.audio_test.zip',
            'FSDKaggle2018.audio_train.zip',
            'FSDKaggle2018.doc.zip',
            'FSDKaggle2018.meta.zip'
        ]
        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            mkdir_if_not_exists(self.audio_path)
            # (extracted folder name, final location)
            renames = [
                ('FSDKaggle2018.audio_train',
                 os.path.join(self.audio_path, 'train')),
                ('FSDKaggle2018.audio_test',
                 os.path.join(self.audio_path, 'test')),
                ('FSDKaggle2018.meta',
                 os.path.join(self.dataset_path, 'meta')),
                ('FSDKaggle2018.doc',
                 os.path.join(self.dataset_path, 'doc')),
            ]
            for extracted, destination in renames:
                os.rename(
                    os.path.join(self.dataset_path, extracted),
                    destination,
                )
            self.set_as_downloaded()
class MAVD(Dataset):
    """MAVD-traffic dataset.

    This class inherits all functionality from Dataset and defines
    specific attributes and methods for MAVD-traffic.

    Url: https://zenodo.org/record/3338727

    P. Zinemanas, P. Cancela, and M. Rocamora.
    "MAVD: a dataset for sound event detection in urban environments"
    Proceedings of the Detection and Classification of Acoustic Scenes
    and Events 2019 Workshop (DCASE 2019).
    October, 2019.

    Parameters
    ----------
    dataset_path : str
        Path to the dataset folder. This is the path to the folder where
        the complete dataset will be downloaded, decompressed and handled.
        It is expected to use a folder name that represents the dataset
        unambiguously (e.g. ../datasets/MAVD).

    Examples
    --------
    To work with MAVD dataset, just initialize this class with the
    path to the dataset.

    >>> from dcase_models.data.datasets import MAVD
    >>> dataset = MAVD('../datasets/MAVD')

    Then, you can download the dataset and change the sampling rate.

    >>> dataset.download()
    >>> dataset.change_sampling_rate(22050)

    """

    def __init__(self, dataset_path):
        """Initialize the dataset rooted at ``dataset_path``."""
        super().__init__(dataset_path)

    def build(self):
        """Define the paths, the folds and the label list.

        ``self.label_list`` is the vehicle labels followed by the
        component labels.
        """
        self.audio_path = os.path.join(self.dataset_path, 'audio')
        self.annotations_path = os.path.join(self.dataset_path, 'annotations')
        self.fold_list = ["train", "validate", "test"]

        # Vehicle and component levels only
        # TODO: Add other levels
        self.vehicle_list = ['car', 'bus', 'truck', 'motorcycle']
        self.component_list = ['engine_idling', 'engine_accelerating',
                               'brakes', 'wheel_rolling', 'compressor']
        self.label_list = self.vehicle_list + self.component_list

    def generate_file_lists(self):
        """Fill ``self.file_lists`` with the wav files of each fold folder."""
        for fold in self.fold_list:
            audio_folder = os.path.join(self.audio_path, fold)
            self.file_lists[fold] = list_wav_files(audio_folder)

    def get_annotations(self, file_name, features, time_resolution):
        """Return the event roll of ``file_name``, one row per frame.

        The label file path is ``file_name`` with the audio path returned
        by ``get_audio_paths()`` replaced by the annotations path and
        '.wav' replaced by '.txt'. Each event label is split on '/' and
        every part found in ``self.label_list`` is activated from
        floor(onset / time_resolution) up to, but not including,
        ceil(offset / time_resolution).
        """
        audio_path, _ = self.get_audio_paths()
        label_file = file_name.replace(
            audio_path, self.annotations_path
        ).replace('.wav', '.txt')

        labels = read_csv(label_file, delimiter='\t', header=None)
        labels.columns = ['event_onset', 'event_offset', 'event_label']

        event_roll = np.zeros((len(features), len(self.label_list)))
        resolution = float(time_resolution)
        for event in labels.to_dict('records'):
            for sub_label in event['event_label'].split('/'):
                if sub_label not in self.label_list:
                    continue
                label_ix = self.label_list.index(sub_label)
                onset = int(np.floor(event['event_onset'] / resolution))
                offset = int(np.ceil(event['event_offset'] / resolution))
                event_roll[onset:offset, label_ix] = 1
        return event_roll

    def download(self, force_download=False):
        """Download MAVD-traffic from Zenodo and arrange its folders.

        The extracted ``audio_<fold>`` and ``annotations_<fold>`` folders
        are renamed to ``audio/<fold>`` and ``annotations/<fold>``, the
        audio is converted with ``convert_to_wav()`` and the dataset is
        flagged as downloaded. This only happens when the base download
        returns a truthy value, as in the sibling dataset classes, so
        that a repeated call does not try to rename folders that were
        already moved.
        """
        zenodo_url = "https://zenodo.org/record/3338727/files/"

        zenodo_files = ['audio_train.zip', 'audio_validate.zip',
                        'audio_test.zip', 'annotations_train.zip',
                        'annotations_validate.zip', 'annotations_test.zip',
                        'README']

        downloaded = super().download(
            zenodo_url, zenodo_files, force_download
        )
        if downloaded:
            mkdir_if_not_exists(self.audio_path)
            mkdir_if_not_exists(self.annotations_path)
            for fold in self.fold_list:
                os.rename(
                    os.path.join(self.dataset_path, 'audio_%s' % fold),
                    os.path.join(self.audio_path, fold)
                )
                os.rename(
                    os.path.join(self.dataset_path, 'annotations_%s' % fold),
                    os.path.join(self.annotations_path, fold)
                )

            # Convert .flac to .wav
            self.convert_to_wav()
            self.set_as_downloaded()
def get_available_datasets():
    """Return the public classes defined in this module, keyed by name.

    A class is included when its ``__module__`` equals this module's
    name and its own name does not start with an underscore.
    """
    datasets = {}
    this_module = sys.modules[__name__]
    for class_name, class_obj in inspect.getmembers(
            this_module, inspect.isclass):
        if class_obj.__module__ != __name__:
            # imported from elsewhere, not defined here
            continue
        if class_name[0] == '_':
            # private helper such as a shared base class
            continue
        datasets[class_name] = class_obj
    return datasets