# Source code for dcase_models.util.metrics

# encoding: utf-8
"""Metric functions"""

# from scipy import interpolate
import numpy as np
from scipy.stats import mode
from dcase_models.util.events import event_roll_to_event_list
from dcase_models.util.events import tag_probabilities_to_tag_list
from sed_eval.sound_event import SegmentBasedMetrics
from sed_eval.scene import SceneClassificationMetrics
from sed_eval.audio_tag import AudioTaggingMetrics

# Small constant added to the denominators in ER and F1 so that the
# divisions stay defined when a count is zero.
eps = 1e-6


def predictions_temporal_integration(Y_predicted, type='sum'):
    """ Integrate temporal dimension.

    Parameters
    ----------
    Y_predicted : ndarray
        Signal to be integrated.
        e.g. shape (N_times, N_classes)
    type : str
        Type of integration ('sum', 'max', 'mean', 'mode').
        Any other value (e.g. 'autopool') is not implemented here and
        leaves the input untouched.

    Returns
    -------
    array
        Signal integrated along axis 0.
        e.g. shape (N_classes,)
        If `type` is not recognized, `Y_predicted` is returned unchanged.

    """
    if type == 'sum':
        return np.sum(Y_predicted, axis=0)
    if type == 'max':
        return np.max(Y_predicted, axis=0)
    if type == 'mean':
        return np.mean(Y_predicted, axis=0)
    if type == 'mode':
        mode_values, _ = mode(Y_predicted, axis=0)
        # Depending on the SciPy version, `mode` either keeps the reduced
        # axis with length one or drops it. Reshaping to the input shape
        # without axis 0 yields the same result in both cases, whereas
        # squeezing axis 0 fails when the axis has already been dropped.
        return np.asarray(mode_values).reshape(np.shape(Y_predicted)[1:])
    # Unknown integration type: keep the historical pass-through behavior.
    return Y_predicted
def _predict_first_output(model, X):
    """Run ``model.predict`` on one file and keep only its first output."""
    Y_predicted = model.predict(X)
    # If the model has multiple outputs, predict returns a list; only the
    # first output is evaluated.
    if isinstance(Y_predicted, list):
        Y_predicted = Y_predicted[0]
    return Y_predicted


def evaluate_metrics(model, data, metrics, **kwargs):
    """ Calculate metrics over files with different length

    Parameters
    ----------
    model : keras Model
        Model used to get the predictions (only ``model.predict`` is called,
        once per file).
    data : tuple or KerasDataGenerator
        Validation data for model evaluation: either ``(X_val, Y_val)``
        (a list or tuple) or an object that supports ``len()`` and
        ``get_data_batch(batch_index)`` returning ``(X_val, Y_val)``.

        X_val : list of ndarray
            Each element in list is a 3D array with the mel-spectrograms
            of one file.
            Shape of each element: (N_windows, N_hops, N_mel_bands)
            N_windows can be different in each file (element)
        Y_val : list ndarray
            Each element in the list is a 1D array with the annotations
            (one hot encoding).
            Shape of each element (N_classes,)
    metrics : list
        List of metrics to apply. Each element can be a callable or the name
        of a function defined at the top level of this module.
        Every metric is called as ``metric(annotations, predictions,
        **kwargs)``.
    **kwargs
        Extra keyword arguments forwarded to every metric function.

    Returns
    -------
    dict
        Dict with the results information.

        {'annotations' : [Y0, Y1, ...],
         'predictions' : [Yp0, Yp1, ...],
         metrics[0]: 0.1,
         metrics[1]: 0.54}

        Each metric result is stored under the metric element itself
        (the name or the callable).

    Raises
    ------
    KeyError
        If a non-callable metric is not the name of a module-level object.

    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # such as namedtuples are also treated as an (X_val, Y_val) pair.
    if isinstance(data, (list, tuple)):
        X_val, annotations = data[0], data[1]
        predictions = [_predict_first_output(model, X) for X in X_val]
    else:
        # data type is DataGenerator: gather predictions batch by batch
        predictions = []
        annotations = []
        for batch_index in range(len(data)):
            X_val, Y_val = data.get_data_batch(batch_index)
            predictions.extend(
                _predict_first_output(model, X) for X in X_val
            )
            annotations.extend(Y_val)

    results = {'annotations': annotations, 'predictions': predictions}

    for metric in metrics:
        # Metric names are resolved against this module's namespace.
        metric_function = metric if callable(metric) else globals()[metric]
        results[metric] = metric_function(annotations, predictions, **kwargs)

    return results
def sed(Y_val, Y_predicted, sequence_time_sec=0.5,
        metric_resolution_sec=1.0, label_list=[]):
    """ Calculate metrics for Sound Event Detection

    Parameters
    ----------
    Y_val : list of ndarray
        Ground-truth event rolls, one element per file.
        Shape of each element: (N_times, N_classes)
    Y_predicted : list of ndarray
        Predicted event rolls, one element per file.
        Shape of each element: (N_times, N_classes)
        Values are binarized with a 0.5 threshold before evaluation.
    sequence_time_sec : float
        Resolution of Y_val and Y_predicted.
    metric_resolution_sec : float
        Resolution of the metrics.
    label_list:
        Label list.

    Returns
    -------
    sed_eval.sound_event.SegmentBasedMetrics
        Object with the SED results accumulated over all the files.

    """
    segment_metrics = SegmentBasedMetrics(
        label_list, time_resolution=metric_resolution_sec
    )

    for file_index in range(len(Y_val)):
        reference_roll = Y_val[file_index]
        # Predictions are turned into a binary event roll.
        estimated_roll = (Y_predicted[file_index] > 0.5).astype(int)

        reference_events = event_roll_to_event_list(
            reference_roll, label_list, sequence_time_sec)
        estimated_events = event_roll_to_event_list(
            estimated_roll, label_list, sequence_time_sec)

        segment_metrics.evaluate(reference_events, estimated_events)

    return segment_metrics
def classification(Y_val, Y_predicted, label_list=[]):
    """ Calculate metrics for Audio Classification

    Parameters
    ----------
    Y_val : list of ndarray
        Ground-truth annotations, one element per file. Only the first row
        of each element is used to select the reference label (argmax).
    Y_predicted : list of ndarray
        Predictions, one element per file. Each element is summed along
        axis 0 and the argmax of the result selects the predicted label.
        e.g. shape of each element: (N_times, N_classes)
    label_list:
        Label list, indexed by class.

    Returns
    -------
    sed_eval.scene.SceneClassificationMetrics
        Object with the classification results accumulated over all files.

    """
    scene_metrics = SceneClassificationMetrics(label_list)

    for file_index in range(len(Y_val)):
        reference = Y_val[file_index]
        # Integrate the predictions over axis 0 before choosing the class.
        class_scores = np.sum(Y_predicted[file_index], axis=0)
        estimated_label = label_list[np.argmax(class_scores)]
        reference_label = label_list[np.argmax(reference[0])]

        reference_items = [{'scene_label': reference_label, 'file': ''}]
        estimated_items = [{'scene_label': estimated_label, 'file': ''}]
        scene_metrics.evaluate(reference_items, estimated_items)

    return scene_metrics
def tagging(Y_val, Y_predicted, label_list=[]):
    """ Calculate metrics for Audio Tagging

    Parameters
    ----------
    Y_val : list of ndarray
        Ground-truth annotations, one element per file. Only the first row
        of each element is converted to a tag list.
    Y_predicted : list of ndarray
        Predictions, one element per file. Each element is averaged along
        axis 0 before being converted to a tag list.
        e.g. shape of each element: (N_times, N_classes)
    label_list:
        Label list.

    Returns
    -------
    sed_eval.audio_tag.AudioTaggingMetrics
        Object with the tagging results accumulated over all the files.

    """
    tag_metrics = AudioTaggingMetrics(label_list)

    for file_index in range(len(Y_val)):
        reference = Y_val[file_index]
        # Average the predictions over axis 0 to get one value per class.
        mean_probabilities = np.mean(Y_predicted[file_index], axis=0)

        reference_tags = tag_probabilities_to_tag_list(
            reference[0], label_list, threshold=0.5)
        estimated_tags = tag_probabilities_to_tag_list(
            mean_probabilities, label_list, threshold=0.5)

        reference_items = [{'tags': reference_tags, 'file': ''}]
        estimated_items = [{'tags': estimated_tags, 'file': ''}]
        tag_metrics.evaluate(reference_items, estimated_items)

    return tag_metrics
def accuracy(Y_val, Y_predicted):
    """ Calculate the file-level classification accuracy

    Parameters
    ----------
    Y_val : list of ndarray
        Annotations, one element per file. The reference class of a file
        is the argmax of its element.
    Y_predicted : list of ndarray
        Predictions, one element per file. Each element is integrated with
        predictions_temporal_integration(..., 'sum') and the argmax of the
        result is the predicted class.

    Returns
    -------
    float
        Fraction of files whose predicted class equals the reference class.

    """
    n_files = len(Y_val)
    reference_classes = np.zeros(n_files)
    predicted_classes = np.zeros(n_files)

    for file_index in range(n_files):
        reference = Y_val[file_index]
        integrated = predictions_temporal_integration(
            Y_predicted[file_index], 'sum')
        predicted_classes[file_index] = np.argmax(integrated)
        reference_classes[file_index] = np.argmax(reference)

    return np.mean(reference_classes == predicted_classes)
def ER(Y_val, Y_predicted, sequence_time_sec=0.5, metric_resolution_sec=1.0):
    """ Calculate the error rate over a set of files

    All the files are concatenated along axis 0 and the counts are computed
    globally over the concatenated data:

    - Ntp: active entries present in both annotations and predictions
    - Nref: sum of the annotations
    - Nsys: sum of the binarized predictions
    - substitutions = min(Nref, Nsys) - Ntp
    - deletions = max(0, Nref - Nsys)
    - insertions = max(0, Nsys - Nref)

    Parameters
    ----------
    Y_val : list of ndarray
        Annotations, one 2D array per file.
    Y_predicted : list of ndarray
        Predictions, one 2D array per file. When a prediction has a
        different number of rows than its annotation, it is resampled to
        the annotation length by averaging consecutive groups of
        round(metric_resolution_sec / sequence_time_sec) rows.
    sequence_time_sec : float
        Resolution of the predictions (used only for the resampling).
    metric_resolution_sec : float
        Resolution of the annotations (used only for the resampling).

    Returns
    -------
    float
        (substitutions + deletions + insertions) / (Nref + eps)

    """
    annotations = []
    predictions = []
    for file_index in range(len(Y_val)):
        y_true = Y_val[file_index]
        pred = Y_predicted[file_index]
        if pred.shape[0] == y_true.shape[0]:
            y_pred = pred
        else:
            # The buffer must be float: with a buffer that inherits an
            # integer dtype from the annotations, the averaged values would
            # be truncated (e.g. 0.75 -> 0) before the 0.5 threshold below.
            y_pred = np.zeros(y_true.shape, dtype=float)
            ratio = int(np.round(metric_resolution_sec / sequence_time_sec))
            for j in range(len(y_true)):
                y_pred[j] = np.mean(pred[j*ratio:(j+1)*ratio], axis=0)
        annotations.append(y_true)
        predictions.append(y_pred)

    annotations = np.concatenate(annotations, axis=0)
    predictions = np.concatenate(predictions, axis=0)

    assert annotations.shape[0] == predictions.shape[0]
    assert annotations.shape[1] == predictions.shape[1]

    predictions = (predictions > 0.5).astype(int)

    # An entry is a true positive when both arrays are active (sum > 1).
    Ntp = np.sum(predictions + annotations > 1)
    Nref = np.sum(annotations)
    Nsys = np.sum(predictions)

    Sus = min(Nref, Nsys) - Ntp
    Del = max(0.0, Nref - Nsys)
    Ins = max(0.0, Nsys - Nref)

    # eps keeps the division defined when there are no reference events.
    error_rate = (Sus + Del + Ins) / float(Nref + eps)
    return error_rate
def F1(Y_val, Y_predicted, sequence_time_sec=0.5, metric_resolution_sec=1.0):
    """ Calculate the F-measure over a set of files

    All the files are concatenated along axis 0 and the counts are computed
    globally over the concatenated data:

    - Ntp: active entries present in both annotations and predictions
    - Nref: sum of the annotations
    - Nsys: sum of the binarized predictions
    - precision = Ntp / (Nsys + eps), recall = Ntp / (Nref + eps)

    Parameters
    ----------
    Y_val : list of ndarray
        Annotations, one 2D array per file.
    Y_predicted : list of ndarray
        Predictions, one 2D array per file. When a prediction has a
        different number of rows than its annotation, it is resampled to
        the annotation length by averaging consecutive groups of
        round(metric_resolution_sec / sequence_time_sec) rows.
    sequence_time_sec : float
        Resolution of the predictions (used only for the resampling).
    metric_resolution_sec : float
        Resolution of the annotations (used only for the resampling).

    Returns
    -------
    float
        2 * precision * recall / (precision + recall + eps)

    """
    annotations = []
    predictions = []
    for file_index in range(len(Y_val)):
        y_true = Y_val[file_index]
        pred = Y_predicted[file_index]
        if pred.shape[0] == y_true.shape[0]:
            y_pred = pred
        else:
            # The buffer must be float: with a buffer that inherits an
            # integer dtype from the annotations, the averaged values would
            # be truncated (e.g. 0.75 -> 0) before the 0.5 threshold below.
            y_pred = np.zeros(y_true.shape, dtype=float)
            ratio = int(np.round(metric_resolution_sec / sequence_time_sec))
            for j in range(len(y_true)):
                y_pred[j] = np.mean(pred[j*ratio:(j+1)*ratio], axis=0)
        annotations.append(y_true)
        predictions.append(y_pred)

    annotations = np.concatenate(annotations, axis=0)
    predictions = np.concatenate(predictions, axis=0)

    assert annotations.shape[0] == predictions.shape[0]
    assert annotations.shape[1] == predictions.shape[1]

    predictions = (predictions > 0.5).astype(int)

    # An entry is a true positive when both arrays are active (sum > 1).
    Ntp = np.sum(predictions + annotations > 1)
    Nref = np.sum(annotations)
    Nsys = np.sum(predictions)

    # eps keeps every division defined when a count is zero.
    P = Ntp / float(Nsys + eps)
    R = Ntp / float(Nref + eps)

    Fmeasure = 2*P*R/(P + R + eps)
    return Fmeasure