# Source code for dcase_models.util.metrics

# encoding: utf-8
"""Metric functions"""

# from scipy import interpolate
import numpy as np
from scipy.stats import mode
from dcase_models.util.events import event_roll_to_event_list
from dcase_models.util.events import tag_probabilities_to_tag_list
from sed_eval.sound_event import SegmentBasedMetrics
from sed_eval.scene import SceneClassificationMetrics
from sed_eval.audio_tag import AudioTaggingMetrics

# Small positive constant; presumably a numerical-stability epsilon
# (TODO confirm) -- none of the functions shown in this module reference it.
eps = 1e-6


def predictions_temporal_integration(Y_predicted, type='sum'):
    """ Integrate temporal dimension.

    Collapses the first (time) axis of ``Y_predicted`` using the requested
    reduction.

    Parameters
    ----------
    Y_predicted : ndarray
        Signal to be integrated.
        e.g. shape (N_times, N_classes)
    type : str
        Type of integration ('sum', 'mean', 'max', 'mode').
        Any other value leaves the signal untouched.

    Returns
    -------
    array
        Integrated signal.
        e.g. shape (N_classes,)
        When ``type`` is not recognised, ``Y_predicted`` itself is returned.

    """
    if type == 'sum':
        return np.sum(Y_predicted, axis=0)
    if type == 'mean':
        return np.mean(Y_predicted, axis=0)
    if type == 'max':
        return np.max(Y_predicted, axis=0)
    if type == 'mode':
        modes = np.asarray(mode(Y_predicted, axis=0)[0])
        # Depending on the SciPy version, ``mode`` either keeps the reduced
        # axis (length 1) or drops it. Only squeeze when it is still there,
        # so both behaviours end up with the same shape.
        if modes.ndim == np.ndim(Y_predicted):
            modes = np.squeeze(modes, axis=0)
        return modes
    # Unknown integration type: pass the input through unchanged.
    return Y_predicted


def _predict_first_output(model, X):
    """Run ``model.predict`` on one file and keep only the first output."""
    Y_predicted = model.predict(X)
    # if multiple outputs, select the first
    if isinstance(Y_predicted, list):
        Y_predicted = Y_predicted[0]
    return Y_predicted


def evaluate_metrics(model, data, metrics, **kwargs):
    """ Calculate metrics over files with different length

    Parameters
    ----------
    model : keras Model
        model to get the predictions
    data : tuple or KerasDataGenerator
        Validation data for model evaluation
        (X_val, Y_val) or KerasDataGenerator

        X_val : list of ndarray
            Each element in list is a 3D array with the mel-spectrograms
            of one file. Shape of each element:
            (N_windows, N_hops, N_mel_bands)
            N_windows can be different in each file (element)
        Y_val : list ndarray
            Each element in the list is a 1D array with
            the annotations (one hot encoding).
            Shape of each element (N_classes,)

        Anything that is not a list or tuple (or a subclass of them) is
        treated as a data generator: it must support ``len()`` and
        ``get_data_batch(batch_index)`` returning ``(X_val, Y_val)``.

    metrics : list
        List of metrics to apply.
        Each element can be a metric name or a function.
        A name is looked up among this module's global names and raises
        KeyError when it is not found.
    **kwargs
        Forwarded unchanged to every metric function.

    Returns
    -------
    dict
        Dict with the results information.

        {'annotations' : [Y0, Y1, ...],
         'predictions' : [Yp0, Yp1, ...],
         metrics[0]: 0.1,
         metrics[1]: 0.54}

    """
    results = {}

    if isinstance(data, (list, tuple)):
        X_val, Y_val = data[0], data[1]
        predictions = [_predict_first_output(model, X) for X in X_val]
        # The annotations object is handed back as-is (no copy).
        annotations = Y_val
    else:
        # data type is DataGenerator
        predictions = []
        annotations = []
        for batch_index in range(len(data)):
            X_val, Y_val = data.get_data_batch(batch_index)
            predictions.extend(
                _predict_first_output(model, X) for X in X_val
            )
            annotations.extend(Y_val)

    results['annotations'] = annotations
    results['predictions'] = predictions

    for metric in metrics:
        # Callables are used directly; names resolve to module-level objects.
        metric_function = metric if callable(metric) else globals()[metric]
        # The result is stored under the metric exactly as it was given
        # (name or function object).
        results[metric] = metric_function(annotations, predictions, **kwargs)
    return results

def _check_lists_for_evaluation(Y_val, Y_predicted):
    """ Perform the following checks
        1) Y_val and Y_predicted are both of type list
        2) Y_val and Y_predicted are of the same length
        3) Each element in Y_val and Y_predicted is a 2D array

    Parameters
    ----------
    Y_val : list of ndarray
        2D array with the ground-truth event roll
        shape: (N_times, N_classes)
    Y_predicted : list of ndarray
        2D array with the predicted event roll
        shape: (N_times, N_classes)

    Returns
    -------
    bool
        True if checks passed.

    Raises
    ------
    AttributeError
        If any of the checks fails. The message names the offending
        argument and the type or shape that was received.

    """
    for name, value in (('Y_val', Y_val), ('Y_predicted', Y_predicted)):
        if not isinstance(value, list):
            raise AttributeError(
                '{} type is invalid. It should be a list of 2D array '
                'and received {}'.format(name, type(value))
            )

    if len(Y_val) != len(Y_predicted):
        raise AttributeError(
            'Y_val and Y_predicted should have the same length '
            '(received {:d} and {:d})'.format(len(Y_val), len(Y_predicted))
        )

    # Files are validated in order; within a file the annotation is checked
    # before the prediction, so the first problem found is the one reported.
    for y_true, y_pred in zip(Y_val, Y_predicted):
        for name, element in (('Y_val', y_true), ('Y_predicted', y_pred)):
            if not isinstance(element, np.ndarray):
                raise AttributeError(
                    'Each element of {} should be a 2D numpy array '
                    'and received {}'.format(name, type(element))
                )
            if element.ndim != 2:
                raise AttributeError(
                    'Each element of {} should be a 2D array and received '
                    'an array of shape {}'.format(name, str(element.shape))
                )

    return True

def sed(Y_val, Y_predicted, sequence_time_sec=0.5,
        metric_resolution_sec=1.0, label_list=None, threshold=0.5):
    """ Calculate metrics for Sound Event Detection

    Parameters
    ----------
    Y_val : list of ndarray
        2D array with the ground-truth event roll
        shape: (N_times, N_classes)
    Y_predicted : list of ndarray
        2D array with the predicted event roll
        shape: (N_times, N_classes)
    sequence_time_sec : float
        Resolution of Y_val and Y_predicted.
    metric_resolution_sec : float
        Resolution of the metrics.
    label_list : list or None
        Label list. None (the default) is treated as an empty list.
    threshold : float
        Predictions strictly greater than this value are considered
        active. The ground truth is not thresholded.

    Returns
    -------
    sed_eval.sound_event.SegmentBasedMetrics
        Object with the SED results

    Raises
    ------
    AttributeError
        If Y_val and Y_predicted are not lists of 2D arrays of the
        same length.

    """
    # A fresh list per call: the metrics object receives this list, so a
    # shared default instance must not be handed out.
    if label_list is None:
        label_list = []

    _check_lists_for_evaluation(Y_val, Y_predicted)

    seg_metrics = SegmentBasedMetrics(
        label_list, time_resolution=metric_resolution_sec
    )

    # The check above guarantees both lists have the same length.
    for y_true, pred in zip(Y_val, Y_predicted):
        # Binarize the predicted roll before turning it into events.
        binary_pred = (pred > threshold).astype(int)

        event_list_val = event_roll_to_event_list(
            y_true, label_list, sequence_time_sec)
        event_list_pred = event_roll_to_event_list(
            binary_pred, label_list, sequence_time_sec)

        # Results are accumulated file by file inside seg_metrics.
        seg_metrics.evaluate(event_list_val, event_list_pred)

    return seg_metrics


def classification(Y_val, Y_predicted, label_list=None):
    """ Calculate metrics for Audio Classification

    For every file the predictions are summed over the first axis and the
    class with the largest total is taken as the predicted label. The
    ground-truth label is the argmax of the first row of the annotation.

    Parameters
    ----------
    Y_val : list of ndarray
        2D array with the ground-truth event roll
        shape: (N_times, N_classes)
        Only the first row of each array is used.
    Y_predicted : list of ndarray
        2D array with the predicted event roll
        shape: (N_times, N_classes)
    label_list : list or None
        Label list. None (the default) is treated as an empty list.
        It is indexed by class position, so it needs one entry per class
        whenever there is at least one file to evaluate.

    Returns
    -------
    sed_eval.scene.SceneClassificationMetrics
        Object with the classification results

    Raises
    ------
    AttributeError
        If Y_val and Y_predicted are not lists of 2D arrays of the
        same length.

    """
    # A fresh list per call: the metrics object receives this list, so a
    # shared default instance must not be handed out.
    if label_list is None:
        label_list = []

    _check_lists_for_evaluation(Y_val, Y_predicted)

    acc_metrics = SceneClassificationMetrics(label_list)

    # The check above guarantees both lists have the same length.
    for y_true, pred in zip(Y_val, Y_predicted):
        # Sum over the first axis, then pick the winning class.
        predicted_index = np.argmax(np.sum(pred, axis=0))
        label_predicted = label_list[predicted_index]
        label_gt = label_list[np.argmax(y_true[0])]

        acc_metrics.evaluate(
            [{'scene_label': label_gt, 'file': ''}],
            [{'scene_label': label_predicted, 'file': ''}])

    return acc_metrics


def tagging(Y_val, Y_predicted, label_list=None, threshold=0.5):
    """ Calculate metrics for Audio Tagging

    For every file the predictions are averaged over the first axis
    before being converted to a tag list. The ground-truth tags come from
    the first row of the annotation.

    Parameters
    ----------
    Y_val : list of ndarray
        2D array with the ground-truth event roll
        shape: (N_times, N_classes)
        Only the first row of each array is used.
    Y_predicted : list of ndarray
        2D array with the predicted event roll
        shape: (N_times, N_classes)
    label_list : list or None
        Label list. None (the default) is treated as an empty list.
    threshold : float
        Threshold passed to ``tag_probabilities_to_tag_list`` for the
        averaged predictions. The annotations always use 0.5.

    Returns
    -------
    sed_eval.audio_tag.AudioTaggingMetrics
        Object with the tagging results

    Raises
    ------
    AttributeError
        If Y_val and Y_predicted are not lists of 2D arrays of the
        same length.

    """
    # A fresh list per call: the metrics object receives this list, so a
    # shared default instance must not be handed out.
    if label_list is None:
        label_list = []

    _check_lists_for_evaluation(Y_val, Y_predicted)

    tagging_metrics = AudioTaggingMetrics(label_list)

    # The check above guarantees both lists have the same length.
    for y_true, pred in zip(Y_val, Y_predicted):
        mean_pred = np.mean(pred, axis=0)

        tag_list_val = tag_probabilities_to_tag_list(
            y_true[0], label_list, threshold=0.5)
        tag_list_pred = tag_probabilities_to_tag_list(
            mean_pred, label_list, threshold=threshold)

        tagging_metrics.evaluate(
            [{'tags': tag_list_val, 'file': ''}],
            [{'tags': tag_list_pred, 'file': ''}])

    return tagging_metrics