Source code for librosa.segment


# -*- coding: utf-8 -*-
"""
Temporal segmentation
=====================

Recurrence and self-similarity
------------------------------
.. autosummary::
    :toctree: generated/

    recurrence_matrix
    recurrence_to_lag
    lag_to_recurrence
    timelag_filter

Temporal clustering
-------------------
.. autosummary::
    :toctree: generated/

    agglomerative
    subsegment

Deprecated
----------
.. autosummary::
    :toctree: generated/

    structure_feature
"""

from decorator import decorator

import numpy as np
import scipy
import scipy.signal

import sklearn
import sklearn.cluster
import sklearn.feature_extraction

from . import cache
from . import util
from .util.exceptions import ParameterError

__all__ = ['recurrence_matrix',
           'recurrence_to_lag',
           'lag_to_recurrence',
           'timelag_filter',
           'agglomerative',
           'subsegment',
           # Deprecated functions
           'structure_feature']


@cache
def __band_infinite(n, width, v_in=0.0, v_out=np.inf, dtype=np.float32):
    '''Construct a square, banded matrix `X` where
    `X[i, j] == v_in` if `|i - j| <= width`
    `X[i, j] == v_out` if `|i - j| > width`

    This is used to suppress nearby links in `recurrence_matrix`.
    '''

    if width > n:
        raise ParameterError('width cannot exceed n')

    # Instantiate the matrix
    band = np.empty((n, n), dtype=dtype)

    # Fill the out-of-band values
    band.fill(v_out)

    # Fill the in-band values
    band[np.triu_indices_from(band, width)] = v_in
    band[np.tril_indices_from(band, -width)] = v_in

    return band


[docs]@cache def recurrence_matrix(data, k=None, width=1, metric='sqeuclidean', sym=False, axis=-1): '''Compute the binary recurrence matrix from a time-series. `rec[i,j] == True` if (and only if) (`data[:,i]`, `data[:,j]`) are k-nearest-neighbors and `|i-j| >= width` Parameters ---------- data : np.ndarray A feature matrix k : int > 0 [scalar] or None the number of nearest-neighbors for each sample Default: `k = 2 * ceil(sqrt(t - 2 * width + 1))`, or `k = 2` if `t <= 2 * width + 1` width : int >= 1 [scalar] only link neighbors `(data[:, i], data[:, j])` if `|i-j| >= width` metric : str Distance metric to use for nearest-neighbor calculation. See `scipy.spatial.distance.cdist` for details. sym : bool [scalar] set `sym=True` to only link mutual nearest-neighbors axis : int The axis along which to compute recurrence. By default, the last index (-1) is taken. Returns ------- rec : np.ndarray [shape=(t,t), dtype=bool] Binary recurrence matrix See Also -------- scipy.spatial.distance.cdist librosa.feature.stack_memory structure_feature Examples -------- Find nearest neighbors in MFCC space >>> y, sr = librosa.load(librosa.util.example_audio_file()) >>> mfcc = librosa.feature.mfcc(y=y, sr=sr) >>> R = librosa.segment.recurrence_matrix(mfcc) Or fix the number of nearest neighbors to 5 >>> R = librosa.segment.recurrence_matrix(mfcc, k=5) Suppress neighbors within +- 7 samples >>> R = librosa.segment.recurrence_matrix(mfcc, width=7) Use cosine similarity instead of Euclidean distance >>> R = librosa.segment.recurrence_matrix(mfcc, metric='cosine') Require mutual nearest neighbors >>> R = librosa.segment.recurrence_matrix(mfcc, sym=True) Plot the feature and recurrence matrices >>> import matplotlib.pyplot as plt >>> plt.figure(figsize=(10, 6)) >>> plt.subplot(1, 2, 1) >>> librosa.display.specshow(mfcc, x_axis='time') >>> plt.title('MFCC') >>> plt.subplot(1, 2, 2) >>> librosa.display.specshow(R, x_axis='time', y_axis='time', ... aspect='equal') >>> plt.title('MFCC recurrence (symmetric)') >>> plt.tight_layout() ''' data = np.atleast_2d(data) # Swap observations to the first dimension and flatten the rest data = np.swapaxes(data, axis, 0) t = data.shape[0] data = data.reshape((t, -1)) if width < 1: raise ParameterError('width must be at least 1') if k is None: if t > 2 * width + 1: k = 2 * np.ceil(np.sqrt(t - 2 * width + 1)) else: k = 2 k = int(k) # Build the distance matrix D = scipy.spatial.distance.cdist(data, data, metric=metric) # Max out the diagonal band D = D + __band_infinite(t, width) # build the recurrence plot rec = np.zeros((t, t), dtype=bool) # get the k nearest neighbors for each point for i in range(t): for j in np.argsort(D[i])[:k]: rec[i, j] = True # symmetrize if sym: rec = rec * rec.T return rec
[docs]def recurrence_to_lag(rec, pad=True, axis=-1): '''Convert a recurrence matrix into a lag matrix. `lag[i, j] == rec[i+j, j]` Parameters ---------- rec : np.ndarray, [shape=(n, n)] A (binary) recurrence matrix, as returned by `recurrence_matrix` pad : bool If False, `lag` matrix is square, which is equivalent to assuming that the signal repeats itself indefinitely. If True, `lag` is padded with `n` zeros, which eliminates the assumption of repetition. axis : int The axis along which to apply the recurrence-to-lag conversion Returns ------- lag : np.ndarray [shape=(2*n, n) or (n, n)] The recurrence matrix in (lag, time) coordinates Raises ------ ParameterError : if `rec` is non-square See Also -------- recurrence_matrix lag_to_recurrence Examples -------- >>> y, sr = librosa.load(librosa.util.example_audio_file()) >>> mfccs = librosa.feature.mfcc(y=y, sr=sr) >>> recurrence = librosa.segment.recurrence_matrix(mfccs) >>> lag_pad = librosa.segment.recurrence_to_lag(recurrence, pad=True) >>> lag_nopad = librosa.segment.recurrence_to_lag(recurrence, pad=False) >>> import matplotlib.pyplot as plt >>> plt.figure(figsize=(8, 4)) >>> plt.subplot(1, 2, 1) >>> librosa.display.specshow(lag_pad, x_axis='time', y_axis='lag') >>> plt.title('Lag (zero-padded)') >>> plt.subplot(1, 2, 2) >>> librosa.display.specshow(lag_nopad, x_axis='time', y_axis='time') >>> plt.title('Lag (no padding)') >>> plt.tight_layout() ''' axis = np.abs(axis) if rec.ndim != 2 or rec.shape[0] != rec.shape[1]: raise ParameterError('non-square recurrence matrix shape: ' '{}'.format(rec.shape)) t = rec.shape[axis] if pad: padding = [(0, 0), (0, 0)] padding[(1-axis)] = (0, t) lag = np.pad(rec, padding, mode='constant') else: lag = rec.copy() idx_slice = [Ellipsis] * lag.ndim for i in range(1, t): idx_slice[axis] = i lag[idx_slice] = np.roll(lag[idx_slice], -i) return np.ascontiguousarray(lag.T).T
[docs]def lag_to_recurrence(lag, axis=-1): '''Convert a lag matrix into a recurrence matrix. Parameters ---------- lag : np.ndarray [shape=(2*n, n) or (n, n)] A lag matrix, as produced by `recurrence_to_lag` axis : int The axis along which to apply the recurrence-to-lag conversion Returns ------- rec : np.ndarray [shape=(n, n)] A recurrence matrix in (time, time) coordinates Raises ------ ParameterError : if `lag` does not have the correct shape See Also -------- recurrence_to_lag Examples -------- >>> y, sr = librosa.load(librosa.util.example_audio_file()) >>> mfccs = librosa.feature.mfcc(y=y, sr=sr) >>> recurrence = librosa.segment.recurrence_matrix(mfccs) >>> lag_pad = librosa.segment.recurrence_to_lag(recurrence, pad=True) >>> lag_nopad = librosa.segment.recurrence_to_lag(recurrence, pad=False) >>> rec_pad = librosa.segment.lag_to_recurrence(lag_pad) >>> rec_nopad = librosa.segment.lag_to_recurrence(lag_nopad) >>> import matplotlib.pyplot as plt >>> plt.figure(figsize=(8, 4)) >>> plt.subplot(2, 2, 1) >>> librosa.display.specshow(lag_pad, x_axis='time', y_axis='lag') >>> plt.title('Lag (zero-padded)') >>> plt.subplot(2, 2, 2) >>> librosa.display.specshow(lag_nopad, x_axis='time', y_axis='time') >>> plt.title('Lag (no padding)') >>> plt.subplot(2, 2, 3) >>> librosa.display.specshow(rec_pad, x_axis='time', y_axis='time') >>> plt.title('Recurrence (with padding)') >>> plt.subplot(2, 2, 4) >>> librosa.display.specshow(rec_nopad, x_axis='time', y_axis='time') >>> plt.title('Recurrence (without padding)') >>> plt.tight_layout() ''' axis = np.abs(axis) if lag.ndim != 2 or (lag.shape[0] != lag.shape[1] and lag.shape[1 - axis] != 2 * lag.shape[axis]): raise ParameterError('Invalid lag matrix shape: {}'.format(lag.shape)) # Since lag must be 2-dimensional, abs(axis) = axis t = lag.shape[axis] lag = lag.copy() idx_slice = [Ellipsis] * lag.ndim for i in range(1, t): idx_slice[axis] = i lag[idx_slice] = np.roll(lag[idx_slice], i) sub_slice = [Ellipsis] * lag.ndim sub_slice[1 - axis] = slice(t) return np.ascontiguousarray(lag[sub_slice].T).T
[docs]def timelag_filter(function, pad=True, index=0): '''Filtering in the time-lag domain. This is primarily useful for adapting image filters to operate on `structure_feature` output. Using `timelag_filter` is equivalent to the following sequence of operations: >>> data_tl = librosa.segment.recurrence_to_lag(data) >>> data_filtered_tl = function(data_tl) >>> data_filtered = librosa.segment.lag_to_recurrence(data_filtered_tl) Parameters ---------- function : callable The filtering function to wrap, e.g., `scipy.ndimage.median_filter` pad : bool Whether to zero-pad the structure feature matrix index : int >= 0 If `function` accepts input data as a positional argument, it should be indexed by `index` Returns ------- wrapped_function : callable A new filter function which applies in time-lag space rather than time-time space. See Also -------- structure_feature Examples -------- Apply a 5-bin median filter to the diagonal of a recurrence matrix >>> y, sr = librosa.load(librosa.util.example_audio_file()) >>> mfcc = librosa.feature.mfcc(y=y, sr=sr) >>> rec = librosa.segment.recurrence_matrix(mfcc, sym=True) >>> from scipy.ndimage import median_filter >>> diagonal_median = librosa.segment.timelag_filter(median_filter) >>> rec_filtered = diagonal_median(rec, size=(1, 5), mode='mirror') >>> import matplotlib.pyplot as plt >>> plt.figure() >>> plt.subplot(1, 2, 1) >>> librosa.display.specshow(rec, x_axis='time', y_axis='time', ... aspect='equal') >>> plt.title('Raw recurrence matrix') >>> plt.subplot(1, 2, 2) >>> librosa.display.specshow(rec_filtered, x_axis='time', y_axis='time', ... aspect='equal') >>> plt.title('Filtered recurrence matrix') >>> plt.tight_layout() ''' @cache def __my_filter(wrapped_f, *args, **kwargs): '''Decorator to wrap the filter''' # Map the input data into time-lag space args = list(args) args[index] = recurrence_to_lag(args[index], pad=pad) # Apply the filtering function result = wrapped_f(*args, **kwargs) # Map back into time-time and return return lag_to_recurrence(result) return decorator(__my_filter, function)
[docs]@cache def subsegment(data, frames, n_segments=4, axis=-1): '''Sub-divide a segmentation by feature clustering. Given a set of frame boundaries (`frames`), and a data matrix (`data`), each successive interval defined by `frames` is partitioned into `n_segments` by constrained agglomerative clustering. .. note:: If an interval spans fewer than `n_segments` frames, then each frame becomes a sub-segment. Parameters ---------- data : np.ndarray Data matrix to use in clustering frames : np.ndarray [shape=(n_boundaries,)], dtype=int, non-negative] Array of beat or segment boundaries, as provided by `librosa.beat.beat_track`, `librosa.onset.onset_detect`, or `agglomerative`. n_segments : int > 0 Maximum number of frames to sub-divide each interval. axis : int Axis along which to apply the segmentation. By default, the last index (-1) is taken. Returns ------- boundaries : np.ndarray [shape=(n_subboundaries,)] List of sub-divided segment boundaries See Also -------- agglomerative : Temporal segmentation librosa.onset.onset_detect : Onset detection librosa.beat.beat_track : Beat tracking Examples -------- Load audio, detect beat frames, and subdivide in twos by CQT >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=15) >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=512) >>> cqt = librosa.cqt(y, sr=sr, hop_length=512) >>> subseg = librosa.segment.subsegment(cqt, beats, n_segments=2) >>> subseg array([ 0, 2, 4, 21, 23, 26, 43, 55, 63, 72, 83, 97, 102, 111, 122, 137, 142, 153, 162, 180, 182, 185, 202, 210, 221, 231, 241, 256, 261, 271, 281, 296, 301, 310, 320, 339, 341, 344, 361, 368, 382, 389, 401, 416, 420, 430, 436, 451, 456, 465, 476, 489, 496, 503, 515, 527, 535, 544, 553, 558, 571, 578, 590, 607, 609, 638]) >>> import matplotlib.pyplot as plt >>> plt.figure() >>> librosa.display.specshow(librosa.logamplitude(cqt**2, ... ref_power=np.max), ... y_axis='cqt_hz', x_axis='time') >>> plt.vlines(beats, 0, cqt.shape[0], color='r', alpha=0.5, ... label='Beats') >>> plt.vlines(subseg, 0, cqt.shape[0], color='b', linestyle='--', ... alpha=0.5, label='Sub-beats') >>> plt.legend(frameon=True, shadow=True) >>> plt.title('CQT + Beat and sub-beat markers') >>> plt.tight_layout() ''' frames = util.fix_frames(frames, x_min=0, x_max=data.shape[axis], pad=True) if n_segments < 1: raise ParameterError('n_segments must be a positive integer') boundaries = [] idx_slices = [Ellipsis] * data.ndim for seg_start, seg_end in zip(frames[:-1], frames[1:]): idx_slices[axis] = slice(seg_start, seg_end) boundaries.extend(seg_start + agglomerative(data[idx_slices], min(seg_end - seg_start, n_segments), axis=axis)) return np.ascontiguousarray(boundaries)
[docs]def agglomerative(data, k, clusterer=None, axis=-1): """Bottom-up temporal segmentation. Use a temporally-constrained agglomerative clustering routine to partition `data` into `k` contiguous segments. Parameters ---------- data : np.ndarray data to cluster k : int > 0 [scalar] number of segments to produce clusterer : sklearn.cluster.AgglomerativeClustering, optional An optional AgglomerativeClustering object. If `None`, a constrained Ward object is instantiated. axis : int axis along which to cluster. By default, the last axis (-1) is chosen. Returns ------- boundaries : np.ndarray [shape=(k,)] left-boundaries (frame numbers) of detected segments. This will always include `0` as the first left-boundary. See Also -------- sklearn.cluster.AgglomerativeClustering Examples -------- Cluster by chroma similarity, break into 20 segments >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=15) >>> chroma = librosa.feature.chroma_cqt(y=y, sr=sr) >>> boundary_frames = librosa.segment.agglomerative(chroma, 20) >>> librosa.frames_to_time(boundary_frames, sr=sr) array([ 0. , 1.672, 2.322, 2.624, 3.251, 3.506, 4.18 , 5.387, 6.014, 6.293, 6.943, 7.198, 7.848, 9.033, 9.706, 9.961, 10.635, 10.89 , 11.54 , 12.539]) Plot the segmentation against the spectrogram >>> import matplotlib.pyplot as plt >>> plt.figure() >>> S = np.abs(librosa.stft(y))**2 >>> librosa.display.specshow(librosa.logamplitude(S, ref_power=np.max), ... y_axis='log', x_axis='time') >>> plt.vlines(boundary_frames, 0, S.shape[0], color='r', alpha=0.9, ... label='Segment boundaries') >>> plt.legend(frameon=True, shadow=True) >>> plt.title('Power spectrogram') >>> plt.tight_layout() """ # Make sure we have at least two dimensions data = np.atleast_2d(data) # Swap data index to position 0 data = np.swapaxes(data, axis, 0) # Flatten the features n = data.shape[0] data = data.reshape((n, -1)) if clusterer is None: # Connect the temporal connectivity graph grid = sklearn.feature_extraction.image.grid_to_graph(n_x=n, n_y=1, n_z=1) # Instantiate the clustering object clusterer = sklearn.cluster.AgglomerativeClustering(n_clusters=k, connectivity=grid, memory=cache) # Fit the model clusterer.fit(data) # Find the change points from the labels boundaries = [0] boundaries.extend( list(1 + np.nonzero(np.diff(clusterer.labels_))[0].astype(int))) return np.asarray(boundaries)
# Deprecated functions
[docs]@util.decorators.deprecated('0.4', '0.5') @cache def structure_feature(rec, pad=True, inverse=False): '''Compute the structure feature from a recurrence matrix. The i'th column of the recurrence matrix is shifted up by i. The resulting matrix is indexed horizontally by time, and vertically by lag. .. warning:: Deprected in librosa 0.4 Functionality is superseded by `librosa.segment.recurrence_to_lag` and `librosa.segment.lag_to_recurrence`. Parameters ---------- rec : np.ndarray [shape=(t,t) or shape=(2*t, t)] recurrence matrix or pre-computed structure feature pad : bool [scalar] Pad the matrix with `t` rows of zeros to avoid looping. inverse : bool [scalar] Unroll the opposite direction. This is useful for converting structure features back into recurrence plots. .. note: Reversing with `pad==True` will truncate the inferred padding. Returns ------- struct : np.ndarray [shape=(2*t, t) or shape=(t, t)] `struct[i, t]` = the recurrence at time `t` with lag `i`. .. note:: negative lag values are supported by wrapping to the end of the array. See Also -------- recurrence_matrix : build a recurrence matrix from feature vectors Examples -------- Build the structure feature over mfcc similarity >>> y, sr = librosa.load(librosa.util.example_audio_file()) >>> mfccs = librosa.feature.mfcc(y=y, sr=sr) >>> recurrence = librosa.segment.recurrence_matrix(mfccs) >>> struct = librosa.segment.structure_feature(recurrence) Invert the structure feature to get a recurrence matrix >>> recurrence_2 = librosa.segment.structure_feature(struct, ... inverse=True) Display recurrence in time-time and time-lag space >>> import matplotlib.pyplot as plt >>> plt.figure(figsize=(10, 5)) >>> plt.subplot(1, 2, 1) >>> librosa.display.specshow(recurrence, aspect='equal', x_axis='time', ... y_axis='time') >>> plt.ylabel('Time') >>> plt.title('Recurrence (time-time)') >>> plt.subplot(1, 2, 2) >>> librosa.display.specshow(struct, aspect='auto', x_axis='time') >>> plt.ylabel('Lag') >>> plt.title('Structure feature') >>> plt.tight_layout() ''' if inverse: return lag_to_recurrence(rec) else: return recurrence_to_lag(rec, pad=pad)