# -*- coding: utf-8 -*-
"""Core IO, DSP and utility functions."""
import os
import warnings
import six
import audioread
import numpy as np
import scipy.signal
import scipy.fftpack as fft
from .time_frequency import frames_to_samples, time_to_samples
from .. import cache
from .. import util
from ..util.exceptions import ParameterError
__all__ = ['load', 'to_mono', 'resample', 'get_duration',
'autocorrelate', 'zero_crossings', 'clicks',
# Deprecated functions
'peak_pick', 'localmax']
# Resampling bandwidths as percentage of Nyquist
# http://www.mega-nerd.com/SRC/api_misc.html#Converters
BW_BEST = 0.97
BW_MEDIUM = 0.9
BW_FASTEST = 0.8
# Do we have scikits.samplerate?
try:
# Pylint won't handle dynamic imports, so we suppress this warning
import scikits.samplerate as samplerate # pylint: disable=import-error
_HAS_SAMPLERATE = True
except ImportError:
warnings.warn('Could not import scikits.samplerate. '
'Falling back to scipy.signal')
_HAS_SAMPLERATE = False
# -- CORE ROUTINES --#
# Load should never be cached, since we cannot verify that the contents of
# 'path' are unchanged across calls.
[docs]def load(path, sr=22050, mono=True, offset=0.0, duration=None,
dtype=np.float32):
"""Load an audio file as a floating point time series.
Parameters
----------
path : string
path to the input file.
Any format supported by `audioread` will work.
sr : number > 0 [scalar]
target sampling rate
'None' uses the native sampling rate
mono : bool
convert signal to mono
offset : float
start reading after this time (in seconds)
duration : float
only load up to this much audio (in seconds)
dtype : numeric type
data type of `y`
Returns
-------
y : np.ndarray [shape=(n,) or (2, n)]
audio time series
sr : number > 0 [scalar]
sampling rate of `y`
Examples
--------
>>> # Load a wav file
>>> filename = librosa.util.example_audio_file()
>>> y, sr = librosa.load(filename)
>>> y
array([ -4.756e-06, -6.020e-06, ..., -1.040e-06, 0.000e+00], dtype=float32)
>>> sr
22050
>>> # Load a wav file and resample to 11 KHz
>>> filename = librosa.util.example_audio_file()
>>> y, sr = librosa.load(filename, sr=11025)
>>> y
array([ -2.077e-06, -2.928e-06, ..., -4.395e-06, 0.000e+00], dtype=float32)
>>> sr
11025
>>> # Load 5 seconds of a wav file, starting 15 seconds in
>>> filename = librosa.util.example_audio_file()
>>> y, sr = librosa.load(filename, offset=15.0, duration=5.0)
>>> y
array([ 0.069, 0.1 , ..., -0.101, 0. ], dtype=float32)
>>> sr
22050
"""
y = []
with audioread.audio_open(os.path.realpath(path)) as input_file:
sr_native = input_file.samplerate
n_channels = input_file.channels
s_start = int(np.round(sr_native * offset)) * n_channels
if duration is None:
s_end = np.inf
else:
s_end = s_start + (int(np.round(sr_native * duration))
* n_channels)
n = 0
for frame in input_file:
frame = util.buf_to_float(frame, dtype=dtype)
n_prev = n
n = n + len(frame)
if n < s_start:
# offset is after the current frame
# keep reading
continue
if s_end < n_prev:
# we're off the end. stop reading
break
if s_end < n:
# the end is in this frame. crop.
frame = frame[:s_end - n_prev]
if n_prev <= s_start <= n:
# beginning is in this frame
frame = frame[(s_start - n_prev):]
# tack on the current frame
y.append(frame)
if y:
y = np.concatenate(y)
if n_channels > 1:
y = y.reshape((-1, 2)).T
if mono:
y = to_mono(y)
if sr is not None:
y = resample(y, sr_native, sr)
else:
sr = sr_native
# Final cleanup for dtype and contiguity
y = np.ascontiguousarray(y, dtype=dtype)
return (y, sr)
[docs]@cache
def to_mono(y):
'''Force an audio signal down to mono.
Parameters
----------
y : np.ndarray [shape=(2,n) or shape=(n,)]
audio time series, either stereo or mono
Returns
-------
y_mono : np.ndarray [shape=(n,)]
`y` as a monophonic time-series
Examples
--------
>>> y, sr = librosa.load(librosa.util.example_audio_file(), mono=False)
>>> y.shape
(2, 1355168)
>>> y_mono = librosa.to_mono(y)
>>> y_mono.shape
(1355168,)
'''
# Validate the buffer. Stereo is ok here.
util.valid_audio(y, mono=False)
if y.ndim > 1:
y = np.mean(y, axis=0)
return y
[docs]@cache
def resample(y, orig_sr, target_sr, res_type='sinc_best', fix=True, scale=False, **kwargs):
"""Resample a time series from orig_sr to target_sr
Parameters
----------
y : np.ndarray [shape=(n,) or shape=(2, n)]
audio time series. Can be mono or stereo.
orig_sr : number > 0 [scalar]
original sampling rate of `y`
target_sr : number > 0 [scalar]
target sampling rate
res_type : str
resample type (see note)
.. note::
If `scikits.samplerate` is installed, `resample`
will use `res_type`.
Otherwise, fall back on `scipy.signal.resample`.
To force use of `scipy.signal.resample`, set `res_type='scipy'`.
fix : bool
adjust the length of the resampled signal to be of size exactly
`ceil(target_sr * len(y) / orig_sr)`
scale : bool
Scale the resampled signal so that `y` and `y_hat` have approximately
equal total energy.
kwargs : additional keyword arguments
If `fix==True`, additional keyword arguments to pass to
`librosa.util.fix_length`.
Returns
-------
y_hat : np.ndarray [shape=(n * target_sr / orig_sr,)]
`y` resampled from `orig_sr` to `target_sr`
See Also
--------
librosa.util.fix_length
scipy.signal.resample
Examples
--------
Downsample from 22 KHz to 8 KHz
>>> y, sr = librosa.load(librosa.util.example_audio_file(), sr=22050)
>>> y_8k = librosa.resample(y, sr, 8000)
>>> y.shape, y_8k.shape
((1355168,), (491671,))
"""
if y.ndim > 1:
return np.vstack([resample(yi, orig_sr, target_sr,
res_type=res_type,
fix=fix,
**kwargs)
for yi in y])
# First, validate the audio buffer
util.valid_audio(y, mono=True)
if orig_sr == target_sr:
return y
ratio = float(target_sr) / orig_sr
n_samples = int(np.ceil(y.shape[-1] * ratio))
scipy_resample = (res_type == 'scipy')
if _HAS_SAMPLERATE and not scipy_resample:
y_hat = samplerate.resample(y.T, ratio, res_type).T
else:
y_hat = scipy.signal.resample(y, n_samples, axis=-1)
if fix:
y_hat = util.fix_length(y_hat, n_samples, **kwargs)
if scale:
y_hat /= np.sqrt(ratio)
return np.ascontiguousarray(y_hat, dtype=y.dtype)
[docs]def get_duration(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
center=True):
"""Compute the duration (in seconds) of an audio time series or STFT matrix.
Examples
--------
>>> # Load the example audio file
>>> y, sr = librosa.load(librosa.util.example_audio_file())
>>> librosa.get_duration(y=y, sr=sr)
61.44
>>> # Or compute duration from an STFT matrix
>>> y, sr = librosa.load(librosa.util.example_audio_file())
>>> S = librosa.stft(y)
>>> librosa.get_duration(S=S, sr=sr)
61.44
>>> # Or a non-centered STFT matrix
>>> S_left = librosa.stft(y, center=False)
>>> librosa.get_duration(S=S_left, sr=sr)
61.3471201814059
Parameters
----------
y : np.ndarray [shape=(n,), (2, n)] or None
audio time series
sr : number > 0 [scalar]
audio sampling rate of `y`
S : np.ndarray [shape=(d, t)] or None
STFT matrix, or any STFT-derived matrix (e.g., chromagram
or mel spectrogram).
n_fft : int > 0 [scalar]
FFT window size for `S`
hop_length : int > 0 [ scalar]
number of audio samples between columns of `S`
center : boolean
- If `True`, `S[:, t]` is centered at `y[t * hop_length]`
- If `False`, then `S[:, t]` begins at `y[t * hop_length]`
Returns
-------
d : float >= 0
Duration (in seconds) of the input time series or spectrogram.
"""
if y is None:
assert S is not None
n_frames = S.shape[1]
n_samples = n_fft + hop_length * (n_frames - 1)
# If centered, we lose half a window from each end of S
if center:
n_samples = n_samples - 2 * int(n_fft / 2)
else:
# Validate the audio buffer. Stereo is okay here.
util.valid_audio(y, mono=False)
if y.ndim == 1:
n_samples = len(y)
else:
n_samples = y.shape[-1]
return float(n_samples) / sr
[docs]@cache
def autocorrelate(y, max_size=None, axis=-1):
"""Bounded auto-correlation
Parameters
----------
y : np.ndarray
array to autocorrelate
max_size : int > 0 or None
maximum correlation lag.
If unspecified, defaults to `y.shape[axis]` (unbounded)
axis : int
The axis along which to autocorrelate.
By default, the last axis (-1) is taken.
Returns
-------
z : np.ndarray
truncated autocorrelation `y*y` along the specified axis.
If `max_size` is specified, then `z.shape[axis]` is bounded
to `max_size`.
Examples
--------
Compute full autocorrelation of y
>>> y, sr = librosa.load(librosa.util.example_audio_file())
>>> librosa.autocorrelate(y)
array([ 1.584e+04, 1.580e+04, ..., -1.154e-10, -2.725e-13])
Compute onset strength auto-correlation up to 4 seconds
>>> import matplotlib.pyplot as plt
>>> odf = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
>>> ac = librosa.autocorrelate(odf, max_size=4* sr / 512)
>>> plt.plot(ac)
>>> plt.title('Auto-correlation')
>>> plt.xlabel('Lag (frames)')
"""
if max_size is None:
max_size = y.shape[axis]
max_size = int(min(max_size, y.shape[axis]))
# Compute the power spectrum along the chosen axis
# Pad out the signal to support full-length auto-correlation.
powspec = np.abs(fft.fft(y, n=2 * y.shape[axis] + 1, axis=axis))**2
# Convert back to time domain
autocorr = fft.ifft(powspec, axis=axis, overwrite_x=True)
# Slice down to max_size
subslice = [Ellipsis] * autocorr.ndim
subslice[axis] = slice(max_size)
autocorr = autocorr[subslice]
if not np.iscomplexobj(y):
autocorr = autocorr.real
return autocorr
[docs]@cache
def zero_crossings(y, threshold=1e-10, ref_magnitude=None, pad=True,
zero_pos=True, axis=-1):
'''Find the zero-crossings of a signal `y`: indices `i` such that
`sign(y[i]) != sign(y[j])`.
If `y` is multi-dimensional, then zero-crossings are computed along
the specified `axis`.
Examples
--------
>>> # Generate a time-series
>>> y = np.sin(np.linspace(0, 4 * 2 * np.pi, 20))
>>> y
array([ 0.000e+00, 9.694e-01, 4.759e-01, -7.357e-01,
-8.372e-01, 3.247e-01, 9.966e-01, 1.646e-01,
-9.158e-01, -6.142e-01, 6.142e-01, 9.158e-01,
-1.646e-01, -9.966e-01, -3.247e-01, 8.372e-01,
7.357e-01, -4.759e-01, -9.694e-01, -9.797e-16])
>>> # Compute zero-crossings
>>> z = librosa.zero_crossings(y)
>>> z
array([ True, False, False, True, False, True, False, False,
True, False, True, False, True, False, False, True,
False, True, False, True], dtype=bool)
>>> # Stack y against the zero-crossing indicator
>>> np.vstack([y, z]).T
array([[ 0.000e+00, 1.000e+00],
[ 9.694e-01, 0.000e+00],
[ 4.759e-01, 0.000e+00],
[ -7.357e-01, 1.000e+00],
[ -8.372e-01, 0.000e+00],
[ 3.247e-01, 1.000e+00],
[ 9.966e-01, 0.000e+00],
[ 1.646e-01, 0.000e+00],
[ -9.158e-01, 1.000e+00],
[ -6.142e-01, 0.000e+00],
[ 6.142e-01, 1.000e+00],
[ 9.158e-01, 0.000e+00],
[ -1.646e-01, 1.000e+00],
[ -9.966e-01, 0.000e+00],
[ -3.247e-01, 0.000e+00],
[ 8.372e-01, 1.000e+00],
[ 7.357e-01, 0.000e+00],
[ -4.759e-01, 1.000e+00],
[ -9.694e-01, 0.000e+00],
[ -9.797e-16, 1.000e+00]])
>>> # Find the indices of zero-crossings
>>> np.nonzero(z)
(array([ 0, 3, 5, 8, 10, 12, 15, 17, 19]),)
Parameters
----------
y : np.ndarray
The input array
threshold : float > 0 or None
If specified, values where `-threshold <= y <= threshold` are
clipped to 0.
ref_magnitude : float > 0 or callable
If numeric, the threshold is scaled relative to `ref_magnitude`.
If callable, the threshold is scaled relative to
`ref_magnitude(np.abs(y))`.
pad : boolean
If `True`, then `y[0]` is considered a valid zero-crossing.
zero_pos : boolean
If `True` then the value 0 is interpreted as having positive sign.
If `False`, then 0, -1, and +1 all have distinct signs.
axis : int
Axis along which to compute zero-crossings.
Returns
-------
zero_crossings : np.ndarray [shape=y.shape, dtype=boolean]
Indicator array of zero-crossings in `y` along the selected axis.
'''
# Clip within the threshold
if threshold is None:
threshold = 0.0
if six.callable(ref_magnitude):
threshold = threshold * ref_magnitude(np.abs(y))
elif ref_magnitude is not None:
threshold = threshold * ref_magnitude
if threshold > 0:
y = y.copy()
y[np.abs(y) <= threshold] = 0
# Extract the sign bit
if zero_pos:
y_sign = np.signbit(y)
else:
y_sign = np.sign(y)
# Find the change-points by slicing
slice_pre = [slice(None)] * y.ndim
slice_pre[axis] = slice(1, None)
slice_post = [slice(None)] * y.ndim
slice_post[axis] = slice(-1)
# Since we've offset the input by one, pad back onto the front
padding = [(0, 0)] * y.ndim
padding[axis] = (1, 0)
return np.pad((y_sign[slice_post] != y_sign[slice_pre]),
padding,
mode='constant',
constant_values=pad)
[docs]@cache
def clicks(times=None, frames=None, sr=22050, hop_length=512,
click_freq=1000.0, click_duration=0.1, click=None, length=None):
"""Returns a signal with the signal `click` placed at each specified time
Parameters
----------
times : np.ndarray or None
times to place clicks, in seconds
frames : np.ndarray or None
frame indices to place clicks
sr : number > 0
desired sampling rate of the output signal
hop_length : int > 0
if positions are specified by `frames`, the number of samples between frames.
click_freq : float > 0
frequency (in Hz) of the default click signal. Default is 1KHz.
click_duration : float > 0
duration (in seconds) of the default click signal. Default is 100ms.
click : np.ndarray or None
optional click signal sample to use instead of the default blip.
length : int > 0
desired number of samples in the output signal
Returns
-------
click_signal : np.ndarray
Synthesized click signal
Raises
------
ParameterError
- If neither `times` nor `frames` are provided.
- If any of `click_freq`, `click_duration`, or `length` are out of range.
Examples
--------
>>> # Sonify detected beat events
>>> y, sr = librosa.load(librosa.util.example_audio_file())
>>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
>>> y_beats = librosa.clicks(frames=beats, sr=sr)
>>> # Or generate a signal of the same length as y
>>> y_beats = librosa.clicks(frames=beats, sr=sr, length=len(y))
>>> # Or use timing instead of frame indices
>>> times = librosa.frames_to_time(beats, sr=sr)
>>> y_beat_times = librosa.clicks(times=times, sr=sr)
>>> # Or with a click frequency of 880Hz and a 500ms sample
>>> y_beat_times880 = librosa.clicks(times=times, sr=sr,
... click_freq=880, click_duration=0.5)
"""
# Compute sample positions from time or frames
if times is None:
if frames is None:
raise ParameterError('either "times" or "frames" must be provided')
positions = frames_to_samples(frames, hop_length=hop_length)
else:
# Convert times to positions
positions = time_to_samples(times, sr=sr)
if click is not None:
# Check that we have a well-formed audio buffer
util.valid_audio(click, mono=True)
else:
# Create default click signal
if click_duration <= 0:
raise ParameterError('click_duration must be strictly positive')
if click_freq <= 0:
raise ParameterError('click_freq must be strictly positive')
angular_freq = 2 * np.pi * click_freq / float(sr)
click = np.logspace(0, -10,
num=int(np.round(sr * click_duration)),
base=2.0)
click *= np.sin(angular_freq * np.arange(len(click)))
# Set default length
if length is None:
length = positions.max() + click.shape[0]
else:
if length < 1:
raise ParameterError('length must be a positive integer')
# Filter out any positions past the length boundary
positions = positions[positions < length]
# Pre-allocate click signal
click_signal = np.zeros(length, dtype=np.float32)
# Place clicks
for start in positions:
# Compute the end-point of this click
end = start + click.shape[0]
if end >= length:
click_signal[start:] += click[:length - start]
else:
# Normally, just add a click here
click_signal[start:end] += click
return click_signal
# Moved/deprecated functions
peak_pick = util.decorators.moved('librosa.core.peak_pick',
'0.4', '0.5')(util.peak_pick)
localmax = util.decorators.moved('librosa.core.localmax',
'0.4', '0.5')(util.localmax)