diff --git a/madmom/audio/cepstrogram.py b/madmom/audio/cepstrogram.py index c8cfc0142..4b1485c15 100644 --- a/madmom/audio/cepstrogram.py +++ b/madmom/audio/cepstrogram.py @@ -10,7 +10,6 @@ from __future__ import absolute_import, division, print_function import inspect -import math from functools import partial import numpy as np @@ -125,9 +124,9 @@ def process(self, data, **kwargs): MFCC_NORM_FILTERS = True MFCC_MUL = 1. MFCC_ADD = np.spacing(1) -MFCC_DCT_NORM = "ortho" +MFCC_DCT_NORM = 'ortho' MFCC_DELTA_FILTER = np.linspace(4, -4, 9) / 60 -MFCC_DELTADELTA_FILTER = np.linspace(1, -1, 3) / 2 +MFCC_DELTA_DELTA_FILTER = np.linspace(1, -1, 3) / 2 class MFCC(Cepstrogram): @@ -159,7 +158,7 @@ class MFCC(Cepstrogram): Add this value before taking the logarithm of the magnitudes. dct_norm : {'ortho', None}, optional Normalization mode (see scipy.fftpack.dct). Default is 'ortho'. - kwargs : dict + kwargs : dict, optional If no :class:`.audio.spectrogram.Spectrogram` instance was given, one is instantiated and these keyword arguments are passed. @@ -234,7 +233,7 @@ def __new__(cls, spectrogram, filterbank=MelFilterbank, @staticmethod def calc_deltas(data, delta_filter): """ - Applies the given filter to the data after automatically padding by + Apply the given filter to the data after automatically padding by replicating the first and last frame. The length of the padding is calculated via ceil(len(delta_filter)). @@ -245,106 +244,111 @@ def calc_deltas(data, delta_filter): Parameters ---------- data: numpy array - containing the data to process + Data to process, i.e. MFCCs or deltas thereof. delta_filter: numpy array - the filter used for convolution + Filter used for convolution. Returns ------- deltas: numpy array - containing the deltas, has the same shape as data + Deltas of `data`, same shape as `data`. 
+ """ - # prepare vectorized convolve function - # (requires transposed matrices in our use case) - vconv = np.vectorize(partial(np.convolve, mode="same"), - signature='(n),(m)->(k)') # pad data by replicating the first and the last frame - k = int(math.ceil(len(delta_filter) / 2)) - padded = np.vstack((np.array([data[0], ] * k), - data, + k = int(np.ceil(len(delta_filter) / 2)) + padded = np.vstack((np.array([data[0], ] * k), data, np.array([data[-1], ] * k))) # calculate the deltas for each coefficient - deltas = vconv(padded.transpose(), delta_filter) - return deltas.transpose()[k:-k] + deltas = [] + for band in padded.T: + deltas.append(np.convolve(band, delta_filter, 'same')) + # return deltas (first/last k frames truncated) + return np.vstack(deltas).T[k:-k] @lazyprop def deltas(self, delta_filter=MFCC_DELTA_FILTER): """ - Return the derivative of this MFCC's coefficients by convolving with - a filter. Accessing this property corresponds to the function call - ``MFCC.calc_deltas(self, delta_filter)``. However, using this property, - the result is calculated only once and cached for later access. - See ``@lazyprop``for further details. + First order derivative of the MFCCs. Parameters ---------- delta_filter: numpy array, optional - the filter used for convolution, defaults to MFCC_DELTA_FILTER + Filter to calculate the derivative of the MFCCs. Returns ------- deltas: numpy array - containing the deltas, has the same shape as self + Deltas of the MFCCs, same shape as MFCCs. + + Notes + ----- + Accessing this property corresponds to the function call + ``MFCC.calc_deltas(mfccs, delta_filter)``, with results being cached. + """ return MFCC.calc_deltas(self, delta_filter) @lazyprop - def deltadeltas(self, deltadelta_filter=MFCC_DELTADELTA_FILTER): + def delta_deltas(self, delta_delta_filter=MFCC_DELTA_DELTA_FILTER): """ - Return the second order derivative of this MFCC's coefficients by - convolving with a filter. 
Accessing this property corresponds to the - function call ``MFCC.calc_deltas(self, deltadelta_filter)``. However, - using this property, the result is calculated only once and cached - for later access. See ``@lazyprop``for further details. + Second order derivatives of the MFCCs. Parameters ---------- - delta_filter: numpy array, optional - the filter used for convolution, defaults to MFCC_DELTA_FILTER + delta_delta_filter: numpy array, optional + Filter to calculate the derivative of the derivative. Returns ------- deltas: numpy array - containing the deltas, has the same shape as self + Delta deltas of the MFCCs, same shape as MFCCs. + + Notes + ----- + Accessing this property corresponds to the function call + ``MFCC.calc_deltas(deltas, delta_delta_filter)``, with results being + cached. + """ - return MFCC.calc_deltas(self.deltas, deltadelta_filter) + return MFCC.calc_deltas(self.deltas, delta_delta_filter) def calc_voicebox_deltas(self, delta_filter=MFCC_DELTA_FILTER, - ddelta_filter=MFCC_DELTADELTA_FILTER): + delta_delta_filter=MFCC_DELTA_DELTA_FILTER): """ - Method to calculate deltas and deltadeltas the way it is done in the - voicebox MatLab toolbox. - - see http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html + Calculate deltas and delta deltas the way it is done in the voicebox + MATLAB toolbox [1]_. Parameters ---------- delta_filter : numpy array - filter to calculate the derivative of this MFCC's data - ddelta_filter : numpy array - filter to calculate the derivative of the derivative + Filter to calculate the derivative of the MFCCs. + delta_delta_filter : numpy array + Filter to calculate the derivative of the derivative. 
Returns ------- - [self, deltas, deltadeltas] : numpy array, shape (|frames|, |bands|*3) - a horizontally stacked np array consisting of the MFCC coefficients - its derivative and the derivative of second order + [mfcc, delta, delta_delta] : numpy array, shape (num_frames, bands * 3) + Horizontally stacked array consisting of the MFCC coefficients, + their first and second order derivatives. + + References + ---------- + .. [1] http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html + """ padded_input = np.vstack( (np.array([self[0], ] * 5), self, np.array([self[-1], ] * 5))) deltashape = tuple(reversed(padded_input.shape)) flat_input = padded_input.transpose().flatten() - - deltas = np.convolve(flat_input, delta_filter, mode="same") \ - .reshape(deltashape).T[4:-4, ] + deltas = np.convolve(flat_input, delta_filter, mode='same') + deltas = deltas.reshape(deltashape).T[4:-4, ] deltadeltashape = tuple(reversed(deltas.shape)) flat_deltas = deltas.transpose().flatten() deltas = deltas[1:-1, ] - - deltadeltas = np.convolve(flat_deltas, ddelta_filter, mode="same") \ - .reshape(deltadeltashape).T[1:-1, ] - - return np.hstack((self, deltas, deltadeltas)) + delta_deltas = np.convolve(flat_deltas, delta_delta_filter, + mode='same') + delta_deltas = delta_deltas.reshape(deltadeltashape).T[1:-1, ] + return np.hstack((self, deltas, delta_deltas)) def __array_finalize__(self, obj): if obj is None: @@ -358,9 +362,8 @@ def __array_finalize__(self, obj): class MFCCProcessor(Processor): """ - MFCCProcessor is CepstrogramProcessor which filters the magnitude - spectrogram of the spectrogram with a Mel filterbank, takes the logarithm - and performs a discrete cosine transform afterwards. + MFCCProcessor filters the magnitude spectrogram with a Mel filterbank, + takes the logarithm and performs a discrete cosine transform afterwards. Parameters ---------- @@ -377,8 +380,6 @@ class MFCCProcessor(Processor): logarithm. 
add : float, optional Add this value before taking the logarithm of the magnitudes. - transform : numpy ufunc - Transformation applied to the Mel filtered spectrogram. """ @@ -403,7 +404,7 @@ def process(self, data, **kwargs): ---------- data : numpy array Data to be processed (a spectrogram). - kwargs : dict + kwargs : dict, optional Keyword arguments passed to :class:`MFCC`. Returns diff --git a/tests/test_audio_mfcc.py b/tests/test_audio_cepstrogram.py similarity index 64% rename from tests/test_audio_mfcc.py rename to tests/test_audio_cepstrogram.py index 65ecb5254..ed7569896 100644 --- a/tests/test_audio_mfcc.py +++ b/tests/test_audio_cepstrogram.py @@ -17,59 +17,56 @@ from . import AUDIO_PATH sample_file = pj(AUDIO_PATH, 'sample.wav') -sample_file_22050 = pj(AUDIO_PATH, 'sample_22050.wav') class TestMFCCClass(unittest.TestCase): + + def setUp(self): + self.mfcc = MFCC(sample_file) + def test_types(self): - result = MFCC(sample_file) - self.assertIsInstance(result, MFCC) - self.assertIsInstance(result, Cepstrogram) + self.assertIsInstance(self.mfcc, MFCC) + self.assertIsInstance(self.mfcc, Cepstrogram) # attributes - self.assertIsInstance(result.filterbank, MelFilterbank) + self.assertIsInstance(self.mfcc.filterbank, MelFilterbank) # properties - self.assertIsInstance(result.deltas, np.ndarray) - self.assertIsInstance(result.deltadeltas, np.ndarray) - self.assertIsInstance(result.num_bins, int) - self.assertIsInstance(result.num_frames, int) + self.assertIsInstance(self.mfcc.deltas, np.ndarray) + self.assertIsInstance(self.mfcc.delta_deltas, np.ndarray) + self.assertIsInstance(self.mfcc.num_bins, int) + self.assertIsInstance(self.mfcc.num_frames, int) # wrong filterbank type with self.assertRaises(TypeError): FilteredSpectrogram(sample_file, filterbank='bla') def test_values(self): - # from file - result = MFCC(sample_file) allclose = partial(np.allclose, rtol=1.e-3, atol=1.e-5) - self.assertTrue(allclose(result[0, :6], + # values + 
self.assertTrue(allclose(self.mfcc[0, :6], [-3.61102366, 6.81075716, 2.55457568, 1.88377929, 1.04133379, 0.6382336])) - self.assertTrue(allclose(result[0, -6:], + self.assertTrue(allclose(self.mfcc[0, -6:], [-0.20386486, -0.18468723, -0.00233107, 0.20703268, 0.21419463, 0.00598407])) # attributes - self.assertTrue(result.shape == (281, 30)) - + self.assertTrue(self.mfcc.shape == (281, 30)) # properties - self.assertEqual(result.num_bins, 30) - self.assertEqual(result.num_frames, 281) + self.assertEqual(self.mfcc.num_bins, 30) + self.assertEqual(self.mfcc.num_frames, 281) def test_deltas(self): - # from file - result = MFCC(sample_file) allclose = partial(np.allclose, rtol=1.e-2, atol=1.e-4) - # don't compare first element because it is dependent on the # padding used for filtering - self.assertTrue(allclose(result.deltas[1, :6], + self.assertTrue(allclose(self.mfcc.deltas[1, :6], [-0.02286286, -0.11329014, 0.05381977, 0.10438456, 0.04268386, -0.06839912])) - self.assertTrue(allclose(result.deltas[1, -6:], + self.assertTrue(allclose(self.mfcc.deltas[1, -6:], [-0.03156065, -0.019716, -0.03417692, -0.07768068, -0.05539324, -0.02616282])) - - self.assertTrue(allclose(result.deltadeltas[1, :6], + # delta deltas + self.assertTrue(allclose(self.mfcc.delta_deltas[1, :6], [-0.00804922, -0.009922, -0.00454391, 0.0038989, 0.00254525, 0.0120557])) - self.assertTrue(allclose(result.deltadeltas[1, -6:], + self.assertTrue(allclose(self.mfcc.delta_deltas[1, -6:], [0.0072148, 0.00094424, 0.00029913, 0.00530994, 0.00184207, -0.00276511]))