Skip to content

Commit

Permalink
docstring, formatting and Numpy 1.8 compatibility fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Sebastian Böck authored and Sebastian Böck committed Dec 6, 2018
1 parent 6b70da4 commit b5fa495
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 84 deletions.
119 changes: 60 additions & 59 deletions madmom/audio/cepstrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from __future__ import absolute_import, division, print_function

import inspect
import math
from functools import partial

import numpy as np
Expand Down Expand Up @@ -125,9 +124,9 @@ def process(self, data, **kwargs):
MFCC_NORM_FILTERS = True
MFCC_MUL = 1.
MFCC_ADD = np.spacing(1)
MFCC_DCT_NORM = "ortho"
MFCC_DCT_NORM = 'ortho'
MFCC_DELTA_FILTER = np.linspace(4, -4, 9) / 60
MFCC_DELTADELTA_FILTER = np.linspace(1, -1, 3) / 2
MFCC_DELTA_DELTA_FILTER = np.linspace(1, -1, 3) / 2


class MFCC(Cepstrogram):
Expand Down Expand Up @@ -159,7 +158,7 @@ class MFCC(Cepstrogram):
Add this value before taking the logarithm of the magnitudes.
dct_norm : {'ortho', None}, optional
Normalization mode (see scipy.fftpack.dct). Default is 'ortho'.
kwargs : dict
kwargs : dict, optional
If no :class:`.audio.spectrogram.Spectrogram` instance was given, one
is instantiated and these keyword arguments are passed.
Expand Down Expand Up @@ -234,7 +233,7 @@ def __new__(cls, spectrogram, filterbank=MelFilterbank,
@staticmethod
def calc_deltas(data, delta_filter):
"""
Applies the given filter to the data after automatically padding by
Apply the given filter to the data after automatically padding by
replicating the first and last frame. The length of the padding is
calculated via ceil(len(delta_filter) / 2).
Expand All @@ -245,106 +244,111 @@ def calc_deltas(data, delta_filter):
Parameters
----------
data: numpy array
containing the data to process
Data to process, i.e. MFCCs or deltas thereof.
delta_filter: numpy array
the filter used for convolution
Filter used for convolution.
Returns
-------
deltas: numpy array
containing the deltas, has the same shape as data
Deltas of `data`, same shape as `data`.
"""
# prepare vectorized convolve function
# (requires transposed matrices in our use case)
vconv = np.vectorize(partial(np.convolve, mode="same"),
signature='(n),(m)->(k)')
# pad data by replicating the first and the last frame
k = int(math.ceil(len(delta_filter) / 2))
padded = np.vstack((np.array([data[0], ] * k),
data,
k = int(np.ceil(len(delta_filter) / 2))
padded = np.vstack((np.array([data[0], ] * k), data,
np.array([data[-1], ] * k)))
# calculate the deltas for each coefficient
deltas = vconv(padded.transpose(), delta_filter)
return deltas.transpose()[k:-k]
deltas = []
for band in padded.T:
deltas.append(np.convolve(band, delta_filter, 'same'))
# return deltas (first/last k frames truncated)
return np.vstack(deltas).T[k:-k]

@lazyprop
def deltas(self, delta_filter=MFCC_DELTA_FILTER):
"""
Return the derivative of this MFCC's coefficients by convolving with
a filter. Accessing this property corresponds to the function call
``MFCC.calc_deltas(self, delta_filter)``. However, using this property,
the result is calculated only once and cached for later access.
See ``@lazyprop``for further details.
First order derivative of the MFCCs.
Parameters
----------
delta_filter: numpy array, optional
the filter used for convolution, defaults to MFCC_DELTA_FILTER
Filter to calculate the derivative of the MFCCs.
Returns
-------
deltas: numpy array
containing the deltas, has the same shape as self
Deltas of the MFCCs, same shape as MFCCs.
Notes
-----
Accessing this property corresponds to the function call
``MFCC.calc_deltas(mfccs, delta_filter)``, with results being cached.
"""
return MFCC.calc_deltas(self, delta_filter)

@lazyprop
def deltadeltas(self, deltadelta_filter=MFCC_DELTADELTA_FILTER):
def delta_deltas(self, delta_delta_filter=MFCC_DELTA_DELTA_FILTER):
"""
Return the second order derivative of this MFCC's coefficients by
convolving with a filter. Accessing this property corresponds to the
function call ``MFCC.calc_deltas(self, deltadelta_filter)``. However,
using this property, the result is calculated only once and cached
for later access. See ``@lazyprop``for further details.
Second order derivatives of the MFCCs.
Parameters
----------
delta_filter: numpy array, optional
the filter used for convolution, defaults to MFCC_DELTA_FILTER
delta_delta_filter: numpy array, optional
Filter to calculate the derivative of the derivative.
Returns
-------
deltas: numpy array
containing the deltas, has the same shape as self
Delta deltas of the MFCCs, same shape as MFCCs.
Notes
-----
Accessing this property corresponds to the function call
``MFCC.calc_deltas(deltas, delta_delta_filter)``, with results being
cached.
"""
return MFCC.calc_deltas(self.deltas, deltadelta_filter)
return MFCC.calc_deltas(self.deltas, delta_delta_filter)

def calc_voicebox_deltas(self, delta_filter=MFCC_DELTA_FILTER,
ddelta_filter=MFCC_DELTADELTA_FILTER):
delta_delta_filter=MFCC_DELTA_DELTA_FILTER):
"""
Method to calculate deltas and deltadeltas the way it is done in the
voicebox MatLab toolbox.
see http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
Calculate deltas and delta deltas the way it is done in the voicebox
MATLAB toolbox [1]_.
Parameters
----------
delta_filter : numpy array
filter to calculate the derivative of this MFCC's data
ddelta_filter : numpy array
filter to calculate the derivative of the derivative
Filter to calculate the derivative of the MFCCs.
delta_delta_filter : numpy array
Filter to calculate the derivative of the derivative.
Returns
-------
[self, deltas, deltadeltas] : numpy array, shape (|frames|, |bands|*3)
a horizontally stacked np array consisting of the MFCC coefficients
its derivative and the derivative of second order
[mfcc, delta, delta_delta] : numpy array, shape (num_frames, bands * 3)
Horizontally stacked array consisting of the MFCC coefficients,
their first and second order derivatives.
References
----------
.. [1] http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
"""
padded_input = np.vstack(
(np.array([self[0], ] * 5), self, np.array([self[-1], ] * 5)))
deltashape = tuple(reversed(padded_input.shape))
flat_input = padded_input.transpose().flatten()

deltas = np.convolve(flat_input, delta_filter, mode="same") \
.reshape(deltashape).T[4:-4, ]
deltas = np.convolve(flat_input, delta_filter, mode='same')
deltas = deltas.reshape(deltashape).T[4:-4, ]
deltadeltashape = tuple(reversed(deltas.shape))
flat_deltas = deltas.transpose().flatten()
deltas = deltas[1:-1, ]

deltadeltas = np.convolve(flat_deltas, ddelta_filter, mode="same") \
.reshape(deltadeltashape).T[1:-1, ]

return np.hstack((self, deltas, deltadeltas))
delta_deltas = np.convolve(flat_deltas, delta_delta_filter,
mode='same')
delta_deltas = delta_deltas.reshape(deltadeltashape).T[1:-1, ]
return np.hstack((self, deltas, delta_deltas))

def __array_finalize__(self, obj):
if obj is None:
Expand All @@ -358,9 +362,8 @@ def __array_finalize__(self, obj):

class MFCCProcessor(Processor):
"""
MFCCProcessor is CepstrogramProcessor which filters the magnitude
spectrogram of the spectrogram with a Mel filterbank, takes the logarithm
and performs a discrete cosine transform afterwards.
MFCCProcessor filters the magnitude spectrogram with a Mel filterbank,
takes the logarithm and performs a discrete cosine transform afterwards.
Parameters
----------
Expand All @@ -377,8 +380,6 @@ class MFCCProcessor(Processor):
logarithm.
add : float, optional
Add this value before taking the logarithm of the magnitudes.
transform : numpy ufunc
Transformation applied to the Mel filtered spectrogram.
"""

Expand All @@ -403,7 +404,7 @@ def process(self, data, **kwargs):
----------
data : numpy array
Data to be processed (a spectrogram).
kwargs : dict
kwargs : dict, optional
Keyword arguments passed to :class:`MFCC`.
Returns
Expand Down
47 changes: 22 additions & 25 deletions tests/test_audio_mfcc.py → tests/test_audio_cepstrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,59 +17,56 @@
from . import AUDIO_PATH

sample_file = pj(AUDIO_PATH, 'sample.wav')
sample_file_22050 = pj(AUDIO_PATH, 'sample_22050.wav')


class TestMFCCClass(unittest.TestCase):

def setUp(self):
self.mfcc = MFCC(sample_file)

def test_types(self):
result = MFCC(sample_file)
self.assertIsInstance(result, MFCC)
self.assertIsInstance(result, Cepstrogram)
self.assertIsInstance(self.mfcc, MFCC)
self.assertIsInstance(self.mfcc, Cepstrogram)
# attributes
self.assertIsInstance(result.filterbank, MelFilterbank)
self.assertIsInstance(self.mfcc.filterbank, MelFilterbank)
# properties
self.assertIsInstance(result.deltas, np.ndarray)
self.assertIsInstance(result.deltadeltas, np.ndarray)
self.assertIsInstance(result.num_bins, int)
self.assertIsInstance(result.num_frames, int)
self.assertIsInstance(self.mfcc.deltas, np.ndarray)
self.assertIsInstance(self.mfcc.delta_deltas, np.ndarray)
self.assertIsInstance(self.mfcc.num_bins, int)
self.assertIsInstance(self.mfcc.num_frames, int)
# wrong filterbank type
with self.assertRaises(TypeError):
FilteredSpectrogram(sample_file, filterbank='bla')

def test_values(self):
# from file
result = MFCC(sample_file)
allclose = partial(np.allclose, rtol=1.e-3, atol=1.e-5)
self.assertTrue(allclose(result[0, :6],
# values
self.assertTrue(allclose(self.mfcc[0, :6],
[-3.61102366, 6.81075716, 2.55457568,
1.88377929, 1.04133379, 0.6382336]))
self.assertTrue(allclose(result[0, -6:],
self.assertTrue(allclose(self.mfcc[0, -6:],
[-0.20386486, -0.18468723, -0.00233107,
0.20703268, 0.21419463, 0.00598407]))
# attributes
self.assertTrue(result.shape == (281, 30))

self.assertTrue(self.mfcc.shape == (281, 30))
# properties
self.assertEqual(result.num_bins, 30)
self.assertEqual(result.num_frames, 281)
self.assertEqual(self.mfcc.num_bins, 30)
self.assertEqual(self.mfcc.num_frames, 281)

def test_deltas(self):
# from file
result = MFCC(sample_file)
allclose = partial(np.allclose, rtol=1.e-2, atol=1.e-4)

# don't compare the first frame because its values depend on the
# padding used for filtering
self.assertTrue(allclose(result.deltas[1, :6],
self.assertTrue(allclose(self.mfcc.deltas[1, :6],
[-0.02286286, -0.11329014, 0.05381977,
0.10438456, 0.04268386, -0.06839912]))
self.assertTrue(allclose(result.deltas[1, -6:],
self.assertTrue(allclose(self.mfcc.deltas[1, -6:],
[-0.03156065, -0.019716, -0.03417692,
-0.07768068, -0.05539324, -0.02616282]))

self.assertTrue(allclose(result.deltadeltas[1, :6],
# delta deltas
self.assertTrue(allclose(self.mfcc.delta_deltas[1, :6],
[-0.00804922, -0.009922, -0.00454391,
0.0038989, 0.00254525, 0.0120557]))
self.assertTrue(allclose(result.deltadeltas[1, -6:],
self.assertTrue(allclose(self.mfcc.delta_deltas[1, -6:],
[0.0072148, 0.00094424, 0.00029913,
0.00530994, 0.00184207, -0.00276511]))

0 comments on commit b5fa495

Please sign in to comment.