-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
159 lines (136 loc) · 4.97 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from pathlib import Path
import librosa
import numpy as np
from IPython.display import Audio
from src.objects import MelSGram, Tune
class TuneDataset():
    """Audio dataset of short tunes.

    Typical usage is a pipeline: ``load_tunes`` collects the file paths,
    ``extract_tune`` decodes them into ``Tune`` objects, ``pad_tunes`` /
    ``crop_tunes`` normalise their duration, ``extract_mel_sgrams`` builds
    ``MelSGram`` objects and the ``dump_*`` methods write results to disk.
    """

    def __init__(self,):
        # Initialise every attribute the other methods read, so that calling
        # them before the corresponding loading/extraction step operates on
        # an empty dataset instead of raising AttributeError.
        self.folder = None      # folder passed to the last load_tunes call
        self.file_paths = []    # paths of the audio files found in folder
        self.tunes = []         # Tune objects built by extract_tune
        self.mel_sgrams = []    # MelSGram objects built by extract_mel_sgrams

    def load_tunes(self,
                   folder: Path,
                   file_type: str = '*.wav'):
        """Collect the audio files of a given folder.

        All audio samples must share the same file type.

        Parameters
        ----------
        folder : Path
            Folder to load tunes from. A plain string is accepted as well.
        file_type : str, optional
            Glob pattern selecting the files, by default '*.wav'

        Returns
        -------
        TuneDataset
            The dataset itself, so calls can be chained.

        Raises
        ------
        FileNotFoundError
            If folder does not exist.
        """
        folder = Path(folder)
        if not folder.exists():
            raise FileNotFoundError(f'Folder does not exist: {folder}')
        # Keep regular files only (a directory may match the pattern too) and
        # sort them, because glob order depends on the file system.
        self.file_paths = sorted(
            path for path in folder.glob(file_type) if path.is_file()
        )
        # Record the folder only once it has been validated.
        self.folder = folder
        return self

    def __repr__(self) -> str:
        return f'{len(self.file_paths)} files loaded from {self.folder}.'

    def __getitem__(self,
                    file_name: str):
        """Play a previously loaded file.

        Parameters
        ----------
        file_name : str
            The file name. Must contain extension.

        Returns
        -------
        Audio
            Playable audio. Suitable for Jupyter.

        Raises
        ------
        KeyError
            If no loaded file has the given name.
        """
        for file in self.file_paths:
            if file.name == file_name:
                return Audio(str(file))
        # An unknown name is an error, not a silent None.
        raise KeyError(file_name)

    @staticmethod
    def _print_progress(done: int, total: int) -> None:
        """Print the percentage of items processed so far."""
        print(f'{100 * (done / total)} %')

    def extract_tune(self,
                     verbose: bool = True):
        """Extract contents from loaded files.

        The result is stored in ``self.tunes``, replacing any previous
        extraction.

        Parameters
        ----------
        verbose : bool, optional
            Show progress, by default True
        """
        self.tunes = list()
        total = len(self.file_paths)
        # Count from 1 so the progress ends at 100 % instead of starting at 0 %.
        for done, file_path in enumerate(self.file_paths, start=1):
            # sr=None keeps the sampling rate stored in the file.
            samples, sample_rate = librosa.load(str(file_path),
                                                sr=None)
            tune = Tune(
                samples=samples,
                sample_rate=sample_rate,
                file_path=file_path
            )
            self.tunes.append(tune)
            if verbose:
                self._print_progress(done, total)

    def pad_tunes(self, target_time: float) -> None:
        """Pad the tunes to a target time by repeating the existing samples.

        Tunes with longer times are left unchanged.

        Parameters
        ----------
        target_time : float
            Target duration of the padded audio in seconds.
        """
        for tune in self.tunes:
            tune.pad(target_time)

    def crop_tunes(self, target_time: float) -> None:
        """Crop the tunes to a target time by removing excess samples.

        Parameters
        ----------
        target_time : float
            Target duration of the cropped audio in seconds.
        """
        for tune in self.tunes:
            tune.crop(target_time)

    def extract_mel_sgrams(self,
                           n_mels: int = 128,
                           verbose: bool = True):
        """Extract Mel Spectrograms from previously extracted file contents.

        The result is stored in ``self.mel_sgrams``, replacing any previous
        extraction.

        Parameters
        ----------
        n_mels : int, optional
            Number of Mel bands, forwarded to
            ``librosa.feature.melspectrogram``, by default 128
        verbose : bool, optional
            Show progress, by default True
        """
        self.mel_sgrams = list()
        total = len(self.tunes)
        # Count from 1 so the progress ends at 100 % instead of starting at 0 %.
        for done, tune in enumerate(self.tunes, start=1):
            sgram = librosa.stft(tune.samples)
            # Only the magnitude is used; the phase is discarded.
            sgram_mag, _ = librosa.magphase(sgram)
            mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag,
                                                             n_mels=n_mels,
                                                             sr=tune.sample_rate)
            # use the decibel scale to get the final Mel Spectrogram;
            # ref=np.min takes the smallest value as the reference level.
            mel_sgram = librosa.amplitude_to_db(mel_scale_sgram,
                                                ref=np.min)
            mel_sg = MelSGram(
                file_path=tune.file_path,
                content=mel_sgram,
                sample_rate=tune.sample_rate
            )
            self.mel_sgrams.append(mel_sg)
            if verbose:
                self._print_progress(done, total)

    def dump_tunes(self, path: Path, output_type: str = 'numpy') -> None:
        """Dump the audios to a folder in the specified format.

        Parameters
        ----------
        path : Path
            Output folder path.
        output_type : str, optional
            Type of output file ('numpy' or 'wav'), by default 'numpy'.
        """
        for tune in self.tunes:
            tune.dump(path,
                      output_type)

    def dump_mel_sgrams(self, path: Path) -> None:
        """Dump the Mel Spectrograms to a folder.

        Parameters
        ----------
        path : Path
            Output folder path.
        """
        for mel_sg in self.mel_sgrams:
            mel_sg.dump(path)