diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py
index 45c10b9b5..da04040aa 100644
--- a/pyannote/audio/pipelines/speech_separation.py
+++ b/pyannote/audio/pipelines/speech_separation.py
@@ -654,6 +654,12 @@ def apply(
             sources.data * discrete_diarization.align(sources).data[:, :num_sources]
         )
 
+        # separated sources might be scaled up/down due to SI-SDR loss used when training
+        # so we peak-normalize them; a source that is silent everywhere has a zero peak
+        # and is left untouched, since dividing by it would fill the channel with NaN
+        peak = np.max(np.abs(sources.data), axis=0, keepdims=True)
+        sources.data = sources.data / np.where(peak > 0, peak, 1)
+
         # convert to continuous diarization
         diarization = self.to_annotation(
             discrete_diarization,