From 286ea1a4e34e2dd7d7926f590e402dac1e17494b Mon Sep 17 00:00:00 2001 From: Patrice Ferlet Date: Mon, 1 Jul 2024 13:39:57 +0200 Subject: [PATCH] fix: fix support for `numpy==2.x` --- CHANGELOG.md | 4 ++++ pyannote/audio/pipelines/speaker_diarization.py | 3 +-- pyannote/audio/pipelines/speaker_verification.py | 10 +++++----- pyannote/audio/pipelines/speech_separation.py | 8 ++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9a271a67..8fd1ab187 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## develop + +- fix: fix support for `numpy==2.x` ([@metal3d](https://github.com/metal3d/)) + ## Version 3.3.1 (2024-06-19) diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py index 45ae085ed..edfa5966c 100644 --- a/pyannote/audio/pipelines/speaker_diarization.py +++ b/pyannote/audio/pipelines/speaker_diarization.py @@ -400,7 +400,7 @@ def reconstruct( num_chunks, num_frames, local_num_speakers = segmentations.data.shape num_clusters = np.max(hard_clusters) + 1 - clustered_segmentations = np.NAN * np.zeros( + clustered_segmentations = np.nan * np.zeros( (num_chunks, num_frames, num_clusters) ) @@ -515,7 +515,6 @@ def apply( centroids = None else: - # skip speaker embedding extraction with oracle clustering if self.klustering == "OracleClustering" and not return_embeddings: embeddings = None diff --git a/pyannote/audio/pipelines/speaker_verification.py b/pyannote/audio/pipelines/speaker_verification.py index 022c1ca6f..d2de51dc7 100644 --- a/pyannote/audio/pipelines/speaker_verification.py +++ b/pyannote/audio/pipelines/speaker_verification.py @@ -186,7 +186,7 @@ def __call__( # corner case: every signal is too short if max_len < self.min_num_samples: - return np.NAN * np.zeros((batch_size, self.dimension)) + return np.nan * np.zeros((batch_size, self.dimension)) too_short = wav_lens < self.min_num_samples wav_lens[too_short] = max_len @@ -197,7 +197,7 @@ def __call__( ) embeddings = embeddings.cpu().numpy() - embeddings[too_short.cpu().numpy()] = np.NAN + embeddings[too_short.cpu().numpy()] = np.nan return embeddings @@ -364,7 +364,7 @@ def __call__( # corner case: every signal is too short if max_len < self.min_num_samples: - return np.NAN * np.zeros((batch_size, self.dimension)) + return np.nan * np.zeros((batch_size, self.dimension)) too_short = wav_lens < self.min_num_samples wav_lens = wav_lens / max_len @@ -377,7 +377,7 @@ def __call__( .numpy() ) - embeddings[too_short.cpu().numpy()] = np.NAN + embeddings[too_short.cpu().numpy()] = np.nan return embeddings @@ -594,7 +594,7 @@ def __call__( imasks = imasks > 0.5 - embeddings = np.NAN * np.zeros((batch_size, self.dimension)) + embeddings = np.nan * np.zeros((batch_size, self.dimension)) for f, (feature, imask) in enumerate(zip(features, imasks)): masked_feature = feature[imask] diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index 45c10b9b5..c1b9b036c 100644 --- a/pyannote/audio/pipelines/speech_separation.py +++ b/pyannote/audio/pipelines/speech_separation.py @@ -419,7 +419,7 @@ def reconstruct( num_chunks, num_frames, local_num_speakers = segmentations.data.shape num_clusters = np.max(hard_clusters) + 1 - clustered_segmentations = np.NAN * np.zeros( + clustered_segmentations = np.nan * np.zeros( (num_chunks, num_frames, num_clusters) ) @@ -644,9 +644,9 @@ def apply( len(speaker_activation), dtype=float ) - speaker_activation_with_context[ - np.concatenate(remaining_zeros) - ] = 0.0 + speaker_activation_with_context[np.concatenate(remaining_zeros)] = ( + 0.0 + ) discrete_diarization.data.T[i] = speaker_activation_with_context num_sources = sources.data.shape[1]