Merge branch 'develop' into pixit-normalize-sources

pyannote · Jul 21, 2024 · 9972a91
2 parents a766f2a + 286ea1a
commit 9972a91
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 11 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## develop
+
+- fix: support `numpy==2.x` by replacing `np.NAN` with `np.nan` ([@metal3d](https://github.com/metal3d/))
+
 
 ## Version 3.3.1 (2024-06-19)
 

diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py
@@ -400,7 +400,7 @@ def reconstruct(
         num_chunks, num_frames, local_num_speakers = segmentations.data.shape
 
         num_clusters = np.max(hard_clusters) + 1
-        clustered_segmentations = np.NAN * np.zeros(
+        clustered_segmentations = np.nan * np.zeros(
             (num_chunks, num_frames, num_clusters)
         )
 
@@ -515,7 +515,6 @@ def apply(
             centroids = None
 
         else:
-
             # skip speaker embedding extraction with oracle clustering
             if self.klustering == "OracleClustering" and not return_embeddings:
                 embeddings = None

diff --git a/pyannote/audio/pipelines/speaker_verification.py b/pyannote/audio/pipelines/speaker_verification.py
@@ -186,7 +186,7 @@ def __call__(
 
         # corner case: every signal is too short
         if max_len < self.min_num_samples:
-            return np.NAN * np.zeros((batch_size, self.dimension))
+            return np.nan * np.zeros((batch_size, self.dimension))
 
         too_short = wav_lens < self.min_num_samples
         wav_lens[too_short] = max_len
@@ -197,7 +197,7 @@ def __call__(
         )
 
         embeddings = embeddings.cpu().numpy()
-        embeddings[too_short.cpu().numpy()] = np.NAN
+        embeddings[too_short.cpu().numpy()] = np.nan
 
         return embeddings
 
@@ -364,7 +364,7 @@ def __call__(
 
         # corner case: every signal is too short
         if max_len < self.min_num_samples:
-            return np.NAN * np.zeros((batch_size, self.dimension))
+            return np.nan * np.zeros((batch_size, self.dimension))
 
         too_short = wav_lens < self.min_num_samples
         wav_lens = wav_lens / max_len
@@ -377,7 +377,7 @@ def __call__(
             .numpy()
         )
 
-        embeddings[too_short.cpu().numpy()] = np.NAN
+        embeddings[too_short.cpu().numpy()] = np.nan
 
         return embeddings
 
@@ -594,7 +594,7 @@ def __call__(
 
         imasks = imasks > 0.5
 
-        embeddings = np.NAN * np.zeros((batch_size, self.dimension))
+        embeddings = np.nan * np.zeros((batch_size, self.dimension))
 
         for f, (feature, imask) in enumerate(zip(features, imasks)):
             masked_feature = feature[imask]

diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py
@@ -419,7 +419,7 @@ def reconstruct(
         num_chunks, num_frames, local_num_speakers = segmentations.data.shape
 
         num_clusters = np.max(hard_clusters) + 1
-        clustered_segmentations = np.NAN * np.zeros(
+        clustered_segmentations = np.nan * np.zeros(
             (num_chunks, num_frames, num_clusters)
         )
 
@@ -644,9 +644,9 @@ def apply(
                         len(speaker_activation), dtype=float
                     )
 
-                    speaker_activation_with_context[
-                        np.concatenate(remaining_zeros)
-                    ] = 0.0
+                    speaker_activation_with_context[np.concatenate(remaining_zeros)] = (
+                        0.0
+                    )
 
                     discrete_diarization.data.T[i] = speaker_activation_with_context
             num_sources = sources.data.shape[1]