From 286ea1a4e34e2dd7d7926f590e402dac1e17494b Mon Sep 17 00:00:00 2001
From: Patrice Ferlet <metal3d@gmail.com>
Date: Mon, 1 Jul 2024 13:39:57 +0200
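Subject: [PATCH] fix: fix support for `numpy==2.x`

NumPy 2.0 removed the `np.NAN` alias, so every use is replaced with the
canonical `np.nan`. The patch also contains two formatting-only changes
(a dropped blank line in speaker_diarization.py and a re-wrapped
assignment in speech_separation.py) that do not affect behavior.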

---
 CHANGELOG.md                                     |  4 ++++
 pyannote/audio/pipelines/speaker_diarization.py  |  3 +--
 pyannote/audio/pipelines/speaker_verification.py | 10 +++++-----
 pyannote/audio/pipelines/speech_separation.py    |  8 ++++----
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9a271a67..8fd1ab187 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## develop
+
+- fix: fix support for `numpy==2.x` ([@metal3d](https://github.com/metal3d/))
+
 
 ## Version 3.3.1 (2024-06-19)
 
diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py
index 45ae085ed..edfa5966c 100644
--- a/pyannote/audio/pipelines/speaker_diarization.py
+++ b/pyannote/audio/pipelines/speaker_diarization.py
@@ -400,7 +400,7 @@ def reconstruct(
         num_chunks, num_frames, local_num_speakers = segmentations.data.shape
 
         num_clusters = np.max(hard_clusters) + 1
-        clustered_segmentations = np.NAN * np.zeros(
+        clustered_segmentations = np.nan * np.zeros(
             (num_chunks, num_frames, num_clusters)
         )
 
@@ -515,7 +515,6 @@ def apply(
             centroids = None
 
         else:
-
             # skip speaker embedding extraction with oracle clustering
             if self.klustering == "OracleClustering" and not return_embeddings:
                 embeddings = None
diff --git a/pyannote/audio/pipelines/speaker_verification.py b/pyannote/audio/pipelines/speaker_verification.py
index 022c1ca6f..d2de51dc7 100644
--- a/pyannote/audio/pipelines/speaker_verification.py
+++ b/pyannote/audio/pipelines/speaker_verification.py
@@ -186,7 +186,7 @@ def __call__(
 
         # corner case: every signal is too short
         if max_len < self.min_num_samples:
-            return np.NAN * np.zeros((batch_size, self.dimension))
+            return np.nan * np.zeros((batch_size, self.dimension))
 
         too_short = wav_lens < self.min_num_samples
         wav_lens[too_short] = max_len
@@ -197,7 +197,7 @@ def __call__(
         )
 
         embeddings = embeddings.cpu().numpy()
-        embeddings[too_short.cpu().numpy()] = np.NAN
+        embeddings[too_short.cpu().numpy()] = np.nan
 
         return embeddings
 
@@ -364,7 +364,7 @@ def __call__(
 
         # corner case: every signal is too short
         if max_len < self.min_num_samples:
-            return np.NAN * np.zeros((batch_size, self.dimension))
+            return np.nan * np.zeros((batch_size, self.dimension))
 
         too_short = wav_lens < self.min_num_samples
         wav_lens = wav_lens / max_len
@@ -377,7 +377,7 @@ def __call__(
             .numpy()
         )
 
-        embeddings[too_short.cpu().numpy()] = np.NAN
+        embeddings[too_short.cpu().numpy()] = np.nan
 
         return embeddings
 
@@ -594,7 +594,7 @@ def __call__(
 
         imasks = imasks > 0.5
 
-        embeddings = np.NAN * np.zeros((batch_size, self.dimension))
+        embeddings = np.nan * np.zeros((batch_size, self.dimension))
 
         for f, (feature, imask) in enumerate(zip(features, imasks)):
             masked_feature = feature[imask]
diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py
index 45c10b9b5..c1b9b036c 100644
--- a/pyannote/audio/pipelines/speech_separation.py
+++ b/pyannote/audio/pipelines/speech_separation.py
@@ -419,7 +419,7 @@ def reconstruct(
         num_chunks, num_frames, local_num_speakers = segmentations.data.shape
 
         num_clusters = np.max(hard_clusters) + 1
-        clustered_segmentations = np.NAN * np.zeros(
+        clustered_segmentations = np.nan * np.zeros(
             (num_chunks, num_frames, num_clusters)
         )
 
@@ -644,9 +644,9 @@ def apply(
                         len(speaker_activation), dtype=float
                     )
 
-                    speaker_activation_with_context[
-                        np.concatenate(remaining_zeros)
-                    ] = 0.0
+                    speaker_activation_with_context[np.concatenate(remaining_zeros)] = (
+                        0.0
+                    )
 
                     discrete_diarization.data.T[i] = speaker_activation_with_context
             num_sources = sources.data.shape[1]