Skip to content

Commit

Permalink
Merge branch 'develop' into pixit-normalize-sources
Browse files Browse the repository at this point in the history
  • Loading branch information
hbredin committed Jul 21, 2024
2 parents a766f2a + 286ea1a commit 9972a91
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## develop

- fix: fix support for `numpy==2.x` ([@metal3d](https://github.com/metal3d/))


## Version 3.3.1 (2024-06-19)

Expand Down
3 changes: 1 addition & 2 deletions pyannote/audio/pipelines/speaker_diarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def reconstruct(
num_chunks, num_frames, local_num_speakers = segmentations.data.shape

num_clusters = np.max(hard_clusters) + 1
- clustered_segmentations = np.NAN * np.zeros(
+ clustered_segmentations = np.nan * np.zeros(
(num_chunks, num_frames, num_clusters)
)

Expand Down Expand Up @@ -515,7 +515,6 @@ def apply(
centroids = None

else:
-
# skip speaker embedding extraction with oracle clustering
if self.klustering == "OracleClustering" and not return_embeddings:
embeddings = None
Expand Down
10 changes: 5 additions & 5 deletions pyannote/audio/pipelines/speaker_verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def __call__(

# corner case: every signal is too short
if max_len < self.min_num_samples:
- return np.NAN * np.zeros((batch_size, self.dimension))
+ return np.nan * np.zeros((batch_size, self.dimension))

too_short = wav_lens < self.min_num_samples
wav_lens[too_short] = max_len
Expand All @@ -197,7 +197,7 @@ def __call__(
)

embeddings = embeddings.cpu().numpy()
- embeddings[too_short.cpu().numpy()] = np.NAN
+ embeddings[too_short.cpu().numpy()] = np.nan

return embeddings

Expand Down Expand Up @@ -364,7 +364,7 @@ def __call__(

# corner case: every signal is too short
if max_len < self.min_num_samples:
- return np.NAN * np.zeros((batch_size, self.dimension))
+ return np.nan * np.zeros((batch_size, self.dimension))

too_short = wav_lens < self.min_num_samples
wav_lens = wav_lens / max_len
Expand All @@ -377,7 +377,7 @@ def __call__(
.numpy()
)

- embeddings[too_short.cpu().numpy()] = np.NAN
+ embeddings[too_short.cpu().numpy()] = np.nan

return embeddings

Expand Down Expand Up @@ -594,7 +594,7 @@ def __call__(

imasks = imasks > 0.5

- embeddings = np.NAN * np.zeros((batch_size, self.dimension))
+ embeddings = np.nan * np.zeros((batch_size, self.dimension))

for f, (feature, imask) in enumerate(zip(features, imasks)):
masked_feature = feature[imask]
Expand Down
8 changes: 4 additions & 4 deletions pyannote/audio/pipelines/speech_separation.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def reconstruct(
num_chunks, num_frames, local_num_speakers = segmentations.data.shape

num_clusters = np.max(hard_clusters) + 1
- clustered_segmentations = np.NAN * np.zeros(
+ clustered_segmentations = np.nan * np.zeros(
(num_chunks, num_frames, num_clusters)
)

Expand Down Expand Up @@ -644,9 +644,9 @@ def apply(
len(speaker_activation), dtype=float
)

- speaker_activation_with_context[
- np.concatenate(remaining_zeros)
- ] = 0.0
+ speaker_activation_with_context[np.concatenate(remaining_zeros)] = (
+ 0.0
+ )

discrete_diarization.data.T[i] = speaker_activation_with_context
num_sources = sources.data.shape[1]
Expand Down

0 comments on commit 9972a91

Please sign in to comment.