Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Speaker ID demo for C# #862

Merged
merged 3 commits into from
May 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

cd dotnet-examples/

cd streaming-hlg-decoding/
cd speaker-identification
./run.sh

cd ../streaming-hlg-decoding/
./run.sh

cd ../spoken-language-identification
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test-dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ jobs:
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification

ls -lh /tmp

Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.9.23")
set(SHERPA_ONNX_VERSION "1.9.24")

# Disable warning about
#
Expand Down
6 changes: 6 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -54,5 +56,9 @@ Global
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
155 changes: 155 additions & 0 deletions dotnet-examples/speaker-identification/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do speaker identification with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
//
// 2. Download test data from
//
// git clone https://github.com/csukuangfj/sr-data
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;
using System.Collections.Generic;
using System;

/// <summary>
/// Demo that enrolls two speakers from wave files, lists them, identifies the
/// speaker of several test files, and then exercises Verify/Remove on the
/// speaker embedding manager.
/// </summary>
class SpeakerIdentificationDemo
{
  /// <summary>
  /// Computes the speaker embedding of a single wave file.
  /// </summary>
  /// <param name="extractor">Extractor used to create the stream and to compute the embedding.</param>
  /// <param name="filename">Path of the wave file to read.</param>
  /// <returns>The embedding returned by <c>extractor.Compute</c> for the whole file.</returns>
  public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
  {
    WaveReader reader = new WaveReader(filename);

    // Dispose the stream as soon as the embedding has been computed instead
    // of leaving it to be cleaned up at some later, unspecified time.
    using (OnlineStream stream = extractor.CreateStream())
    {
      stream.AcceptWaveform(reader.SampleRate, reader.Samples);
      stream.InputFinished();

      return extractor.Compute(stream);
    }
  }

  /// <summary>
  /// Entry point. Builds the extractor and the manager, runs the demo and sets
  /// a non-zero process exit code when any check fails, so that callers such
  /// as run.sh (which uses "set -e") can detect the failure.
  /// </summary>
  static void Main(string[] args)
  {
    var config = new SpeakerEmbeddingExtractorConfig();
    config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
    config.Debug = 1;

    using (var extractor = new SpeakerEmbeddingExtractor(config))
    using (var manager = new SpeakerEmbeddingManager(extractor.Dim))
    {
      if (!RunDemo(extractor, manager))
      {
        // Main keeps its "void" signature; the exit code is reported through
        // Environment.ExitCode instead of a return value.
        Environment.ExitCode = 1;
      }
    }
  }

  /// <summary>
  /// Runs every step of the demo in order and stops at the first failed check.
  /// </summary>
  /// <returns>true if all checks passed; false otherwise.</returns>
  private static bool RunDemo(SpeakerEmbeddingExtractor extractor, SpeakerEmbeddingManager manager)
  {
    string[] spk1Files =
        new string[] {
          "./sr-data/enroll/fangjun-sr-1.wav",
          "./sr-data/enroll/fangjun-sr-2.wav",
          "./sr-data/enroll/fangjun-sr-3.wav",
        };

    string[] spk2Files =
        new string[] {
          "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
        };

    float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);
    float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

    if (!manager.Add("fangjun", spk1Vec))
    {
      return Fail("Failed to register fangjun");
    }

    if (!manager.Add("leijun", spk2Vec))
    {
      return Fail("Failed to register leijun");
    }

    if (manager.NumSpeakers != 2)
    {
      return Fail("There should be two speakers");
    }

    if (!manager.Contains("fangjun"))
    {
      return Fail("It should contain the speaker fangjun");
    }

    if (!manager.Contains("leijun"))
    {
      return Fail("It should contain the speaker leijun");
    }

    Console.WriteLine("---All speakers---");
    foreach (var speaker in manager.GetAllSpeakers())
    {
      Console.WriteLine(speaker);
    }
    Console.WriteLine("------------");

    string[] testFiles =
        new string[] {
          "./sr-data/test/fangjun-test-sr-1.wav",
          "./sr-data/test/leijun-test-sr-1.wav",
          "./sr-data/test/liudehua-test-sr-1.wav"
        };

    float threshold = 0.6f;
    foreach (var file in testFiles)
    {
      float[] embedding = ComputeEmbedding(extractor, file);

      String name = manager.Search(embedding, threshold);
      if (string.IsNullOrEmpty(name))
      {
        // Nothing was returned for this embedding at the given threshold.
        name = "<Unknown>";
      }
      Console.WriteLine("{0}: {1}", file, name);
    }

    // test verify
    // The same embedding is used before and after removing the speaker, so it
    // is computed only once.
    float[] fangjunTestEmbedding = ComputeEmbedding(extractor, testFiles[0]);

    if (!manager.Verify("fangjun", fangjunTestEmbedding, threshold))
    {
      return Fail("testFiles[0] should match fangjun!");
    }

    if (!manager.Remove("fangjun"))
    {
      return Fail("Failed to remove fangjun");
    }

    if (manager.Verify("fangjun", fangjunTestEmbedding, threshold))
    {
      return Fail(string.Format("{0} should match no one!", testFiles[0]));
    }

    if (manager.NumSpeakers != 1)
    {
      return Fail("There should only 1 speaker left.");
    }

    return true;
  }

  /// <summary>
  /// Computes one embedding per file, in the same order as <paramref name="files"/>.
  /// </summary>
  private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] files)
  {
    float[][] embeddings = new float[files.Length][];
    for (int i = 0; i < files.Length; ++i)
    {
      embeddings[i] = ComputeEmbedding(extractor, files[i]);
    }

    return embeddings;
  }

  /// <summary>
  /// Prints <paramref name="message"/> and returns false so that a failed
  /// check can be reported with a single "return Fail(...)" statement.
  /// </summary>
  private static bool Fail(string message)
  {
    Console.WriteLine(message);
    return false;
  }
}
1 change: 1 addition & 0 deletions dotnet-examples/speaker-identification/WaveReader.cs
13 changes: 13 additions & 0 deletions dotnet-examples/speaker-identification/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
#
# Fetches the speaker embedding model and the test wave files if they are not
# already present in the current directory, then runs the demo with
# `dotnet run`.

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page. Downloading to a temporary name and renaming only on success keeps a
  # failed or interrupted download from being mistaken for the model by the
  # existence check above on the next run.
  curl -fSL -o "./$model.part" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.part" "./$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<!--
  Project file for the speaker identification example
  (RootNamespace: speaker_identification).
  It is built as an executable (OutputType Exe) targeting net6.0, with
  implicit usings and nullable reference types enabled.
-->

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Version="*" is a floating version: the package version is not pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
19 changes: 19 additions & 0 deletions scripts/dotnet/examples/speaker-identification.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!--
  Variant of the speaker identification example project
  (RootNamespace: speaker_identification) that additionally restores packages
  from a local directory.
  It is built as an executable (OutputType Exe) targeting net6.0, with
  implicit usings and nullable reference types enabled.
-->

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<PropertyGroup>
<!--
  /tmp/packages is listed first, followed by any previously configured
  sources and nuget.org.
  NOTE(review): presumably /tmp/packages is where CI places a locally built
  org.k2fsa.sherpa.onnx package; confirm against the workflow.
-->
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>

<ItemGroup>
<!-- Version="*" is a floating version: the package version is not pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
Loading
Loading