From ab7cff2513c956e705c4bd5fd061de9c5c5f65e3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Mar 2024 15:16:47 +0800 Subject: [PATCH] Add C API for spoken language identification. (#695) --- .github/scripts/test-c-api.sh | 26 +++++ .../test-spoken-language-identification.sh | 52 +++++----- .github/workflows/android.yaml | 1 + .github/workflows/build-xcframework.yaml | 1 + .github/workflows/linux.yaml | 12 ++- .github/workflows/macos.yaml | 11 ++- .github/workflows/windows-x64.yaml | 10 +- .github/workflows/windows-x86.yaml | 8 ++ .gitignore | 1 + c-api-examples/CMakeLists.txt | 5 +- c-api-examples/Makefile | 2 +- c-api-examples/decode-file-c-api.c | 63 ++++++------ .../spoken-language-identification-c-api.c | 65 +++++++++++++ .../offline-decode-files/run-hotwords.sh | 2 +- .../offline-decode-files/run-zipformer.sh | 2 +- .../online-decode-files/run-transducer.sh | 2 +- sherpa-onnx/c-api/c-api.cc | 97 +++++++++++++++++++ sherpa-onnx/c-api/c-api.h | 70 +++++++++++++ 18 files changed, 363 insertions(+), 67 deletions(-) create mode 100755 .github/scripts/test-c-api.sh create mode 100644 c-api-examples/spoken-language-identification-c-api.c diff --git a/.github/scripts/test-c-api.sh b/.github/scripts/test-c-api.sh new file mode 100755 index 000000000..08a1a1a2e --- /dev/null +++ b/.github/scripts/test-c-api.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -e + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "SLID_EXE is $SLID_EXE" +echo "PATH: $PATH" + + +log "------------------------------------------------------------" +log "Download whisper tiny for spoken language identification " +log "------------------------------------------------------------" + +rm -rf sherpa-onnx-whisper-tiny* +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 +tar xvf 
sherpa-onnx-whisper-tiny.tar.bz2 +rm sherpa-onnx-whisper-tiny.tar.bz2 + +$SLID_EXE + +rm -rf sherpa-onnx-whisper-tiny* diff --git a/.github/scripts/test-spoken-language-identification.sh b/.github/scripts/test-spoken-language-identification.sh index 028e5c23a..4c15eff74 100755 --- a/.github/scripts/test-spoken-language-identification.sh +++ b/.github/scripts/test-spoken-language-identification.sh @@ -28,32 +28,32 @@ ar-arabic.wav bg-bulgarian.wav cs-czech.wav da-danish.wav -de-german.wav -el-greek.wav -en-english.wav -es-spanish.wav -fa-persian.wav -fi-finnish.wav -fr-french.wav -hi-hindi.wav -hr-croatian.wav -id-indonesian.wav -it-italian.wav -ja-japanese.wav -ko-korean.wav -nl-dutch.wav -no-norwegian.wav -po-polish.wav -pt-portuguese.wav -ro-romanian.wav -ru-russian.wav -sk-slovak.wav -sv-swedish.wav -ta-tamil.wav -tl-tagalog.wav -tr-turkish.wav -uk-ukrainian.wav -zh-chinese.wav +# de-german.wav +# el-greek.wav +# en-english.wav +# es-spanish.wav +# fa-persian.wav +# fi-finnish.wav +# fr-french.wav +# hi-hindi.wav +# hr-croatian.wav +# id-indonesian.wav +# it-italian.wav +# ja-japanese.wav +# ko-korean.wav +# nl-dutch.wav +# no-norwegian.wav +# po-polish.wav +# pt-portuguese.wav +# ro-romanian.wav +# ru-russian.wav +# sk-slovak.wav +# sv-swedish.wav +# ta-tamil.wav +# tl-tagalog.wav +# tr-turkish.wav +# uk-ukrainian.wav +# zh-chinese.wav ) for wav in ${waves[@]}; do diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml index dd92a6d91..760b5759c 100644 --- a/.github/workflows/android.yaml +++ b/.github/workflows/android.yaml @@ -113,6 +113,7 @@ jobs: git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" + rm -rf huggingface GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml index be4e3666b..85ba4bcd6 100644 --- 
a/.github/workflows/build-xcframework.yaml +++ b/.github/workflows/build-xcframework.yaml @@ -90,6 +90,7 @@ jobs: git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" + rm -rf huggingface GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 754daa312..b1cf6ed91 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -123,8 +123,15 @@ jobs: name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} path: build/bin/* - - name: Test spoken language identification - if: matrix.build_type != 'Debug' + - name: Test spoken language identification (C API) + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export SLID_EXE=spoken-language-identification-c-api + + .github/scripts/test-c-api.sh + + - name: Test spoken language identification (C++ API) shell: bash run: | export PATH=$PWD/build/bin:$PATH @@ -243,6 +250,7 @@ jobs: git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" + rm -rf huggingface GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index 04abcd31d..a80456279 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -102,8 +102,15 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx - - name: Test spoken language identification - if: matrix.build_type != 'Debug' + - name: Test spoken language identification (C API) + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export SLID_EXE=spoken-language-identification-c-api + + .github/scripts/test-c-api.sh + + - name: Test spoken language identification (C++ API) shell: bash run: | export PATH=$PWD/build/bin:$PATH diff --git a/.github/workflows/windows-x64.yaml 
b/.github/workflows/windows-x64.yaml index cf982f6fb..306118621 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -68,7 +68,15 @@ jobs: ls -lh ./bin/Release/sherpa-onnx.exe - - name: Test spoken language identification + - name: Test spoken language identification (C API) + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export SLID_EXE=spoken-language-identification-c-api.exe + + .github/scripts/test-c-api.sh + + - name: Test spoken language identification (C++ API) shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index b701b8c0c..be0ddc8dd 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -69,6 +69,14 @@ jobs: ls -lh ./bin/Release/sherpa-onnx.exe + - name: Test spoken language identification (C API) + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export SLID_EXE=spoken-language-identification-c-api.exe + + .github/scripts/test-c-api.sh + # - name: Test spoken language identification # shell: bash # run: | diff --git a/.gitignore b/.gitignore index 06834c3c6..9700e5e47 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,4 @@ log vits-piper-* vits-coqui-* vits-mms-* +*.tar.bz2 diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt index 478dd8ee8..f2f9ea6dc 100644 --- a/c-api-examples/CMakeLists.txt +++ b/c-api-examples/CMakeLists.txt @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) add_executable(offline-tts-c-api offline-tts-c-api.c) target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) +add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c) +target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) + if(SHERPA_ONNX_HAS_ALSA) add_subdirectory(./asr-microphone-example) -else() +elseif((UNIX AND NOT APPLE) OR LINUX) 
message(WARNING "Not include ./asr-microphone-example since alsa is not available") endif() diff --git a/c-api-examples/Makefile b/c-api-examples/Makefile index 7e9f2a916..3e2931424 100644 --- a/c-api-examples/Makefile +++ b/c-api-examples/Makefile @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ LDFLAGS := -L ../build/lib LDFLAGS += -L ../build/_deps/onnxruntime-src/lib -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime LDFLAGS += -framework Foundation LDFLAGS += -lc++ LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib diff --git a/c-api-examples/decode-file-c-api.c b/c-api-examples/decode-file-c-api.c index 46cb11a81..c7ea3bfb6 100644 --- a/c-api-examples/decode-file-c-api.c +++ b/c-api-examples/decode-file-c-api.c @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) { int32_t segment_id = 0; const char *wav_filename = argv[context.index]; - FILE *fp = fopen(wav_filename, "rb"); - if (!fp) { - fprintf(stderr, "Failed to open %s\n", wav_filename); + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); + if (wave == NULL) { + fprintf(stderr, "Failed to read %s\n", wav_filename); return -1; } - - // Assume the wave header occupies 44 bytes. - fseek(fp, 44, SEEK_SET); - // simulate streaming #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz int16_t buffer[N]; float samples[N]; + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n", + wave->sample_rate, wave->num_samples, + (float)wave->num_samples / wave->sample_rate); + + int32_t k = 0; + while (k < wave->num_samples) { + int32_t start = k; + int32_t end = + (start + N > wave->num_samples) ? 
wave->num_samples : (start + N); + k += N; + + AcceptWaveform(stream, wave->sample_rate, wave->samples + start, + end - start); + while (IsOnlineStreamReady(recognizer, stream)) { + DecodeOnlineStream(recognizer, stream); + } - while (!feof(fp)) { - size_t n = fread((void *)buffer, sizeof(int16_t), N, fp); - if (n > 0) { - for (size_t i = 0; i != n; ++i) { - samples[i] = buffer[i] / 32768.; - } - AcceptWaveform(stream, 16000, samples, n); - while (IsOnlineStreamReady(recognizer, stream)) { - DecodeOnlineStream(recognizer, stream); - } + const SherpaOnnxOnlineRecognizerResult *r = + GetOnlineStreamResult(recognizer, stream); - const SherpaOnnxOnlineRecognizerResult *r = - GetOnlineStreamResult(recognizer, stream); + if (strlen(r->text)) { + SherpaOnnxPrint(display, segment_id, r->text); + } + if (IsEndpoint(recognizer, stream)) { if (strlen(r->text)) { - SherpaOnnxPrint(display, segment_id, r->text); + ++segment_id; } - - if (IsEndpoint(recognizer, stream)) { - if (strlen(r->text)) { - ++segment_id; - } - Reset(recognizer, stream); - } - - DestroyOnlineRecognizerResult(r); + Reset(recognizer, stream); } + + DestroyOnlineRecognizerResult(r); } - fclose(fp); // add some tail padding float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate - AcceptWaveform(stream, 16000, tail_paddings, 4800); + AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800); + + SherpaOnnxFreeWave(wave); InputFinished(stream); while (IsOnlineStreamReady(recognizer, stream)) { diff --git a/c-api-examples/spoken-language-identification-c-api.c b/c-api-examples/spoken-language-identification-c-api.c new file mode 100644 index 000000000..b99113037 --- /dev/null +++ b/c-api-examples/spoken-language-identification-c-api.c @@ -0,0 +1,65 @@ + +// We assume you have pre-downloaded the whisper multi-lingual models +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +// An example command to download the "tiny" whisper model is given below: +// +// clang-format off 
+// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 +// tar xvf sherpa-onnx-whisper-tiny.tar.bz2 +// rm sherpa-onnx-whisper-tiny.tar.bz2 +// +// clang-format on + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sherpa-onnx/c-api/c-api.h" + +int32_t main() { + SherpaOnnxSpokenLanguageIdentificationConfig config; + + memset(&config, 0, sizeof(config)); + + config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"; + config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"; + config.num_threads = 1; + config.debug = 1; + config.provider = "cpu"; + + const SherpaOnnxSpokenLanguageIdentification *slid = + SherpaOnnxCreateSpokenLanguageIdentification(&config); + if (!slid) { + fprintf(stderr, "Failed to create spoken language identifier\n"); + return -1; + } + + // You can find more test waves from + // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs + const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); + if (wave == NULL) { + fprintf(stderr, "Failed to read %s\n", wav_filename); + SherpaOnnxDestroySpokenLanguageIdentification(slid); // avoid leaking slid + return -1; + } + SherpaOnnxOfflineStream *stream = + SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid); + + AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, + wave->num_samples); + + const SherpaOnnxSpokenLanguageIdentificationResult *result = + SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream); + + fprintf(stderr, "wav_filename: %s\n", wav_filename); + fprintf(stderr, "Detected language: %s\n", result->lang); + + SherpaOnnxDestroySpokenLanguageIdentificationResult(result); + DestroyOfflineStream(stream); + SherpaOnnxFreeWave(wave); + SherpaOnnxDestroySpokenLanguageIdentification(slid); + + return 0; +} diff --git a/dotnet-examples/offline-decode-files/run-hotwords.sh b/dotnet-examples/offline-decode-files/run-hotwords.sh index
de17344f0..156160964 100755 --- a/dotnet-examples/offline-decode-files/run-hotwords.sh +++ b/dotnet-examples/offline-decode-files/run-hotwords.sh @@ -3,7 +3,7 @@ set -ex if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 fi diff --git a/dotnet-examples/offline-decode-files/run-zipformer.sh b/dotnet-examples/offline-decode-files/run-zipformer.sh index 4c69067a7..f700ee937 100755 --- a/dotnet-examples/offline-decode-files/run-zipformer.sh +++ b/dotnet-examples/offline-decode-files/run-zipformer.sh @@ -3,7 +3,7 @@ set -ex if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 fi diff --git a/dotnet-examples/online-decode-files/run-transducer.sh b/dotnet-examples/online-decode-files/run-transducer.sh index ad98af13a..b99fc1e53 100755 --- a/dotnet-examples/online-decode-files/run-transducer.sh +++ b/dotnet-examples/online-decode-files/run-transducer.sh @@ -6,7 +6,7 @@ set -ex if [ ! 
-d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 fi diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index a064604ab..c0c60f33a 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -6,6 +6,7 @@ #include <algorithm> #include <memory> +#include <string> #include <utility> #include <vector> @@ -16,7 +17,9 @@ #include "sherpa-onnx/csrc/offline-recognizer.h" #include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" +#include "sherpa-onnx/csrc/spoken-language-identification.h" #include "sherpa-onnx/csrc/voice-activity-detector.h" +#include "sherpa-onnx/csrc/wave-reader.h" #include "sherpa-onnx/csrc/wave-writer.h" struct SherpaOnnxOnlineRecognizer { @@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); } + +const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) { + int32_t sample_rate = -1; + bool is_ok = false; + std::vector<float> samples = + sherpa_onnx::ReadWave(filename, &sample_rate, &is_ok); + if (!is_ok) { + return nullptr; + } + + float *c_samples = new float[samples.size()]; + std::copy(samples.begin(), samples.end(), c_samples); + + SherpaOnnxWave *wave = new SherpaOnnxWave; + wave->samples = c_samples; + wave->sample_rate = sample_rate; + wave->num_samples = samples.size(); + return wave; +} + +void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) { + if (wave) { + delete[] wave->samples; + delete wave; + } +} + +struct SherpaOnnxSpokenLanguageIdentification { + std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl; +}; + +const
SherpaOnnxSpokenLanguageIdentification * +SherpaOnnxCreateSpokenLanguageIdentification( + const SherpaOnnxSpokenLanguageIdentificationConfig *config) { + sherpa_onnx::SpokenLanguageIdentificationConfig slid_config; + slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, ""); + slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, ""); + slid_config.whisper.tail_paddings = + SHERPA_ONNX_OR(config->whisper.tail_paddings, -1); + slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); + slid_config.debug = config->debug; + slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); + + if (slid_config.debug) { + SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); + } + + if (!slid_config.Validate()) { + SHERPA_ONNX_LOGE("Errors in config"); + return nullptr; + } + + SherpaOnnxSpokenLanguageIdentification *slid = + new SherpaOnnxSpokenLanguageIdentification; + slid->impl = + std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config); + + return slid; +} + +void SherpaOnnxDestroySpokenLanguageIdentification( + const SherpaOnnxSpokenLanguageIdentification *slid) { + delete slid; +} + +SherpaOnnxOfflineStream * +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream( + const SherpaOnnxSpokenLanguageIdentification *slid) { + SherpaOnnxOfflineStream *stream = + new SherpaOnnxOfflineStream(slid->impl->CreateStream()); + return stream; +} + +const SherpaOnnxSpokenLanguageIdentificationResult * +SherpaOnnxSpokenLanguageIdentificationCompute( + const SherpaOnnxSpokenLanguageIdentification *slid, + const SherpaOnnxOfflineStream *s) { + std::string lang = slid->impl->Compute(s->impl.get()); + char *c_lang = new char[lang.size() + 1]; + std::copy(lang.begin(), lang.end(), c_lang); + c_lang[lang.size()] = '\0'; + SherpaOnnxSpokenLanguageIdentificationResult *r = + new SherpaOnnxSpokenLanguageIdentificationResult; + r->lang = c_lang; + return r; +} + +void SherpaOnnxDestroySpokenLanguageIdentificationResult( + const
SherpaOnnxSpokenLanguageIdentificationResult *r) { + if (r) { + delete[] r->lang; + delete r; + } +} diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 5b5dd803e..570cb8e81 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename); +SHERPA_ONNX_API typedef struct SherpaOnnxWave { + // samples normalized to the range [-1, 1] + const float *samples; + int32_t sample_rate; + int32_t num_samples; +} SherpaOnnxWave; + +// Return a NULL pointer on error. It supports only standard WAVE files. +// Each sample should be 16-bit. It supports only a single channel. +// +// If the returned pointer is not NULL, the user has to invoke +// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak. +SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename); + +SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave); + +// Spoken language identification + +SHERPA_ONNX_API typedef struct + SherpaOnnxSpokenLanguageIdentificationWhisperConfig { + const char *encoder; + const char *decoder; + int32_t tail_paddings; +} SherpaOnnxSpokenLanguageIdentificationWhisperConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig { + SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper; + int32_t num_threads; + int32_t debug; + const char *provider; +} SherpaOnnxSpokenLanguageIdentificationConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification + SherpaOnnxSpokenLanguageIdentification; + +// Create an instance of SpokenLanguageIdentification. +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification() +// to free the returned pointer to avoid memory leak.
+SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification * +SherpaOnnxCreateSpokenLanguageIdentification( + const SherpaOnnxSpokenLanguageIdentificationConfig *config); + +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification( + const SherpaOnnxSpokenLanguageIdentification *slid); + +// The user has to invoke DestroyOfflineStream() +// to free the returned pointer to avoid memory leak +SHERPA_ONNX_API SherpaOnnxOfflineStream * +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream( + const SherpaOnnxSpokenLanguageIdentification *slid); + +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult { + // en for English + // de for German + // zh for Chinese + // es for Spanish + // ... + const char *lang; +} SherpaOnnxSpokenLanguageIdentificationResult; + +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult() +// to free the returned pointer to avoid memory leak +SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult * +SherpaOnnxSpokenLanguageIdentificationCompute( + const SherpaOnnxSpokenLanguageIdentification *slid, + const SherpaOnnxOfflineStream *s); + +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult( + const SherpaOnnxSpokenLanguageIdentificationResult *r); + #if defined(__GNUC__) #pragma GCC diagnostic pop #endif