Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VAD + Non-streaming ASR example for JavaScript API. #1170

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")

# Smoke test for the VAD + non-streaming ASR (Whisper) Node.js example.
echo "----------non-streaming asr + vad----------"
# Download the Whisper tiny.en model archive, unpack it, and delete the archive.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

# Download the test recording (Obama.wav) and the silero_vad.onnx model file.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

# Run the example, then clean up. Note that the two globs below remove every
# .wav and .onnx file in the current directory, not only the ones fetched above.
node ./test_vad_with_non_streaming_asr_whisper.js
rm -rf sherpa-onnx-whisper*
rm *.wav
rm *.onnx

echo "----------asr----------"

if [[ $arch != "ia32" && $platform != "win32" ]]; then
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,4 @@ sherpa-onnx-telespeech-ctc-*
.ccache
lib*.a
sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
*.bak
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 1.10.18

* Fix the case when recognition results contain the symbol `"`, which caused
issues when converting the results to a JSON string.

## 1.10.17

* Support SenseVoice CTC models.
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ project(sherpa-onnx)
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.17")
set(SHERPA_ONNX_VERSION "1.10.18")

# Disable warning about
#
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/keyword-spotter/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
path: ^1.9.0
args: ^2.5.0

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/streaming_asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >

publish_to: 'none'

version: 1.10.17
version: 1.10.18

topics:
- speech-recognition
Expand All @@ -30,7 +30,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6

sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >

publish_to: 'none' # Remove this line if you wish to publish to pub.dev

version: 1.10.17
version: 1.10.18

environment:
sdk: '>=3.4.0 <4.0.0'
Expand All @@ -17,7 +17,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.10.17
sherpa_onnx: ^1.10.18
url_launcher: ^6.2.6
audioplayers: ^5.0.0

Expand Down
12 changes: 6 additions & 6 deletions flutter/sherpa_onnx/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.17
version: 1.10.18

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand All @@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter

sherpa_onnx_android: ^1.10.17
sherpa_onnx_android: ^1.10.18
# sherpa_onnx_android:
# path: ../sherpa_onnx_android

sherpa_onnx_macos: ^1.10.17
sherpa_onnx_macos: ^1.10.18
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos

sherpa_onnx_linux: ^1.10.17
sherpa_onnx_linux: ^1.10.18
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.17
sherpa_onnx_windows: ^1.10.18
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows

sherpa_onnx_ios: ^1.10.17
sherpa_onnx_ios: ^1.10.18
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.10.17'
s.version = '1.10.18'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.10.17'
s.version = '1.10.18'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
Expand Down
16 changes: 15 additions & 1 deletion nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ The following tables list the examples in this folder.
|---|---|
|[./test_asr_non_streaming_transducer.js](./test_asr_non_streaming_transducer.js)|Non-streaming speech recognition from a file with a Zipformer transducer model|
|[./test_asr_non_streaming_whisper.js](./test_asr_non_streaming_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper)|
|[./test_vad_with_non_streaming_asr_whisper.js](./test_vad_with_non_streaming_asr_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
Expand Down Expand Up @@ -221,11 +222,24 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2

node ./test_asr_non_streaming_whisper.js

# To run VAD + non-streaming ASR with Paraformer using a microphone
# To run VAD + non-streaming ASR with Whisper using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_whisper_microphone.js
```

### Non-streaming speech recognition with Whisper + VAD

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_with_non_streaming_asr_whisper.js
```

### Non-streaming speech recognition with NeMo CTC models

```bash
Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.10.17"
"sherpa-onnx-node": "^1.10.18"
}
}
127 changes: 127 additions & 0 deletions nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Create the non-streaming (offline) recognizer used by this example,
 * configured with the Whisper tiny.en int8 encoder/decoder.
 *
 * Please download the model files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 *
 * @returns {object} A new `sherpa_onnx.OfflineRecognizer` instance.
 */
function createRecognizer() {
  // Feature extraction settings: 16 kHz input, 80-dim features.
  const featureSettings = {
    'sampleRate': 16000,
    'featureDim': 80,
  };

  // Paths are relative to the current working directory.
  const whisperModel = {
    'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
    'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
  };

  const modelSettings = {
    'whisper': whisperModel,
    'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  };

  return new sherpa_onnx.OfflineRecognizer({
    'featConfig': featureSettings,
    'modelConfig': modelSettings,
  });
}

/**
 * Create the voice activity detector used by this example.
 *
 * Please download silero_vad.onnx from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 *
 * @returns {object} A new `sherpa_onnx.Vad` instance.
 */
function createVad() {
  const sileroSettings = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    // The main loop of this script feeds the VAD in chunks of this many
    // samples (it reads the value back via vad.config.sileroVad.windowSize).
    windowSize: 512,
  };

  const vadSettings = {
    sileroVad: sileroSettings,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  // Second constructor argument of sherpa_onnx.Vad.
  // NOTE(review): presumably the capacity of the VAD's internal sample
  // buffer, in seconds -- confirm against the addon.
  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(vadSettings, bufferSizeInSeconds);
}

// Build the recognizer and the VAD once; both are reused for the whole file.
const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

// The script feeds the samples to the recognizer as-is (no resampling is done
// here), so refuse to continue when the file's sample rate differs from the
// one the recognizer was configured with.
if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  // Must be a template literal (backticks): with ordinary quotes the
  // ${...} placeholders would be printed verbatim instead of interpolated.
  throw new Error(
      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
}

/**
 * Pop every segment currently queued in the VAD, run the recognizer on each
 * one, and print `<start> -- <end>: <text>` for segments with non-empty text.
 *
 * Segment times are derived by dividing `segment.start` and the segment's
 * sample count by `sampleRate`, i.e. `segment.start` is treated as a sample
 * offset into the input.
 *
 * @param {object} vad - The detector to drain (`isEmpty`/`front`/`pop`).
 * @param {object} recognizer - The offline recognizer used for decoding.
 * @param {number} sampleRate - Sample rate of the audio, in Hz.
 */
function decodeQueuedSegments(vad, recognizer, sampleRate) {
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const startSeconds = segment.start / sampleRate;
    const endSeconds = startSeconds + segment.samples.length / sampleRate;

    // Each segment is decoded independently in its own stream.
    const stream = recognizer.createStream();
    stream.acceptWaveform({samples: segment.samples, sampleRate: sampleRate});

    recognizer.decode(stream);
    const result = recognizer.getResult(stream);
    if (result.text.length > 0) {
      const text = result.text.toLowerCase().trim();
      console.log(
          `${startSeconds.toFixed(2)} -- ${endSeconds.toFixed(2)}: ${text}`);
    }
  }
}

console.log('Started');
let start = Date.now();

// Feed the wave to the VAD one window at a time and decode segments as soon
// as the VAD reports them. The final window may be shorter than windowSize
// because subarray() clamps its end index to the array length.
const windowSize = vad.config.sileroVad.windowSize;
for (let offset = 0; offset < wave.samples.length; offset += windowSize) {
  vad.acceptWaveform(wave.samples.subarray(offset, offset + windowSize));
  decodeQueuedSegments(vad, recognizer, wave.sampleRate);
}

// After all samples are fed, flush the VAD and decode whatever it queues.
// NOTE(review): presumably flush() emits a trailing segment that was still
// open at end of input -- confirm against the addon.
vad.flush();
decodeQueuedSegments(vad, recognizer, wave.sampleRate);

let stop = Date.now();
console.log('Done');

// Report the wall-clock time and the real-time factor (RTF), computed as
// processing time divided by audio duration.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Fixed the misspelled unit ("secodns") in the two messages below.
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
2 changes: 1 addition & 1 deletion scripts/dart/kws-pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
# sherpa_onnx: ^1.10.17
# sherpa_onnx: ^1.10.18
sherpa_onnx:
path: ../../flutter/sherpa_onnx
path: ^1.9.0
Expand Down
2 changes: 1 addition & 1 deletion scripts/dart/sherpa-onnx-pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx.podspec
version: 1.10.17
version: 1.10.18

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand Down
6 changes: 3 additions & 3 deletions scripts/node-addon-api/lib/vad.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ config = {
}

clear() {
addon.VoiceActivityDetectorClearWrapper(this.handle);
addon.voiceActivityDetectorClear(this.handle);
}

/*
Expand All @@ -79,11 +79,11 @@ config = {
}

reset() {
addon.VoiceActivityDetectorResetWrapper(this.handle);
addon.voiceActivityDetectorReset(this.handle);
}

flush() {
addon.VoiceActivityDetectorFlushWrapper(this.handle);
addon.voiceActivityDetectorFlush(this.handle);
}
}

Expand Down
Loading
Loading