diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml new file mode 100644 index 000000000..2b2bf8980 --- /dev/null +++ b/.github/workflows/lazarus.yaml @@ -0,0 +1,369 @@ +name: lazarus + +on: + push: + branches: + - master + paths: + - '.github/workflows/lazarus.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'lazarus-examples/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'sherpa-onnx/pascal-api/*' + pull_request: + branches: + - master + paths: + - '.github/workflows/lazarus.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'lazarus-examples/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'sherpa-onnx/pascal-api/*' + + workflow_dispatch: + +concurrency: + group: lazarus-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-20.04, macos-latest, macos-13, windows-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }} + + # See https://github.com/gcarreno/setup-lazarus + - uses: gcarreno/setup-lazarus@v3 + with: + lazarus-version: "stable" + with-cache: true + + - name: Lazarus info + shell: bash + run: | + which lazbuild + lazbuild --help + + - name: FPC info + shell: bash + run: | + which fpc + fpc -i + + - name: OS info + shell: bash + run: | + uname -a + + - name: Install patchelf for ubuntu + if: matrix.os == 'ubuntu-20.04' + shell: bash + run: | + sudo apt-get update -q + sudo apt-get install -q -y patchelf + + - name: Show Patchelf version (ubuntu) + if: matrix.os == 'ubuntu-20.04' + shell: bash + run: | + patchelf --version + patchelf --help + which patchelf + + - name: Configure CMake + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + 
os=${{ matrix.os }} + + if [[ $os == 'windows-latest' || $os == 'ubuntu-20.04' ]]; then + BUILD_SHARED_LIBS=ON + else + BUILD_SHARED_LIBS=OFF + fi + + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \ + -D SHERPA_ONNX_ENABLE_BINARY=OFF \ + -D CMAKE_BUILD_TYPE=Release \ + .. + + - name: Build sherpa-onnx + shell: bash + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + cd build + cmake --build . --target install --config Release -j 2 + + ls -lh install/lib/ + + cp -v install/lib/*.dll ../lazarus-examples/generate_subtitles/ || true + cp -v install/lib/*.so* ../lazarus-examples/generate_subtitles/ || true + + - name: Build generating subtitles + shell: bash + run: | + cd lazarus-examples/generate_subtitles + os=${{ matrix.os }} + if [[ $os == macos-13 ]]; then + lazbuild --verbose --build-mode=Release --widgetset=cocoa ./generate_subtitles.lpi + elif [[ $os == macos-latest ]]; then + lazbuild --verbose --build-mode=Release --widgetset=cocoa --cpu=aarch64 ./generate_subtitles.lpi + elif [[ $os == 'ubuntu-20.04' ]]; then + lazbuild --verbose --build-mode=Release-Linux ./generate_subtitles.lpi + else + lazbuild --verbose --build-mode=Release ./generate_subtitles.lpi + fi + + - name: Display generating subtitles + shell: bash + run: | + cd lazarus-examples/generate_subtitles + ls -lh + + - name: Collect generating subtitles (Ubuntu) + if: matrix.os == 'ubuntu-20.04' + shell: bash + run: | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + cd lazarus-examples/generate_subtitles + ls -lh + readelf -d ./generate_subtitles + echo '----------' + ldd ./generate_subtitles + + d=generate_subtitles-linux-x64-$SHERPA_ONNX_VERSION + echo "---before running patchelf---" + readelf -d ./generate_subtitles + + patchelf --set-rpath '$ORIGIN' ./generate_subtitles + + echo "---after running patchelf---" + readelf -d ./generate_subtitles + + mkdir -p $d + cp -v 
./generate_subtitles $d/ + cp -v *.so $d/ + + mv -v $d /tmp/linux-x64 + + ls -lh /tmp/linux-x64 + + - name: Collect generating subtitles (windows) + if: matrix.os == 'windows-latest' + shell: bash + run: | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + cd lazarus-examples/generate_subtitles + ls -lh + + d=generate-subtitles-windows-x64-$SHERPA_ONNX_VERSION + mkdir -p $d + cp -v ./generate_subtitles.exe $d/ + cp -v onnxruntime.dll $d/ + cp -v sherpa-onnx-c-api.dll $d/ + mv $d ../../windows-x64 + cd ../.. + + ls -lh windows-x64 + + - name: Collect generating subtitles (macos) + if: matrix.os == 'macos-13' || matrix.os == 'macos-latest' + shell: bash + run: | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + cd lazarus-examples/generate_subtitles + ls -lh + file ./generate_subtitles + echo '----------' + otool -L ./generate_subtitles + rm -v generate_subtitles.app/Contents/MacOS/generate_subtitles + cp -v ./generate_subtitles generate_subtitles.app/Contents/MacOS/generate_subtitles + chmod +x generate_subtitles.app/Contents/MacOS/generate_subtitles + + if [[ ${{ matrix.os }} == 'macos-latest' ]]; then + mv generate_subtitles.app /tmp/macos-arm64 + else + mv generate_subtitles.app /tmp/macos-x64 + d=generate-subtitles-macos-x64-$SHERPA_ONNX_VERSION.app + fi + + ls -lh /tmp + echo "---" + ls -lh /tmp/macos-* + + - uses: actions/upload-artifact@v4 + if: matrix.os == 'ubuntu-20.04' + with: + name: linux-x64 + path: /tmp/linux-x64 + + - uses: actions/upload-artifact@v4 + if: matrix.os == 'macos-latest' + with: + name: macos-arm64 + path: /tmp/macos-arm64 + + - uses: actions/upload-artifact@v4 + if: matrix.os == 'macos-13' + with: + name: macos-x64 + path: /tmp/macos-x64 + + - uses: actions/upload-artifact@v4 + if: matrix.os == 'windows-latest' + with: + name: windows-x64 + path: ./windows-x64 + + release: + runs-on: ${{ matrix.os }} + needs: [build] + 
strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["2"] + index: ["0", "1"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Retrieve artifact from windows x64 + uses: actions/download-artifact@v4 + with: + name: windows-x64 + path: /tmp/windows-x64 + + - name: Retrieve artifact from linux x64 + uses: actions/download-artifact@v4 + with: + name: linux-x64 + path: /tmp/linux-x64 + + - name: Retrieve artifact from macos x64 + uses: actions/download-artifact@v4 + with: + name: macos-x64 + path: /tmp/macos-x64 + + - name: Retrieve artifact from macos arm64 + uses: actions/download-artifact@v4 + with: + name: macos-arm64 + path: /tmp/macos-arm64 + + - name: Display build files + shell: bash + run: | + ls -lh /tmp + echo "---linux-x64---" + ls -lh /tmp/linux-x64/ + readelf -d /tmp/linux-x64/generate_subtitles + echo "---" + ldd /tmp/linux-x64/generate_subtitles + + echo "---macos-x64---" + ls -lh /tmp/macos-x64/ + mkdir -p /tmp/macos-x64/Contents/Resources + chmod +x /tmp/macos-x64/Contents/MacOS/generate_subtitles + + echo "---macos-arm64---" + ls -lh /tmp/macos-arm64/ + mkdir -p /tmp/macos-arm64/Contents/Resources + chmod +x /tmp/macos-arm64/Contents/MacOS/generate_subtitles + + echo "---windows-x64---" + ls -lh /tmp/windows-x64/ + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Generate build script + shell: bash + run: | + cd scripts/lazarus + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-subtitles.py --total $total --index $index + + chmod +x build-generate-subtitles.sh + mv -v ./build-generate-subtitles.sh ../.. 
+ + - name: Generate tar files + shell: bash + run: | + ./build-generate-subtitles.sh + + - name: Display tar files + shell: bash + run: | + ls -lh /tmp/out + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-bin huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + d=generate-subtitles/$SHERPA_ONNX_VERSION + mkdir -p $d + + cp -v /tmp/out/*.tar.bz2 $d/ + git status + git lfs track "*.tar.bz2" + git add . + git commit -m "add more files" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin main diff --git a/lazarus-examples/.gitignore b/lazarus-examples/.gitignore new file mode 100644 index 000000000..b32943f1c --- /dev/null +++ b/lazarus-examples/.gitignore @@ -0,0 +1,30 @@ +# Lazarus compiler-generated binaries (safe to delete) +*.exe +*.dll +*.so +*.dylib +*.lrs +*.res +*.compiled +*.dbg +*.ppu +*.o +*.or +*.a + +# Lazarus autogenerated files (duplicated info) +*.rst +*.rsj +*.lrt + +# Lazarus local files (user-specific info) +*.lps + +# Lazarus backups and unit output folders. +# These can be changed by user in Lazarus/project options. 
+backup/ +*.bak +lib/ + +# Application bundle for Mac OS +*.app/ diff --git a/lazarus-examples/generate_subtitles/.gitignore b/lazarus-examples/generate_subtitles/.gitignore new file mode 100644 index 000000000..284c87ad9 --- /dev/null +++ b/lazarus-examples/generate_subtitles/.gitignore @@ -0,0 +1,3 @@ +generate_subtitles.app +generate_subtitles +generate_subtitles.dSYM diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.ico b/lazarus-examples/generate_subtitles/generate_subtitles.ico new file mode 100644 index 000000000..10c5fc1a3 Binary files /dev/null and b/lazarus-examples/generate_subtitles/generate_subtitles.ico differ diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpi b/lazarus-examples/generate_subtitles/generate_subtitles.lpi new file mode 100644 index 000000000..014f8cd17 --- /dev/null +++ b/lazarus-examples/generate_subtitles/generate_subtitles.lpi @@ -0,0 +1,208 @@ + + + + + + + + + <Scaled Value="True"/> + <ResourceType Value="res"/> + <UseXPManifest Value="True"/> + <XPManifest> + <DpiAware Value="True"/> + </XPManifest> + <Icon Value="0"/> + </General> + <BuildModes> + <Item Name="Default" Default="True"/> + <Item Name="Debug"> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="generate_subtitles"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/> + <OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <Parsing> + <SyntaxOptions> + <IncludeAssertionCode Value="True"/> + </SyntaxOptions> + </Parsing> + <CodeGeneration> + <Checks> + <IOChecks Value="True"/> + <RangeChecks Value="True"/> + <OverflowChecks Value="True"/> + <StackChecks Value="True"/> + </Checks> + <VerifyObjMethodCallValidity Value="True"/> + </CodeGeneration> + <Linking> + <Debugging> + <DebugInfoType Value="dsDwarf3"/> + 
<UseHeaptrc Value="True"/> + <TrashVariables Value="True"/> + <StripSymbols Value="True"/> + <UseExternalDbgSyms Value="True"/> + </Debugging> + <Options> + <Win32> + <GraphicApplication Value="True"/> + </Win32> + </Options> + </Linking> + </CompilerOptions> + </Item> + <Item Name="Release"> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="generate_subtitles"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/> + <OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <SmartLinkUnit Value="True"/> + <Optimizations> + <OptimizationLevel Value="3"/> + </Optimizations> + </CodeGeneration> + <Linking> + <Debugging> + <GenerateDebugInfo Value="False"/> + <RunWithoutDebug Value="True"/> + <StripSymbols Value="True"/> + </Debugging> + <LinkSmart Value="True"/> + <Options> + <Win32> + <GraphicApplication Value="True"/> + </Win32> + </Options> + </Linking> + </CompilerOptions> + </Item> + <Item Name="Release-Linux"> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="generate_subtitles"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/> + <OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <SmartLinkUnit Value="True"/> + <Optimizations> + <OptimizationLevel Value="3"/> + </Optimizations> + </CodeGeneration> + <Linking> + <Debugging> + <GenerateDebugInfo Value="False"/> + <RunWithoutDebug Value="True"/> + <StripSymbols Value="True"/> + </Debugging> + <LinkSmart Value="True"/> + <Options> + <Win32> + <GraphicApplication Value="True"/> + </Win32> + </Options> + </Linking> + <Other> + 
<CustomOptions Value="-dSHERPA_ONNX_USE_SHARED_LIBS"/> + </Other> + </CompilerOptions> + </Item> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <UseFileFilters Value="True"/> + </PublishOptions> + <RunParams> + <FormatVersion Value="2"/> + </RunParams> + <RequiredPackages> + <Item> + <PackageName Value="LCL"/> + </Item> + </RequiredPackages> + <Units> + <Unit> + <Filename Value="generate_subtitles.lpr"/> + <IsPartOfProject Value="True"/> + </Unit> + <Unit> + <Filename Value="unit1.pas"/> + <IsPartOfProject Value="True"/> + <ComponentName Value="Form1"/> + <HasResources Value="True"/> + <ResourceBaseClass Value="Form"/> + <UnitName Value="Unit1"/> + </Unit> + <Unit> + <Filename Value="my_worker.pas"/> + <IsPartOfProject Value="True"/> + </Unit> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="generate_subtitles"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/> + <OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <Optimizations> + <OptimizationLevel Value="2"/> + </Optimizations> + </CodeGeneration> + <Linking> + <Debugging> + <GenerateDebugInfo Value="False"/> + <DebugInfoType Value="dsDwarf3"/> + <StripSymbols Value="True"/> + </Debugging> + <Options> + <Win32> + <GraphicApplication Value="True"/> + </Win32> + </Options> + </Linking> + </CompilerOptions> + <Debugging> + <Exceptions> + <Item> + <Name Value="EAbort"/> + </Item> + <Item> + <Name Value="ECodetoolError"/> + </Item> + <Item> + <Name Value="EFOpenError"/> + </Item> + </Exceptions> + </Debugging> +</CONFIG> diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpr b/lazarus-examples/generate_subtitles/generate_subtitles.lpr new file mode 100644 index 000000000..de4d3c206 --- /dev/null +++ 
b/lazarus-examples/generate_subtitles/generate_subtitles.lpr @@ -0,0 +1,26 @@ +program generate_subtitles; + +{$mode objfpc}{$H+} + +uses + {$IFDEF UNIX} + cthreads, + cmem, + {$ENDIF} + {$IFDEF HASAMIGA} + athreads, + {$ENDIF} + Interfaces, // this includes the LCL widgetset + Forms, unit1, my_worker + { you can add units after this }; + +{$R *.res} + +begin + RequireDerivedFormResource:=True; + Application.Scaled:=True; + Application.Initialize; + Application.CreateForm(TForm1, Form1); + Application.Run; +end. + diff --git a/lazarus-examples/generate_subtitles/my_worker.pas b/lazarus-examples/generate_subtitles/my_worker.pas new file mode 100644 index 000000000..921162555 --- /dev/null +++ b/lazarus-examples/generate_subtitles/my_worker.pas @@ -0,0 +1,160 @@ +unit my_worker; + +{$mode ObjFPC}{$H+} + +{ +See +https://wiki.lazarus.freepascal.org/Multithreaded_Application_Tutorial + +https://www.freepascal.org/docs-html/rtl/classes/tthread.html +} + +interface + +uses + {$IFDEF UNIX} + cthreads, + cmem, + {$ENDIF} + {$IFDEF HASAMIGA} + athreads, + {$ENDIF} + Classes, SysUtils; + +type + TMyWorkerThread = class(TThread) + private + Status: AnsiString; + StartTime: Single; + StopTime: Single; + TotalDuration: Single; + procedure ShowStatus; + procedure ShowProgress; + protected + procedure Execute; override; + public + WaveFilename: AnsiString; + Constructor Create(CreateSuspended : boolean; Filename: AnsiString); + end; + +var + MyWorkerThread: TMyWorkerThread; + +implementation + +uses + unit1, sherpa_onnx; + +constructor TMyWorkerThread.Create(CreateSuspended : boolean; Filename: AnsiString); +begin + inherited Create(CreateSuspended); + WaveFilename := Filename; + FreeOnTerminate := True; +end; + +procedure TMyWorkerThread.ShowStatus; +begin + Form1.UpdateResult(Status, StartTime, StopTime, TotalDuration); +end; + +procedure TMyWorkerThread.ShowProgress; +begin + Form1.UpdateProgress(StopTime, TotalDuration); +end; + +procedure TMyWorkerThread.Execute; +var + 
Wave: TSherpaOnnxWave; + WindowSize: Integer; + Offset: Integer; + SpeechSegment: TSherpaOnnxSpeechSegment; + + Duration: Single; + + + Stream: TSherpaOnnxOfflineStream; + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; +begin + Wave := SherpaOnnxReadWave(WaveFilename); + TotalDuration := 0; + StartTime := 0; + StopTime := 0; + if (Wave.Samples = nil) or (Length(Wave.Samples) = 0) then + begin + Status := Format('Failed to read %s. We only support 1 channel, 16000Hz, 16-bit encoded wave files', + [Wavefilename]); + Synchronize(@ShowStatus); + + Exit; + end; + if Wave.SampleRate <> 16000 then + begin + Status := Format('Expected sample rate 16000. Given %d. Please select a new file', [Wave.SampleRate]); + Synchronize(@ShowStatus); + Exit; + end; + TotalDuration := Length(Wave.Samples) / Wave.SampleRate; + WindowSize := Form1.Vad.Config.SileroVad.WindowSize; + + Offset := 0; + Form1.Vad.Reset; + + while not Terminated and (Offset + WindowSize <= Length(Wave.Samples)) do + begin + Form1.Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize); + Offset += WindowSize; + StopTime := Offset / Wave.SampleRate; + + if (Offset mod 20480) = 0 then + Synchronize(@ShowProgress); + + while not Terminated and not Form1.Vad.IsEmpty do + begin + SpeechSegment := Form1.Vad.Front; + Form1.Vad.Pop; + Stream := Form1.OfflineRecognizer.CreateStream; + + Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); + Form1.OfflineRecognizer.Decode(Stream); + RecognitionResult := Form1.OfflineRecognizer.GetResult(Stream); + + StartTime := SpeechSegment.Start / Wave.SampleRate; + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; + StopTime := StartTime + Duration; + Status := RecognitionResult.Text; + + Synchronize(@ShowStatus); + FreeAndNil(Stream); + end; + end; + + Form1.Vad.Flush; + while not Terminated and not Form1.Vad.IsEmpty do + begin + SpeechSegment := Form1.Vad.Front; + Form1.Vad.Pop; + Stream := Form1.OfflineRecognizer.CreateStream; + + 
Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); + Form1.OfflineRecognizer.Decode(Stream); + RecognitionResult := Form1.OfflineRecognizer.GetResult(Stream); + + StartTime := SpeechSegment.Start / Wave.SampleRate; + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; + StopTime := StartTime + Duration; + Status := RecognitionResult.Text; + + Synchronize(@ShowStatus); + FreeAndNil(Stream); + end; + + if Terminated then + Status := 'Cancelled!' + else + Status := 'DONE!'; + + Synchronize(@ShowStatus); +end; + +end. + diff --git a/lazarus-examples/generate_subtitles/unit1.lfm b/lazarus-examples/generate_subtitles/unit1.lfm new file mode 100644 index 000000000..45b11e373 --- /dev/null +++ b/lazarus-examples/generate_subtitles/unit1.lfm @@ -0,0 +1,74 @@ +object Form1: TForm1 + Left = 366 + Height = 623 + Top = 117 + Width = 852 + Caption = 'Next-gen Kaldi: Generate Subtitles' + ClientHeight = 623 + ClientWidth = 852 + OnClose = FormClose + OnCreate = FormCreate + LCLVersion = '3.4.0.0' + object FileNameEdt: TEdit + Left = 200 + Height = 22 + Top = 40 + Width = 440 + TabOrder = 0 + OnChange = FileNameEdtChange + end + object SelectFileBtn: TButton + Left = 96 + Height = 25 + Top = 40 + Width = 97 + Caption = 'Select a file...' 
+ TabOrder = 1 + OnClick = SelectFileBtnClick + end + object StartBtn: TButton + Left = 656 + Height = 25 + Top = 37 + Width = 75 + Caption = 'Start' + TabOrder = 2 + OnClick = StartBtnClick + end + object InitBtn: TButton + Left = 280 + Height = 25 + Top = 8 + Width = 280 + Caption = 'Click me to initialize models before you start' + TabOrder = 3 + OnClick = InitBtnClick + end + object ResultMemo: TMemo + Left = 24 + Height = 488 + Top = 72 + Width = 800 + ScrollBars = ssAutoBoth + TabOrder = 4 + end + object ProgressBar: TProgressBar + Left = 32 + Height = 16 + Top = 592 + Width = 792 + TabOrder = 5 + end + object ProgressLabel: TLabel + Left = 770 + Height = 16 + Top = 568 + Width = 8 + Caption = '0' + end + object SelectFileDlg: TOpenDialog + Title = 'Open a wave file' + Left = 600 + Top = 488 + end +end diff --git a/lazarus-examples/generate_subtitles/unit1.pas b/lazarus-examples/generate_subtitles/unit1.pas new file mode 100644 index 000000000..98c2cd386 --- /dev/null +++ b/lazarus-examples/generate_subtitles/unit1.pas @@ -0,0 +1,502 @@ +unit Unit1; + +{$mode objfpc}{$H+} + +{$IFDEF DARWIN} +{$modeswitch objectivec1} {For getting resource directory} +{$ENDIF} + +interface + +uses + Classes, SysUtils, StrUtils, Forms, Controls, + Graphics, Dialogs, StdCtrls, + sherpa_onnx, ComCtrls; + +type + + { TForm1 } + + TForm1 = class(TForm) + InitBtn: TButton; + ProgressBar: TProgressBar; + ResultMemo: TMemo; + StartBtn: TButton; + SelectFileDlg: TOpenDialog; + SelectFileBtn: TButton; + FileNameEdt: TEdit; + ProgressLabel: TLabel; + procedure FileNameEdtChange(Sender: TObject); + procedure FormClose(Sender: TObject; var CloseAction: TCloseAction); + procedure InitBtnClick(Sender: TObject); + procedure SelectFileBtnClick(Sender: TObject); + procedure FormCreate(Sender: TObject); + procedure StartBtnClick(Sender: TObject); + private + + public + procedure UpdateResult( + Msg: AnsiString; + StartTime: Single; + StopTime: Single; + TotalDuration: Single); + procedure
UpdateProgress(StopTime: Single; TotalDuration: Single); + public + Vad: TSherpaOnnxVoiceActivityDetector; + OfflineRecognizer: TSherpaOnnxOfflineRecognizer; + end; + +var + Form1: TForm1; + +implementation + +uses + my_worker + {$IFDEF DARWIN} + ,MacOSAll + ,CocoaAll + {$ENDIF} + ; +{See https://wiki.lazarus.freepascal.org/Locating_the_macOS_application_resources_directory} + +{$IFDEF DARWIN} +{Note: The returned path contains a trailing /} +function GetResourcesPath(): AnsiString; +var + pathStr: shortstring; + status: Boolean = false; +begin + status := CFStringGetPascalString(CFStringRef(NSBundle.mainBundle.resourcePath), @pathStr, 255, CFStringGetSystemEncoding()); + + if status = true then + Result := pathStr + PathDelim + else + raise Exception.Create('Error in GetResourcesPath()'); +end; +{$ENDIF} + +function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector; +var + Config: TSherpaOnnxVadModelConfig; + + SampleRate: Integer; + WindowSize: Integer; +begin + Initialize(Config); + + SampleRate := 16000; {Please don't change it unless you know the details} + WindowSize := 512; {Please don't change it unless you know the details} + + Config.SileroVad.Model := VadFilename; + Config.SileroVad.MinSpeechDuration := 0.5; + Config.SileroVad.MinSilenceDuration := 0.5; + Config.SileroVad.Threshold := 0.5; + Config.SileroVad.WindowSize := WindowSize; + Config.NumThreads:= 2; + Config.Debug:= True; + Config.Provider:= 'cpu'; + Config.SampleRate := SampleRate; + + Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); +end; + +function CreateOfflineRecognizerTransducer( + Tokens: AnsiString; + Encoder: AnsiString; + Decoder: AnsiString; + Joiner: AnsiString; + ModelType: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Transducer.Encoder := Encoder; + Config.ModelConfig.Transducer.Decoder := Decoder; + Config.ModelConfig.Transducer.Joiner := Joiner; + 
+ Config.ModelConfig.ModelType := ModelType; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerTeleSpeech( + Tokens: AnsiString; + TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.TeleSpeechCtc := TeleSpeech; + + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerParaformer( + Tokens: AnsiString; + Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Paraformer.Model := Paraformer; + + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerSenseVoice( + Tokens: AnsiString; + SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.SenseVoice.Model := SenseVoice; + Config.ModelConfig.SenseVoice.Language := 'auto'; + Config.ModelConfig.SenseVoice.UseItn := True; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerWhisper( + Tokens: AnsiString; + WhisperEncoder: AnsiString; + WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: 
TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Whisper.Encoder := WhisperEncoder; + Config.ModelConfig.Whisper.Decoder := WhisperDecoder; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +{$R *.lfm} + +{ TForm1 } + +procedure TForm1.FormCreate(Sender: TObject); +begin + StartBtn.Enabled := False; + SelectFileDlg.Filter := 'All Files|*.wav'; + FileNameEdt.Enabled := False; + SelectFileBtn.Enabled := False; + ResultMemo.Lines.Add('1. It supports only 1 channel, 16-bit, 16000Hz wav files'); + ResultMemo.Lines.Add('2. There should be no Chinese characters in the file path.'); + + ProgressBar.Position := 0; + ProgressLabel.Caption := ''; +end; + +procedure TForm1.StartBtnClick(Sender: TObject); +begin + if StartBtn.Caption = 'Stop' then + begin + if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then + MyWorkerThread.Terminate; + + StartBtn.Caption := 'Start'; + Exit; + end; + + ResultMemo.Lines.Clear(); + ResultMemo.Lines.Add('Start processing'); + + ProgressBar.Position := 0; + ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]); + + MyWorkerThread := TMyWorkerThread.Create(False, FileNameEdt.Text); + + StartBtn.Caption := 'Stop'; +end; + +procedure TForm1.SelectFileBtnClick(Sender: TObject); +begin + if SelectFileDlg.Execute then + begin + FileNameEdt.Text := SelectFileDlg.FileName; + end; +end; + +procedure TForm1.FileNameEdtChange(Sender: TObject); +begin + if FileExists(FileNameEdt.Text) then + StartBtn.Enabled := True + else + StartBtn.Enabled := False; +end; + +procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction); +begin + if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then + begin + MyWorkerThread.Terminate; + MyWorkerThread.WaitFor; + end; + FreeAndNil(Vad); + FreeAndNil(OfflineRecognizer); +end; 
+ +procedure TForm1.UpdateProgress(StopTime: Single; TotalDuration: Single); { Sets the progress bar and its label to StopTime / TotalDuration as a rounded percentage; does nothing when either value is 0. } +var + Percent: Single; +begin + if (StopTime <> 0) and (TotalDuration <> 0) then + begin + Percent := StopTime / TotalDuration * 100; + ProgressBar.Position := Round(Percent); + ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]); + end; +end; + +procedure TForm1.UpdateResult( + Msg: AnsiString; + StartTime: Single; + StopTime: Single; + TotalDuration: Single); { Refreshes the progress display, then appends one line to ResultMemo: Msg verbatim for the final/status messages (which also reset the Start button caption), otherwise the text prefixed with its start and stop times. } +var + NewResult: AnsiString; +begin + UpdateProgress(StopTime, TotalDuration); + + if (Msg = 'DONE!') or + (Msg = 'Cancelled!') or + EndsStr('16-bit encoded wave files', Msg) or + EndsStr('. Please select a new file', Msg) then + begin + Form1.StartBtn.Caption := 'Start'; + NewResult := Msg; + end + else + begin + NewResult := Format('%.3f -- %.3f %s', [StartTime, StopTime, Msg]); + end; + + if Msg = 'DONE!' then + begin + ProgressBar.Position := 100; + + ProgressLabel.Caption := '100%'; + end; + + Form1.ResultMemo.Lines.Add(NewResult); +end; + +procedure TForm1.InitBtnClick(Sender: TObject); { Looks in ModelDir for silero_vad.onnx, tokens.txt and the files of one supported model; creates the VAD and the first matching offline recognizer, then enables file selection and disables the Init button. Shows a message and returns early when a required file is missing. } +var + Msg: AnsiString; + ModelDir: AnsiString; + VadFilename: AnsiString; + Tokens: AnsiString; + + WhisperEncoder: AnsiString; + WhisperDecoder: AnsiString; + + SenseVoice: AnsiString; + + Paraformer: AnsiString; + + TeleSpeech: AnsiString; + + TransducerEncoder: AnsiString; // from icefall + TransducerDecoder: AnsiString; + TransducerJoiner: AnsiString; + + NeMoTransducerEncoder: AnsiString; + NeMoTransducerDecoder: AnsiString; + NeMoTransducerJoiner: AnsiString; +begin + {$IFDEF DARWIN} + ModelDir := GetResourcesPath; + {$ELSE} + ModelDir := './'; + {$ENDIF} + + VadFilename := ModelDir + 'silero_vad.onnx'; + Tokens := ModelDir + 'tokens.txt'; + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models + for a list of whisper models. 
+ + In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt. + You need to rename the existing model files. + + For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do + mv tiny.en-tokens.txt tokens.txt + + mv tiny.en-encoder.onnx whisper-encoder.onnx + mv tiny.en-decoder.onnx whisper-decoder.onnx + + // or use the int8.onnx + + mv tiny.en-encoder.int8.onnx whisper-encoder.onnx + mv tiny.en-decoder.int8.onnx whisper-decoder.onnx + } + WhisperEncoder := ModelDir + 'whisper-encoder.onnx'; + WhisperDecoder := ModelDir + 'whisper-decoder.onnx'; + + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models + to download models for SenseVoice. + + In the code, we use the normalized model name sense-voice.onnx. You have + to rename the downloaded model files. + + For example, you need to use + + mv model.onnx sense-voice.onnx + + // or use the int8.onnx + mv model.int8.onnx sense-voice.onnx + } + + SenseVoice := ModelDir + 'sense-voice.onnx'; + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html + to download paraformer models. + + Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx. + An example is given below for the rename: + + cp model.onnx paraformer.onnx + + // or use int8.onnx + cp model.int8.onnx paraformer.onnx + } + Paraformer := ModelDir + 'paraformer.onnx'; + + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html + to download TeleSpeech models. + + Note that you have to rename model files after downloading. 
The following + is an example + + mv model.onnx telespeech.onnx + + // or to use int8.onnx + + mv model.int8.onnx telespeech.onnx + } + + TeleSpeech := ModelDir + 'telespeech.onnx'; + + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + to download an icefall offline transducer model. Note that you need to rename the + model files to transducer-encoder.onnx, transducer-decoder.onnx, and + transducer-joiner.onnx + } + TransducerEncoder := ModelDir + 'transducer-encoder.onnx'; + TransducerDecoder := ModelDir + 'transducer-decoder.onnx'; + TransducerJoiner := ModelDir + 'transducer-joiner.onnx'; + + { + Please visit + https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download a NeMo transducer model. Note that you need to rename the model files to nemo-transducer-encoder.onnx, nemo-transducer-decoder.onnx, and nemo-transducer-joiner.onnx. + } + NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx'; + NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx'; + NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx'; + + if not FileExists(VadFilename) then + begin + ShowMessage(VadFilename + ' does not exist! Please download it from' + + sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx' + ); + Exit; + end; + + Self.Vad := CreateVad(VadFilename); + + if not FileExists(Tokens) then + begin + ShowMessage(Tokens + ' not found. 
Please download a non-streaming ASR model first!'); + Exit; + end; + + if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then + begin + OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder); + Msg := 'Whisper'; + end + else if FileExists(SenseVoice) then + begin + OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice); + Msg := 'SenseVoice'; + end + else if FileExists(Paraformer) then + begin + OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer); + Msg := 'Paraformer'; + end + else if FileExists(TeleSpeech) then + begin + OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech); + Msg := 'TeleSpeech'; + end + else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then + begin + OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, + TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer'); + Msg := 'Zipformer transducer'; + end + else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then + begin + OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, + NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer'); + Msg := 'NeMo transducer'; + end + else + begin + ShowMessage('Please download at least one non-streaming speech recognition model first.'); + Exit; + end; + + MessageDlg('Congrat! The ' + Msg + ' model is initialized successfully!', mtInformation, [mbOk], 0); + FileNameEdt.Enabled := True; + SelectFileBtn.Enabled := True; + InitBtn.Enabled := False; +end; + +end. + diff --git a/pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh b/pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh index 04fa08259..788aff85f 100755 --- a/pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh +++ b/pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh @@ -31,6 +31,7 @@ if [ ! 
-f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20 fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./nemo_ctc.pas diff --git a/pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh b/pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh index a53277ec8..474dc9452 100755 --- a/pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh +++ b/pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh @@ -32,6 +32,7 @@ if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-r fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./nemo_transducer.pas diff --git a/pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh b/pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh index 0212d072b..3b8236475 100755 --- a/pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh +++ b/pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh @@ -40,6 +40,7 @@ if [ ! -f ./itn_zh_number.fst ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./paraformer_itn.pas diff --git a/pascal-api-examples/non-streaming-asr/run-paraformer.sh b/pascal-api-examples/non-streaming-asr/run-paraformer.sh index cd1ad6d09..5f2a384a9 100755 --- a/pascal-api-examples/non-streaming-asr/run-paraformer.sh +++ b/pascal-api-examples/non-streaming-asr/run-paraformer.sh @@ -32,6 +32,7 @@ if [ ! 
-f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./paraformer.pas diff --git a/pascal-api-examples/non-streaming-asr/run-sense-voice.sh b/pascal-api-examples/non-streaming-asr/run-sense-voice.sh index 4323fa59c..ad01b15c9 100755 --- a/pascal-api-examples/non-streaming-asr/run-sense-voice.sh +++ b/pascal-api-examples/non-streaming-asr/run-sense-voice.sh @@ -31,6 +31,7 @@ if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; the fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./sense_voice.pas diff --git a/pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh b/pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh index e35892393..924fd4285 100755 --- a/pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh +++ b/pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh @@ -32,6 +32,7 @@ if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./telespeech_ctc.pas diff --git a/pascal-api-examples/non-streaming-asr/run-whisper.sh b/pascal-api-examples/non-streaming-asr/run-whisper.sh index de11620a4..1a648bcfe 100755 --- a/pascal-api-examples/non-streaming-asr/run-whisper.sh +++ b/pascal-api-examples/non-streaming-asr/run-whisper.sh @@ -32,6 +32,7 @@ if [ ! 
-f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./whisper.pas diff --git a/pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh b/pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh index 5d8c9687e..cf93c38ac 100755 --- a/pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh +++ b/pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh @@ -32,6 +32,7 @@ if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./zipformer_transducer.pas diff --git a/pascal-api-examples/read-wav/run.sh b/pascal-api-examples/read-wav/run.sh index f71fbe909..3dc590f7e 100755 --- a/pascal-api-examples/read-wav/run.sh +++ b/pascal-api-examples/read-wav/run.sh @@ -28,6 +28,7 @@ if [ ! -f ./lei-jun-test.wav ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./main.pas diff --git a/pascal-api-examples/streaming-asr/run-nemo-transducer.sh b/pascal-api-examples/streaming-asr/run-nemo-transducer.sh index aba9fb89f..90f13ace1 100755 --- a/pascal-api-examples/streaming-asr/run-nemo-transducer.sh +++ b/pascal-api-examples/streaming-asr/run-nemo-transducer.sh @@ -31,6 +31,7 @@ if [ ! -f ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens. 
fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./nemo_transducer.pas diff --git a/pascal-api-examples/streaming-asr/run-paraformer.sh b/pascal-api-examples/streaming-asr/run-paraformer.sh index aa6f056e3..ff2b4b55f 100755 --- a/pascal-api-examples/streaming-asr/run-paraformer.sh +++ b/pascal-api-examples/streaming-asr/run-paraformer.sh @@ -32,6 +32,7 @@ if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./paraformer.pas diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh index e0d97508c..caf771de9 100755 --- a/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh +++ b/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh @@ -31,6 +31,7 @@ if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; t fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./zipformer_ctc_hlg.pas diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh index 4892c1aec..a57b68475 100755 --- a/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh +++ b/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh @@ -31,6 +31,7 @@ if [ ! 
-f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; t fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./zipformer_ctc.pas diff --git a/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh b/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh index ddcce7816..1aeb3d803 100755 --- a/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh +++ b/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh @@ -32,6 +32,7 @@ fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./zipformer_transducer.pas diff --git a/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-sense-voice.sh b/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-sense-voice.sh index b339b6cd0..8add5c85b 100755 --- a/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-sense-voice.sh +++ b/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-sense-voice.sh @@ -38,6 +38,7 @@ if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; the fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./vad_with_sense_voice.pas diff --git a/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-whisper.sh b/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-whisper.sh index 260fdf36a..1140c713a 100755 --- a/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-whisper.sh +++ b/pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-whisper.sh @@ -39,6 +39,7 @@ if [ ! 
-f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./vad_with_whisper.pas diff --git a/pascal-api-examples/vad/run-circular-buffer.sh b/pascal-api-examples/vad/run-circular-buffer.sh index b46524b80..449d6ac93 100755 --- a/pascal-api-examples/vad/run-circular-buffer.sh +++ b/pascal-api-examples/vad/run-circular-buffer.sh @@ -24,6 +24,7 @@ if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../bui fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./circular_buffer.pas diff --git a/pascal-api-examples/vad/run-remove-silence.sh b/pascal-api-examples/vad/run-remove-silence.sh index d8157cb6d..1e61cf0c3 100755 --- a/pascal-api-examples/vad/run-remove-silence.sh +++ b/pascal-api-examples/vad/run-remove-silence.sh @@ -32,6 +32,7 @@ if [ ! -f ./lei-jun-test.wav ]; then fi fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./remove_silence.pas diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index 45ffadca7..22dd5f751 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -71,7 +71,7 @@ def get_models(): Model( model_name="sherpa-onnx-paraformer-zh-2023-09-14", idx=0, - lang="zh", + lang="zh_en", short_name="paraformer", rule_fsts="itn_zh_number.fst", cmd=""" @@ -109,7 +109,7 @@ def get_models(): Model( model_name="sherpa-onnx-paraformer-zh-small-2024-03-09", idx=14, - lang="zh", + lang="zh_en", short_name="small_paraformer", rule_fsts="itn_zh_number.fst", cmd=""" diff --git a/scripts/lazarus/build-generate-subtitles.sh.in b/scripts/lazarus/build-generate-subtitles.sh.in new file mode 100644 index 000000000..4662e966a --- /dev/null +++ 
b/scripts/lazarus/build-generate-subtitles.sh.in @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# +# It expects that there are 4 directories inside /tmp +# +# macos-x64 +# macos-arm64 +# linux-x64 +# windows-x64 +# +# Generated files are saved in /tmp/out/*.tar.bz2 + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + +os_array=( +linux-x64 +macos-x64 +macos-arm64 +windows-x64 +) + +for os in ${os_array[@]}; do + if [[ $os == macos-x64 || $os == macos-arm64 ]]; then + cp -v silero_vad.onnx /tmp/$os/Contents/Resources/ + else + cp -v silero_vad.onnx /tmp/$os/ + fi +done + +{% for model in model_list %} +model_name={{ model.model_name }} +lang={{ model.lang }} +short_name={{ model.short_name }} + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2 +tar xvf ${model_name}.tar.bz2 +rm ${model_name}.tar.bz2 + +{{ model.cmd }} + +ls -lh $model_name + +for os in ${os_array[@]}; do + dst=sherpa-onnx-$SHERPA_ONNX_VERSION-generate-subtitles-$os-$short_name-$lang + src=/tmp/$os + + cp -a $src $dst + d=$dst + + if [[ $os == macos-x64 || $os == macos-arm64 ]]; then + mv $dst $dst.app + + dst=$dst.app + d=$dst/Contents/Resources + fi + + cp -v $model_name/*.onnx $d + cp -v $model_name/tokens.txt $d + + d=$dst + + tar cjvf $d.tar.bz2 $d + + ls -lh + + mkdir -p /tmp/out + mv $d.tar.bz2 /tmp/out + ls -lh /tmp/out +done + +rm -rf $model_name +{% endfor %} diff --git a/scripts/lazarus/generate-subtitles.py b/scripts/lazarus/generate-subtitles.py new file mode 100755 index 000000000..608d7e9d6 --- /dev/null +++ b/scripts/lazarus/generate-subtitles.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +import argparse +from 
dataclasses import dataclass +from typing import List, Optional + +import jinja2 + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--total", + type=int, + default=1, + help="Number of runners", + ) + parser.add_argument( + "--index", + type=int, + default=0, + help="Index of the current runner", + ) + return parser.parse_args() + + +@dataclass +class Model: + model_name: str + lang: str + short_name: str = "" + cmd: str = "" + + +def get_models(): + models = [ + Model( + model_name="sherpa-onnx-whisper-tiny.en", + lang="en", + short_name="whisper_tiny.en", + cmd=""" + pushd $model_name + rm -fv tiny.en-encoder.onnx + rm -fv tiny.en-decoder.onnx + + mv -v tiny.en-encoder.int8.onnx whisper-encoder.onnx + mv -v tiny.en-decoder.int8.onnx whisper-decoder.onnx + mv -v tiny.en-tokens.txt tokens.txt + + popd + """, + ), + Model( + model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", + lang="zh_en_ko_ja_yue", + short_name="sense_voice", + cmd=""" + pushd $model_name + rm -fv model.onnx + mv -v model.int8.onnx sense-voice.onnx + popd + """, + ), + Model( + model_name="sherpa-onnx-paraformer-zh-2023-09-14", + lang="zh_en", + short_name="paraformer_2023_09_14", + cmd=""" + pushd $model_name + rm -fv model.onnx + mv -v model.int8.onnx paraformer.onnx + popd + """, + ), + Model( + model_name="sherpa-onnx-paraformer-zh-small-2024-03-09", + lang="zh_en", + short_name="paraformer_small_2024_03_09", + cmd=""" + pushd $model_name + rm -fv model.onnx + mv -v model.int8.onnx paraformer.onnx + popd + """, + ), + Model( + model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12", + lang="en", + short_name="zipformer_gigaspeech_2023_12_12", + cmd=""" + pushd $model_name + mv encoder-epoch-30-avg-1.int8.onnx transducer-encoder.onnx + mv decoder-epoch-30-avg-1.onnx transducer-decoder.onnx + mv joiner-epoch-30-avg-1.int8.onnx transducer-joiner.onnx + + rm -fv encoder-epoch-30-avg-1.onnx + rm -fv decoder-epoch-30-avg-1.int8.onnx + rm -fv 
joiner-epoch-30-avg-1.onnx + + popd + """, + ), + Model( + model_name="icefall-asr-zipformer-wenetspeech-20230615", + lang="zh", + short_name="zipformer_wenetspeech", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + rm -fv README.md + mv -v data/lang_char/tokens.txt ./ + rm -rfv data/lang_char + + mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./ + mv -v exp/decoder-epoch-12-avg-4.onnx ./ + mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./ + rm -rfv exp + + mv -v encoder-epoch-12-avg-4.int8.onnx transducer-encoder.onnx + mv -v decoder-epoch-12-avg-4.onnx transducer-decoder.onnx + mv -v joiner-epoch-12-avg-4.int8.onnx transducer-joiner.onnx + + ls -lh + + popd + """, + ), + ] + return models + + +def main(): + args = get_args() + index = args.index + total = args.total + assert 0 <= index < total, (index, total) + + all_model_list = get_models() + + num_models = len(all_model_list) + + num_per_runner = num_models // total + if num_per_runner <= 0: + raise ValueError(f"num_models: {num_models}, num_runners: {total}") + + start = index * num_per_runner + end = start + num_per_runner + + remaining = num_models - args.total * num_per_runner + + print(f"{index}/{total}: {start}-{end}/{num_models}") + + d = dict() + d["model_list"] = all_model_list[start:end] + if index < remaining: + s = args.total * num_per_runner + index + d["model_list"].append(all_model_list[s]) + print(f"{s}/{num_models}") + + filename_list = [ + "./build-generate-subtitles.sh", + ] + for filename in filename_list: + environment = jinja2.Environment() + with open(f"{filename}.in") as f: + s = f.read() + template = environment.from_string(s) + + s = template.render(**d) + with open(filename, "w") as f: + print(s, file=f) + + +if __name__ == "__main__": + main() diff --git a/sherpa-onnx/csrc/wave-reader.cc b/sherpa-onnx/csrc/wave-reader.cc index b1933bbf6..e859287e1 100644 --- a/sherpa-onnx/csrc/wave-reader.cc +++ b/sherpa-onnx/csrc/wave-reader.cc @@ -224,8 +224,6 @@ std::vector<float> 
ReadWaveImpl(std::istream &is, int32_t *sampling_rate, // header.subchunk2_size contains the number of bytes in the data. // As we assume each sample contains two bytes, so it is divided by 2 here std::vector<int16_t> samples(header.subchunk2_size / 2); - SHERPA_ONNX_LOGE("%d samples, bytes: %d", (int)samples.size(), - header.subchunk2_size); is.read(reinterpret_cast<char *>(samples.data()), header.subchunk2_size); if (!is) { diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas index 6b6ccec6c..cc100868c 100644 --- a/sherpa-onnx/pascal-api/sherpa_onnx.pas +++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas @@ -309,19 +309,50 @@ implementation SysUtils; const - {See https://www.freepascal.org/docs-html/prog/progap7.html} + { + See + - https://www.freepascal.org/docs-html/prog/progap7.html + - https://downloads.freepascal.org/fpc/docs-pdf/ + - https://downloads.freepascal.org/fpc/docs-pdf/CinFreePascal.pdf + } {$IFDEF WINDOWS} - SherpaOnnxLibName = 'sherpa-onnx-c-api.dll'; - {$ENDIF} - - {$IFDEF DARWIN} - SherpaOnnxLibName = 'sherpa-onnx-c-api'; - {$linklib sherpa-onnx-c-api} - {$ENDIF} - - {$IFDEF LINUX} - SherpaOnnxLibName = 'libsherpa-onnx-c-api.so'; + { For windows, we always use dynamic link. 
See + https://forum.lazarus.freepascal.org/index.php/topic,15712.msg84781.html#msg84781 + We need to rebuild the static lib for windows using Mingw or cygwin + } + SherpaOnnxLibName = 'sherpa-onnx-c-api.dll'; + {$ELSE} + {$IFNDEF SHERPA_ONNX_USE_SHARED_LIBS} + {static link for linux and macos} + {$linklib sherpa-onnx-c-api} + {$linklib sherpa-onnx-core} + {$linklib kaldi-decoder-core} + {$linklib sherpa-onnx-kaldifst-core} + {$linklib sherpa-onnx-fstfar} + {$linklib sherpa-onnx-fst} + {$linklib kaldi-native-fbank-core} + {$linklib piper_phonemize} + {$linklib espeak-ng} + {$linklib ucd} + {$linklib onnxruntime} + {$linklib ssentencepiece_core} + + {$IFDEF LINUX} + {$linklib m} + {$LINKLIB stdc++} + {$LINKLIB gcc_s} + {$ENDIF} + + {$IFDEF DARWIN} + {$linklib c++} + {$ENDIF} + SherpaOnnxLibName = ''; + {$ELSE} + {dynamic link for linux and macos} + SherpaOnnxLibName = 'sherpa-onnx-c-api'; + {$linklib sherpa-onnx-c-api} + {$ENDIF} {$ENDIF} type @@ -621,10 +652,17 @@ function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; PWave: PSherpaOnnxWave; I: Integer; begin + Result.Samples := nil; + Result.SampleRate := 0; + PFilename := PAnsiChar(Filename); + PWave := SherpaOnnxReadWaveWrapper(PFilename); - Result.Samples := nil; + if PWave = nil then + Exit; + + SetLength(Result.Samples, PWave^.NumSamples); Result.SampleRate := PWave^.SampleRate;