diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index f2b18e528..88c430489 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -50,11 +50,30 @@ jobs: cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release .. cmake --build . --target install --config Release + - name: Build sherpa-onnx for windows x86 + if: matrix.os == 'windows-latest' + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build-win32 + cd build-win32 + cmake -A Win32 -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release .. + cmake --build . --target install --config Release + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }} path: ./build/install/lib/ + - uses: actions/upload-artifact@v4 + if: matrix.os == 'windows-latest' + with: + name: ${{ matrix.os }}-win32 + path: ./build-win32/install/lib/ + test-dot-net: runs-on: ${{ matrix.os }} needs: [build-libs] @@ -95,7 +114,13 @@ jobs: uses: actions/download-artifact@v4 with: name: windows-latest - path: /tmp/windows + path: /tmp/windows-x64 + + - name: Retrieve artifact from windows-latest + uses: actions/download-artifact@v4 + with: + name: windows-latest-win32 + path: /tmp/windows-x86 - name: Setup .NET uses: actions/setup-dotnet@v3 @@ -119,8 +144,11 @@ jobs: echo "----------/tmp/macos----------" ls -lh /tmp/macos - echo "----------/tmp/windows----------" - ls -lh /tmp/windows + echo "----------/tmp/windows-x64----------" + ls -lh /tmp/windows-x64 + + echo "----------/tmp/windows-x86----------" + ls -lh /tmp/windows-x86 - name: Build shell: bash diff --git a/scripts/dotnet/generate.py b/scripts/dotnet/generate.py index 8350af650..f24353f68 100755 --- a/scripts/dotnet/generate.py +++ b/scripts/dotnet/generate.py @@ -87,7 +87,7 @@ def process_macos(s): f.write(s) -def process_windows(s): +def process_windows(s, rid): libs = [ "espeak-ng.dll", "kaldi-decoder-core.dll", @@ -103,18 +103,18 @@ def process_windows(s): version = get_version() - prefix = "/tmp/windows/" + prefix = f"/tmp/windows-{rid}/" libs = [prefix + lib for lib in libs] libs = "\n ;".join(libs) d = get_dict() - d["dotnet_rid"] = "win-x64" + d["dotnet_rid"] = f"win-{rid}" d["libs"] = libs environment = jinja2.Environment() template = environment.from_string(s) s = template.render(**d) - with open("./windows/sherpa-onnx.runtime.csproj", "w") as f: + with open(f"./windows-{rid}/sherpa-onnx.runtime.csproj", "w") as f: f.write(s) @@ -122,7 +122,8 @@ def main(): s = read_proj_file("./sherpa-onnx.csproj.runtime.in") process_macos(s) process_linux(s) - process_windows(s) + process_windows(s, "x64") + process_windows(s, "x86") s = read_proj_file("./sherpa-onnx.csproj.in") d = get_dict() diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh index 72bdbf796..d723a2d8d 100755 --- a/scripts/dotnet/run.sh +++ b/scripts/dotnet/run.sh @@ -16,14 +16,15 @@ HF_MIRROR=hf.co mkdir -p /tmp/ pushd /tmp -mkdir -p linux macos windows +mkdir -p linux macos windows-x64 windows-x86 # You can pre-download the required wheels to /tmp src_dir=/tmp linux_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl macos_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl -windows_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl +windows_x64_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl +windows_x86_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then echo "---linux x86_64---" @@ -72,13 +73,13 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then fi -if [ ! -f /tmp/windows/sherpa-onnx-core.dll ]; then +if [ ! -f /tmp/windows-x64/sherpa-onnx-core.dll ]; then echo "---windows x64---" - cd windows + cd windows-x64 mkdir -p wheel cd wheel - if [ -f $windows_wheel ]; then - cp -v $windows_wheel . + if [ -f $windows_x64_wheel ]; then + cp -v $windows_x64_wheel . else curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl fi @@ -92,9 +93,29 @@ if [ ! -f /tmp/windows/sherpa-onnx-core.dll ]; then cd .. fi +if [ ! -f /tmp/windows-x86/sherpa-onnx-core.dll ]; then + echo "---windows x86---" + cd windows-x86 + mkdir -p wheel + cd wheel + if [ -f $windows_x86_wheel ]; then + cp -v $windows_x86_wheel . + else + curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl + fi + unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll ../ + cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib ../ + cd .. + + rm -rf wheel + ls -lh + cd .. +fi + popd -mkdir -p macos linux windows all +mkdir -p macos linux windows-x64 windows-x86 all cp ./online.cs all cp ./offline.cs all @@ -111,7 +132,12 @@ dotnet build -c Release dotnet pack -c Release -o ../packages popd -pushd windows +pushd windows-x64 +dotnet build -c Release +dotnet pack -c Release -o ../packages +popd + +pushd windows-x86 dotnet build -c Release dotnet pack -c Release -o ../packages popd diff --git a/scripts/dotnet/sherpa-onnx.csproj.in b/scripts/dotnet/sherpa-onnx.csproj.in index 4063bfe4e..a6f83a64d 100644 --- a/scripts/dotnet/sherpa-onnx.csproj.in +++ b/scripts/dotnet/sherpa-onnx.csproj.in @@ -51,6 +51,7 @@ + diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc index c42184124..14c3d37a2 100644 --- a/sherpa-onnx/csrc/lexicon.cc +++ b/sherpa-onnx/csrc/lexicon.cc @@ -145,7 +145,9 @@ std::vector> Lexicon::ConvertTextToTokenIds( } std::vector> Lexicon::ConvertTextToTokenIdsChinese( - const std::string &text) const { + const std::string &_text) const { + std::string text(_text); + ToLowerCase(&text); std::vector words; if (pattern_) { // Handle polyphones @@ -206,6 +208,11 @@ std::vector> Lexicon::ConvertTextToTokenIdsChinese( eos = token2id_.at("eos"); } + int32_t pad = -1; + if (token2id_.count("#0")) { + pad = token2id_.at("#0"); + } + if (sil != -1) { this_sentence.push_back(sil); } @@ -219,6 +226,8 @@ std::vector> Lexicon::ConvertTextToTokenIdsChinese( if (punctuations_.count(w)) { if (token2id_.count(w)) { this_sentence.push_back(token2id_.at(w)); + } else if (pad != -1) { + this_sentence.push_back(pad); } else if (sil != -1) { this_sentence.push_back(sil); }