diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index f2b18e528..88c430489 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -50,11 +50,30 @@ jobs:
cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release ..
cmake --build . --target install --config Release
+ - name: Build sherpa-onnx for windows x86
+ if: matrix.os == 'windows-latest'
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ mkdir build-win32
+ cd build-win32
+ cmake -A Win32 -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release ..
+ cmake --build . --target install --config Release
+
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}
path: ./build/install/lib/
+ - uses: actions/upload-artifact@v4
+ if: matrix.os == 'windows-latest'
+ with:
+ name: ${{ matrix.os }}-win32
+ path: ./build-win32/install/lib/
+
test-dot-net:
runs-on: ${{ matrix.os }}
needs: [build-libs]
@@ -95,7 +114,13 @@ jobs:
uses: actions/download-artifact@v4
with:
name: windows-latest
- path: /tmp/windows
+ path: /tmp/windows-x64
+
+ - name: Retrieve win32 artifact from windows-latest
+ uses: actions/download-artifact@v4
+ with:
+ name: windows-latest-win32
+ path: /tmp/windows-x86
- name: Setup .NET
uses: actions/setup-dotnet@v3
@@ -119,8 +144,11 @@ jobs:
echo "----------/tmp/macos----------"
ls -lh /tmp/macos
- echo "----------/tmp/windows----------"
- ls -lh /tmp/windows
+ echo "----------/tmp/windows-x64----------"
+ ls -lh /tmp/windows-x64
+
+ echo "----------/tmp/windows-x86----------"
+ ls -lh /tmp/windows-x86
- name: Build
shell: bash
diff --git a/scripts/dotnet/generate.py b/scripts/dotnet/generate.py
index 8350af650..f24353f68 100755
--- a/scripts/dotnet/generate.py
+++ b/scripts/dotnet/generate.py
@@ -87,7 +87,7 @@ def process_macos(s):
f.write(s)
-def process_windows(s):
+def process_windows(s, rid):
libs = [
"espeak-ng.dll",
"kaldi-decoder-core.dll",
@@ -103,18 +103,18 @@ def process_windows(s):
version = get_version()
- prefix = "/tmp/windows/"
+ prefix = f"/tmp/windows-{rid}/"
libs = [prefix + lib for lib in libs]
libs = "\n ;".join(libs)
d = get_dict()
- d["dotnet_rid"] = "win-x64"
+ d["dotnet_rid"] = f"win-{rid}"
d["libs"] = libs
environment = jinja2.Environment()
template = environment.from_string(s)
s = template.render(**d)
- with open("./windows/sherpa-onnx.runtime.csproj", "w") as f:
+ with open(f"./windows-{rid}/sherpa-onnx.runtime.csproj", "w") as f:
f.write(s)
@@ -122,7 +122,8 @@ def main():
s = read_proj_file("./sherpa-onnx.csproj.runtime.in")
process_macos(s)
process_linux(s)
- process_windows(s)
+ process_windows(s, "x64")
+ process_windows(s, "x86")
s = read_proj_file("./sherpa-onnx.csproj.in")
d = get_dict()
diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh
index 72bdbf796..d723a2d8d 100755
--- a/scripts/dotnet/run.sh
+++ b/scripts/dotnet/run.sh
@@ -16,14 +16,15 @@ HF_MIRROR=hf.co
mkdir -p /tmp/
pushd /tmp
-mkdir -p linux macos windows
+mkdir -p linux macos windows-x64 windows-x86
# You can pre-download the required wheels to /tmp
src_dir=/tmp
linux_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
macos_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
-windows_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
+windows_x64_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
+windows_x86_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
echo "---linux x86_64---"
@@ -72,13 +73,13 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
fi
-if [ ! -f /tmp/windows/sherpa-onnx-core.dll ]; then
+if [ ! -f /tmp/windows-x64/sherpa-onnx-core.dll ]; then
echo "---windows x64---"
- cd windows
+ cd windows-x64
mkdir -p wheel
cd wheel
- if [ -f $windows_wheel ]; then
- cp -v $windows_wheel .
+ if [ -f $windows_x64_wheel ]; then
+ cp -v $windows_x64_wheel .
else
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
fi
@@ -92,9 +93,29 @@ if [ ! -f /tmp/windows/sherpa-onnx-core.dll ]; then
cd ..
fi
+if [ ! -f /tmp/windows-x86/sherpa-onnx-core.dll ]; then
+ echo "---windows x86---"
+ cd windows-x86
+ mkdir -p wheel
+ cd wheel
+ if [ -f $windows_x86_wheel ]; then
+ cp -v $windows_x86_wheel .
+ else
+ curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
+ fi
+ unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
+ cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll ../
+ cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib ../
+ cd ..
+
+ rm -rf wheel
+ ls -lh
+ cd ..
+fi
+
popd
-mkdir -p macos linux windows all
+mkdir -p macos linux windows-x64 windows-x86 all
cp ./online.cs all
cp ./offline.cs all
@@ -111,7 +132,12 @@ dotnet build -c Release
dotnet pack -c Release -o ../packages
popd
-pushd windows
+pushd windows-x64
+dotnet build -c Release
+dotnet pack -c Release -o ../packages
+popd
+
+pushd windows-x86
dotnet build -c Release
dotnet pack -c Release -o ../packages
popd
diff --git a/scripts/dotnet/sherpa-onnx.csproj.in b/scripts/dotnet/sherpa-onnx.csproj.in
index 4063bfe4e..a6f83a64d 100644
--- a/scripts/dotnet/sherpa-onnx.csproj.in
+++ b/scripts/dotnet/sherpa-onnx.csproj.in
@@ -51,6 +51,7 @@
+
diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc
index c42184124..14c3d37a2 100644
--- a/sherpa-onnx/csrc/lexicon.cc
+++ b/sherpa-onnx/csrc/lexicon.cc
@@ -145,7 +145,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIds(
}
std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
- const std::string &text) const {
+ const std::string &_text) const {
+ std::string text(_text);
+ ToLowerCase(&text);
std::vector<std::string> words;
if (pattern_) {
// Handle polyphones
@@ -206,6 +208,11 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
eos = token2id_.at("eos");
}
+ int32_t pad = -1;
+ if (token2id_.count("#0")) {
+ pad = token2id_.at("#0");
+ }
+
if (sil != -1) {
this_sentence.push_back(sil);
}
@@ -219,6 +226,8 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
if (punctuations_.count(w)) {
if (token2id_.count(w)) {
this_sentence.push_back(token2id_.at(w));
+ } else if (pad != -1) {
+ this_sentence.push_back(pad);
} else if (sil != -1) {
this_sentence.push_back(sil);
}