From 0e0d04a97a034850048514c860954192d7a17a00 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Thu, 22 Aug 2024 19:36:24 +0800
Subject: [PATCH] Provide models for mobile-only platforms by fixing batch size
 to 1 (#1276)

---
 .github/workflows/mobile-asr-models.yaml      | 52 ++++++++++
 scripts/mobile-asr-models/README.md           | 18 ++++
 .../mobile-asr-models/dynamic_quantization.py | 38 ++++++++
 scripts/mobile-asr-models/parse_options.sh    | 97 +++++++++++++++++++
 scripts/mobile-asr-models/run-impl.sh         | 42 ++++++++
 scripts/mobile-asr-models/run.sh              | 40 ++++++++
 6 files changed, 287 insertions(+)
 create mode 100644 .github/workflows/mobile-asr-models.yaml
 create mode 100644 scripts/mobile-asr-models/README.md
 create mode 100755 scripts/mobile-asr-models/dynamic_quantization.py
 create mode 100755 scripts/mobile-asr-models/parse_options.sh
 create mode 100755 scripts/mobile-asr-models/run-impl.sh
 create mode 100755 scripts/mobile-asr-models/run.sh

diff --git a/.github/workflows/mobile-asr-models.yaml b/.github/workflows/mobile-asr-models.yaml
new file mode 100644
index 000000000..c58bb0396
--- /dev/null
+++ b/.github/workflows/mobile-asr-models.yaml
@@ -0,0 +1,52 @@
+name: mobile-asr-models
+
+on:
+  push:
+    branches:
+      - asr-mobile  # only pushes to this branch trigger the workflow
+
+  workflow_dispatch:  # also allow manual runs
+
+# A newer run for the same ref cancels the one that is still in progress.
+concurrency:
+  group: mobile-asr-models-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  mobile-asr-models:  # skipped unless the repository owner is one of those listed below
+    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.8"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies  # pinned versions of onnxruntime and onnx
+        shell: bash
+        run: |
+          python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0
+
+      - name: Run  # run.sh moves the resulting *.tar.bz2 to the repository root
+        shell: bash
+        run: |
+          cd scripts/mobile-asr-models
+          ./run.sh
+
+      - name: Release  # upload the archives to the asr-models tag of k2-fsa/sherpa-onnx
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: asr-models
diff --git a/scripts/mobile-asr-models/README.md b/scripts/mobile-asr-models/README.md
new file mode 100644
index 000000000..ff8715502
--- /dev/null
+++ b/scripts/mobile-asr-models/README.md
@@ -0,0 +1,18 @@
+# Introduction
+
+This folder contains scripts that convert ASR models for mobile platforms.
+The converted models support only a batch size of 1.
+
+The advantage of fixing the batch size to 1 is that it provides more
+opportunities for model optimization and quantization.
+
+To give you a concrete example, here are the encoder sizes for the following model:
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+
+| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
+|---|---|---|
+|Dynamic batch size| 315 MB| 174 MB|
+|Batch size fixed to 1| 242 MB | 100 MB |
+
+The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing)
+shows how to use the above two models.
diff --git a/scripts/mobile-asr-models/dynamic_quantization.py b/scripts/mobile-asr-models/dynamic_quantization.py
new file mode 100755
index 000000000..80828a823
--- /dev/null
+++ b/scripts/mobile-asr-models/dynamic_quantization.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Dynamically quantize the MatMul weights of an ONNX model to int8."""
+import argparse
+
+from onnxruntime.quantization import QuantType, quantize_dynamic
+
+
+def get_args():
+    """Parse the command line; --input and --output are both required."""
+    # (flag, help text) of each required, string-valued option.
+    options = (
+        ("--input", "Input onnx model"),
+        ("--output", "Output onnx model"),
+    )
+
+    arg_parser = argparse.ArgumentParser()
+    for flag, description in options:
+        arg_parser.add_argument(flag, type=str, required=True, help=description)
+    return arg_parser.parse_args()
+
+
+def main():
+    """Quantize the model given by --input and save it to --output."""
+    cli = get_args()
+    print(vars(cli))
+
+    # Only MatMul nodes are quantized; the weight type is QInt8.
+    quantize_kwargs = dict(
+        model_input=cli.input,
+        model_output=cli.output,
+        op_types_to_quantize=["MatMul"],
+        weight_type=QuantType.QInt8,
+    )
+    quantize_dynamic(**quantize_kwargs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/mobile-asr-models/parse_options.sh b/scripts/mobile-asr-models/parse_options.sh
new file mode 100755
index 000000000..71fb9e5ea
--- /dev/null
+++ b/scripts/mobile-asr-models/parse_options.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
+#                 Arnab Ghoshal, Karel Vesely
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Parse command-line options.
+# To be sourced by another script (as in ". parse_options.sh").
+# Option format is: --option-name arg
+# and shell variable "option_name" gets set to value "arg."
+# The exception is --help, which takes no arguments, but prints the
+# $help_message variable (if defined).
+
+
+###
+### The --config file options have lower priority to command line
+### options, so we need to import them first...
+###
+
+# Now import all the configs specified by command-line, in left-to-right order
+for ((argpos=1; argpos<$#; argpos++)); do  # the last argument is never tested: --config needs a value after it
+  if [ "${!argpos}" == "--config" ]; then  # ${!argpos} is positional argument number $argpos
+    argpos_plus1=$((argpos+1))
+    config=${!argpos_plus1}  # the word that follows --config
+    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
+    . $config  # source the config file.
+  fi
+done
+
+
+###
+### Now we process the command line options
+###
+while true; do
+  [ -z "${1:-}" ] && break;  # break if there are no arguments (or the next one is empty)
+  case "$1" in
+    # If the enclosing script is called with --help option, print the help
+    # message and exit.  Scripts should put help messages in $help_message
+    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
+      else printf "$help_message\n" 1>&2 ; fi;
+      exit 0 ;;
+    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
+      exit 1 ;;
+    # If the first command-line argument begins with "--" (e.g. --foo-bar),
+    # then work out the variable name as $name, which will equal "foo_bar".
+    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
+      # Next we test whether the variable in question is undefined-- if so it's
+      # an invalid option and we die.  Note: $0 evaluates to the name of the
+      # enclosing script.
+      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
+      # is undefined.  We then have to wrap this test inside "eval" because
+      # foo_bar is itself inside a variable ($name).
+      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
+      # Fetch the value the variable holds before it is overwritten.
+      oldval="`eval echo \\$$name`";
+      # Work out whether we seem to be expecting a Boolean argument.
+      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
+        was_bool=true;
+      else
+        was_bool=false;
+      fi
+
+      # Set the variable to the right value-- the escaped quotes make it work if
+      # the option had spaces, like --cmd "queue.pl -sync y"
+      eval $name=\"$2\";
+
+      # Check that Boolean-valued arguments are really Boolean.
+      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
+        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
+        exit 1;
+      fi
+      shift 2;  # consume the option name and its value
+      ;;
+  *) break;  # anything else (e.g. the first positional argument) ends option parsing
+  esac
+done
+
+
+# Check for an empty argument to the --cmd option (cmd is set, but to an empty
+# string), which can easily occur as a result of scripting errors.
+[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
+
+
+true; # so this script returns exit code 0.
diff --git a/scripts/mobile-asr-models/run-impl.sh b/scripts/mobile-asr-models/run-impl.sh
new file mode 100755
index 000000000..14184e163
--- /dev/null
+++ b/scripts/mobile-asr-models/run-impl.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# usage of this file:
+#  ./run-impl.sh --input in.onnx --output1 out1.onnx --output2 out2.onnx
+# where out1.onnx is a float32 model with batch size fixed to 1
+# and out2.onnx is an int8 quantized version of out1.onnx
+# Optional: --batch-dim NAME, the symbolic batch dimension to fix (default: N)
+set -ex
+
+input=
+output1=
+output2=
+batch_dim=N
+source ./parse_options.sh  # sets the variables above from "--name value" arguments
+
+if [ -z "$input" ]; then
+  echo 'Please provide input model filename'
+  exit 1
+fi
+
+if [ -z "$output1" ]; then
+  echo 'Please provide output1 model filename'
+  exit 1
+fi
+
+if [ -z "$output2" ]; then
+  echo 'Please provide output2 model filename'
+  exit 1
+fi
+
+
+echo "input: $input"
+echo "output1: $output1"
+echo "output2: $output2"
+# Fix the batch dimension to 1, pre-process for quantization, then quantize.
+python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param "$batch_dim" --dim_value 1 "$input" tmp.fixed.onnx
+python3 -m onnxruntime.quantization.preprocess --input tmp.fixed.onnx --output "$output1"
+python3 ./dynamic_quantization.py --input "$output1" --output "$output2"
+
+ls -lh "$input" tmp.fixed.onnx "$output1" "$output2"
+
+rm tmp.fixed.onnx
diff --git a/scripts/mobile-asr-models/run.sh b/scripts/mobile-asr-models/run.sh
new file mode 100755
index 000000000..bc327519c
--- /dev/null
+++ b/scripts/mobile-asr-models/run.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Download a released model, fix its encoder to batch size 1, and re-pack it.
+set -ex
+
+src=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+dst=$src-mobile
+# -f makes curl fail on an HTTP error instead of saving the error page.
+curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2"
+tar xvf "$src.tar.bz2"
+rm "$src.tar.bz2"
+
+mkdir -p "$dst"
+# Convert the encoder: --output1 is the batch-size-1 model, --output2 its int8 version.
+./run-impl.sh \
+  --input "$src/encoder-epoch-99-avg-1.onnx" \
+  --output1 "$dst/encoder-epoch-99-avg-1.onnx" \
+  --output2 "$dst/encoder-epoch-99-avg-1.int8.onnx"
+# The remaining files are copied as they are.
+cp -v "$src/README.md" "$dst/"
+cp -v "$src/tokens.txt" "$dst/"
+cp -av "$src/test_wavs" "$dst/"
+cp -v "$src/decoder-epoch-99-avg-1.onnx" "$dst/"
+cp -v "$src/joiner-epoch-99-avg-1.int8.onnx" "$dst/"
+
+cat > "$dst/notes.md" <<EOF
+# Introduction
+This model is converted from
+https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
+and it supports only batch size equal to 1.
+EOF
+
+echo "---$src---"
+ls -lh "$src"
+echo "---$dst---"
+ls -lh "$dst"
+rm -rf "$src"
+
+tar cjfv "$dst.tar.bz2" "$dst"
+mv *.tar.bz2 ../../
+rm -rf "$dst"