From 0e0d04a97a034850048514c860954192d7a17a00 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Thu, 22 Aug 2024 19:36:24 +0800
Subject: [PATCH] Provide models for mobile-only platforms by fixing batch size to 1 (#1276)

---
 .github/workflows/mobile-asr-models.yaml      | 52 ++++++++++
 scripts/mobile-asr-models/README.md           | 18 ++++
 .../mobile-asr-models/dynamic_quantization.py | 38 ++++++++
 scripts/mobile-asr-models/parse_options.sh    | 97 +++++++++++++++++++
 scripts/mobile-asr-models/run-impl.sh         | 42 ++++++++
 scripts/mobile-asr-models/run.sh              | 40 ++++++++
 6 files changed, 287 insertions(+)
 create mode 100644 .github/workflows/mobile-asr-models.yaml
 create mode 100644 scripts/mobile-asr-models/README.md
 create mode 100755 scripts/mobile-asr-models/dynamic_quantization.py
 create mode 100755 scripts/mobile-asr-models/parse_options.sh
 create mode 100755 scripts/mobile-asr-models/run-impl.sh
 create mode 100755 scripts/mobile-asr-models/run.sh

diff --git a/.github/workflows/mobile-asr-models.yaml b/.github/workflows/mobile-asr-models.yaml
new file mode 100644
index 000000000..c58bb0396
--- /dev/null
+++ b/.github/workflows/mobile-asr-models.yaml
@@ -0,0 +1,52 @@
+name: mobile-asr-models
+
+on:
+  push:
+    branches:
+      - asr-mobile  # the only branch listed as a push trigger
+
+  workflow_dispatch:
+
+
+concurrency:
+  group: mobile-asr-models-${{ github.ref }}  # group name is derived from the git ref
+  cancel-in-progress: true
+
+jobs:
+  mobile-asr-models:
+    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'  # job is restricted to these three repository owners
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]  # read back through matrix.os in runs-on
+        python-version: ["3.8"]  # read back through matrix.python-version in the Setup Python step
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0
+
+      - name: Run
+        shell: bash
+        run: |
+          cd scripts/mobile-asr-models
+          ./run.sh
+
+      - name: Release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2  # presumably treated as a pattern because file_glob is true; confirm against the action's docs
+          overwrite: true
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}  # taken from the repository secrets
+          tag: asr-models  # same release tag that run.sh downloads the source model from
diff --git a/scripts/mobile-asr-models/README.md b/scripts/mobile-asr-models/README.md
new file mode 100644
index 000000000..ff8715502
--- /dev/null
+++ b/scripts/mobile-asr-models/README.md
@@ -0,0 +1,18 @@
+# Introduction
+
+This folder contains scripts to convert ASR models for mobile platforms
+supporting only batch size equal to 1.
+
+The advantage of fixing the batch size to 1 is that it provides more
+opportunities for model optimization and quantization.
+
+To give you a concrete example, for the following model
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+
+| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
+|---|---|---|
+|Dynamic batch size| 315 MB| 174 MB|
+|Batch size fixed to 1| 242 MB | 100 MB |
+
+The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing)
+provides examples to use the above two models.
diff --git a/scripts/mobile-asr-models/dynamic_quantization.py b/scripts/mobile-asr-models/dynamic_quantization.py
new file mode 100755
index 000000000..80828a823
--- /dev/null
+++ b/scripts/mobile-asr-models/dynamic_quantization.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Dynamically quantize the MatMul weights of an ONNX model to int8."""
+import argparse
+
+from onnxruntime.quantization import QuantType, quantize_dynamic
+
+
+def get_args():
+    """Parse the command line; --input and --output are both mandatory."""
+    cli = argparse.ArgumentParser()
+    for flag, description in (
+        ("--input", "Input onnx model"),
+        ("--output", "Output onnx model"),
+    ):
+        cli.add_argument(flag, type=str, required=True, help=description)
+    return cli.parse_args()
+
+
+def main():
+    """Print the parsed options, then write the quantized model to --output."""
+    options = get_args()
+    print(vars(options))
+
+    quantization_settings = {
+        "model_input": options.input,
+        "model_output": options.output,
+        # MatMul is the only op type handed to the quantizer.
+        "op_types_to_quantize": ["MatMul"],
+        "weight_type": QuantType.QInt8,
+    }
+    quantize_dynamic(**quantization_settings)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/mobile-asr-models/parse_options.sh b/scripts/mobile-asr-models/parse_options.sh
new file mode 100755
index 000000000..71fb9e5ea
--- /dev/null
+++ b/scripts/mobile-asr-models/parse_options.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
+#                 Arnab Ghoshal, Karel Vesely
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Parse command-line options.
+# To be sourced by another script (as in ". parse_options.sh").
+# Option format is: --option-name arg
+# and shell variable "option_name" gets set to value "arg."
+# The exception is --help, which takes no arguments, but prints the
+# $help_message variable (if defined).
+
+
+###
+### The --config file options have lower priority to command line
+### options, so we need to import them first...
+###
+
+# Now import all the configs specified by command-line, in left-to-right order
+for ((argpos=1; argpos<$#; argpos++)); do
+  if [ "${!argpos}" == "--config" ]; then
+    argpos_plus1=$((argpos+1))
+    config=${!argpos_plus1}
+    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
+    . $config  # source the config file.
+  fi
+done
+
+
+###
+### Now we process the command line options
+###
+while true; do
+  [ -z "${1:-}" ] && break;  # break if there are no arguments
+  case "$1" in
+    # If the enclosing script is called with --help option, print the help
+    # message and exit.  Scripts should put help messages in $help_message
+    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
+      else printf "$help_message\n" 1>&2 ; fi;
+      exit 0 ;;
+    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
+      exit 1 ;;
+    # If the first command-line argument begins with "--" (e.g. --foo-bar),
+    # then work out the variable name as $name, which will equal "foo_bar".
+    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
+      # Next we test whether the variable in question is undefined-- if so it's
+      # an invalid option and we die.  Note: $0 evaluates to the name of the
+      # enclosing script.
+      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
+      # is undefined.  We then have to wrap this test inside "eval" because
+      # foo_bar is itself inside a variable ($name).
+      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
+
+      oldval="`eval echo \\$$name`";
+      # Work out whether we seem to be expecting a Boolean argument.
+      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
+        was_bool=true;
+      else
+        was_bool=false;
+      fi
+
+      # Set the variable to the right value-- the escaped quotes make it work if
+      # the option had spaces, like --cmd "queue.pl -sync y"
+      eval $name=\"$2\";
+
+      # Check that Boolean-valued arguments are really Boolean.
+      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
+        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
+        exit 1;
+      fi
+      shift 2;
+      ;;
+  *) break;
+  esac
+done
+
+
+# Check for an empty argument to the --cmd option, which can easily occur as a
+# result of scripting errors.
+[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
+
+
+true; # so this script returns exit code 0.
diff --git a/scripts/mobile-asr-models/run-impl.sh b/scripts/mobile-asr-models/run-impl.sh
new file mode 100755
index 000000000..14184e163
--- /dev/null
+++ b/scripts/mobile-asr-models/run-impl.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# usage of this file:
+#  ./run-impl.sh --input in.onnx --output1 out1.onnx --output2 out2.onnx
+# where out1.onnx is a float32 model with batch size fixed to 1
+# and out2.onnx is an int8 quantized version of out1.onnx
+#
+# Optionally pass --batch-dim NAME to choose the symbolic dimension that is
+# fixed to 1 (default: N).
+
+set -ex
+
+# parse_options.sh rejects any option whose variable is not already defined,
+# so each supported option is declared here together with its default.
+input=
+output1=
+output2=
+batch_dim=N
+source ./parse_options.sh
+
+if [ -z "$input" ]; then
+  echo 'Please provide input model filename' 1>&2
+  exit 1
+fi
+
+if [ -z "$output1" ]; then
+  echo 'Please provide output1 model filename' 1>&2
+  exit 1
+fi
+
+if [ -z "$output2" ]; then
+  echo 'Please provide output2 model filename' 1>&2
+  exit 1
+fi
+
+
+echo "input: $input"
+echo "output1: $output1"
+echo "output2: $output2"
+
+# Delete the intermediate model on every exit path; with "set -e" a failing
+# step would otherwise leave it behind.
+trap 'rm -f tmp.fixed.onnx' EXIT
+
+python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param "$batch_dim" --dim_value 1 "$input" tmp.fixed.onnx
+python3 -m onnxruntime.quantization.preprocess --input tmp.fixed.onnx --output "$output1"
+python3 ./dynamic_quantization.py --input "$output1" --output "$output2"
+
+ls -lh "$input" tmp.fixed.onnx "$output1" "$output2"
diff --git
a/scripts/mobile-asr-models/run.sh b/scripts/mobile-asr-models/run.sh new file mode 100755 index 000000000..bc327519c --- /dev/null +++ b/scripts/mobile-asr-models/run.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -ex + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + +src=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 +dst=$src-mobile + +mkdir -p $dst + +./run-impl.sh \ + --input $src/encoder-epoch-99-avg-1.onnx \ + --output1 $dst/encoder-epoch-99-avg-1.onnx \ + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx + +cp -v $src/README.md $dst/ +cp -v $src/tokens.txt $dst/ +cp -av $src/test_wavs $dst/ +cp -v $src/decoder-epoch-99-avg-1.onnx $dst/ +cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/ + +cat > $dst/notes.md <