Skip to content

Commit

Permalink
Reduce model initialization time for online speech recognition (#215)
Browse files Browse the repository at this point in the history
* Reduce model initialization time for online speech recognition

* Fixed Styling

---------

Co-authored-by: w11wo <[email protected]>
  • Loading branch information
w11wo and w11wo committed Jul 14, 2023
1 parent fe0630f commit 5a6b55c
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 8 deletions.
2 changes: 2 additions & 0 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
recognizer_config.model_config.provider =
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
recognizer_config.model_config.model_type =
SHERPA_ONNX_OR(config->model_config.model_type, "");
recognizer_config.model_config.debug =
SHERPA_ONNX_OR(config->model_config.debug, 0);

Expand Down
1 change: 1 addition & 0 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
const char *tokens;
int32_t num_threads;
const char *provider;
const char *model_type;
int32_t debug; // true to print debug information of the model
} SherpaOnnxOnlineTransducerModelConfig;

Expand Down
13 changes: 9 additions & 4 deletions sherpa-onnx/csrc/online-transducer-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,30 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) {

po->Register("debug", &debug,
"true to print model information while loading it.");
po->Register("model-type", &model_type,
"Specify it to reduce model initialization time. "
"Valid values are: conformer, lstm, zipformer, zipformer2. "
"All other values lead to loading the model twice.");
}

bool OnlineTransducerModelConfig::Validate() const {
if (!FileExists(tokens)) {
SHERPA_ONNX_LOGE("%s does not exist", tokens.c_str());
SHERPA_ONNX_LOGE("tokens: %s does not exist", tokens.c_str());
return false;
}

if (!FileExists(encoder_filename)) {
SHERPA_ONNX_LOGE("%s does not exist", encoder_filename.c_str());
SHERPA_ONNX_LOGE("encoder: %s does not exist", encoder_filename.c_str());
return false;
}

if (!FileExists(decoder_filename)) {
SHERPA_ONNX_LOGE("%s does not exist", decoder_filename.c_str());
SHERPA_ONNX_LOGE("decoder: %s does not exist", decoder_filename.c_str());
return false;
}

if (!FileExists(joiner_filename)) {
SHERPA_ONNX_LOGE("%s does not exist", joiner_filename.c_str());
SHERPA_ONNX_LOGE("joiner: %s does not exist", joiner_filename.c_str());
return false;
}

Expand All @@ -63,6 +67,7 @@ std::string OnlineTransducerModelConfig::ToString() const {
os << "tokens=\"" << tokens << "\", ";
os << "num_threads=" << num_threads << ", ";
os << "provider=\"" << provider << "\", ";
os << "model_type=\"" << model_type << "\", ";
os << "debug=" << (debug ? "True" : "False") << ")";

return os.str();
Expand Down
18 changes: 16 additions & 2 deletions sherpa-onnx/csrc/online-transducer-model-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,33 @@ struct OnlineTransducerModelConfig {
bool debug = false;
std::string provider = "cpu";

// Setting this field lets us load the model once instead of twice,
// which reduces initialization time.
//
// Valid values:
// - conformer
// - lstm
// - zipformer
// - zipformer2
//
// Any other non-empty value is invalid: a message is logged and the model
// type is detected from the model itself, so the model is loaded twice.
// An empty value (the default) also falls back to that detection.
std::string model_type;

OnlineTransducerModelConfig() = default;
OnlineTransducerModelConfig(const std::string &encoder_filename,
const std::string &decoder_filename,
const std::string &joiner_filename,
const std::string &tokens, int32_t num_threads,
bool debug, const std::string &provider)
bool debug, const std::string &provider,
const std::string &model_type)
: encoder_filename(encoder_filename),
decoder_filename(decoder_filename),
joiner_filename(joiner_filename),
tokens(tokens),
num_threads(num_threads),
debug(debug),
provider(provider) {}
provider(provider),
model_type(model_type) {}

void Register(ParseOptions *po);
bool Validate() const;
Expand Down
33 changes: 33 additions & 0 deletions sherpa-onnx/csrc/online-transducer-model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,22 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,

std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
const OnlineTransducerModelConfig &config) {
if (!config.model_type.empty()) {
const auto &model_type = config.model_type;
if (model_type == "conformer") {
return std::make_unique<OnlineConformerTransducerModel>(config);
} else if (model_type == "lstm") {
return std::make_unique<OnlineLstmTransducerModel>(config);
} else if (model_type == "zipformer") {
return std::make_unique<OnlineZipformerTransducerModel>(config);
} else if (model_type == "zipformer2") {
return std::make_unique<OnlineZipformer2TransducerModel>(config);
} else {
SHERPA_ONNX_LOGE(
"Invalid model_type: %s. Trying to load the model to get its type",
model_type.c_str());
}
}
ModelType model_type = ModelType::kUnkown;

{
Expand Down Expand Up @@ -140,6 +156,23 @@ Ort::Value OnlineTransducerModel::BuildDecoderInput(
#if __ANDROID_API__ >= 9
std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
AAssetManager *mgr, const OnlineTransducerModelConfig &config) {
if (!config.model_type.empty()) {
const auto &model_type = config.model_type;
if (model_type == "conformer") {
return std::make_unique<OnlineConformerTransducerModel>(mgr, config);
} else if (model_type == "lstm") {
return std::make_unique<OnlineLstmTransducerModel>(mgr, config);
} else if (model_type == "zipformer") {
return std::make_unique<OnlineZipformerTransducerModel>(mgr, config);
} else if (model_type == "zipformer2") {
return std::make_unique<OnlineZipformer2TransducerModel>(mgr, config);
} else {
SHERPA_ONNX_LOGE(
"Invalid model_type: %s. Trying to load the model to get its type",
model_type.c_str());
}
}

auto buffer = ReadFile(mgr, config.encoder_filename);
auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);

Expand Down
5 changes: 3 additions & 2 deletions sherpa-onnx/python/csrc/online-transducer-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
.def(py::init<const std::string &, const std::string &,
const std::string &, const std::string &, int32_t, bool,
const std::string &>(),
const std::string &, const std::string &>(),
py::arg("encoder_filename"), py::arg("decoder_filename"),
py::arg("joiner_filename"), py::arg("tokens"),
py::arg("num_threads"), py::arg("debug") = false,
py::arg("provider") = "cpu")
py::arg("provider") = "cpu", py::arg("model_type") = "")
.def_readwrite("encoder_filename", &PyClass::encoder_filename)
.def_readwrite("decoder_filename", &PyClass::decoder_filename)
.def_readwrite("joiner_filename", &PyClass::joiner_filename)
.def_readwrite("tokens", &PyClass::tokens)
.def_readwrite("num_threads", &PyClass::num_threads)
.def_readwrite("debug", &PyClass::debug)
.def_readwrite("provider", &PyClass::provider)
.def_readwrite("model_type", &PyClass::model_type)
.def("__str__", &PyClass::ToString);
}

Expand Down
5 changes: 5 additions & 0 deletions sherpa-onnx/python/sherpa_onnx/online_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
max_active_paths: int = 4,
context_score: float = 1.5,
provider: str = "cpu",
model_type: str = "",
):
"""
Please refer to
Expand Down Expand Up @@ -90,6 +91,9 @@ def __init__(
the maximum number of active paths during beam search.
provider:
onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
model_type:
Online transducer model type. Valid values are: conformer, lstm,
zipformer, zipformer2. All other values lead to loading the model twice.
"""
_assert_file_exists(tokens)
_assert_file_exists(encoder)
Expand All @@ -105,6 +109,7 @@ def __init__(
tokens=tokens,
num_threads=num_threads,
provider=provider,
model_type=model_type,
)

feat_config = FeatureExtractorConfig(
Expand Down

0 comments on commit 5a6b55c

Please sign in to comment.