Export emformer RNNT encode, predict, join (#173)

kirklandsign · facebook-github-bot · commit 7f395fdda1ea · 2023-09-14T14:03:33.000-07:00
Summary: Pull Request resolved: #173. An example to check the export path for the Emformer RNN-T transcribe (encode), predict, and join methods. Reviewed By: kimishpatel. Differential Revision: D48327041. fbshipit-source-id: 97bb3ee3feadc01ac4f96b5e5702adb867aa2877
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
@@ -19,7 +19,9 @@
 DEFAULT_RUNNER = "linux.2xlarge"
 RUNNERS = {
     # This one runs OOM on smaller runner, the root cause is unclear (T163016365)
-    "w2l": "linux.12xlarge"
+    "w2l": "linux.12xlarge",
+    # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
+    "emformer_join": "linux.12xlarge",
 }
 
 
diff --git a/examples/export/utils.py b/examples/export/utils.py
@@ -18,9 +18,9 @@
 
 _CAPTURE_CONFIG = exir.CaptureConfig(enable_aot=True)
 
-# Explicitly force the activation of the IR validator
+# TODO(T163721729): Enable IR check after decomposing div.Tensor_mode
 _EDGE_COMPILE_CONFIG = exir.EdgeCompileConfig(
-    _check_ir_validity=True,
+    _check_ir_validity=False,
 )
 
 
diff --git a/examples/models/TARGETS b/examples/models/TARGETS
@@ -10,6 +10,7 @@ python_library(
         "//caffe2:torch",
         "//executorch/examples/models:model_base",  # @manual
         "//executorch/examples/models/deeplab_v3:dl3_model",  # @manual
+        "//executorch/examples/models/emformer_rnnt:emformer_rnnt_model",  # @manual
         "//executorch/examples/models/inception_v3:ic3_model",  # @manual
         "//executorch/examples/models/inception_v4:ic4_model",  # @manual
         "//executorch/examples/models/mobilebert:mobilebert_model",  # @manual
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
@@ -12,6 +12,9 @@
     "add": ("toy_model", "AddModule"),
     "add_mul": ("toy_model", "AddMulModule"),
     "dl3": ("deeplab_v3", "DeepLabV3ResNet50Model"),
+    "emformer_transcribe": ("emformer_rnnt", "EmformerRnntTranscriberModel"),
+    "emformer_predict": ("emformer_rnnt", "EmformerRnntPredictorModel"),
+    "emformer_join": ("emformer_rnnt", "EmformerRnntJoinerModel"),
     "mobilebert": ("mobilebert", "MobileBertModelExample"),
     "mv2": ("mobilenet_v2", "MV2Model"),
     "mv3": ("mobilenet_v3", "MV3Model"),
diff --git a/examples/models/emformer_rnnt/__init__.py b/examples/models/emformer_rnnt/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import (
+    EmformerRnntJoinerModel,
+    EmformerRnntPredictorModel,
+    EmformerRnntTranscriberModel,
+)
+
+__all__ = [
+    EmformerRnntTranscriberModel,
+    EmformerRnntPredictorModel,
+    EmformerRnntJoinerModel,
+]
diff --git a/examples/models/emformer_rnnt/model.py b/examples/models/emformer_rnnt/model.py
@@ -0,0 +1,132 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import logging
+
+import torch
+import torchaudio
+
+from ..model_base import EagerModelBase
+
+
+# Log-record layout: "[<file name>:<line number>] <message>".
+FORMAT = "[%(filename)s:%(lineno)s] %(message)s"
+# NOTE(review): no `level=` is passed here, so the root logger keeps whatever
+# threshold is already in effect; the logging.info() calls in this module may be
+# filtered out unless the level is lowered elsewhere -- confirm.
+logging.basicConfig(format=FORMAT)
+
+
+__all__ = [
+    "EmformerRnntTranscriberModel",
+    "EmformerRnntPredictorModel",
+    "EmformerRnntJoinerModel",
+]
+
+
+class EmformerRnntTranscriberExample(torch.nn.Module):
+    """
+    This is a wrapper for validating transcriber for the Emformer RNN-T architecture.
+    It does not reflect the actual usage such as beam search, but rather an example for the export workflow.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        bundle = torchaudio.pipelines.EMFORMER_RNNT_BASE_LIBRISPEECH
+        decoder = bundle.get_decoder()
+        m = decoder.model
+        self.rnnt = m
+
+    def forward(self, transcribe_inputs):
+        return self.rnnt.transcribe(*transcribe_inputs)
+
+
+class EmformerRnntTranscriberModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading emformer rnnt transcriber")
+        m = EmformerRnntTranscriberExample()
+        logging.info("Loaded emformer rnnt transcriber")
+        return m
+
+    def get_example_inputs(self):
+        transcribe_inputs = (
+            torch.randn(1, 128, 80),
+            torch.tensor([128]),
+        )
+        return (transcribe_inputs,)
+
+
+class EmformerRnntPredictorExample(torch.nn.Module):
+    """
+    This is a wrapper for validating predictor for the Emformer RNN-T architecture.
+    It does not reflect the actual usage such as beam search, but rather an example for the export workflow.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        bundle = torchaudio.pipelines.EMFORMER_RNNT_BASE_LIBRISPEECH
+        decoder = bundle.get_decoder()
+        m = decoder.model
+        self.rnnt = m
+
+    def forward(self, predict_inputs):
+        return self.rnnt.predict(*predict_inputs)
+
+
+class EmformerRnntPredictorModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading emformer rnnt predictor")
+        m = EmformerRnntPredictorExample()
+        logging.info("Loaded emformer rnnt predictor")
+        return m
+
+    def get_example_inputs(self):
+        predict_inputs = (
+            torch.zeros([1, 128], dtype=int),
+            torch.tensor([128], dtype=int),
+            None,
+        )
+        return (predict_inputs,)
+
+
+class EmformerRnntJoinerExample(torch.nn.Module):
+    """
+    This is a wrapper for validating joiner for the Emformer RNN-T architecture.
+    It does not reflect the actual usage such as beam search, but rather an example for the export workflow.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        bundle = torchaudio.pipelines.EMFORMER_RNNT_BASE_LIBRISPEECH
+        decoder = bundle.get_decoder()
+        m = decoder.model
+        self.rnnt = m
+
+    def forward(self, predict_inputs):
+        return self.rnnt.join(*predict_inputs)
+
+
+class EmformerRnntJoinerModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading emformer rnnt joiner")
+        m = EmformerRnntJoinerExample()
+        logging.info("Loaded emformer rnnt joiner")
+        return m
+
+    def get_example_inputs(self):
+        join_inputs = (
+            torch.rand([1, 128, 1024]),
+            torch.tensor([128]),
+            torch.rand([1, 128, 1024]),
+            torch.tensor([128]),
+        )
+        return (join_inputs,)

Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,9 @@`
`19`	`19`	`DEFAULT_RUNNER = "linux.2xlarge"`
`20`	`20`	`RUNNERS = {`
`21`	`21`	`# This one runs OOM on smaller runner, the root cause is unclear (T163016365)`
`22`		`- "w2l": "linux.12xlarge"`
	`22`	`+ "w2l": "linux.12xlarge",`
	`23`	`+ # This one causes timeout on smaller runner, the root cause is unclear (T161064121)`
	`24`	`+ "emformer_join": "linux.12xlarge",`
`23`	`25`	`}`
`24`	`26`
`25`	`27`
Original file line number	Diff line number	Diff line change
`@@ -18,9 +18,9 @@`
`18`	`18`
`19`	`19`	`_CAPTURE_CONFIG = exir.CaptureConfig(enable_aot=True)`
`20`	`20`
`21`		`-# Explicitly force the activation of the IR validator`
	`21`	`+# TODO(T163721729): Enable IR check after decomposing div.Tensor_mode`
`22`	`22`	`_EDGE_COMPILE_CONFIG = exir.EdgeCompileConfig(`
`23`		`- _check_ir_validity=True,`
	`23`	`+ _check_ir_validity=False,`
`24`	`24`	`)`
`25`	`25`
`26`	`26`