Skip to content

Commit 1e73b2b

Browse files
authored
Enhanced code to extract and parse specific config files from TorchScript archive (#378)
* Enhanced code to extract and parse specific config files from TorchScript archive Signed-off-by: M Q <[email protected]> * Fix flake8 complaint Signed-off-by: M Q <[email protected]> * Bundle names regarded as case insensitive and add "yml" ext Signed-off-by: M Q <[email protected]> Signed-off-by: M Q <[email protected]>
1 parent 4a2ff15 commit 1e73b2b

File tree

2 files changed

+102
-26
lines changed

2 files changed

+102
-26
lines changed

examples/apps/ai_spleen_seg_app/app.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@
2121
from monai.deploy.operators.dicom_seg_writer_operator import DICOMSegmentationWriterOperator, SegmentDescription
2222
from monai.deploy.operators.dicom_series_selector_operator import DICOMSeriesSelectorOperator
2323
from monai.deploy.operators.dicom_series_to_volume_operator import DICOMSeriesToVolumeOperator
24-
from monai.deploy.operators.monai_bundle_inference_operator import IOMapping, MonaiBundleInferenceOperator
24+
from monai.deploy.operators.monai_bundle_inference_operator import (
25+
BundleConfigNames,
26+
IOMapping,
27+
MonaiBundleInferenceOperator,
28+
)
2529

2630
# from monai.deploy.operators.stl_conversion_operator import STLConversionOperator # import as needed.
2731

@@ -62,9 +66,13 @@ def compose(self):
6266
#
6367
# Pertinent MONAI Bundle:
6468
# https://github.com/Project-MONAI/model-zoo/tree/dev/models/spleen_ct_segmentation
69+
70+
config_names = BundleConfigNames(config_names=["inference"]) # Same as the default
71+
6572
bundle_spleen_seg_op = MonaiBundleInferenceOperator(
6673
input_mapping=[IOMapping("image", Image, IOType.IN_MEMORY)],
6774
output_mapping=[IOMapping("pred", Image, IOType.IN_MEMORY)],
75+
bundle_config_names=config_names,
6876
)
6977

7078
# Create DICOM Seg writer providing the required segment description for each segment with

monai/deploy/operators/monai_bundle_inference_operator.py

Lines changed: 93 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import logging
1414
import os
1515
import pickle
16+
import tempfile
1617
import time
1718
import zipfile
1819
from copy import deepcopy
@@ -62,43 +63,110 @@ def get_bundle_config(bundle_path, config_names):
6263
Gets the configuration parser from the specified Torchscript bundle file path.
6364
"""
6465

65-
def _read_from_archive(archive, root_name: str, relative_path: str, path_list: List[str]):
66-
"""A helper function for reading a file in an zip archive.
66+
# Supported config file extensions, tried in this order. Each entry must carry its
# leading dot: Path.with_suffix() rejects a suffix without one, and the name search
# concatenates the suffix directly onto the config name.
bundle_suffixes = (".json", ".yaml", ".yml")  # The only supported file ext(s)
67+
config_folder = "extra"
6768

68-
Tries to read with the full path of # a archive file, if error, then find the relative
69-
path and then read the file.
69+
def _read_from_archive(archive, root_name: str, config_name: str, do_search=True):
    """A helper function for reading the content of a config in the zip archive.

    Tries to read the config at the expected path in the archive, i.e.
    ``<root_name>/<config_folder>/<config_name><suffix>`` for each supported suffix in turn.
    If none can be read and ``do_search`` is true, falls back to a case insensitive search
    of the archive's name list for an entry containing ``<config_name><suffix>``.

    Args:
        archive: An opened zip archive object providing ``read()`` and ``namelist()``.
        root_name (str): Name of the top level folder in the archive.
        config_name (str): Name of the config, with or without a file extension.
        do_search (bool, optional): Whether to search the archive's name list when the
            config is not found at the expected path. Defaults to True.

    Returns:
        The content of the config file as returned by ``archive.read()``.

    Raises:
        IOError: If the config cannot be found, or its content is empty.
    """

    config_name = config_name.split(".")[0]  # In case ext is present
    # Tolerate suffixes declared without the leading dot, e.g. "yml".
    suffixes = [s if s.startswith(".") else f".{s}" for s in bundle_suffixes]
    content_text = None

    # Try directly read with constructed and expected path into the archive
    for suffix in suffixes:
        # Zip member names always use forward slashes, so use the POSIX form of the path
        # instead of str(), which yields backslashes on Windows.
        member = Path(root_name, config_folder, f"{config_name}{suffix}").as_posix()
        logging.debug(f"Trying to read config '{config_name}' content from {member}.")
        try:
            content_text = archive.read(member)
            break
        except Exception as ex:  # Best effort; a missing member raises KeyError.
            logging.debug(f"Error reading from {member}: {ex}. Will try alternative ways.")

    # Try search for the name in the name list of the archive
    if not content_text and do_search:
        logging.debug(f"Trying to find the file in the archive for config '{config_name}'.")
        name_list = archive.namelist()
        # The first hit wins, honoring the suffix priority order; stop at the first match
        # so that a later suffix cannot override an earlier one.
        match = next(
            (
                n
                for suffix in suffixes
                for n in name_list
                if f"{config_name}{suffix}".casefold() in n.casefold()
            ),
            None,
        )
        if match is not None:
            logging.debug(f"Trying to read content of config '{config_name}' from {match}.")
            content_text = archive.read(match)

    if not content_text:
        raise IOError(f"Cannot read config {config_name}{bundle_suffixes} or its content in the archive.")

    return content_text
84105

106+
def _extract_from_archive(
    archive, root_name: str, config_names: List[str], dest_folder: Union[str, Path], do_search=True
):
    """A helper function for extracting the files of the named configs from the archive.

    First tries to extract all configs from the expected location in the archive, i.e.
    ``<root_name>/<config_folder>/<config_name><suffix>``, assuming they all share the same
    suffix. If that fails for every supported suffix and ``do_search`` is true, each config is
    located individually by a case insensitive search of the archive's name list.

    Args:
        archive: An opened zip archive object providing ``extract()``, ``extractall()``
            and ``namelist()``.
        root_name (str): Name of the top level folder in the archive.
        config_names (List[str]): Names of the configs, with or without file extensions.
        dest_folder (Union[str, Path]): Folder to extract the files into.
        do_search (bool, optional): Whether to search the archive's name list when the
            configs are not found at the expected paths. Defaults to True.

    Returns:
        List[str]: Archive member names of the extracted files; each is the path of the
        extracted file relative to ``dest_folder``.

    Raises:
        IOError: If the file of any of the named configs cannot be extracted.
    """

    config_names = [cn.split(".")[0] for cn in config_names]  # In case the extension is present
    # Tolerate suffixes declared without the leading dot, e.g. "yml".
    suffixes = [s if s.startswith(".") else f".{s}" for s in bundle_suffixes]
    file_list: List[str] = []

    # Try directly extract first with the expected paths into the archive
    for suffix in suffixes:
        # Zip member names always use forward slashes, so use the POSIX form of the path.
        candidates = [Path(root_name, config_folder, f"{cn}{suffix}").as_posix() for cn in config_names]
        logging.debug(f"Trying to extract {config_names} with ext {suffix}.")
        try:
            archive.extractall(members=candidates, path=dest_folder)
        except Exception as ex:  # Best effort; a missing member raises KeyError.
            logging.debug(f"Will try file search after error on extracting {config_names} with {candidates}: {ex}")
            continue
        file_list = candidates
        break

    if not file_list and do_search:
        # If files not extracted, try search for expected files in the name list of the archive
        logging.debug(f"Trying to find the config files in the archive for {config_names}.")
        name_list = archive.namelist()
        leftovers = []  # to track any that are not found.
        for cn in config_names:
            # The first hit wins, honoring the suffix priority order.
            match = next(
                (n for suffix in suffixes for n in name_list if f"{cn}{suffix}".casefold() in n.casefold()),
                None,
            )
            if match is None:
                leftovers.append(cn)
                continue
            archive.extract(member=match, path=dest_folder)
            # Record what was extracted, otherwise the caller gets nothing to parse.
            file_list.append(match)

        if leftovers:
            raise IOError(f"Failed to extract content for these config(s): {leftovers}.")
    elif not file_list and config_names:
        # Searching is disabled and nothing was extracted; fail loudly instead of
        # returning an empty list that would be silently parsed as "no config".
        raise IOError(f"Failed to extract content for these config(s): {config_names}.")

    return file_list
151+
152+
# End of helper functions
153+
85154
if isinstance(config_names, str):
86155
config_names = [config_names]
87156

88-
name, _ = os.path.splitext(os.path.basename(bundle_path))
157+
name, _ = os.path.splitext(os.path.basename(bundle_path)) # bundle file name same archive folder name
89158
parser = ConfigParser()
90159

91160
# Parser to read the required metadata and extra config contents from the archive
92-
with zipfile.ZipFile(bundle_path, "r") as archive:
93-
name_list = archive.namelist()
94-
metadata_relative_path = "extra/metadata.json"
95-
metadata_text = _read_from_archive(archive, name, metadata_relative_path, name_list)
96-
parser.read_meta(f=json.loads(metadata_text))
161+
with tempfile.TemporaryDirectory() as tmp_dir:
162+
with zipfile.ZipFile(bundle_path, "r") as archive:
163+
metadata_config_name = "metadata"
164+
metadata_text = _read_from_archive(archive, name, metadata_config_name)
165+
parser.read_meta(f=json.loads(metadata_text))
97166

98-
for cn in config_names:
99-
config_relative_path = f"extra/{cn}.json"
100-
config_text = _read_from_archive(archive, name, config_relative_path, name_list)
101-
parser.read_config(f=json.loads(config_text))
167+
# now get the other named configs
168+
file_list = _extract_from_archive(archive, name, config_names, tmp_dir)
169+
parser.read_config([Path(tmp_dir, f_path) for f_path in file_list])
102170

103171
parser.parse()
104172

@@ -261,7 +329,7 @@ def __init__(
261329
Defaults to "".
262330
bundle_path (Optional[str], optional): For completing . Defaults to None.
263331
bundle_config_names (BundleConfigNames, optional): Relevant config item names in the bundle.
264-
Defaults to None.
332+
Defaults to DEFAULT_BundleConfigNames.
265333
"""
266334

267335
super().__init__(*args, **kwargs)

0 commit comments

Comments
 (0)