Skip to content

Commit f6d8198

Browse files
committed
Experimental fast validator code path, using cwl-utils
Requires schema-salad codegen improvements, which are still pending. On a very large workflow I was testing with, the validation time went from 120 seconds to 20 seconds. This is a work in progress.
1 parent 0e2ced5 commit f6d8198

File tree

3 files changed

+62
-3
lines changed

3 files changed

+62
-3
lines changed

cwltool/argparser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,13 @@ def arg_parser() -> argparse.ArgumentParser:
579579
default=True,
580580
help=argparse.SUPPRESS,
581581
)
582+
parser.add_argument(
583+
"--fast-validator",
584+
dest="fast_validator",
585+
action="store_true",
586+
default=False,
587+
help=argparse.SUPPRESS,
588+
)
582589

583590
reggroup = parser.add_mutually_exclusive_group()
584591
reggroup.add_argument(

cwltool/context.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ def __init__(self, kwargs: Optional[Dict[str, Any]] = None) -> None:
103103
self.singularity = False # type: bool
104104
self.podman = False # type: bool
105105
self.eval_timeout = 60 # type: float
106+
self.codegen_idx = {}
107+
self.fast_validator = False
106108

107109
super().__init__(kwargs)
108110

cwltool/load_tool.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
from .update import ALLUPDATES
4141
from .utils import CWLObjectType, ResolverType, visit_class
4242

43+
import cwl_utils.parser.cwl_v1_2
44+
import cwl_utils.parser.cwl_v1_2_utils
45+
4346
jobloaderctx = {
4447
"cwl": "https://w3id.org/cwl/cwl#",
4548
"cwltool": "http://commonwl.org/cwltool#",
@@ -263,6 +266,45 @@ def _add_blank_ids(
263266
)
264267
)
265268

269+
def _fast_validator_convert_stdstreams_to_files(processobj) -> None:
    """Apply cwl_utils' ``convert_stdstreams_to_files`` throughout *processobj*.

    - A cwl_utils v1.2 ``CommandLineTool`` is converted directly.
    - A cwl_utils v1.2 ``Workflow`` is walked through the ``run`` attribute
      of each of its steps.
    - A ``MutableSequence`` is walked element by element.

    Any other kind of value is left untouched.
    """
    v1_2 = cwl_utils.parser.cwl_v1_2

    if isinstance(processobj, v1_2.CommandLineTool):
        cwl_utils.parser.cwl_v1_2_utils.convert_stdstreams_to_files(processobj)
        return

    if isinstance(processobj, v1_2.Workflow):
        # Lazy on purpose: each step's ``run`` is read right before it is
        # visited, exactly as a plain loop over the steps would do.
        children = (step.run for step in processobj.steps)
    elif isinstance(processobj, MutableSequence):
        children = processobj
    else:
        return

    for child in children:
        _fast_validator_convert_stdstreams_to_files(child)
278+
279+
280+
def fast_validator(workflowobj, fileuri, uri, loadingContext: LoadingContext):
    """Load and convert a document through the cwl_utils v1.2 parser.

    ``loadingContext.codegen_idx`` is handed to the parser as its index.
    The document is only loaded when *uri* is not already a key of that
    index; afterwards the entry stored under *uri* is unpacked as an
    ``(objects, loading options)`` pair.

    The objects have their standard streams converted to files, are saved
    with absolute URIs (``relative_uris=False``), and a metadata mapping is
    assembled from the loading options.

    Returns a ``(processobj, metadata)`` pair, each wrapped with ``cmap``.
    """
    parser = cwl_utils.parser.cwl_v1_2
    index = loadingContext.codegen_idx

    loading_options = parser.LoadingOptions(idx=index, fileuri=fileuri)

    if uri not in index:
        parser.load_document_with_metadata(
            workflowobj,
            fileuri,
            loadingOptions=loading_options,
            addl_metadata_fields=("id", "cwlVersion"),
        )

    objects, loadopt = index[uri]

    _fast_validator_convert_stdstreams_to_files(objects)

    processobj = parser.save(objects, relative_uris=False)

    metadata = {"id": loadopt.fileuri}

    # Optional document-level fields are copied only when they are set.
    if loadopt.namespaces:
        metadata["$namespaces"] = loadopt.namespaces
    if loadopt.schemas:
        metadata["$schemas"] = loadopt.schemas
    if loadopt.baseuri:
        metadata["$base"] = loadopt.baseuri

    # A value present on the saved process object wins over the one recorded
    # in the loading options' additional metadata.
    process_is_mapping = isinstance(processobj, MutableMapping)
    for key, fallback in loadopt.addl_metadata.items():
        metadata[key] = (
            processobj[key] if process_is_mapping and key in processobj else fallback
        )

    return cmap(processobj), cmap(metadata)
266308

267309
def resolve_and_validate_document(
268310
loadingContext: LoadingContext,
@@ -381,8 +423,15 @@ def resolve_and_validate_document(
381423
if cwlVersion == "v1.0":
382424
_add_blank_ids(workflowobj)
383425

384-
document_loader.resolve_all(workflowobj, fileuri)
385-
processobj, metadata = document_loader.resolve_ref(uri)
426+
if cwlVersion != "v1.2":
427+
loadingContext.fast_validator = False
428+
429+
if loadingContext.fast_validator:
430+
processobj, metadata = fast_validator(workflowobj, fileuri, uri, loadingContext)
431+
else:
432+
document_loader.resolve_all(workflowobj, fileuri)
433+
processobj, metadata = document_loader.resolve_ref(uri)
434+
386435
if not isinstance(processobj, (CommentedMap, CommentedSeq)):
387436
raise ValidationException("Workflow must be a CommentedMap or CommentedSeq.")
388437

@@ -405,7 +454,8 @@ def resolve_and_validate_document(
405454
if isinstance(processobj, CommentedMap):
406455
uri = processobj["id"]
407456

408-
_convert_stdstreams_to_files(workflowobj)
457+
if not loadingContext.fast_validator:
458+
_convert_stdstreams_to_files(workflowobj)
409459

410460
if isinstance(jobobj, CommentedMap):
411461
loadingContext.jobdefaults = jobobj

0 commit comments

Comments
 (0)