1
+ """Loads a CWL document."""
1
2
from __future__ import absolute_import
2
3
# pylint: disable=unused-import
3
- """Loads a CWL document."""
4
4
5
5
import logging
6
6
import os
13
13
14
14
import requests .sessions
15
15
from six import itervalues , string_types
16
+ from six .moves import urllib
16
17
17
18
import schema_salad .schema as schema
18
19
from avro .schema import Names
19
20
from ruamel .yaml .comments import CommentedMap , CommentedSeq
20
21
from schema_salad .ref_resolver import Fetcher , Loader , file_uri
21
22
from schema_salad .sourceline import cmap
22
23
from schema_salad .validate import ValidationException
23
- from six .moves import urllib
24
24
25
25
from . import process , update
26
26
from .errors import WorkflowException
53
53
}
54
54
} # type: Dict[Text, Union[Dict[Any, Any], Text, Iterable[Text]]]
55
55
56
+
57
# Loader instances already built by default_loader(), keyed by the
# fetcher constructor they were created with.
loaders = {}  # type: Dict[Any, Loader]


def default_loader(fetcher_constructor):
    # type: (Any) -> Loader
    """Return the Loader cached for ``fetcher_constructor``.

    The first request for a given ``fetcher_constructor`` builds a new
    ``Loader`` from ``jobloaderctx`` and records it in ``loaders``; later
    requests with the same key get that same instance back.
    """
    try:
        return loaders[fetcher_constructor]
    except KeyError:
        # Nothing cached for this constructor yet; build one below.
        pass

    created = Loader(jobloaderctx, fetcher_constructor=fetcher_constructor)
    loaders[fetcher_constructor] = created
    return created
66
+
56
67
def resolve_tool_uri (argsworkflow , # type: Text
57
68
resolver = None , # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
58
- fetcher_constructor = None ,
59
- # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
69
+ fetcher_constructor = None , # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
60
70
document_loader = None # type: Loader
61
- ):
62
- # type: (...) -> Tuple[Text, Text]
71
+ ): # type: (...) -> Tuple[Text, Text]
63
72
64
73
uri = None # type: Text
65
74
split = urllib .parse .urlsplit (argsworkflow )
66
75
# On Windows paths, urlsplit misinterprets the drive letter as a URL scheme, so only known schemes are accepted here.
67
- if split .scheme and split .scheme in [u'http' ,u'https' ,u'file' ]:
76
+ if split .scheme and split .scheme in [u'http' , u'https' , u'file' ]:
68
77
uri = argsworkflow
69
78
elif os .path .exists (os .path .abspath (argsworkflow )):
70
79
uri = file_uri (str (os .path .abspath (argsworkflow )))
71
80
elif resolver :
72
81
if document_loader is None :
73
- document_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
82
+ document_loader = default_loader ( fetcher_constructor ) # type: ignore
74
83
uri = resolver (document_loader , argsworkflow )
75
84
76
85
if uri is None :
@@ -85,18 +94,17 @@ def resolve_tool_uri(argsworkflow, # type: Text
85
94
86
95
def fetch_document (argsworkflow , # type: Union[Text, Dict[Text, Any]]
87
96
resolver = None , # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
88
- fetcher_constructor = None
89
- # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
90
- ):
91
- # type: (...) -> Tuple[Loader, CommentedMap, Text]
97
+ fetcher_constructor = None # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
98
+ ): # type: (...) -> Tuple[Loader, CommentedMap, Text]
92
99
"""Retrieve a CWL document."""
93
100
94
- document_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
101
+ document_loader = default_loader ( fetcher_constructor ) # type: ignore
95
102
96
103
uri = None # type: Text
97
104
workflowobj = None # type: CommentedMap
98
105
if isinstance (argsworkflow , string_types ):
99
- uri , fileuri = resolve_tool_uri (argsworkflow , resolver = resolver , document_loader = document_loader )
106
+ uri , fileuri = resolve_tool_uri (argsworkflow , resolver = resolver ,
107
+ document_loader = document_loader )
100
108
workflowobj = document_loader .fetch (fileuri )
101
109
elif isinstance (argsworkflow , dict ):
102
110
uri = "#" + Text (id (argsworkflow ))
@@ -126,7 +134,7 @@ def _convert_stdstreams_to_files(workflowobj):
126
134
sort_keys = True ).encode ('utf-8' )).hexdigest ())
127
135
workflowobj [streamtype ] = filename
128
136
out ['type' ] = 'File'
129
- out ['outputBinding' ] = {'glob' : filename }
137
+ out ['outputBinding' ] = cmap ( {'glob' : filename })
130
138
for inp in workflowobj .get ('inputs' , []):
131
139
if inp .get ('type' ) == 'stdin' :
132
140
if 'inputBinding' in inp :
@@ -170,25 +178,25 @@ def validate_document(document_loader, # type: Loader
170
178
enable_dev = False , # type: bool
171
179
strict = True , # type: bool
172
180
preprocess_only = False , # type: bool
173
- fetcher_constructor = None ,
174
- skip_schemas = None ,
175
- # type: Callable[[ Dict[Text, Text], requests.sessions.Session], Fetcher ]
176
- overrides = None # type: List [Dict]
181
+ fetcher_constructor = None , # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
182
+ skip_schemas = None , # type: bool
183
+ overrides = None , # type: List[ Dict]
184
+ metadata = None , # type: Optional [Dict]
177
185
):
178
186
# type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
179
187
"""Validate a CWL document."""
180
188
181
189
if isinstance (workflowobj , list ):
182
- workflowobj = {
190
+ workflowobj = cmap ( {
183
191
"$graph" : workflowobj
184
- }
192
+ }, fn = uri )
185
193
186
194
if not isinstance (workflowobj , dict ):
187
195
raise ValueError ("workflowjobj must be a dict, got '%s': %s" % (type (workflowobj ), workflowobj ))
188
196
189
197
jobobj = None
190
198
if "cwl:tool" in workflowobj :
191
- job_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
199
+ job_loader = default_loader ( fetcher_constructor ) # type: ignore
192
200
jobobj , _ = job_loader .resolve_all (workflowobj , uri )
193
201
uri = urllib .parse .urljoin (uri , workflowobj ["https://w3id.org/cwl/cwl#tool" ])
194
202
del cast (dict , jobobj )["https://w3id.org/cwl/cwl#tool" ]
@@ -200,22 +208,25 @@ def validate_document(document_loader, # type: Loader
200
208
workflowobj = fetch_document (uri , fetcher_constructor = fetcher_constructor )[1 ]
201
209
202
210
fileuri = urllib .parse .urldefrag (uri )[0 ]
203
-
204
- if "cwlVersion" in workflowobj :
205
- if not isinstance (workflowobj ["cwlVersion" ], (str , Text )):
206
- raise Exception ("'cwlVersion' must be a string, got %s" % type (workflowobj ["cwlVersion" ]))
207
- # strip out version
208
- workflowobj ["cwlVersion" ] = re .sub (
209
- r"^(?:cwl:|https://w3id.org/cwl/cwl#)" , "" ,
210
- workflowobj ["cwlVersion" ])
211
- if workflowobj ["cwlVersion" ] not in list (ALLUPDATES ):
212
- # print out all the Supported Versions of cwlVersion
213
- versions = list (ALLUPDATES ) # ALLUPDATES is a dict
214
- versions .sort ()
215
- raise ValidationException ("'cwlVersion' not valid. Supported CWL versions are: \n {}" .format ("\n " .join (versions )))
216
- else :
217
- raise ValidationException ("No cwlVersion found."
218
- "Use the following syntax in your CWL workflow to declare version: cwlVersion: <version>" )
211
+ if "cwlVersion" not in workflowobj :
212
+ if metadata and 'cwlVersion' in metadata :
213
+ workflowobj ['cwlVersion' ] = metadata ['cwlVersion' ]
214
+ else :
215
+ raise ValidationException ("No cwlVersion found."
216
+ "Use the following syntax in your CWL document to declare "
217
+ "the version: cwlVersion: <version>" )
218
+
219
+ if not isinstance (workflowobj ["cwlVersion" ], (str , Text )):
220
+ raise Exception ("'cwlVersion' must be a string, got %s" % type (workflowobj ["cwlVersion" ]))
221
+ # strip out version
222
+ workflowobj ["cwlVersion" ] = re .sub (
223
+ r"^(?:cwl:|https://w3id.org/cwl/cwl#)" , "" ,
224
+ workflowobj ["cwlVersion" ])
225
+ if workflowobj ["cwlVersion" ] not in list (ALLUPDATES ):
226
+ # print out all the Supported Versions of cwlVersion
227
+ versions = list (ALLUPDATES ) # ALLUPDATES is a dict
228
+ versions .sort ()
229
+ raise ValidationException ("'cwlVersion' not valid. Supported CWL versions are: \n {}" .format ("\n " .join (versions )))
219
230
220
231
if workflowobj ["cwlVersion" ] == "draft-2" :
221
232
workflowobj = cast (CommentedMap , cmap (update ._draft2toDraft3dev1 (
@@ -238,36 +249,36 @@ def validate_document(document_loader, # type: Loader
238
249
_add_blank_ids (workflowobj )
239
250
240
251
workflowobj ["id" ] = fileuri
241
- processobj , metadata = document_loader .resolve_all (workflowobj , fileuri )
252
+ processobj , new_metadata = document_loader .resolve_all (workflowobj , fileuri )
242
253
if not isinstance (processobj , (CommentedMap , CommentedSeq )):
243
254
raise ValidationException ("Workflow must be a dict or list." )
244
255
245
- if not metadata :
256
+ if not new_metadata :
246
257
if not isinstance (processobj , dict ):
247
258
raise ValidationException ("Draft-2 workflows must be a dict." )
248
- metadata = cast (CommentedMap , cmap ({ "$namespaces" : processobj . get ( "$namespaces" , {}),
249
- "$schemas " : processobj .get ("$schemas " , [] ),
250
- "cwlVersion " : processobj [ "cwlVersion" ]} ,
251
- fn = fileuri ))
259
+ new_metadata = cast (CommentedMap , cmap (
260
+ { "$namespaces " : processobj .get ("$namespaces " , {} ),
261
+ "$schemas " : processobj . get ( "$schemas" , []) ,
262
+ "cwlVersion" : processobj [ "cwlVersion" ]}, fn = fileuri ))
252
263
253
264
_convert_stdstreams_to_files (workflowobj )
254
265
255
266
if preprocess_only :
256
- return document_loader , avsc_names , processobj , metadata , uri
267
+ return document_loader , avsc_names , processobj , new_metadata , uri
257
268
258
269
schema .validate_doc (avsc_names , processobj , document_loader , strict )
259
270
260
- if metadata .get ("cwlVersion" ) != update .LATEST :
271
+ if new_metadata .get ("cwlVersion" ) != update .LATEST :
261
272
processobj = cast (CommentedMap , cmap (update .update (
262
- processobj , document_loader , fileuri , enable_dev , metadata )))
273
+ processobj , document_loader , fileuri , enable_dev , new_metadata )))
263
274
264
275
if jobobj :
265
- metadata [u"cwl:defaults" ] = jobobj
276
+ new_metadata [u"cwl:defaults" ] = jobobj
266
277
267
278
if overrides :
268
- metadata [u"cwltool:overrides" ] = overrides
279
+ new_metadata [u"cwltool:overrides" ] = overrides
269
280
270
- return document_loader , avsc_names , processobj , metadata , uri
281
+ return document_loader , avsc_names , processobj , new_metadata , uri
271
282
272
283
273
284
def make_tool (document_loader , # type: Loader
@@ -332,7 +343,8 @@ def load_tool(argsworkflow, # type: Union[Text, Dict[Text, Any]]
332
343
document_loader , avsc_names , processobj , metadata , uri = validate_document (
333
344
document_loader , workflowobj , uri , enable_dev = enable_dev ,
334
345
strict = strict , fetcher_constructor = fetcher_constructor ,
335
- overrides = overrides )
346
+ overrides = overrides , metadata = kwargs .get ('metadata' , None )
347
+ if kwargs else None )
336
348
return make_tool (document_loader , avsc_names , metadata , uri ,
337
349
makeTool , kwargs if kwargs else {})
338
350
0 commit comments