@@ -2,7 +2,7 @@
import json
import copy
from .flatten import flatten
-import functools
+from functools import partial
import os
from .pathmapper import PathMapper, DockerPathMapper
from .job import CommandLineJob
@@ -23,6 +23,8 @@
import shellescape
import errno
from typing import Callable, Any, Union, Generator, cast
+import hashlib
+import shutil

_logger = logging.getLogger("cwltool")
@@ -94,6 +96,20 @@ def revmap_file(builder, outdir, f):
    else:
        raise WorkflowException(u"Output file path %s must be within designated output directory (%s) or an input file pass through." % (f["path"], builder.outdir))

+class CallbackJob(object):
+    def __init__(self, job, output_callback, cachebuilder, jobcache):
+        # type: (CommandLineTool, Callable[[Any, Any], Any], Builder, str) -> None
+        self.job = job
+        self.output_callback = output_callback
+        self.cachebuilder = cachebuilder
+        self.outdir = jobcache
+
+    def run(self, **kwargs):
+        # type: (**Any) -> None
+        self.output_callback(self.job.collect_output_ports(self.job.tool["outputs"],
+                                                            self.cachebuilder, self.outdir),
+                             "success")
+

class CommandLineTool(Process):
    def __init__(self, toolpath_object, **kwargs):
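Note on the hunk above: CallbackJob is a lightweight stand-in for a real job. On a cache hit, its run() simply re-collects the output ports from the cached output directory and reports "success" through the normal callback, so callers can treat cached and freshly executed jobs the same way. A rough sketch of how a driver loop might consume it (illustrative only; tool, my_output_callback, runtime_kwargs and the cachedir path are assumptions, not part of this change):

    # Both CommandLineJob and CallbackJob expose run(), so the caller does not
    # need to know whether the result comes from the cache or a real execution.
    for runnable in tool.job(joborder, input_basedir, my_output_callback,
                             cachedir="/tmp/cwl-cache"):
        if runnable:
            runnable.run(**runtime_kwargs)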
@@ -116,34 +132,73 @@ def makePathMapper(self, reffiles, input_basedir, **kwargs):
            raise WorkflowException(u"Missing input file %s" % e)

    def job(self, joborder, input_basedir, output_callback, **kwargs):
-        # type: (Dict[str,str], str, Callable[[Any, Any], Any], **Any) -> Generator[CommandLineJob, None, None]
-        builder = self._init_job(joborder, input_basedir, **kwargs)
-
-        if self.tool["baseCommand"]:
-            for n, b in enumerate(aslist(self.tool["baseCommand"])):
-                builder.bindings.append({
-                    "position": [-1000000, n],
-                    "valueFrom": b
-                })
-
-        if self.tool.get("arguments"):
-            for i, a in enumerate(self.tool["arguments"]):
-                if isinstance(a, dict):
-                    a = copy.copy(a)
-                    if a.get("position"):
-                        a["position"] = [a["position"], i]
-                    else:
-                        a["position"] = [0, i]
-                    a["do_eval"] = a["valueFrom"]
-                    a["valueFrom"] = None
-                    builder.bindings.append(a)
+        # type: (Dict[str,str], str, Callable[..., Any], **Any) -> Generator[Union[CommandLineJob, CallbackJob], None, None]
+
+        jobname = uniquename(kwargs.get("name", shortname(self.tool.get("id", "job"))))
+
+        if kwargs.get("cachedir"):
+            cacheargs = kwargs.copy()
+            cacheargs["outdir"] = "/out"
+            cacheargs["tmpdir"] = "/tmp"
+            cachebuilder = self._init_job(joborder, input_basedir, **cacheargs)
+            cachebuilder.pathmapper = PathMapper(set((f["path"] for f in cachebuilder.files)),
+                                                 input_basedir)
+
+            cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings))
+            (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
+            if docker_req and kwargs.get("use_container") is not False:
+                dockerimg = docker_req.get("dockerImageId") or docker_req.get("dockerPull")
+                cmdline = ["docker", "run", dockerimg] + cmdline
+            keydict = {"cmdline": cmdline}
+
+            for _, f in cachebuilder.pathmapper.items():
+                st = os.stat(f[0])
+                keydict[f[0]] = [st.st_size, int(st.st_mtime * 1000)]
+
+            interesting = {"DockerRequirement",
+                           "EnvVarRequirement",
+                           "CreateFileRequirement",
+                           "ShellCommandRequirement"}
+            for rh in (self.requirements, self.hints):
+                for r in reversed(rh):
+                    if r["class"] in interesting and r["class"] not in keydict:
+                        keydict[r["class"]] = r
+
+            keydictstr = json.dumps(keydict, separators=(',', ':'), sort_keys=True)
+            cachekey = hashlib.md5(keydictstr).hexdigest()
+
+            _logger.debug("[job %s] keydictstr is %s -> %s", jobname, keydictstr, cachekey)
+
+            jobcache = os.path.join(kwargs["cachedir"], cachekey)
+            jobcachepending = jobcache + ".pending"
+
+            if os.path.isdir(jobcache) and not os.path.isfile(jobcachepending):
+                if docker_req and kwargs.get("use_container") is not False:
+                    cachebuilder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
                else:
-                    builder.bindings.append({
-                        "position": [0, i],
-                        "valueFrom": a
-                    })
+                    cachebuilder.outdir = jobcache

-        builder.bindings.sort(key=lambda a: a["position"])
+                _logger.info("[job %s] Using cached output in %s", jobname, jobcache)
+                yield CallbackJob(self, output_callback, cachebuilder, jobcache)
+                return
+            else:
+                _logger.info("[job %s] Output of job will be cached in %s", jobname, jobcache)
+                shutil.rmtree(jobcache, True)
+                os.makedirs(jobcache)
+                kwargs["outdir"] = jobcache
+                open(jobcachepending, "w").close()
+                def rm_pending_output_callback(output_callback, jobcachepending,
+                                               outputs, processStatus):
+                    if processStatus == "success":
+                        os.remove(jobcachepending)
+                    output_callback(outputs, processStatus)
+                output_callback = cast(
+                    Callable[..., Any],  # known bug in mypy
+                    # https://github.com/python/mypy/issues/797
+                    partial(rm_pending_output_callback, output_callback,
+                            jobcachepending))
+
+        builder = self._init_job(joborder, input_basedir, **kwargs)

        reffiles = set((f["path"] for f in builder.files))

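The cache key built above combines the fully generated command line (including the docker run prefix when a container is used), the size and mtime of every input file, and any DockerRequirement, EnvVarRequirement, CreateFileRequirement or ShellCommandRequirement in effect. The dict is serialized as compact, key-sorted JSON, hashed with MD5, and the digest names the per-job directory under kwargs["cachedir"]; a <cachekey>.pending marker file guards against reusing output from a job that never reported success. A self-contained sketch of that keying scheme (the dict values here are invented for illustration, and .encode() is added so the snippet also runs on Python 3, whereas the patch itself hashes a Python 2 str):

    import hashlib
    import json

    keydict = {
        "cmdline": ["docker", "run", "debian:8", "echo", "hello"],
        "/data/input.txt": [1024, 1454123456000],  # [st_size, int(st_mtime * 1000)]
        "DockerRequirement": {"class": "DockerRequirement", "dockerPull": "debian:8"},
    }
    keydictstr = json.dumps(keydict, separators=(',', ':'), sort_keys=True)
    cachekey = hashlib.md5(keydictstr.encode("utf-8")).hexdigest()
    print(cachekey)  # 32-character hex digest used as the cache directory name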
@@ -157,7 +212,7 @@ def job(self, joborder, input_basedir, output_callback, **kwargs):
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints
-        j.name = uniquename(kwargs.get("name", str(id(j))))
+        j.name = jobname

        _logger.debug(u"[job %s] initializing from %s%s",
                      j.name,
@@ -195,7 +250,7 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any]

        _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))

-        dockerReq, _ = self.get_requirement("DockerRequirement")
+        dockerReq = self.get_requirement("DockerRequirement")[0]
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
@@ -205,19 +260,19 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any]
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

-        createFiles, _ = self.get_requirement("CreateFileRequirement")
+        createFiles = self.get_requirement("CreateFileRequirement")[0]
        j.generatefiles = {}
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))

        j.environment = {}
-        evr, _ = self.get_requirement("EnvVarRequirement")
+        evr = self.get_requirement("EnvVarRequirement")[0]
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

-        shellcmd, _ = self.get_requirement("ShellCommandRequirement")
+        shellcmd = self.get_requirement("ShellCommandRequirement")[0]
        if shellcmd:
            cmd = []  # type: List[str]
            for b in builder.bindings:
@@ -230,7 +285,8 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any]
        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
-        j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
+        j.collect_outputs = partial(
+            self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
@@ -246,9 +302,9 @@ def collect_output_ports(self, ports, builder, outdir):
                _logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
                adjustFileObjs(ret, remove_hostfs)
                adjustFileObjs(ret,
-                        cast(Callable[[Any], Any],  # known bug in mypy
+                               cast(Callable[[Any], Any],  # known bug in mypy
                            # https://github.com/python/mypy/issues/797
-                            functools.partial(revmap_file, builder, outdir)))
+                                    partial(revmap_file, builder, outdir)))
                adjustFileObjs(ret, remove_hostfs)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
                return ret
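The cast(Callable[..., Any], partial(...)) wrappers here and in the caching hunk work around the limitation the in-code comments reference (mypy issue 797, limited inference for functools.partial results). A minimal illustration of the same pattern, not taken from the patch:

    from functools import partial
    from typing import Any, Callable, cast

    def add(a, b):
        # type: (int, int) -> int
        return a + b

    # Older mypy loses track of the partial's call signature; the explicit cast
    # keeps the checker happy without changing runtime behaviour.
    add_two = cast(Callable[..., Any], partial(add, 2))
    print(add_two(3))  # -> 5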
@@ -273,7 +329,7 @@ def collect_output(self, schema, builder, outdir):
            binding = schema["outputBinding"]
            globpatterns = []  # type: List[str]

-            revmap = functools.partial(revmap_file, builder, outdir)
+            revmap = partial(revmap_file, builder, outdir)

            if "glob" in binding:
                for gb in aslist(binding["glob"]):