Skip to content

Commit f26bbc7

Browse files
Peter Amstutz authored and mr-c committed
Include input files with size and timestamp in cache hash.
1 parent ff8ca7f commit f26bbc7

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

cwltool/draft2tool.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import errno
2525
from typing import Callable, Any, Union, Generator, cast
2626
import hashlib
27+
import shutil
2728

2829
_logger = logging.getLogger("cwltool")
2930

@@ -119,20 +120,30 @@ def makePathMapper(self, reffiles, input_basedir, **kwargs):
119120
def job(self, joborder, input_basedir, output_callback, **kwargs):
120121
# type: (Dict[str,str], str, Callable[[Any, Any], Any], **Any) -> Generator[CommandLineJob, None, None]
121122

123+
jobname = uniquename(kwargs.get("name", shortname(self.tool["id"])))
124+
122125
if kwargs.get("cachedir"):
123126
cacheargs = kwargs.copy()
124127
cacheargs["outdir"] = "/out"
125128
cacheargs["tmpdir"] = "/tmp"
126129
cachebuilder = self._init_job(joborder, input_basedir, **cacheargs)
130+
cachebuilder.pathmapper = PathMapper(set((f["path"] for f in cachebuilder.files)),
131+
input_basedir)
127132
cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings))
128133
(docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
129134
if docker_req and kwargs.get("use_container") is not False:
130135
dockerimg = docker_req.get("dockerImageId") or docker_req.get("dockerPull")
131136
cmdline = ["docker", "run", dockerimg] + cmdline
132-
cmdlinestr = json.dumps(cmdline)
133-
cachekey = hashlib.md5(cmdlinestr).hexdigest()
137+
keydict = {"cmdline": cmdline}
138+
for _,f in cachebuilder.pathmapper.items():
139+
st = os.stat(f[0])
140+
keydict[f[0]] = [st.st_size, int(st.st_mtime * 1000)]
141+
keydictstr = json.dumps(keydict, separators=(',',':'), sort_keys=True)
142+
cachekey = hashlib.md5(keydictstr).hexdigest()
143+
_logger.debug("[job %s] keydictstr is %s -> %s", jobname, keydictstr, cachekey)
134144
jobcache = os.path.join(kwargs["cachedir"], cachekey)
135-
if os.path.isdir(jobcache):
145+
jobcachepending = jobcache + ".pending"
146+
if os.path.isdir(jobcache) and not os.path.isfile(jobcachepending):
136147
class CallbackJob(object):
137148
def __init__(self, job, output_callback, cachebuilder, jobcache):
138149
self.job = job
@@ -143,11 +154,20 @@ def run(self, **kwargs):
143154
self.output_callback(self.job.collect_output_ports(self.job.tool["outputs"],
144155
self.cachebuilder, self.outdir),
145156
"success")
157+
_logger.info("[job %s] Using cached output in %s", jobname, jobcache)
146158
yield CallbackJob(self, output_callback, cachebuilder, jobcache)
147159
return
148160
else:
161+
shutil.rmtree(jobcache, True)
149162
os.makedirs(jobcache)
150163
kwargs["outdir"] = jobcache
164+
open(jobcachepending, "w").close()
165+
def rm_pending_output_callback(output_callback, jobcachepending,
166+
outputs, processStatus):
167+
if processStatus == "success":
168+
os.remove(jobcachepending)
169+
output_callback(outputs, processStatus)
170+
output_callback = functools.partial(rm_pending_output_callback, output_callback, jobcachepending)
151171

152172
builder = self._init_job(joborder, input_basedir, **kwargs)
153173

@@ -163,7 +183,7 @@ def run(self, **kwargs):
163183
j.permanentFailCodes = self.tool.get("permanentFailCodes")
164184
j.requirements = self.requirements
165185
j.hints = self.hints
166-
j.name = uniquename(kwargs.get("name", str(id(j))))
186+
j.name = jobname
167187

168188
_logger.debug(u"[job %s] initializing from %s%s",
169189
j.name,

cwltool/pathmapper.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,9 @@ def mapper(self, src): # type: (str) -> Tuple[str,str]
4242
def files(self): # type: () -> List[str]
4343
return self._pathmap.keys()
4444

45+
def items(self): # type: () -> List[Tuple[str,Tuple[str,str]]]
46+
return self._pathmap.items()
47+
4548
def reversemap(self, target): # type: (str) -> Tuple[str, str]
4649
for k, v in self._pathmap.items():
4750
if v[1] == target:

0 commit comments

Comments
 (0)