Skip to content

Commit d738df0

Browse files
committed
Beta support for configurable dependency resolution & Biocontainers.
Consider the included tool ``seqtk_seq.cwl``. It includes the following SoftwareRequirement hint: ``` hints: SoftwareRequirement: packages: - package: seqtk version: - 1.2 ``` I'm not happy that ``version`` is a list - but I can live with it for now I guess. If cwltool is executed with the hidden ``--beta-conda-dependencies`` flag, this requirement will be processed by galaxy-lib, Conda will be installed, and seqtk will be installed, and a Conda environment including seqtk will be set up for the job. ``` virtualenv .venv . .venv/bin/activate python setup.py install pip install galaxy-lib cwltool --beta-conda-dependencies tests/seqtk_seq.cwl tests/seqtk_seq_job.json ``` Additional flags are available to configure dependency resolution in a more fine-grained way - using Conda however has a number of advantages that make it particularly well suited to CWL. Conda packages are distributed as binaries that work across Mac and Linux and work on relatively old versions of Linux (great for HPC). Conda also doesn't require root and supports installation of multiple different versions of a package - again these factors make it great for HPC and non-Docker targets. The Biocontainers project (previously Biodocker) dovetails nicely with this. Every version of every Bioconda package has a corresponding best-practice (very lightweight, very small) Docker container on quay.io (assembled by @bgruening and colleagues). There are over 1800 such containers currently. Continuing with the example above, the new ``--beta-use-biocontainers`` flag instructs cwltool to fetch the corresponding Biocontainers container from quay.io automatically or build one to use locally (required for instance for tools with multiple software requirements - fat tools). ``` cwltool --beta-use-biocontainers tests/seqtk_seq.cwl tests/seqtk_seq_job.json ``` These containers contain the same binaries that the package would use locally (outside of Docker). 
Therefore this technique allows cross platform reproducibility/remixability across CWL, Galaxy, and CLI - both inside and outside of Docker. My sincerest hope is that we move away from CWL-specific Dockerfiles. For less effort, a community bioconda package can be made and the result can be used in many more contexts. The Docker image will then be maintained by the community Biocontainer project. Rebased with correct spelling of DependenciesConfiguration thanks to @tetron.
1 parent d9de04f commit d738df0

29 files changed

+961
-11
lines changed

cwltool/builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def __init__(self): # type: () -> None
5050
# Will be default "no_listing" for CWL v1.1
5151
self.loadListing = "deep_listing" # type: Union[None, str]
5252

53+
self.find_default_container = None # type: Callable[[], Text]
54+
5355
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
5456
# type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
5557
if tail_pos is None:

cwltool/draft2tool.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,19 @@ class CommandLineTool(Process):
174174
def __init__(self, toolpath_object, **kwargs):
    # type: (Dict[Text, Any], **Any) -> None
    """Initialise the tool and remember how to look up a default container.

    :param toolpath_object: the loaded tool document, forwarded to ``Process``.
    :param kwargs: construction options. ``find_default_container`` is an
        optional callable taking this tool and returning a Docker image name
        or a false value.
    """
    super(CommandLineTool, self).__init__(toolpath_object, **kwargs)
    # Callers that build tools without going through main() may not supply a
    # finder. Fall back to one that never finds anything so makeJobRunner can
    # still call it unconditionally instead of failing with a KeyError here.
    finder = kwargs.get("find_default_container")
    if finder is None:
        finder = lambda tool: None
    self.find_default_container = finder
177178

178179
def makeJobRunner(self, use_container=True): # type: (Optional[bool]) -> JobBase
179180
dockerReq, _ = self.get_requirement("DockerRequirement")
181+
if not dockerReq and use_container:
182+
default_container = self.find_default_container(self)
183+
if default_container:
184+
self.requirements.insert(0, {
185+
"class": "DockerRequirement",
186+
"dockerPull": default_container
187+
})
188+
dockerReq = self.requirements[0]
189+
180190
if dockerReq and use_container:
181191
return DockerCommandLineJob()
182192
else:
@@ -276,7 +286,7 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
276286

277287
reffiles = copy.deepcopy(builder.files)
278288

279-
j = self.makeJobRunner(kwargs.get("use_container"))
289+
# makeJobRunner's only parameter is the use_container switch, so forward that
# option. Passing the find_default_container callable here would always be
# truthy and force the container code path.
j = self.makeJobRunner(kwargs.get("use_container"))
280290
j.builder = builder
281291
j.joborder = builder.job
282292
j.stdin = None

cwltool/job.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
PYTHON_RUN_SCRIPT = """
3535
import json
36+
import os
3637
import sys
3738
import subprocess
3839
@@ -41,6 +42,7 @@
4142
commands = popen_description["commands"]
4243
cwd = popen_description["cwd"]
4344
env = popen_description["env"]
45+
env["PATH"] = os.environ.get("PATH")
4446
stdin_path = popen_description["stdin_path"]
4547
stdout_path = popen_description["stdout_path"]
4648
stderr_path = popen_description["stderr_path"]
@@ -145,7 +147,6 @@ def _setup(self): # type: () -> None
145147
_logger.debug(u"[job %s] initial work dir %s", self.name,
146148
json.dumps({p: self.generatemapper.mapper(p) for p in self.generatemapper.files()}, indent=4))
147149

148-
149150
def _execute(self, runtime, env, rm_tmpdir=True, move_outputs="move"):
150151
# type: (List[Text], MutableMapping[Text, Text], bool, Text) -> None
151152

@@ -328,8 +329,12 @@ def run(self, pull_image=True, rm_container=True,
328329
env = cast(MutableMapping[Text, Text], os.environ)
329330
if docker_req and kwargs.get("use_container") is not False:
330331
img_id = docker.get_from_requirements(docker_req, True, pull_image)
331-
elif kwargs.get("default_container", None) is not None:
332-
img_id = kwargs.get("default_container")
332+
if img_id is None:
333+
find_default_container = self.builder.find_default_container
334+
default_container = find_default_container and find_default_container()
335+
if default_container:
336+
img_id = default_container
337+
env = os.environ
333338

334339
if docker_req and img_id is None and kwargs.get("use_container"):
335340
raise Exception("Docker image not available")

cwltool/main.py

Lines changed: 126 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import pkg_resources # part of setuptools
1515
import requests
16+
import string
1617

1718
import ruamel.yaml as yaml
1819
import schema_salad.validate as validate
@@ -33,6 +34,14 @@
3334
from .resolver import ga4gh_tool_registries, tool_resolver
3435
from .stdfsaccess import StdFsAccess
3536
from .update import ALLUPDATES, UPDATES
37+
from .utils import get_feature
38+
try:
39+
from galaxy.tools.deps.requirements import ToolRequirement, ToolRequirements
40+
from galaxy.tools import deps
41+
except ImportError:
42+
ToolRequirement = None # type: ignore
43+
ToolRequirements = None # type: ignore
44+
deps = None
3645

3746
_logger = logging.getLogger("cwltool")
3847

@@ -149,6 +158,15 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
149158
exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
150159
exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
151160

161+
# help="Dependency resolver configuration file describing how to adapt 'SoftwareRequirement' packages to current system."
162+
parser.add_argument("--beta-dependency-resolvers-configuration", default=None, help=argparse.SUPPRESS)
163+
# help="Default root directory used by dependency resolvers configuration."
164+
parser.add_argument("--beta-dependencies-directory", default=None, help=argparse.SUPPRESS)
165+
# help="Use biocontainers for tools without an explicitly annotated Docker container."
166+
parser.add_argument("--beta-use-biocontainers", default=None, help=argparse.SUPPRESS, action="store_true")
167+
# help="Short cut to use Conda to resolve 'SoftwareRequirement' packages."
168+
parser.add_argument("--beta-conda-dependencies", default=None, help=argparse.SUPPRESS, action="store_true")
169+
152170
parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
153171

154172
parser.add_argument("--relative-deps", choices=['primary', 'cwd'],
@@ -236,12 +254,6 @@ def output_callback(out, processStatus):
236254
for req in jobReqs:
237255
t.requirements.append(req)
238256

239-
if kwargs.get("default_container"):
240-
t.requirements.insert(0, {
241-
"class": "DockerRequirement",
242-
"dockerPull": kwargs["default_container"]
243-
})
244-
245257
jobiter = t.job(job_order_object,
246258
output_callback,
247259
**kwargs)
@@ -716,8 +728,20 @@ def main(argsl=None, # type: List[str]
716728
stdout.write(json.dumps(processobj, indent=4))
717729
return 0
718730

731+
conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text
732+
use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text
733+
734+
make_tool_kwds = vars(args)
735+
736+
build_job_script = None # type: Callable[[Any, List[str]], Text]
737+
if conf_file or use_conda_dependencies:
738+
dependencies_configuration = DependenciesConfiguration(args) # type: DependenciesConfiguration
739+
make_tool_kwds["build_job_script"] = dependencies_configuration.build_job_script
740+
741+
make_tool_kwds["find_default_container"] = functools.partial(find_default_container, args)
742+
719743
tool = make_tool(document_loader, avsc_names, metadata, uri,
720-
makeTool, vars(args))
744+
makeTool, make_tool_kwds)
721745

722746
if args.validate:
723747
return 0
@@ -838,5 +862,100 @@ def locToPath(p):
838862
_logger.addHandler(defaultStreamHandler)
839863

840864

865+
COMMAND_WITH_DEPENDENCIES_TEMPLATE = string.Template("""#!/bin/bash
866+
$handle_dependencies
867+
python "run_job.py" "job.json"
868+
""")
869+
870+
871+
def find_default_container(args, builder):
    """Choose a Docker image for a tool that declares no DockerRequirement.

    An explicit ``default_container`` option always wins. Otherwise, when
    ``beta_use_biocontainers`` is set, galaxy-lib's container registry is
    asked for the best Docker container description matching the
    SoftwareRequirement packages collected by ``_get_dependencies(builder)``.

    :param args: parsed command line options. Attributes that are missing
        are treated as unset, so hand-built namespaces work too.
    :param builder: object passed to ``_get_dependencies`` to read the
        SoftwareRequirement from.
    :return: the container identifier, or ``None`` when nothing applies.
    :raises Exception: when Biocontainers are requested but galaxy-lib
        cannot be imported.
    """
    explicit_container = getattr(args, "default_container", None)
    if explicit_container:
        return explicit_container

    if not getattr(args, "beta_use_biocontainers", None):
        return None

    # Imported lazily: galaxy-lib is an optional dependency that is only
    # needed on this code path.
    try:
        from galaxy.tools.deps.containers import ContainerRegistry, AppInfo, ToolInfo, DOCKER_CONTAINER_TYPE
    except ImportError:
        raise Exception("galaxy-lib not found")

    app_info = AppInfo(
        involucro_auto_init=True,
        enable_beta_mulled_containers=True,
        container_image_cache_path=".",
    )  # type: AppInfo
    container_registry = ContainerRegistry(app_info)  # type: ContainerRegistry
    requirements = _get_dependencies(builder)
    tool_info = ToolInfo(requirements=requirements)  # type: ToolInfo
    container_description = container_registry.find_best_container_description([DOCKER_CONTAINER_TYPE], tool_info)
    if container_description:
        return container_description.identifier

    return None
893+
894+
895+
class DependenciesConfiguration(object):
    """Dependency-resolution settings derived from the ``--beta-*`` options.

    An instance is handed to ``deps.build_dependency_manager``; presumably
    galaxy-lib reads the attributes set in ``__init__`` and ``config_dict``
    from it (confirm against the galaxy-lib version in use).
    """

    def __init__(self, args):
        # type: (argparse.Namespace) -> None
        """Derive the settings from parsed command line options.

        :param args: namespace that may carry
            ``beta_dependency_resolvers_configuration``,
            ``beta_dependencies_directory`` and ``beta_conda_dependencies``.
            Missing attributes are treated as unset.
        """
        conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)
        tool_dependency_dir = getattr(args, "beta_dependencies_directory", None)
        conda_dependencies = getattr(args, "beta_conda_dependencies", None)

        # Give every attribute a value up front so none is left undefined
        # when neither a usable configuration file nor Conda was requested.
        self.use_tool_dependencies = False
        self.tool_dependency_dir = tool_dependency_dir
        self.dependency_resolvers_config_file = None

        if conf_file is not None and os.path.exists(conf_file):
            self.use_tool_dependencies = True
            # Without an explicit directory, keep dependencies next to the
            # resolver configuration file.
            self.tool_dependency_dir = tool_dependency_dir or os.path.abspath(os.path.dirname(conf_file))
            self.dependency_resolvers_config_file = conf_file
        elif conda_dependencies:
            self.use_tool_dependencies = True
            self.tool_dependency_dir = tool_dependency_dir or os.path.abspath("./cwltool_deps")

    @property
    def config_dict(self):
        """Return the extra resolver options: Conda auto-install and auto-init on."""
        return {
            'conda_auto_install': True,
            'conda_auto_init': True,
        }

    def build_job_script(self, builder, command):
        # type: (Any, List[str]) -> Text
        """Render the bash wrapper that sets up dependencies and runs the job.

        :param builder: supplies the SoftwareRequirement (through
            ``_get_dependencies``) and ``tmpdir``, used as the job directory.
        :param command: accepted for the callback signature but not used
            here; the template always runs ``run_job.py`` on ``job.json``.
        :return: the script text.
        :raises Exception: when galaxy-lib is not installed.
        """
        if deps is None:
            raise Exception("galaxy-lib not found")
        tool_dependency_manager = deps.build_dependency_manager(self)  # type: deps.DependencyManager
        dependencies = _get_dependencies(builder)
        handle_dependencies = ""  # str
        if dependencies:
            handle_dependencies = "\n".join(tool_dependency_manager.dependency_shell_commands(dependencies, job_directory=builder.tmpdir))

        template_kwds = dict(handle_dependencies=handle_dependencies)  # type: Dict[str, str]
        job_script = COMMAND_WITH_DEPENDENCIES_TEMPLATE.substitute(template_kwds)
        return job_script
937+
938+
939+
def _get_dependencies(builder):
    # type: (Any) -> List[ToolRequirement]
    """Translate the SoftwareRequirement of ``builder`` into galaxy-lib requirements.

    Each entry under ``packages`` becomes one ``ToolRequirement`` of type
    ``"package"``. The name is the text after the last ``#`` of the
    ``package`` value. When ``version`` is a list only its first element is
    used, and an empty list means no version.

    :param builder: object that ``get_feature`` can query for a
        ``SoftwareRequirement``.
    :return: the result of ``ToolRequirements.from_list`` over the collected
        requirements (built from an empty list when there are no packages).
    :raises Exception: when galaxy-lib is not installed.
    """
    # The module-level import falls back to None when galaxy-lib is missing.
    # Fail with the same message used elsewhere rather than an AttributeError
    # on None.
    if ToolRequirement is None or ToolRequirements is None:
        raise Exception("galaxy-lib not found")

    (software_requirement, _) = get_feature(builder, "SoftwareRequirement")
    packages = software_requirement.get("packages") if software_requirement else None

    dependencies = []  # type: List[ToolRequirement]
    for package in packages or []:
        version = package.get("version", None)
        if isinstance(version, list):
            version = version[0] if version else None
        dependencies.append(ToolRequirement.from_dict(dict(
            name=package["package"].split("#")[-1],
            version=version,
            type="package",
        )))
    return ToolRequirements.from_list(dependencies)
958+
959+
841960
if __name__ == "__main__":
842961
sys.exit(main(sys.argv[1:]))

cwltool/process.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,12 @@ def _init_job(self, joborder, **kwargs):
598598

599599
builder.resources = self.evalResources(builder, kwargs)
600600

601+
build_job_script = kwargs.get("build_job_script", None) # type: Callable[[Builder, List[str]], Text]
602+
curried_build_job_script = None # type: Callable[[List[str]], Text]
603+
if build_job_script:
604+
curried_build_job_script = lambda commands: build_job_script(builder, commands)
605+
builder.build_job_script = curried_build_job_script
606+
601607
return builder
602608

603609
def evalResources(self, builder, kwargs):

tests/2.fasta

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
>Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;
2+
gttcgatgcc taaaatacct tcttttgtcc ctacacagac cacagttttc ctaatggctt
3+
tacaccgact agaaattctt gtgcaagcac taattgaaag cggttggcct agagtgttac
4+
cggtttgtat agctgagcgc gtctcttgcc ctgatcaaag gttcattttc tctactttgg
5+
aagacgttgt ggaagaatac aacaagtacg agtctctccc ccctggtttg ctgattactg
6+
gatacagttg taataccctt cgcaacaccg cgtaactatc tatatgaatt attttccctt
7+
tattatatgt agtaggttcg tctttaatct tcctttagca agtcttttac tgttttcgac
8+
ctcaatgttc atgttcttag gttgttttgg ataatatgcg gtcagtttaa tcttcgttgt
9+
ttcttcttaa aatatttatt catggtttaa tttttggttt gtacttgttc aggggccagt
10+
tcattattta ctctgtttgt atacagcagt tcttttattt ttagtatgat tttaatttaa
11+
aacaattcta atggtcaaaa a

tests/2.fastq

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
@EAS54_6_R1_2_1_413_324
2+
CCCTTCTTGTCTTCAGCGTTTCTCC
3+
+
4+
;;3;;;;;;;;;;;;7;;;;;;;88
5+
@EAS54_6_R1_2_1_540_792
6+
TTGGCAGGCCAAGGCCGATGGATCA
7+
+
8+
;;;;;;;;;;;7;;;;;-;;;3;83
9+
@EAS54_6_R1_2_1_443_348
10+
GTTGCTTCTGGCGTGGGTGGGGGGG
11+
+EAS54_6_R1_2_1_443_348
12+
;;;;;;;;;;;9;7;;.7;393333

tests/seqtk_seq.cwl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
cwlVersion: v1.0
2+
class: CommandLineTool
3+
id: "seqtk_seq"
4+
doc: "Convert to FASTA (seqtk)"
5+
inputs:
6+
- id: input1
7+
type: File
8+
inputBinding:
9+
position: 1
10+
prefix: "-a"
11+
outputs:
12+
- id: output1
13+
type: File
14+
outputBinding:
15+
glob: out
16+
baseCommand: ["seqtk", "seq"]
17+
arguments: []
18+
stdout: out
19+
hints:
20+
SoftwareRequirement:
21+
packages:
22+
- package: seqtk
23+
version:
24+
- r93

tests/seqtk_seq_job.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"input1": {
3+
"class": "File",
4+
"location": "2.fastq"
5+
}
6+
}

typeshed/2.7/galaxy/__init__.pyi

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Stubs for galaxy (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
7+
PROJECT_NAME = ... # type: str
8+
PROJECT_OWNER = ... # type: str
9+
PROJECT_USERAME = ... # type: str
10+
PROJECT_URL = ... # type: str
11+
PROJECT_AUTHOR = ... # type: str
12+
PROJECT_EMAIL = ... # type: str
13+
RAW_CONTENT_URL = ... # type: Any
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Stubs for galaxy.tools (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Stubs for galaxy.tools.deps (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any, List, Optional
6+
from .resolvers import NullDependency as NullDependency
7+
from .resolvers.conda import CondaDependencyResolver as CondaDependencyResolver
8+
from .resolvers.galaxy_packages import GalaxyPackageDependencyResolver as GalaxyPackageDependencyResolver
9+
from .resolvers.tool_shed_packages import ToolShedPackageDependencyResolver as ToolShedPackageDependencyResolver
10+
11+
log = ... # type: Any
12+
EXTRA_CONFIG_KWDS = ... # type: Any
13+
CONFIG_VAL_NOT_FOUND = ... # type: Any
14+
15+
# Return type written as an annotation: a trailing "# type:" comment after the
# body is not picked up as the function's return type.
def build_dependency_manager(config: Any) -> 'DependencyManager': ...
16+
17+
class NullDependencyManager:
18+
dependency_resolvers = ... # type: Any
19+
def uses_tool_shed_dependencies(self): ...
20+
def dependency_shell_commands(self, requirements: Any, **kwds) -> List[str]: ...
21+
def find_dep(self, name, version: Optional[Any] = ..., type: str = ..., **kwds): ...
22+
23+
class DependencyManager:
24+
extra_config = ... # type: Any
25+
default_base_path = ... # type: Any
26+
resolver_classes = ... # type: Any
27+
dependency_resolvers = ... # type: Any
28+
def __init__(self, default_base_path, conf_file: Optional[Any] = ..., **extra_config) -> None: ...
29+
def dependency_shell_commands(self, requirements: Any, **kwds) -> List[str]: ...
30+
def requirements_to_dependencies(self, requirements, **kwds): ...
31+
def uses_tool_shed_dependencies(self): ...
32+
def find_dep(self, name, version: Optional[Any] = ..., type: str = ..., **kwds): ...

0 commit comments

Comments
 (0)