Skip to content

Commit de6ce38

Browse files
chapmanbPeter Amstutz
authored andcommitted
Enable src/sink comparisons for records
The logic for comparing records from two different sections of the workflow previous failed due to falling back to a `==` comparison that would not pass due to differences in fully qualified record names. This fix adds an explicit record comparison function that normalizes field names by the record namespace and then ensures that all of the types of each field are identical from both records. Also adds a test to demonstrate the behavior.
1 parent 7d6282a commit de6ce38

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

cwltool/workflow.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def can_assign_src_to_sink(src, sink): # type: (Any, Any) -> bool
9696
if isinstance(src, dict) and isinstance(sink, dict):
9797
if src["type"] == "array" and sink["type"] == "array":
9898
return can_assign_src_to_sink(src["items"], sink["items"])
99+
elif src["type"] == "record" and sink["type"] == "record":
100+
return _compare_records(src, sink)
99101
elif isinstance(src, list):
100102
for t in src:
101103
if can_assign_src_to_sink(t, sink):
@@ -108,6 +110,28 @@ def can_assign_src_to_sink(src, sink): # type: (Any, Any) -> bool
108110
return src == sink
109111
return False
110112

113+
def _compare_records(rec1, rec2):
114+
"""Compare two records, ensuring they have compatible fields.
115+
116+
This handles normalizing record names, which will be relative to workflow
117+
step, so that they can be compared.
118+
"""
119+
def _rec_fields(rec):
120+
out = {}
121+
for field in rec["fields"]:
122+
name = field["name"].replace(rec["name"], "")
123+
out[name] = field["type"]
124+
return out
125+
fields1 = _rec_fields(rec1)
126+
fields2 = _rec_fields(rec2)
127+
for key in set(fields1.keys() + fields2.keys()):
128+
if fields1.get(key) != fields2.get(key):
129+
_logger.info("Record comparison failure for %s and %s\n"
130+
"Did not match fields for %s: %s and %s" %
131+
(rec1["name"], rec2["name"], key, fields1.get(key), fields2.get(key)))
132+
return False
133+
return True
134+
111135
def object_from_state(state, parms, frag_only, supportsMultipleInput, sourceField):
112136
# type: (Dict[unicode, WorkflowStateItem], List[Dict[unicode, Any]], bool, bool, unicode) -> Dict[unicode, Any]
113137
inputobj = {} # type: Dict[unicode, Any]

tests/test_examples.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ def test_typecompare(self):
168168
{'items': ['string'], 'type': 'array'},
169169
{'items': ['int'], 'type': 'array'}))
170170

171+
def test_recordcompare(self):
172+
src = {'fields': [{'type': {'items': 'string', 'type': 'array'}, 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/wf-variantcall.cwl#vc_rec/vc_rec/description'}, {'type': {'items': 'File', 'type': 'array'}, 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/wf-variantcall.cwl#vc_rec/vc_rec/vrn_file'}], 'type': 'record', 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/wf-variantcall.cwl#vc_rec/vc_rec'}
173+
sink = {'fields': [{'type': {'items': 'string', 'type': 'array'}, 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/steps/vc_output_record.cwl#vc_rec/vc_rec/description'}, {'type': {'items': 'File', 'type': 'array'}, 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/steps/vc_output_record.cwl#vc_rec/vc_rec/vrn_file'}], 'type': 'record', 'name': u'file:///home/chapmanb/drive/work/cwl/test_bcbio_cwl/run_info-cwl-workflow/steps/vc_output_record.cwl#vc_rec/vc_rec'}
174+
self.assertTrue(cwltool.workflow.can_assign_src_to_sink(src, sink))
175+
176+
171177

172178
if __name__ == '__main__':
173179
unittest.main()

0 commit comments

Comments
 (0)