Skip to content

Commit 8a6eaff

Browse files
kapilkd13 authored and tetron committed
Validating Ids for duplicate Issue#56 (#98)
* Validating duplicate ids are not present Issue#56
1 parent a560ef3 commit 8a6eaff

File tree

9 files changed

+81
-19
lines changed

9 files changed

+81
-19
lines changed

schema_salad/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def main(argsl=None): # type: (List[str]) -> int
139139
try:
140140
schema.validate_doc(metaschema_names, schema_doc,
141141
metaschema_loader, args.strict,
142-
source=schema_metadata["name"])
142+
source=schema_metadata.get("name"))
143143
except validate.ValidationException as e:
144144
_logger.error("While validating schema `%s`:\n%s" %
145145
(args.schema, str(e)))

schema_salad/ref_resolver.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,8 @@ def resolve_all(self,
822822
loader.idx[metadata[identifer]] = document
823823

824824
if checklinks:
825-
self.validate_links(document, u"")
825+
all_doc_ids={} # type: Dict[Text, Text]
826+
self.validate_links(document, u"", all_doc_ids)
826827

827828
return document, metadata
828829

@@ -877,8 +878,8 @@ def validate_scoped(self, field, link, docid):
877878
raise validate.ValidationException(
878879
"Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried)))
879880

880-
def validate_link(self, field, link, docid):
881-
# type: (unicode, FieldType, unicode) -> FieldType
881+
def validate_link(self, field, link, docid, all_doc_ids):
882+
# type: (unicode, FieldType, unicode, Dict[Text, Text]) -> FieldType
882883
if field in self.nolinkcheck:
883884
return link
884885
if isinstance(link, (str, unicode)):
@@ -901,14 +902,14 @@ def validate_link(self, field, link, docid):
901902
errors = []
902903
for n, i in enumerate(link):
903904
try:
904-
link[n] = self.validate_link(field, i, docid)
905+
link[n] = self.validate_link(field, i, docid, all_doc_ids)
905906
except validate.ValidationException as v:
906907
errors.append(v)
907908
if bool(errors):
908909
raise validate.ValidationException(
909910
"\n".join([unicode(e) for e in errors]))
910911
elif isinstance(link, CommentedMap):
911-
self.validate_links(link, docid)
912+
self.validate_links(link, docid, all_doc_ids)
912913
else:
913914
raise validate.ValidationException(
914915
"`%s` field is %s, expected string, list, or a dict."
@@ -924,8 +925,8 @@ def getid(self, d): # type: (Any) -> Optional[Text]
924925
return idd
925926
return None
926927

927-
def validate_links(self, document, base_url):
928-
# type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode) -> None
928+
def validate_links(self, document, base_url, all_doc_ids):
929+
# type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode, Dict[Text, Text]) -> None
929930
docid = self.getid(document)
930931
if not docid:
931932
docid = base_url
@@ -939,7 +940,15 @@ def validate_links(self, document, base_url):
939940
for d in self.url_fields:
940941
sl = SourceLine(document, d, validate.ValidationException)
941942
if d in document and d not in self.identity_links:
942-
document[d] = self.validate_link(d, document[d], docid)
943+
document[d] = self.validate_link(d, document[d], docid, all_doc_ids)
944+
for identifier in self.identifiers: # validate that each id is defined uniquely
945+
if identifier in document:
946+
sl = SourceLine(document, identifier, validate.ValidationException)
947+
if document[identifier] in all_doc_ids and sl.makeLead() != all_doc_ids[document[identifier]]:
948+
raise validate.ValidationException(
949+
"%s object %s `%s` previously defined" % (all_doc_ids[document[identifier]], identifier, relname(document[identifier]), ))
950+
else:
951+
all_doc_ids[document[identifier]] = sl.makeLead()
943952
except validate.ValidationException as v:
944953
errors.append(sl.makeError(unicode(v)))
945954
if hasattr(document, "iteritems"):
@@ -952,7 +961,7 @@ def validate_links(self, document, base_url):
952961
for key, val in iterator:
953962
sl = SourceLine(document, key, validate.ValidationException)
954963
try:
955-
self.validate_links(val, docid)
964+
self.validate_links(val, docid, all_doc_ids)
956965
except validate.ValidationException as v:
957966
if key not in self.nolinkcheck:
958967
docid2 = self.getid(val)

schema_salad/schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def load_and_validate(document_loader, # type: Loader
242242

243243
validationErrors = u""
244244
try:
245-
document_loader.validate_links(data, u"")
245+
document_loader.validate_links(data, u"", {})
246246
except validate.ValidationException as v:
247247
validationErrors = unicode(v) + "\n"
248248

schema_salad/sourceline.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,18 +149,21 @@ def __exit__(self,
149149
return
150150
raise self.makeError(unicode(exc_value))
151151

152-
def makeError(self, msg): # type: (Text) -> Any
153-
if not isinstance(self.item, ruamel.yaml.comments.CommentedBase):
154-
return self.raise_type(msg)
155-
errs = []
152+
def makeLead(self): # type: () -> Text
156153
if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data:
157-
lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "",
154+
return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "",
158155
(self.item.lc.line or 0)+1,
159156
(self.item.lc.col or 0)+1)
160157
else:
161-
lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "",
158+
return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "",
162159
(self.item.lc.data[self.key][0] or 0)+1,
163160
(self.item.lc.data[self.key][1] or 0)+1)
161+
162+
def makeError(self, msg): # type: (Text) -> Any
163+
if not isinstance(self.item, ruamel.yaml.comments.CommentedBase):
164+
return self.raise_type(msg)
165+
errs = []
166+
lead = self.makeLead()
164167
for m in msg.splitlines():
165168
if bool(lineno_re.match(m)):
166169
errs.append(m)

schema_salad/tests/test_errors.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ def test_errors(self):
2121
"test_schema/test8.cwl",
2222
"test_schema/test9.cwl",
2323
"test_schema/test10.cwl",
24-
"test_schema/test11.cwl"):
24+
"test_schema/test11.cwl",
25+
"test_schema/test12.cwl",
26+
"test_schema/test13.cwl",
27+
"test_schema/test14.cwl"):
2528
with self.assertRaises(ValidationException):
2629
try:
2730
load_and_validate(document_loader, avsc_names,
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
cwlVersion: v1.0
2+
class: CommandLineTool
3+
baseCommand: echo
4+
inputs:
5+
- id: example_flag
6+
type: boolean
7+
inputBinding:
8+
position: 1
9+
prefix: -f
10+
- id: example_flag
11+
type: int
12+
inputBinding:
13+
position: 3
14+
prefix: --example-string
15+
16+
outputs: []
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
cwlVersion: v1.0
2+
class: Workflow
3+
inputs:
4+
example_flag:
5+
type: boolean
6+
inputBinding:
7+
position: 1
8+
prefix: -f
9+
10+
outputs: []
11+
12+
steps:
13+
example_flag:
14+
in: []
15+
out: []
16+
run:
17+
id: blah
18+
class: CommandLineTool
19+
inputs: []
20+
outputs: []
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
cwlVersion: v1.0
2+
class: CommandLineTool
3+
baseCommand: echo
4+
inputs:
5+
example_flag:
6+
type: boolean
7+
inputBinding:
8+
position: 1
9+
prefix: -f
10+
outputs:
11+
example_flag: int

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
extras_require = {} # TODO: to be removed when the above is added
4848

4949
setup(name='schema-salad',
50-
version='2.4',
50+
version='2.5',
5151
description='Schema Annotations for Linked Avro Data (SALAD)',
5252
long_description=open(README).read(),
5353
author='Common workflow language working group',

0 commit comments

Comments
 (0)