Skip to content

Commit 5a15c58

Browse files
authored
merge 1.21 changes into salad 2.0 (#68)
* Loader constructor can accept custom "Fetcher" object for fetching files and checking links. * Add test for custom fetcher feature. * Fetcher is a constructor instead of an object. Fix load_schema to update cache instead of replacing it. * Add cache test. check_exists checks cache. * Fetcher includes custom urljoin. * Fix fetcher_constructor to default to None instead of DefaultFetcher. * Adjust package dependencies to be more specific about versions. * Linting * Tweak versioning to reduce chance of future unpleasant surprises from 3rd party upgrades and clean up requirements.txt. * Bump to 2.1
1 parent a1db0ac commit 5a15c58

File tree

5 files changed

+167
-76
lines changed

5 files changed

+167
-76
lines changed

requirements.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
requests
2-
ruamel.yaml==0.12.4
3-
rdflib>=4.1.0
4-
rdflib-jsonld>=0.3.0
5-
mistune
6-
typing>=3.5.2 ; python_version>="2.7"
7-
avro ; python_version<"3"
1+
typing==3.5.2.2 ; python_version>="2.7"
82
avro-python3 ; python_version>="3"
9-
CacheControl
10-
lockfile
3+
avro==1.8.1 ; python_version<"3"
4+
ruamel.yaml==0.12.4
5+
rdflib==4.2.1
6+
rdflib-jsonld==0.4.0
7+
html5lib==0.9999999
8+
mistune==0.7.3
9+
CacheControl==0.11.7
10+
lockfile==0.12.2

schema_salad/ref_resolver.py

Lines changed: 90 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -73,20 +73,89 @@ def merge_properties(a, b):
7373
def SubLoader(loader): # type: (Loader) -> Loader
7474
return Loader(loader.ctx, schemagraph=loader.graph,
7575
foreign_properties=loader.foreign_properties, idx=loader.idx,
76-
cache=loader.cache, session=loader.session)
76+
cache=loader.cache, fetcher_constructor=loader.fetcher_constructor)
7777

78+
class Fetcher(object):
79+
def fetch_text(self, url): # type: (unicode) -> unicode
80+
raise NotImplementedError()
7881

79-
class Loader(object):
82+
def check_exists(self, url): # type: (unicode) -> bool
83+
raise NotImplementedError()
84+
85+
def urljoin(self, base_url, url): # type: (unicode, unicode) -> unicode
86+
raise NotImplementedError()
87+
88+
89+
class DefaultFetcher(Fetcher):
90+
def __init__(self, cache, session): # type: (dict, requests.sessions.Session) -> None
91+
self.cache = cache
92+
self.session = session
93+
94+
def fetch_text(self, url):
95+
# type: (unicode) -> unicode
96+
if url in self.cache:
97+
return self.cache[url]
98+
99+
split = urlparse.urlsplit(url)
100+
scheme, path = split.scheme, split.path
101+
102+
if scheme in [u'http', u'https'] and self.session:
103+
try:
104+
resp = self.session.get(url)
105+
resp.raise_for_status()
106+
except Exception as e:
107+
raise RuntimeError(url, e)
108+
return resp.text
109+
elif scheme == 'file':
110+
try:
111+
with open(path) as fp:
112+
read = fp.read()
113+
if hasattr(read, "decode"):
114+
return read.decode("utf-8")
115+
else:
116+
return read
117+
except (OSError, IOError) as e:
118+
if e.filename == path:
119+
raise RuntimeError(unicode(e))
120+
else:
121+
raise RuntimeError('Error reading %s: %s' % (url, e))
122+
else:
123+
raise ValueError('Unsupported scheme in url: %s' % url)
124+
125+
def check_exists(self, url): # type: (unicode) -> bool
126+
if url in self.cache:
127+
return True
128+
129+
split = urlparse.urlsplit(url)
130+
scheme, path = split.scheme, split.path
131+
132+
if scheme in [u'http', u'https'] and self.session:
133+
try:
134+
resp = self.session.head(url)
135+
resp.raise_for_status()
136+
except Exception as e:
137+
return False
138+
return True
139+
elif scheme == 'file':
140+
return os.path.exists(path)
141+
else:
142+
raise ValueError('Unsupported scheme in url: %s' % url)
80143

144+
def urljoin(self, base_url, url):
145+
return urlparse.urljoin(base_url, url)
146+
147+
class Loader(object):
81148
def __init__(self,
82149
ctx, # type: ContextType
83-
schemagraph=None, # type: Graph
150+
schemagraph=None, # type: rdflib.graph.Graph
84151
foreign_properties=None, # type: Set[unicode]
85152
idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
86153
cache=None, # type: Dict[unicode, Any]
87-
session=None # type: requests.sessions.Session
154+
session=None, # type: requests.sessions.Session
155+
fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
88156
):
89157
# type: (...) -> None
158+
90159
normalize = lambda url: urlparse.urlsplit(url).geturl()
91160
self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
92161
if idx is not None:
@@ -113,12 +182,20 @@ def __init__(self,
113182
else:
114183
self.cache = {}
115184

116-
self.session = None # type: requests.sessions.Session
117-
if session is not None:
185+
if session is None:
186+
self.session = CacheControl(requests.Session(),
187+
cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))
188+
else:
118189
self.session = session
190+
191+
if fetcher_constructor:
192+
self.fetcher_constructor = fetcher_constructor
119193
else:
120-
self.session = CacheControl(requests.Session(),
121-
cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))
194+
self.fetcher_constructor = DefaultFetcher
195+
self.fetcher = self.fetcher_constructor(self.cache, self.session)
196+
197+
self.fetch_text = self.fetcher.fetch_text
198+
self.check_exists = self.fetcher.check_exists
122199

123200
self.url_fields = None # type: Set[unicode]
124201
self.scoped_ref_fields = None # type: Dict[unicode, int]
@@ -171,7 +248,7 @@ def expand_url(self,
171248
elif scoped_ref is not None and not split.fragment:
172249
pass
173250
else:
174-
url = urlparse.urljoin(base_url, url)
251+
url = self.fetcher.urljoin(base_url, url)
175252

176253
if vocab_term and url in self.rvocab:
177254
return self.rvocab[url]
@@ -195,7 +272,7 @@ def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None
195272
def add_schemas(self, ns, base_url):
196273
# type: (Union[List[unicode], unicode], unicode) -> None
197274
for sch in aslist(ns):
198-
fetchurl = urlparse.urljoin(base_url, sch)
275+
fetchurl = self.fetcher.urljoin(base_url, sch)
199276
if fetchurl not in self.cache:
200277
_logger.debug("Getting external schema %s", fetchurl)
201278
content = self.fetch_text(fetchurl)
@@ -346,6 +423,7 @@ def resolve_ref(self,
346423
if url in self.idx and (not mixin):
347424
return self.idx[url], {}
348425

426+
sl.raise_type = RuntimeError
349427
with sl:
350428
# "$include" directive means load raw text
351429
if inc:
@@ -704,37 +782,6 @@ def resolve_all(self,
704782

705783
return document, metadata
706784

707-
def fetch_text(self, url):
708-
# type: (unicode) -> unicode
709-
if url in self.cache:
710-
return self.cache[url]
711-
712-
split = urlparse.urlsplit(url)
713-
scheme, path = split.scheme, split.path
714-
715-
if scheme in [u'http', u'https'] and self.session:
716-
try:
717-
resp = self.session.get(url)
718-
resp.raise_for_status()
719-
except Exception as e:
720-
raise RuntimeError(url, e)
721-
return resp.text
722-
elif scheme == 'file':
723-
try:
724-
with open(path) as fp:
725-
read = fp.read()
726-
if hasattr(read, "decode"):
727-
return read.decode("utf-8")
728-
else:
729-
return read
730-
except (OSError, IOError) as e:
731-
if e.filename == path:
732-
raise RuntimeError(unicode(e))
733-
else:
734-
raise RuntimeError('Error reading %s: %s' % (url, e))
735-
else:
736-
raise ValueError('Unsupported scheme in url: %s' % url)
737-
738785
def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any
739786
if url in self.idx:
740787
return self.idx[url]
@@ -758,21 +805,6 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any
758805
self.idx[url] = result
759806
return result
760807

761-
def check_file(self, url): # type: (unicode) -> bool
762-
split = urlparse.urlsplit(url)
763-
scheme, path = split.scheme, split.path
764-
765-
if scheme in [u'http', u'https'] and self.session:
766-
try:
767-
resp = self.session.head(url)
768-
resp.raise_for_status()
769-
except Exception as e:
770-
return False
771-
return True
772-
elif scheme == 'file':
773-
return os.path.exists(path)
774-
else:
775-
raise ValueError('Unsupported scheme in url: %s' % url)
776808

777809
FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap)
778810

@@ -809,13 +841,13 @@ def validate_link(self, field, link, docid):
809841
if link not in self.vocab and link not in self.idx and link not in self.rvocab:
810842
if field in self.scoped_ref_fields:
811843
return self.validate_scoped(field, link, docid)
812-
elif not self.check_file(link):
844+
elif not self.check_exists(link):
813845
raise validate.ValidationException(
814846
"Field `%s` contains undefined reference to `%s`" % (field, link))
815847
elif link not in self.idx and link not in self.rvocab:
816848
if field in self.scoped_ref_fields:
817849
return self.validate_scoped(field, link, docid)
818-
elif not self.check_file(link):
850+
elif not self.check_exists(link):
819851
raise validate.ValidationException(
820852
"Field `%s` contains undefined reference to `%s`" % (field, link))
821853
elif isinstance(link, CommentedSeq):

schema_salad/schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode]
188188

189189
metaschema_names, metaschema_doc, metaschema_loader = get_metaschema()
190190
if cache is not None:
191-
metaschema_loader.cache = cache
191+
metaschema_loader.cache.update(cache)
192192
schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")
193193

194194
if not isinstance(schema_doc, list):

schema_salad/tests/test_fetch.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import unittest
2+
import schema_salad.ref_resolver
3+
import schema_salad.main
4+
import schema_salad.schema
5+
from schema_salad.jsonld_context import makerdf
6+
import rdflib
7+
import ruamel.yaml as yaml
8+
import json
9+
import os
10+
import urlparse
11+
12+
class TestFetcher(unittest.TestCase):
13+
def test_fetcher(self):
14+
class TestFetcher(schema_salad.ref_resolver.Fetcher):
15+
def __init__(self, a, b):
16+
pass
17+
18+
def fetch_text(self, url): # type: (unicode) -> unicode
19+
if url == "keep:abc+123/foo.txt":
20+
return "hello: keepfoo"
21+
if url.endswith("foo.txt"):
22+
return "hello: foo"
23+
else:
24+
raise RuntimeError("Not foo.txt")
25+
26+
def check_exists(self, url): # type: (unicode) -> bool
27+
if url.endswith("foo.txt"):
28+
return True
29+
else:
30+
return False
31+
32+
def urljoin(self, base, url):
33+
urlsp = urlparse.urlsplit(url)
34+
if urlsp.scheme:
35+
return url
36+
basesp = urlparse.urlsplit(base)
37+
38+
if basesp.scheme == "keep":
39+
return base + "/" + url
40+
return urlparse.urljoin(base, url)
41+
42+
loader = schema_salad.ref_resolver.Loader({}, fetcher_constructor=TestFetcher)
43+
self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0])
44+
self.assertEqual({"hello": "keepfoo"}, loader.resolve_ref("foo.txt", base_url="keep:abc+123")[0])
45+
self.assertTrue(loader.check_exists("foo.txt"))
46+
47+
with self.assertRaises(RuntimeError):
48+
loader.resolve_ref("bar.txt")
49+
self.assertFalse(loader.check_exists("bar.txt"))
50+
51+
def test_cache(self):
52+
loader = schema_salad.ref_resolver.Loader({})
53+
foo = "file://%s/foo.txt" % os.getcwd()
54+
loader.cache.update({foo: "hello: foo"})
55+
print loader.cache
56+
self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0])
57+
self.assertTrue(loader.check_exists(foo))

setup.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828
requirements = []
2929

3030
install_requires = [
31-
'requests',
32-
'ruamel.yaml == 0.12.4',
33-
'rdflib >= 4.1.0',
34-
'rdflib-jsonld >= 0.3.0',
35-
'mistune',
36-
'typing >= 3.5.2',
37-
'CacheControl',
31+
'setuptools',
32+
'requests >= 1.0',
33+
'ruamel.yaml >= 0.12.4, < 0.12.5',
34+
'rdflib >= 4.2.0, < 4.3.0',
35+
'rdflib-jsonld >= 0.3.0, < 0.5.0',
36+
'html5lib >= 0.90, <= 0.9999999',
37+
'mistune >= 0.7.3, < 0.8',
38+
'typing >= 3.5.2, < 3.6',
39+
'CacheControl >= 0.11.7, < 0.12',
3840
'lockfile >= 0.9']
3941

4042
install_requires.append("avro") # TODO: remove me once cwltool is
@@ -46,7 +48,7 @@
4648
extras_require = {} # TODO: to be removed when the above is added
4749

4850
setup(name='schema-salad',
49-
version='2.0',
51+
version='2.1',
5052
description='Schema Annotations for Linked Avro Data (SALAD)',
5153
long_description=open(README).read(),
5254
author='Common workflow language working group',

0 commit comments

Comments
 (0)