Skip to content

Commit c646a62

Browse files
authored
Merge pull request #49 from common-workflow-language/schema-cache
Cache external schemas after being loaded 1st time
2 parents 2300cb0 + 14d51ca commit c646a62

File tree

14 files changed

+190
-27
lines changed

14 files changed

+190
-27
lines changed

schema_salad/ref_resolver.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,26 @@
44
import hashlib
55
import logging
66
import collections
7-
import requests
87
import urlparse
98
import re
109
import copy
10+
import pprint
11+
from StringIO import StringIO
12+
13+
from . import validate
14+
from .aslist import aslist
15+
from .flatten import flatten
16+
17+
import requests
18+
from cachecontrol.wrapper import CacheControl
19+
from cachecontrol.caches import FileCache
1120
import ruamel.yaml as yaml
21+
1222
try:
1323
from ruamel.yaml import CSafeLoader as SafeLoader
1424
except ImportError:
1525
from ruamel.yaml import SafeLoader # type: ignore
16-
from . import validate
17-
import pprint
18-
from StringIO import StringIO
19-
from .aslist import aslist
20-
from .flatten import flatten
26+
2127
import rdflib
2228
from rdflib.namespace import RDF, RDFS, OWL
2329
from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -64,7 +70,7 @@ def merge_properties(a, b):
6470
def SubLoader(loader): # type: (Loader) -> Loader
6571
return Loader(loader.ctx, schemagraph=loader.graph,
6672
foreign_properties=loader.foreign_properties, idx=loader.idx,
67-
cache=loader.cache)
73+
cache=loader.cache, session=loader.session)
6874

6975

7076
class Loader(object):
@@ -73,8 +79,8 @@ class Loader(object):
7379
DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any])
7480

7581
def __init__(self, ctx, schemagraph=None, foreign_properties=None,
76-
idx=None, cache=None):
77-
# type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any]) -> None
82+
idx=None, cache=None, session=None):
83+
# type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
7884
normalize = lambda url: urlparse.urlsplit(url).geturl()
7985
if idx is not None:
8086
self.idx = idx
@@ -97,6 +103,13 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
97103
else:
98104
self.cache = {}
99105

106+
self.session = None # type: requests.sessions.Session
107+
if session is not None:
108+
self.session = session
109+
else:
110+
self.session = CacheControl(requests.Session(),
111+
cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))
112+
100113
self.url_fields = None # type: Set[unicode]
101114
self.scoped_ref_fields = None # type: Dict[unicode, int]
102115
self.vocab_fields = None # type: Set[unicode]
@@ -166,17 +179,22 @@ def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None
166179
def add_schemas(self, ns, base_url):
167180
# type: (Union[List[unicode], unicode], unicode) -> None
168181
for sch in aslist(ns):
169-
for fmt in ['xml', 'turtle', 'rdfa']:
170-
try:
171-
self.graph.parse(urlparse.urljoin(base_url, sch),
172-
format=fmt)
173-
break
174-
except xml.sax.SAXParseException: # type: ignore
175-
pass
176-
except TypeError:
177-
pass
178-
except BadSyntax:
179-
pass
182+
fetchurl = urlparse.urljoin(base_url, sch)
183+
if fetchurl not in self.cache:
184+
_logger.info("Getting external schema %s", fetchurl)
185+
content = self.fetch_text(fetchurl)
186+
self.cache[fetchurl] = rdflib.graph.Graph()
187+
for fmt in ['xml', 'turtle', 'rdfa']:
188+
try:
189+
self.cache[fetchurl].parse(data=content, format=fmt)
190+
self.graph += self.cache[fetchurl]
191+
break
192+
except xml.sax.SAXParseException: # type: ignore
193+
pass
194+
except TypeError:
195+
pass
196+
except BadSyntax:
197+
pass
180198

181199
for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
182200
self._add_properties(s)
@@ -601,9 +619,9 @@ def fetch_text(self, url):
601619
split = urlparse.urlsplit(url)
602620
scheme, path = split.scheme, split.path
603621

604-
if scheme in [u'http', u'https'] and requests:
622+
if scheme in [u'http', u'https'] and self.session:
605623
try:
606-
resp = requests.get(url)
624+
resp = self.session.get(url)
607625
resp.raise_for_status()
608626
except Exception as e:
609627
raise RuntimeError(url, e)

schema_salad/tests/test_examples.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ def test_schemas(self):
3232
l = schema_salad.ref_resolver.Loader({})
3333

3434
ra, _ = l.resolve_all({
35-
u"$schemas": [get_data("tests/EDAM.owl")],
35+
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
3636
u"$namespaces": {u"edam": u"http://edamontology.org/"},
3737
u"edam:has_format": u"edam:format_1915"
3838
}, "")
3939

4040
self.assertEqual({
41-
u"$schemas": [get_data("tests/EDAM.owl")],
41+
u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
4242
u"$namespaces": {u"edam": u"http://edamontology.org/"},
4343
u'http://edamontology.org/has_format': u'http://edamontology.org/format_1915'
4444
}, ra)

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
'rdflib >= 4.1.0',
3434
'rdflib-jsonld >= 0.3.0',
3535
'mistune',
36-
'typing']
36+
'typing',
37+
'CacheControl',
38+
'lockfile']
3739

3840
install_requires.append("avro") # TODO: remove me once cwltool is
3941
# available in Debian Stable, Ubuntu 12.04 LTS

typeshed/2.7/cachecontrol/__init__.pyi

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Stubs for cachecontrol (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
from .wrapper import CacheControl as CacheControl
7+
#from .adapter import CacheControlAdapter as CacheControlAdapter
8+
#from .controller import CacheController as CacheController
9+
10+
__email__ = ... # type: Any

typeshed/2.7/cachecontrol/adapter.pyi

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Stubs for cachecontrol.adapter (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
from requests.adapters import HTTPAdapter
7+
from .controller import CacheController as CacheController
8+
from .cache import DictCache as DictCache
9+
from .filewrapper import CallbackFileWrapper as CallbackFileWrapper
10+
11+
class CacheControlAdapter(HTTPAdapter):
12+
invalidating_methods = ... # type: Any
13+
cache = ... # type: Any
14+
heuristic = ... # type: Any
15+
controller = ... # type: Any
16+
def __init__(self, cache=None, cache_etags=True, controller_class=None, serializer=None, heuristic=None, *args, **kw): ...
17+
def send(self, request, **kw): ...
18+
def build_response(self, request, response, from_cache=False): ...
19+
def close(self): ...

typeshed/2.7/cachecontrol/cache.pyi

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Stubs for cachecontrol.cache (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
7+
class BaseCache:
8+
def get(self, key): ...
9+
def set(self, key, value): ...
10+
def delete(self, key): ...
11+
def close(self): ...
12+
13+
class DictCache(BaseCache):
14+
lock = ... # type: Any
15+
data = ... # type: Any
16+
def __init__(self, init_dict=None): ...
17+
def get(self, key): ...
18+
def set(self, key, value): ...
19+
def delete(self, key): ...

typeshed/2.7/cachecontrol/caches/__init__.pyi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Stubs for cachecontrol.caches (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
from .file_cache import FileCache as FileCache
7+
#from .redis_cache import RedisCache as RedisCache
8+
9+
notice = ... # type: Any

typeshed/2.7/cachecontrol/caches/file_cache.pyi

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Stubs for cachecontrol.caches.file_cache (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any, Callable
6+
from ..cache import BaseCache as BaseCache
7+
from ..controller import CacheController as CacheController
8+
9+
class FileCache(BaseCache):
10+
directory = ... # type: str
11+
forever = ... # type: bool
12+
filemode = ... # type: str
13+
dirmode = ... # type: str
14+
lock_class = ... # type: Callable
15+
def __init__(self, directory: str, forever=False, filemode=384, dirmode=448, use_dir_lock=None, lock_class=None) -> None: ...
16+
@staticmethod
17+
def encode(x): ...
18+
def get(self, key): ...
19+
def set(self, key, value): ...
20+
def delete(self, key): ...
21+
22+
def url_to_file_path(url, filecache): ...

typeshed/2.7/cachecontrol/compat.pyi

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Stubs for cachecontrol.compat (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
7+
text_type = ... # type: Any

typeshed/2.7/cachecontrol/controller.pyi

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Stubs for cachecontrol.controller (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
#from .cache import DictCache as DictCache
7+
#from .serialize import Serializer as Serializer
8+
9+
logger = ... # type: Any
10+
URI = ... # type: Any
11+
12+
def parse_uri(uri): ...
13+
14+
class CacheController:
15+
cache = ... # type: Any
16+
cache_etags = ... # type: Any
17+
serializer = ... # type: Any
18+
def __init__(self, cache=None, cache_etags=True, serializer=None): ...
19+
@classmethod
20+
def cache_url(cls, uri): ...
21+
def parse_cache_control(self, headers): ...
22+
def cached_request(self, request): ...
23+
def conditional_headers(self, request): ...
24+
def cache_response(self, request, response, body=None): ...
25+
def update_cached_response(self, request, response): ...

typeshed/2.7/cachecontrol/filewrapper.pyi

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Stubs for cachecontrol.filewrapper (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
class CallbackFileWrapper:
6+
def __init__(self, fp, callback): ...
7+
def __getattr__(self, name): ...
8+
def read(self, amt=None): ...

typeshed/2.7/cachecontrol/serialize.pyi

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Stubs for cachecontrol.serialize (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from .compat import HTTPResponse as HTTPResponse, pickle as pickle, text_type as text_type
6+
7+
class Serializer:
8+
def dumps(self, request, response, body=None): ...
9+
def loads(self, request, data): ...
10+
def prepare_response(self, request, cached): ...

typeshed/2.7/cachecontrol/wrapper.pyi

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Stubs for cachecontrol.wrapper (Python 2)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Union
6+
from .adapter import CacheControlAdapter as CacheControlAdapter
7+
from .cache import DictCache, BaseCache
8+
import requests
9+
10+
def CacheControl(sess: requests.sessions.Session,
11+
cache: Union[DictCache, BaseCache] = None,
12+
cache_etags: bool = True,
13+
serializer=None,
14+
heuristic=None) -> requests.sessions.Session: ...

typeshed/2.7/requests/sessions.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Stubs for requests.sessions (Python 3)
22

3-
from typing import Any, Union, MutableMapping
3+
from typing import Any, AnyStr, Union, MutableMapping
44
from . import auth
55
from . import compat
66
from . import cookies
@@ -76,7 +76,7 @@ class Session(SessionRedirectMixin):
7676
cookies=..., files=..., auth=..., timeout=..., allow_redirects=...,
7777
proxies=..., hooks=..., stream=..., verify=..., cert=...,
7878
json=...) -> Response: ...
79-
def get(self, url: str, **kwargs) -> Response: ...
79+
def get(self, url: AnyStr, **kwargs) -> Response: ...
8080
def options(self, url: str, **kwargs) -> Response: ...
8181
def head(self, url: str, **kwargs) -> Response: ...
8282
def post(self, url: str, data=..., json=..., **kwargs) -> Response: ...

0 commit comments

Comments
 (0)