
Commit 7dbffaf

Peter Amstutz committed
Add general http caching support. Applies to $import, $include and $schemas.
1 parent f8297cc commit 7dbffaf

3 files changed: +43 -30 lines changed

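The caching layer comes from the cachecontrol library: Loader now wraps its requests session in CacheControl with an on-disk FileCache, so http/https fetches for $import, $include and $schemas honor the servers' HTTP caching headers. A minimal standalone sketch of that approach, not the commit's code verbatim (the cache directory mirrors the one in the diff; the URL is a placeholder):

    # Sketch only: wrap a requests session in CacheControl backed by a FileCache
    # so repeated GETs of the same URL can be served from the local disk cache
    # when the response's HTTP caching headers allow it.
    import os

    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches import FileCache

    session = CacheControl(
        requests.Session(),
        cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))

    resp = session.get("https://example.org/ontology.owl")  # placeholder URL
    resp.raise_for_status()
    print(len(resp.text))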

schema_salad/ref_resolver.py

Lines changed: 38 additions & 27 deletions
@@ -4,20 +4,26 @@
 import hashlib
 import logging
 import collections
-import requests
 import urlparse
 import re
 import copy
+import pprint
+from StringIO import StringIO
+
+from . import validate
+from .aslist import aslist
+from .flatten import flatten
+
+import requests
+from cachecontrol import CacheControl
+from cachecontrol.caches import FileCache
 import ruamel.yaml as yaml
+
 try:
     from ruamel.yaml import CSafeLoader as SafeLoader
 except ImportError:
     from ruamel.yaml import SafeLoader  # type: ignore
-from . import validate
-import pprint
-from StringIO import StringIO
-from .aslist import aslist
-from .flatten import flatten
+
 import rdflib
 from rdflib.namespace import RDF, RDFS, OWL
 from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -64,7 +70,7 @@ def merge_properties(a, b):
 def SubLoader(loader):  # type: (Loader) -> Loader
     return Loader(loader.ctx, schemagraph=loader.graph,
                   foreign_properties=loader.foreign_properties, idx=loader.idx,
-                  cache=loader.cache)
+                  cache=loader.cache, session=loader.session)
 
 
 class Loader(object):
@@ -73,7 +79,7 @@ class Loader(object):
     DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any])
 
     def __init__(self, ctx, schemagraph=None, foreign_properties=None,
-                 idx=None, cache=None):
+                 idx=None, cache=None, session=None):
         # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any]) -> None
         normalize = lambda url: urlparse.urlsplit(url).geturl()
         if idx is not None:
@@ -97,6 +103,12 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
         else:
             self.cache = {}
 
+        if session is not None:
+            self.session = session
+        else:
+            self.session = CacheControl(requests.Session(),
+                cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))
+
         self.url_fields = None  # type: Set[unicode]
         self.scoped_ref_fields = None  # type: Dict[unicode, int]
         self.vocab_fields = None  # type: Set[unicode]
@@ -166,23 +178,22 @@ def add_namespaces(self, ns):  # type: (Dict[unicode, unicode]) -> None
     def add_schemas(self, ns, base_url):
         # type: (Union[List[unicode], unicode], unicode) -> None
         for sch in aslist(ns):
-            for fmt in ['xml', 'turtle', 'rdfa']:
-                try:
-                    fetchurl = urlparse.urljoin(base_url, sch)
-                    if fetchurl not in self.cache:
-                        _logger.info("Getting external schema %s", fetchurl)
-                        cachekey = hashlib.md5(fetchurl).hexdigest()
-                        if os.path.join(os.environ["HOME"], ".settings
-                        self.cache[fetchurl] = rdflib.graph.Graph()
-                        self.cache[fetchurl].parse(fetchurl, format=fmt)
-                    self.graph += self.cache[fetchurl]
-                    break
-                except xml.sax.SAXParseException:  # type: ignore
-                    pass
-                except TypeError:
-                    pass
-                except BadSyntax:
-                    pass
+            fetchurl = urlparse.urljoin(base_url, sch)
+            if fetchurl not in self.cache:
+                _logger.info("Getting external schema %s", fetchurl)
+                content = self.fetch_text(fetchurl)
+                self.cache[fetchurl] = rdflib.graph.Graph()
+                for fmt in ['xml', 'turtle', 'rdfa']:
+                    try:
+                        self.cache[fetchurl].parse(data=content, format=fmt)
+                        self.graph += self.cache[fetchurl]
+                        break
+                    except xml.sax.SAXParseException:  # type: ignore
+                        pass
+                    except TypeError:
+                        pass
+                    except BadSyntax:
+                        pass
 
         for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
             self._add_properties(s)
@@ -607,9 +618,9 @@ def fetch_text(self, url):
         split = urlparse.urlsplit(url)
         scheme, path = split.scheme, split.path
 
-        if scheme in [u'http', u'https'] and requests:
+        if scheme in [u'http', u'https'] and self.session:
            try:
-                resp = requests.get(url)
+                resp = self.session.get(url)
                resp.raise_for_status()
            except Exception as e:
                raise RuntimeError(url, e)
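Since the session is now a constructor argument, callers can inject their own (for example, to share one cache across loaders or to avoid writing to ~/.cache/salad in tests), and SubLoader propagates it to derived loaders. A rough usage sketch, assuming only the Loader and fetch_text interfaces shown in this diff (the URL is a placeholder):

    # Rough usage sketch; Loader({}) mirrors the test below, the URL is made up.
    import requests
    from cachecontrol import CacheControl

    from schema_salad.ref_resolver import Loader

    session = CacheControl(requests.Session())  # in-memory cache instead of FileCache
    loader = Loader({}, session=session)
    text = loader.fetch_text(u"https://example.org/schema.json")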

schema_salad/tests/test_examples.py

Lines changed: 2 additions & 2 deletions
@@ -32,13 +32,13 @@ def test_schemas(self):
         l = schema_salad.ref_resolver.Loader({})
 
         ra, _ = l.resolve_all({
-            u"$schemas": [get_data("tests/EDAM.owl")],
+            u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
             u"$namespaces": {u"edam": u"http://edamontology.org/"},
             u"edam:has_format": u"edam:format_1915"
         }, "")
 
         self.assertEqual({
-            u"$schemas": [get_data("tests/EDAM.owl")],
+            u"$schemas": ["file://" + get_data("tests/EDAM.owl")],
             u"$namespaces": {u"edam": u"http://edamontology.org/"},
             u'http://edamontology.org/has_format': u'http://edamontology.org/format_1915'
         }, ra)

setup.py

Lines changed: 3 additions & 1 deletion
@@ -33,7 +33,9 @@
           'rdflib >= 4.1.0',
           'rdflib-jsonld >= 0.3.0',
           'mistune',
-          'typing']
+          'typing',
+          'CacheControl',
+          'lockfile']
 
 install_requires.append("avro")  # TODO: remove me once cwltool is
                                  # available in Debian Stable, Ubuntu 12.04 LTS
