 import hashlib
 import logging
 import collections
-import requests
 import urlparse
 import re
 import copy
+import pprint
+from StringIO import StringIO
+
+from . import validate
+from .aslist import aslist
+from .flatten import flatten
+
+import requests
+from cachecontrol import CacheControl
+from cachecontrol.caches import FileCache
 import ruamel.yaml as yaml
+
 try:
     from ruamel.yaml import CSafeLoader as SafeLoader
 except ImportError:
     from ruamel.yaml import SafeLoader  # type: ignore
-from . import validate
-import pprint
-from StringIO import StringIO
-from .aslist import aslist
-from .flatten import flatten
+
 import rdflib
 from rdflib.namespace import RDF, RDFS, OWL
 from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -64,7 +70,7 @@ def merge_properties(a, b):
 def SubLoader(loader):  # type: (Loader) -> Loader
     return Loader(loader.ctx, schemagraph=loader.graph,
                   foreign_properties=loader.foreign_properties, idx=loader.idx,
-                  cache=loader.cache)
+                  cache=loader.cache, session=loader.session)


 class Loader(object):
@@ -73,7 +79,7 @@ class Loader(object):
     DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any])

     def __init__(self, ctx, schemagraph=None, foreign_properties=None,
-                 idx=None, cache=None):
+                 idx=None, cache=None, session=None):
         # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any]) -> None
         normalize = lambda url: urlparse.urlsplit(url).geturl()
         if idx is not None:
@@ -97,6 +103,12 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
         else:
             self.cache = {}

+        if session is not None:
+            self.session = session
+        else:
+            self.session = CacheControl(requests.Session(),
+                                        cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))
+
         self.url_fields = None  # type: Set[unicode]
         self.scoped_ref_fields = None  # type: Dict[unicode, int]
         self.vocab_fields = None  # type: Set[unicode]
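
A minimal usage sketch of the new parameter (the empty ctx dict and the variable names are stand-ins, not part of this commit): a caller can build one CacheControl-wrapped session up front and share it across loaders, instead of letting each Loader construct its own FileCache-backed session as in the default branch above.

    # Sketch only: Loader comes from this module; the ctx value is a placeholder.
    import os
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches import FileCache

    # One disk-backed, HTTP-cache-aware session, reused by every loader.
    shared = CacheControl(
        requests.Session(),
        cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad")))

    loader = Loader({}, session=shared)  # bypasses the default session construction
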
@@ -166,23 +178,22 @@ def add_namespaces(self, ns):  # type: (Dict[unicode, unicode]) -> None
     def add_schemas(self, ns, base_url):
         # type: (Union[List[unicode], unicode], unicode) -> None
         for sch in aslist(ns):
-            for fmt in ['xml', 'turtle', 'rdfa']:
-                try:
-                    fetchurl = urlparse.urljoin(base_url, sch)
-                    if fetchurl not in self.cache:
-                        _logger.info("Getting external schema %s", fetchurl)
-                        cachekey = hashlib.md5(fetchurl).hexdigest()
-                        if os.path.join(os.environ["HOME"], ".settings
-                        self.cache[fetchurl] = rdflib.graph.Graph()
-                        self.cache[fetchurl].parse(fetchurl, format=fmt)
-                        self.graph += self.cache[fetchurl]
-                    break
-                except xml.sax.SAXParseException:  # type: ignore
-                    pass
-                except TypeError:
-                    pass
-                except BadSyntax:
-                    pass
+            fetchurl = urlparse.urljoin(base_url, sch)
+            if fetchurl not in self.cache:
+                _logger.info("Getting external schema %s", fetchurl)
+                content = self.fetch_text(fetchurl)
+                self.cache[fetchurl] = rdflib.graph.Graph()
+                for fmt in ['xml', 'turtle', 'rdfa']:
+                    try:
+                        self.cache[fetchurl].parse(data=content, format=fmt)
+                        self.graph += self.cache[fetchurl]
+                        break
+                    except xml.sax.SAXParseException:  # type: ignore
+                        pass
+                    except TypeError:
+                        pass
+                    except BadSyntax:
+                        pass

         for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
             self._add_properties(s)
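
The hunk above changes the shape of add_schemas: each schema URL is now fetched exactly once (through fetch_text, and therefore through the cached session), and only the rdflib parse is retried across candidate serializations. A standalone sketch of that fallback pattern; parse_any is a hypothetical helper name, since the commit inlines this logic:

    import rdflib

    def parse_any(content):
        graph = rdflib.graph.Graph()
        for fmt in ['xml', 'turtle', 'rdfa']:
            try:
                # First serialization that parses wins.
                graph.parse(data=content, format=fmt)
                return graph
            except Exception:  # the diff narrows this to SAXParseException/TypeError/BadSyntax
                continue
        return graph  # empty graph if no format matched
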
@@ -607,9 +618,9 @@ def fetch_text(self, url):
         split = urlparse.urlsplit(url)
         scheme, path = split.scheme, split.path

-        if scheme in [u'http', u'https'] and requests:
+        if scheme in [u'http', u'https'] and self.session:
             try:
-                resp = requests.get(url)
+                resp = self.session.get(url)
                 resp.raise_for_status()
             except Exception as e:
                 raise RuntimeError(url, e)
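
End to end, HTTP fetches in fetch_text now go through the CacheControl session, and SubLoader shares that session, so repeated schema fetches should be served from the in-memory graph cache or the on-disk HTTP cache. A hedged usage sketch; the schema URL and the empty ctx are hypothetical:

    loader = Loader({})  # builds the default FileCache-backed session
    loader.add_schemas("https://example.com/schema.ttl", "")  # network fetch
    sub = SubLoader(loader)  # reuses loader.session (and loader.cache) per this commit
    sub.add_schemas("https://example.com/schema.ttl", "")     # served from cache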