@@ -73,20 +73,89 @@ def merge_properties(a, b):
73
73
def SubLoader(loader):  # type: (Loader) -> Loader
    """Build a new ``Loader`` configured from an existing one.

    The parent's context, schema graph, foreign properties, index, cache,
    HTTP session and fetcher constructor are all handed to the new
    ``Loader``.

    The session is forwarded explicitly: when ``Loader`` receives no session
    it constructs a fresh ``CacheControl`` session backed by a ``FileCache``
    under ``$HOME``, which would silently discard any session the caller
    configured on the parent loader.
    """
    return Loader(loader.ctx, schemagraph=loader.graph,
                  foreign_properties=loader.foreign_properties, idx=loader.idx,
                  cache=loader.cache, session=loader.session,
                  fetcher_constructor=loader.fetcher_constructor)
77
77
78
class Fetcher(object):
    """Interface for objects that retrieve documents and resolve URLs.

    Every method here raises ``NotImplementedError``; subclasses are expected
    to override all three.
    """

    def fetch_text(self, url):  # type: (unicode) -> unicode
        """Return the text content found at ``url``."""
        raise NotImplementedError()

    def check_exists(self, url):  # type: (unicode) -> bool
        """Return whether ``url`` refers to something that exists."""
        raise NotImplementedError()

    def urljoin(self, base_url, url):  # type: (unicode, unicode) -> unicode
        """Resolve ``url`` relative to ``base_url``."""
        raise NotImplementedError()


class DefaultFetcher(Fetcher):
    """Fetcher for ``http``, ``https`` and ``file`` URLs.

    Remote URLs go through the supplied session object; ``file`` URLs are
    read from the local filesystem.  URLs already present in ``cache`` are
    answered from it without any I/O.
    """

    def __init__(self, cache, session):  # type: (dict, requests.sessions.Session) -> None
        """Store the mapping used as a cache and the session used for HTTP(S)."""
        self.cache = cache
        self.session = session

    def fetch_text(self, url):
        # type: (unicode) -> unicode
        """Return the text stored at ``url``.

        Raises:
            RuntimeError: the HTTP(S) request failed (args are ``(url, error)``)
                or the local file could not be opened or read.
            ValueError: the URL scheme is not handled.  This is also the
                outcome for an HTTP(S) URL when no session is available.
        """
        if url in self.cache:
            return self.cache[url]

        split = urlparse.urlsplit(url)
        scheme, path = split.scheme, split.path

        if scheme in [u'http', u'https'] and self.session:
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
            except Exception as e:
                raise RuntimeError(url, e)
            return resp.text
        elif scheme == 'file':
            # Local import: io.open decodes as UTF-8 on both Python 2 and 3.
            # The built-in text-mode open() would decode with the locale's
            # encoding on Python 3.
            import io
            try:
                with io.open(path, encoding="utf-8") as fp:
                    return fp.read()
            except (OSError, IOError) as e:
                if e.filename == path:
                    # The OS message already names the file; u'%s' avoids
                    # relying on the Python 2-only ``unicode`` builtin.
                    raise RuntimeError(u'%s' % (e,))
                else:
                    raise RuntimeError('Error reading %s: %s' % (url, e))
        else:
            raise ValueError('Unsupported scheme in url: %s' % url)

    def check_exists(self, url):  # type: (unicode) -> bool
        """Return True if ``url`` is cached or can be reached.

        HTTP(S) URLs are probed with a HEAD request, and any failure is
        reported as ``False``.  ``file`` URLs are checked with
        ``os.path.exists``.

        Raises:
            ValueError: the URL scheme is not handled.  This is also the
                outcome for an HTTP(S) URL when no session is available.
        """
        if url in self.cache:
            return True

        split = urlparse.urlsplit(url)
        scheme, path = split.scheme, split.path

        if scheme in [u'http', u'https'] and self.session:
            try:
                resp = self.session.head(url)
                resp.raise_for_status()
            except Exception:
                # Deliberately best-effort: an unreachable URL simply
                # "does not exist" from the caller's point of view.
                return False
            return True
        elif scheme == 'file':
            return os.path.exists(path)
        else:
            raise ValueError('Unsupported scheme in url: %s' % url)

    def urljoin(self, base_url, url):
        # type: (unicode, unicode) -> unicode
        """Resolve ``url`` against ``base_url`` using ``urlparse.urljoin``."""
        return urlparse.urljoin(base_url, url)
146
+
147
+ class Loader (object ):
81
148
def __init__ (self ,
82
149
ctx , # type: ContextType
83
- schemagraph = None , # type: Graph
150
+ schemagraph = None , # type: rdflib.graph. Graph
84
151
foreign_properties = None , # type: Set[unicode]
85
152
idx = None , # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
86
153
cache = None , # type: Dict[unicode, Any]
87
- session = None # type: requests.sessions.Session
154
+ session = None , # type: requests.sessions.Session
155
+ fetcher_constructor = None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
88
156
):
89
157
# type: (...) -> None
158
+
90
159
normalize = lambda url : urlparse .urlsplit (url ).geturl ()
91
160
self .idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]]
92
161
if idx is not None :
@@ -113,12 +182,20 @@ def __init__(self,
113
182
else :
114
183
self .cache = {}
115
184
116
- self .session = None # type: requests.sessions.Session
117
- if session is not None :
185
+ if session is None :
186
+ self .session = CacheControl (requests .Session (),
187
+ cache = FileCache (os .path .join (os .environ ["HOME" ], ".cache" , "salad" )))
188
+ else :
118
189
self .session = session
190
+
191
+ if fetcher_constructor :
192
+ self .fetcher_constructor = fetcher_constructor
119
193
else :
120
- self .session = CacheControl (requests .Session (),
121
- cache = FileCache (os .path .join (os .environ ["HOME" ], ".cache" , "salad" )))
194
+ self .fetcher_constructor = DefaultFetcher
195
+ self .fetcher = self .fetcher_constructor (self .cache , self .session )
196
+
197
+ self .fetch_text = self .fetcher .fetch_text
198
+ self .check_exists = self .fetcher .check_exists
122
199
123
200
self .url_fields = None # type: Set[unicode]
124
201
self .scoped_ref_fields = None # type: Dict[unicode, int]
@@ -171,7 +248,7 @@ def expand_url(self,
171
248
elif scoped_ref is not None and not split .fragment :
172
249
pass
173
250
else :
174
- url = urlparse .urljoin (base_url , url )
251
+ url = self . fetcher .urljoin (base_url , url )
175
252
176
253
if vocab_term and url in self .rvocab :
177
254
return self .rvocab [url ]
@@ -195,7 +272,7 @@ def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None
195
272
def add_schemas (self , ns , base_url ):
196
273
# type: (Union[List[unicode], unicode], unicode) -> None
197
274
for sch in aslist (ns ):
198
- fetchurl = urlparse .urljoin (base_url , sch )
275
+ fetchurl = self . fetcher .urljoin (base_url , sch )
199
276
if fetchurl not in self .cache :
200
277
_logger .debug ("Getting external schema %s" , fetchurl )
201
278
content = self .fetch_text (fetchurl )
@@ -346,6 +423,7 @@ def resolve_ref(self,
346
423
if url in self .idx and (not mixin ):
347
424
return self .idx [url ], {}
348
425
426
+ sl .raise_type = RuntimeError
349
427
with sl :
350
428
# "$include" directive means load raw text
351
429
if inc :
@@ -704,37 +782,6 @@ def resolve_all(self,
704
782
705
783
return document , metadata
706
784
707
- def fetch_text (self , url ):
708
- # type: (unicode) -> unicode
709
- if url in self .cache :
710
- return self .cache [url ]
711
-
712
- split = urlparse .urlsplit (url )
713
- scheme , path = split .scheme , split .path
714
-
715
- if scheme in [u'http' , u'https' ] and self .session :
716
- try :
717
- resp = self .session .get (url )
718
- resp .raise_for_status ()
719
- except Exception as e :
720
- raise RuntimeError (url , e )
721
- return resp .text
722
- elif scheme == 'file' :
723
- try :
724
- with open (path ) as fp :
725
- read = fp .read ()
726
- if hasattr (read , "decode" ):
727
- return read .decode ("utf-8" )
728
- else :
729
- return read
730
- except (OSError , IOError ) as e :
731
- if e .filename == path :
732
- raise RuntimeError (unicode (e ))
733
- else :
734
- raise RuntimeError ('Error reading %s: %s' % (url , e ))
735
- else :
736
- raise ValueError ('Unsupported scheme in url: %s' % url )
737
-
738
785
def fetch (self , url , inject_ids = True ): # type: (unicode, bool) -> Any
739
786
if url in self .idx :
740
787
return self .idx [url ]
@@ -758,21 +805,6 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any
758
805
self .idx [url ] = result
759
806
return result
760
807
761
- def check_file (self , url ): # type: (unicode) -> bool
762
- split = urlparse .urlsplit (url )
763
- scheme , path = split .scheme , split .path
764
-
765
- if scheme in [u'http' , u'https' ] and self .session :
766
- try :
767
- resp = self .session .head (url )
768
- resp .raise_for_status ()
769
- except Exception as e :
770
- return False
771
- return True
772
- elif scheme == 'file' :
773
- return os .path .exists (path )
774
- else :
775
- raise ValueError ('Unsupported scheme in url: %s' % url )
776
808
777
809
FieldType = TypeVar ('FieldType' , unicode , CommentedSeq , CommentedMap )
778
810
@@ -809,13 +841,13 @@ def validate_link(self, field, link, docid):
809
841
if link not in self .vocab and link not in self .idx and link not in self .rvocab :
810
842
if field in self .scoped_ref_fields :
811
843
return self .validate_scoped (field , link , docid )
812
- elif not self .check_file (link ):
844
+ elif not self .check_exists (link ):
813
845
raise validate .ValidationException (
814
846
"Field `%s` contains undefined reference to `%s`" % (field , link ))
815
847
elif link not in self .idx and link not in self .rvocab :
816
848
if field in self .scoped_ref_fields :
817
849
return self .validate_scoped (field , link , docid )
818
- elif not self .check_file (link ):
850
+ elif not self .check_exists (link ):
819
851
raise validate .ValidationException (
820
852
"Field `%s` contains undefined reference to `%s`" % (field , link ))
821
853
elif isinstance (link , CommentedSeq ):
0 commit comments