Skip to content

Commit 4bad634

Browse files
GlassOfWhiskey and mr-c authored
Regenerate parsers with schema graph property (#145)
Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent 9ab5512 commit 4bad634

File tree

10 files changed

+289
-20
lines changed

10 files changed

+289
-20
lines changed

README.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ Regenerate parsers
9393
To regenerate install the ``schema_salad`` package and run:
9494

9595
``cwl_utils/parser/cwl_v1_0.py`` was created via
96-
``schema-salad-tool --codegen python https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/CommonWorkflowLanguage.yml``
96+
``schema-salad-tool --codegen python https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/CommonWorkflowLanguage.yml --codegen-parser-info "org.w3id.cwl.v1_0"``
9797

9898
``cwl_utils/parser/cwl_v1_1.py`` was created via
99-
``schema-salad-tool --codegen python https://github.com/common-workflow-language/cwl-v1.1/raw/main/CommonWorkflowLanguage.yml``
99+
``schema-salad-tool --codegen python https://github.com/common-workflow-language/cwl-v1.1/raw/main/CommonWorkflowLanguage.yml --codegen-parser-info "org.w3id.cwl.v1_1"``
100100

101101
``cwl_utils/parser/cwl_v1_2.py`` was created via
102-
``schema-salad-tool --codegen python https://github.com/common-workflow-language/cwl-v1.2/raw/1.2.1_proposed/CommonWorkflowLanguage.yml``
102+
``schema-salad-tool --codegen python https://github.com/common-workflow-language/cwl-v1.2/raw/1.2.1_proposed/CommonWorkflowLanguage.yml --codegen-parser-info "org.w3id.cwl.v1_2"``
103103

104104
Release
105105
~~~~~~~

cwl_utils/parser/cwl_v1_0.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
# The code itself is released under the Apache 2.0 license and the help text is
44
# subject to the license of the original schema.
55
import copy
6+
import logging
67
import os
78
import pathlib
89
import re
910
import tempfile
1011
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
12+
import xml.sax # nosec
1113
from abc import ABC, abstractmethod
1214
from io import StringIO
1315
from typing import (
@@ -21,27 +23,32 @@
2123
Tuple,
2224
Type,
2325
Union,
26+
cast,
2427
)
2528
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2629
from urllib.request import pathname2url
2730

31+
from rdflib import Graph
32+
from rdflib.plugins.parsers.notation3 import BadSyntax
2833
from ruamel.yaml.comments import CommentedMap
2934

3035
from schema_salad.exceptions import SchemaSaladException, ValidationException
31-
from schema_salad.fetcher import DefaultFetcher, Fetcher
36+
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
3237
from schema_salad.sourceline import SourceLine, add_lc_filename
3338
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+
3439

3540
_vocab: Dict[str, str] = {}
3641
_rvocab: Dict[str, str] = {}
3742

43+
_logger = logging.getLogger("salad")
44+
3845

3946
class LoadingOptions:
4047
def __init__(
4148
self,
4249
fetcher: Optional[Fetcher] = None,
4350
namespaces: Optional[Dict[str, str]] = None,
44-
schemas: Optional[Dict[str, str]] = None,
51+
schemas: Optional[List[str]] = None,
4552
fileuri: Optional[str] = None,
4653
copyfrom: Optional["LoadingOptions"] = None,
4754
original_doc: Optional[Any] = None,
@@ -77,6 +84,10 @@ def __init__(
7784
else:
7885
self.fetcher = fetcher
7986

87+
self.cache = (
88+
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
89+
)
90+
8091
self.vocab = _vocab
8192
self.rvocab = _rvocab
8293

@@ -87,6 +98,42 @@ def __init__(
8798
self.vocab[k] = v
8899
self.rvocab[v] = k
89100

101+
@property
def graph(self) -> Graph:
    """
    Generate a merged rdflib.Graph from all entries in self.schemas.

    Each schema reference is resolved against ``self.fileuri`` (or, when
    that is None, treated as a local path), fetched through
    ``self.fetcher`` and parsed first as RDF/XML and then as Turtle.
    Successfully parsed graphs are stored in ``self.cache`` under their
    URL; the merged result is stored under a key derived from
    ``self.schemas``.

    A schema that cannot be fetched, or that parses in neither format, is
    skipped with a warning and is *not* cached, so an unparsable document
    is never recorded as an (empty) graph.

    :returns: the merged graph; an empty graph when ``self.schemas`` is
        empty or None.
    """
    graph = Graph()
    if not self.schemas:
        return graph
    key = str(hash(tuple(self.schemas)))
    if key in self.cache:
        return cast(Graph, self.cache[key])
    for schema in self.schemas:
        fetchurl = (
            self.fetcher.urljoin(self.fileuri, schema)
            if self.fileuri is not None
            else pathlib.Path(schema).resolve().as_uri()
        )
        try:
            # A cache value of True is treated as "no graph stored yet".
            if fetchurl not in self.cache or self.cache[fetchurl] is True:
                _logger.debug("Getting external schema %s", fetchurl)
                content = self.fetcher.fetch_text(fetchurl)
                parse_error = "unknown error"
                for fmt in ("xml", "turtle"):
                    # Fresh graph per attempt so that a failed parse cannot
                    # leave partial triples behind for the next format.
                    candidate = Graph()
                    try:
                        candidate.parse(
                            data=content, format=fmt, publicID=str(fetchurl)
                        )
                    except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
                        parse_error = str(e)
                    else:
                        # Cache only after a successful parse.
                        self.cache[fetchurl] = candidate
                        break
                else:
                    # No format succeeded: report it instead of silently
                    # caching an empty graph.
                    _logger.warning(
                        "Could not load extension schema %s: %s",
                        fetchurl,
                        parse_error,
                    )
                    continue
            graph += self.cache[fetchurl]
        except Exception as e:
            # Best effort: one broken extension schema must not prevent
            # the remaining schemas from being merged.
            _logger.warning(
                "Could not load extension schema %s: %s", fetchurl, str(e)
            )
    self.cache[key] = graph
    return graph
136+
90137

91138
class Savable(ABC):
92139
"""Mark classes than have a save() and fromDoc() function."""
@@ -101,14 +148,12 @@ def fromDoc(
101148
docRoot: Optional[str] = None,
102149
) -> "Savable":
103150
"""Construct this object from the result of yaml.load()."""
104-
pass
105151

106152
@abstractmethod
107153
def save(
108154
self, top: bool = False, base_url: str = "", relative_uris: bool = True
109155
) -> Dict[str, Any]:
110156
"""Convert this object to a JSON/YAML friendly dictionary."""
111-
pass
112157

113158

114159
def load_field(val, fieldtype, baseuri, loadingOptions):
@@ -140,7 +185,6 @@ def save(
140185
base_url: str = "",
141186
relative_uris: bool = True,
142187
) -> save_type:
143-
144188
if isinstance(val, Savable):
145189
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
146190
if isinstance(val, MutableSequence):

cwl_utils/parser/cwl_v1_1.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
# The code itself is released under the Apache 2.0 license and the help text is
44
# subject to the license of the original schema.
55
import copy
6+
import logging
67
import os
78
import pathlib
89
import re
910
import tempfile
1011
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
12+
import xml.sax # nosec
1113
from abc import ABC, abstractmethod
1214
from io import StringIO
1315
from typing import (
@@ -21,27 +23,32 @@
2123
Tuple,
2224
Type,
2325
Union,
26+
cast,
2427
)
2528
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2629
from urllib.request import pathname2url
2730

31+
from rdflib import Graph
32+
from rdflib.plugins.parsers.notation3 import BadSyntax
2833
from ruamel.yaml.comments import CommentedMap
2934

3035
from schema_salad.exceptions import SchemaSaladException, ValidationException
31-
from schema_salad.fetcher import DefaultFetcher, Fetcher
36+
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
3237
from schema_salad.sourceline import SourceLine, add_lc_filename
3338
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+
3439

3540
_vocab: Dict[str, str] = {}
3641
_rvocab: Dict[str, str] = {}
3742

43+
_logger = logging.getLogger("salad")
44+
3845

3946
class LoadingOptions:
4047
def __init__(
4148
self,
4249
fetcher: Optional[Fetcher] = None,
4350
namespaces: Optional[Dict[str, str]] = None,
44-
schemas: Optional[Dict[str, str]] = None,
51+
schemas: Optional[List[str]] = None,
4552
fileuri: Optional[str] = None,
4653
copyfrom: Optional["LoadingOptions"] = None,
4754
original_doc: Optional[Any] = None,
@@ -77,6 +84,10 @@ def __init__(
7784
else:
7885
self.fetcher = fetcher
7986

87+
self.cache = (
88+
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
89+
)
90+
8091
self.vocab = _vocab
8192
self.rvocab = _rvocab
8293

@@ -87,6 +98,42 @@ def __init__(
8798
self.vocab[k] = v
8899
self.rvocab[v] = k
89100

101+
@property
def graph(self) -> Graph:
    """
    Generate a merged rdflib.Graph from all entries in self.schemas.

    Each schema reference is resolved against ``self.fileuri`` (or, when
    that is None, treated as a local path), fetched through
    ``self.fetcher`` and parsed first as RDF/XML and then as Turtle.
    Successfully parsed graphs are stored in ``self.cache`` under their
    URL; the merged result is stored under a key derived from
    ``self.schemas``.

    A schema that cannot be fetched, or that parses in neither format, is
    skipped with a warning and is *not* cached, so an unparsable document
    is never recorded as an (empty) graph.

    :returns: the merged graph; an empty graph when ``self.schemas`` is
        empty or None.
    """
    graph = Graph()
    if not self.schemas:
        return graph
    key = str(hash(tuple(self.schemas)))
    if key in self.cache:
        return cast(Graph, self.cache[key])
    for schema in self.schemas:
        fetchurl = (
            self.fetcher.urljoin(self.fileuri, schema)
            if self.fileuri is not None
            else pathlib.Path(schema).resolve().as_uri()
        )
        try:
            # A cache value of True is treated as "no graph stored yet".
            if fetchurl not in self.cache or self.cache[fetchurl] is True:
                _logger.debug("Getting external schema %s", fetchurl)
                content = self.fetcher.fetch_text(fetchurl)
                parse_error = "unknown error"
                for fmt in ("xml", "turtle"):
                    # Fresh graph per attempt so that a failed parse cannot
                    # leave partial triples behind for the next format.
                    candidate = Graph()
                    try:
                        candidate.parse(
                            data=content, format=fmt, publicID=str(fetchurl)
                        )
                    except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
                        parse_error = str(e)
                    else:
                        # Cache only after a successful parse.
                        self.cache[fetchurl] = candidate
                        break
                else:
                    # No format succeeded: report it instead of silently
                    # caching an empty graph.
                    _logger.warning(
                        "Could not load extension schema %s: %s",
                        fetchurl,
                        parse_error,
                    )
                    continue
            graph += self.cache[fetchurl]
        except Exception as e:
            # Best effort: one broken extension schema must not prevent
            # the remaining schemas from being merged.
            _logger.warning(
                "Could not load extension schema %s: %s", fetchurl, str(e)
            )
    self.cache[key] = graph
    return graph
136+
90137

91138
class Savable(ABC):
92139
"""Mark classes than have a save() and fromDoc() function."""
@@ -101,14 +148,12 @@ def fromDoc(
101148
docRoot: Optional[str] = None,
102149
) -> "Savable":
103150
"""Construct this object from the result of yaml.load()."""
104-
pass
105151

106152
@abstractmethod
107153
def save(
108154
self, top: bool = False, base_url: str = "", relative_uris: bool = True
109155
) -> Dict[str, Any]:
110156
"""Convert this object to a JSON/YAML friendly dictionary."""
111-
pass
112157

113158

114159
def load_field(val, fieldtype, baseuri, loadingOptions):
@@ -140,7 +185,6 @@ def save(
140185
base_url: str = "",
141186
relative_uris: bool = True,
142187
) -> save_type:
143-
144188
if isinstance(val, Savable):
145189
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
146190
if isinstance(val, MutableSequence):

0 commit comments

Comments
 (0)