Skip to content

Commit 881d0b0

Browse files
committed
Allow providing HTTP headers
1 parent 06ab639 commit 881d0b0

File tree

2 files changed

+23
-15
lines changed

2 files changed

+23
-15
lines changed

stac_validator/utilities.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import ssl
44
from typing import Dict
55
from urllib.parse import urlparse
6-
from urllib.request import urlopen
6+
from urllib.request import Request, urlopen
77

88
import requests # type: ignore
99

@@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str:
7777
return str(e)
7878

7979

80-
def fetch_and_parse_file(input_path: str) -> Dict:
80+
def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict:
8181
"""Fetches and parses a JSON file from a URL or local file.
8282
8383
Given a URL or local file path to a JSON file, this function fetches the file,
@@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
8787
8888
Args:
8989
input_path: A string representing the URL or local file path to the JSON file.
90+
headers: For URLs: HTTP headers to include in the request
9091
9192
Returns:
9293
A dictionary containing the parsed contents of the JSON file.
@@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
9798
"""
9899
try:
99100
if is_url(input_path):
100-
resp = requests.get(input_path)
101+
resp = requests.get(input_path, headers=headers)
101102
resp.raise_for_status()
102103
data = resp.json()
103104
else:
@@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str:
150151

151152

152153
def link_request(
153-
link: Dict,
154-
initial_message: Dict,
155-
open_urls: bool = True,
154+
link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {}
156155
) -> None:
157156
"""Makes a request to a URL and appends it to the relevant field of the initial message.
158157
@@ -161,6 +160,7 @@ def link_request(
161160
initial_message: A dictionary containing lists for "request_valid", "request_invalid",
162161
"format_valid", and "format_invalid" URLs.
163162
open_urls: Whether to open link href URL
163+
headers: HTTP headers to include in the request
164164
165165
Returns:
166166
None
@@ -169,11 +169,12 @@ def link_request(
169169
if is_url(link["href"]):
170170
try:
171171
if open_urls:
172+
request = Request(link["href"], headers=headers)
172173
if "s3" in link["href"]:
173174
context = ssl._create_unverified_context()
174-
response = urlopen(link["href"], context=context)
175+
response = urlopen(request, context=context)
175176
else:
176-
response = urlopen(link["href"])
177+
response = urlopen(request)
177178
status_code = response.getcode()
178179
if status_code == 200:
179180
initial_message["request_valid"].append(link["href"])

stac_validator/validate.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ class StacValidate:
3434
links (bool): Whether to additionally validate links (only works in default mode).
3535
assets (bool): Whether to additionally validate assets (only works in default mode).
3636
assets_open_urls (bool): Whether to open assets URLs when validating assets.
37+
headers (dict): HTTP headers to include in the requests.
3738
extensions (bool): Whether to only validate STAC object extensions.
3839
custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object.
3940
verbose (bool): Whether to enable verbose output in recursive mode.
@@ -56,6 +57,7 @@ def __init__(
5657
links: bool = False,
5758
assets: bool = False,
5859
assets_open_urls: bool = True,
60+
headers: dict = {},
5961
extensions: bool = False,
6062
custom: str = "",
6163
verbose: bool = False,
@@ -70,6 +72,7 @@ def __init__(
7072
self.links = links
7173
self.assets = assets
7274
self.assets_open_urls = assets_open_urls
75+
self.headers: Dict = headers
7376
self.recursive = recursive
7477
self.max_depth = max_depth
7578
self.extensions = extensions
@@ -125,7 +128,9 @@ def assets_validator(self) -> Dict:
125128
assets = self.stac_content.get("assets")
126129
if assets:
127130
for asset in assets.values():
128-
link_request(asset, initial_message, self.assets_open_urls)
131+
link_request(
132+
asset, initial_message, self.assets_open_urls, self.headers
133+
)
129134
return initial_message
130135

131136
def links_validator(self) -> Dict:
@@ -145,7 +150,7 @@ def links_validator(self) -> Dict:
145150
for link in self.stac_content["links"]:
146151
if not is_valid_url(link["href"]):
147152
link["href"] = root_url + link["href"][1:]
148-
link_request(link, initial_message)
153+
link_request(link, initial_message, True, self.headers)
149154

150155
return initial_message
151156

@@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool:
345350
self.stac_file = st + "/" + address
346351
else:
347352
self.stac_file = address
348-
self.stac_content = fetch_and_parse_file(str(self.stac_file))
353+
self.stac_content = fetch_and_parse_file(
354+
str(self.stac_file), self.headers
355+
)
349356
self.stac_content["stac_version"] = self.version
350357
stac_type = get_stac_type(self.stac_content).lower()
351358

@@ -414,7 +421,7 @@ def validate_collections(self) -> None:
414421
Returns:
415422
None
416423
"""
417-
collections = fetch_and_parse_file(str(self.stac_file))
424+
collections = fetch_and_parse_file(str(self.stac_file), self.headers)
418425
for collection in collections["collections"]:
419426
self.schema = ""
420427
self.validate_dict(collection)
@@ -437,7 +444,7 @@ def validate_item_collection(self) -> None:
437444
"""
438445
page = 1
439446
print(f"processing page {page}")
440-
item_collection = fetch_and_parse_file(str(self.stac_file))
447+
item_collection = fetch_and_parse_file(str(self.stac_file), self.headers)
441448
self.validate_item_collection_dict(item_collection)
442449
try:
443450
if self.pages is not None:
@@ -450,7 +457,7 @@ def validate_item_collection(self) -> None:
450457
next_link = link["href"]
451458
self.stac_file = next_link
452459
item_collection = fetch_and_parse_file(
453-
str(self.stac_file)
460+
str(self.stac_file), self.headers
454461
)
455462
self.validate_item_collection_dict(item_collection)
456463
break
@@ -489,7 +496,7 @@ def run(self) -> bool:
489496
and not self.item_collection
490497
and not self.collections
491498
):
492-
self.stac_content = fetch_and_parse_file(self.stac_file)
499+
self.stac_content = fetch_and_parse_file(self.stac_file, self.headers)
493500

494501
stac_type = get_stac_type(self.stac_content).upper()
495502
self.version = self.stac_content["stac_version"]

0 commit comments

Comments
 (0)