Skip to content

Commit c599955

Browse files
committed
add data directory option, move data loader tool
1 parent 8b4720a commit c599955

File tree

4 files changed

+35
-42
lines changed

4 files changed

+35
-42
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,12 @@ Options:
104104
--base-url TEXT Base URL of the STAC API [required]
105105
--collection-id TEXT ID of the collection to which items are added
106106
--use-bulk Use bulk insert method for items
107+
--data-dir PATH Directory containing collection.json and feature
108+
collection file
107109
--help Show this message and exit.
108110
```
109111

110112
```shell
111-
cd data_loader
112113
python3 data_loader.py --base-url http://localhost:8080
113114
```
114115

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,20 @@
1-
"""Data Loader CLI tool."""
21
import json
32
import os
4-
53
import click
64
import requests
75

8-
# Define the directory where your data files are located
9-
DATA_DIR = os.path.join(os.path.dirname(__file__), "setup_data/")
10-
11-
12-
def load_data(filename):
13-
"""Load json data from a file."""
14-
with open(os.path.join(DATA_DIR, filename)) as file:
6+
def load_data(data_dir, filename):
    """Load JSON data from a file within the specified data directory.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the JSON file inside ``data_dir``.

    Returns:
        The decoded JSON document.

    Raises:
        click.Abort: If the file does not exist (after printing the missing
            path to stderr).
    """
    filepath = os.path.join(data_dir, filename)
    try:
        # Open directly instead of checking os.path.exists() first: this
        # avoids a check-then-use race. JSON is read as UTF-8 explicitly so
        # decoding does not depend on the platform's locale encoding.
        with open(filepath, encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        click.secho(f"File not found: {filepath}", fg="red", err=True)
        raise click.Abort() from None
1614

17-
18-
def load_collection(base_url, collection_id):
15+
def load_collection(base_url, collection_id, data_dir):
1916
"""Load a STAC collection into the database."""
20-
collection = load_data("collection.json")
17+
collection = load_data(data_dir, "collection.json")
2118
collection["id"] = collection_id
2219
try:
2320
resp = requests.post(f"{base_url}/collections", json=collection)
@@ -28,45 +25,45 @@ def load_collection(base_url, collection_id):
2825
click.echo(f"Status code: {resp.status_code}")
2926
click.echo(f"Collection: {collection['id']} already exists")
3027
except requests.ConnectionError:
31-
click.secho("Failed to connect", fg="red")
32-
28+
click.secho("Failed to connect", fg="red", err=True)
3329

34-
def load_items(base_url, collection_id, use_bulk):
30+
def load_items(base_url, collection_id, use_bulk, data_dir):
    """Load STAC items into the database based on the method selected.

    The collection is read from ``collection.json`` in ``data_dir``; the
    items are read from the first (alphabetically) other ``.json`` file
    found in that directory.

    Args:
        base_url: Base URL of the STAC API.
        collection_id: ID of the collection to which items are added.
        use_bulk: If true, post all items in one request; otherwise post
            them one by one.
        data_dir: Directory containing ``collection.json`` and a feature
            collection file.

    Raises:
        click.Abort: If ``data_dir`` contains no feature collection file.
    """
    # os.listdir() returns names in arbitrary order, so sort them to make the
    # selected feature collection file deterministic when several exist.
    feature_files = sorted(
        name
        for name in os.listdir(data_dir)
        if name.endswith(".json") and name != "collection.json"
    )
    if not feature_files:
        click.secho(
            "No feature collection files found in the specified directory.",
            fg="red",
            err=True,
        )
        raise click.Abort()
    feature_collection = load_data(data_dir, feature_files[0])

    load_collection(base_url, collection_id, data_dir)
    if use_bulk:
        load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir)
    else:
        load_items_one_by_one(base_url, collection_id, feature_collection, data_dir)
4245

43-
44-
def load_items_one_by_one(base_url, collection_id, feature_collection):
46+
def load_items_one_by_one(base_url, collection_id, feature_collection, data_dir=None):
    """Load STAC items into the database one by one.

    Each feature is tagged with ``collection_id`` (the feature dicts are
    modified in place) and posted to
    ``{base_url}/collections/{collection_id}/items``.

    Args:
        base_url: Base URL of the STAC API.
        collection_id: ID of the collection to which items are added.
        feature_collection: Mapping with a ``"features"`` list of item dicts.
        data_dir: Unused here; accepted so the signature matches the other
            loaders. Optional so three-argument callers keep working.
    """
    for feature in feature_collection["features"]:
        feature["collection"] = collection_id
        # Keep the try body to the network call only, so unrelated errors
        # are not mistaken for connection failures.
        try:
            resp = requests.post(
                f"{base_url}/collections/{collection_id}/items", json=feature
            )
        except requests.ConnectionError:
            click.secho("Failed to connect", fg="red", err=True)
            continue

        click_status = f"Status code: {resp.status_code}"
        if resp.status_code == 200:
            click.echo(click_status)
            click.echo(f"Added item: {feature['id']}")
        elif resp.status_code == 409:
            click.echo(click_status)
            click.echo(f"Item: {feature['id']} already exists")
        else:
            # Previously any other response was dropped silently; surface it
            # so the outcome for this item is visible.
            click.echo(click_status)
            click.secho(
                f"Unexpected response for item: {feature['id']}",
                fg="yellow",
                err=True,
            )
6160

62-
def load_items_bulk_insert(base_url, collection_id, feature_collection):
61+
def load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir):
6362
"""Load STAC items into the database via bulk insert."""
6463
try:
6564
for i, _ in enumerate(feature_collection["features"]):
6665
feature_collection["features"][i]["collection"] = collection_id
67-
resp = requests.post(
68-
f"{base_url}/collections/{collection_id}/items", json=feature_collection
69-
) # Adjust this endpoint as necessary
66+
resp = requests.post(f"{base_url}/collections/{collection_id}/items", json=feature_collection) # Adjust this endpoint as necessary
7067
if resp.status_code == 200:
7168
click.echo(f"Status code: {resp.status_code}")
7269
click.echo("Bulk inserted items successfully.")
@@ -77,21 +74,16 @@ def load_items_bulk_insert(base_url, collection_id, feature_collection):
7774
click.echo(f"Status code: {resp.status_code}")
7875
click.echo("Conflict detected, some items might already exist.")
7976
except requests.ConnectionError:
80-
click.secho("Failed to connect", fg="red")
81-
77+
click.secho("Failed to connect", fg="red", err=True)
8278

8379
@click.command()
@click.option("--base-url", required=True, help="Base URL of the STAC API")
@click.option(
    "--collection-id",
    default="test-collection",
    help="ID of the collection to which items are added",
)
@click.option("--use-bulk", is_flag=True, help="Use bulk insert method for items")
@click.option(
    "--data-dir",
    # file_okay=False: the value is later passed to os.listdir(), so reject a
    # regular file at option-parsing time instead of crashing afterwards.
    type=click.Path(exists=True, file_okay=False),
    default="sample_data/",
    help="Directory containing collection.json and feature collection file",
)
def main(base_url, collection_id, use_bulk, data_dir):
    """Load STAC items into the database."""
    # The docstring above doubles as the command's --help text, so it is kept
    # short; all work is delegated to load_items().
    load_items(base_url, collection_id, use_bulk, data_dir)


if __name__ == "__main__":
    main()
File renamed without changes.

0 commit comments

Comments
 (0)