Skip to content

Ingest sample data #32

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 25, 2022
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,7 @@ docs-image:
docs: docs-image
docker-compose -f docker-compose.docs.yml \
run docs

.PHONY: ingest
ingest:
python3 data_loader/data_loader.py
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ docker-compose up
```
make test
```

## Ingest sample data

```
make ingest
```
56 changes: 56 additions & 0 deletions data_loader/data_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Database ingestion script."""
import json
import os

import click
import requests

# Directory holding the JSON fixtures, resolved relative to this script.
DATA_DIR = os.path.join(os.path.dirname(__file__), "setup_data/")
# Base URL of the STAC API to ingest into. Defaults to the local instance but
# may be overridden through the environment; a trailing slash is dropped so
# that paths appended later never produce "//".
STAC_API_BASE_URL = os.environ.get(
    "STAC_API_BASE_URL", "http://localhost:8083"
).rstrip("/")


def load_data(filename):
    """Load a JSON document from the data directory.

    Args:
        filename: Name of the file, joined onto ``DATA_DIR``.

    Returns:
        The decoded JSON content.
    """
    # JSON is UTF-8 encoded; do not depend on the platform's default encoding.
    with open(os.path.join(DATA_DIR, filename), encoding="utf-8") as file:
        return json.load(file)


def load_collection(collection_id):
    """Load the sample STAC collection into the database.

    Reads ``collection.json``, replaces its ``id`` with ``collection_id`` and
    POSTs it to ``{STAC_API_BASE_URL}/collections``. The outcome is printed;
    a connection failure is reported instead of raised.

    Args:
        collection_id: Identifier to assign to the collection before posting.
    """
    collection = load_data("collection.json")
    collection["id"] = collection_id

    # Keep the try body limited to the call that can raise ConnectionError.
    try:
        resp = requests.post(f"{STAC_API_BASE_URL}/collections", json=collection)
    except requests.ConnectionError:
        click.secho("failed to connect")
        return

    print(f"Status code: {resp.status_code}")
    # 201 Created is accepted alongside 200 so a spec-following server is
    # also reported as a success.
    if resp.status_code in (200, 201):
        print(f"Added collection: {collection['id']}")
    elif resp.status_code == 409:
        print(f"Collection: {collection['id']} already exists")
    else:
        # Any other status used to be ignored silently; surface it.
        click.secho(f"Failed to add collection: {collection['id']}: {resp.text}")


def load_items():
    """Load the sample STAC items into the database.

    Creates the ``test-collection`` collection, then POSTs every feature of
    ``sentinel-s2-l2a-cogs_0_100.json`` to that collection's ``items``
    endpoint. The outcome of each request is printed; a connection failure is
    reported for the affected item and the loop continues.
    """
    feature_collection = load_data("sentinel-s2-l2a-cogs_0_100.json")
    collection = "test-collection"
    load_collection(collection)

    items_url = f"{STAC_API_BASE_URL}/collections/{collection}/items"

    # NOTE(review): per the review discussion on this change, the Transactions
    # Extension allows posting an ItemCollection to this endpoint, but
    # stac-fastapi reportedly does not support that and its bulk_items
    # endpoint had a bug at the time. Items are therefore posted one by one,
    # which is fine for loading locally; revisit once bulk loading works.
    for feature in feature_collection["features"]:
        feature["collection"] = collection

        # Keep the try body limited to the call that can raise ConnectionError.
        try:
            resp = requests.post(items_url, json=feature)
        except requests.ConnectionError:
            click.secho("failed to connect")
            continue

        print(f"Status code: {resp.status_code}")
        # 201 Created is accepted alongside 200 so a spec-following server is
        # also reported as a success.
        if resp.status_code in (200, 201):
            print(f"Added item: {feature['id']}")
        elif resp.status_code == 409:
            print(f"Item: {feature['id']} already exists")
        else:
            # Any other status used to be ignored silently; surface it.
            click.secho(f"Failed to add item: {feature['id']}: {resp.text}")


if __name__ == "__main__":
    # Ingest only when executed as a script (e.g. via ``make ingest``), so
    # importing this module does not trigger network requests.
    load_items()
194 changes: 194 additions & 0 deletions data_loader/setup_data/collection.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
{
"id":"sentinel-s2-l2a-cogs",
"stac_version":"1.0.0",
"description":"Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
"links":[
{"rel":"self","href":"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs"},
{"rel":"license","href":"https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"},
{"rel":"about","href":"https://github.com/stac-utils/stac-sentinel"},
{"rel":"parent","href":"https://earth-search.aws.element84.com/v0/"},
{"rel":"root","href":"https://earth-search.aws.element84.com/v0/"},
{"rel":"items","href":"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items"}
],
"stac_extensions":["https://stac-extensions.github.io/item-assets/v1.0.0/schema.json"],
"title":"Sentinel 2 L2A COGs",
"keywords":["sentinel","earth observation","esa"],
"providers":[
{"name":"ESA","roles":["producer"],"url":"https://earth.esa.int/web/guest/home"},
{"name":"Sinergise","roles":["processor"],"url":"https://registry.opendata.aws/sentinel-2/"},
{"name":"AWS","roles":["host"],"url":"http://sentinel-pds.s3-website.eu-central-1.amazonaws.com/"},
{"name":"Element 84","roles":["processor"],"url":"https://element84.com"}
],
"summaries":{
"platform":["sentinel-2a","sentinel-2b"],
"constellation":["sentinel-2"],
"instruments":["msi"],
"gsd":[10],
"view:off_nadir":[0]
},
"item_assets":{
"thumbnail":{
"title":"Thumbnail",
"type":"image/png",
"roles":["thumbnail"]
},
"overview":{
"title":"True color image",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["overview"],
"gsd":10,
"eo:bands":[
{"name":"B04","common_name":"red","center_wavelength":0.6645,"full_width_half_max":0.038},
{"name":"B03","common_name":"green","center_wavelength":0.56,"full_width_half_max":0.045},
{"name":"B02","common_name":"blue","center_wavelength":0.4966,"full_width_half_max":0.098}
]
},
"info":{
"title":"Original JSON metadata",
"type":"application/json",
"roles":["metadata"]
},
"metadata":{
"title":"Original XML metadata",
"type":"application/xml",
"roles":["metadata"]
},
"visual":{
"title":"True color image",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["overview"],
"gsd":10,
"eo:bands":[
{"name":"B04","common_name":"red","center_wavelength":0.6645,"full_width_half_max":0.038},
{"name":"B03","common_name":"green","center_wavelength":0.56,"full_width_half_max":0.045},
{"name":"B02","common_name":"blue","center_wavelength":0.4966,"full_width_half_max":0.098}
]
},
"B01":{
"title":"Band 1 (coastal)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":60,
"eo:bands":[
{"name":"B01","common_name":"coastal","center_wavelength":0.4439,"full_width_half_max":0.027}
]
},
"B02":{
"title":"Band 2 (blue)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":10,
"eo:bands":[
{"name":"B02","common_name":"blue","center_wavelength":0.4966,"full_width_half_max":0.098}
]
},
"B03":{
"title":"Band 3 (green)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":10,
"eo:bands":[
{"name":"B03","common_name":"green","center_wavelength":0.56,"full_width_half_max":0.045}
]
},
"B04":{
"title":"Band 4 (red)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":10,
"eo:bands":[
{"name":"B04","common_name":"red","center_wavelength":0.6645,"full_width_half_max":0.038}
]},
"B05":{
"title":"Band 5",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B05","center_wavelength":0.7039,"full_width_half_max":0.019}
]
},
"B06":{
"title":"Band 6",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B06","center_wavelength":0.7402,"full_width_half_max":0.018}
]
},
"B07":{
"title":"Band 7",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B07","center_wavelength":0.7825,"full_width_half_max":0.028}
]
},
"B08":{
"title":"Band 8 (nir)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":10,
"eo:bands":[
{"name":"B08","common_name":"nir","center_wavelength":0.8351,"full_width_half_max":0.145}
]
},
"B8A":{
"title":"Band 8A",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B8A","center_wavelength":0.8648,"full_width_half_max":0.033}
]
},
"B09":{
"title":"Band 9",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":60,
"eo:bands":[
{"name":"B09","center_wavelength":0.945,"full_width_half_max":0.026}
]
},
"B11":{
"title":"Band 11 (swir16)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B11","common_name":"swir16","center_wavelength":1.6137,"full_width_half_max":0.143}
]
},
"B12":{
"title":"Band 12 (swir22)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"],
"gsd":20,
"eo:bands":[
{"name":"B12","common_name":"swir22","center_wavelength":2.22024,"full_width_half_max":0.242}
]
},
"AOT":{
"title":"Aerosol Optical Thickness (AOT)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"]},
"WVP":{
"title":"Water Vapour (WVP)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"]
},
"SCL":{
"title":"Scene Classification Map (SCL)",
"type":"image/tiff; application=geotiff; profile=cloud-optimized",
"roles":["data"]
}
},
"extent":{
"spatial":{"bbox":[[-180,-90,180,90]]},
"temporal":{"interval":[["2015-06-27T10:25:31.456000Z",null]]}
},
"license":"proprietary"
}
1 change: 1 addition & 0 deletions data_loader/setup_data/sentinel-s2-l2a-cogs_0_100.json

Large diffs are not rendered by default.