Skip to content

Clean up transactions logic #39

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Serializers."""
import abc
from datetime import datetime
from typing import TypedDict

import attr
from stac_pydantic.shared import DATETIME_RFC339

from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.links import CollectionLinks, ItemLinks, resolve_links
Expand All @@ -22,6 +24,30 @@ def db_to_stac(cls, item: dict, base_url: str) -> TypedDict:
class ItemSerializer(Serializer):
"""Serialization methods for STAC items."""

@classmethod
def stac_to_db(cls, stac_data: TypedDict, base_url: str) -> stac_types.Item:
"""Transform STAC Item to database-ready STAC Item."""
item_links = ItemLinks(
collection_id=stac_data["collection"],
item_id=stac_data["id"],
base_url=base_url,
).create_links()
stac_data["links"] = item_links

# elasticsearch doesn't like the fact that some values are float and some were int
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to fix this in #31 -- I'm about to merge the stac-server issue about the same thing, so it should be straightforward to port it.

if "eo:bands" in stac_data["properties"]:
for wave in stac_data["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in stac_data["properties"]:
stac_data["properties"]["created"] = str(now)
stac_data["properties"]["updated"] = str(now)
return stac_data

@classmethod
def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
"""Transform database model to stac item."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.core import BaseTransactionsClient
from stac_fastapi.types.errors import ConflictError, ForeignKeyError, NotFoundError
from stac_fastapi.types.links import CollectionLinks, ItemLinks
from stac_fastapi.types.links import CollectionLinks

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -69,19 +69,6 @@ def create_item(self, model: stac_types.Item, **kwargs):
return return_msg

# If a single item is posted
item_links = ItemLinks(
collection_id=model["collection"], item_id=model["id"], base_url=base_url
).create_links()
model["links"] = item_links

# elasticsearch doesn't like the fact that some values are float and some were int
if "eo:bands" in model["properties"]:
for wave in model["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})

if not self.client.exists(index="stac_collections", id=model["collection"]):
raise ForeignKeyError(f"Collection {model['collection']} does not exist")

Expand All @@ -90,12 +77,10 @@ def create_item(self, model: stac_types.Item, **kwargs):
f"Item {model['id']} in collection {model['collection']} already exists"
)

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in model["properties"]:
model["properties"]["created"] = str(now)
data = ItemSerializer.stac_to_db(model, base_url)

self.client.index(
index="stac_items", doc_type="_doc", id=model["id"], document=model
index="stac_items", doc_type="_doc", id=model["id"], document=data
)
return ItemSerializer.db_to_stac(model, base_url)

Expand Down Expand Up @@ -174,33 +159,8 @@ def __attrs_post_init__(self):
settings = ElasticsearchSettings()
self.client = settings.create_client

def _create_item_index(self):
mapping = {
"mappings": {
"properties": {
"geometry": {"type": "geo_shape"},
"id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
"properties__datetime": {
"type": "text",
"fields": {"keyword": {"type": "keyword"}},
},
}
}
}

_ = self.client.indices.create(
index="stac_items",
body=mapping,
ignore=400, # ignore 400 already exists code
)

def _preprocess_item(self, model: stac_types.Item, base_url) -> stac_types.Item:
"""Preprocess items to match data model."""
item_links = ItemLinks(
collection_id=model["collection"], item_id=model["id"], base_url=base_url
).create_links()
model["links"] = item_links

if not self.client.exists(index="stac_collections", id=model["collection"]):
raise ForeignKeyError(f"Collection {model['collection']} does not exist")

Expand All @@ -209,18 +169,8 @@ def _preprocess_item(self, model: stac_types.Item, base_url) -> stac_types.Item:
f"Item {model['id']} in collection {model['collection']} already exists"
)

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in model["properties"]:
model["properties"]["created"] = str(now)

# elasticsearch doesn't like the fact that some values are float and some were int
if "eo:bands" in model["properties"]:
for wave in model["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})
return model
item = ItemSerializer.stac_to_db(model, base_url)
return item

def bulk_sync(self, processed_items):
"""Elasticsearch bulk insertion."""
Expand All @@ -231,7 +181,8 @@ def bulk_sync(self, processed_items):

def bulk_item_insert(self, items: Items, **kwargs) -> str:
"""Bulk item insertion using es."""
self._create_item_index()
transactions_client = TransactionsClient()
transactions_client._create_item_index()
try:
base_url = str(kwargs["request"].base_url)
except Exception:
Expand Down