Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

new gfs loader #338

Merged
merged 29 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
35a032b
new gfs loader
AUdaltsova Jun 27, 2024
e3a002b
add warning to filter_channels.py
AUdaltsova Jun 27, 2024
e20c917
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
5735f9f
pass provider label to filter_channels
AUdaltsova Jun 27, 2024
ce4d82a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
4d9a132
Update warning in filter_channels.py
AUdaltsova Jun 27, 2024
da13cfd
add gfs test data
AUdaltsova Jun 27, 2024
fcd6fe7
Added gfs load test to test_load_nwp.py
AUdaltsova Jun 27, 2024
5cfcf9c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
a61b8d4
Update gfs.py
AUdaltsova Jun 27, 2024
8beef57
Update filter_channels.py
AUdaltsova Jun 27, 2024
6d93665
Update test_load_nwp.py
AUdaltsova Jun 27, 2024
c19f4af
linting filter_channels.py
AUdaltsova Jun 27, 2024
6f3393b
linting gfs.py
AUdaltsova Jun 27, 2024
5795881
Delete tests/data/gfs.zarr.zip
AUdaltsova Jun 27, 2024
50ac208
add gfs test data
AUdaltsova Jun 27, 2024
ab097f0
Create .zmetadata
AUdaltsova Jun 28, 2024
7432995
Create .zgroup
AUdaltsova Jun 28, 2024
26d18c4
Create .zattrs
AUdaltsova Jun 28, 2024
32bfe1e
Update gfs test data path test_load_nwp.py
AUdaltsova Jun 28, 2024
d6f7a9f
Update provider label for gfs, cleaner type checks in test_load_nwp.py
AUdaltsova Jun 28, 2024
eed00a5
Update comment in filter_channels.py
AUdaltsova Jun 28, 2024
a8e8cab
remove unnecessary bool in test_load_nwp.py
AUdaltsova Jun 28, 2024
4f28218
remove extrapolation option gfs.py
AUdaltsova Jun 28, 2024
5fb9eee
change warning text filter_channels.py
AUdaltsova Jun 28, 2024
6066ec8
Update gfs consts in consts.py
AUdaltsova Jun 28, 2024
93df966
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
ff7a302
typo consts.py
AUdaltsova Jun 28, 2024
2264e61
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 11 additions & 28 deletions ocf_datapipes/load/nwp/providers/gfs.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,33 @@
"""Open GFS Forecast data"""

import logging
from pathlib import Path
from typing import Union

import pandas as pd
import xarray as xr

from ocf_datapipes.load.nwp.providers.utils import open_zarr_paths

_log = logging.getLogger(__name__)


def open_gfs(zarr_path: Union[Path, str]) -> xr.Dataset:
def open_gfs(zarr_path) -> xr.DataArray:
"""
Opens GFS dataset
Opens the GFS data

Args:
zarr_path: Path to Zarr(s) to open
zarr_path: Path to the zarr to open

Returns:
Xarray dataset of GFS Forecasts
Xarray DataArray of the NWP data
"""

_log.info("Loading NWP GFS data")

if "*" in zarr_path:
nwp = xr.open_mfdataset(zarr_path, engine="zarr", combine="time", chunks="auto")
else:
nwp = xr.load_dataset(zarr_path, engine="zarr", mode="r", chunks="auto")

variables = list(nwp.keys())
# Open data
gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc")
nwp: xr.DataArray = gfs.to_array()

nwp = xr.concat([nwp[v] for v in variables], "channel")
nwp = nwp.assign_coords(channel=variables)
del gfs

nwp = nwp.transpose("time", "step", "channel", "latitude", "longitude")
nwp = nwp.rename({"time": "init_time_utc"})
nwp = nwp.rename({"variable": "channel"})
nwp = nwp.transpose("init_time_utc", "step", "channel", "latitude", "longitude")
if "valid_time" in nwp.coords.keys():
nwp = nwp.drop("valid_time")

_log.debug("Interpolating hour 0 to NWP data")
nwp_step0 = nwp.interp(step=[pd.Timedelta(hours=0)])
nwp = xr.concat([nwp_step0, nwp], dim="step")
nwp = nwp.resample(init_time_utc="60min").pad()
nwp = nwp.resample(step="60min").pad()

_log.debug(nwp)

return nwp
14 changes: 14 additions & 0 deletions ocf_datapipes/select/filter_channels.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
import logging
from typing import List, Union

import numpy as np
import xarray as xr
from torch.utils.data import IterDataPipe, functional_datapipe

@@ -18,6 +19,7 @@ def __init__(
source_datapipe: IterDataPipe,
channels: List[str],
dim_name: str = "channel",
provider: str = None,
):
"""
Filter channels
@@ -26,10 +28,22 @@ def __init__(
source_datapipe: Datapipe of Xarray objects
channels: Channel names to keep
dim_name: Dimension name for channels
provider: Name of NWP source, if available
"""
self.source_datapipe = source_datapipe
self.channels = channels
self.dim_name = dim_name
self.provider = provider

if self.provider == "gfs":
flux_vars = np.intersect1d(self.channels, ["dswrf", "dlwrf"])

if len(flux_vars) > 0:
logger.warning(
f"You have requested channels that have no step 0: {flux_vars}. "
f"Step 0 will be set to NaN. "
f"For more info see https://github.com/openclimatefix/ocf_datapipes/issues/253"
)

def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
for xr_data in self.source_datapipe:
Expand Down
5 changes: 4 additions & 1 deletion ocf_datapipes/training/common.py
Original file line number Diff line number Diff line change
@@ -159,7 +159,10 @@ def open_and_return_datapipes(
nwp_conf.nwp_zarr_path,
provider=nwp_conf.nwp_provider,
)
.filter_channels(nwp_conf.nwp_channels)
.filter_channels(
nwp_conf.nwp_channels,
provider=nwp_conf.nwp_provider,
)
.add_t0_idx_and_sample_period_duration(
sample_period_duration=minutes(nwp_conf.time_resolution_minutes),
history_duration=minutes(nwp_conf.history_minutes),
Expand Down
47 changes: 35 additions & 12 deletions ocf_datapipes/utils/consts.py
Original file line number Diff line number Diff line change
@@ -134,22 +134,45 @@ def __getitem__(self, key):

# ------ GFS
GFS_STD = {
"t": 5.017000766747606,
"dswrf": 233.1834250473355,
"prate": 0.00021690701537950742,
"dlwrf": 46.571,
"u": 4.165,
"v": 4.123,
"dlwrf": 96.305916,
"dswrf": 246.18533,
"hcc": 42.525383,
"lcc": 44.3732,
"mcc": 43.150745,
"prate": 0.00010159573,
"r": 25.440672,
"sde": 0.43345627,
"t": 22.825893,
"tcc": 41.030598,
"u10": 5.470838,
"u100": 6.8899174,
"v10": 4.7401133,
"v100": 6.076132,
"vis": 8294.022,
"u": 10.614556,
"v": 7.176398,
}
GFS_MEAN = {
"t": 285.7799539185846,
"dswrf": 294.6696933986283,
"prate": 3.6078121378638696e-05,
"dlwrf": 319,
"u": 0.552,
"v": -0.477,
"dlwrf": 298.342,
"dswrf": 168.12321,
"hcc": 35.272,
"lcc": 43.578342,
"mcc": 33.738823,
"prate": 2.8190969e-05,
"r": 18.359747,
"sde": 0.36937004,
"t": 278.5223,
"tcc": 66.841606,
"u10": -0.0022310058,
"u100": 0.0823025,
"v10": 0.06219831,
"v100": 0.0797807,
"vis": 19628.32,
"u": 11.645444,
"v": 0.12330122,
}


GFS_VARIABLE_NAMES = tuple(GFS_MEAN.keys())
GFS_STD = _to_data_array(GFS_STD)
GFS_MEAN = _to_data_array(GFS_MEAN)
Expand Down
14 changes: 14 additions & 0 deletions tests/data/gfs.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"Conventions": "CF-1.7",
"GRIB_centre": "kwbc",
"GRIB_centreDescription": "US National Weather Service - NCEP",
"GRIB_edition": 2,
"GRIB_subCentre": 0,
"description": "Global Forecast System",
"institution": "US National Weather Service - NCEP",
"local_grib": "/root/data/gfs/20230624/subset_56ef2a91__gfs.t00z.pgrb2.1p00.f000",
"model": "gfs",
"product": "pgrb2.1p00",
"remote_grib": "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.20230624/00/atmos/gfs.t00z.pgrb2.1p00.f000",
"search": ":[TLMH]CDC:|:[UV]GRD:(?:10|100) m|:RH:sigma|:(?:TMP|VIS|PRATE|SNOD|D[SL]WRF):surface|:RH:100 mb"
}
3 changes: 3 additions & 0 deletions tests/data/gfs.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
Loading
Loading