Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 9effc61

Browse files
authored
Merge pull request #338 from openclimatefix/gfs_loader
new gfs loader
2 parents 0010180 + 2264e61 commit 9effc61

File tree

36 files changed

+445
-49
lines changed

36 files changed

+445
-49
lines changed
Lines changed: 11 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,50 +1,33 @@
11
"""Open GFS Forecast data"""
22

33
import logging
4-
from pathlib import Path
5-
from typing import Union
64

7-
import pandas as pd
85
import xarray as xr
96

7+
from ocf_datapipes.load.nwp.providers.utils import open_zarr_paths
8+
109
_log = logging.getLogger(__name__)
1110

1211

13-
def open_gfs(zarr_path: Union[Path, str]) -> xr.Dataset:
12+
def open_gfs(zarr_path) -> xr.DataArray:
1413
"""
15-
Opens GFS dataset
14+
Opens the GFS data
1615
1716
Args:
18-
zarr_path: Path to Zarr(s) to open
17+
zarr_path: Path to the zarr to open
1918
2019
Returns:
21-
Xarray dataset of GFS Forecasts
20+
Xarray DataArray of the NWP data
2221
"""
23-
2422
_log.info("Loading NWP GFS data")
2523

26-
if "*" in zarr_path:
27-
nwp = xr.open_mfdataset(zarr_path, engine="zarr", combine="time", chunks="auto")
28-
else:
29-
nwp = xr.load_dataset(zarr_path, engine="zarr", mode="r", chunks="auto")
30-
31-
variables = list(nwp.keys())
24+
# Open data
25+
gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc")
26+
nwp: xr.DataArray = gfs.to_array()
3227

33-
nwp = xr.concat([nwp[v] for v in variables], "channel")
34-
nwp = nwp.assign_coords(channel=variables)
28+
del gfs
3529

36-
nwp = nwp.transpose("time", "step", "channel", "latitude", "longitude")
37-
nwp = nwp.rename({"time": "init_time_utc"})
30+
nwp = nwp.rename({"variable": "channel"})
3831
nwp = nwp.transpose("init_time_utc", "step", "channel", "latitude", "longitude")
39-
if "valid_time" in nwp.coords.keys():
40-
nwp = nwp.drop("valid_time")
41-
42-
_log.debug("Interpolating hour 0 to NWP data")
43-
nwp_step0 = nwp.interp(step=[pd.Timedelta(hours=0)])
44-
nwp = xr.concat([nwp_step0, nwp], dim="step")
45-
nwp = nwp.resample(init_time_utc="60min").pad()
46-
nwp = nwp.resample(step="60min").pad()
47-
48-
_log.debug(nwp)
4932

5033
return nwp

ocf_datapipes/select/filter_channels.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import logging
44
from typing import List, Union
55

6+
import numpy as np
67
import xarray as xr
78
from torch.utils.data import IterDataPipe, functional_datapipe
89

@@ -18,6 +19,7 @@ def __init__(
1819
source_datapipe: IterDataPipe,
1920
channels: List[str],
2021
dim_name: str = "channel",
22+
provider: str = None,
2123
):
2224
"""
2325
Filter channels
@@ -26,10 +28,22 @@ def __init__(
2628
source_datapipe: Datapipe of Xarray objects
2729
channels: Channel names to keep
2830
dim_name: Dimension name for channels
31+
provider: Name of NWP source, if available
2932
"""
3033
self.source_datapipe = source_datapipe
3134
self.channels = channels
3235
self.dim_name = dim_name
36+
self.provider = provider
37+
38+
if self.provider == "gfs":
39+
flux_vars = np.intersect1d(self.channels, ["dswrf", "dlwrf"])
40+
41+
if len(flux_vars) > 0:
42+
logger.warning(
43+
f"You have requested channels that have no step 0: {flux_vars}. "
44+
f"Step 0 will be set to NaN. "
45+
f"For more info see https://github.com/openclimatefix/ocf_datapipes/issues/253"
46+
)
3347

3448
def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
3549
for xr_data in self.source_datapipe:

ocf_datapipes/training/common.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,10 @@ def open_and_return_datapipes(
159159
nwp_conf.nwp_zarr_path,
160160
provider=nwp_conf.nwp_provider,
161161
)
162-
.filter_channels(nwp_conf.nwp_channels)
162+
.filter_channels(
163+
nwp_conf.nwp_channels,
164+
provider=nwp_conf.nwp_provider,
165+
)
163166
.add_t0_idx_and_sample_period_duration(
164167
sample_period_duration=minutes(nwp_conf.time_resolution_minutes),
165168
history_duration=minutes(nwp_conf.history_minutes),

ocf_datapipes/utils/consts.py

Lines changed: 35 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -134,22 +134,45 @@ def __getitem__(self, key):
134134

135135
# ------ GFS
136136
GFS_STD = {
137-
"t": 5.017000766747606,
138-
"dswrf": 233.1834250473355,
139-
"prate": 0.00021690701537950742,
140-
"dlwrf": 46.571,
141-
"u": 4.165,
142-
"v": 4.123,
137+
"dlwrf": 96.305916,
138+
"dswrf": 246.18533,
139+
"hcc": 42.525383,
140+
"lcc": 44.3732,
141+
"mcc": 43.150745,
142+
"prate": 0.00010159573,
143+
"r": 25.440672,
144+
"sde": 0.43345627,
145+
"t": 22.825893,
146+
"tcc": 41.030598,
147+
"u10": 5.470838,
148+
"u100": 6.8899174,
149+
"v10": 4.7401133,
150+
"v100": 6.076132,
151+
"vis": 8294.022,
152+
"u": 10.614556,
153+
"v": 7.176398,
143154
}
144155
GFS_MEAN = {
145-
"t": 285.7799539185846,
146-
"dswrf": 294.6696933986283,
147-
"prate": 3.6078121378638696e-05,
148-
"dlwrf": 319,
149-
"u": 0.552,
150-
"v": -0.477,
156+
"dlwrf": 298.342,
157+
"dswrf": 168.12321,
158+
"hcc": 35.272,
159+
"lcc": 43.578342,
160+
"mcc": 33.738823,
161+
"prate": 2.8190969e-05,
162+
"r": 18.359747,
163+
"sde": 0.36937004,
164+
"t": 278.5223,
165+
"tcc": 66.841606,
166+
"u10": -0.0022310058,
167+
"u100": 0.0823025,
168+
"v10": 0.06219831,
169+
"v100": 0.0797807,
170+
"vis": 19628.32,
171+
"u": 11.645444,
172+
"v": 0.12330122,
151173
}
152174

175+
153176
GFS_VARIABLE_NAMES = tuple(GFS_MEAN.keys())
154177
GFS_STD = _to_data_array(GFS_STD)
155178
GFS_MEAN = _to_data_array(GFS_MEAN)

tests/data/gfs.zarr/.zattrs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
{
2+
"Conventions": "CF-1.7",
3+
"GRIB_centre": "kwbc",
4+
"GRIB_centreDescription": "US National Weather Service - NCEP",
5+
"GRIB_edition": 2,
6+
"GRIB_subCentre": 0,
7+
"description": "Global Forecast System",
8+
"institution": "US National Weather Service - NCEP",
9+
"local_grib": "/root/data/gfs/20230624/subset_56ef2a91__gfs.t00z.pgrb2.1p00.f000",
10+
"model": "gfs",
11+
"product": "pgrb2.1p00",
12+
"remote_grib": "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.20230624/00/atmos/gfs.t00z.pgrb2.1p00.f000",
13+
"search": ":[TLMH]CDC:|:[UV]GRD:(?:10|100) m|:RH:sigma|:(?:TMP|VIS|PRATE|SNOD|D[SL]WRF):surface|:RH:100 mb"
14+
}

tests/data/gfs.zarr/.zgroup

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}

0 commit comments

Comments
 (0)