Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit ced7e1d

Browse files
authored
Merge pull request #312 from openclimatefix/NWP-patch
Fix hardcode NWP time resolution and merra2 loader bug
2 parents c4776ba + 98cf693 commit ced7e1d

File tree

6 files changed

+57
-6
lines changed

6 files changed

+57
-6
lines changed

ocf_datapipes/load/README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,49 @@ This module contains the code for loading the data off disk.
44
This data is always opened into Xarray `Dataset` or `DataArray` objects for further processing.
55
The opened data should only have dimensions renamed to the common format, and minimal processing done.
66
Any real processing should be performed in the `transform` module.
7+
8+
## NWP
9+
10+
The NWP data is loaded into an ```IterDataPipe``` in `ocf_datapipes/load/nwp/nwp.py` using a provider from `ocf_datapipes/load/nwp/providers`. Providers open the data file and transform the data into a `DataArray` of a standardised shape the datapipes use; normally this means having the following 5 dimensions: ```init_time_utc, step, channel, latitude, longitude```.
11+
12+
Example of loaded ECMWF data:
13+
14+
```
15+
<xarray.DataArray 'ECMWF_BLAH' (init_time_utc: 1, step: 2, channel: 2,
16+
latitude: 221, longitude: 221)> Size: 781kB
17+
dask.array<transpose, shape=(1, 2, 2, 221, 221), dtype=float32, chunksize=(1, 2, 2, 221, 221), chunktype=numpy.ndarray>
18+
Coordinates:
19+
* init_time_utc (init_time_utc) datetime64[ns] 8B 2023-09-25T12:00:00
20+
* latitude (latitude) float64 2kB 31.0 30.95 30.9 ... 20.1 20.05 20.0
21+
* longitude (longitude) float64 2kB 68.0 68.05 68.1 ... 78.9 78.95 79.0
22+
* step (step) timedelta64[ns] 16B 00:00:00 01:00:00
23+
* channel (channel) <U5 40B 'dlwrf' 'dswrf'
24+
Attributes:
25+
Conventions: CF-1.7
26+
GRIB_centre: ecmf
27+
GRIB_centreDescription: European Centre for Medium-Range Weather Forecasts
28+
GRIB_subCentre: 0
29+
institution: European Centre for Medium-Range Weather Forecasts
30+
```
31+
32+
There are exceptions, e.g. ICON Global uses an icosahedral grid, so it is organised differently and does not have ```latitude``` and ```longitude``` dimensions.
33+
34+
### Adding an NWP provider
35+
36+
1. Add a [provider].py file to `ocf_datapipes/load/nwp/providers` that uses `open_zarr_paths` from `ocf_datapipes.load.nwp.providers.utils` to load the file(s) and returns your data in the right shape, where the dimensions contain:
37+
- `init_time_utc`: when the data was initialised
38+
- `step`: time offset (timedelta) of the datapoint from its `init_time_utc`
39+
- `channel`: list of variables
40+
- `latitude`: latitude
41+
- `longitude`: longitude
42+
43+
Add sanity checks to ensure the time (`init_time_utc + step`) is unique and monotonically increasing.
44+
45+
2. Add your provider as an option to the `IterDataPipe` in `ocf_datapipes/load/nwp/nwp.py`
46+
47+
3. Add test data to `ocf_datapipes/tests/data` and create a test in `ocf_datapipes/tests/load/nwp/test_load_nwp.py`
48+
Current tests include:
49+
- checking the loaded data is not None
50+
- checking all expected dimensions are present
51+
- (for some data) checking that an `xr.DataArray` is returned
52+
4. Calculate the mean and std of your data and add to `ocf_datapipes/utils/consts.py`

ocf_datapipes/load/nwp/providers/ecmwf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def open_ifs(zarr_path) -> xr.DataArray:
2222
raise Exception("Too many TLDVs")
2323
else:
2424
dataVar = dataVars[0]
25-
ifs: xr.Dataset = nwp[dataVar]
25+
ifs: xr.DataArray = nwp[dataVar]
2626
del nwp
2727
ifs = ifs.transpose("init_time", "step", "variable", "latitude", "longitude")
2828
ifs = ifs.rename(

ocf_datapipes/load/nwp/providers/excarta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def preprocess_excarta(ds: xr.Dataset) -> xr.Dataset:
2626
return ds
2727

2828

29-
def open_excarta(zarr_path) -> xr.Dataset:
29+
def open_excarta(zarr_path) -> xr.DataArray:
3030
"""
3131
Opens the Excarta hindcast data
3232

ocf_datapipes/load/nwp/providers/merra2.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@ def open_merra2(zarr_path) -> xr.DataArray:
2626
{"channel": list(nwp.keys())}
2727
)
2828
nwp = nwp.transpose("init_time_utc", "step", "channel", "latitude", "longitude")
29+
aodana: xr.DataArray = nwp["AODANA"]
30+
del nwp
2931

3032
# Sanity checks.
31-
time = pd.DatetimeIndex(nwp.step + nwp.init_time_utc.values)
33+
time = pd.DatetimeIndex(aodana.step + aodana.init_time_utc.values)
3234
assert time.is_unique
3335
assert time.is_monotonic_increasing
34-
return nwp
36+
return aodana

ocf_datapipes/training/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def open_and_return_datapipes(
160160
)
161161
.filter_channels(nwp_conf.nwp_channels)
162162
.add_t0_idx_and_sample_period_duration(
163-
sample_period_duration=timedelta(hours=1),
163+
sample_period_duration=minutes(nwp_conf.time_resolution_minutes),
164164
history_duration=minutes(nwp_conf.history_minutes),
165165
)
166166
)
@@ -731,7 +731,7 @@ def slice_datapipes_by_time(
731731

732732
datapipes_dict["nwp"][nwp_key] = dp.select_time_slice_nwp(
733733
t0_datapipe=get_t0_datapipe(f"nwp/{nwp_key}"),
734-
sample_period_duration=minutes(60),
734+
sample_period_duration=minutes(conf_in.nwp[nwp_key].time_resolution_minutes),
735735
history_duration=minutes(conf_in.nwp[nwp_key].history_minutes),
736736
forecast_duration=minutes(conf_in.nwp[nwp_key].forecast_minutes),
737737
dropout_timedeltas=dropout_timedeltas,

tests/load/nwp/test_load_nwp.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pandas as pd
2+
from xarray import DataArray
23

34
from ocf_datapipes.load import OpenNWP
45

@@ -49,6 +50,7 @@ def test_load_ecmwf():
4950
)
5051
metadata = next(iter(nwp_datapipe))
5152
assert metadata is not None
53+
assert type(next(enumerate(metadata))[1]) == DataArray
5254
dim_keys = set(["channel", "init_time_utc", "latitude", "longitude", "step"])
5355
if bool(dim_keys - set(metadata.dims)):
5456
raise ValueError(
@@ -63,6 +65,7 @@ def test_load_merra2():
6365
)
6466
metadata = next(iter(nwp_datapipe))
6567
assert metadata is not None
68+
assert type(next(enumerate(metadata))[1]) == DataArray
6669
dim_keys = set(["channel", "init_time_utc", "latitude", "longitude", "step"])
6770
if bool(dim_keys - set(metadata.dims)):
6871
raise ValueError(

0 commit comments

Comments
 (0)