Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 8735ff2

Browse files
committed
Fix lint errors
1 parent: d7e8abd; commit: 8735ff2

19 files changed: 128 additions (+128) and 89 deletions (-89).

ocf_datapipes/convert/gsp.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@ def __iter__(self) -> NumpyBatch:
2727
BatchKey.gsp: xr_data.values,
2828
BatchKey.gsp_t0_idx: xr_data.attrs["t0_idx"],
2929
BatchKey.gsp_id: xr_data.gsp_id.values,
30-
BatchKey.gsp_capacity_megawatt_power: xr_data.isel(time_utc=0)["capacity_megawatt_power"].values,
30+
BatchKey.gsp_capacity_megawatt_power: xr_data.isel(time_utc=0)[
31+
"capacity_megawatt_power"
32+
].values,
3133
BatchKey.gsp_time_utc: datetime64_to_float(xr_data["time_utc"].values),
3234
}
3335

ocf_datapipes/load/gsp.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,9 @@ def __iter__(self) -> xr.DataArray:
8585
# TODO: Try using `gsp_id_to_shape.geometry.envelope.centroid`. See issue #76.
8686
x_osgb=gsp_id_to_shape.geometry.centroid.x.astype(np.float32),
8787
y_osgb=gsp_id_to_shape.geometry.centroid.y.astype(np.float32),
88-
capacity_megawatt_power=gsp_pv_power_mw_ds.installedcapacity_mwp.data.astype(np.float32),
88+
capacity_megawatt_power=gsp_pv_power_mw_ds.installedcapacity_mwp.data.astype( # noqa
89+
np.float32
90+
),
8991
)
9092

9193
del gsp_id_to_shape, gsp_pv_power_mw_ds

ocf_datapipes/load/pv/pv.py

Lines changed: 10 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,11 @@ def __iter__(self):
5151
def load_everything_into_ram(pv_power_filename, pv_metadata_filename) -> xr.DataArray:
5252
"""Open AND load PV data into RAM."""
5353
# Load pd.DataFrame of power and pd.Series of capacities:
54-
pv_power_watts, pv_capacity_watt_power, pv_system_row_number = _load_pv_power_watts_and_capacity_watt_power(
54+
(
55+
pv_power_watts,
56+
pv_capacity_watt_power,
57+
pv_system_row_number,
58+
) = _load_pv_power_watts_and_capacity_watt_power(
5559
pv_power_filename,
5660
)
5761
pv_metadata = _load_pv_metadata(pv_metadata_filename)
@@ -108,7 +112,8 @@ def _load_pv_power_watts_and_capacity_watt_power(
108112
pv_capacity_watt_power.index = [np.int32(col) for col in pv_capacity_watt_power.index]
109113
pv_power_watts.columns = pv_power_watts.columns.astype(np.int64)
110114

111-
# Create pv_system_row_number. We use the index of `pv_capacity_watt_power` because that includes
115+
# Create pv_system_row_number. We use the index of
116+
# `pv_capacity_watt_power` because that includes
112117
# the PV system IDs for the entire dataset (independent of `start_date` and `end_date`).
113118
# We use `float32` for the ID because we use NaN to indicate a missing PV system,
114119
# or that this whole example doesn't include PV.
@@ -134,7 +139,9 @@ def _load_pv_power_watts_and_capacity_watt_power(
134139

135140
# Drop any PV systems whose PV capacity is too low:
136141
PV_CAPACITY_THRESHOLD_W = 100
137-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
142+
pv_systems_to_drop = pv_capacity_watt_power.index[
143+
pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W
144+
]
138145
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
139146
_log.info(
140147
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"
@@ -164,53 +171,6 @@ def _load_pv_power_watts_and_capacity_watt_power(
164171
return pv_power_watts, pv_capacity_watt_power, pv_system_row_number
165172

166173

167-
"""Filtering to be added in a different IterDataPipe
168-
169-
pv_power_watts = pv_power_watts.clip(lower=0, upper=5e7)
170-
# Convert the pv_system_id column names from strings to ints:
171-
pv_power_watts.columns = [np.int32(col) for col in pv_power_watts.columns]
172-
173-
if "passiv" not in filename:
174-
_log.warning("Converting timezone. ARE YOU SURE THAT'S WHAT YOU WANT TO DO?")
175-
pv_power_watts = (
176-
pv_power_watts.tz_localize("Europe/London").tz_convert("UTC").tz_convert(None)
177-
)
178-
179-
pv_power_watts = _drop_pv_systems_which_produce_overnight(pv_power_watts)
180-
181-
# Resample to 5-minutely and interpolate up to 15 minutes ahead.
182-
# TODO: Issue #74: Give users the option to NOT resample (because Perceiver IO
183-
# doesn't need all the data to be perfectly aligned).
184-
pv_power_watts = pv_power_watts.resample("5T").interpolate(method="time", limit=3)
185-
pv_power_watts.dropna(axis="index", how="all", inplace=True)
186-
pv_power_watts.dropna(axis="columns", how="all", inplace=True)
187-
188-
# Drop any PV systems whose PV capacity is too low:
189-
PV_CAPACITY_THRESHOLD_W = 100
190-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
191-
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
192-
_log.info(
193-
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"
194-
f" {PV_CAPACITY_THRESHOLD_W}"
195-
)
196-
pv_power_watts.drop(columns=pv_systems_to_drop, inplace=True)
197-
198-
# Ensure that capacity and pv_system_row_num use the same PV system IDs as the power DF:
199-
pv_system_ids = pv_power_watts.columns
200-
pv_capacity_watt_power = pv_capacity_watt_power.loc[pv_system_ids]
201-
pv_system_row_number = pv_system_row_number.loc[pv_system_ids]
202-
203-
_log.info(
204-
"After filtering & resampling to 5 minutes:"
205-
f" pv_power = {pv_power_watts.values.nbytes / 1e6:,.1f} MBytes."
206-
f" {len(pv_power_watts)} PV power datetimes."
207-
f" {len(pv_power_watts.columns)} PV power PV system IDs."
208-
)
209-
210-
211-
"""
212-
213-
214174
# Adapted from nowcasting_dataset.data_sources.pv.pv_data_source
215175
def _load_pv_metadata(filename: str) -> pd.DataFrame:
216176
"""Return pd.DataFrame of PV metadata.

ocf_datapipes/load/pv/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,8 @@ def put_pv_data_into_an_xr_dataarray(
3535
ints), and the index is UTC datetime.
3636
x_osgb: The x location. Index = PV system ID ints.
3737
y_osgb: The y location. Index = PV system ID ints.
38-
capacity_watt_power: The max power output of each PV system in Watts. Index = PV system ID ints.
38+
capacity_watt_power: The max power output of each PV system in Watts.
39+
Index = PV system ID ints.
3940
pv_system_row_number: The integer position of the PV system in the metadata.
4041
Used to create the PV system ID embedding.
4142
"""

ocf_datapipes/production/power_perceiver.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,9 @@ def power_perceiver_production_datapipe(configuration_filename: Union[Path, str]
111111
)
112112
.fork(2)
113113
)
114-
topo_datapipe = topo_datapipe.reproject_topography().normalize(calculate_mean_std_from_example=True)
114+
topo_datapipe = topo_datapipe.reproject_topography().normalize(
115+
calculate_mean_std_from_example=True
116+
)
115117
sat_hrv_datapipe, sat_t0_datapipe = (
116118
sat_hrv_datapipe.convert_satellite_to_int8()
117119
.add_t0_idx_and_sample_period_duration(

ocf_datapipes/select/select_pv_systems_on_capacity.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,8 @@ def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
4040
4141
# Drop any PV systems whose PV capacity is too low:
4242
PV_CAPACITY_THRESHOLD_W = 100
43-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
43+
pv_systems_to_drop =
44+
pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
4445
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
4546
_log.info(
4647
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"

tests/batch/test_merge_numpy_modalities.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,17 @@ def test_merge_modalities(sat_hrv_datapipe, nwp_datapipe, gsp_datapipe, passiv_d
1414
batch_size = 4
1515

1616
sat_hrv_datapipe = AddT0IdxAndSamplePeriodDuration(
17-
sat_hrv_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(hours=1)
17+
sat_hrv_datapipe,
18+
sample_period_duration=timedelta(minutes=5),
19+
history_duration=timedelta(hours=1),
1820
)
1921
sat_hrv_datapipe = ConvertSatelliteToNumpyBatch(sat_hrv_datapipe, is_hrv=True)
2022
sat_hrv_datapipe = MergeNumpyExamplesToBatch(sat_hrv_datapipe, n_examples_per_batch=batch_size)
2123

2224
nwp_datapipe = AddT0IdxAndSamplePeriodDuration(
23-
nwp_datapipe, sample_period_duration=timedelta(minutes=30), history_duration=timedelta(hours=1)
25+
nwp_datapipe,
26+
sample_period_duration=timedelta(minutes=30),
27+
history_duration=timedelta(hours=1),
2428
)
2529
nwp_datapipe = ConvertNWPToNumpyBatch(nwp_datapipe)
2630
nwp_datapipe = MergeNumpyExamplesToBatch(nwp_datapipe, n_examples_per_batch=batch_size)
@@ -32,7 +36,9 @@ def test_merge_modalities(sat_hrv_datapipe, nwp_datapipe, gsp_datapipe, passiv_d
3236
gsp_datapipe = MergeNumpyExamplesToBatch(gsp_datapipe, n_examples_per_batch=batch_size)
3337

3438
passiv_datapipe = AddT0IdxAndSamplePeriodDuration(
35-
passiv_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(hours=1)
39+
passiv_datapipe,
40+
sample_period_duration=timedelta(minutes=5),
41+
history_duration=timedelta(hours=1),
3642
)
3743
passiv_datapipe = ConvertPVToNumpyBatch(passiv_datapipe)
3844
passiv_datapipe = MergeNumpyExamplesToBatch(passiv_datapipe, n_examples_per_batch=batch_size)

tests/convert/test_gsp.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,9 @@
77

88
def test_convert_gsp_to_numpy_batch(gsp_datapipe):
99
gsp_datapipe = AddT0IdxAndSamplePeriodDuration(
10-
gsp_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
10+
gsp_datapipe,
11+
sample_period_duration=timedelta(minutes=5),
12+
history_duration=timedelta(minutes=60),
1113
)
1214
gsp_datapipe = ConvertGSPToNumpyBatch(gsp_datapipe)
1315
data = next(iter(gsp_datapipe))

tests/convert/test_nwp.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88

99
def test_convert_nwp_to_numpy_batch(nwp_datapipe):
1010
nwp_datapipe = AddT0IdxAndSamplePeriodDuration(
11-
nwp_datapipe, sample_period_duration=timedelta(minutes=60), history_duration=timedelta(minutes=60)
11+
nwp_datapipe,
12+
sample_period_duration=timedelta(minutes=60),
13+
history_duration=timedelta(minutes=60),
1214
)
1315
t0_datapipe = SelectLiveT0Time(nwp_datapipe, dim_name="init_time_utc")
1416

tests/convert/test_satellite.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88
def test_convert_satellite_to_numpy_batch(sat_datapipe):
99

1010
sat_datapipe = AddT0IdxAndSamplePeriodDuration(
11-
sat_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
11+
sat_datapipe,
12+
sample_period_duration=timedelta(minutes=5),
13+
history_duration=timedelta(minutes=60),
1214
)
1315
sat_datapipe = ConvertSatelliteToNumpyBatch(sat_datapipe, is_hrv=False)
1416
data = next(iter(sat_datapipe))
@@ -20,7 +22,9 @@ def test_convert_satellite_to_numpy_batch(sat_datapipe):
2022

2123
def test_convert_hrvsatellite_to_numpy_batch(sat_datapipe):
2224
sat_datapipe = AddT0IdxAndSamplePeriodDuration(
23-
sat_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
25+
sat_datapipe,
26+
sample_period_duration=timedelta(minutes=5),
27+
history_duration=timedelta(minutes=60),
2428
)
2529
sat_datapipe = ConvertSatelliteToNumpyBatch(sat_datapipe, is_hrv=True)
2630
data = next(iter(sat_datapipe))

tests/end2end/test_power_perceiver_production.py

Lines changed: 34 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,9 @@
4646
from ocf_datapipes.utils.consts import NWP_MEAN, NWP_STD, SAT_MEAN, SAT_STD, BatchKey
4747

4848

49-
def test_power_perceiver_production(sat_hrv_datapipe, passiv_datapipe, topo_datapipe, gsp_datapipe, nwp_datapipe):
49+
def test_power_perceiver_production(
50+
sat_hrv_datapipe, passiv_datapipe, topo_datapipe, gsp_datapipe, nwp_datapipe
51+
):
5052
####################################
5153
#
5254
# Equivalent to PP's loading and filtering methods
@@ -58,13 +60,19 @@ def test_power_perceiver_production(sat_hrv_datapipe, passiv_datapipe, topo_data
5860
topo_datapipe = ReprojectTopography(topo_datapipe)
5961
sat_datapipe = ConvertSatelliteToInt8(sat_hrv_datapipe)
6062
sat_datapipe = AddT0IdxAndSamplePeriodDuration(
61-
sat_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
63+
sat_datapipe,
64+
sample_period_duration=timedelta(minutes=5),
65+
history_duration=timedelta(minutes=60),
6266
)
6367
pv_datapipe = AddT0IdxAndSamplePeriodDuration(
64-
pv_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
68+
pv_datapipe,
69+
sample_period_duration=timedelta(minutes=5),
70+
history_duration=timedelta(minutes=60),
6571
)
6672
gsp_datapipe = AddT0IdxAndSamplePeriodDuration(
67-
gsp_datapipe, sample_period_duration=timedelta(minutes=30), history_duration=timedelta(hours=2)
73+
gsp_datapipe,
74+
sample_period_duration=timedelta(minutes=30),
75+
history_duration=timedelta(hours=2),
6876
)
6977
nwp_datapipe = AddT0IdxAndSamplePeriodDuration(
7078
nwp_datapipe, sample_period_duration=timedelta(hours=1), history_duration=timedelta(hours=2)
@@ -82,9 +90,14 @@ def test_power_perceiver_production(sat_hrv_datapipe, passiv_datapipe, topo_data
8290
3
8391
) # Its in order then
8492
pv_datapipe = SelectSpatialSliceMeters(
85-
pv_datapipe, location_datapipe=location_datapipe1, roi_width_meters=960_000, roi_height_meters=960_000
93+
pv_datapipe,
94+
location_datapipe=location_datapipe1,
95+
roi_width_meters=960_000,
96+
roi_height_meters=960_000,
8697
) # Has to be large as test PV systems aren't in first 20 GSPs it seems
87-
pv_datapipe, pv_t0_datapipe = EnsureNPVSystemsPerExample(pv_datapipe, n_pv_systems_per_example=8).fork(2)
98+
pv_datapipe, pv_t0_datapipe = EnsureNPVSystemsPerExample(
99+
pv_datapipe, n_pv_systems_per_example=8
100+
).fork(2)
88101
sat_datapipe, sat_t0_datapipe = SelectSpatialSlicePixels(
89102
sat_datapipe,
90103
location_datapipe=location_datapipe2,
@@ -166,7 +179,9 @@ def test_power_perceiver_production(sat_hrv_datapipe, passiv_datapipe, topo_data
166179
# Don't need to do NWP as it does go into the future
167180
nwp_datapipe = ConvertNWPToNumpyBatch(nwp_datapipe)
168181
nwp_datapipe = MergeNumpyExamplesToBatch(nwp_datapipe, n_examples_per_batch=4)
169-
combined_datapipe = MergeNumpyModalities([gsp_datapipe, pv_datapipe, sat_datapipe, nwp_datapipe])
182+
combined_datapipe = MergeNumpyModalities(
183+
[gsp_datapipe, pv_datapipe, sat_datapipe, nwp_datapipe]
184+
)
170185

171186
combined_datapipe = AlignGSPto5Min(
172187
combined_datapipe, batch_key_for_5_min_datetimes=BatchKey.hrvsatellite_time_utc
@@ -197,7 +212,9 @@ def test_power_perceiver_production(sat_hrv_datapipe, passiv_datapipe, topo_data
197212
assert batch[BatchKey.hrvsatellite_surface_height].shape == (4, 128, 256)
198213

199214

200-
def test_power_perceiver_production_functional(sat_hrv_datapipe, passiv_datapipe, topo_datapipe, gsp_datapipe, nwp_datapipe):
215+
def test_power_perceiver_production_functional(
216+
sat_hrv_datapipe, passiv_datapipe, topo_datapipe, gsp_datapipe, nwp_datapipe
217+
):
201218
####################################
202219
#
203220
# Equivalent to PP's loading and filtering methods
@@ -220,12 +237,16 @@ def test_power_perceiver_production_functional(sat_hrv_datapipe, passiv_datapipe
220237
sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
221238
)
222239
.select_spatial_slice_meters(
223-
location_datapipe=location_datapipe1, roi_width_meters=960_000, roi_height_meters=960_000
240+
location_datapipe=location_datapipe1,
241+
roi_width_meters=960_000,
242+
roi_height_meters=960_000,
224243
)
225244
.ensure_n_pv_systems_per_example(n_pv_systems_per_example=8)
226245
.fork(2)
227246
)
228-
topo_datapipe = topo_datapipe.reproject_topography().normalize(calculate_mean_std_from_example=True)
247+
topo_datapipe = topo_datapipe.reproject_topography().normalize(
248+
calculate_mean_std_from_example=True
249+
)
229250
sat_hrv_datapipe, sat_t0_datapipe = (
230251
sat_hrv_datapipe.convert_satellite_to_int8()
231252
.add_t0_idx_and_sample_period_duration(
@@ -262,7 +283,9 @@ def test_power_perceiver_production_functional(sat_hrv_datapipe, passiv_datapipe
262283
pv_t0_datapipe = pv_t0_datapipe.select_live_t0_time()
263284

264285
gsp_datapipe = (
265-
gsp_datapipe.select_live_time_slice(t0_datapipe=gsp_t0_datapipe, history_duration=timedelta(hours=2))
286+
gsp_datapipe.select_live_time_slice(
287+
t0_datapipe=gsp_t0_datapipe, history_duration=timedelta(hours=2)
288+
)
266289
.gsp_iterator()
267290
.convert_gsp_to_numpy_batch()
268291
.extend_timesteps_to_future(

tests/select/test_location_picker.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88

99
def test_location_picker_single_location(gsp_datapipe):
1010
gsp_datapipe = AddT0IdxAndSamplePeriodDuration(
11-
gsp_datapipe, sample_period_duration=timedelta(minutes=30), history_duration=timedelta(hours=1)
11+
gsp_datapipe,
12+
sample_period_duration=timedelta(minutes=30),
13+
history_duration=timedelta(hours=1),
1214
)
1315
location_datapipe = LocationPicker(gsp_datapipe)
1416
data = next(iter(location_datapipe))
@@ -18,7 +20,9 @@ def test_location_picker_single_location(gsp_datapipe):
1820
def test_location_picker_all_locations(gsp_datapipe):
1921
dataset = next(iter(gsp_datapipe))
2022
gsp_datapipe = AddT0IdxAndSamplePeriodDuration(
21-
gsp_datapipe, sample_period_duration=timedelta(minutes=30), history_duration=timedelta(hours=1)
23+
gsp_datapipe,
24+
sample_period_duration=timedelta(minutes=30),
25+
history_duration=timedelta(hours=1),
2226
)
2327
location_datapipe = LocationPicker(gsp_datapipe, return_all_locations=True)
2428
loc_iterator = iter(location_datapipe)

tests/select/test_select_live_time_slice.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@ def test_select_hrv(sat_hrv_datapipe):
1818
def test_select_gsp(gsp_datapipe):
1919
time_len = len(next(iter(gsp_datapipe)).time_utc.values)
2020
t0_datapipe = SelectLiveT0Time(gsp_datapipe, dim_name="time_utc")
21-
gsp_datapipe = SelectLiveTimeSlice(gsp_datapipe, history_duration=timedelta(minutes=120), t0_datapipe=t0_datapipe)
21+
gsp_datapipe = SelectLiveTimeSlice(
22+
gsp_datapipe, history_duration=timedelta(minutes=120), t0_datapipe=t0_datapipe
23+
)
2224
data = next(iter(gsp_datapipe))
2325
assert len(data.time_utc.values) == 5
2426
assert len(data.time_utc.values) < time_len

tests/select/test_select_spatial_slice.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@
44
def test_select_spatial_slice_meters_passiv(passiv_datapipe):
55
loc_datapipe = LocationPicker(passiv_datapipe)
66
passiv_datapipe = SelectSpatialSliceMeters(
7-
passiv_datapipe, location_datapipe=loc_datapipe, roi_width_meters=96_000, roi_height_meters=96_000
7+
passiv_datapipe,
8+
location_datapipe=loc_datapipe,
9+
roi_width_meters=96_000,
10+
roi_height_meters=96_000,
811
)
912
data = next(iter(passiv_datapipe))
1013
assert len(data.pv_system_id) == 1

0 commit comments

Comments
 (0)