diff --git a/src/climatebenchpress/data_loader/datasets/cams.py b/src/climatebenchpress/data_loader/datasets/cams.py index a176c8a..5f84631 100644 --- a/src/climatebenchpress/data_loader/datasets/cams.py +++ b/src/climatebenchpress/data_loader/datasets/cams.py @@ -13,16 +13,18 @@ from ..download import _download_netcdf from .abc import Dataset -NO2_FILE = "https://object-store.os-api.cci1.ecmwf.int/esiwacebucket/CAMS/eac4-plev-no2-2023.nc" +NO2_FILE = ( + "https://object-store.os-api.cci1.ecmwf.int/esiwacebucket/CAMS_hej6/cams_no2.nc" +) NUM_RETRIES = 3 class CamsNitrogenDioxideDataset(Dataset): """Dataset for CAMS Nitrogen Dioxide data. - The dataset comes from the - [Copernicus Atmosphere Monitoring Service (CAMS)](https://atmosphere.copernicus.eu/). - This particular class downloads Nitrogen Dioxide reanalysis data. + Uses data from the CAMS model published at [https://apps.ecmwf.int/ifs-experiments/rd/hej6/]. + This is the output of a run of the CAMS model that is stored in single-precision + floating point data without any linear packing. """ name = "cams-nitrogen-dioxide" @@ -41,15 +43,18 @@ def download(download_path: Path, progress: bool = True): @staticmethod def open(download_path: Path) -> xr.Dataset: - ds = xr.open_dataset(download_path / Path(NO2_FILE).name) + ds = xr.open_dataset(download_path / Path(NO2_FILE).name).chunk(-1) - # Restrict data to a single day. - # The specific day is arbitrary. - ds = ds.sel(valid_time=slice("2023-06-15", "2023-06-15")).chunk(-1) + # valid_time contains actual dates, whereas step is the seconds (in simulated time) + # since the model has been initialised. + ds = ds.assign_coords(valid_time=("step", ds.valid_time.data)) + ds = ds.swap_dims({"step": "valid_time"}) + ds = ds.reset_coords("step", drop=True) # Needed to make the dataset CF-compliant. 
ds.longitude.attrs["axis"] = "X" ds.latitude.attrs["axis"] = "Y" - ds.pressure_level.attrs["axis"] = "Z" + ds.hybrid.attrs["axis"] = "Z" + ds.valid_time.attrs["axis"] = "T" return ds