Source code for ai4water.datasets._rc4uscoast


__all__ = ["RC4USCoast"]

from typing import Union, List

import numpy as np

from ai4water.backend import pd, os, xr

from ._datasets import Datasets
from .utils import check_st_en

class RC4USCoast(Datasets):
    """
    Monthly river water chemistry (N, P, SIO2, DO, ... etc), discharge and
    temperature of 140 monitoring sites of US coasts from 1950 to 2020
    following the work of
    `Gomez et al., 2022 <https://doi.org/10.5194/essd-2022-341>`_.

    Examples
    --------
    >>> from ai4water.datasets import RC4USCoast
    >>> dataset = RC4USCoast()

    """
    # file name -> download location
    url = {
        'RC4USCoast.zip':
            'https://www.ncei.noaa.gov/data/oceans/ncei/ocads/data/0260455/RC4USCoast.zip',
        'info.xlsx':
            'https://www.ncei.noaa.gov/data/oceans/ncei/ocads/data/0260455/supplemental/dataset_info.xlsx'
    }

    def __init__(self, path=None, *args, **kwargs):
        """
        Parameters
        ----------
        path :
            path where the data is already downloaded. If None, the data will
            be downloaded into the disk.
        """
        super(RC4USCoast, self).__init__(path=path, *args, **kwargs)
        # NOTE(review): ``ds_dir`` and ``_download`` are not defined in this
        # class; presumably they are provided by ``Datasets`` -- confirm how a
        # ``path`` of None is resolved there.
        self.ds_dir = path
        self._download()

    @property
    def chem_fname(self) -> str:
        """Path of the ``series_chem.nc`` file inside the dataset directory."""
        return os.path.join(self.ds_dir, "RC4USCoast", "series_chem.nc")

    @property
    def q_fname(self) -> str:
        """Path of the ``series_disc.nc`` file inside the dataset directory."""
        return os.path.join(self.ds_dir, "RC4USCoast", "series_disc.nc")

    @property
    def info_fname(self) -> str:
        """Path of the ``info.xlsx`` file inside the dataset directory."""
        return os.path.join(self.ds_dir, "RC4USCoast", "info.xlsx")

    @property
    def stations(self) -> np.ndarray:
        r"""
        Values of the ``RC4USCoast_ID`` coordinate of the discharge file.

        >>> from ai4water.datasets import RC4USCoast
        >>> ds = RC4USCoast(path=r'F:\data\RC4USCoast')
        >>> len(ds.stations)
        140
        """
        return xr.load_dataset(self.q_fname).RC4USCoast_ID.data

    @property
    def parameters(self) -> List[str]:
        """
        Names of the data variables available in the chemistry file.

        >>> from ai4water.datasets import RC4USCoast
        >>> ds = RC4USCoast()
        >>> len(ds.parameters)
        27
        """
        ds = xr.load_dataset(self.chem_fname)
        return list(ds.data_vars.keys())

    @property
    def start(self) -> pd.Timestamp:
        """First time stamp of the discharge file."""
        return pd.Timestamp(xr.load_dataset(self.q_fname).time.data[0])

    @property
    def end(self) -> pd.Timestamp:
        """Last time stamp of the discharge file."""
        return pd.Timestamp(xr.load_dataset(self.q_fname).time.data[-1])

    @staticmethod
    def _coerce_stations(stations):
        """Return array/tuple station collections as a plain list.

        Any other value (list, scalar, string, None) is returned unchanged.
        Arrays must be converted before they reach a truth test or an
        ``== "all"`` comparison, both of which are ambiguous for arrays.
        """
        if isinstance(stations, (np.ndarray, tuple)):
            return np.atleast_1d(stations).tolist()
        return stations

    def fetch_chem(
            self,
            parameter,
            stations: Union[List[int], int, str, np.ndarray] = "all",
            as_dataframe: bool = False,
            st: Union[int, str, pd.DatetimeIndex] = None,
            en: Union[int, str, pd.DatetimeIndex] = None,
    ):
        """
        Returns water chemistry parameters from one or more stations.

        Parameters
        ----------
        parameter : list, str
            name/names of parameters to fetch
        stations : list, str
            name/names of stations from which the parameters are to be
            fetched. ``"all"`` or None applies no station filter.
        as_dataframe : bool (default=False)
            whether to return data as pandas.DataFrame or xarray.Dataset
        st :
            start time of data to be fetched. The default starting
            date is 19500101
        en :
            end time of data to be fetched. The default end date is
            20201201

        Returns
        -------
        pandas DataFrame or xarray Dataset

        Examples
        --------
        >>> from ai4water.datasets import RC4USCoast
        >>> ds = RC4USCoast()
        >>> data = ds.fetch_chem(['temp', 'do'])
        >>> data
        >>> data = ds.fetch_chem(['temp', 'do'], as_dataframe=True)
        >>> data.shape  # this is a multi-indexed dataframe
        (119280, 4)
        >>> data = ds.fetch_chem(['temp', 'do'], st="19800101", en="20181230")
        """
        if isinstance(parameter, str):
            parameter = [parameter]

        ds = xr.load_dataset(self.chem_fname)[parameter]

        stations = self._coerce_stations(stations)
        select_all = stations is None or (
            isinstance(stations, str) and stations == "all")
        if not select_all:
            # a single station id is wrapped so that the station dimension
            # is kept in the result
            if not isinstance(stations, list):
                stations = [stations]
            ds = ds.sel(RC4USCoast_ID=stations)

        # missing bounds fall back to the first/last time stamp of the
        # discharge file
        ds = ds.sel(time=slice(st or self.start, en or self.end))

        if as_dataframe:
            return ds.to_dataframe()
        return ds

    def fetch_q(
            self,
            stations: Union[int, List[int], str, np.ndarray] = "all",
            as_dataframe: bool = True,
            nv=0,
            st: Union[int, str, pd.DatetimeIndex] = None,
            en: Union[int, str, pd.DatetimeIndex] = None,
    ):
        """returns discharge data

        Parameters
        ----------
        stations :
            stations for which q is to be fetched. A falsy value (None or
            an empty collection) skips both the station and the ``nv``
            selection.
        as_dataframe : bool (default=True)
            whether to return the data as pd.DataFrame or as xarray.Dataset
        nv : int (default=0)
            label selected along the ``nv`` dimension of the discharge file
        st :
            start time of data to be fetched. The default starting
            date is 19500101
        en :
            end time of data to be fetched. The default end date is
            20201201

        Examples
        --------
        >>> from ai4water.datasets import RC4USCoast
        >>> ds = RC4USCoast()
        # get data of all stations as DataFrame
        >>> q = ds.fetch_q("all")
        >>> q.shape
        (852, 140)  # where 140 is the number of stations
        # get data of only two stations
        >>> q = ds.fetch_q([1,10])
        >>> q.shape
        (852, 2)
        # get data as xarray Dataset
        >>> q = ds.fetch_q("all", as_dataframe=False)
        >>> type(q)
        xarray.core.dataset.Dataset
        # getting data between specific periods
        >>> data = ds.fetch_q("all", st="20000101", en="20181230")
        """
        q = xr.load_dataset(self.q_fname)

        # arrays cannot be truth-tested, so turn them into a list first
        stations = self._coerce_stations(stations)

        if stations:
            if isinstance(stations, str) and stations == "all":
                q = q.sel(nv=nv)
            else:
                if not isinstance(stations, list):
                    stations = [stations]
                q = q.sel(RC4USCoast_ID=stations, nv=nv)

        # missing bounds fall back to the first/last time stamp of the
        # discharge file
        q = q.sel(time=slice(st or self.start, en or self.end))

        if as_dataframe:
            return q.to_dataframe()['disc'].unstack()
        return q