CAMELS US

This notebook explores the CAMELS US dataset, which was introduced by Newman et al., 2015.

[39]:

from ai4water.datasets import CAMELS_US
from ai4water.utils.utils import get_version_info
from ai4water.eda import EDA
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from easy_mpl import hist, ridge
from easy_mpl import scatter
from easy_mpl.utils import process_cbar

[40]:

# Print the versions reported by get_version_info(), one "name version" pair per line.
version_info = get_version_info()
for name in version_info:
    print(name, version_info[name])

python 3.8.17 (default, Jul  5 2023, 20:44:21) [MSC v.1916 64 bit (AMD64)]
os nt
ai4water 1.07
easy_mpl 0.21.3
SeqMetrics 1.3.4
numpy 1.24.3
pandas 1.3.4
matplotlib 3.6.0
sklearn 1.3.0
xarray 0.20.1
netCDF4 1.5.7
seaborn 0.12.2

[41]:

# Directory that holds the CAMELS_US files.
# Set this to None if you have not already downloaded data.
CAMELS_US_PATH = r"E:\data\gscad\CAMELS\CAMELS_US"

dataset = CAMELS_US(path=CAMELS_US_PATH)

dataset is already downloaded at E:\data\gscad\CAMELS\CAMELS_US

[42]:

# `start` attribute of the dataset; the output below shows it as the string '19800101'.
dataset.start

[42]:

'19800101'

[43]:

# `end` attribute of the dataset; the output below shows it as the string '20141231'.
dataset.end

[43]:

'20141231'

[44]:

# Keep the station collection for the later fetch_static_features call;
# the trailing expression displays how many stations there are.
stations = dataset.stations()
len(stations)

[44]:

[7]:

# Draw the stations with the dataset's own plotting helper.
# The returned Axes object is echoed as the cell output.
dataset.plot_stations()

../../_images/_notebooks_datasets_CAMELS_US_7_0.png

[7]:

<AxesSubplot: >

Static Data

[8]:

# Names of the static features exposed by the dataset (listed in the output below).
dataset.static_features

[8]:

['p_mean',
 'pet_mean',
 'p_seasonality',
 'frac_snow',
 'aridity',
 'high_prec_freq',
 'high_prec_dur',
 'high_prec_timing',
 'low_prec_freq',
 'low_prec_dur',
 'low_prec_timing',
 'geol_1st_class',
 'glim_1st_class_frac',
 'geol_2nd_class',
 'glim_2nd_class_frac',
 'carbonate_rocks_frac',
 'geol_porostiy',
 'geol_permeability',
 'q_mean',
 'runoff_ratio',
 'slope_fdc',
 'baseflow_index',
 'stream_elas',
 'q5',
 'q95',
 'high_q_freq',
 'high_q_dur',
 'low_q_freq',
 'low_q_dur',
 'zero_q_freq',
 'hfd_mean',
 'huc_02',
 'gauge_name',
 'soil_depth_pelletier',
 'soil_depth_statsgo',
 'soil_porosity',
 'soil_conductivity',
 'max_water_content',
 'sand_frac',
 'silt_frac',
 'clay_frac',
 'water_frac',
 'organic_frac',
 'other_frac',
 'gauge_lat',
 'gauge_lon',
 'elev_mean',
 'slope_mean',
 'area_gages2',
 'area_geospa_fabric',
 'frac_forest',
 'lai_max',
 'lai_diff',
 'gvf_max',
 'gvf_diff',
 'dom_land_cover_frac',
 'dom_land_cover',
 'root_depth_50',
 'root_depth_99']

[9]:

# Station coordinates as flat 1-D arrays, used as x (long) and y (lat) in the
# scatter maps further down.
# They are cast to float rather than int: an integer cast truncates every
# coordinate to a whole degree, which collapses nearby stations onto the same
# plotted point.
coords = dataset.stn_coords()
lat = coords['lat'].astype(float).values.reshape(-1,)
long = coords['long'].astype(float).values.reshape(-1,)

[10]:

# Empty-string placeholders; none of them is read again in the visible part
# of this notebook. Strings are immutable, so sharing one object is harmless.
q = lc01 = nvis = anngro = gromega = npp = ''

[11]:

# Fetch the static features for every station in `stations`.
# The output below reports the resulting shape as (671, 59).
static = dataset.fetch_static_features(stn_id=stations)
static.shape

[11]:

(671, 59)

[12]:

# Heatmap overview of the static-feature table.
# save=False is passed to EDA — presumably so the figure is not written to disk (TODO confirm).
EDA(data=static, save=False).heatmap()

../../_images/_notebooks_datasets_CAMELS_US_13_0.png

[12]:

<AxesSubplot: ylabel='Examples'>

[13]:

# Empty, independent containers for grouping feature names by category.
physical_features, soil_features, geological_features = [], [], []
# flow_characteristics = []

[14]:

# Drop every column that contains a missing value (axis=1).
# According to the outputs this reduces the table from 59 to 43 columns.
# NOTE: `static` is rebound here, so all later cells see only the NaN-free columns.
static = static.dropna(axis=1)
static.shape

[14]:

(671, 43)

[19]:

# Map the static features on a 5 x 5 grid: one panel per column of `static`
# that can be cast to float, with stations placed at (long, lat) and coloured
# by the feature value. Columns that cannot be cast are skipped.
idx = 0      # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 11))
axes = axes.flatten()

en = static.shape[0]  # loop-invariant row count, hoisted out of the loop

while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    # Only the numeric conversion is guarded. Non-numeric columns are still
    # skipped, but a ValueError raised by the plotting calls is no longer
    # silently swallowed (which would leave a half-drawn panel to be overdrawn).
    try:
        c = val.astype(float).values.reshape(-1,)
    except ValueError:
        continue

    ax = axes[ax_num]
    ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)

    process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
                )
    ax_num += 1

# Hide panels that received no feature so they do not show up as empty frames.
for unused_ax in axes[ax_num:]:
    unused_ax.set_axis_off()

plt.tight_layout()
plt.show()
print(idx)  # index of the first column that has not been tried yet

../../_images/_notebooks_datasets_CAMELS_US_16_0.png

[20]:

# Second 5 x 5 grid of static-feature maps: one panel per column of `static`
# that can be cast to float, with stations placed at (long, lat) and coloured
# by the feature value. Columns that cannot be cast are skipped.
idx = 29     # first column to try — presumably where the previous figure stopped (TODO confirm)
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

en = static.shape[0]  # loop-invariant row count, hoisted out of the loop

while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    # Only the numeric conversion is guarded. Non-numeric columns are still
    # skipped, but a ValueError raised by the plotting calls is no longer
    # silently swallowed (which would leave a half-drawn panel to be overdrawn).
    try:
        c = val.astype(float).values.reshape(-1,)
    except ValueError:
        continue

    ax = axes[ax_num]
    ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)

    process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
                )
    ax_num += 1

# Hide panels that received no feature so they do not show up as empty frames.
for unused_ax in axes[ax_num:]:
    unused_ax.set_axis_off()

plt.tight_layout()
plt.show()
print(idx)  # index of the first column that has not been tried yet

../../_images/_notebooks_datasets_CAMELS_US_17_0.png

Dynamic Features

[21]:

# Names of the dynamic (time-varying) features exposed by the dataset (listed in the output below).
dataset.dynamic_features

[21]:

['dayl(s)',
 'prcp(mm/day)',
 'srad(W/m2)',
 'swe(mm)',
 'tmax(C)',
 'tmin(C)',
 'vp(Pa)',
 'Flow']

[22]:

# Streamflow table returned by q_mmd() — presumably in mm/day, judging by the method name (TODO confirm).
# The output below reports its shape as (12784, 671).
streamflow = dataset.q_mmd()
streamflow.shape

[22]:

(12784, 671)

streamflow

[23]:

# Heatmap overview of the streamflow table; the returned Axes is echoed as the cell output.
EDA(data=streamflow, save=False).heatmap()

../../_images/_notebooks_datasets_CAMELS_US_22_0.png

[23]:

<AxesSubplot: ylabel='Examples'>

[24]:

# Streamflow histograms for columns 0-48 of `streamflow`, one per panel of a 7 x 7 grid.
st = 0

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_23_0.png

[25]:

# Streamflow histograms for columns 49-97 of `streamflow`, one per panel of a 7 x 7 grid.
st = 49

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_24_0.png

[26]:

# Streamflow histograms for columns 98-146 of `streamflow`, one per panel of a 7 x 7 grid.
st = 98

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_25_0.png

[27]:

# Streamflow histograms for columns 147-195 of `streamflow`, one per panel of a 7 x 7 grid.
st = 147

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_26_0.png

[28]:

# Streamflow histograms for columns 196-244 of `streamflow`, one per panel of a 7 x 7 grid.
st = 196

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_27_0.png

[29]:

# Streamflow histograms for columns 245-293 of `streamflow`, one per panel of a 7 x 7 grid.
st = 245

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_28_0.png

[30]:

# Streamflow histograms for columns 294-342 of `streamflow`, one per panel of a 7 x 7 grid.
st = 294

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_29_0.png

[31]:

# Streamflow histograms for columns 343-391 of `streamflow`, one per panel of a 7 x 7 grid.
st = 343

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_30_0.png

[32]:

# Streamflow histograms for columns 392-440 of `streamflow`, one per panel of a 7 x 7 grid.
st = 392

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_31_0.png

[33]:

# Streamflow histograms for columns 441-489 of `streamflow`, one per panel of a 7 x 7 grid.
st = 441

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_32_0.png

[34]:

# Streamflow histograms for columns 490-538 of `streamflow`, one per panel of a 7 x 7 grid.
st = 490

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_33_0.png

[35]:

# Streamflow histograms for columns 539-587 of `streamflow`, one per panel of a 7 x 7 grid.
st = 539

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_34_0.png

[36]:

# Streamflow histograms for columns 588-636 of `streamflow`, one per panel of a 7 x 7 grid.
st = 588

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

for idx, ax in enumerate(axes.flat, start=st):
    column_values = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(column_values, bins=20, ax=ax, show=False)

idx += 1  # step past the last plotted column, so the printed value is the next start

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_35_0.png

[37]:

# Streamflow histograms for the remaining columns of `streamflow`, starting at
# column 637. There are fewer columns left than panels in the 7 x 7 grid.
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

idx = 637
n_columns = streamflow.shape[1]  # taken from the data instead of the hard-coded 671
for ax in axes.flat:

    if idx >= n_columns:
        # Nothing left to plot here: hide the panel rather than leave an empty frame.
        ax.set_axis_off()
        continue

    hist(streamflow.iloc[:, idx].values.reshape(-1,),
     bins=20,
     ax=ax,
     show=False
    )

    idx += 1

plt.show()
print(idx)

../../_images/_notebooks_datasets_CAMELS_US_36_0.png

[38]:

# Histogram of the skewness of each `streamflow` column (one skewness value per column).
streamflow_skewness = streamflow.skew()
_ = hist(streamflow_skewness.values.reshape(-1,), bins=50)

../../_images/_notebooks_datasets_CAMELS_US_37_0.png

Potential Evapotranspiration

[36]:

# NOTE(review): disabled cell — 'pet' does not appear in the `dataset.dynamic_features`
# list shown earlier in this notebook, so this fetch presumably cannot run as written.
# pet = dataset.fetch(dynamic_features = 'pet', as_dataframe=True)
# pet = pet.reset_index()
# pet.index = pd.to_datetime(pet.pop('time'))
# pet.pop('dynamic_features')
# print(pet.shape)
# EDA(data=pet, save=False).heatmap()

[37]:

# Disabled: `pet` is never defined while the cell that would fetch it stays commented out.
#_ = hist(pet.skew().values.reshape(-1,), bins=50)

Air Temperature

[38]:

# Load 'tmax(C)', index the rows by timestamp and show a heatmap overview.
tmax = dataset.fetch(dynamic_features='tmax(C)', as_dataframe=True).reset_index()
tmax.index = pd.to_datetime(tmax.pop('time'))
# Remove the 'dynamic_features' column so that only the data columns remain.
del tmax['dynamic_features']
print(tmax.shape)

EDA(data=tmax, save=False).heatmap()

(12784, 671)

../../_images/_notebooks_datasets_CAMELS_US_42_1.png

[38]:

<AxesSubplot: ylabel='Examples'>

[39]:

# Histogram of the skewness of each `tmax` column (one skewness value per column).
tmax_skewness = tmax.skew()
_ = hist(tmax_skewness.values.reshape(-1,), bins=50)

../../_images/_notebooks_datasets_CAMELS_US_43_0.png

[40]:

# Load 'tmin(C)', index the rows by timestamp and show a heatmap overview.
tmin = dataset.fetch(dynamic_features='tmin(C)', as_dataframe=True).reset_index()
tmin.index = pd.to_datetime(tmin.pop('time'))
# Remove the 'dynamic_features' column so that only the data columns remain.
del tmin['dynamic_features']

print(tmin.shape)

EDA(data=tmin, save=False).heatmap()

(12784, 671)

../../_images/_notebooks_datasets_CAMELS_US_44_1.png

[40]:

<AxesSubplot: ylabel='Examples'>

[41]:

# Histogram of the skewness of each `tmin` column (one skewness value per column).
tmin_skewness = tmin.skew()
_ = hist(tmin_skewness.values.reshape(-1,), bins=50)

../../_images/_notebooks_datasets_CAMELS_US_45_0.png

[45]:

# Element-wise average of the `tmax` and `tmin` values, re-labelled with the
# index and columns of `tmax`. The two tables are combined by position, not by label.
stacked_extremes = np.stack((tmax.values, tmin.values))
temp = pd.DataFrame(stacked_extremes.mean(axis=0), index=tmax.index, columns=tmax.columns)

print(temp.shape)

EDA(data=temp, save=False).heatmap()
#tmax.shape, tmin.shape

(12784, 671)

../../_images/_notebooks_datasets_CAMELS_US_46_1.png

[45]:

<AxesSubplot: ylabel='Examples'>

[46]:

# Histogram of the skewness of each `temp` column (one skewness value per column).
temp_skewness = temp.skew()
_ = hist(temp_skewness.values.reshape(-1,), bins=50)

../../_images/_notebooks_datasets_CAMELS_US_47_0.png

Precipitation

[47]:

# Load "prcp(mm/day)", index the rows by timestamp and show a heatmap overview.
pcp = dataset.fetch(dynamic_features="prcp(mm/day)", as_dataframe=True).reset_index()
pcp.index = pd.to_datetime(pcp.pop('time'))
# Remove the 'dynamic_features' column so that only the data columns remain.
del pcp['dynamic_features']

print(pcp.shape)

_ = EDA(data=pcp, save=False).heatmap()

(12784, 671)

../../_images/_notebooks_datasets_CAMELS_US_49_1.png

[47]:

<AxesSubplot: ylabel='Examples'>

[48]:

# Histogram of the skewness of each `pcp` column (one skewness value per column).
pcp_skewness = pcp.skew()
_ = hist(pcp_skewness.values.reshape(-1,), bins=50)

../../_images/_notebooks_datasets_CAMELS_US_50_0.png

[ ]: