CAMELS US
This notebook explores CAMELS US dataset which introduced by Newman et al., 2015.
[39]:
from ai4water.datasets import CAMELS_US
from ai4water.utils.utils import get_version_info
from ai4water.eda import EDA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from easy_mpl import hist, ridge
from easy_mpl import scatter
from easy_mpl.utils import process_cbar
[40]:
for k,v in get_version_info().items():
print(k, v)
python 3.8.17 (default, Jul 5 2023, 20:44:21) [MSC v.1916 64 bit (AMD64)]
os nt
ai4water 1.07
easy_mpl 0.21.3
SeqMetrics 1.3.4
numpy 1.24.3
pandas 1.3.4
matplotlib 3.6.0
sklearn 1.3.0
xarray 0.20.1
netCDF4 1.5.7
seaborn 0.12.2
[41]:
dataset = CAMELS_US(
path=r"E:\data\gscad\CAMELS\CAMELS_US" # set path to None if you have not already downloaded data
)
dataset is already downloaded at E:\data\gscad\CAMELS\CAMELS_US
[42]:
dataset.start
[42]:
'19800101'
[43]:
dataset.end
[43]:
'20141231'
[44]:
stations = dataset.stations()
len(stations)
[44]:
671
[7]:
dataset.plot_stations()
[7]:
<AxesSubplot: >
Static Data
[8]:
dataset.static_features
[8]:
['p_mean',
'pet_mean',
'p_seasonality',
'frac_snow',
'aridity',
'high_prec_freq',
'high_prec_dur',
'high_prec_timing',
'low_prec_freq',
'low_prec_dur',
'low_prec_timing',
'geol_1st_class',
'glim_1st_class_frac',
'geol_2nd_class',
'glim_2nd_class_frac',
'carbonate_rocks_frac',
'geol_porostiy',
'geol_permeability',
'q_mean',
'runoff_ratio',
'slope_fdc',
'baseflow_index',
'stream_elas',
'q5',
'q95',
'high_q_freq',
'high_q_dur',
'low_q_freq',
'low_q_dur',
'zero_q_freq',
'hfd_mean',
'huc_02',
'gauge_name',
'soil_depth_pelletier',
'soil_depth_statsgo',
'soil_porosity',
'soil_conductivity',
'max_water_content',
'sand_frac',
'silt_frac',
'clay_frac',
'water_frac',
'organic_frac',
'other_frac',
'gauge_lat',
'gauge_lon',
'elev_mean',
'slope_mean',
'area_gages2',
'area_geospa_fabric',
'frac_forest',
'lai_max',
'lai_diff',
'gvf_max',
'gvf_diff',
'dom_land_cover_frac',
'dom_land_cover',
'root_depth_50',
'root_depth_99']
[9]:
[10]:
q = ''
lc01 = ''
nvis = ''
anngro = ''
gromega = ''
npp = ''
[11]:
static = dataset.fetch_static_features(stn_id=stations)
static.shape
[11]:
(671, 59)
[12]:
EDA(data=static, save=False).heatmap()
[12]:
<AxesSubplot: ylabel='Examples'>
[13]:
physical_features = []
soil_features = []
geological_features = []
# flow_characteristics = []
[14]:
static = static.dropna(axis=1)
static.shape
[14]:
(671, 43)
[19]:
idx = 0
ax_num = 0
fig, axes = plt.subplots(5, 5, figsize=(15, 11))
axes = axes.flatten()
while ax_num < 25 and idx<static.shape[1]:
val = static.iloc[:, idx]
idx += 1
try:
c = val.astype(float).values.reshape(-1,)
en = static.shape[0]
ax = axes[ax_num]
ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)
process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
)
ax_num += 1
except ValueError:
continue
plt.tight_layout()
plt.show()
print(idx)
29
[20]:
idx = 29
ax_num = 0
fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()
while ax_num < 25 and idx<static.shape[1]:
val = static.iloc[:, idx]
idx += 1
try:
c = val.astype(float).values.reshape(-1,)
en = static.shape[0]
ax = axes[ax_num]
ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)
process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
)
ax_num += 1
except ValueError:
continue
plt.tight_layout()
plt.show()
print(idx)
43
Dynamic Features
[21]:
dataset.dynamic_features
[21]:
['dayl(s)',
'prcp(mm/day)',
'srad(W/m2)',
'swe(mm)',
'tmax(C)',
'tmin(C)',
'vp(Pa)',
'Flow']
[22]:
streamflow = dataset.q_mmd()
streamflow.shape
[22]:
(12784, 671)
streamflow
[23]:
EDA(data=streamflow, save=False).heatmap()
[23]:
<AxesSubplot: ylabel='Examples'>
[24]:
st = 0
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
49
[25]:
st = 49
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
98
[26]:
st = 98
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
147
[27]:
st = 147
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
196
[28]:
st = 196
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
245
[29]:
st = 245
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
294
[30]:
st = 294
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
343
[31]:
st = 343
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
392
[32]:
st = 392
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
441
[33]:
st = 441
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
490
[34]:
st = 490
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
539
[35]:
st = 539
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
588
[36]:
st = 588
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = st
for _, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
637
[37]:
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
idx = 637
for _, ax in enumerate(axes.flat):
if idx >= 671:
break
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
idx += 1
plt.show()
print(idx)
671
[38]:
# skewness of streamflow
_ = hist(streamflow.skew().values.reshape(-1,), bins=50)
potential evapotranspiration
[36]:
# pet = dataset.fetch(dynamic_features = 'pet', as_dataframe=True)
# pet = pet.reset_index()
# pet.index = pd.to_datetime(pet.pop('time'))
# pet.pop('dynamic_features')
# print(pet.shape)
# EDA(data=pet, save=False).heatmap()
[37]:
#_ = hist(pet.skew().values.reshape(-1,), bins=50)
Air Temperature
[38]:
tmax = dataset.fetch(dynamic_features = 'tmax(C)', as_dataframe=True)
tmax = tmax.reset_index()
tmax.index = pd.to_datetime(tmax.pop('time'))
tmax.pop('dynamic_features')
print(tmax.shape)
EDA(data=tmax, save=False).heatmap()
(12784, 671)
[38]:
<AxesSubplot: ylabel='Examples'>
[39]:
_ = hist(tmax.skew().values.reshape(-1,), bins=50)
[40]:
tmin = dataset.fetch(dynamic_features = 'tmin(C)', as_dataframe=True)
tmin = tmin.reset_index()
tmin.index = pd.to_datetime(tmin.pop('time'))
tmin.pop('dynamic_features')
print(tmin.shape)
EDA(data=tmin, save=False).heatmap()
(12784, 671)
[40]:
<AxesSubplot: ylabel='Examples'>
[41]:
_ = hist(tmin.skew().values.reshape(-1,), bins=50)
[45]:
temp = np.mean([tmax.values, tmin.values], axis=0)
temp = pd.DataFrame(temp, columns=tmax.columns, index=tmax.index)
print(temp.shape)
EDA(data=temp, save=False).heatmap()
#tmax.shape, tmin.shape
(12784, 671)
[45]:
<AxesSubplot: ylabel='Examples'>
[46]:
_ = hist(temp.skew().values.reshape(-1,), bins=50)
Precipitation
[47]:
pcp = dataset.fetch(dynamic_features = "prcp(mm/day)", as_dataframe=True)
pcp = pcp.reset_index()
pcp.index = pd.to_datetime(pcp.pop('time'))
pcp.pop('dynamic_features')
print(pcp.shape)
_ = EDA(data=pcp, save=False).heatmap()
(12784, 671)
[47]:
<AxesSubplot: ylabel='Examples'>
[48]:
_ = hist(pcp.skew().values.reshape(-1,), bins=50)
[ ]: