CAMELS Australia

Open In Colab

View Source on GitHub

This notebook explores the CAMELS Australia (CAMELS-AUS) dataset, which was introduced by Fowler et al., 2020.

[1]:
import pandas as pd
import numpy as np

from ai4water.datasets import CAMELS_AUS
from ai4water.utils.utils import get_version_info
from ai4water.eda import EDA

import matplotlib.pyplot as plt

from easy_mpl import hist, ridge, scatter
from easy_mpl.utils import process_cbar

**********Tensorflow models could not be imported **********

C:\Users\ather\.conda\envs\cat_aware\lib\site-packages\sklearn\experimental\enable_hist_gradient_boosting.py:15: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
  warnings.warn(
[2]:
# Report the versions returned by get_version_info(), one "name version" pair per line.
for package, version in get_version_info().items():
    print(package, version)
python 3.8.17 (default, Jul  5 2023, 20:44:21) [MSC v.1916 64 bit (AMD64)]
os nt
ai4water 1.07
easy_mpl 0.21.3
SeqMetrics 1.3.4
numpy 1.24.3
pandas 1.3.4
matplotlib 3.6.0
sklearn 1.3.0
xarray 0.20.1
netCDF4 1.5.7
seaborn 0.12.2
[4]:

# Create the CAMELS-AUS dataset object.
# The call is spread over several lines so that the trailing comment no longer
# swallows the closing parenthesis.
# NOTE(review): the path is machine-specific; adjust it for your environment.
dataset = CAMELS_AUS(
    path=r"E:\data\gscad\CAMELS\CAMELS_AUS"  # set path to None if you have not already downloaded data
)
[5]:
# Display the dataset's `start` attribute (presumably the first date of the record, as 'YYYYMMDD' -- confirm).
dataset.start
[5]:
'19570101'
[6]:
# Display the dataset's `end` attribute (presumably the last date of the record, as 'YYYYMMDD' -- confirm).
dataset.end
[6]:
'20181231'
[7]:
# Collect the station identifiers returned by the dataset and display how many there are.
stations = dataset.stations()
len(stations)
[7]:
222

Static Features

[8]:
# Display the names of the static features exposed by the dataset.
dataset.static_features
[8]:
['station_name',
 'drainage_division',
 'river_region',
 'notes',
 'lat_outlet',
 'long_outlet',
 'lat_centroid',
 'long_centroid',
 'map_zone',
 'catchment_area',
 'nested_status',
 'next_station_ds',
 'num_nested_within',
 'start_date',
 'end_date',
 'prop_missing_data',
 'q_uncert_num_curves',
 'q_uncert_n',
 'q_uncert_q10',
 'q_uncert_q10_upper',
 'q_uncert_q10_lower',
 'q_uncert_q50',
 'q_uncert_q50_upper',
 'q_uncert_q50_lower',
 'q_uncert_q90',
 'q_uncert_q90_upper',
 'q_uncert_q90_lower',
 'p_mean',
 'pet_mean',
 'aridity',
 'p_seasonality',
 'frac_snow',
 'high_prec_freq',
 'high_prec_dur',
 'high_prec_timing',
 'low_prec_freq',
 'low_prec_dur',
 'low_prec_timing',
 'q_mean',
 'runoff_ratio',
 'stream_elas',
 'slope_fdc',
 'baseflow_index',
 'hdf_mean',
 'Q5',
 'Q95',
 'high_q_freq',
 'high_q_dur',
 'low_q_freq',
 'low_q_dur',
 'zero_q_freq',
 'geol_prim',
 'geol_prim_prop',
 'geol_sec',
 'geol_sec_prop',
 'unconsoldted',
 'igneous',
 'silicsed',
 'carbnatesed',
 'othersed',
 'metamorph',
 'sedvolc',
 'oldrock',
 'claya',
 'clayb',
 'sanda',
 'solum_thickness',
 'ksat',
 'solpawhc',
 'elev_min',
 'elev_max',
 'elev_mean',
 'elev_range',
 'mean_slope_pct',
 'upsdist',
 'strdensity',
 'strahler',
 'elongratio',
 'relief',
 'reliefratio',
 'mrvbf_prop_0',
 'mrvbf_prop_1',
 'mrvbf_prop_2',
 'mrvbf_prop_3',
 'mrvbf_prop_4',
 'mrvbf_prop_5',
 'mrvbf_prop_6',
 'mrvbf_prop_7',
 'mrvbf_prop_8',
 'mrvbf_prop_9',
 'confinement',
 'lc01_extracti',
 'lc03_waterbo',
 'lc04_saltlak',
 'lc05_irrcrop',
 'lc06_irrpast',
 'lc07_irrsuga',
 'lc08_rfcropp',
 'lc09_rfpastu',
 'lc10_rfsugar',
 'lc11_wetlands',
 'lc14_tussclo',
 'lc15_alpineg',
 'lc16_openhum',
 'lc18_opentus',
 'lc19_shrbsca',
 'lc24_shrbden',
 'lc25_shrbope',
 'lc31_forclos',
 'lc32_foropen',
 'lc33_woodope',
 'lc34_woodspa',
 'lc35_urbanar',
 'prop_forested',
 'nvis_grasses_n',
 'nvis_grasses_e',
 'nvis_forests_n',
 'nvis_forests_e',
 'nvis_shrubs_n',
 'nvis_shrubs_e',
 'nvis_woodlands_n',
 'nvis_woodlands_e',
 'nvis_bare_n',
 'nvis_bare_e',
 'nvis_nodata_n',
 'nvis_nodata_e',
 'distupdamw',
 'impound_fac',
 'flow_div_fac',
 'leveebank_fac',
 'infrastruc_fac',
 'settlement_fac',
 'extract_ind_fac',
 'landuse_fac',
 'catchment_di',
 'flow_regime_di',
 'river_di',
 'pop_mean',
 'pop_max',
 'pop_gt_1',
 'pop_gt_10',
 'erosivity',
 'anngro_mega',
 'anngro_meso',
 'anngro_micro',
 'gromega_seas',
 'gromeso_seas',
 'gromicro_seas',
 'npp_ann',
 'npp_1',
 'npp_2',
 'npp_3',
 'npp_4',
 'npp_5',
 'npp_6',
 'npp_7',
 'npp_8',
 'npp_9',
 'npp_10',
 'npp_11',
 'npp_12']
[9]:
# Glossary notes for groups of static-feature names that share a prefix
# (mrvbf_prop_*, lc01_* ... lc35_*, nvis_*, anngro_*, gro*_seas, npp_*).
# These strings are not read by any other cell shown in this notebook; they
# only record what each prefix stands for.
mrvbf = 'proportion of catchment occupied by classes of MultiResolution Valley Bottom Flatness'
lc01 = 'land cover codes'
nvis = 'vegetation sub-groups'
anngro = 'Average annual growth index value for some plants'
gromega = 'Seasonality of growth index value'
npp = 'net primary productivity'
[10]:
# Fetch the static features for every station id in `stations`, then display
# the (rows, columns) shape of the returned table.
static = dataset.fetch_static_features(stn_id=stations)
static.shape
[10]:
(222, 161)
[11]:
# Heatmap overview of the static-feature table via the EDA helper.
# NOTE(review): presumably this visualises missing values, and save=False
# presumably prevents writing the figure to disk -- confirm against ai4water docs.
EDA(data=static, save=False).heatmap()
../../_images/_notebooks_datasets_CAMELS_AUS_11_0.png
[11]:
<AxesSubplot: ylabel='Examples'>
[12]:
# Empty placeholders, apparently meant for grouping static features by theme.
# NOTE(review): none of these lists is populated or read in the cells visible here.
physical_features = []
soil_features = []
geological_features = []
flow_characteristics = []
[13]:
# Drop every column (axis=1) that contains at least one missing value, then
# display the reduced shape.
# NOTE: this rebinds `static`, so all later cells work on the reduced table.
static = static.dropna(axis=1)
static.shape
[13]:
(222, 149)
[14]:
# Station coordinates: a table indexed by station_id with 'lat' and 'long'
# columns (see the rendered output of this cell).
coords = dataset.stn_coords()
coords
[14]:
lat long
station_id
912101A -18.643611 139.253056
912105A -18.970833 138.803333
915011A -20.177500 144.524444
917107A -18.133611 144.306667
919003A -16.471389 144.290556
... ... ...
312061 -41.419444 145.670556
314207 -41.250833 146.090000
314213 -40.872222 145.297778
315450 -41.611667 146.130556
318076 -41.494444 147.384167

222 rows × 2 columns

[15]:
# Plot the stations with the dataset's built-in helper (presumably their locations -- confirm).
dataset.plot_stations()
../../_images/_notebooks_datasets_CAMELS_AUS_15_0.png
[15]:
<AxesSubplot: >
[16]:
# Flat float arrays of the station coordinates; the scatter-map cells below use
# `long` as x and `lat` as y.
lat, long = (
    coords[column].astype(float).values.reshape(-1,)
    for column in ('lat', 'long')
)
[17]:
# Map the first batch of static features: one scatter panel per feature, with
# stations placed at (long, lat) and coloured by the feature value.
idx = 0      # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full OR the columns run out. Without the second
# condition `static.iloc[:, idx]` raises IndexError once idx passes the last
# column (the final batch cell already guards against this).
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

plt.tight_layout()
plt.show()
print(idx)  # column position at which the next batch starts
../../_images/_notebooks_datasets_CAMELS_AUS_17_0.png
32
[18]:
# Map the next batch of static features (starting at column 32): one scatter
# panel per feature, stations placed at (long, lat) and coloured by the value.
idx = 32     # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full OR the columns run out. Without the second
# condition `static.iloc[:, idx]` raises IndexError once idx passes the last
# column.
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

plt.tight_layout()
plt.show()
print(idx)  # column position at which the next batch starts
../../_images/_notebooks_datasets_CAMELS_AUS_18_0.png
59
[19]:
# Map the next batch of static features (starting at column 59): one scatter
# panel per feature, stations placed at (long, lat) and coloured by the value.
idx = 59     # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full OR the columns run out. Without the second
# condition `static.iloc[:, idx]` raises IndexError once idx passes the last
# column.
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

plt.tight_layout()
plt.show()
print(idx)  # column position at which the next batch starts
../../_images/_notebooks_datasets_CAMELS_AUS_19_0.png
84
[20]:
# Map the next batch of static features (starting at column 84): one scatter
# panel per feature, stations placed at (long, lat) and coloured by the value.
idx = 84     # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full OR the columns run out. Without the second
# condition `static.iloc[:, idx]` raises IndexError once idx passes the last
# column.
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

plt.tight_layout()
plt.show()
print(idx)  # column position at which the next batch starts
../../_images/_notebooks_datasets_CAMELS_AUS_20_0.png
109
[21]:
# Map the next batch of static features (starting at column 109): one scatter
# panel per feature, stations placed at (long, lat) and coloured by the value.
idx = 109    # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full OR the columns run out. Without the second
# condition `static.iloc[:, idx]` raises IndexError once idx passes the last
# column.
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

plt.tight_layout()
plt.show()
print(idx)  # column position at which the next batch starts
../../_images/_notebooks_datasets_CAMELS_AUS_21_0.png
134
[22]:
# Map the last batch of static features (starting at column 134): one scatter
# panel per feature, stations placed at (long, lat) and coloured by the value.
idx = 134    # position of the next column of `static` to try
ax_num = 0   # number of panels drawn so far

fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()

# Stop when the grid is full or when the columns of `static` are exhausted.
while ax_num < len(axes) and idx < static.shape[1]:

    val = static.iloc[:, idx]
    idx += 1

    try:
        # A ValueError raised here (e.g. a column that cannot be cast to
        # float) skips the column without consuming a panel.
        c = val.astype(float).values.reshape(-1,)

        ax = axes[ax_num]
        # Full arrays are passed; the former hard-coded `[0:222]` slices were no-ops.
        ax, sc = scatter(long, lat, c=c, cmap="hot", show=False, ax=ax)

        process_cbar(ax, sc, border=False, title=val.name)
        ax_num += 1
    except ValueError:
        continue

# The last batch does not fill the grid: hide the panels that received no
# data instead of leaving empty frames in the figure.
for unused_ax in axes[ax_num:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
print(idx)
../../_images/_notebooks_datasets_CAMELS_AUS_22_0.png
149

Dynamic Features

[23]:
# Display the names of the dynamic (time-series) features exposed by the dataset.
dataset.dynamic_features
[23]:
['streamflow_MLd',
 'streamflow_MLd_inclInfilled',
 'streamflow_mmd',
 'et_morton_actual_SILO',
 'et_morton_point_SILO',
 'et_morton_wet_SILO',
 'et_short_crop_SILO',
 'et_tall_crop_SILO',
 'evap_morton_lake_SILO',
 'evap_pan_SILO',
 'evap_syn_SILO',
 'precipitation_AWAP',
 'precipitation_SILO',
 'precipitation_var_SWAP',
 'solarrad_AWAP',
 'tmax_AWAP',
 'tmin_AWAP',
 'vprp_AWAP',
 'mslp_SILO',
 'radiation_SILO',
 'rh_tmax_SILO',
 'rh_tmin_SILO',
 'tmax_SILO',
 'tmin_SILO',
 'vp_deficit_SILO',
 'vp_SILO']

Streamflow

Streamflow normalized by catchment area.

[24]:
# Streamflow table returned by q_mmd(): rows are time steps, columns are
# stations (see the rendered frame in the next cell).
# NOTE(review): units are presumably mm/day, judging by the method name -- confirm.
streamflow = dataset.q_mmd()

streamflow.shape
[24]:
(21184, 222)
[25]:
# Display the streamflow table (the rendering is truncated to the first/last rows and columns).
streamflow
[25]:
912101A 912105A 915011A 917107A 919003A 919201A 919309A 922101B 925001A 926002A ... 304499 305202 307473 308145 308799 312061 314207 314213 315450 318076
time
1957-01-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN 24.086996 NaN 4.093378 NaN NaN NaN 0.614182
1957-01-02 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN 8.254936 NaN 4.266787 NaN NaN NaN 0.691010
1957-01-03 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN 4.943169 NaN 4.095510 NaN NaN NaN 0.559474
1957-01-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN 3.404512 NaN 4.297611 NaN NaN NaN 0.508319
1957-01-05 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN 3.317023 NaN 4.293357 NaN NaN NaN 0.478276
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2014-12-27 0.033409 0.032417 0.000000 0.010655 0.003816 0.0 0.0 0.0 0.036582 0.300530 ... 0.505348 0.447075 2.489122 1.391061 1.407268 0.547383 0.415754 0.093841 0.944416 0.243313
2014-12-28 0.037260 0.028840 0.000000 0.010369 0.002212 0.0 0.0 0.0 0.036308 0.293852 ... 0.506314 0.126054 2.198925 1.255558 1.330847 0.524695 0.386941 0.084121 0.881395 0.219409
2014-12-29 0.037653 0.027334 0.000000 0.010321 0.001903 0.0 0.0 0.0 0.036154 0.292314 ... 0.482631 0.057792 1.557511 1.061568 1.150340 0.501252 0.378870 0.075581 0.802402 0.207761
2014-12-30 0.032432 0.027785 0.000000 0.028903 0.001662 0.0 0.0 0.0 0.034776 0.293543 ... 0.548162 0.041354 1.354081 12.621202 17.800031 0.731634 0.387640 0.078101 7.128175 0.205275
2014-12-31 0.032191 0.169300 0.003321 0.042891 0.002262 0.0 0.0 0.0 0.034613 0.291518 ... 2.707964 0.135351 11.706610 10.158581 9.200823 1.745331 1.042905 0.134921 6.993984 0.223863

21184 rows × 222 columns

[26]:
# Heatmap overview of the streamflow table via the EDA helper.
# NOTE(review): presumably this visualises missing values per station -- confirm against ai4water docs.
EDA(data=streamflow, save=False).heatmap()
../../_images/_notebooks_datasets_CAMELS_AUS_28_0.png
[26]:
<AxesSubplot: ylabel='Examples'>
[27]:
# Streamflow histograms for the first 49 stations (columns 0-48): one panel per
# station on a 7 x 7 grid with a shared y-axis.
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

panels = axes.ravel()
for idx in range(panels.size):
    ax = panels[idx]
    station_flow = streamflow.iloc[:, idx].values.reshape(-1,)
    hist(station_flow, bins=20, ax=ax, show=False)

plt.show()
../../_images/_notebooks_datasets_CAMELS_AUS_29_0.png
[28]:
# Streamflow histograms for the next 49 stations (columns 49-97) on a 7 x 7
# grid with a shared y-axis.
st = 49

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

panels = axes.ravel()
for offset in range(panels.size):
    station_flow = streamflow.iloc[:, st + offset].values.reshape(-1,)
    hist(station_flow, bins=20, ax=panels[offset], show=False)

idx = st + panels.size  # first column not plotted yet

plt.show()
print(idx)
../../_images/_notebooks_datasets_CAMELS_AUS_30_0.png
98
[29]:
# Streamflow histograms for the next 49 stations (columns 98-146) on a 7 x 7
# grid with a shared y-axis.
st = 98

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

panels = axes.ravel()
for offset in range(panels.size):
    station_flow = streamflow.iloc[:, st + offset].values.reshape(-1,)
    hist(station_flow, bins=20, ax=panels[offset], show=False)

idx = st + panels.size  # first column not plotted yet

plt.show()
print(idx)
../../_images/_notebooks_datasets_CAMELS_AUS_31_0.png
147
[30]:
# Streamflow histograms for the next 49 stations (columns 147-195) on a 7 x 7
# grid with a shared y-axis.
st = 147

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

panels = axes.ravel()
for offset in range(panels.size):
    station_flow = streamflow.iloc[:, st + offset].values.reshape(-1,)
    hist(station_flow, bins=20, ax=panels[offset], show=False)

idx = st + panels.size  # first column not plotted yet

plt.show()
print(idx)
../../_images/_notebooks_datasets_CAMELS_AUS_32_0.png
196
[31]:
# Streamflow histograms for the remaining stations (columns 196 onwards) on a
# 7 x 7 grid with a shared y-axis.
st = 196
n_stations = streamflow.shape[1]  # replaces the hard-coded last column index (221)

fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")

idx = st
for ax in axes.flat:

    if idx < n_stations:
        hist(streamflow.iloc[:, idx].values.reshape(-1,),
             bins=20,
             ax=ax,
             show=False
            )
    else:
        # No station left for this panel: hide it rather than showing an empty frame.
        ax.set_visible(False)

    # Incremented for every panel (drawn or hidden), as before, so the printed
    # value is unchanged.
    idx += 1

plt.show()
print(idx)
../../_images/_notebooks_datasets_CAMELS_AUS_33_0.png
245
[32]:
# Distribution of the skewness of streamflow, one skew value per column (station).
streamflow_skew = streamflow.skew().values.reshape(-1,)
_ = hist(streamflow_skew, bins=50)
../../_images/_notebooks_datasets_CAMELS_AUS_34_0.png

Evapotranspiration

[60]:
# Fetch the 'et_morton_actual_SILO' series and reshape it into a time-indexed
# frame (the printed shape shows one column per station).
et_morton_actual_SILO = dataset.fetch(
    dynamic_features='et_morton_actual_SILO',
    as_dataframe=True,
).reset_index()

# Turn the 'time' column into a DatetimeIndex, then discard the
# 'dynamic_features' column, which is not needed for a single feature.
timestamps = et_morton_actual_SILO.pop('time')
et_morton_actual_SILO.index = pd.to_datetime(timestamps)
del et_morton_actual_SILO['dynamic_features']

# Shape and per-column count of missing values, followed by a heatmap overview.
print(et_morton_actual_SILO.shape)
print(et_morton_actual_SILO.isna().sum())
EDA(data=et_morton_actual_SILO, save=False).heatmap()
(21184, 222)
912101A    0
912105A    0
915011A    0
917107A    0
919003A    0
          ..
312061     0
314207     0
314213     0
315450     0
318076     0
Length: 222, dtype: int64
../../_images/_notebooks_datasets_CAMELS_AUS_36_1.png
[60]:
<AxesSubplot: ylabel='Examples'>
[61]:
# Distribution of the skewness of actual evapotranspiration, one skew value per column (station).
et_skew = et_morton_actual_SILO.skew().values.reshape(-1,)
_ = hist(et_skew, bins=50)
../../_images/_notebooks_datasets_CAMELS_AUS_37_0.png

Air Temperature

[ ]:
def _fetch_dynamic_feature(name):
    """Fetch one dynamic feature as a time-indexed DataFrame.

    Parameters
    ----------
    name : str
        Name of the dynamic feature, e.g. ``'tmax_SILO'``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``dataset.fetch`` with its 'time' column
        converted to a DatetimeIndex and the 'dynamic_features' column removed.
    """
    frame = dataset.fetch(dynamic_features=name, as_dataframe=True).reset_index()
    frame.index = pd.to_datetime(frame.pop('time'))
    frame.pop('dynamic_features')
    return frame


# Daily maximum and minimum air temperature, fetched through the same helper
# instead of two copy-pasted code paths.
tmax_SILO = _fetch_dynamic_feature('tmax_SILO')
print(tmax_SILO.shape)

tmin_SILO = _fetch_dynamic_feature('tmin_SILO')
print(tmin_SILO.shape)

# Mean of tmax and tmin. The DataFrame arithmetic aligns on the time index and
# station columns, so values are paired by label rather than by position as
# the previous np.mean over raw `.values` arrays did.
tavg = (tmax_SILO + tmin_SILO) / 2
print(tavg.shape)

EDA(data=tavg, save=False).heatmap()
[66]:

# Distribution of the skewness of average air temperature, one skew value per column (station).
tavg_skew = tavg.skew().values.reshape(-1,)
_ = hist(tavg_skew, bins=50)
../../_images/_notebooks_datasets_CAMELS_AUS_40_0.png

Precipitation

[64]:
# Fetch the 'precipitation_SILO' series and reshape it into a time-indexed
# frame (the printed shape shows one column per station).
pcp = dataset.fetch(
    dynamic_features='precipitation_SILO',
    as_dataframe=True,
).reset_index()

# Turn the 'time' column into a DatetimeIndex, then discard the
# 'dynamic_features' column, which is not needed for a single feature.
pcp_time = pcp.pop('time')
pcp.index = pd.to_datetime(pcp_time)
del pcp['dynamic_features']

print(pcp.shape)
EDA(data=pcp, save=False).heatmap()
(21184, 222)
../../_images/_notebooks_datasets_CAMELS_AUS_42_1.png
[64]:
<AxesSubplot: ylabel='Examples'>
[65]:
# Distribution of the skewness of precipitation, one skew value per column (station).
pcp_skew = pcp.skew().values.reshape(-1,)
_ = hist(pcp_skew, bins=50)
../../_images/_notebooks_datasets_CAMELS_AUS_43_0.png
[ ]: